require 'cgi'
# Captures the inner HTML of every `<td class="rouge-code"><pre>…</pre></td>`
# cell found in a rendered page (one capture group: the text between the
# `<pre>` tags). The `m` flag lets `.` span newlines so multi-line listings
# are captured, and the lazy `.*?` stops at the first closing `</pre></td>`.
# NOTE(review): the `rouge-code` class is presumably the one emitted by the
# Rouge highlighter's line-number table layout — confirm against site config.
CODE_BLOCK = /<td class="rouge-code"><pre>(.*?)<\/pre><\/td>/m

module Jekyll
  # Generator that publishes every highlighted code listing found in the
  # site's collection documents as its own plain-text page.
  #
  # For each document (outside the excluded collection) the document is
  # rendered to HTML, every match of CODE_BLOCK is extracted, highlighting
  # markup is removed, HTML entities are decoded, and the result is appended
  # to `site.pages` under the name "<document url>.<index>.txt", where index
  # is the zero-based position of the listing within the document.
  class PlainTextGenerator < Generator
    safe true

    # Name of the collection whose documents are skipped entirely.
    EXCLUDED_COLLECTION = "slides".freeze

    # Matches any opening or closing <span> tag. The listing text is still
    # HTML-escaped at the point this is applied (entities are decoded only
    # afterwards), so a literal "<span" can only be markup, never code.
    # Stripping the tags individually, rather than pairing each opening tag
    # with the nearest "</span>", also copes with nested spans and spans that
    # carry attributes other than `class`.
    SPAN_TAG = /<\/?span\b[^>]*>/

    # Entry point invoked with the site being built.
    #
    # @param site [Jekyll::Site] site whose collections are scanned and whose
    #   `pages` list receives the generated plain-text pages
    def generate(site)
      site.collections.each do |collection_name, collection|
        next if collection_name == EXCLUDED_COLLECTION

        collection.docs.each { |document| add_plain_text_pages(site, document) }
      end
    end

    private

    # Renders one document and appends a plain-text page to `site.pages` for
    # every code listing found in the rendered HTML.
    #
    # @param site [Jekyll::Site]
    # @param document [Jekyll::Document]
    def add_plain_text_pages(site, document)
      html = Renderer.new(site, document).run

      # CODE_BLOCK has a single capture group, so each match is a one-element
      # array; destructure it directly in the block parameters.
      html.scan(CODE_BLOCK).each_with_index do |(highlighted), index|
        plain = PlaintextPageWithoutAFile.new(site, site.source, '', "#{document.url}.#{index}.txt")
        plain.content = plain_text(highlighted)
        site.pages << plain
      end
    end

    # Converts the inner HTML of a highlighted listing back to source text.
    #
    # @param highlighted [String] HTML captured from inside the <pre> element
    # @return [String] the listing with span markup removed and HTML entities
    #   decoded
    def plain_text(highlighted)
      # Order matters: remove markup first, then decode entities, so that an
      # escaped "&lt;span&gt;" appearing in the code itself survives.
      CGI.unescapeHTML(highlighted.gsub(SPAN_TAG, ''))
    end
  end

  # A Jekyll::Page subclass for pages whose content is assigned in memory
  # rather than read from a source file.
  #
  # Adapted from jekyll-feed's page-without-a-file:
  # https://github.com/jekyll/jekyll-feed/blob/c552b8ef7bd7a4babcfb5aec2b22283a5bc354dd/lib/jekyll-feed/page-without-a-file.rb#L4
  class PlaintextPageWithoutAFile < Jekyll::Page
    # Replaces front-matter loading with a fixed set of page data.
    #
    # All arguments are accepted and ignored. On the first call the page data
    # is initialised with the "plaintext" and "generated" flags set to true;
    # later calls leave the existing data untouched.
    # NOTE(review): presumably this override is what stops Jekyll::Page from
    # trying to read a file from disk — confirm against Jekyll::Page#initialize.
    #
    # @return [Hash] the page data
    def read_yaml(*)
      @data ||= { "plaintext" => true, "generated" => true }
    end
  end
end