require 'cgi'

# Captures the body of one highlighted code cell in rendered HTML: everything
# from a newline up to the next "</pre></td>". The /m flag lets "." cross line
# breaks so multi-line code is captured as a single match.
# NOTE(review): there is no opening-tag anchor (e.g. a "<td ...><pre>" prefix)
# in front of the capture group — it looks like one was lost; confirm against
# the markup the highlighter actually emits.
CODE_BLOCK = /\n(.*?)<\/pre><\/td>/m

module Jekyll
  # Generator that publishes every highlighted code block found in a
  # collection document as an additional plain-text page.
  #
  # Each document is rendered, the resulting HTML is scanned with CODE_BLOCK,
  # and the Nth match (counting from 1) becomes a page named
  # "<document.url>.<N>.txt" whose content is the code with highlighting
  # markup removed and HTML entities decoded.
  class PlainTextGenerator < Generator
    safe true

    # Opening or closing <span> tag, with or without attributes. Stripping the
    # tags themselves (rather than matching open/close pairs) also copes with
    # nested spans and never leaves an opening tag behind in the output.
    SPAN_TAG = %r{</?span\b[^>]*>}.freeze

    # Adds one plain-text page to +site.pages+ per code block of every
    # document in every collection.
    #
    # @param site [Jekyll::Site] the site being generated
    # @return [Object] whatever +site.collections.each+ returns
    def generate(site)
      site.collections.each do |_label, collection|
        collection.docs.each do |document|
          # Render the document here so the highlighted HTML can be scanned.
          html = Renderer.new(site, document).run

          # CODE_BLOCK has a single capture group, so every match is a
          # one-element array; numbering starts at 1.
          html.scan(CODE_BLOCK).each.with_index(1) do |(highlighted), index|
            site.pages << plain_text_page(site, "#{document.url}.#{index}.txt", highlighted)
          end
        end
      end
    end

    private

    # Builds the file-less page holding the plain text of one code block.
    #
    # @param site [Jekyll::Site] the site the page belongs to
    # @param name [String] name of the generated page
    # @param highlighted [String] inner HTML of the highlighted code block
    # @return [PageWithoutAFile] the page, not yet added to the site
    def plain_text_page(site, name, highlighted)
      page = PageWithoutAFile.new(site, site.source, "", name)
      # Strip the tags before decoding entities, so that a "<span>" which is
      # part of the code itself (still entity-escaped at this point) survives.
      page.content = CGI.unescapeHTML(highlighted.gsub(SPAN_TAG, ""))
      page
    end
  end

  # Taken from:
  # https://github.com/jekyll/jekyll-feed/blob/c552b8ef7bd7a4babcfb5aec2b22283a5bc354dd/lib/jekyll-feed/page-without-a-file.rb#L4
  #
  # A Jekyll::Page whose front matter is not loaded from disk: +read_yaml+
  # just supplies a fixed data hash.
  # NOTE(review): presumably Jekyll::Page calls +read_yaml+ while initializing,
  # which is what makes a page "without a file" possible — confirm against the
  # Jekyll version in use.
  class PageWithoutAFile < Jekyll::Page
    # Ignores all arguments and memoizes the page data, flagging the page with
    # "plaintext" => "true" (a String, not a boolean).
    #
    # @return [Hash] the page's data hash
    def read_yaml(*_ignored)
      @data ||= { "plaintext" => "true" }
    end
  end
end