require 'cgi'

# Matches the code cell of a Rouge-highlighted table in rendered HTML and
# captures the inner markup of its <pre> element (the only capture group).
CODE_BLOCK = /<td class="rouge-code"><pre>(.*?)<\/pre><\/td>/m

module Jekyll
  # Generator that renders every document outside the excluded collections,
  # extracts each highlighted code block from the resulting HTML and registers
  # it on the site as a plain-text page named "<document url>.<index>.txt".
  class PlainTextGenerator < Generator
    safe true

    # Names of collections whose documents are not scanned for code blocks.
    EXCLUDED_COLLECTIONS = %w[slides].freeze

    # Highlighting <span class="..."> wrappers; only the wrapped text is kept.
    HIGHLIGHT_SPAN = /<span class=".*?">(.*?)<\/span>/m

    # Adds one plain-text page per highlighted code block found in the
    # rendered output of each document of each non-excluded collection.
    #
    # @param site [Jekyll::Site] the site being built; generated pages are
    #   appended to +site.pages+
    # @return [void]
    def generate(site)
      site.collections.each do |collection_name, collection|
        next if EXCLUDED_COLLECTIONS.include?(collection_name)

        collection.docs.each { |document| add_plain_text_pages(site, document) }
      end
    end

    private

    # Creates a plain-text page for every code block in +document+. Pages are
    # numbered from 0 in the order the blocks appear in the rendered HTML.
    def add_plain_text_pages(site, document)
      rendered_html(site, document).scan(CODE_BLOCK).each_with_index do |(highlighted), index|
        page = PlaintextPageWithoutAFile.new(site, site.source, '', "#{document.url}.#{index}.txt")
        page.content = plain_text(highlighted)
        site.pages << page
      end
    end

    # Renders +document+ and returns the resulting HTML string.
    #
    # NOTE(review): Jekyll's Renderer#run appears to assign the converted
    # markup back to document.content. The original content is restored here
    # so that the site's own render pass still starts from the unrendered
    # source -- confirm against the Jekyll version in use.
    def rendered_html(site, document)
      original_content = document.content
      begin
        Renderer.new(site, document).run
      ensure
        document.content = original_content
      end
    end

    # Strips the highlighting spans from a captured code block and decodes
    # its HTML entities, yielding the code as plain text.
    def plain_text(highlighted)
      CGI.unescapeHTML(highlighted.gsub(HIGHLIGHT_SPAN, '\1'))
    end
  end

  # Page that is not backed by a file: read_yaml is overridden to accept and
  # ignore any arguments and to only populate @data (once) with fixed markers.
  #
  # Taken from:
  # https://github.com/jekyll/jekyll-feed/blob/c552b8ef7bd7a4babcfb5aec2b22283a5bc354dd/lib/jekyll-feed/page-without-a-file.rb#L4
  class PlaintextPageWithoutAFile < Jekyll::Page
    def read_yaml(*)
      @data ||= {
        "plaintext" => true,
        "generated" => true
      }
    end
  end
end