diff --git a/lib/deface/parser.rb b/lib/deface/parser.rb index 2dad5c7..d21b428 100644 --- a/lib/deface/parser.rb +++ b/lib/deface/parser.rb @@ -4,13 +4,12 @@ module Deface class Parser # converts erb to markup - # def self.erb_markup!(source) - #all opening html tags that contain <% %> blocks + # All opening html tags that contain <% %> blocks source.scan(/<\w+[^<>]+(?:<%.*?%>[^<>]*)+/m).each do |line| - #regexs to catch <% %> inside attributes id="<% something %>" - with double, single or no quotes + # Regexs to catch <% %> inside attributes id="<% something %>" - with double, single or no quotes erb_attrs_regexs = [/([\w-]+)(\s?=\s?)(")([^"]*<%.*?%>[^"]*)/m, /([\w-]+)(\s?=\s?)(')([^']*<%.*?%>[^']*)'/m, /([\w-]+)(\s?=\s?)()(<%.*?%>)(?:\s|>|\z)/m] @@ -25,7 +24,7 @@ def self.erb_markup!(source) end i = -1 - #catch all <% %> inside tags id

> , not inside attrs + # Catch all <% %> inside tags id

> , not inside attrs replace_line.scan(/(<%.*?%>)/m).each do |match| replace_line.sub!(match[0]) { |m| m = " data-erb-#{i += 1}=\"#{CGI.escapeHTML(match[0])}\"" } end @@ -33,7 +32,7 @@ def self.erb_markup!(source) source.sub!(line) { |m| m = replace_line } end - #replaces all <% %> not inside opening html tags + # Replaces all <% %> not inside opening html tags replacements = [ {"<%=" => ""}, {"<%" => ""}, {"%>" => ""} ] @@ -47,8 +46,7 @@ def self.erb_markup!(source) source end - # undoes ERB markup generated by Deface::Parser::ERB - # + # Undoes ERB markup generated by Deface::Parser::ERB def self.undo_erb_markup!(source) replacements = [ {"" => '<%'}, {"" => '<%'}, @@ -66,14 +64,13 @@ def self.undo_erb_markup!(source) source.gsub!("data-erb-#{match[0]}=#{match[1]}#{match[2]}#{match[1]}") { |m| "#{match[0]}=#{match[1]}#{CGI.unescapeHTML(match[2])}#{match[1]}" } end - #un-escape changes from Nokogiri and erb-markup! + # Un-escape changes from Nokogiri and erb-markup! source.scan(/(<%.*?)((?:(?!%>)[\s\S])*)(%>)/).each do |match| source.gsub!("#{match[0]}#{match[1]}#{match[2]}") { |m| m = "#{match[0]}#{ CGI.unescapeHTML match[1] }#{match[2]}" } end if RUBY_PLATFORM == 'java' - #un-escapes changes from Nokogiri under Java, where " are converted to %22 when in an attribute of an element - # + # Un-escapes changes from Nokogiri under Java, where " are converted to %22 when in an attribute of an element source.scan(/(<%.*?)((?:(?!%>)[\s\S])*)(%>)/).each do |match| source.gsub!("#{match[0]}#{match[1]}#{match[2]}") { |m| m = "#{match[0]}#{ match[1].gsub('%22', '"') }#{match[2]}" } end @@ -82,24 +79,24 @@ def self.undo_erb_markup!(source) source end - def self.convert(source) - # Look for # encoding: *. If we find one, we'll encode the - # String in that encoding, otherwise, we'll use the - # default external encoding. - encoding = source.scan(/#{ActionView::Template::Handlers::ERB.const_get(:ENCODING_TAG)}/).first.try(:last) || Encoding.default_external - - # Tag the source with the default external encoding - # or the encoding specified in the file - if source.frozen? - source = source.dup.force_encoding(encoding) - else - source.force_encoding(encoding) - end + # @private + # Not part of the public API, but used internally + # Look for # encoding: *. If we find one, we'll encode the + # String in that encoding, otherwise, we'll use the default external encoding. + def self.apply_encoding!(source) + encoding_tag = ActionView::Template::Handlers::ERB::ENCODING_TAG + encoding = source.scan(/#{encoding_tag}/).first.try(:last) || Encoding.default_external - unless source.valid_encoding? - raise ActionView::WrongEncodingError.new(source, encoding) - end + source = source.force_encoding(encoding) unless source.encoding == encoding + + raise ActionView::WrongEncodingError.new(source, encoding) unless source.valid_encoding? + source + end + + def self.convert(source) + source = source.dup + apply_encoding!(source) erb_markup!(source) if source =~ /)[\s\S])*>/ diff --git a/lib/deface/search.rb b/lib/deface/search.rb index 8795c31..db70e92 100644 --- a/lib/deface/search.rb +++ b/lib/deface/search.rb @@ -1,25 +1,29 @@ +# frozen_string_literal: true + module Deface module Search module ClassMethods + VIRTUAL_PATH_CLEANUP_REGEXES = [ + /^\//, # starting with a slash + /\.\w+\z/ # ending with a file extension + ].freeze - # finds all applicable overrides for supplied template - # + # Finds all applicable overrides for supplied template def find(details) return [] if self.all.empty? || details.empty? virtual_path = details[:virtual_path].dup return [] if virtual_path.nil? - [/^\//, /\.\w+\z/].each { |regex| virtual_path.gsub!(regex, '') } - - result = [] - result << self.all[virtual_path.to_sym].try(:values) + VIRTUAL_PATH_CLEANUP_REGEXES.each { |regex| virtual_path.gsub!(regex, '') } - result.flatten.compact.sort_by &:sequence + result = self.all[virtual_path.to_sym]&.values&.flatten || [] + result.compact! + result.sort_by!(&:sequence) + result end - # finds all overrides that are using a template / parital as there source - # + # Finds all overrides that are using a template / parital as there source def find_using(virtual_path) self.all.map do |key, overrides_by_name| overrides_by_name.values.select do |override| diff --git a/spec/deface/parser_spec.rb b/spec/deface/parser_spec.rb index e0d1dd1..ad79d08 100644 --- a/spec/deface/parser_spec.rb +++ b/spec/deface/parser_spec.rb @@ -4,6 +4,28 @@ module Deface describe Parser do + describe "#apply_encoding!" do + it "respects a valid encoding tag" do + source = %q{<%# encoding: ISO-8859-1 %>Can you say ümlaut?} + source = Deface::Parser.apply_encoding!(source) + expect(source.encoding.name).to eq('ISO-8859-1') + end + + it "forces the default encoding if the encoding tag is missing" do + source = %q{Can you say ümlaut?} + source.force_encoding('ISO-8859-1') + + source = Deface::Parser.apply_encoding!(source) + + expect(Encoding.default_external.name).to eq('UTF-8') + expect(source.encoding).to eq(Encoding.default_external) + end + + it "raises an error if the source is not valid in the specified encoding" do + source = %q{<%# encoding: US-ASCII %>Can you say ümlaut?} + expect { Deface::Parser.apply_encoding!(source) }.to raise_error(ActionView::WrongEncodingError) + end + end describe "#convert" do it "should parse html fragment" do @@ -127,8 +149,11 @@ module Deface end it "should convert multiple <% ... %> inside html tag" do - tag = Deface::Parser.convert(%q{

alt="<% x = 'y' + - \"2\" %>" title='<% method_name %>' <%= other_method %>

}) + tag = Deface::Parser.convert( + %{

alt="<% x = 'y' + \n} + + %q{ \"2\" %>" } + + %{title='<% method_name %>' <%= other_method %>

} + ) tag = tag.css('p').first expect(tag.attributes['data-erb-0'].value).to eq("<%= method_name %>") @@ -163,24 +188,6 @@ module Deface expect(tag.attributes.key?('silent')).to be_truthy expect(tag.text).to eq " method_name( :key => 'value' ) " end - - it "should respect valid encoding tag" do - source = %q{<%# encoding: ISO-8859-1 %>Can you say ümlaut?} - Deface::Parser.convert(source) - expect(source.encoding.name).to eq('ISO-8859-1') - end - - it "should force default encoding" do - source = %q{Can you say ümlaut?} - source.force_encoding('ISO-8859-1') - Deface::Parser.convert(source) - expect(source.encoding).to eq(Encoding.default_external) - end - - it "should force default encoding" do - source = %q{<%# encoding: US-ASCII %>Can you say ümlaut?} - expect { Deface::Parser.convert(source) }.to raise_error(ActionView::WrongEncodingError) - end end describe "#undo_erb_markup" do