diff --git a/lib/rdoc/markdown.kpeg b/lib/rdoc/markdown.kpeg index 91d05c57a9..cde3e927f4 100644 --- a/lib/rdoc/markdown.kpeg +++ b/lib/rdoc/markdown.kpeg @@ -303,6 +303,20 @@ end end + # Escape character that has special meaning in RDoc format. + # To allow rdoc-styled link used in markdown format for now, bracket and brace are not escaped. + + def rdoc_escape(text) + text.gsub(/[*+<\\_]/) {|s| "\\#{s}" } + end + + # Escape link url that contains brackets. + # Brackets needs escape because link url will be surrounded by `[]` in RDoc format. + + def rdoc_link_url_escape(text) + text.gsub(/[\[\]\\]/) {|s| "\\#{s}" } + end + ## # :category: Extensions # @@ -969,11 +983,11 @@ Space = @Spacechar+ { " " } Str = @StartList:a < @NormalChar+ > { a = text } - ( StrChunk:c { a << c } )* { a } + ( StrChunk:c { a << c } )* { rdoc_escape(a) } StrChunk = < (@NormalChar | /_+/ &Alphanumeric)+ > { text } -EscapedChar = "\\" !@Newline < /[:\\`|*_{}\[\]()#+.!><-]/ > { text } +EscapedChar = "\\" !@Newline < /[:\\`|*_{}\[\]()#+.!><-]/ > { rdoc_escape(text) } Entity = ( HexEntity | DecEntity | CharEntity ):a { a } @@ -988,7 +1002,7 @@ TerminalEndline = @Sp @Newline @Eof LineBreak = " " @NormalEndline { RDoc::Markup::HardBreak.new } Symbol = < @SpecialChar > - { text } + { rdoc_escape(text) } # This keeps the parser from getting bogged down on long strings of '*' or '_', # or strings of '*' or '_' with space on each side: @@ -1053,7 +1067,7 @@ ReferenceLinkSingle = Label:content < (Spnl "[]")? 
> { link_to content, content, text } ExplicitLink = ExplicitLinkWithLabel:a - { "{#{a[:label]}}[#{a[:link]}]" } + { "{#{a[:label]}}[#{rdoc_link_url_escape(a[:link])}]" } ExplicitLinkWithLabel = Label:label "(" @Sp Source:link Spnl Title @Sp ")" { { label: label, link: link } } @@ -1163,12 +1177,12 @@ Newline = %literals.Newline Spacechar = %literals.Spacechar HexEntity = /&#x/i < /[0-9a-fA-F]+/ > ";" - { [text.to_i(16)].pack 'U' } + { rdoc_escape([text.to_i(16)].pack('U')) } DecEntity = "&#" < /[0-9]+/ > ";" - { [text.to_i].pack 'U' } + { rdoc_escape([text.to_i].pack('U')) } CharEntity = "&" < /[A-Za-z0-9]+/ > ";" { if entity = HTML_ENTITIES[text] then - entity.pack 'U*' + rdoc_escape(entity.pack('U*')) else "&#{text};" end diff --git a/lib/rdoc/markdown.rb b/lib/rdoc/markdown.rb index 811c065ec1..7e4adcefc3 100644 --- a/lib/rdoc/markdown.rb +++ b/lib/rdoc/markdown.rb @@ -688,6 +688,20 @@ def emphasis text end end + # Escape character that has special meaning in RDoc format. + # To allow rdoc-styled link used in markdown format for now, bracket and brace are not escaped. + + def rdoc_escape(text) + text.gsub(/[*+<\\_]/) {|s| "\\#{s}" } + end + + # Escape link url that contains brackets. + # Brackets needs escape because link url will be surrounded by `[]` in RDoc format. 
+ + def rdoc_link_url_escape(text) + text.gsub(/[\[\]\\]/) {|s| "\\#{s}" } + end + ## # :category: Extensions # @@ -9731,7 +9745,7 @@ def _Space return _tmp end - # Str = @StartList:a < @NormalChar+ > { a = text } (StrChunk:c { a << c })* { a } + # Str = @StartList:a < @NormalChar+ > { a = text } (StrChunk:c { a << c })* { rdoc_escape(a) } def _Str _save = self.pos @@ -9792,7 +9806,7 @@ def _Str self.pos = _save break end - @result = begin; a ; end + @result = begin; rdoc_escape(a) ; end _tmp = true unless _tmp self.pos = _save @@ -9894,7 +9908,7 @@ def _StrChunk return _tmp end - # EscapedChar = "\\" !@Newline < /[:\\`|*_{}\[\]()#+.!><-]/ > { text } + # EscapedChar = "\\" !@Newline < /[:\\`|*_{}\[\]()#+.!><-]/ > { rdoc_escape(text) } def _EscapedChar _save = self.pos @@ -9921,7 +9935,7 @@ def _EscapedChar self.pos = _save break end - @result = begin; text ; end + @result = begin; rdoc_escape(text) ; end _tmp = true unless _tmp self.pos = _save @@ -10122,7 +10136,7 @@ def _LineBreak return _tmp end - # Symbol = < @SpecialChar > { text } + # Symbol = < @SpecialChar > { rdoc_escape(text) } def _Symbol _save = self.pos @@ -10136,7 +10150,7 @@ def _Symbol self.pos = _save break end - @result = begin; text ; end + @result = begin; rdoc_escape(text) ; end _tmp = true unless _tmp self.pos = _save @@ -11189,7 +11203,7 @@ def _ReferenceLinkSingle return _tmp end - # ExplicitLink = ExplicitLinkWithLabel:a { "{#{a[:label]}}[#{a[:link]}]" } + # ExplicitLink = ExplicitLinkWithLabel:a { "{#{a[:label]}}[#{rdoc_link_url_escape(a[:link])}]" } def _ExplicitLink _save = self.pos @@ -11200,7 +11214,7 @@ def _ExplicitLink self.pos = _save break end - @result = begin; "{#{a[:label]}}[#{a[:link]}]" ; end + @result = begin; "{#{a[:label]}}[#{rdoc_link_url_escape(a[:link])}]" ; end _tmp = true unless _tmp self.pos = _save @@ -14615,7 +14629,7 @@ def _Spacechar return _tmp end - # HexEntity = /&#x/i < /[0-9a-fA-F]+/ > ";" { [text.to_i(16)].pack 'U' } + # HexEntity = /&#x/i < /[0-9a-fA-F]+/ > ";" 
{ rdoc_escape([text.to_i(16)].pack('U')) } def _HexEntity _save = self.pos @@ -14639,7 +14653,7 @@ def _HexEntity self.pos = _save break end - @result = begin; [text.to_i(16)].pack 'U' ; end + @result = begin; rdoc_escape([text.to_i(16)].pack('U')) ; end _tmp = true unless _tmp self.pos = _save @@ -14651,7 +14665,7 @@ def _HexEntity return _tmp end - # DecEntity = "&#" < /[0-9]+/ > ";" { [text.to_i].pack 'U' } + # DecEntity = "&#" < /[0-9]+/ > ";" { rdoc_escape([text.to_i].pack('U')) } def _DecEntity _save = self.pos @@ -14675,7 +14689,7 @@ def _DecEntity self.pos = _save break end - @result = begin; [text.to_i].pack 'U' ; end + @result = begin; rdoc_escape([text.to_i].pack('U')) ; end _tmp = true unless _tmp self.pos = _save @@ -14687,7 +14701,7 @@ def _DecEntity return _tmp end - # CharEntity = "&" < /[A-Za-z0-9]+/ > ";" { if entity = HTML_ENTITIES[text] then entity.pack 'U*' else "&#{text};" end } + # CharEntity = "&" < /[A-Za-z0-9]+/ > ";" { if entity = HTML_ENTITIES[text] then rdoc_escape(entity.pack('U*')) else "&#{text};" end } def _CharEntity _save = self.pos @@ -14712,7 +14726,7 @@ def _CharEntity break end @result = begin; if entity = HTML_ENTITIES[text] then - entity.pack 'U*' + rdoc_escape(entity.pack('U*')) else "&#{text};" end @@ -16563,15 +16577,15 @@ def _DefinitionListDefinition Rules[:_Inlines] = rule_info("Inlines", "(!@Endline Inline:i { i } | @Endline:c !(&{ github? } Ticks3 /[^`\\n]*$/) &Inline { c })+:chunks @Endline? 
{ chunks }") Rules[:_Inline] = rule_info("Inline", "(Str | @Endline | UlOrStarLine | @Space | Strong | Emph | Strike | Image | Link | NoteReference | InlineNote | Code | RawHtml | Entity | EscapedChar | Symbol)") Rules[:_Space] = rule_info("Space", "@Spacechar+ { \" \" }") - Rules[:_Str] = rule_info("Str", "@StartList:a < @NormalChar+ > { a = text } (StrChunk:c { a << c })* { a }") + Rules[:_Str] = rule_info("Str", "@StartList:a < @NormalChar+ > { a = text } (StrChunk:c { a << c })* { rdoc_escape(a) }") Rules[:_StrChunk] = rule_info("StrChunk", "< (@NormalChar | /_+/ &Alphanumeric)+ > { text }") - Rules[:_EscapedChar] = rule_info("EscapedChar", "\"\\\\\" !@Newline < /[:\\\\`|*_{}\\[\\]()\#+.!><-]/ > { text }") + Rules[:_EscapedChar] = rule_info("EscapedChar", "\"\\\\\" !@Newline < /[:\\\\`|*_{}\\[\\]()\#+.!><-]/ > { rdoc_escape(text) }") Rules[:_Entity] = rule_info("Entity", "(HexEntity | DecEntity | CharEntity):a { a }") Rules[:_Endline] = rule_info("Endline", "(@LineBreak | @TerminalEndline | @NormalEndline)") Rules[:_NormalEndline] = rule_info("NormalEndline", "@Sp @Newline !@BlankLine !\">\" !AtxStart !(Line /={1,}|-{1,}/ @Newline) { \"\\n\" }") Rules[:_TerminalEndline] = rule_info("TerminalEndline", "@Sp @Newline @Eof") Rules[:_LineBreak] = rule_info("LineBreak", "\" \" @NormalEndline { RDoc::Markup::HardBreak.new }") - Rules[:_Symbol] = rule_info("Symbol", "< @SpecialChar > { text }") + Rules[:_Symbol] = rule_info("Symbol", "< @SpecialChar > { rdoc_escape(text) }") Rules[:_UlOrStarLine] = rule_info("UlOrStarLine", "(UlLine | StarLine):a { a }") Rules[:_StarLine] = rule_info("StarLine", "(< /\\*{4,}/ > { text } | < @Spacechar /\\*+/ &@Spacechar > { text })") Rules[:_UlLine] = rule_info("UlLine", "(< /_{4,}/ > { text } | < @Spacechar /_+/ &@Spacechar > { text })") @@ -16588,7 +16602,7 @@ def _DefinitionListDefinition Rules[:_ReferenceLink] = rule_info("ReferenceLink", "(ReferenceLinkDouble | ReferenceLinkSingle)") Rules[:_ReferenceLinkDouble] = 
rule_info("ReferenceLinkDouble", "Label:content < Spnl > !\"[]\" Label:label { link_to content, label, text }") Rules[:_ReferenceLinkSingle] = rule_info("ReferenceLinkSingle", "Label:content < (Spnl \"[]\")? > { link_to content, content, text }") - Rules[:_ExplicitLink] = rule_info("ExplicitLink", "ExplicitLinkWithLabel:a { \"{\#{a[:label]}}[\#{a[:link]}]\" }") + Rules[:_ExplicitLink] = rule_info("ExplicitLink", "ExplicitLinkWithLabel:a { \"{\#{a[:label]}}[\#{rdoc_link_url_escape(a[:link])}]\" }") Rules[:_ExplicitLinkWithLabel] = rule_info("ExplicitLinkWithLabel", "Label:label \"(\" @Sp Source:link Spnl Title @Sp \")\" { { label: label, link: link } }") Rules[:_Source] = rule_info("Source", "(\"<\" < SourceContents > \">\" | < SourceContents >) { text }") Rules[:_SourceContents] = rule_info("SourceContents", "((!\"(\" !\")\" !\">\" Nonspacechar)+ | \"(\" SourceContents \")\")*") @@ -16631,9 +16645,9 @@ def _DefinitionListDefinition Rules[:_BOM] = rule_info("BOM", "%literals.BOM") Rules[:_Newline] = rule_info("Newline", "%literals.Newline") Rules[:_Spacechar] = rule_info("Spacechar", "%literals.Spacechar") - Rules[:_HexEntity] = rule_info("HexEntity", "/&\#x/i < /[0-9a-fA-F]+/ > \";\" { [text.to_i(16)].pack 'U' }") - Rules[:_DecEntity] = rule_info("DecEntity", "\"&\#\" < /[0-9]+/ > \";\" { [text.to_i].pack 'U' }") - Rules[:_CharEntity] = rule_info("CharEntity", "\"&\" < /[A-Za-z0-9]+/ > \";\" { if entity = HTML_ENTITIES[text] then entity.pack 'U*' else \"&\#{text};\" end }") + Rules[:_HexEntity] = rule_info("HexEntity", "/&\#x/i < /[0-9a-fA-F]+/ > \";\" { rdoc_escape([text.to_i(16)].pack('U')) }") + Rules[:_DecEntity] = rule_info("DecEntity", "\"&\#\" < /[0-9]+/ > \";\" { rdoc_escape([text.to_i].pack('U')) }") + Rules[:_CharEntity] = rule_info("CharEntity", "\"&\" < /[A-Za-z0-9]+/ > \";\" { if entity = HTML_ENTITIES[text] then rdoc_escape(entity.pack('U*')) else \"&\#{text};\" end }") Rules[:_NonindentSpace] = rule_info("NonindentSpace", "/ {0,3}/") Rules[:_Indent] 
= rule_info("Indent", "/\\t| /") Rules[:_IndentedLine] = rule_info("IndentedLine", "Indent Line") diff --git a/lib/rdoc/markup/inline_parser.rb b/lib/rdoc/markup/inline_parser.rb index 6bbd15e7e2..4e2b86c630 100644 --- a/lib/rdoc/markup/inline_parser.rb +++ b/lib/rdoc/markup/inline_parser.rb @@ -303,9 +303,10 @@ def scan_token # Returns nil if no valid URL part is found. # URL part is enclosed in square brackets and may contain escaped brackets. # Example: [http://example.com/?q=\[\]] represents http://example.com/?q=[]. + # If we're accepting rdoc-style links in markdown, url may include *+<_ with backslash escape. def read_tidylink_url - bracketed_url = strscan(/\[([^\s\[\]\\]|\\[\[\]\\])+\]/) + bracketed_url = strscan(/\[([^\s\[\]\\]|\\[\[\]\\*+<_])+\]/) bracketed_url[1...-1].gsub(/\\(.)/, '\1') if bracketed_url end end diff --git a/test/rdoc/markup/to_html_test.rb b/test/rdoc/markup/to_html_test.rb index 459bcb140e..bb57e78e86 100644 --- a/test/rdoc/markup/to_html_test.rb +++ b/test/rdoc/markup/to_html_test.rb @@ -736,6 +736,22 @@ def test_convert_TIDYLINK_multiple assert_equal expected, result end + def test_convert_TIDYLINK_url_unescape + # markdown: [{label}](http://example.com/foo?q=bar+baz[]) + result = @to.convert '{\{label\}}[http://example.com/_foo?q=bar+baz\[\]]' + expected = "\n
\n" + assert_equal expected, result + end + + def test_convert_TIDYLINK_rdoc_in_markdown_url_unescape + # markdown: {label}[http://example.com/?q=<+_*] + # The above text is plain text in markdown, so <+_* are escaped in HTML. + # If we're accepting rdoc-style links in markdown, these escapes should be allowed in the [url] part. + result = @to.convert '{label}[http://example.com/?q=\<\+\_\*]' + expected = "\n\n" + assert_equal expected, result + end + def test_convert_TIDYLINK_with_code_label result = @to.convert '{Link to +Foo+}[https://example.com]' diff --git a/test/rdoc/rdoc_markdown_test.rb b/test/rdoc/rdoc_markdown_test.rb index e851466f72..1b3e4e96ce 100644 --- a/test/rdoc/rdoc_markdown_test.rb +++ b/test/rdoc/rdoc_markdown_test.rb @@ -480,11 +480,11 @@ def test_parse_emphasis_underscore end def test_parse_emphasis_underscore_embedded - doc = parse "foo_bar bar_baz\n" + doc = parse "foo_bar bar_baz _em1_ *em2*\n" expected = doc( - para("foo_bar bar_baz")) + para("foo\\_bar bar\\_baz _em1_ _em2_")) assert_equal expected, doc end @@ -494,15 +494,64 @@ def test_parse_emphasis_underscore_in_word expected = doc( - para("it foo_bar_baz")) + para("it foo\\_bar\\_baz")) assert_equal expected, doc end + def test_rdoc_code_escaped_in_normal_text + doc = parse "+notcode+ \\+notcode+ \\\\+notcode+" + expected = doc(para("\\+notcode\\+ \\+notcode\\+ \\\\\\+notcode\\+")) + assert_equal expected, doc + end + + def test_escape_character_entities + doc = parse "<tt>*\\ <tt>+\\ <tt>_\\" + expected = doc(para("\\\\*\\ \\\\+\\ \\\\_\\")) + assert_equal expected, doc + end + + def test_rdoc_escape_in_markdown_styling + doc = parse "_a \\_b\\_ c_ **+d+** `_1+2*3`" + expected = doc(para("a \\_b\\_ c \\+d\\+_1+2*3"))
+ assert_equal expected, doc
+ end
+
+ def test_rdoc_heading_escaped_inside_markdown # Markdown input that starts with "=" is expected to parse as a plain paragraph
+ doc = parse "= notheading\n"
+ expected = doc(para("= notheading")) # paragraph text equals the input without its trailing newline
+ assert_equal expected, doc
+ end
+
+ def test_rdoc_code_escaped_inside_markdown # parses "+...+" text placed between "~~" delimiters
+ doc = parse "~~+notcode+~~"
+ expected = doc(para("+foo+")) # NOTE(review): "foo" never occurs in the input ("+notcode+") - confirm this expected literal
+ assert_equal expected, doc
+ end
+
+ def test_rdoc_format_escaped_inside_markdown_link # link whose label mixes plain "+to+" with a backtick code span, and whose URL ends in "[]"
+ doc = parse "[Link +to+ `tap{ +1+ }`](http://example.com/?q=[])"
+ expected = doc(para("{Link \\+to\\+ tap{ +1+ }}[http://example.com/?q=\\[\\]]")) # "+to+" and the URL brackets gain backslashes; "+1+" from the code span does not
+ assert_equal expected, doc
+ end
+
+ def test_lt_escape # NOTE(review): the name suggests "<" escaping, but neither literal below contains "<" - confirm the strings are intact
+ doc = parse "\\`a`\\ \\`b`"
+ expected = doc(para("\\a\\ \\b")) # as written: each backslash is kept and every backtick is dropped
+ assert_equal expected, doc
+ end
+
def test_parse_escape
assert_equal doc(para("Backtick: `")), parse("Backtick: \\`")
- assert_equal doc(para("Backslash: \\")), parse("Backslash: \\\\")
+ # The Markdown escape is unescaped to one backslash and then escaped again for RDoc, so the expected text equals the input.
+ assert_equal doc(para("Backslash: \\\\")), parse("Backslash: \\\\")
assert_equal doc(para("Colon: :")), parse("Colon: \\:")
end
diff --git a/test/rdoc/rdoc_markdown_test_test.rb b/test/rdoc/rdoc_markdown_test_test.rb
index c28fe6bebd..ce9481e7ed 100644
--- a/test/rdoc/rdoc_markdown_test_test.rb
+++ b/test/rdoc/rdoc_markdown_test_test.rb
@@ -25,7 +25,7 @@ def test_amps_and_angle_encoding
para("AT&T has an ampersand in their name."),
para("AT&T is another way to write it."),
para("This & that."),
- para("4 < 5."),
+ para("4 \\< 5."),
para("6 > 5."),
para("Here's a {link}[http://example.com/?foo=1&bar=2] with " +
"an ampersand in the URL."),
@@ -69,10 +69,10 @@ def test_backslash_escapes
doc(
para("These should all get escaped:"),
- para("Backslash: \\"),
+ para("Backslash: \\\\"),
para("Backtick: `"),
- para("Asterisk: *"),
- para("Underscore: _"),
+ para("Asterisk: \\*"),
+ para("Underscore: \\_"),
para("Left brace: {"),
para("Right brace: }"),
para("Left bracket: ["),
@@ -83,7 +83,7 @@ def test_backslash_escapes
para("Hash: #"),
para("Period: ."),
para("Bang: !"),
- para("Plus: +"),
+ para("Plus: \\+"),
para("Minus: -"),
para("These should not, because they occur within a code block:"),
@@ -142,8 +142,8 @@ def test_backslash_escapes
para("These should get escaped, even though they're matching pairs for\n" +
"other Markdown constructs:"),
- para("\*asterisks\*"),
- para("\_underscores\_"),
+ para("\\*asterisks\\*"),
+ para("\\_underscores\\_"),
para("`backticks`"),
para("This is a code span with a literal backslash-backtick " +
@@ -227,7 +227,7 @@ def test_hard_wrapped_paragraphs_with_list_like_lines
"middle of a paragraph looked like a\n" +
"list item."),
para("Here's one with a bullet.\n" +
- "* criminey."))
+ "\\* criminey."))
assert_equal expected, doc
end
@@ -866,7 +866,7 @@ def test_markdown_documentation_syntax
para("To this end, Markdown's syntax is comprised entirely of punctuation\n" +
"characters, which punctuation characters have been carefully chosen so\n" +
"as to look like what they mean. E.g., asterisks around a word actually\n" +
- "look like \*emphasis\*. Markdown lists look like, well, lists. Even\n" +
+ "look like \\*emphasis\\*. Markdown lists look like, well, lists. Even\n" +
"blockquotes look like quoted passages of text, assuming you've ever\n" +
"used email."),