diff --git a/src/main/java/org/apache/maven/shared/utils/xml/XMLEncode.java b/src/main/java/org/apache/maven/shared/utils/xml/XMLEncode.java index 44b6dd9c..60fa65d7 100644 --- a/src/main/java/org/apache/maven/shared/utils/xml/XMLEncode.java +++ b/src/main/java/org/apache/maven/shared/utils/xml/XMLEncode.java @@ -109,7 +109,19 @@ static void xmlEncodeTextAsPCDATA(String text, boolean forAttribute, char quoteC break; default: - n.append(c); + // C0 control characters (except tab, LF, CR) are encoded + // as numeric character references. For U+0001-U+001F this + // produces valid XML 1.1 but not valid XML 1.0 (which forbids + // these characters in any form); U+0000 is valid in neither. + // Callers that require strict XML 1.0 compliance should strip + // these characters before encoding. + if (c < 0x20 && c != 0x09 && c != 0x0A && c != 0x0D) { + n.append("&#x"); + n.append(Integer.toHexString(c)); + n.append(';'); + } else { + n.append(c); + } break; } } @@ -135,6 +147,12 @@ private static String xmlEncodeTextAsCDATABlock(String text) { /** * Checks if this text needs encoding in order to be represented in XML. + * Note: C0 control characters (U+0000-U+001F except tab, LF, CR) are + * classified as needing encoding, but encoding them as numeric character + * references produces output that is not valid XML 1.0 (which forbids + * these characters outright in any form). Except for U+0000, the encoded + * output is valid XML 1.1. Callers that require strict XML 1.0 compliance + * should strip these characters before encoding. 
*/ private static boolean needsEncoding(String text) { if (text == null) { @@ -145,6 +163,9 @@ private static boolean needsEncoding(String text) { if (c == '&' || c == '<') { return true; } + if (c < 0x20 && c != 0x09 && c != 0x0A && c != 0x0D) { + return true; + } } return false; } diff --git a/src/test/java/org/apache/maven/shared/utils/xml/PrettyPrintXmlWriterTest.java b/src/test/java/org/apache/maven/shared/utils/xml/PrettyPrintXmlWriterTest.java index ee210cb0..e88c695d 100644 --- a/src/test/java/org/apache/maven/shared/utils/xml/PrettyPrintXmlWriterTest.java +++ b/src/test/java/org/apache/maven/shared/utils/xml/PrettyPrintXmlWriterTest.java @@ -93,6 +93,26 @@ public void testPrettyPrintXMLWriterWithGivenLineIndenter() throws IOException { assertEquals(expectedResult(" "), w.toString()); } + @Test + public void testEncodeIllegalControlCharsInText() throws IOException { + StringWriter sw = new StringWriter(); + PrettyPrintXMLWriter w = new PrettyPrintXMLWriter(sw); + w.startElement("div"); + w.writeText("hello\u0001world"); + w.endElement(); + assertEquals("<div>hello&#x1;world</div>", sw.toString()); + } + + @Test + public void testEncodeIllegalControlCharsInAttribute() throws IOException { + StringWriter sw = new StringWriter(); + PrettyPrintXMLWriter w = new PrettyPrintXMLWriter(sw); + w.startElement("div"); + w.addAttribute("title", "hello\u0001world"); + w.endElement(); + assertEquals("<div title=\"hello&#x1;world\"/>", sw.toString()); + } + @Test public void testEscapeXmlAttributeWindows() throws IOException { // Windows