From 8d4f17b28134e6f07f8bf56e212600584bc6773f Mon Sep 17 00:00:00 2001 From: Elliotte Rusty Harold Date: Wed, 1 Jul 2026 08:51:26 -0400 Subject: [PATCH 1/3] Add failing tests: XMLEncode illegal control chars --- .../utils/xml/PrettyPrintXmlWriterTest.java | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/test/java/org/apache/maven/shared/utils/xml/PrettyPrintXmlWriterTest.java b/src/test/java/org/apache/maven/shared/utils/xml/PrettyPrintXmlWriterTest.java index ee210cb0..e88c695d 100644 --- a/src/test/java/org/apache/maven/shared/utils/xml/PrettyPrintXmlWriterTest.java +++ b/src/test/java/org/apache/maven/shared/utils/xml/PrettyPrintXmlWriterTest.java @@ -93,6 +93,26 @@ public void testPrettyPrintXMLWriterWithGivenLineIndenter() throws IOException { assertEquals(expectedResult(" "), w.toString()); } + @Test + public void testEncodeIllegalControlCharsInText() throws IOException { + StringWriter sw = new StringWriter(); + PrettyPrintXMLWriter w = new PrettyPrintXMLWriter(sw); + w.startElement("div"); + w.writeText("hello\u0001world"); + w.endElement(); + assertEquals("
<div>hello&#x1;world</div>
", sw.toString()); + } + + @Test + public void testEncodeIllegalControlCharsInAttribute() throws IOException { + StringWriter sw = new StringWriter(); + PrettyPrintXMLWriter w = new PrettyPrintXMLWriter(sw); + w.startElement("div"); + w.addAttribute("title", "hello\u0001world"); + w.endElement(); + assertEquals("<div title=\"hello&#x1;world\"/>
", sw.toString()); + } + @Test public void testEscapeXmlAttributeWindows() throws IOException { // Windows From ecdc7470be485b42694ee168737112e5c84be75e Mon Sep 17 00:00:00 2001 From: Elliotte Rusty Harold Date: Wed, 1 Jul 2026 08:52:39 -0400 Subject: [PATCH 2/3] XMLEncode: encode illegal XML control characters as &#xHH; --- .../org/apache/maven/shared/utils/xml/XMLEncode.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/apache/maven/shared/utils/xml/XMLEncode.java b/src/main/java/org/apache/maven/shared/utils/xml/XMLEncode.java index 44b6dd9c..18e9a5c3 100644 --- a/src/main/java/org/apache/maven/shared/utils/xml/XMLEncode.java +++ b/src/main/java/org/apache/maven/shared/utils/xml/XMLEncode.java @@ -109,7 +109,13 @@ static void xmlEncodeTextAsPCDATA(String text, boolean forAttribute, char quoteC break; default: - n.append(c); + if (c < 0x20 && c != 0x09 && c != 0x0A && c != 0x0D) { + n.append("&#x"); + n.append(Integer.toHexString(c)); + n.append(';'); + } else { + n.append(c); + } break; } } @@ -145,6 +151,9 @@ private static boolean needsEncoding(String text) { if (c == '&' || c == '<') { return true; } + if (c < 0x20 && c != 0x09 && c != 0x0A && c != 0x0D) { + return true; + } } return false; } From d6913c87a7b4c9a3f3298d22a07c9c187c954467 Mon Sep 17 00:00:00 2001 From: Elliotte Rusty Harold Date: Wed, 1 Jul 2026 10:25:46 -0400 Subject: [PATCH 3/3] add comments about XML 1.0/1.1 C0 control character handling --- .../org/apache/maven/shared/utils/xml/XMLEncode.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/main/java/org/apache/maven/shared/utils/xml/XMLEncode.java b/src/main/java/org/apache/maven/shared/utils/xml/XMLEncode.java index 18e9a5c3..60fa65d7 100644 --- a/src/main/java/org/apache/maven/shared/utils/xml/XMLEncode.java +++ b/src/main/java/org/apache/maven/shared/utils/xml/XMLEncode.java @@ -109,6 +109,12 @@ static void xmlEncodeTextAsPCDATA(String text, boolean forAttribute, char 
quoteC break; default: + // C0 control characters (except tab, LF, CR) are encoded + // as numeric character references. For U+0001-U+001F this + // produces valid XML 1.1 but not valid XML 1.0 (which forbids + // these characters in any form); U+0000 is illegal in both, even + // as a reference. Callers that require strict XML 1.0 compliance + // should strip these characters before encoding. if (c < 0x20 && c != 0x09 && c != 0x0A && c != 0x0D) { n.append("&#x"); n.append(Integer.toHexString(c)); n.append(';'); @@ -141,6 +147,12 @@ private static String xmlEncodeTextAsCDATABlock(String text) { /** * Checks if this text needs encoding in order to be represented in XML. + * Note: C0 control characters (U+0000-U+001F except tab, LF, CR) are + * classified as needing encoding, but encoding them as numeric character + * references produces output that is not valid XML 1.0 (which forbids + * these characters outright in any form). The encoded output is valid + * XML 1.1 except for U+0000, which no XML version permits. Callers that + * require strict XML 1.0 compliance should strip these characters before encoding. */ private static boolean needsEncoding(String text) { if (text == null) {