diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 55f955f2c1a9a..502cc2ed3ee69 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1412,6 +1412,32 @@ public function serialize_token(): string { $html .= '>'; + /* + * The HTML parser strips a leading newline immediately after the start + * tag of TEXTAREA, PRE, and LISTING elements. When serializing, prepend + * a leading newline to ensure the semantic HTML content is preserved. + * + * For example, `
<pre>\n\nX</pre>` must not become `<pre>\nX</pre>` because its content + * has changed. However, `<pre>X</pre>` and `<pre>\nX</pre>
` are _equivalent_. + * + * > A start tag whose tag name is "textarea" + * > … + * > If the next token is a U+000A LINE FEED (LF) character token, then ignore + * > that token and move on to the next one. (Newlines at the start of textarea + * > elements are ignored as an authoring convenience.) + * + * > A start tag whose tag name is one of: "pre", "listing" + * > … + * > If the next token is a U+000A LINE FEED (LF) character token, then ignore + * > that token and move on to the next one. (Newlines at the start of pre blocks + * > are ignored as an authoring convenience.) + * + * @see https://html.spec.whatwg.org/multipage/parsing.html + */ + if ( 'TEXTAREA' === $tag_name || 'PRE' === $tag_name || 'LISTING' === $tag_name ) { + $html .= "\n"; + } + // Flush out self-contained elements. if ( $in_html && in_array( $tag_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) ) { $text = $this->get_modifiable_text(); diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php index e2b5a79c2de2f..175bb3845d554 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php @@ -321,4 +321,72 @@ public static function data_provider_serialize_doctype() { 'Double quotes in system ID' => array( '', '' ), ); } + + /** + * Ensures that leading newlines in PRE, LISTING, and TEXTAREA elements are preserved upon normalization, + * and that normalization is idempotent in these cases. + * + * @ticket 64607 + * + * @dataProvider data_provider_normalize_special_leading_newline_cases + * + * @param string $input HTML input containing leading newlines in PRE, LISTING, or TEXTAREA elements. + * @param string $expected Expected output after normalization, which should preserve leading newlines. 
+ */ + public function test_normalize_special_leading_newline_handling( string $input, string $expected ) { + $normalized = WP_HTML_Processor::normalize( $input ); + $this->assertEqualHTML( $expected, $normalized ); + $normalized_twice = WP_HTML_Processor::normalize( $normalized ); + $this->assertEqualHTML( $expected, $normalized_twice ); + } + + /** + * Data provider for test_normalize_special_leading_newline_handling(). + * + * Each dataset pairs an HTML input with the HTML expected after normalization. + * + * @return array[] + */ + public static function data_provider_normalize_special_leading_newline_cases() { + return array( + 'Leading newline in PRE' => array( + "
\nline 1\nline 2
", + "
line 1\nline 2
", + ), + 'Double leading newline in PRE' => array( + "
\n\nline 2\nline 3
", + "
\n\nline 2\nline 3
", + ), + 'Multiple text nodes inside PRE' => array( + "
\nline 1 still line 1
", + '
line 1 still line 1
', + ), + 'Multiple text nodes inside PRE with leading newlines' => array( + "
\n\nline 2 still line 2
", + "
\n\nline 2 still line 2
", + ), + 'Leading newline in LISTING' => array( + "\nline 1\nline 2", + "line 1\nline 2", + ), + 'Double leading newline in LISTING' => array( + "\n\nline 2\nline 3", + "\n\nline 2\nline 3", + ), + 'Multiple text nodes inside LISTING' => array( + "\nline 1 still line 1", + 'line 1 still line 1', + ), + 'Multiple text nodes inside LISTING with leading newlines' => array( + "\n\nline 2 still line 2", + "\n\nline 2 still line 2", + ), + 'Leading newline in TEXTAREA' => array( + "", + "", + ), + 'Double leading newline in TEXTAREA' => array( + "", + "", + ), + ); + } }