Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -1412,6 +1412,32 @@ public function serialize_token(): string {

$html .= '>';

/*
* The HTML parser strips a leading newline immediately after the start
* tag of TEXTAREA, PRE, and LISTING elements. When serializing, prepend
* a leading newline to ensure the semantic HTML content is preserved.
*
* For example, `<pre>\n\nX</pre>` must not become `<pre>\nX</pre>` because its content
* has changed. However, `<pre>X</pre>` and `<pre>\nX</pre>` are _equivalent_.
*
* > A start tag whose tag name is "textarea"
* > …
* > If the next token is a U+000A LINE FEED (LF) character token, then ignore
* > that token and move on to the next one. (Newlines at the start of textarea
* > elements are ignored as an authoring convenience.)
*
* > A start tag whose tag name is one of: "pre", "listing"
* > …
* > If the next token is a U+000A LINE FEED (LF) character token, then ignore
* > that token and move on to the next one. (Newlines at the start of pre blocks
* > are ignored as an authoring convenience.)
*
* @see https://html.spec.whatwg.org/multipage/parsing.html
*/
if ( 'TEXTAREA' === $tag_name || 'PRE' === $tag_name || 'LISTING' === $tag_name ) {
$html .= "\n";
}

// Flush out self-contained elements.
if ( $in_html && in_array( $tag_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) ) {
$text = $this->get_modifiable_text();
Expand Down
68 changes: 68 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php
Original file line number Diff line number Diff line change
Expand Up @@ -321,4 +321,72 @@ public static function data_provider_serialize_doctype() {
'Double quotes in system ID' => array( '<!DOCTYPE html SYSTEM \'"quoted"\'\>', '<!DOCTYPE html SYSTEM \'"quoted"\'>' ),
);
}

/**
 * Ensures that leading newlines in PRE, LISTING, and TEXTAREA elements are preserved upon normalization,
 * and that normalization is idempotent in these cases.
 *
 * The input is normalized twice. Each pass must succeed and be equivalent to the
 * expected HTML, and the second pass must reproduce the first pass byte-for-byte.
 *
 * @ticket 64607
 *
 * @dataProvider data_provider_normalize_special_leading_newline_cases
 *
 * @param string $input    HTML input containing leading newlines in PRE, LISTING, or TEXTAREA elements.
 * @param string $expected Expected output after normalization, which should preserve leading newlines.
 */
public function test_normalize_special_leading_newline_handling( string $input, string $expected ) {
	$normalized = WP_HTML_Processor::normalize( $input );

	// Fail with a readable message instead of a type error if normalization bails.
	$this->assertNotNull( $normalized, 'Normalization of the input should have succeeded.' );
	$this->assertEqualHTML( $expected, $normalized );

	$normalized_twice = WP_HTML_Processor::normalize( $normalized );

	$this->assertNotNull( $normalized_twice, 'Normalization of already-normalized HTML should have succeeded.' );
	$this->assertEqualHTML( $expected, $normalized_twice );

	// Idempotence: a second pass must not add, drop, or move any newline.
	$this->assertSame(
		$normalized,
		$normalized_twice,
		'Normalizing already-normalized HTML should not change its serialization.'
	);
}

/**
 * Data provider.
 *
 * Builds the datasets per element instead of listing them literally. Every
 * element gets the single and double leading-newline cases; elements flagged
 * below additionally get the cases where a comment follows the first text.
 *
 * Each dataset is a pair of `array( $input, $expected )`.
 *
 * @return array[]
 */
public static function data_provider_normalize_special_leading_newline_cases() {
	// Element name => whether to also emit the comment ("multiple text nodes") cases.
	$elements = array(
		'pre'      => true,
		'listing'  => true,
		'textarea' => false,
	);

	$datasets = array();

	foreach ( $elements as $element => $with_comment_cases ) {
		$label = strtoupper( $element );
		$open  = "<{$element}>";
		$close = "</{$element}>";

		// A single leading newline is ignored by the parser: expected output omits it.
		$datasets[ "Leading newline in {$label}" ] = array(
			"{$open}\nline 1\nline 2{$close}",
			"{$open}line 1\nline 2{$close}",
		);

		// Only the first newline is ignored, so two leading newlines must survive intact.
		$datasets[ "Double leading newline in {$label}" ] = array(
			"{$open}\n\nline 2\nline 3{$close}",
			"{$open}\n\nline 2\nline 3{$close}",
		);

		if ( ! $with_comment_cases ) {
			continue;
		}

		$datasets[ "Multiple text nodes inside {$label}" ] = array(
			"{$open}\nline 1<!--comment--> still line 1{$close}",
			"{$open}line 1<!--comment--> still line 1{$close}",
		);

		$datasets[ "Multiple text nodes inside {$label} with leading newlines" ] = array(
			"{$open}\n\nline 2<!--comment--> still line 2{$close}",
			"{$open}\n\nline 2<!--comment--> still line 2{$close}",
		);
	}

	return $datasets;
}
}
Loading