diff --git a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php index 2f51482eee052..375f7ec3d1006 100644 --- a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php +++ b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php @@ -113,6 +113,8 @@ public function insert_marker(): void { */ public function push( WP_HTML_Token $token ) { /* + * Noah's Ark clause: Limit to 3 identical formatting elements. + * * > If there are already three elements in the list of active formatting elements after the last marker, * > if any, or anywhere in the list if there are no markers, that have the same tag name, namespace, and * > attributes as element, then remove the earliest such element from the list of active formatting @@ -121,8 +123,32 @@ public function push( WP_HTML_Token $token ) { * > paired such that the two attributes in each pair have identical names, namespaces, and values * > (the order of the attributes does not matter). * - * @todo Implement the "Noah's Ark clause" to only add up to three of any given kind of formatting elements to the stack. + * @see https://html.spec.whatwg.org/#push-onto-the-list-of-active-formatting-elements */ + $dominated_count = 0; + $earliest_match_index = null; + + // Walk backwards, counting matches until we hit a marker. + for ( $i = count( $this->stack ) - 1; $i >= 0; $i-- ) { + $entry = $this->stack[ $i ]; + + // Markers stop the search. + if ( 'marker' === $entry->node_name ) { + break; + } + + // Check if this entry matches the token being pushed. + if ( self::elements_have_same_identity( $token, $entry ) ) { + ++$dominated_count; + $earliest_match_index = $i; + } + } + + // If 3 identical elements exist, remove the earliest. 
+ if ( $dominated_count >= 3 && null !== $earliest_match_index ) { + array_splice( $this->stack, $earliest_match_index, 1 ); + } + // > Add element to the list of active formatting elements. $this->stack[] = $token; } @@ -226,4 +252,118 @@ public function clear_up_to_last_marker(): void { } } } + + /** + * Gets the entry at a specific index in the list. + * + * @since 6.8.0 + * + * @param int $index Zero-based index from the start of the list. + * @return WP_HTML_Token|null The token at that index, or null if out of bounds. + */ + public function get_at( int $index ): ?WP_HTML_Token { + return $this->stack[ $index ] ?? null; + } + + /** + * Replaces the entry at a specific index with a new token. + * + * @since 6.8.0 + * + * @param int $index Zero-based index from the start of the list. + * @param WP_HTML_Token $token The new token to place at that index. + * @return bool Whether the replacement was successful. + */ + public function replace_at( int $index, WP_HTML_Token $token ): bool { + if ( $index < 0 || $index >= count( $this->stack ) ) { + return false; + } + $this->stack[ $index ] = $token; + return true; + } + + /** + * Finds the index of a token in the list. + * + * @since 6.8.0 + * + * @param WP_HTML_Token $token The token to find. + * @return int|null The index, or null if not found. + */ + public function index_of( WP_HTML_Token $token ): ?int { + foreach ( $this->stack as $index => $item ) { + if ( $token->bookmark_name === $item->bookmark_name ) { + return $index; + } + } + return null; + } + + /** + * Determines if two tokens represent the same formatting element. + * + * Two elements are considered identical if they have the same: + * - Tag name + * - Namespace + * - Attributes (names, namespaces, and values) + * + * @since 6.8.0 + * + * @param WP_HTML_Token $a First token. + * @param WP_HTML_Token $b Second token. + * @return bool Whether the tokens represent identical formatting elements. 
+ */ + private static function elements_have_same_identity( WP_HTML_Token $a, WP_HTML_Token $b ): bool { + // Tag name must match. + if ( $a->node_name !== $b->node_name ) { + return false; + } + + // Namespace must match. + if ( $a->namespace !== $b->namespace ) { + return false; + } + + // Attributes must match. + return self::attributes_are_equal( + $a->attributes ?? array(), + $b->attributes ?? array() + ); + } + + /** + * Determines if two attribute arrays are equal. + * + * Comparison is case-insensitive for names (keys are already lowercase), + * exact for values, and order-independent. + * + * @since 6.8.0 + * + * @param array $a First attributes array. + * @param array $b Second attributes array. + * @return bool Whether the attributes are equal. + */ + private static function attributes_are_equal( array $a, array $b ): bool { + // Different count means different attributes. + if ( count( $a ) !== count( $b ) ) { + return false; + } + + // Empty arrays are equal. + if ( 0 === count( $a ) ) { + return true; + } + + // Compare each attribute (keys already lowercase from capture). + foreach ( $a as $name => $value ) { + if ( ! array_key_exists( $name, $b ) ) { + return false; + } + if ( $value !== $b[ $name ] ) { + return false; + } + } + + return true; + } } diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 55f955f2c1a9a..da9147c7a862e 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -888,6 +888,32 @@ private function is_virtual(): bool { ); } + /** + * Captures all attributes from the current token as an array. + * + * Returns an associative array with lowercase attribute names as keys + * and decoded attribute values as values. Boolean attributes have + * the value `true`. + * + * @since 6.8.0 + * + * @return array Attribute name-value pairs. 
+ */ + private function get_current_token_attributes(): array { + $attributes = array(); + $names = $this->get_attribute_names_with_prefix( '' ); + + if ( null === $names ) { + return $attributes; + } + + foreach ( $names as $name ) { + $attributes[ $name ] = $this->get_attribute( $name ); + } + + return $attributes; + } + /** * Indicates if the currently-matched tag matches the given breadcrumbs. * @@ -2766,6 +2792,7 @@ private function step_in_body(): bool { $this->reconstruct_active_formatting_elements(); $this->insert_html_element( $this->state->current_token ); + $this->state->current_token->attributes = $this->get_current_token_attributes(); $this->state->active_formatting_elements->push( $this->state->current_token ); return true; @@ -2787,6 +2814,7 @@ private function step_in_body(): bool { case '+U': $this->reconstruct_active_formatting_elements(); $this->insert_html_element( $this->state->current_token ); + $this->state->current_token->attributes = $this->get_current_token_attributes(); $this->state->active_formatting_elements->push( $this->state->current_token ); return true; @@ -2803,6 +2831,7 @@ private function step_in_body(): bool { } $this->insert_html_element( $this->state->current_token ); + $this->state->current_token->attributes = $this->get_current_token_attributes(); $this->state->active_formatting_elements->push( $this->state->current_token ); return true; @@ -5284,6 +5313,22 @@ public function get_token_type(): ?string { * @return string|true|null Value of attribute or `null` if not available. Boolean attributes return `true`. */ public function get_attribute( $name ) { + /* + * For reconstructed elements with virtual attributes, + * return the stored attribute value. 
+ */ + if ( + isset( $this->current_element ) && + null !== $this->current_element->token->attributes + ) { + $comparable = strtolower( $name ); + if ( ! $this->is_tag_closer() && array_key_exists( $comparable, $this->current_element->token->attributes ) ) { + return $this->current_element->token->attributes[ $comparable ]; + } + // Tag closers expose no attributes; otherwise nothing exists beyond what's stored. + return null; + } + return $this->is_virtual() ? null : parent::get_attribute( $name ); } @@ -5362,9 +5407,142 @@ public function remove_attribute( $name ): bool { * @return array|null List of attribute names, or `null` when no tag opener is matched. */ public function get_attribute_names_with_prefix( $prefix ): ?array { + /* + * For reconstructed elements with virtual attributes, + * return matching attribute names from stored attributes. + */ + if ( + isset( $this->current_element ) && + null !== $this->current_element->token->attributes + ) { + if ( $this->is_tag_closer() ) { + return null; + } + + $comparable = strtolower( $prefix ); + $matches = array(); + + foreach ( array_keys( $this->current_element->token->attributes ) as $name ) { + if ( str_starts_with( $name, $comparable ) ) { + $matches[] = $name; + } + } + + return $matches; + } + return $this->is_virtual() ? null : parent::get_attribute_names_with_prefix( $prefix ); } + /** + * Returns the adjusted attribute name for the currently matched tag. + * + * For virtual/reconstructed elements with stored attributes, returns the + * stored attribute name (already lowercase). Applies foreign attribute + * adjustments for SVG and MathML namespaces as needed. + * + * @since 6.8.0 Subclassed for the HTML Processor. + * + * @param string $attribute_name Attribute name to adjust. + * @return string|null Adjusted attribute name, or `null` if not available. + */ + public function get_qualified_attribute_name( $attribute_name ): ?string { + /* + * For reconstructed elements with virtual attributes, + * the attribute name is already lowercase.
Apply foreign + * attribute adjustments if needed. + */ + if ( + isset( $this->current_element ) && + null !== $this->current_element->token->attributes + ) { + $comparable = strtolower( $attribute_name ); + if ( ! array_key_exists( $comparable, $this->current_element->token->attributes ) ) { + return null; + } + + $namespace = $this->get_namespace(); + + // Apply foreign attribute adjustments for MathML. + if ( 'math' === $namespace && 'definitionurl' === $comparable ) { + return 'definitionURL'; + } + + // Apply foreign attribute adjustments for SVG. + if ( 'svg' === $namespace ) { + $svg_adjusted = array( + 'attributename' => 'attributeName', + 'attributetype' => 'attributeType', + 'basefrequency' => 'baseFrequency', + 'baseprofile' => 'baseProfile', + 'calcmode' => 'calcMode', + 'clippathunits' => 'clipPathUnits', + 'diffuseconstant' => 'diffuseConstant', + 'edgemode' => 'edgeMode', + 'filterunits' => 'filterUnits', + 'glyphref' => 'glyphRef', + 'gradienttransform' => 'gradientTransform', + 'gradientunits' => 'gradientUnits', + 'kernelmatrix' => 'kernelMatrix', + 'kernelunitlength' => 'kernelUnitLength', + 'keypoints' => 'keyPoints', + 'keysplines' => 'keySplines', + 'keytimes' => 'keyTimes', + 'lengthadjust' => 'lengthAdjust', + 'limitingconeangle' => 'limitingConeAngle', + 'markerheight' => 'markerHeight', + 'markerunits' => 'markerUnits', + 'markerwidth' => 'markerWidth', + 'maskcontentunits' => 'maskContentUnits', + 'maskunits' => 'maskUnits', + 'numoctaves' => 'numOctaves', + 'pathlength' => 'pathLength', + 'patterncontentunits' => 'patternContentUnits', + 'patterntransform' => 'patternTransform', + 'patternunits' => 'patternUnits', + 'pointsatx' => 'pointsAtX', + 'pointsaty' => 'pointsAtY', + 'pointsatz' => 'pointsAtZ', + 'preservealpha' => 'preserveAlpha', + 'preserveaspectratio' => 'preserveAspectRatio', + 'primitiveunits' => 'primitiveUnits', + 'refx' => 'refX', + 'refy' => 'refY', + 'repeatcount' => 'repeatCount', + 'repeatdur' => 'repeatDur', + 
'requiredextensions' => 'requiredExtensions', + 'requiredfeatures' => 'requiredFeatures', + 'specularconstant' => 'specularConstant', + 'specularexponent' => 'specularExponent', + 'spreadmethod' => 'spreadMethod', + 'startoffset' => 'startOffset', + 'stddeviation' => 'stdDeviation', + 'stitchtiles' => 'stitchTiles', + 'surfacescale' => 'surfaceScale', + 'systemlanguage' => 'systemLanguage', + 'tablevalues' => 'tableValues', + 'targetx' => 'targetX', + 'targety' => 'targetY', + 'textlength' => 'textLength', + 'viewbox' => 'viewBox', + 'viewtarget' => 'viewTarget', + 'xchannelselector' => 'xChannelSelector', + 'ychannelselector' => 'yChannelSelector', + 'zoomandpan' => 'zoomAndPan', + ); + + if ( isset( $svg_adjusted[ $comparable ] ) ) { + return $svg_adjusted[ $comparable ]; + } + } + + // Return the lowercase attribute name for HTML namespace. + return $comparable; + } + + return $this->is_virtual() ? null : parent::get_qualified_attribute_name( $attribute_name ); + } + /** * Adds a new class name to the currently matched tag. * @@ -5873,15 +6051,18 @@ private function get_adjusted_current_node(): ?WP_HTML_Token { * @return bool Whether any formatting elements needed to be reconstructed. */ private function reconstruct_active_formatting_elements(): bool { + $active_formatting_elements = $this->state->active_formatting_elements; + $stack_of_open_elements = $this->state->stack_of_open_elements; + /* * > If there are no entries in the list of active formatting elements, then there is nothing * > to reconstruct; stop this algorithm. 
*/ - if ( 0 === $this->state->active_formatting_elements->count() ) { + if ( 0 === $active_formatting_elements->count() ) { return false; } - $last_entry = $this->state->active_formatting_elements->current_node(); + $last_entry = $active_formatting_elements->current_node(); if ( /* @@ -5895,12 +6076,120 @@ private function reconstruct_active_formatting_elements(): bool { * > element that is in the stack of open elements, then there is nothing to reconstruct; * > stop this algorithm. */ - $this->state->stack_of_open_elements->contains_node( $last_entry ) + $stack_of_open_elements->contains_node( $last_entry ) ) { return false; } - $this->bail( 'Cannot reconstruct active formatting elements when advancing and rewinding is required.' ); + /* + * > Let entry be the last (most recently added) element in the list of active formatting elements. + */ + $entry_index = $active_formatting_elements->count() - 1; + + /* + * REWIND: Walk backwards to find where reconstruction should start. + * + * > Rewind: If there are no entries before entry in the list of active formatting elements, + * > then jump to the step labeled create. + * > Let entry be the entry one earlier than entry in the list of active formatting elements. + * > If entry is neither a marker nor an element that is also in the stack of open elements, + * > go to the step labeled rewind. + */ + while ( $entry_index > 0 ) { + --$entry_index; + $entry = $active_formatting_elements->get_at( $entry_index ); + + /* + * Stop rewinding if a marker or an element in the stack is found. + */ + if ( + 'marker' === $entry->node_name || + $stack_of_open_elements->contains_node( $entry ) + ) { + /* + * > Advance: Let entry be the element one later than entry in the list of + * > active formatting elements. + */ + ++$entry_index; + break; + } + } + + /* + * ADVANCE and CREATE: Walk forwards, creating and inserting elements. 
+ * + * > Create: Insert an HTML element for the token for which the element entry was created, + * > to obtain new element. + * > Replace the entry for entry in the list with an entry for new element. + * > If the entry for new element in the list of active formatting elements is not the + * > last entry in the list, return to the step labeled advance. + */ + $last_index = $active_formatting_elements->count() - 1; + while ( $entry_index <= $last_index ) { + $entry = $active_formatting_elements->get_at( $entry_index ); + + /* + * Create an element for the token and insert it. + */ + $new_element = $this->create_element_for_formatting_token( $entry ); + $this->insert_html_element( $new_element ); + + /* + * Replace the entry in the list with the newly created element. + */ + $active_formatting_elements->replace_at( $entry_index, $new_element ); + + ++$entry_index; + } + + return true; + } + + /** + * Creates a new element token for reconstructing a formatting element. + * + * This creates a "virtual" element that represents a reconstructed + * formatting element. It uses the same tag name as the original + * but gets a new bookmark pointing to the current position. + * + * Any attributes stored on the original entry are copied to the new token. + * + * @since 6.8.0 + * + * @param WP_HTML_Token $entry The active formatting element entry. + * @return WP_HTML_Token The newly created element token. + */ + private function create_element_for_formatting_token( WP_HTML_Token $entry ): WP_HTML_Token { + /* + * Create a virtual bookmark for this reconstructed element. + * This follows the same pattern as insert_virtual_node(). + */ + $bookmark_name = $this->bookmark_token(); + + /* + * The bookmark points to the current token's position with zero length, + * indicating this is a virtual element without source HTML.
+ */ + $here = $this->bookmarks[ $this->state->current_token->bookmark_name ]; + $this->bookmarks[ $bookmark_name ] = new WP_HTML_Span( $here->start, 0 ); + + /* + * Create new token with same tag name as the original. + * Formatting elements are always in the HTML namespace. + */ + $new_token = new WP_HTML_Token( $bookmark_name, $entry->node_name, false ); + $new_token->namespace = 'html'; + + /* + * Clone attributes from the original entry. + * This ensures reconstructed elements have the same attributes + * as the token for which they were created. + */ + if ( null !== $entry->attributes ) { + $new_token->attributes = $entry->attributes; + } + + return $new_token; } /** diff --git a/src/wp-includes/html-api/class-wp-html-token.php b/src/wp-includes/html-api/class-wp-html-token.php index d5e51ac29007f..3e5138c8c352b 100644 --- a/src/wp-includes/html-api/class-wp-html-token.php +++ b/src/wp-includes/html-api/class-wp-html-token.php @@ -85,6 +85,22 @@ class WP_HTML_Token { */ public $on_destroy = null; + /** + * Attributes associated with this token. + * + * For formatting elements in the active formatting elements list, + * this stores the attributes as they were when the element was created. + * Used for reconstruction and Noah's Ark duplicate detection. + * + * Keys are lowercase attribute names, values are decoded strings + * or `true` for boolean attributes. + * + * @since 6.8.0 + * + * @var array|null + */ + public $attributes = null; + /** * Constructor - creates a reference to a token in some external HTML string. 
* diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 13e0728ca912a..934d14ae9e43d 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -185,18 +185,19 @@ public function test_clear_to_navigate_after_seeking() { } /** - * Ensures that support is added for reconstructing active formatting elements - * before the HTML Processor handles situations with unclosed formats requiring it. + * Ensures that active formatting elements are properly reconstructed across paragraphs. * * @ticket 58517 * * @covers WP_HTML_Processor::reconstruct_active_formatting_elements */ - public function test_fails_to_reconstruct_formatting_elements() { + public function test_reconstructs_active_formatting_elements() { $processor = WP_HTML_Processor::create_fragment( '

One

Two

Three

Four' ); $this->assertTrue( $processor->next_tag( 'EM' ), 'Could not find first EM.' ); - $this->assertFalse( $processor->next_tag( 'EM' ), 'Should have aborted before finding second EM as it required reconstructing the first EM.' ); + $this->assertTrue( $processor->next_tag( 'EM' ), 'Could not find second EM (should be reconstructed from first).' ); + $this->assertTrue( $processor->next_tag( 'EM' ), 'Could not find third EM.' ); + $this->assertTrue( $processor->next_tag( 'EM' ), 'Could not find fourth EM.' ); } /** diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php index 911fa8b910b37..6cb73d0d8024e 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php @@ -195,14 +195,9 @@ public function test_fails_when_encountering_unsupported_markup( $html, $descrip */ public static function data_unsupported_markup() { return array( - 'A with formatting following unclosed A' => array( - 'Click Here', - 'Unclosed formatting requires complicated reconstruction.', - ), - - 'A after unclosed A inside DIV' => array( + 'A after unclosed A inside DIV' => array( '

', - 'A is a formatting element, which requires more complicated reconstruction.', + 'A is a formatting element, which requires adoption agency with furthest block.', ), ); } diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorReconstructActiveFormattingElements.php b/tests/phpunit/tests/html-api/wpHtmlProcessorReconstructActiveFormattingElements.php new file mode 100644 index 0000000000000..7dce7dbfc2ff7 --- /dev/null +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorReconstructActiveFormattingElements.php @@ -0,0 +1,588 @@ +` is implicitly closed by the second `

`, it should be + * reconstructed when processing subsequent content in the new paragraph. + * + * @ticket 62357 + * + * @covers WP_HTML_Processor::reconstruct_active_formatting_elements + */ + public function test_reconstructs_single_formatting_element_across_paragraph_boundary() { + $processor = WP_HTML_Processor::create_fragment( '

Bold

Still bold' ); + + $this->assertTrue( + $processor->next_tag( array( 'tag_name' => 'SPAN' ) ), + 'Should have found the target SPAN element.' + ); + + $this->assertSame( + array( 'HTML', 'BODY', 'P', 'B', 'SPAN' ), + $processor->get_breadcrumbs(), + 'The B element should have been reconstructed in the second paragraph.' + ); + } + + /** + * Verifies that multiple formatting elements are reconstructed in order. + * + * When multiple formatting elements are implicitly closed, they should all + * be reconstructed in the same order they were originally opened. + * + * @ticket 62357 + * + * @covers WP_HTML_Processor::reconstruct_active_formatting_elements + */ + public function test_reconstructs_multiple_formatting_elements_in_order() { + $processor = WP_HTML_Processor::create_fragment( '

Bold italic

Still both' ); + + $this->assertTrue( + $processor->next_tag( array( 'tag_name' => 'SPAN' ) ), + 'Should have found the target SPAN element.' + ); + + $this->assertSame( + array( 'HTML', 'BODY', 'P', 'B', 'I', 'SPAN' ), + $processor->get_breadcrumbs(), + 'Both B and I elements should have been reconstructed in order.' + ); + } + + /** + * Verifies that deeply nested formatting elements are properly reconstructed. + * + * @ticket 62357 + * + * @covers WP_HTML_Processor::reconstruct_active_formatting_elements + */ + public function test_reconstructs_deeply_nested_formatting_elements() { + $processor = WP_HTML_Processor::create_fragment( '

Formatted

' ); + + $this->assertTrue( + $processor->next_tag( array( 'tag_name' => 'SPAN' ) ), + 'Should have found the target SPAN element.' + ); + + $this->assertSame( + array( 'HTML', 'BODY', 'P', 'B', 'I', 'U', 'S', 'SPAN' ), + $processor->get_breadcrumbs(), + 'All formatting elements should have been reconstructed.' + ); + } + + /** + * Verifies that reconstruction stops at a scope marker. + * + * When a scope marker (e.g., from a BUTTON element) is present in the + * active formatting elements list, reconstruction should not proceed + * past it. However, elements added after the marker are still active + * and will be reconstructed. + * + * In this test, the B is before the button (added to list), then a marker + * is pushed for the button, then I is added inside. When the button closes, + * the marker is removed. But the I is still in the active formatting list + * (it was never closed), so both B and I get reconstructed. + * + * @ticket 62357 + * + * @covers WP_HTML_Processor::reconstruct_active_formatting_elements + */ + public function test_reconstruction_includes_elements_from_closed_scopes() { + $processor = WP_HTML_Processor::create_fragment( '

Bold

' ); + + $this->assertTrue( + $processor->next_tag( array( 'tag_name' => 'SPAN' ) ), + 'Should have found the target SPAN element.' + ); + + // Both B and I are in active formatting elements and need reconstruction. + $this->assertSame( + array( 'HTML', 'BODY', 'P', 'B', 'I', 'SPAN' ), + $processor->get_breadcrumbs(), + 'Both B and I should be reconstructed; I persisted after button closed.' + ); + } + + /** + * Verifies that no reconstruction occurs when the last entry is already + * in the stack of open elements. + * + * @ticket 62357 + * + * @covers WP_HTML_Processor::reconstruct_active_formatting_elements + */ + public function test_no_reconstruction_when_entry_already_in_stack() { + $processor = WP_HTML_Processor::create_fragment( '

Bold' ); + + $this->assertTrue( + $processor->next_tag( array( 'tag_name' => 'SPAN' ) ), + 'Should have found the target SPAN element.' + ); + + $this->assertSame( + array( 'HTML', 'BODY', 'P', 'B', 'SPAN' ), + $processor->get_breadcrumbs(), + 'B element is already open, no reconstruction needed.' + ); + } + + /** + * Verifies that reconstruction works correctly with multiple paragraphs. + * + * @ticket 62357 + * + * @covers WP_HTML_Processor::reconstruct_active_formatting_elements + */ + public function test_reconstructs_across_multiple_paragraph_boundaries() { + $processor = WP_HTML_Processor::create_fragment( '

One

Two

Three

' ); + + $this->assertTrue( + $processor->next_tag( array( 'tag_name' => 'SPAN' ) ), + 'Should have found the target SPAN element.' + ); + + $this->assertSame( + array( 'HTML', 'BODY', 'P', 'B', 'SPAN' ), + $processor->get_breadcrumbs(), + 'B element should be reconstructed even after multiple paragraph boundaries.' + ); + } + + /** + * Verifies that reconstruction handles the adoption agency algorithm interaction. + * + * When a formatting element is closed by an end tag, it should be removed + * from the active formatting elements and not reconstructed. + * + * @ticket 62357 + * + * @covers WP_HTML_Processor::reconstruct_active_formatting_elements + */ + public function test_closed_formatting_element_not_reconstructed() { + $processor = WP_HTML_Processor::create_fragment( '

Bold

' ); + + $this->assertTrue( + $processor->next_tag( array( 'tag_name' => 'SPAN' ) ), + 'Should have found the target SPAN element.' + ); + + $this->assertSame( + array( 'HTML', 'BODY', 'P', 'SPAN' ), + $processor->get_breadcrumbs(), + 'B element was properly closed and should not be reconstructed.' + ); + } + + /** + * Verifies that a formatting element with attributes can be reconstructed. + * + * The attributes are expected to be cloned from the original formatting element + * to the reconstructed element. + * + * @ticket 62357 + * + * @covers WP_HTML_Processor::reconstruct_active_formatting_elements + */ + public function test_reconstructed_element_preserves_attributes() { + $processor = WP_HTML_Processor::create_fragment( '

Bold

' ); + + // Navigate past the first paragraph. + $this->assertTrue( $processor->next_tag( 'P' ), 'Failed to find first P.' ); + $this->assertTrue( $processor->next_tag( 'B' ), 'Failed to find original B.' ); + $this->assertSame( 'bold', $processor->get_attribute( 'class' ), 'Original B should have class attribute.' ); + + // Navigate to second paragraph (triggers reconstruction). + $this->assertTrue( $processor->next_tag( 'P' ), 'Failed to find second P.' ); + + // Navigate to the span inside the reconstructed formatting. + $this->assertTrue( $processor->next_tag( 'SPAN' ), 'Failed to find SPAN.' ); + + // Breadcrumbs should show the reconstructed B. + $this->assertSame( + array( 'HTML', 'BODY', 'P', 'B', 'SPAN' ), + $processor->get_breadcrumbs(), + 'Breadcrumbs should include reconstructed B.' + ); + } + + /** + * Verifies that elements opened in a previous paragraph are properly + * reconstructed when text nodes are encountered. + * + * @ticket 62357 + * + * @covers WP_HTML_Processor::reconstruct_active_formatting_elements + */ + public function test_reconstructs_on_text_node() { + $processor = WP_HTML_Processor::create_fragment( '

Bold

Text here' ); + + // Move through the tokens to find the text node in the second paragraph. + while ( $processor->next_token() ) { + if ( '#text' === $processor->get_token_type() && 'Text here' === $processor->get_modifiable_text() ) { + break; + } + } + + $this->assertSame( + array( 'HTML', 'BODY', 'P', 'B', '#text' ), + $processor->get_breadcrumbs(), + 'B element should be reconstructed before the text node.' + ); + } + + /** + * Verifies reconstruction with interleaved block and formatting elements. + * + * When a formatting element is opened before block elements, the HTML5 + * parsing algorithm places it in the DOM at its original location. + * The B is a direct child of BODY and stays open, so the DIV nests inside it. + * When entering the P, the B is still on the stack of open elements, so + * nothing needs to be reconstructed inside the P. + * + * @ticket 62357 + * + * @covers WP_HTML_Processor::reconstruct_active_formatting_elements + */ + public function test_reconstructs_with_interleaved_elements() { + $processor = WP_HTML_Processor::create_fragment( 'Bold

In div' ); + + $this->assertTrue( + $processor->next_tag( array( 'tag_name' => 'SPAN' ) ), + 'Should have found the target SPAN element.' + ); + + // The B stays open as an ancestor of DIV and P, so no reconstructed B appears inside P. + $this->assertSame( + array( 'HTML', 'BODY', 'B', 'DIV', 'P', 'SPAN' ), + $processor->get_breadcrumbs(), + 'B element should remain in its original position in the tree.' + ); + } + + /** + * Verifies that the algorithm handles empty active formatting elements list. + * + * @ticket 62357 + * + * @covers WP_HTML_Processor::reconstruct_active_formatting_elements + */ + public function test_handles_empty_active_formatting_elements() { + $processor = WP_HTML_Processor::create_fragment( '

No formatting

' ); + + $this->assertTrue( + $processor->next_tag( array( 'tag_name' => 'SPAN' ) ), + 'Should have found the target SPAN element.' + ); + + $this->assertSame( + array( 'HTML', 'BODY', 'P', 'SPAN' ), + $processor->get_breadcrumbs(), + 'No formatting elements to reconstruct.' + ); + } + + /** + * Verifies proper breadcrumbs when visiting reconstructed elements via step(). + * + * @ticket 62357 + * + * @covers WP_HTML_Processor::reconstruct_active_formatting_elements + */ + public function test_breadcrumbs_correct_during_stepping() { + $processor = WP_HTML_Processor::create_fragment( '

First

Second' );
+
+		// Find the text "Second" which triggers reconstruction.
+		while ( $processor->next_token() ) {
+			if ( '#text' === $processor->get_token_type() && 'Second' === $processor->get_modifiable_text() ) {
+				break;
+			}
+		}
+
+		$this->assertSame(
+			array( 'HTML', 'BODY', 'P', 'EM', '#text' ),
+			$processor->get_breadcrumbs(),
+			'Breadcrumbs should show reconstructed EM element.'
+		);
+	}
+
+	/**
+	 * Verifies that get_attribute() returns the correct value for reconstructed elements.
+	 *
+	 * Reads the `class` attribute on the original B, moves into the second
+	 * paragraph, and checks that the B found there reports the same value.
+	 *
+	 * @ticket 62357
+	 *
+	 * @covers WP_HTML_Processor::get_attribute
+	 */
+	public function test_get_attribute_works_for_reconstructed_element() {
+		// NOTE(review): the HTML fixtures in this group of tests appear to have lost their
+		// markup in this copy (only text/whitespace remains); restore them from the original patch.
+		$processor = WP_HTML_Processor::create_fragment( '

text

more' );
+
+		// Navigate past the first paragraph.
+		$this->assertTrue( $processor->next_tag( 'P' ), 'Failed to find first P.' );
+		$this->assertTrue( $processor->next_tag( 'B' ), 'Failed to find original B.' );
+		$this->assertSame( 'bold', $processor->get_attribute( 'class' ), 'Original B should have class attribute.' );
+
+		// Navigate to second paragraph (triggers reconstruction).
+		$this->assertTrue( $processor->next_tag( 'P' ), 'Failed to find second P.' );
+
+		// Find the reconstructed B and verify its attribute.
+		$this->assertTrue( $processor->next_tag( 'B' ), 'Failed to find reconstructed B.' );
+		$this->assertSame(
+			array( 'HTML', 'BODY', 'P', 'B' ),
+			$processor->get_breadcrumbs(),
+			'Should be inside the second P with reconstructed B.'
+		);
+		$this->assertSame( 'bold', $processor->get_attribute( 'class' ), 'Reconstructed B should have class attribute.' );
+		$this->assertNull( $processor->get_attribute( 'nonexistent' ), 'Nonexistent attribute should return null.' );
+	}
+
+	/**
+	 * Verifies that get_attribute() returns correct values for reconstructed elements with multiple attributes.
+	 *
+	 * Checks both the `size` and `color` attributes on the FONT found in the
+	 * second paragraph.
+	 *
+	 * @ticket 62357
+	 *
+	 * @covers WP_HTML_Processor::get_attribute
+	 */
+	public function test_get_attribute_works_for_reconstructed_element_with_multiple_attributes() {
+		$processor = WP_HTML_Processor::create_fragment( '

text

more' );
+
+		// Navigate past the first paragraph; assert each step so a failed move is reported where it happens.
+		$this->assertTrue( $processor->next_tag( 'P' ), 'Failed to find first P.' );
+		$this->assertTrue( $processor->next_tag( 'FONT' ), 'Failed to find original FONT.' );
+
+		// Navigate to second paragraph (triggers reconstruction).
+		$this->assertTrue( $processor->next_tag( 'P' ), 'Failed to find second P.' );
+
+		// Find the reconstructed FONT and verify its attributes.
+		$this->assertTrue( $processor->next_tag( 'FONT' ), 'Failed to find reconstructed FONT.' );
+		$this->assertSame( '4', $processor->get_attribute( 'size' ), 'Reconstructed FONT should have size attribute.' );
+		$this->assertSame( 'red', $processor->get_attribute( 'color' ), 'Reconstructed FONT should have color attribute.' );
+	}
+
+	/**
+	 * Verifies that get_attribute_names_with_prefix() returns correct values for reconstructed elements.
+	 *
+	 * Exercises three prefixes against the reconstructed B: the empty prefix
+	 * (all attributes), a matching prefix (`data-`), and a non-matching
+	 * prefix (`aria-`).
+	 *
+	 * @ticket 62357
+	 *
+	 * @covers WP_HTML_Processor::get_attribute_names_with_prefix
+	 */
+	public function test_get_attribute_names_with_prefix_works_for_reconstructed_element() {
+		$processor = WP_HTML_Processor::create_fragment( '

text

more' );
+
+		// Navigate past the first paragraph; assert each step so a failed move is reported where it happens.
+		$this->assertTrue( $processor->next_tag( 'P' ), 'Failed to find first P.' );
+		$this->assertTrue( $processor->next_tag( 'B' ), 'Failed to find original B.' );
+
+		// Navigate to second paragraph (triggers reconstruction).
+		$this->assertTrue( $processor->next_tag( 'P' ), 'Failed to find second P.' );
+
+		// Find the reconstructed B and verify its attribute names.
+		$this->assertTrue( $processor->next_tag( 'B' ), 'Failed to find reconstructed B.' );
+
+		// All attributes (empty prefix).
+		$all_attributes = $processor->get_attribute_names_with_prefix( '' );
+		$this->assertIsArray( $all_attributes, 'Should return array of attribute names.' );
+		$this->assertCount( 3, $all_attributes, 'Should have 3 attributes.' );
+		$this->assertContains( 'id', $all_attributes, 'Should contain id attribute.' );
+		$this->assertContains( 'class', $all_attributes, 'Should contain class attribute.' );
+		$this->assertContains( 'data-test', $all_attributes, 'Should contain data-test attribute.' );
+
+		// Prefix filter.
+		$data_attributes = $processor->get_attribute_names_with_prefix( 'data-' );
+		$this->assertIsArray( $data_attributes, 'Should return array for data- prefix.' );
+		$this->assertCount( 1, $data_attributes, 'Should have 1 data- attribute.' );
+		$this->assertContains( 'data-test', $data_attributes, 'Should contain data-test attribute.' );
+
+		// Non-matching prefix.
+		$aria_attributes = $processor->get_attribute_names_with_prefix( 'aria-' );
+		$this->assertIsArray( $aria_attributes, 'Should return array for aria- prefix.' );
+		$this->assertCount( 0, $aria_attributes, 'Should have 0 aria- attributes.' );
+	}
+
+	/**
+	 * Verifies that get_qualified_attribute_name() returns correct values for reconstructed elements.
+	 *
+	 * Looks up `id`, `class`, and an upper-case `DATA-TEST` on the
+	 * reconstructed B and expects lowercase names back; an attribute that is
+	 * not present is expected to yield null.
+	 *
+	 * @ticket 62357
+	 *
+	 * @covers WP_HTML_Processor::get_qualified_attribute_name
+	 */
+	public function test_get_qualified_attribute_name_works_for_reconstructed_element() {
+		$processor = WP_HTML_Processor::create_fragment( '

text

more' );
+
+		// Navigate past the first paragraph; assert each step so a failed move is reported where it happens.
+		$this->assertTrue( $processor->next_tag( 'P' ), 'Failed to find first P.' );
+		$this->assertTrue( $processor->next_tag( 'B' ), 'Failed to find original B.' );
+
+		// Navigate to second paragraph (triggers reconstruction).
+		$this->assertTrue( $processor->next_tag( 'P' ), 'Failed to find second P.' );
+
+		// Find the reconstructed B and verify its qualified attribute names.
+		$this->assertTrue( $processor->next_tag( 'B' ), 'Failed to find reconstructed B.' );
+
+		// Attribute names should be lowercase.
+		$this->assertSame( 'id', $processor->get_qualified_attribute_name( 'id' ), 'Should return lowercase attribute name.' );
+		$this->assertSame( 'class', $processor->get_qualified_attribute_name( 'class' ), 'Should return lowercase attribute name.' );
+		$this->assertSame( 'data-test', $processor->get_qualified_attribute_name( 'DATA-TEST' ), 'Should return lowercase attribute name.' );
+
+		// Non-existent attribute should return null.
+		$this->assertNull( $processor->get_qualified_attribute_name( 'nonexistent' ), 'Non-existent attribute should return null.' );
+	}
+
+	/**
+	 * Verifies that Noah's Ark clause limits identical elements to 3.
+	 *
+	 * When more than 3 identical formatting elements are pushed to the active
+	 * formatting elements list, the earliest duplicate should be removed.
+	 *
+	 * @ticket 62357
+	 *
+	 * @covers WP_HTML_Active_Formatting_Elements::push
+	 */
+	public function test_noahs_ark_limits_identical_elements_to_three() {
+		// Four identical tags, only 3 should be reconstructed.
+		$processor = WP_HTML_Processor::create_fragment( '

' );
+
+		$this->assertTrue(
+			$processor->next_tag( array( 'tag_name' => 'SPAN' ) ),
+			'Should have found the target SPAN element.'
+		);
+
+		// Breadcrumbs should show only 3 B elements reconstructed.
+		// Strict array_keys() search counts exact 'B' entries without needing a closure.
+		$breadcrumbs = $processor->get_breadcrumbs();
+		$b_count     = count( array_keys( $breadcrumbs, 'B', true ) );
+
+		$this->assertSame( 3, $b_count, "Noah's Ark should limit to 3 identical formatting elements." );
+	}
+
+	/**
+	 * Verifies that elements with different attributes are not considered identical.
+	 *
+	 * The Noah's Ark clause only removes duplicate elements with the same
+	 * tag name, namespace, and attributes. Elements with different attributes
+	 * should all be preserved.
+	 *
+	 * @ticket 62357
+	 *
+	 * @covers WP_HTML_Active_Formatting_Elements::push
+	 */
+	public function test_noahs_ark_different_attributes_are_different_elements() {
+		// Four elements with different classes - all should be reconstructed.
+		$processor = WP_HTML_Processor::create_fragment(
+			'

'
+		);
+
+		$this->assertTrue(
+			$processor->next_tag( array( 'tag_name' => 'SPAN' ) ),
+			'Should have found the target SPAN element.'
+		);
+
+		// All 4 should be reconstructed since they have different attributes.
+		$breadcrumbs = $processor->get_breadcrumbs();
+		$b_count     = count( array_keys( $breadcrumbs, 'B', true ) );
+
+		$this->assertSame( 4, $b_count, 'Elements with different attributes should all be reconstructed.' );
+	}
+
+	/**
+	 * Verifies that Noah's Ark respects markers in the active formatting elements list.
+	 *
+	 * When a marker is present (while inside BUTTON, TD, etc.), Noah's Ark only
+	 * considers elements after the last marker. This test verifies the behavior
+	 * by having identical elements both inside and outside a scoped element.
+	 *
+	 * Note: When the button closes, the marker is removed via clear_up_to_last_marker(),
+	 * so after the button, all elements are considered together again.
+	 *
+	 * NOTE(review): in the HTML standard, markers are inserted for APPLET, OBJECT,
+	 * MARQUEE, TEMPLATE, TD, TH, and CAPTION; BUTTON is not in that list, and
+	 * clearing up to the last marker also drops the entries that follow it.
+	 * Confirm that this fixture really places a marker; otherwise the test only
+	 * repeats the plain "four identical elements" case.
+	 *
+	 * @ticket 62357
+	 *
+	 * @covers WP_HTML_Active_Formatting_Elements::push
+	 */
+	public function test_noahs_ark_respects_markers() {
+		// Two elements inside a BUTTON (marker separates them during push).
+		// Inside the button, only those 2 count toward Noah's Ark limit.
+		// Then 2 more after the button. After button closes, marker is gone,
+		// so all 4 identical B elements are counted, and Noah's Ark reduces to 3.
+		$processor = WP_HTML_Processor::create_fragment(
+			'

'
+		);
+
+		$this->assertTrue(
+			$processor->next_tag( array( 'tag_name' => 'SPAN' ) ),
+			'Should have found the target SPAN element.'
+		);
+
+		// After button closes, marker is removed, so Noah's Ark sees all 4 identical B elements.
+		// It removes the earliest, leaving 3.
+		$breadcrumbs = $processor->get_breadcrumbs();
+		$b_count     = count( array_keys( $breadcrumbs, 'B', true ) );
+
+		$this->assertSame( 3, $b_count, "After button closes, marker is removed, so Noah's Ark limits all identical elements to 3." );
+	}
+
+	/**
+	 * Verifies that attribute order does not affect Noah's Ark comparison.
+	 *
+	 * Two elements with the same attributes in different order should be
+	 * considered identical for Noah's Ark purposes.
+	 *
+	 * @ticket 62357
+	 *
+	 * @covers WP_HTML_Active_Formatting_Elements::push
+	 */
+	public function test_noahs_ark_attribute_order_independent() {
+		// Four elements with same attributes but different order - should be limited to 3.
+		$processor = WP_HTML_Processor::create_fragment(
+			'

'
+		);
+
+		$this->assertTrue(
+			$processor->next_tag( array( 'tag_name' => 'SPAN' ) ),
+			'Should have found the target SPAN element.'
+		);
+
+		// Only 3 should be reconstructed since they are identical.
+		$breadcrumbs = $processor->get_breadcrumbs();
+		$b_count     = count( array_keys( $breadcrumbs, 'B', true ) );
+
+		$this->assertSame( 3, $b_count, 'Same attributes in different order should be considered identical.' );
+	}
+
+	/**
+	 * Verifies that different attribute values make elements non-identical.
+	 *
+	 * Elements that share an attribute name but differ in its value must not
+	 * be collapsed by the Noah's Ark clause, so all four are expected in the
+	 * breadcrumbs.
+	 *
+	 * @ticket 62357
+	 *
+	 * @covers WP_HTML_Active_Formatting_Elements::push
+	 */
+	public function test_noahs_ark_different_attribute_values_are_different_elements() {
+		// Four elements with same attribute name but different values.
+		$processor = WP_HTML_Processor::create_fragment(
+			'

'
+		);
+
+		$this->assertTrue(
+			$processor->next_tag( array( 'tag_name' => 'SPAN' ) ),
+			'Should have found the target SPAN element.'
+		);
+
+		// All 4 should be reconstructed since they have different attribute values.
+		$breadcrumbs = $processor->get_breadcrumbs();
+		$b_count     = count( array_keys( $breadcrumbs, 'B', true ) );
+
+		$this->assertSame( 4, $b_count, 'Elements with different attribute values should all be reconstructed.' );
+	}
+}