From ed4f11dde6479e01a88ade5df1eb1e4f84ca89bd Mon Sep 17 00:00:00 2001 From: Anton Timmermans Date: Wed, 4 Dec 2019 12:54:33 +0100 Subject: [PATCH 1/2] Account for comment length in formatting offsets Formatting after comments wouldn't take the comment length into account for their position within the text container. This fixes that so comments are taken into account for the current offset, which will result in correct offsets on the formatting elements. --- .../tree/cleanup/calculateTextIndicesSpec.js | 2 -- .../tree/cleanup/calculateTextIndices.js | 24 ++++++++++++------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/packages/yoastseo/spec/parsedPaper/build/tree/cleanup/calculateTextIndicesSpec.js b/packages/yoastseo/spec/parsedPaper/build/tree/cleanup/calculateTextIndicesSpec.js index 55309580e3..b8d9fcedee 100644 --- a/packages/yoastseo/spec/parsedPaper/build/tree/cleanup/calculateTextIndicesSpec.js +++ b/packages/yoastseo/spec/parsedPaper/build/tree/cleanup/calculateTextIndicesSpec.js @@ -193,9 +193,7 @@ describe( "calculateTextIndices", () => { expect( formattingElement.textStartIndex ).toEqual( 10 ); expect( formattingElement.textEndIndex ).toEqual( 10 ); } ); -} ); -describe.skip( "These tests are currently broken, will be fixed in https://github.com/Yoast/javascript/issues/409", () => { it( "correctly processes comments before another tag", () => { const source = "

This is a paragraph

"; diff --git a/packages/yoastseo/src/parsedPaper/build/tree/cleanup/calculateTextIndices.js b/packages/yoastseo/src/parsedPaper/build/tree/cleanup/calculateTextIndices.js index d7bab93f29..0c72baa722 100644 --- a/packages/yoastseo/src/parsedPaper/build/tree/cleanup/calculateTextIndices.js +++ b/packages/yoastseo/src/parsedPaper/build/tree/cleanup/calculateTextIndices.js @@ -88,17 +88,23 @@ const handleIgnoredContent = function( element, html, currentOffset ) { /** * Sets the start and end text positions of a comment. * - * @param {module:parsedPaper/structure.FormattingElement} element The formatting element to assign start and end text positions to. + * @param {module:parsedPaper/structure.FormattingElement} comment The formatting element to assign start and end text positions to. * @param {int} currentOffset A sum of all characters in the source code that don't get rendered * (e.g., tags, comments). * - * @returns {void} + * @returns {number} The new current offset after taking this comment into account. * * @private */ -const computeCommentStartEndTextIndices = function( element, currentOffset ) { - element.textStartIndex = element.location.startOffset - currentOffset; - element.textEndIndex = element.textStartIndex; +const computeCommentStartEndTextIndices = function( comment, currentOffset ) { + comment.textStartIndex = comment.location.startOffset - currentOffset; + comment.textEndIndex = comment.textStartIndex; + + const length = comment.location.endOffset - comment.location.startOffset; + + const newOffset = currentOffset + length; + + return newOffset; }; /** @@ -165,7 +171,7 @@ const calculateTextIndices = function( node, html ) { // Comments are self-closing formatting elements that are completely ignored in rendering. if ( element.tag === "comment" ) { - computeCommentStartEndTextIndices( element, currentOffset ); + currentOffset = computeCommentStartEndTextIndices( element, currentOffset ); return; } @@ -177,9 +183,9 @@ const calculateTextIndices = function( node, html ) { } /* - If this element is an ignored element its contents are not in the text, - so its content should be added to the respective formatting element instead, - and the current offset should be updated. + * If this element is an ignored element its contents are not in the text, + * so its content should be added to the respective formatting element instead, + * and the current offset should be updated. */ if ( ignoredHtmlElements.includes( element.tag ) ) { currentOffset = handleIgnoredContent( element, html, currentOffset ); From 3342ab6860303d89b25ba686c86bfce497acf78a Mon Sep 17 00:00:00 2001 From: Anton Timmermans Date: Thu, 5 Dec 2019 10:43:54 +0100 Subject: [PATCH 2/2] Handle cases with nested comments under formatting When a comment would be nested within a piece of formatting our custom tree adapter would not return the correct children. This resulted in the location information not being present on the resulting tree. That crashed our later functions that relied on the location information being there. --- .../build/tree/html/TreeAdapter.js | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/packages/yoastseo/src/parsedPaper/build/tree/html/TreeAdapter.js b/packages/yoastseo/src/parsedPaper/build/tree/html/TreeAdapter.js index a7afe1f699..5c3a72cb74 100644 --- a/packages/yoastseo/src/parsedPaper/build/tree/html/TreeAdapter.js +++ b/packages/yoastseo/src/parsedPaper/build/tree/html/TreeAdapter.js @@ -434,10 +434,35 @@ class TreeAdapter { } /* - Some node types do not have children (like Paragraph and Heading), - but parse5 always expects a node to have children. + * Some node types do not have children (like Paragraph and Heading), + * but parse5 always expects a node to have children. */ - return node.children || []; + let children = node.children || []; + + /* + * This code is because of the following scenario: + * + * ```html + *

This is a paragraph

+ * ``` + * + * In that case parse5 assumes that the comment is a child of the , + * but that will not happen because of our TreeAdapter. That's why we + * need to return the formatting elements after the `node` if we + * determine that the node is a formatting element. + */ + if ( children.length === 0 && node instanceof FormattingElement ) { + const formattingElement = node; + + const container = formattingElement.parent; + const parentFormatting = container.textContainer.formatting; + + const elementIndex = parentFormatting.indexOf( formattingElement ); + + children = parentFormatting.slice( elementIndex + 1 ); + } + + return children; } /**