package org.eclipse.lemminx.dom;

import java.util.List;

import org.eclipse.lemminx.commons.BadLocationException;
import org.eclipse.lemminx.commons.TextDocument;
import org.eclipse.lsp4j.TextDocumentContentChangeEvent;

/**
 * Experimental incremental DOM parser.
 *
 * <p>
 * Instead of re-parsing the whole document after every edit, this class tries
 * to recognise edits that cannot change the structure of the tree (typing
 * inside a text node, inside an attribute, or inside ignorable whitespace of an
 * element) and only shifts the offsets of the nodes located after the edit.
 * Every edit that is not recognised as structure-neutral falls back to a full
 * parse through {@link DOMParser}.
 * </p>
 *
 * <p>
 * Origin: introduced by patch 1/2 "Incremental Parser" (commit 82bb65c). The
 * same patch makes {@code DOMNode.start} non-final ({@code final int start}
 * becomes {@code int start}) so that start offsets can be shifted in place,
 * which this class relies on.
 * </p>
 *
 * <p>
 * NOTE(review): the fast paths only move offsets. Ranges stored inside
 * {@link DOMAttr} (name / value / delimiter) and any text cached by the nodes
 * are not refreshed here - confirm that callers do not depend on them after an
 * incremental update.
 * </p>
 */
public class IncrementalDOMParser {

    /** Characters that open or close a tag. */
    private static final String TAG_DELIMITERS = "<>";

    /** Characters that can terminate or restructure an attribute. */
    private static final String ATTRIBUTE_DELIMITERS = "<>\"'";

    /**
     * Applies the given content changes one after the other.
     *
     * @param oldDocument the DOM document to update.
     * @param changes     the content changes, in the order they were applied.
     * @return the document resulting from the last change, or {@code oldDocument}
     *         when {@code changes} is empty.
     */
    public DOMDocument parseIncremental(DOMDocument oldDocument, List<TextDocumentContentChangeEvent> changes) {
        DOMDocument document = oldDocument;
        for (TextDocumentContentChangeEvent event : changes) {
            document = parseIncremental(document, event);
        }
        return document;
    }

    /**
     * Applies a single content change. A change without range (full text
     * replacement) or with a position that cannot be resolved triggers a full
     * parse.
     *
     * <p>
     * NOTE(review): positions are resolved with {@code document.offsetAt}, i.e.
     * against the text currently held by the document. The test shipped with this
     * patch updates the text document before calling the parser, so the end of a
     * multi-line range is presumably resolved against the already edited text -
     * confirm.
     * </p>
     */
    private DOMDocument parseIncremental(DOMDocument document, TextDocumentContentChangeEvent event) {
        try {
            if (event.getRange() == null) {
                return parseFull(document.getTextDocument());
            }

            int changeOffset = document.offsetAt(event.getRange().getStart());

            int oldLength;
            if (event.getRangeLength() != null) {
                oldLength = event.getRangeLength();
            } else {
                int endOffset = document.offsetAt(event.getRange().getEnd());
                oldLength = endOffset - changeOffset;
            }

            int newLength = event.getText() != null ? event.getText().length() : 0;

            TextDocument textDoc = document.getTextDocument();

            return parseIncremental(document, textDoc, changeOffset, oldLength, newLength);

        } catch (BadLocationException e) {
            // The range does not match the document: a full parse is always correct.
            return parseFull(document.getTextDocument());
        }
    }

    /**
     * Updates {@code oldDocument} for an edit that replaced {@code oldLength}
     * characters at {@code changeOffset} with {@code newLength} characters.
     *
     * @param oldDocument     the DOM document built before the edit, may be null.
     * @param newTextDocument the text document containing the text after the edit.
     * @param changeOffset    the start offset of the edit.
     * @param oldLength       the number of replaced characters.
     * @param newLength       the number of inserted characters.
     * @return a document whose nodes carry offsets valid for the new text; it is
     *         fully re-parsed whenever the edit is not proven structure-neutral.
     */
    public DOMDocument parseIncremental(DOMDocument oldDocument, TextDocument newTextDocument, int changeOffset,
            int oldLength, int newLength) {

        if (oldDocument == null) {
            return parseFull(newTextDocument);
        }

        DOMNode affectedNode = oldDocument.findNodeAt(changeOffset);
        if (affectedNode == null) {
            return parseFull(newTextDocument);
        }

        // Prefer the most specific node (text, then attribute) at the offset.
        DOMNode adjustedNode = DOMNode.findTextAt(affectedNode, changeOffset);
        if (adjustedNode == null) {
            adjustedNode = DOMNode.findAttrAt(affectedNode, changeOffset);
        }
        if (adjustedNode != null) {
            // NOTE(review): when an attribute is found here, the node is neither text
            // nor element below, so the edit ends in a full parse - confirm intended.
            affectedNode = adjustedNode;
        }

        int delta = newLength - oldLength;
        int changeEnd = changeOffset + oldLength;

        // Text node
        if (affectedNode.isText()) {
            // The replaced range must stay inside the text node and the inserted text
            // must not introduce markup, otherwise the tree structure changes.
            boolean insideTextNode = changeOffset >= affectedNode.start && changeEnd <= affectedNode.end;
            String inserted = insertedText(newTextDocument, changeOffset, newLength);
            if (insideTextNode && inserted != null && !containsAny(inserted, TAG_DELIMITERS)) {
                return reparseTextNode(oldDocument, affectedNode, delta, newTextDocument);
            }
            return parseFull(newTextDocument);
        }

        // Element
        if (affectedNode.isElement()) {
            DOMElement element = (DOMElement) affectedNode;

            // Check if in attribute
            if (element.hasAttributes()) {
                for (DOMAttr attr : element.getAttributeNodes()) {
                    if (changeOffset >= attr.getStart() && changeEnd <= attr.getEnd()) {
                        String inserted = insertedText(newTextDocument, changeOffset, newLength);
                        if (inserted != null && !containsAny(inserted, ATTRIBUTE_DELIMITERS)) {
                            return reparseAttribute(oldDocument, element, attr, delta, newTextDocument);
                        }
                        // Quotes or tag delimiters may end or split the attribute.
                        return parseFull(newTextDocument);
                    }
                }
            }

            // Check if in element content
            if (element.getStartTagCloseOffset() != DOMNode.NULL_VALUE
                    && element.getEndTagOpenOffset() != DOMNode.NULL_VALUE
                    && changeOffset > element.getStartTagCloseOffset()
                    && changeEnd < element.getEndTagOpenOffset()) {

                // No text node exists at the offset (it would have been selected
                // above), so only whitespace can be inserted without creating a node,
                // and the replaced range must not swallow an existing child.
                String inserted = insertedText(newTextDocument, changeOffset, newLength);
                if (inserted != null && isXmlWhitespace(inserted)
                        && !overlapsChild(element, changeOffset, changeEnd)) {
                    return reparseElementContent(oldDocument, element, changeOffset, delta, newTextDocument);
                }
            }
        }

        return parseFull(newTextDocument);
    }

    /**
     * Fast path for an edit located inside a text node: the node grows or shrinks
     * by {@code delta} and everything after it is shifted.
     */
    private DOMDocument reparseTextNode(DOMDocument oldDoc, DOMNode textNode, int delta, TextDocument newTextDoc) {

        // 1. Update the text node
        textNode.end += delta;

        // 2. Adjust the end offsets of the ancestors
        adjustParentOffsets(textNode, delta);

        // 3. Shift the nodes located after the text node
        shiftOffsetsAfter(textNode, delta);

        return createUpdatedDocument(oldDoc, newTextDoc);
    }

    /**
     * Fast path for an edit located inside an attribute of {@code element}.
     */
    private DOMDocument reparseAttribute(DOMDocument oldDoc, DOMElement element, DOMAttr attr, int delta,
            TextDocument newTextDoc) {

        // Attributes declared after the edited one move with the edit. The
        // comparison uses start offsets, which the edit does not modify for 'attr'.
        for (DOMAttr sibling : element.getAttributeNodes()) {
            if (sibling != attr && sibling.getStart() > attr.getStart()) {
                adjustAttrOffsets(sibling, delta);
            }
        }

        attr.end += delta;
        // TODO the value range stored inside the attribute is not shifted here.

        if (element.startTagCloseOffset != DOMNode.NULL_VALUE) {
            element.startTagCloseOffset += delta;
        }
        if (element.endTagOpenOffset != DOMNode.NULL_VALUE) {
            element.endTagOpenOffset += delta;
        }
        if (element.endTagCloseOffset != DOMNode.NULL_VALUE) {
            element.endTagCloseOffset += delta;
        }
        element.end += delta;

        // Adjust children offsets (all children are after the attribute)
        if (element.hasChildNodes()) {
            for (DOMNode child : element.getChildren()) {
                adjustNodeOffsets(child, delta);
            }
        }

        // Adjust parent offsets
        adjustParentOffsets(element, delta);

        // Adjust following nodes
        shiftOffsetsAfter(element, delta);

        return createUpdatedDocument(oldDoc, newTextDoc);
    }

    /**
     * Fast path for a whitespace edit located in the content of {@code element},
     * between its children.
     *
     * @param changeOffset the start offset of the edit; only children starting at
     *                     or after this offset are shifted.
     */
    private DOMDocument reparseElementContent(DOMDocument oldDoc, DOMElement element, int changeOffset, int delta,
            TextDocument newTextDoc) {

        // Update element end offsets
        if (element.endTagOpenOffset != DOMNode.NULL_VALUE) {
            element.endTagOpenOffset += delta;
        }
        if (element.endTagCloseOffset != DOMNode.NULL_VALUE) {
            element.endTagCloseOffset += delta;
        }
        element.end += delta;

        // Children located before the edit keep their offsets; the others move.
        if (element.hasChildNodes()) {
            for (DOMNode child : element.getChildren()) {
                if (child.start >= changeOffset) {
                    adjustNodeOffsets(child, delta);
                }
            }
        }

        // Adjust parent offsets
        adjustParentOffsets(element, delta);

        // Adjust following nodes
        shiftOffsetsAfter(element, delta);

        return createUpdatedDocument(oldDoc, newTextDoc);
    }

    /**
     * Adjusts the end offsets of all the ancestors of the given node, up to (but
     * excluding) the document node.
     */
    private void adjustParentOffsets(DOMNode node, int delta) {
        DOMNode parent = node.getParentNode();

        while (parent != null && !(parent instanceof DOMDocument)) {
            // The edit is inside the parent: only its end moves.
            parent.end += delta;

            // If element, adjust the end tag offsets as well
            if (parent instanceof DOMElement) {
                DOMElement element = (DOMElement) parent;

                if (element.endTagOpenOffset != DOMNode.NULL_VALUE) {
                    element.endTagOpenOffset += delta;
                }
                if (element.endTagCloseOffset != DOMNode.NULL_VALUE) {
                    element.endTagCloseOffset += delta;
                }
            }

            parent = parent.getParentNode();
        }
    }

    /**
     * Creates a document bound to the new text and moves the (already adjusted)
     * top-level nodes of the old document into it.
     */
    private DOMDocument createUpdatedDocument(DOMDocument oldDoc, TextDocument newTextDoc) {
        DOMDocument newDoc = new DOMDocument(newTextDoc, oldDoc.getResolverExtensionManager());
        newDoc.setCancelChecker(oldDoc.getCancelChecker());

        for (DOMNode child : oldDoc.getChildren()) {
            newDoc.addChild(child);
        }

        return newDoc;
    }

    /**
     * Shifts every node located after {@code node} in document order. Each visited
     * node shifts its own subtree, so the walk never descends.
     */
    private void shiftOffsetsAfter(DOMNode node, int delta) {
        if (delta == 0) {
            return;
        }

        DOMNode next = getNextNodeInDocumentOrder(node);
        while (next != null) {
            adjustNodeOffsets(next, delta);
            next = getNextNodeInDocumentOrder(next);
        }
    }

    /**
     * Shifts a node, its tag offsets, its attributes and, recursively, its
     * children by {@code delta}.
     */
    private void adjustNodeOffsets(DOMNode node, int delta) {
        node.start += delta;
        node.end += delta;

        if (node instanceof DOMElement) {
            DOMElement e = (DOMElement) node;
            if (e.startTagOpenOffset != DOMNode.NULL_VALUE) {
                e.startTagOpenOffset += delta;
            }
            if (e.startTagCloseOffset != DOMNode.NULL_VALUE) {
                e.startTagCloseOffset += delta;
            }
            if (e.endTagOpenOffset != DOMNode.NULL_VALUE) {
                e.endTagOpenOffset += delta;
            }
            if (e.endTagCloseOffset != DOMNode.NULL_VALUE) {
                e.endTagCloseOffset += delta;
            }

            if (e.hasAttributes()) {
                for (DOMAttr attr : e.getAttributeNodes()) {
                    adjustAttrOffsets(attr, delta);
                }
            }
        }

        // Recursively adjust children
        if (node.hasChildNodes()) {
            for (DOMNode child : node.getChildren()) {
                adjustNodeOffsets(child, delta);
            }
        }
    }

    /**
     * Shifts the start and end offsets of an attribute.
     */
    private void adjustAttrOffsets(DOMAttr attr, int delta) {
        attr.start += delta;
        attr.end += delta;
        // TODO the name / value / delimiter ranges stored inside the attribute are
        // not shifted here.
    }

    /**
     * Returns the next sibling of the node or, when it has none, the next sibling
     * of its closest ancestor that has one; null at the end of the document.
     */
    private DOMNode getNextNodeInDocumentOrder(DOMNode node) {
        DOMNode next = node.getNextSibling();
        if (next != null) {
            return next;
        }

        DOMNode parent = node.getParentNode();
        while (parent != null) {
            next = parent.getNextSibling();
            if (next != null) {
                return next;
            }
            parent = parent.getParentNode();
        }
        return null;
    }

    /**
     * Returns the text inserted by the edit, read from the new text, or null when
     * the given range does not fit in the new text.
     */
    private static String insertedText(TextDocument newTextDocument, int changeOffset, int newLength) {
        String text = newTextDocument.getText();
        if (text == null || changeOffset < 0 || newLength < 0 || changeOffset > text.length()
                || newLength > text.length() - changeOffset) {
            return null;
        }
        return text.substring(changeOffset, changeOffset + newLength);
    }

    /**
     * Returns true if {@code text} contains at least one character of
     * {@code chars}.
     */
    private static boolean containsAny(String text, String chars) {
        for (int i = 0; i < text.length(); i++) {
            if (chars.indexOf(text.charAt(i)) != -1) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns true if {@code text} is empty or only made of XML whitespace
     * characters (space, tab, carriage return, line feed).
     */
    private static boolean isXmlWhitespace(String text) {
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c != ' ' && c != '\t' && c != '\r' && c != '\n') {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if a child of {@code element} intersects the replaced range
     * {@code [changeOffset, changeEnd)}.
     */
    private static boolean overlapsChild(DOMElement element, int changeOffset, int changeEnd) {
        if (!element.hasChildNodes()) {
            return false;
        }
        for (DOMNode child : element.getChildren()) {
            if (child.start < changeEnd && child.end > changeOffset) {
                return true;
            }
        }
        return false;
    }

    private DOMDocument parseFull(TextDocument doc) {
        return DOMParser.getInstance().parse(doc, null);
    }
}
index 000000000..722cec91e --- /dev/null +++ b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserTest.java @@ -0,0 +1,109 @@ +package org.eclipse.lemminx.dom; + +import static org.eclipse.lemminx.XMLAssert.r; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.util.Collections; +import java.util.List; + +import org.eclipse.lsp4j.TextDocumentContentChangeEvent; +import org.junit.jupiter.api.Test; + +public class IncrementalDOMParserTest { + + @Test + public void replaceText() { + // Load XML + DOMDocument document = DOMParser.getInstance().parse("
\r\n" + // + " A\r\n" + // + " B\r\n" + // + "
", "test.xml", null); + document.getTextDocument().setIncremental(true); + + IncrementalDOMParser parser = new IncrementalDOMParser(); + + // Replace A with B + List changes = e(1, 10, 11, "C"); + document.getTextDocument().update(changes); + document = parser.parseIncremental(document, changes); + + assertIncrementalParser(document); + } + + @Test + public void insertSimpleText() { + // Load XML + DOMDocument document = DOMParser.getInstance().parse("
\r\n" + // + " A\r\n" + // + " B\r\n" + // + "
", "test.xml", null); + document.getTextDocument().setIncremental(true); + + IncrementalDOMParser parser = new IncrementalDOMParser(); + + // Insert C after A + List changes = e(1, 11, 11, "C"); + document.getTextDocument().update(changes); + document = parser.parseIncremental(document, changes); + + assertIncrementalParser(document); + } + + @Test + public void insertNewLine() { + // Load XML + DOMDocument document = DOMParser.getInstance().parse("
\r\n" + // + " A\r\n" + // + " B\r\n" + // + "
", "test.xml", null); + document.getTextDocument().setIncremental(true); + + IncrementalDOMParser parser = new IncrementalDOMParser(); + + // Insert 2 lines after B + List changes = e(2, 11, 11, "\r\n\r\n \r\n "); + document.getTextDocument().update(changes); + document = parser.parseIncremental(document, changes); + + assertIncrementalParser(document); + } + + @Test + public void removeText() { + // Load XML + DOMDocument document = DOMParser.getInstance().parse("
\r\n" + // + " AC\r\n" + // + " B\r\n" + // + "
", "test.xml", null); + document.getTextDocument().setIncremental(true); + + IncrementalDOMParser parser = new IncrementalDOMParser(); + + // Insert C after A + List changes = e(1, 11, 12, ""); + document.getTextDocument().update(changes); + document = parser.parseIncremental(document, changes); + + assertIncrementalParser(document); + } + + private static void assertIncrementalParser(DOMDocument incrementalDocument) { + String xml = incrementalDocument.getText(); + System.err.println(xml); + DOMDocument document = DOMParser.getInstance().parse(xml, "test.xml", null); + + // Compare the string representation of both documents + assertEquals(document.toString(), incrementalDocument.toString()); + } + + private static List e(int line, int startCharacter, int endCharacter, String text) { + return e(line, startCharacter, line, endCharacter, text); + } + + private static List e(int startLine, int startCharacter, int endLine, + int endCharacter, String text) { + TextDocumentContentChangeEvent event = new TextDocumentContentChangeEvent( + r(startLine, startCharacter, endLine, endCharacter), text); + return Collections.singletonList(event); + } +} \ No newline at end of file From ec1de669377162f6a5bafc7155a36077cddd0607 Mon Sep 17 00:00:00 2001 From: azerr Date: Wed, 4 Feb 2026 11:38:02 +0100 Subject: [PATCH 2/2] Experimental incremental DOM parser Signed-off-by: azerr --- org.eclipse.lemminx/pom.xml | 12 +- .../eclipse/lemminx/XMLLanguageServer.java | 4 + .../lemminx/XMLTextDocumentService.java | 44 +- .../lemminx/commons/ModelTextDocument.java | 44 +- .../lemminx/commons/ModelTextDocuments.java | 24 +- .../eclipse/lemminx/commons/ModelUpdater.java | 9 + .../eclipse/lemminx/commons/TextDocument.java | 20 +- .../lemminx/commons/TextDocumentChange.java | 88 ++ .../lemminx/commons/TreeLineTracker.java | 2 +- .../java/org/eclipse/lemminx/dom/DOMAttr.java | 23 +- .../eclipse/lemminx/dom/DOMCharacterData.java | 17 +- .../java/org/eclipse/lemminx/dom/DOMNode.java | 102 +- 
.../org/eclipse/lemminx/dom/DOMParser.java | 1084 ++++++++++------- .../lemminx/dom/IncrementalDOMParser.java | 789 ++++++++++-- .../IncrementalDOMParserTestGenerator.java | 319 +++++ .../settings/XMLGeneralClientSettings.java | 29 +- .../XMLIncrementalParserSettings.java | 20 + .../lemminx/XMLIncrementalParserAssert.java | 52 + .../IncrementalDOMParserAttributeTest.java | 72 ++ .../IncrementalDOMParserEdgeCasesTest.java | 163 +++ .../dom/IncrementalDOMParserFallbackTest.java | 71 ++ ...entalDOMParserPerformanceImprovedTest.java | 249 ++++ .../IncrementalDOMParserPerformanceTest.java | 182 +++ .../IncrementalDOMParserStructureTest.java | 119 ++ .../dom/IncrementalDOMParserSubtreeTest.java | 179 +++ .../lemminx/dom/IncrementalDOMParserTest.java | 109 -- .../IncrementalDOMParserTextChangeTest.java | 156 +++ 27 files changed, 3267 insertions(+), 715 deletions(-) create mode 100644 org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/ModelUpdater.java create mode 100644 org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/TextDocumentChange.java create mode 100644 org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/IncrementalDOMParserTestGenerator.java create mode 100644 org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/settings/XMLIncrementalParserSettings.java create mode 100644 org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/XMLIncrementalParserAssert.java create mode 100644 org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserAttributeTest.java create mode 100644 org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserEdgeCasesTest.java create mode 100644 org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserFallbackTest.java create mode 100644 org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserPerformanceImprovedTest.java create mode 100644 
org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserPerformanceTest.java create mode 100644 org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserStructureTest.java create mode 100644 org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserSubtreeTest.java delete mode 100644 org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserTest.java create mode 100644 org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserTextChangeTest.java diff --git a/org.eclipse.lemminx/pom.xml b/org.eclipse.lemminx/pom.xml index dcf27beee..96ae84b43 100644 --- a/org.eclipse.lemminx/pom.xml +++ b/org.eclipse.lemminx/pom.xml @@ -227,12 +227,14 @@ xerces xercesImpl 2.12.2 + + + xml-apis + xml-apis + + - - xml-apis - xml-apis - 1.4.01 - + com.kotcrab.remark remark diff --git a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/XMLLanguageServer.java b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/XMLLanguageServer.java index 20c18d643..08d970f6f 100644 --- a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/XMLLanguageServer.java +++ b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/XMLLanguageServer.java @@ -50,6 +50,7 @@ import org.eclipse.lemminx.settings.XMLFoldingSettings; import org.eclipse.lemminx.settings.XMLFormattingOptions; import org.eclipse.lemminx.settings.XMLGeneralClientSettings; +import org.eclipse.lemminx.settings.XMLIncrementalParserSettings; import org.eclipse.lemminx.settings.XMLPreferences; import org.eclipse.lemminx.settings.XMLSymbolSettings; import org.eclipse.lemminx.settings.XMLTelemetrySettings; @@ -225,6 +226,9 @@ private synchronized void updateSettings(Object initOptions, boolean initLogs) { String workDir = serverSettings.getNormalizedWorkDir(); FilesUtils.setCachePathSetting(workDir); } + + XMLIncrementalParserSettings incrementalParserSettings = xmlClientSettings.getIncrementalParser(); + 
xmlTextDocumentService.setIncrementalParserSettings(incrementalParserSettings); } ContentModelSettings cmSettings = ContentModelSettings.getContentModelXMLSettings(initSettings); if (cmSettings != null) { diff --git a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/XMLTextDocumentService.java b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/XMLTextDocumentService.java index 9f2ecf911..278d4de8a 100644 --- a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/XMLTextDocumentService.java +++ b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/XMLTextDocumentService.java @@ -36,8 +36,11 @@ import org.eclipse.lemminx.commons.ModelTextDocuments; import org.eclipse.lemminx.commons.ModelValidatorDelayer; import org.eclipse.lemminx.commons.TextDocument; +import org.eclipse.lemminx.commons.TextDocumentChange; import org.eclipse.lemminx.dom.DOMDocument; import org.eclipse.lemminx.dom.DOMParser; +import org.eclipse.lemminx.dom.IncrementalDOMParser; +import org.eclipse.lemminx.dom.IncrementalDOMParserTestGenerator; import org.eclipse.lemminx.extensions.contentmodel.settings.XMLValidationRootSettings; import org.eclipse.lemminx.services.DocumentSymbolsResult; import org.eclipse.lemminx.services.SymbolInformationResult; @@ -50,6 +53,7 @@ import org.eclipse.lemminx.settings.XMLCompletionSettings; import org.eclipse.lemminx.settings.XMLFoldingSettings; import org.eclipse.lemminx.settings.XMLFormattingOptions; +import org.eclipse.lemminx.settings.XMLIncrementalParserSettings; import org.eclipse.lemminx.settings.XMLPreferences; import org.eclipse.lemminx.settings.XMLSymbolSettings; import org.eclipse.lemminx.utils.XMLPositionUtility; @@ -127,7 +131,7 @@ public class XMLTextDocumentService implements TextDocumentService { private SharedSettings sharedSettings; private LimitExceededWarner limitExceededWarner; - + /** * Enumeration for Validation triggered by. 
* @@ -194,12 +198,30 @@ public void triggerValidationIfNeeded() { private Boolean clientConfigurationSupport; + private XMLIncrementalParserSettings incrementalParser; + public XMLTextDocumentService(XMLLanguageServer xmlLanguageServer) { this.xmlLanguageServer = xmlLanguageServer; DOMParser parser = DOMParser.getInstance(); this.documents = new ModelTextDocuments((document, cancelChecker) -> { return parser.parse(document, getXMLLanguageService().getResolverExtensionManager(), true, cancelChecker); - }); + }, // + (document, changes, oldText) -> { + IncrementalDOMParser p = IncrementalDOMParser.getInstance(); + p.parseIncremental(document, changes); + + String generateTestWhen = incrementalParser != null ? incrementalParser.getGenerateTestWhen() : null; + if (generateTestWhen != null) { + if ("always".equals(generateTestWhen)) { + generateTest(document, changes, oldText); + } else if ("error".equals(generateTestWhen)) { + DOMDocument newDoc = parser.parse(document.getTextDocument(), getXMLLanguageService().getResolverExtensionManager(), true, () -> {}); + if (!newDoc.toString().equals(document.toString())) { + generateTest(document, changes, oldText); + } + } + } + }); this.sharedSettings = new SharedSettings(); this.limitExceededWarner = null; this.xmlValidatorDelayer = new ModelValidatorDelayer((document) -> { @@ -217,6 +239,20 @@ public XMLTextDocumentService(XMLLanguageServer xmlLanguageServer) { }); } + + private void generateTest(DOMDocument document, List changes, String oldText) { + IncrementalDOMParserTestGenerator.getInstance().generateTest(document, changes, oldText); + } + + public void setIncrementalParsing(boolean incrementalParsing) { + this.documents.setIncrementalModel(incrementalParsing); + } + + public void setIncrementalParserSettings(XMLIncrementalParserSettings incrementalParser) { + this.incrementalParser = incrementalParser; + this.documents.setIncrementalModel(incrementalParser != null ? 
incrementalParser.isEnabled() : false); + } + public void updateClientCapabilities(ClientCapabilities capabilities, ExtendedClientCapabilities extendedClientCapabilities) { if (capabilities != null) { @@ -719,8 +755,7 @@ void validate(DOMDocument xmlDocument, Map validationArgs) throw cancelChecker.checkCanceled(); getXMLLanguageService().publishDiagnostics(xmlDocument, params -> xmlLanguageServer.getLanguageClient().publishDiagnostics(params), - (doc) -> triggerValidationFor(doc, TriggeredBy.Other), - sharedSettings.getValidationSettings(), + (doc) -> triggerValidationFor(doc, TriggeredBy.Other), sharedSettings.getValidationSettings(), validationArgs, cancelChecker); } @@ -827,4 +862,5 @@ public LimitExceededWarner getLimitExceededWarner() { } return this.limitExceededWarner; } + } diff --git a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/ModelTextDocument.java b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/ModelTextDocument.java index 2e55e2d90..57bda4569 100644 --- a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/ModelTextDocument.java +++ b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/ModelTextDocument.java @@ -11,10 +11,12 @@ *******************************************************************************/ package org.eclipse.lemminx.commons; +import java.util.List; import java.util.concurrent.CancellationException; import java.util.function.BiFunction; import java.util.logging.Logger; +import org.eclipse.lsp4j.TextDocumentContentChangeEvent; import org.eclipse.lsp4j.TextDocumentItem; import org.eclipse.lsp4j.jsonrpc.CancelChecker; @@ -33,14 +35,21 @@ public class ModelTextDocument extends TextDocument { private T model; - public ModelTextDocument(TextDocumentItem document, BiFunction parse) { + private final ModelUpdater modelUpdater; + + private boolean incrementalModel; + + public ModelTextDocument(TextDocumentItem document, BiFunction parse, + ModelUpdater updateModel) { super(document); 
this.parse = parse; + this.modelUpdater = updateModel; } public ModelTextDocument(String text, String uri, BiFunction parse) { super(text, uri); this.parse = parse; + this.modelUpdater = null; } /** @@ -83,7 +92,8 @@ private synchronized T getSynchronizedModel() { LOGGER.fine("Start parsing of model with version '" + version); // Stop of parse process can be done when completable future is canceled or when // version of document changes - CancelChecker cancelChecker = new TextDocumentVersionChecker(this, version); + CancelChecker cancelChecker = isIncrementalModel() ? ModelTextDocuments.NO_CANCELLABLE + : new TextDocumentVersionChecker(this, version); // parse the model model = parse.apply(this, cancelChecker); } catch (CancellationException e) { @@ -101,14 +111,32 @@ private synchronized T getSynchronizedModel() { public void setText(String text) { super.setText(text); // text changed, cancel the completable future which load the model - cancelModel(); + if (!isIncrementalModel()) { + cancelModel(); + } } @Override public void setVersion(int version) { super.setVersion(version); // version changed, mark the model as dirty - cancelModel(); + if (!isIncrementalModel()) { + cancelModel(); + } + } + + @Override + public List update(List changes) { + String oldText = super.getText(); + List result = super.update(changes); + if (isIncrementalModel() && model != null && !result.isEmpty()) { + updateModel(model, oldText, result); + } + return result; + } + + private void updateModel(T model, String oldText, List changes) { + modelUpdater.updateModel(model, changes, oldText); } /** @@ -118,4 +146,12 @@ private void cancelModel() { model = null; } + public boolean isIncrementalModel() { + return incrementalModel; + } + + public void setIncrementalModel(boolean incrementalModel) { + this.incrementalModel = incrementalModel; + } + } \ No newline at end of file diff --git a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/ModelTextDocuments.java 
b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/ModelTextDocuments.java index d3431fc58..6083626c0 100644 --- a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/ModelTextDocuments.java +++ b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/ModelTextDocuments.java @@ -30,16 +30,26 @@ */ public class ModelTextDocuments extends TextDocuments> { + final static CancelChecker NO_CANCELLABLE = () -> { + }; + private final BiFunction parse; - public ModelTextDocuments(BiFunction parse) { + private final ModelUpdater modelUpdater; + + private boolean incrementalModel; + + public ModelTextDocuments(BiFunction parse, + ModelUpdater updateModel) { this.parse = parse; + this.modelUpdater = updateModel; } @Override public ModelTextDocument createDocument(TextDocumentItem document) { - ModelTextDocument doc = new ModelTextDocument(document, parse); + ModelTextDocument doc = new ModelTextDocument(document, parse, modelUpdater); doc.setIncremental(isIncremental()); + doc.setIncrementalModel(isIncrementalModel()); return doc; } @@ -115,7 +125,7 @@ public CompletableFuture computeModelAsync(TextDocumentIdentifier documen return null; } // Apply the function code by using the parsed model. - return code.apply(model, cancelChecker); + return code.apply(model, isIncrementalModel() ? 
NO_CANCELLABLE : cancelChecker); }); } @@ -150,4 +160,12 @@ private static CompletableFuture computeAsyncCompose(Function { + + void updateModel(T model, List changes, String oldText); +} diff --git a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/TextDocument.java b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/TextDocument.java index e62a3db94..e044afdbe 100644 --- a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/TextDocument.java +++ b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/TextDocument.java @@ -12,6 +12,8 @@ */ package org.eclipse.lemminx.commons; +import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.logging.Logger; import java.util.regex.Matcher; @@ -156,17 +158,20 @@ private synchronized ILineTracker createLineTracker() { /** * Update text of the document by using the changes and according the * incremental support. - * + * * @param changes the text document changes. + * @return list of changes with pre-calculated offsets (calculated before text update), + * or empty list if no changes or not incremental */ - public void update(List changes) { + public List update(List changes) { if (changes.size() < 1) { // no changes, ignore it. - return; + return Collections.emptyList(); } if (isIncremental()) { try { long start = System.currentTimeMillis(); + List result = new ArrayList<>(changes.size()); synchronized (lock) { // Initialize buffer and line tracker from the current text document StringBuilder buffer = new StringBuilder(getText()); @@ -188,6 +193,11 @@ public void update(List changes) { } String text = changeEvent.getText(); int startOffset = offsetAt(range.getStart()); + int newLength = text != null ? 
text.length() : 0; + + // Store the change with pre-calculated offsets (before text update) + result.add(new TextDocumentChange(changeEvent, startOffset, length, newLength)); + buffer.replace(startOffset, startOffset + length, text); lineTracker.replace(startOffset, length, text); } @@ -195,8 +205,11 @@ public void update(List changes) { setText(buffer.toString()); } LOGGER.fine("Text document content updated in " + (System.currentTimeMillis() - start) + "ms"); + return result; } catch (BadLocationException e) { + e.printStackTrace(); // Should never occur. + return Collections.emptyList(); } } else { // like vscode does, get the last changes @@ -207,6 +220,7 @@ public void update(List changes) { setText(last.getText()); lineTracker.set(last.getText()); } + return Collections.emptyList(); } } } diff --git a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/TextDocumentChange.java b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/TextDocumentChange.java new file mode 100644 index 000000000..79998213d --- /dev/null +++ b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/TextDocumentChange.java @@ -0,0 +1,88 @@ +/** + * Copyright (c) 2024 Angelo ZERR. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v2.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v20.html + * + * SPDX-License-Identifier: EPL-2.0 + * + * Contributors: + * Angelo Zerr - initial API and implementation + */ +package org.eclipse.lemminx.commons; + +import org.eclipse.lsp4j.TextDocumentContentChangeEvent; + +/** + * Represents a text document change with pre-calculated offsets. The offsets + * are calculated BEFORE the text is updated, so they are relative to the old + * text content. 
+ */ +public class TextDocumentChange { + + private final TextDocumentContentChangeEvent event; + private final int startOffset; + private final int oldLength; + private final int newLength; + + /** + * Creates a new text document change. + * + * @param event the original change event + * @param startOffset the start offset in the old text (before update) + * @param oldLength the length of text being replaced + * @param newLength the length of the new text + */ + public TextDocumentChange(TextDocumentContentChangeEvent event, int startOffset, int oldLength, int newLength) { + this.event = event; + this.startOffset = startOffset; + this.oldLength = oldLength; + this.newLength = newLength; + } + + /** + * Returns the original change event. + * + * @return the original change event + */ + public TextDocumentContentChangeEvent getEvent() { + return event; + } + + /** + * Returns the start offset in the old text (before update). + * + * @return the start offset + */ + public int getStartOffset() { + return startOffset; + } + + /** + * Returns the length of text being replaced. + * + * @return the old length + */ + public int getOldLength() { + return oldLength; + } + + /** + * Returns the length of the new text. + * + * @return the new length + */ + public int getNewLength() { + return newLength; + } + + /** + * Returns the new text content. 
+ * + * @return the new text + */ + public String getText() { + return event.getText(); + } +} diff --git a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/TreeLineTracker.java b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/TreeLineTracker.java index dca2c0a35..a0349f90d 100644 --- a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/TreeLineTracker.java +++ b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/commons/TreeLineTracker.java @@ -1243,7 +1243,7 @@ public int getOffsetAt(Position position) throws BadLocationException { int endLineOffset = lineOffset + lineLength; if (offset > endLineOffset) { throw new BadLocationException( - "The character value, {" + character + "} of the line" + line + "}, is out of bounds."); + "The character value, {" + character + "} of the line {" + line + "}, is out of bounds."); } return offset; diff --git a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMAttr.java b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMAttr.java index a9c69b565..3e2ef9429 100644 --- a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMAttr.java +++ b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMAttr.java @@ -30,7 +30,7 @@ public class DOMAttr extends DOMNode implements org.w3c.dom.Attr { private String name; - private final AttrName nodeAttrName; + private AttrName nodeAttrName; private int delimiter; @@ -256,6 +256,27 @@ public void setValue(String value, int start, int end) { this.nodeAttrValue = start != -1 ? new AttrValue(start, end) : null; } + /** + * Update nodeAttrName with adjusted offsets. + * Used during incremental parsing when attribute offsets change. + * + * @param start new start offset + * @param end new end offset + */ + void updateAttrNameOffsets(int start, int end) { + this.nodeAttrName = start != -1 ? new AttrName(start, end) : null; + } + + /** + * Clear cached attribute name and values to force recalculation from document text. 
+ * Used during incremental parsing when attribute offsets change. + */ + public void clearCachedValue() { + this.name = null; + this.originalValue = null; + this.quotelessValue = null; + } + public DOMRange getNodeAttrValue() { return nodeAttrValue; } diff --git a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMCharacterData.java b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMCharacterData.java index 943c8cf08..6621a3a32 100644 --- a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMCharacterData.java +++ b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMCharacterData.java @@ -233,11 +233,26 @@ public void setData(String value) throws DOMException { /* * (non-Javadoc) - * + * * @see org.w3c.dom.CharacterData#substringData(int, int) */ @Override public String substringData(int offset, int count) throws DOMException { throw new UnsupportedOperationException(); } + + /** + * Clear cached data when the node content changes during incremental parsing. + * This forces the node to reload its content from the updated document. + * + * Note: isWhitespace is intentionally NOT cleared here because it's set during + * parsing based on the node's structure, not derived from the text content. + * The parser will update this flag if needed when reparsing. 
+ */ + public void clearCache() { + this.data = null; + this.normalizedData = null; + this.delimiter = null; + // Note: isWhitespace is not cleared - it's set by the parser, not derived from content + } } diff --git a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMNode.java b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMNode.java index 7bbb22cb2..c0e9c9cc5 100644 --- a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMNode.java +++ b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMNode.java @@ -68,6 +68,12 @@ public abstract class DOMNode implements Node, DOMRange { int start; // | int end; // | + /** + * Index of this node in parent's children list. -1 if not set or if this is a + * root node. + */ + int indexInParent = -1; + DOMNode parent; private static final NodeList EMPTY_CHILDREN = new NodeList() { @@ -191,6 +197,41 @@ private String toString(int indent) { result.append(getNodeName()); result.append(", closed: "); result.append(closed); + + // Add attributes if this is an element with attributes + if (this instanceof DOMElement && hasAttributes()) { + result.append(", \n"); + for (int i = 0; i < indent + 1; i++) { + result.append("\t"); + } + result.append("attributes:["); + List attrs = ((DOMElement) this).getAttributeNodes(); + for (int i = 0; i < attrs.size(); i++) { + DOMAttr attr = attrs.get(i); + result.append("\n"); + for (int j = 0; j < indent + 2; j++) { + result.append("\t"); + } + result.append("{name: "); + result.append(attr.getName()); + result.append(", start: "); + result.append(attr.getStart()); + result.append(", end: "); + result.append(attr.getEnd()); + result.append(", value: \""); + result.append(attr.getValue()); + result.append("\"}"); + if (i < attrs.size() - 1) { + result.append(","); + } + } + result.append("\n"); + for (int i = 0; i < indent + 1; i++) { + result.append("\t"); + } + result.append("]"); + } + if (children != null && children.size() > 0) { result.append(", \n"); 
for (int i = 0; i < indent + 1; i++) { @@ -497,7 +538,39 @@ public void addChild(DOMNode child) { if (children == null) { children = new XMLNodeList<>(); } - getChildren().add(child); + // Set the index + child.indexInParent = children.size(); + children.add(child); + } + + /** + * Replace a child node at the given index and update indices + */ + protected void replaceChildAt(int index, DOMNode newChild) { + if (children == null || index < 0 || index >= children.size()) { + return; + } + + // Set new child's parent and index + newChild.parent = this; + newChild.indexInParent = index; + + // Replace in list + children.set(index, newChild); + } + + /** + * Clear all children and invalidate their indices + */ + protected void clearChildren() { + if (children != null) { + // Invalidate indices of removed children + for (DOMNode child : children) { + child.indexInParent = -1; + child.parent = null; + } + children.clear(); + } } /** @@ -727,11 +800,11 @@ public String getNamespaceURI() { @Override public DOMNode getNextSibling() { DOMNode parentNode = getParentNode(); - if (parentNode == null) { + if (parentNode == null || indexInParent < 0) { return null; } List children = parentNode.getChildren(); - int nextIndex = children.indexOf(this) + 1; + int nextIndex = indexInParent + 1; return nextIndex < children.size() ? children.get(nextIndex) : null; } @@ -751,15 +824,14 @@ public String getPrefix() { * @see org.w3c.dom.Node#getPreviousSibling() */ @Override - public DOMNode getPreviousSibling() { - DOMNode parentNode = getParentNode(); - if (parentNode == null) { - return null; - } - List children = parentNode.getChildren(); - int previousIndex = children.indexOf(this) - 1; - return previousIndex >= 0 ? children.get(previousIndex) : null; - } + public DOMNode getPreviousSibling() { + DOMNode parentNode = getParentNode(); + if (parentNode == null || indexInParent < 0) { + return null; + } + int previousIndex = indexInParent - 1; + return previousIndex >= 0 ? 
parentNode.getChildren().get(previousIndex) : null; + } public DOMNode getPreviousNonTextSibling() { DOMNode prev = getPreviousSibling(); @@ -776,8 +848,7 @@ public DOMNode getPreviousNonTextSibling() { * The following sample sample with tagName=foo will returns the <\foo> orphan * end element: *

- * | - * <\foo> + * | <\foo> *

* * @param offset the offset. @@ -797,8 +868,7 @@ public DOMElement getOrphanEndElement(int offset, String tagName) { * The following sample sample with tagName=bar will returns the <\foo> orphan * end element: *

- * | - * <\foo> + * | <\foo> *

* * @param offset the offset. diff --git a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMParser.java b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMParser.java index f036bed1b..36751a4db 100644 --- a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMParser.java +++ b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/DOMParser.java @@ -67,6 +67,7 @@ public DOMDocument parse(TextDocument document, URIResolverExtensionManager reso public DOMDocument parse(TextDocument document, URIResolverExtensionManager resolverExtensionManager, boolean ignoreWhitespaceContent, CancelChecker monitor) { + boolean isDTD = DOMUtils.isDTD(document.getUri()); boolean inDTDInternalSubset = false; String text = document.getText(); @@ -129,569 +130,802 @@ public DOMDocument parse(TextDocument document, URIResolverExtensionManager reso } } switch (token) { - case StartTagOpen: { - if (!curr.isClosed() && curr.parent != null) { - // The next node's parent (curr) is not closed at this point - // so the node's parent (curr) will have its end position updated - // to a newer end position. - curr.end = scanner.getTokenOffset(); - } - if ((curr.isClosed()) || curr.isDoctype()) { - // The next node being considered is a child of 'curr' - // and if 'curr' is already closed then 'curr' was not updated properly. - // Or if we get a Doctype node then we know it was not closed and 'curr' - // wasn't updated properly. - curr = curr.parent; - inDTDInternalSubset = false; // In case it was previously in the internal subset - } - DOMElement child = xmlDocument.createElement(scanner.getTokenOffset(), scanner.getTokenEnd()); - child.startTagOpenOffset = scanner.getTokenOffset(); - curr.addChild(child); - curr = child; - break; + case StartTagOpen: { + if (!curr.isClosed() && curr.parent != null) { + // The next node's parent (curr) is not closed at this point + // so the node's parent (curr) will have its end position updated + // to a newer end position. 
+ curr.end = scanner.getTokenOffset(); } - - case StartTag: { - DOMElement element = (DOMElement) curr; - element.tag = scanner.getTokenText(); - curr.end = scanner.getTokenEnd(); - break; + if ((curr.isClosed()) || curr.isDoctype()) { + // The next node being considered is a child of 'curr' + // and if 'curr' is already closed then 'curr' was not updated properly. + // Or if we get a Doctype node then we know it was not closed and 'curr' + // wasn't updated properly. + curr = curr.parent; + inDTDInternalSubset = false; // In case it was previously in the internal subset } + DOMElement child = xmlDocument.createElement(scanner.getTokenOffset(), scanner.getTokenEnd()); + child.startTagOpenOffset = scanner.getTokenOffset(); + curr.addChild(child); + curr = child; + break; + } - case StartTagClose: - if (curr.isElement()) { - DOMElement element = (DOMElement) curr; - curr.end = scanner.getTokenEnd(); // might be later set to end tag position - element.startTagCloseOffset = scanner.getTokenOffset(); - - // never enters isEmptyElement() is always false - if (element.hasTagName() && isEmptyElement(element.getTagName()) && curr.parent != null) { - curr.closed = true; - curr = curr.parent; - } - } else if (curr.isProcessingInstruction() || curr.isProlog()) { - DOMProcessingInstruction element = (DOMProcessingInstruction) curr; - curr.end = scanner.getTokenEnd(); // might be later set to end tag position - element.startTagClose = true; - if (element.getTarget() != null && isEmptyElement(element.getTarget()) && curr.parent != null) { - curr.closed = true; - curr = curr.parent; - } - } - curr.end = scanner.getTokenEnd(); - break; - - case EndTagOpen: - if (tempWhitespaceContent != null) { - curr.addChild(tempWhitespaceContent); - tempWhitespaceContent = null; - } - endTagOpenOffset = scanner.getTokenOffset(); - curr.end = scanner.getTokenOffset(); - previousTokenWasEndTagOpen = true; - break; - - case EndTag: - // end tag (ex: ) - String closeTag = scanner.getTokenText(); - 
DOMNode current = curr; + case StartTag: { + DOMElement element = (DOMElement) curr; + element.tag = scanner.getTokenText(); + curr.end = scanner.getTokenEnd(); + break; + } - /** - * eg: will set a,b,c end position to the start of | - */ - while (!(curr.isElement() && ((DOMElement) curr).isSameTag(closeTag)) && curr.parent != null) { - curr.end = endTagOpenOffset; - curr = curr.parent; - } - if (curr != xmlDocument) { - curr.closed = true; - if (curr.isElement()) { - ((DOMElement) curr).endTagOpenOffset = endTagOpenOffset; - } else if (curr.isProcessingInstruction() || curr.isProlog()) { - ((DOMProcessingInstruction) curr).endTagOpenOffset = endTagOpenOffset; - } - curr.end = scanner.getTokenEnd(); - } else { - // element open tag not found (ex: ) add a fake element which only has an - // end tag (no start tag). - DOMElement element = xmlDocument.createElement(scanner.getTokenOffset() - 2, - scanner.getTokenEnd()); - element.endTagOpenOffset = endTagOpenOffset; - element.tag = closeTag; - current.addChild(element); - curr = element; - } - break; + case StartTagClose: + if (curr.isElement()) { + DOMElement element = (DOMElement) curr; + curr.end = scanner.getTokenEnd(); // might be later set to end tag position + element.startTagCloseOffset = scanner.getTokenOffset(); - case StartTagSelfClose: - if (curr.parent != null) { + // never enters isEmptyElement() is always false + if (element.hasTagName() && isEmptyElement(element.getTagName()) && curr.parent != null) { curr.closed = true; - ((DOMElement) curr).selfClosed = true; - curr.end = scanner.getTokenEnd(); - lastClosed = curr; curr = curr.parent; } - break; - - case EndTagClose: - if (curr.parent != null) { - curr.end = scanner.getTokenEnd(); - lastClosed = curr; - if (lastClosed.isElement()) { - ((DOMElement) curr).endTagCloseOffset = scanner.getTokenOffset(); - } - if (curr.isDoctype()) { - curr.closed = true; - } + } else if (curr.isProcessingInstruction() || curr.isProlog()) { + DOMProcessingInstruction 
element = (DOMProcessingInstruction) curr; + curr.end = scanner.getTokenEnd(); // might be later set to end tag position + element.startTagClose = true; + if (element.getTarget() != null && isEmptyElement(element.getTarget()) && curr.parent != null) { + curr.closed = true; curr = curr.parent; - } - break; - - case AttributeName: { - attr = new DOMAttr(null, scanner.getTokenOffset(), - scanner.getTokenEnd(), curr); - curr.setAttributeNode(attr); - curr.end = scanner.getTokenEnd(); - break; } - - case DelimiterAssign: { - if (attr != null) { - // Sets the value to the '=' position in case there is no AttributeValue - attr.setDelimiter(scanner.getTokenOffset()); + curr.end = scanner.getTokenEnd(); + break; + + case EndTagOpen: + endTagOpenOffset = scanner.getTokenOffset(); + curr.end = scanner.getTokenOffset(); + previousTokenWasEndTagOpen = true; + break; + + case EndTag: + // end tag (ex: ) + String closeTag = scanner.getTokenText(); + + // Add temp whitespace content only if the closing tag matches the current element + if (tempWhitespaceContent != null) { + if (curr.isElement() && ((DOMElement) curr).isSameTag(closeTag)) { + // Closing tag matches current element - add the whitespace + curr.addChild(tempWhitespaceContent); } - break; + // Clear temp whitespace in all cases + tempWhitespaceContent = null; } + DOMNode current = curr; - case AttributeValue: { - if (curr.hasAttributes() && attr != null) { - attr.setValue(null, scanner.getTokenOffset(), scanner.getTokenEnd()); + /** + * eg: will set a,b,c end position to the start of | + */ + while (!(curr.isElement() && ((DOMElement) curr).isSameTag(closeTag)) && curr.parent != null) { + curr.end = endTagOpenOffset; + curr = curr.parent; + } + if (curr != xmlDocument) { + curr.closed = true; + if (curr.isElement()) { + ((DOMElement) curr).endTagOpenOffset = endTagOpenOffset; + } else if (curr.isProcessingInstruction() || curr.isProlog()) { + ((DOMProcessingInstruction) curr).endTagOpenOffset = endTagOpenOffset; } - 
attr = null; curr.end = scanner.getTokenEnd(); - break; - } - - case CDATATagOpen: { - DOMCDATASection cdataNode = xmlDocument.createCDataSection(scanner.getTokenOffset(), text.length()); - curr.addChild(cdataNode); - curr = cdataNode; - break; + } else { + // element open tag not found (ex: ) add a fake element which only has an + // end tag (no start tag). + DOMElement element = xmlDocument.createElement(scanner.getTokenOffset() - 2, scanner.getTokenEnd()); + element.endTagOpenOffset = endTagOpenOffset; + element.tag = closeTag; + current.addChild(element); + curr = element; } + break; - case CDATAContent: { - DOMCDATASection cdataNode = (DOMCDATASection) curr; - cdataNode.startContent = scanner.getTokenOffset(); - cdataNode.endContent = scanner.getTokenEnd(); + case StartTagSelfClose: + if (curr.parent != null) { + curr.closed = true; + ((DOMElement) curr).selfClosed = true; curr.end = scanner.getTokenEnd(); - break; + lastClosed = curr; + curr = curr.parent; } + break; - case CDATATagClose: { + case EndTagClose: + if (curr.parent != null) { curr.end = scanner.getTokenEnd(); - curr.closed = true; + lastClosed = curr; + if (lastClosed.isElement()) { + ((DOMElement) curr).endTagCloseOffset = scanner.getTokenOffset(); + } + if (curr.isDoctype()) { + curr.closed = true; + } curr = curr.parent; - break; - } - case StartPrologOrPI: { - DOMProcessingInstruction prologOrPINode = xmlDocument - .createProcessingInstruction(scanner.getTokenOffset(), text.length()); - curr.addChild(prologOrPINode); - curr = prologOrPINode; - break; } + break; - case PIName: { - DOMProcessingInstruction processingInstruction = ((DOMProcessingInstruction) curr); - processingInstruction.target = scanner.getTokenText(); - processingInstruction.processingInstruction = true; - break; - } + case AttributeName: { + attr = new DOMAttr(null, scanner.getTokenOffset(), scanner.getTokenEnd(), curr); + curr.setAttributeNode(attr); + curr.end = scanner.getTokenEnd(); + break; + } - case PrologName: { - 
DOMProcessingInstruction processingInstruction = ((DOMProcessingInstruction) curr); - processingInstruction.target = scanner.getTokenText(); - processingInstruction.prolog = true; - break; + case DelimiterAssign: { + if (attr != null) { + // Sets the value to the '=' position in case there is no AttributeValue + attr.setDelimiter(scanner.getTokenOffset()); } + break; + } - case PIContent: { - DOMProcessingInstruction processingInstruction = (DOMProcessingInstruction) curr; - processingInstruction.startContent = scanner.getTokenOffset(); - processingInstruction.endContent = scanner.getTokenEnd(); - break; + case AttributeValue: { + if (curr.hasAttributes() && attr != null) { + attr.setValue(null, scanner.getTokenOffset(), scanner.getTokenEnd()); } + attr = null; + curr.end = scanner.getTokenEnd(); + break; + } - case PIEnd: - case PrologEnd: { - curr.end = scanner.getTokenEnd(); - curr.closed = true; - curr = curr.parent; - break; - } + case CDATATagOpen: { + DOMCDATASection cdataNode = xmlDocument.createCDataSection(scanner.getTokenOffset(), text.length()); + curr.addChild(cdataNode); + curr = cdataNode; + break; + } - case StartCommentTag: { - // Incase the tag before the comment tag (curr) was not properly closed - // curr should be set to the root node. 
- if (xmlDocument.isDTD() || inDTDInternalSubset) { - while (!curr.isDoctype()) { - curr = curr.parent; - } - } else if ((curr.isClosed())) { + case CDATAContent: { + DOMCDATASection cdataNode = (DOMCDATASection) curr; + cdataNode.startContent = scanner.getTokenOffset(); + cdataNode.endContent = scanner.getTokenEnd(); + curr.end = scanner.getTokenEnd(); + break; + } + + case CDATATagClose: { + curr.end = scanner.getTokenEnd(); + curr.closed = true; + curr = curr.parent; + break; + } + + case StartPrologOrPI: { + DOMProcessingInstruction prologOrPINode = xmlDocument + .createProcessingInstruction(scanner.getTokenOffset(), text.length()); + curr.addChild(prologOrPINode); + curr = prologOrPINode; + break; + } + + case PIName: { + DOMProcessingInstruction processingInstruction = ((DOMProcessingInstruction) curr); + processingInstruction.target = scanner.getTokenText(); + processingInstruction.processingInstruction = true; + break; + } + + case PrologName: { + DOMProcessingInstruction processingInstruction = ((DOMProcessingInstruction) curr); + processingInstruction.target = scanner.getTokenText(); + processingInstruction.prolog = true; + break; + } + + case PIContent: { + DOMProcessingInstruction processingInstruction = (DOMProcessingInstruction) curr; + processingInstruction.startContent = scanner.getTokenOffset(); + processingInstruction.endContent = scanner.getTokenEnd(); + break; + } + + case PIEnd: + case PrologEnd: { + curr.end = scanner.getTokenEnd(); + curr.closed = true; + curr = curr.parent; + break; + } + + case StartCommentTag: { + // Incase the tag before the comment tag (curr) was not properly closed + // curr should be set to the root node. 
+ if (xmlDocument.isDTD() || inDTDInternalSubset) { + while (!curr.isDoctype()) { curr = curr.parent; } - DOMComment comment = xmlDocument.createComment(scanner.getTokenOffset(), text.length()); - curr.addChild(comment); - curr = comment; - try { - int endLine = document.positionAt(lastClosed.end).getLine(); - int startLine = document.positionAt(curr.start).getLine(); - if (endLine == startLine && lastClosed.end <= curr.start) { - comment.commentSameLineEndTag = true; - } - } catch (BadLocationException e) { - LOGGER.log(Level.SEVERE, "XMLParser StartCommentTag bad offset in document", e); + } else if ((curr.isClosed())) { + curr = curr.parent; + } + DOMComment comment = xmlDocument.createComment(scanner.getTokenOffset(), text.length()); + curr.addChild(comment); + curr = comment; + try { + int endLine = document.positionAt(lastClosed.end).getLine(); + int startLine = document.positionAt(curr.start).getLine(); + if (endLine == startLine && lastClosed.end <= curr.start) { + comment.commentSameLineEndTag = true; } - break; + } catch (BadLocationException e) { + LOGGER.log(Level.SEVERE, "XMLParser StartCommentTag bad offset in document", e); } + break; + } - case Comment: { - DOMComment comment = (DOMComment) curr; - comment.startContent = scanner.getTokenOffset(); - comment.endContent = scanner.getTokenEnd(); - break; - } + case Comment: { + DOMComment comment = (DOMComment) curr; + comment.startContent = scanner.getTokenOffset(); + comment.endContent = scanner.getTokenEnd(); + break; + } - case EndCommentTag: { - curr.end = scanner.getTokenEnd(); - curr.closed = true; - curr = curr.parent; - break; - } + case EndCommentTag: { + curr.end = scanner.getTokenEnd(); + curr.closed = true; + curr = curr.parent; + break; + } - case Content: { - boolean currIsDeclNode = curr instanceof DTDDeclNode; - if (currIsDeclNode) { - curr.end = scanner.getTokenOffset() - 1; - while (!curr.isDoctype()) { - curr = curr.getParentNode(); - } + case Content: { + boolean currIsDeclNode = 
curr instanceof DTDDeclNode; + if (currIsDeclNode) { + curr.end = scanner.getTokenOffset() - 1; + while (!curr.isDoctype()) { + curr = curr.getParentNode(); } - int start = scanner.getTokenOffset(); - int end = scanner.getTokenEnd(); - DOMText textNode = xmlDocument.createText(start, end); - textNode.closed = true; - - if (scanner.isTokenTextBlank()) { - if (ignoreWhitespaceContent) { - if (curr.hasChildNodes()) { - break; - } - - tempWhitespaceContent = textNode; - break; + } + int start = scanner.getTokenOffset(); + int end = scanner.getTokenEnd(); + DOMText textNode = xmlDocument.createText(start, end); + textNode.closed = true; - } else if (!currIsDeclNode) { - textNode.setWhitespace(true); - } else { + if (scanner.isTokenTextBlank()) { + if (ignoreWhitespaceContent) { + if (curr.hasChildNodes()) { break; } + tempWhitespaceContent = textNode; + break; + + } else if (!currIsDeclNode) { + textNode.setWhitespace(true); + } else { + break; } - curr.addChild(textNode); - break; } - // DTD + curr.addChild(textNode); + break; + } - case DTDStartDoctypeTag: { - DOMDocumentType doctype = xmlDocument.createDocumentType(scanner.getTokenOffset(), text.length()); - curr.addChild(doctype); - doctype.parent = curr; - curr = doctype; - break; - } + // DTD - case DTDDoctypeName: { - DOMDocumentType doctype = (DOMDocumentType) curr; - doctype.setName(scanner.getTokenOffset(), scanner.getTokenEnd()); - break; - } + case DTDStartDoctypeTag: { + DOMDocumentType doctype = xmlDocument.createDocumentType(scanner.getTokenOffset(), text.length()); + curr.addChild(doctype); + doctype.parent = curr; + curr = doctype; + break; + } - case DTDDocTypeKindPUBLIC: { - DOMDocumentType doctype = (DOMDocumentType) curr; - doctype.setKind(scanner.getTokenOffset(), scanner.getTokenEnd()); - break; - } + case DTDDoctypeName: { + DOMDocumentType doctype = (DOMDocumentType) curr; + doctype.setName(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } - case DTDDocTypeKindSYSTEM: { - 
DOMDocumentType doctype = (DOMDocumentType) curr; - doctype.setKind(scanner.getTokenOffset(), scanner.getTokenEnd()); - break; - } + case DTDDocTypeKindPUBLIC: { + DOMDocumentType doctype = (DOMDocumentType) curr; + doctype.setKind(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } - case DTDDoctypePublicId: { - DOMDocumentType doctype = (DOMDocumentType) curr; - doctype.setPublicId(scanner.getTokenOffset(), scanner.getTokenEnd()); - break; - } + case DTDDocTypeKindSYSTEM: { + DOMDocumentType doctype = (DOMDocumentType) curr; + doctype.setKind(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } - case DTDDoctypeSystemId: { - DOMDocumentType doctype = (DOMDocumentType) curr; - doctype.setSystemId(scanner.getTokenOffset(), scanner.getTokenEnd()); - break; + case DTDDoctypePublicId: { + DOMDocumentType doctype = (DOMDocumentType) curr; + doctype.setPublicId(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } + + case DTDDoctypeSystemId: { + DOMDocumentType doctype = (DOMDocumentType) curr; + doctype.setSystemId(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } + + case DTDStartInternalSubset: { + DOMDocumentType doctype = (DOMDocumentType) curr; + doctype.setStartInternalSubset(scanner.getTokenOffset()); + inDTDInternalSubset = true; + break; + } + + case DTDEndInternalSubset: { + while (!curr.isDoctype()) { + curr.end = scanner.getTokenOffset() - 1; + curr = curr.getParentNode(); } + inDTDInternalSubset = false; + DOMDocumentType doctype = (DOMDocumentType) curr; + doctype.setEndInternalSubset(scanner.getTokenEnd()); + break; + } - case DTDStartInternalSubset: { - DOMDocumentType doctype = (DOMDocumentType) curr; - doctype.setStartInternalSubset(scanner.getTokenOffset()); - inDTDInternalSubset = true; - break; + case DTDStartElement: { + // If previous 'curr' was an unclosed DTD Declaration + while (!curr.isDoctype()) { + curr.end = scanner.getTokenOffset(); + curr = curr.getParentNode(); } - case DTDEndInternalSubset: { 
- while (!curr.isDoctype()) { - curr.end = scanner.getTokenOffset() - 1; - curr = curr.getParentNode(); - } - inDTDInternalSubset = false; - DOMDocumentType doctype = (DOMDocumentType) curr; - doctype.setEndInternalSubset(scanner.getTokenEnd()); - break; + DTDElementDecl child = new DTDElementDecl(scanner.getTokenOffset(), text.length()); + curr.addChild(child); + curr = child; + break; + } + + case DTDElementDeclName: { + DTDElementDecl element = (DTDElementDecl) curr; + element.setName(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } + + case DTDElementCategory: { + DTDElementDecl element = (DTDElementDecl) curr; + element.setCategory(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } + + case DTDStartElementContent: { + DTDElementDecl element = (DTDElementDecl) curr; + element.setContent(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } + + case DTDElementContent: { + DTDElementDecl element = (DTDElementDecl) curr; + element.updateLastParameterEnd(scanner.getTokenEnd()); + break; + } + + case DTDEndElementContent: { + DTDElementDecl element = (DTDElementDecl) curr; + element.updateLastParameterEnd(scanner.getTokenEnd()); + break; + } + + case DTDStartAttlist: { + while (!curr.isDoctype()) { // If previous DTD Decl was unclosed + curr.end = scanner.getTokenOffset(); + curr = curr.getParentNode(); } + DTDAttlistDecl child = new DTDAttlistDecl(scanner.getTokenOffset(), text.length()); - case DTDStartElement: { - // If previous 'curr' was an unclosed DTD Declaration - while (!curr.isDoctype()) { - curr.end = scanner.getTokenOffset(); - curr = curr.getParentNode(); - } + isInitialDeclaration = true; + curr.addChild(child); + curr = child; + break; + } + + case DTDAttlistElementName: { + DTDAttlistDecl attribute = (DTDAttlistDecl) curr; + attribute.setName(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } + + case DTDAttlistAttributeName: { + DTDAttlistDecl attribute = (DTDAttlistDecl) curr; + if 
(isInitialDeclaration == false) { + // All additional declarations are created as new DTDAttlistDecl's + DTDAttlistDecl child = new DTDAttlistDecl(attribute.getStart(), attribute.getEnd()); + attribute.addAdditionalAttDecl(child); + child.parent = attribute; - DTDElementDecl child = new DTDElementDecl(scanner.getTokenOffset(), text.length()); - curr.addChild(child); + attribute = child; curr = child; - break; } + attribute.setAttributeName(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } - case DTDElementDeclName: { - DTDElementDecl element = (DTDElementDecl) curr; - element.setName(scanner.getTokenOffset(), scanner.getTokenEnd()); - break; - } + case DTDAttlistAttributeType: { + DTDAttlistDecl attribute = (DTDAttlistDecl) curr; + attribute.setAttributeType(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } - case DTDElementCategory: { - DTDElementDecl element = (DTDElementDecl) curr; - element.setCategory(scanner.getTokenOffset(), scanner.getTokenEnd()); - break; - } + case DTDAttlistAttributeValue: { + DTDAttlistDecl attribute = (DTDAttlistDecl) curr; + attribute.setAttributeValue(scanner.getTokenOffset(), scanner.getTokenEnd()); - case DTDStartElementContent: { - DTDElementDecl element = (DTDElementDecl) curr; - element.setContent(scanner.getTokenOffset(), scanner.getTokenEnd()); - break; + if (attribute.parent.isDTDAttListDecl()) { // Is not the root/main ATTLIST node + curr = attribute.parent; + } else { + isInitialDeclaration = false; } + break; + } - case DTDElementContent: { - DTDElementDecl element = (DTDElementDecl) curr; - element.updateLastParameterEnd(scanner.getTokenEnd()); - break; + case DTDStartEntity: { + while (!curr.isDoctype()) { // If previous DTD Decl was unclosed + curr.end = scanner.getTokenOffset(); + curr = curr.getParentNode(); } + DTDEntityDecl child = new DTDEntityDecl(scanner.getTokenOffset(), text.length()); + curr.addChild(child); + curr = child; + break; + } - case DTDEndElementContent: { - DTDElementDecl 
element = (DTDElementDecl) curr; - element.updateLastParameterEnd(scanner.getTokenEnd()); - break; - } + case DTDEntityPercent: { + DTDEntityDecl entity = (DTDEntityDecl) curr; + entity.setPercent(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } - case DTDStartAttlist: { - while (!curr.isDoctype()) { // If previous DTD Decl was unclosed - curr.end = scanner.getTokenOffset(); - curr = curr.getParentNode(); - } - DTDAttlistDecl child = new DTDAttlistDecl(scanner.getTokenOffset(), text.length()); + case DTDEntityName: { + DTDEntityDecl entity = (DTDEntityDecl) curr; + entity.setName(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } - isInitialDeclaration = true; - curr.addChild(child); - curr = child; - break; - } + case DTDEntityValue: { + DTDEntityDecl entity = (DTDEntityDecl) curr; + entity.setValue(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } - case DTDAttlistElementName: { - DTDAttlistDecl attribute = (DTDAttlistDecl) curr; - attribute.setName(scanner.getTokenOffset(), scanner.getTokenEnd()); - break; + case DTDEntityKindPUBLIC: + case DTDEntityKindSYSTEM: { + DTDEntityDecl entity = (DTDEntityDecl) curr; + entity.setKind(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } + + case DTDEntityPublicId: { + DTDEntityDecl entity = (DTDEntityDecl) curr; + entity.setPublicId(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } + + case DTDEntitySystemId: { + DTDEntityDecl entity = (DTDEntityDecl) curr; + entity.setSystemId(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } + + case DTDStartNotation: { + while (!curr.isDoctype()) { // If previous DTD Decl was unclosed + curr.end = scanner.getTokenOffset(); + curr = curr.getParentNode(); } + DTDNotationDecl child = new DTDNotationDecl(scanner.getTokenOffset(), text.length()); + curr.addChild(child); + curr = child; + isInitialDeclaration = true; + break; + } + + case DTDNotationName: { + DTDNotationDecl notation = (DTDNotationDecl) curr; + 
notation.setName(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } - case DTDAttlistAttributeName: { - DTDAttlistDecl attribute = (DTDAttlistDecl) curr; - if (isInitialDeclaration == false) { - // All additional declarations are created as new DTDAttlistDecl's - DTDAttlistDecl child = new DTDAttlistDecl(attribute.getStart(), attribute.getEnd()); - attribute.addAdditionalAttDecl(child); - child.parent = attribute; + case DTDNotationKindPUBLIC: { + DTDNotationDecl notation = (DTDNotationDecl) curr; + notation.setKind(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } - attribute = child; - curr = child; + case DTDNotationKindSYSTEM: { + DTDNotationDecl notation = (DTDNotationDecl) curr; + notation.setKind(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } + + case DTDNotationPublicId: { + DTDNotationDecl notation = (DTDNotationDecl) curr; + notation.setPublicId(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } + + case DTDNotationSystemId: { + DTDNotationDecl notation = (DTDNotationDecl) curr; + notation.setSystemId(scanner.getTokenOffset(), scanner.getTokenEnd()); + break; + } + + case DTDEndTag: { + if ((curr.isDTDElementDecl() || curr.isDTDAttListDecl() || curr.isDTDEntityDecl() + || curr.isDTDNotationDecl())) { + while (curr.parent != null && !curr.parent.isDoctype()) { + curr = curr.parent; } - attribute.setAttributeName(scanner.getTokenOffset(), scanner.getTokenEnd()); - break; + curr.end = scanner.getTokenEnd(); + curr.closed = true; + curr = curr.parent; } + break; + } - case DTDAttlistAttributeType: { - DTDAttlistDecl attribute = (DTDAttlistDecl) curr; - attribute.setAttributeType(scanner.getTokenOffset(), scanner.getTokenEnd()); - break; - } + case DTDEndDoctypeTag: { + ((DOMDocumentType) curr).end = scanner.getTokenEnd(); + curr.closed = true; + curr = curr.parent; + break; + } - case DTDAttlistAttributeValue: { - DTDAttlistDecl attribute = (DTDAttlistDecl) curr; - 
attribute.setAttributeValue(scanner.getTokenOffset(), scanner.getTokenEnd()); + case DTDUnrecognizedParameters: { + DTDDeclNode node = (DTDDeclNode) curr; + node.setUnrecognized(scanner.getTokenOffset(), ((XMLScanner) scanner).getLastNonWhitespaceOffset()); + break; + } - if (attribute.parent.isDTDAttListDecl()) { // Is not the root/main ATTLIST node - curr = attribute.parent; - } else { - isInitialDeclaration = false; + default: + } + token = scanner.scan(); + } + if (previousTokenWasEndTagOpen) { + previousTokenWasEndTagOpen = false; + if (token != TokenType.EndTag) { + // The excepted token is not an EndTag, create a fake end tag element + DOMElement element = xmlDocument.createElement(endTagOpenOffset, endTagOpenOffset + 2); + element.endTagOpenOffset = endTagOpenOffset; + curr.addChild(element); + } + } + while (curr.parent != null) { + curr.end = text.length(); + curr = curr.parent; + } + return xmlDocument; + } + + /** + * Parse a fragment of XML text starting at a specific offset. + * + * This method parses XML content without using substring, directly using the + * scanner's offset capabilities. It only handles XML tokens (not DTD) and uses + * the provided ownerDocument for creating nodes. + * + * Key features: - No substring() call - uses scanner with startOffset directly + * - Nodes created with ownerDocument.createElement() - correct document + * references - Offsets are absolute (relative to full document text) + * + * @param text the full document text (not a substring!) + * @param startOffset the offset where the fragment starts + * @param endOffset the offset where the fragment ends + * @param ownerDocument the document that will own the parsed nodes + * @return the root element of the parsed fragment, or null if parsing fails + */ + public DOMElement parseFragment(String text, int startOffset, int endOffset, DOMDocument ownerDocument) { + try { + // Create scanner with startOffset - no substring needed! 
+ Scanner scanner = XMLScanner.createScanner(text, startOffset, false); + + DOMElement fragmentRoot = null; + DOMNode curr = null; + DOMAttr attr = null; + int endTagOpenOffset = -1; + boolean previousTokenWasEndTagOpen = false; + + TokenType token = scanner.scan(); + while (token != TokenType.EOS && scanner.getTokenOffset() < endOffset) { + + // Handle previous EndTagOpen without matching EndTag + if (previousTokenWasEndTagOpen) { + previousTokenWasEndTagOpen = false; + if (token != TokenType.EndTag) { + // Create fake end tag element + DOMElement element = ownerDocument.createElement(endTagOpenOffset, endTagOpenOffset + 2); + element.endTagOpenOffset = endTagOpenOffset; + if (curr != null) { + curr.addChild(element); + } } - break; } - case DTDStartEntity: { - while (!curr.isDoctype()) { // If previous DTD Decl was unclosed + switch (token) { + case StartTagOpen: { + // Close previous unclosed element + if (curr != null && !curr.isClosed()) { curr.end = scanner.getTokenOffset(); - curr = curr.getParentNode(); } - DTDEntityDecl child = new DTDEntityDecl(scanner.getTokenOffset(), text.length()); - curr.addChild(child); - curr = child; + if (curr != null && curr.isClosed()) { + curr = curr.parent; + } + + // Create new element using ownerDocument - correct document reference! 
+ DOMElement child = ownerDocument.createElement(scanner.getTokenOffset(), scanner.getTokenEnd()); + child.startTagOpenOffset = scanner.getTokenOffset(); + + if (curr == null) { + // This is the root of the fragment + fragmentRoot = child; + curr = child; + } else { + curr.addChild(child); + curr = child; + } break; } - case DTDEntityPercent: { - DTDEntityDecl entity = (DTDEntityDecl) curr; - entity.setPercent(scanner.getTokenOffset(), scanner.getTokenEnd()); + case StartTag: { + if (curr instanceof DOMElement) { + DOMElement element = (DOMElement) curr; + element.tag = scanner.getTokenText(); + curr.end = scanner.getTokenEnd(); + } break; } - case DTDEntityName: { - DTDEntityDecl entity = (DTDEntityDecl) curr; - entity.setName(scanner.getTokenOffset(), scanner.getTokenEnd()); + case StartTagClose: { + if (curr instanceof DOMElement) { + DOMElement element = (DOMElement) curr; + curr.end = scanner.getTokenEnd(); + element.startTagCloseOffset = scanner.getTokenOffset(); + } break; } - case DTDEntityValue: { - DTDEntityDecl entity = (DTDEntityDecl) curr; - entity.setValue(scanner.getTokenOffset(), scanner.getTokenEnd()); + case EndTagOpen: { + endTagOpenOffset = scanner.getTokenOffset(); + if (curr != null) { + curr.end = scanner.getTokenOffset(); + } + previousTokenWasEndTagOpen = true; break; } - case DTDEntityKindPUBLIC: - case DTDEntityKindSYSTEM: { - DTDEntityDecl entity = (DTDEntityDecl) curr; - entity.setKind(scanner.getTokenOffset(), scanner.getTokenEnd()); + case EndTag: { + String closeTag = scanner.getTokenText(); + + // Find matching start tag + while (curr != null && !(curr instanceof DOMElement && ((DOMElement) curr).isSameTag(closeTag))) { + curr.end = endTagOpenOffset; + curr = curr.parent; + } + + if (curr != null) { + curr.closed = true; + if (curr instanceof DOMElement) { + ((DOMElement) curr).endTagOpenOffset = endTagOpenOffset; + } + curr.end = scanner.getTokenEnd(); + } break; } - case DTDEntityPublicId: { - DTDEntityDecl entity = 
(DTDEntityDecl) curr; - entity.setPublicId(scanner.getTokenOffset(), scanner.getTokenEnd()); + case StartTagSelfClose: { + if (curr != null && curr.parent != null) { + curr.closed = true; + ((DOMElement) curr).selfClosed = true; + curr.end = scanner.getTokenEnd(); + curr = curr.parent; + } break; } - case DTDEntitySystemId: { - DTDEntityDecl entity = (DTDEntityDecl) curr; - entity.setSystemId(scanner.getTokenOffset(), scanner.getTokenEnd()); + case EndTagClose: { + if (curr != null && curr.parent != null) { + curr.end = scanner.getTokenEnd(); + if (curr instanceof DOMElement) { + ((DOMElement) curr).endTagCloseOffset = scanner.getTokenOffset(); + } + curr = curr.parent; + } break; } - case DTDStartNotation: { - while (!curr.isDoctype()) { // If previous DTD Decl was unclosed - curr.end = scanner.getTokenOffset(); - curr = curr.getParentNode(); + case AttributeName: { + if (curr instanceof DOMElement) { + attr = new DOMAttr(scanner.getTokenText(), scanner.getTokenOffset(), scanner.getTokenEnd(), + ownerDocument); + ((DOMElement) curr).setAttributeNode(attr); } - DTDNotationDecl child = new DTDNotationDecl(scanner.getTokenOffset(), text.length()); - curr.addChild(child); - curr = child; - isInitialDeclaration = true; break; } - case DTDNotationName: { - DTDNotationDecl notation = (DTDNotationDecl) curr; - notation.setName(scanner.getTokenOffset(), scanner.getTokenEnd()); + case AttributeValue: { + if (attr != null) { + attr.setValue(scanner.getTokenText(), scanner.getTokenOffset(), scanner.getTokenEnd()); + attr = null; + } break; } - case DTDNotationKindPUBLIC: { - DTDNotationDecl notation = (DTDNotationDecl) curr; - notation.setKind(scanner.getTokenOffset(), scanner.getTokenEnd()); + case DelimiterAssign: { + if (attr != null) { + attr.setDelimiter(scanner.getTokenOffset()); + } break; } - case DTDNotationKindSYSTEM: { - DTDNotationDecl notation = (DTDNotationDecl) curr; - notation.setKind(scanner.getTokenOffset(), scanner.getTokenEnd()); + case CDATATagOpen: { + 
DOMCDATASection cdataNode = ownerDocument.createCDataSection(scanner.getTokenOffset(), + text.length()); + curr.addChild(cdataNode); + curr = cdataNode; break; } - case DTDNotationPublicId: { - DTDNotationDecl notation = (DTDNotationDecl) curr; - notation.setPublicId(scanner.getTokenOffset(), scanner.getTokenEnd()); + case CDATAContent: { + DOMCDATASection cdataNode = (DOMCDATASection) curr; + cdataNode.startContent = scanner.getTokenOffset(); + cdataNode.endContent = scanner.getTokenEnd(); + curr.end = scanner.getTokenEnd(); break; } - case DTDNotationSystemId: { - DTDNotationDecl notation = (DTDNotationDecl) curr; - notation.setSystemId(scanner.getTokenOffset(), scanner.getTokenEnd()); + case CDATATagClose: { + curr.end = scanner.getTokenEnd(); + curr.closed = true; + curr = curr.parent; break; } - case DTDEndTag: { - if ((curr.isDTDElementDecl() || curr.isDTDAttListDecl() || curr.isDTDEntityDecl() - || curr.isDTDNotationDecl())) { - while (curr.parent != null && !curr.parent.isDoctype()) { - curr = curr.parent; + case Content: { + if (curr != null) { + int start = scanner.getTokenOffset(); + int end = scanner.getTokenEnd(); + DOMText textNode = new DOMText(start, end); + textNode.parent = curr; + textNode.closed = true; + + // Match normal parsing behavior for whitespace handling + if (scanner.isTokenTextBlank()) { + // Ignore whitespace-only text nodes (both before first child and between elements) + // This matches the ignoreWhitespaceContent=true behavior in normal parsing + break; } - curr.end = scanner.getTokenEnd(); - curr.closed = true; - curr = curr.parent; + + curr.addChild(textNode); } break; } - case DTDEndDoctypeTag: { - ((DOMDocumentType) curr).end = scanner.getTokenEnd(); - curr.closed = true; - curr = curr.parent; + default: + // Ignore other token types (DTD, PI, etc.) 
for fragment parsing break; } - case DTDUnrecognizedParameters: { - DTDDeclNode node = (DTDDeclNode) curr; - node.setUnrecognized(scanner.getTokenOffset(), ((XMLScanner) scanner).getLastNonWhitespaceOffset()); - break; - } + token = scanner.scan(); + } - default: + // Close any unclosed nodes + while (curr != null && curr != fragmentRoot) { + curr.end = Math.min(endOffset, text.length()); + curr = curr.parent; } - token = scanner.scan(); - } - if (previousTokenWasEndTagOpen) { - previousTokenWasEndTagOpen = false; - if (token != TokenType.EndTag) { - // The excepted token is not an EndTag, create a fake end tag element - DOMElement element = xmlDocument.createElement(endTagOpenOffset, endTagOpenOffset + 2); - element.endTagOpenOffset = endTagOpenOffset; - curr.addChild(element); + + if (fragmentRoot != null) { + fragmentRoot.end = Math.min(endOffset, text.length()); } + + return fragmentRoot; + + } catch (Exception e) { + LOGGER.log(Level.SEVERE, "Error parsing XML fragment at offset " + startOffset, e); + return null; } - while (curr.parent != null) { - curr.end = text.length(); - curr = curr.parent; - } - return xmlDocument; } private static boolean isEmptyElement(String tag) { diff --git a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/IncrementalDOMParser.java b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/IncrementalDOMParser.java index c9fa75f30..b327f1501 100644 --- a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/IncrementalDOMParser.java +++ b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/IncrementalDOMParser.java @@ -2,129 +2,337 @@ import java.util.List; -import org.eclipse.lemminx.commons.BadLocationException; import org.eclipse.lemminx.commons.TextDocument; -import org.eclipse.lsp4j.TextDocumentContentChangeEvent; +import org.eclipse.lemminx.commons.TextDocumentChange; +/** + * Incremental DOM parser that reuses as much of the old DOM as possible when + * processing small text changes. 
+ */ public class IncrementalDOMParser { - public DOMDocument parseIncremental(DOMDocument oldDocument, List changes) { + private static final IncrementalDOMParser INSTANCE = new IncrementalDOMParser(); - DOMDocument document = oldDocument; - for (TextDocumentContentChangeEvent event : changes) { - document = parseIncremental(document, event); - } - return document; + public static IncrementalDOMParser getInstance() { + return INSTANCE; } - private DOMDocument parseIncremental(DOMDocument document, TextDocumentContentChangeEvent event) { - try { - if (event.getRange() == null) { - return parseFull(document.getTextDocument()); - } + public enum UpdateStrategy { + NONE, // + FULL, // + TEXT, // + ELEMENT, // + ATTR, // + SUBTREE; // Re-parse a subtree + } - int changeOffset = document.offsetAt(event.getRange().getStart()); + private IncrementalDOMParser() { + } + + /** + * Parse a list of changes incrementally + * + * @param document the DOM document + * @param changes list of text changes with pre-calculated offsets + * @return the update strategy used + */ + public UpdateStrategy parseIncremental(DOMDocument document, List changes) { + if (changes == null || changes.isEmpty()) { + return UpdateStrategy.NONE; + } + UpdateStrategy strategy = UpdateStrategy.NONE; + for (TextDocumentChange change : changes) { + strategy = parseIncremental(document, change); + } + return strategy; + } - int oldLength; - if (event.getRangeLength() != null) { - oldLength = event.getRangeLength(); - } else { - int endOffset = document.offsetAt(event.getRange().getEnd()); - oldLength = endOffset - changeOffset; + /** + * Parse a single change incrementally + * + * @param document the DOM document + * @param change the text change with pre-calculated offsets + * @return the update strategy used + */ + private UpdateStrategy parseIncremental(DOMDocument document, TextDocumentChange change) { + try { + // Handle full content replacement + if (change.getEvent().getRange() == null) { + return 
parseFull(document, document.getTextDocument()); } - int newLength = event.getText() != null ? event.getText().length() : 0; + // Use pre-calculated offsets (calculated before text was updated) + int changeOffset = change.getStartOffset(); + int oldLength = change.getOldLength(); + int newLength = change.getNewLength(); + String insertedText = change.getText(); TextDocument textDoc = document.getTextDocument(); + return parseIncremental(document, textDoc, changeOffset, oldLength, newLength, insertedText); - return parseIncremental(document, textDoc, changeOffset, oldLength, newLength); - - } catch (BadLocationException e) { - return parseFull(document.getTextDocument()); + } catch (Exception e) { + return parseFull(document, document.getTextDocument()); } } - public DOMDocument parseIncremental(DOMDocument oldDocument, TextDocument newTextDocument, int changeOffset, - int oldLength, int newLength) { - - if (oldDocument == null) { - return parseFull(newTextDocument); - } + /** + * Core incremental parsing logic + * + * @param oldDocument the previous DOM document + * @param newTextDocument the updated text document + * @param changeOffset offset where the change occurred + * @param oldLength length of old text that was replaced + * @param newLength length of new text + * @param insertedText the text that was inserted/modified + * @return updated DOM document + */ + public UpdateStrategy parseIncremental(DOMDocument oldDocument, TextDocument newTextDocument, int changeOffset, + int oldLength, int newLength, String insertedText) { + + if (oldDocument == null) { + return parseFull(oldDocument, newTextDocument); + } + + // Find the affected node BEFORE any changes + DOMNode affectedNode = resolveAffectedNode(oldDocument, changeOffset); + if (affectedNode == null) { + return parseFull(oldDocument, newTextDocument); + } + + int delta = newLength - oldLength; + boolean isSimple = isSimpleChange(insertedText); + boolean isSimpleAttr = isSimpleAttributeChange(insertedText); 
+ + // Strategy 1: Text node modification (simple text change) + if (affectedNode.isText() && isSimple) { + return reparseTextNode(oldDocument, affectedNode, delta, newTextDocument); + } + + // Strategy 2: Attribute modification + if (affectedNode.isAttribute()) { + DOMAttr attr = (DOMAttr) affectedNode; + DOMElement element = attr.getOwnerElement(); + if (element != null && isSimpleAttr) { + return reparseAttribute(oldDocument, element, attr, delta, newTextDocument); + } + } + + // Strategy 3: Element modification + if (affectedNode.isElement()) { + UpdateStrategy elementStrategy = tryElementStrategy((DOMElement) affectedNode, changeOffset, oldLength, + delta, isSimple, isSimpleAttr, oldDocument, newTextDocument); + if (elementStrategy != null) { + return elementStrategy; + } + } + + // Strategy 4: Subtree re-parsing + // Try to find a stable subtree root that can be re-parsed + DOMElement subtreeRoot = findSubtreeRoot(affectedNode, changeOffset, oldLength, newLength); + if (subtreeRoot != null) { + return reparseSubtree(oldDocument, subtreeRoot, delta, newTextDocument); + } + + // Default: change too complex, do full parse + return parseFull(oldDocument, newTextDocument); + } - DOMNode affectedNode = oldDocument.findNodeAt(changeOffset); - if (affectedNode == null) { - return parseFull(newTextDocument); - } + /** + * Resolve the most specific affected node at the change offset. + * Progressively refines from element → text node → attribute node. 
+ * + * @param document the document + * @param changeOffset the offset where the change occurred + * @return the affected node, or null if not found + */ + private DOMNode resolveAffectedNode(DOMDocument document, int changeOffset) { + DOMNode node = document.findNodeAt(changeOffset); + if (node == null) { + return null; + } + + if (!node.isText()) { + DOMNode textNode = DOMNode.findTextAt(node, changeOffset); + if (textNode != null) { + return textNode; + } + } + + if (!node.isText()) { + DOMNode attrNode = DOMNode.findAttrAt(node, changeOffset); + if (attrNode != null) { + return attrNode; + } + } + + return node; + } - DOMNode adjusteddNode = DOMNode.findTextAt(affectedNode, changeOffset); - if (adjusteddNode == null) { - adjusteddNode = DOMNode.findAttrAt(affectedNode, changeOffset); - } - if (adjusteddNode != null) { - affectedNode = adjusteddNode; - } + /** + * Try to apply element-specific incremental parsing strategies. + * + * @param element the element node + * @param changeOffset the offset where the change occurred + * @param oldLength length of old text that was replaced + * @param delta the change in length + * @param isSimple whether the change is simple (no structural chars) + * @param isSimpleAttr whether the change is simple for attributes + * @param oldDocument the old document + * @param newTextDocument the new text document + * @return the update strategy if successful, null otherwise + */ + private UpdateStrategy tryElementStrategy(DOMElement element, int changeOffset, int oldLength, int delta, + boolean isSimple, boolean isSimpleAttr, DOMDocument oldDocument, TextDocument newTextDocument) { + + // Check if change is inside an attribute + DOMAttr attr = findAttributeAtOffset(element, changeOffset, oldLength); + if (attr != null && isSimpleAttr) { + return reparseAttribute(oldDocument, element, attr, delta, newTextDocument); + } + + // Check if change is in the start tag (adding/removing attributes) + if (element.startTagCloseOffset != 
DOMNode.NULL_VALUE && + changeOffset <= element.startTagCloseOffset) { + // Change is in start tag - reparse attributes + return reparseStartTag(oldDocument, element, delta, newTextDocument); + } + + // Special case: element has no closing ">" yet (startTagCloseOffset == NULL_VALUE) + // This means we're likely adding the ">" to close the start tag + // We need to use SUBTREE strategy because closing the tag may create new text nodes + if (element.startTagCloseOffset == DOMNode.NULL_VALUE) { + // Element start tag is not closed - use subtree reparse + return null; // Will fall through to subtree strategy + } + + // Check if change is inside element content + if (isChangeInElementContent(element, changeOffset, oldLength) && isSimple) { + return reparseElementContent(oldDocument, element, delta, newTextDocument); + } + + return null; + } - int delta = newLength - oldLength; + /** + * Find an attribute that contains the change range. + * + * @param element the element to search + * @param changeOffset the offset where the change occurred + * @param oldLength length of old text that was replaced + * @return the attribute if found, null otherwise + */ + private DOMAttr findAttributeAtOffset(DOMElement element, int changeOffset, int oldLength) { + if (!element.hasAttributes()) { + return null; + } + + for (DOMAttr attr : element.getAttributeNodes()) { + if (changeOffset >= attr.getStart() && changeOffset + oldLength <= attr.getEnd()) { + return attr; + } + } + return null; + } - // Text node - if (affectedNode.isText()) { - return reparseTextNode(oldDocument, affectedNode, delta, newTextDocument); + /** + * Check if the change is within the element's content (between start and end tags). + * Only returns true if the element contains simple text content (no child elements). 
+ * + * @param element the element + * @param changeOffset the offset where the change occurred + * @param oldLength length of old text that was replaced + * @return true if change is in element content and element has no child elements + */ + private boolean isChangeInElementContent(DOMElement element, int changeOffset, int oldLength) { + // Check basic position constraints + if (element.getStartTagCloseOffset() == DOMNode.NULL_VALUE + || element.getEndTagOpenOffset() == DOMNode.NULL_VALUE + || changeOffset <= element.getStartTagCloseOffset() + || changeOffset + oldLength > element.getEndTagOpenOffset()) { + return false; } - - // Element - if (affectedNode.isElement()) { - DOMElement element = (DOMElement) affectedNode; - - // Check if in attribute - if (element.hasAttributes()) { - for (DOMAttr attr : element.getAttributeNodes()) { - if (changeOffset >= attr.getStart() && changeOffset + oldLength <= attr.getEnd()) { - return reparseAttribute(oldDocument, element, attr, delta, newTextDocument); - } - } - } - - // Check if in element content - if (element.getStartTagCloseOffset() != DOMNode.NULL_VALUE - && element.getEndTagOpenOffset() != DOMNode.NULL_VALUE - && changeOffset > element.getStartTagCloseOffset() - && changeOffset + oldLength < element.getEndTagOpenOffset()) { - - String newText = newTextDocument.getText(); - int safeEnd = Math.min(changeOffset + newLength, newText.length()); - String changedText = newText.substring(changeOffset, safeEnd); - - if (!changedText.contains("<") && !changedText.contains(">")) { - return reparseElementContent(oldDocument, element, delta, newTextDocument); + + // Only allow element content strategy if element has no child elements + // (only text nodes are allowed) + if (element.hasChildNodes()) { + for (DOMNode child : element.getChildren()) { + if (child.isElement()) { + // Element has child elements - cannot use simple element content strategy + return false; } } } - - return parseFull(newTextDocument); + + return true; } 
- private DOMDocument reparseTextNode(DOMDocument oldDoc, DOMNode textNode, int delta, TextDocument newTextDoc) { - - // 1. Update the text node - textNode.end += delta; + /** + * Incrementally update when a text node is modified + */ + private UpdateStrategy reparseTextNode(DOMDocument oldDoc, DOMNode textNode, int delta, TextDocument newTextDoc) { + + // IMPORTANT: The TEXT strategy only works correctly when we're just adding/removing characters + // without changing the document structure. However, when delta == 0 (same length replacement), + // we still need to ensure the DOM structure remains valid. + // + // The issue is that even with delta == 0, the document content has changed, and all nodes + // that read from the document will read incorrect content. The TEXT strategy assumes only + // the affected text node needs updating, but this is not safe when content changes. + // + // For now, we'll keep the TEXT strategy but ensure siblings are always processed, + // even when delta == 0, by removing the early return in shiftOffsetsAfter(). + + // 1. Update the text node's end offset + // Simply add delta to the current end position since delta represents the change in length + textNode.end = textNode.end + delta; + + // 2. Clear cached data to force reload from updated document + if (textNode instanceof DOMCharacterData) { + ((DOMCharacterData) textNode).clearCache(); + } - // 2. Adjust PARENT offsets + // 3. Adjust all parent offsets adjustParentOffsets(textNode, delta); - // 3. Adjust following nodes + // 4. 
Adjust all following sibling nodes + // NOTE: This must be called even when delta == 0 to ensure cache is cleared shiftOffsetsAfter(textNode, delta); - return createUpdatedDocument(oldDoc, newTextDoc); + return UpdateStrategy.TEXT; } - private DOMDocument reparseAttribute(DOMDocument oldDoc, DOMElement element, DOMAttr attr, int delta, + /** + * Incrementally update when an attribute value is modified + */ + private UpdateStrategy reparseAttribute(DOMDocument oldDoc, DOMElement element, DOMAttr attr, int delta, TextDocument newTextDoc) { + // Update modified attribute offsets attr.end += delta; - /* - * if (attr.nodeAttrValueEnd != null) { attr.nodeAttrValueEnd += delta; } - */ + + // Update nodeAttrValue offsets if it exists + DOMRange oldAttrValue = attr.getNodeAttrValue(); + if (oldAttrValue != null) { + // Create new AttrValue with updated end offset + attr.setValue(null, oldAttrValue.getStart(), oldAttrValue.getEnd() + delta); + } + // Adjust offsets of all following attributes in the same element + if (element.hasAttributes()) { + boolean foundModifiedAttr = false; + for (DOMAttr otherAttr : element.getAttributeNodes()) { + if (otherAttr == attr) { + foundModifiedAttr = true; + continue; + } + if (foundModifiedAttr) { + // This attribute comes after the modified one, adjust its offsets + adjustAttrOffsets(otherAttr, delta); + } + } + } + + // Update element tag offsets if (element.startTagCloseOffset != DOMNode.NULL_VALUE) { element.startTagCloseOffset += delta; } @@ -136,7 +344,7 @@ private DOMDocument reparseAttribute(DOMDocument oldDoc, DOMElement element, DOM } element.end += delta; - // Adjust children offsets (all children are after the attribute) + // Adjust all children (they come after the attribute) if (element.hasChildNodes()) { for (DOMNode child : element.getChildren()) { adjustNodeOffsets(child, delta); @@ -148,14 +356,30 @@ private DOMDocument reparseAttribute(DOMDocument oldDoc, DOMElement element, DOM // Adjust following nodes 
shiftOffsetsAfter(element, delta); - - return createUpdatedDocument(oldDoc, newTextDoc); + return UpdateStrategy.ATTR; } - private DOMDocument reparseElementContent(DOMDocument oldDoc, DOMElement element, int delta, + /** + * Incrementally update when element content is modified + */ + private UpdateStrategy reparseElementContent(DOMDocument oldDoc, DOMElement element, int delta, TextDocument newTextDoc) { - // Update element end offsets + boolean createdTextNode = false; + + // Special case: if element has no children and we're adding text, create a text node + if (!element.hasChildNodes() && delta > 0) { + // Start after the '>' of the start tag + int textStart = element.getStartTagCloseOffset() + 1; + int textEnd = textStart + delta; + DOMText textNode = new DOMText(textStart, textEnd); + textNode.parent = element; + textNode.closed = true; // Text nodes are always closed + element.addChild(textNode); + createdTextNode = true; + } + + // Update element end tag offsets if (element.endTagOpenOffset != DOMNode.NULL_VALUE) { element.endTagOpenOffset += delta; } @@ -164,8 +388,8 @@ private DOMDocument reparseElementContent(DOMDocument oldDoc, DOMElement element } element.end += delta; - // Adjust children offsets (they are inside the element content) - if (element.hasChildNodes()) { + // Adjust all children offsets (but not the text node we just created with correct offsets) + if (element.hasChildNodes() && !createdTextNode) { for (DOMNode child : element.getChildren()) { adjustNodeOffsets(child, delta); } @@ -177,23 +401,114 @@ private DOMDocument reparseElementContent(DOMDocument oldDoc, DOMElement element // Adjust following nodes shiftOffsetsAfter(element, delta); - return createUpdatedDocument(oldDoc, newTextDoc); + // Return TEXT strategy if we created a text node, otherwise ELEMENT + return createdTextNode ? 
UpdateStrategy.TEXT : UpdateStrategy.ELEMENT; } /** - * Ajuste les offsets de tous les parents du nœud - */ + * Re-parse the start tag to update attributes when they are added/removed/modified. + * Uses the scanner to parse attributes from the start tag. + */ + private UpdateStrategy reparseStartTag(DOMDocument oldDoc, DOMElement element, int delta, + TextDocument newTextDoc) { + + String newText = newTextDoc.getText(); + int startTagStart = element.getStart(); + int startTagEnd = element.getStartTagCloseOffset() + delta; + + if (startTagEnd > newText.length()) { + return parseFull(oldDoc, newTextDoc); + } + + // Clear existing attributes + element.getAttributeNodes().clear(); + + // Use scanner to parse the start tag and extract attributes + org.eclipse.lemminx.dom.parser.Scanner scanner = + org.eclipse.lemminx.dom.parser.XMLScanner.createScanner(newText, startTagStart, false); + + org.eclipse.lemminx.dom.parser.TokenType token = scanner.scan(); + DOMAttr currentAttr = null; + + while (token != org.eclipse.lemminx.dom.parser.TokenType.EOS && + scanner.getTokenOffset() < startTagEnd) { + + switch (token) { + case AttributeName: + currentAttr = new DOMAttr(scanner.getTokenText(), + scanner.getTokenOffset(), scanner.getTokenEnd(), oldDoc); + element.setAttributeNode(currentAttr); + break; + + case DelimiterAssign: + if (currentAttr != null) { + currentAttr.setDelimiter(scanner.getTokenOffset()); + } + break; + + case AttributeValue: + if (currentAttr != null) { + // Pass null as value - setValue will extract it from offsets + currentAttr.setValue(null, + scanner.getTokenOffset(), scanner.getTokenEnd()); + currentAttr = null; + } + break; + + case StartTagClose: + case StartTagSelfClose: + // End of start tag + break; + + default: + break; + } + + token = scanner.scan(); + } + + // Update element offsets + element.startTagCloseOffset += delta; + if (element.endTagOpenOffset != DOMNode.NULL_VALUE) { + element.endTagOpenOffset += delta; + } + if 
(element.endTagCloseOffset != DOMNode.NULL_VALUE) { + element.endTagCloseOffset += delta; + } + element.end += delta; + + // Adjust children offsets + if (element.hasChildNodes()) { + for (DOMNode child : element.getChildren()) { + adjustNodeOffsets(child, delta); + } + } + + // Adjust parent offsets + adjustParentOffsets(element, delta); + + // Adjust following nodes + shiftOffsetsAfter(element, delta); + + return UpdateStrategy.ELEMENT; + } + + /** + * Adjust offsets of all parent nodes up the tree + */ private void adjustParentOffsets(DOMNode node, int delta) { DOMNode parent = node.getParentNode(); - while (parent != null && !(parent instanceof DOMDocument)) { + while (parent != null) { // Adjust parent end offset parent.end += delta; - // If element, adjust tag offsets + // If it's an element, adjust tag offsets ONLY if they exist and come after the change if (parent instanceof DOMElement) { DOMElement element = (DOMElement) parent; + // Only adjust end tag offsets if the element actually has an end tag + // For elements without end tags (unclosed), these should remain NULL_VALUE if (element.endTagOpenOffset != DOMNode.NULL_VALUE) { element.endTagOpenOffset += delta; } @@ -206,32 +521,56 @@ private void adjustParentOffsets(DOMNode node, int delta) { } } - private DOMDocument createUpdatedDocument(DOMDocument oldDoc, TextDocument newTextDoc) { - DOMDocument newDoc = new DOMDocument(newTextDoc, oldDoc.getResolverExtensionManager()); - newDoc.setCancelChecker(oldDoc.getCancelChecker()); - - for (DOMNode child : oldDoc.getChildren()) { - newDoc.addChild(child); - } - - return newDoc; - } - - private void shiftOffsetsAfter(DOMNode node, int delta) { - if (delta == 0) - return; + private void shiftOffsetsAfter(DOMNode changedNode, int delta) { + // REMOVED: Early return when delta == 0 + // Even when delta == 0, we need to clear caches of all following nodes + // because the document content has changed and cached data may be stale + // if (delta == 0) + // return; 
+ + DOMNode current = changedNode; + + // Go up the tree and adjust all nodes that come after + while (current != null && !(current instanceof DOMDocument)) { + // Collect all siblings after current first to avoid issues with getNextSibling() + // during offset adjustment + java.util.List siblings = new java.util.ArrayList<>(); + DOMNode sibling = current.getNextSibling(); + while (sibling != null) { + siblings.add(sibling); + sibling = sibling.getNextSibling(); + } + + // Now adjust offsets of collected siblings + for (DOMNode s : siblings) { + adjustNodeOffsets(s, delta); + } - DOMNode next = getNextNodeInDocumentOrder(node); - while (next != null) { - adjustNodeOffsets(next, delta); - next = getNextNodeInDocumentOrder(next); + // Move up to parent + current = current.getParentNode(); } } + /** + * Recursively adjust offsets of a node and all its descendants. + * + * IMPORTANT: This method must clear caches even when delta=0, because: + * - When text content changes with same length (delta=0), the document content + * has changed even though positions haven't + * - Cached data (like normalized text, whitespace flags) becomes stale + * - Sibling nodes after the change need their caches cleared to reflect the + * new document state + */ private void adjustNodeOffsets(DOMNode node, int delta) { node.start += delta; node.end += delta; + // Clear cached data for character data nodes (text, comments, CDATA, etc.) 
+ // This is critical even when delta=0 because the document content has changed + if (node instanceof DOMCharacterData) { + ((DOMCharacterData) node).clearCache(); + } + if (node instanceof DOMElement) { DOMElement e = (DOMElement) node; if (e.startTagOpenOffset != DOMNode.NULL_VALUE) @@ -258,35 +597,217 @@ private void adjustNodeOffsets(DOMNode node, int delta) { } } + /** + * Adjust all offsets in an attribute node + */ private void adjustAttrOffsets(DOMAttr attr, int delta) { attr.start += delta; attr.end += delta; - /* - * if (attr.nodeAttrNameStart != null) attr.nodeAttrNameStart += delta; if - * (attr.nodeAttrNameEnd != null) attr.nodeAttrNameEnd += delta; if - * (attr.nodeAttrValueStart != null) attr.nodeAttrValueStart += delta; if - * (attr.nodeAttrValueEnd != null) attr.nodeAttrValueEnd += delta; if - * (attr.delimiterAssign != null) attr.delimiterAssign += delta; - */ + // Adjust delimiter offset + if (attr.hasDelimiter()) { + attr.setDelimiter(attr.getDelimiterOffset() + delta); + } + + // Update nodeAttrName offsets if it exists + DOMRange oldAttrName = attr.getNodeAttrName(); + if (oldAttrName != null) { + attr.updateAttrNameOffsets(oldAttrName.getStart() + delta, oldAttrName.getEnd() + delta); + } + + // Update nodeAttrValue offsets if it exists + DOMRange oldAttrValue = attr.getNodeAttrValue(); + if (oldAttrValue != null) { + // Create new AttrValue with adjusted offsets and clear cached values + attr.setValue(null, oldAttrValue.getStart() + delta, oldAttrValue.getEnd() + delta); + } + } + + /** + * Check if a text change is simple (no structural characters). 
+ */ + private boolean isSimpleChange(String text) { + if (text == null || text.isEmpty()) { + return true; // Empty change is always simple + } + + // Reject if contains structural characters + return !text.contains("<") && !text.contains(">"); } - private DOMNode getNextNodeInDocumentOrder(DOMNode node) { - DOMNode next = node.getNextSibling(); - if (next != null) - return next; + /** + * Check if an attribute change is simple (no quotes or structural characters). + */ + private boolean isSimpleAttributeChange(String text) { + if (text == null || text.isEmpty()) { + return true; // Empty change is always simple + } + + // Reject if contains problematic characters + return !text.contains("<") && + !text.contains(">") && + !text.contains("\"") && + !text.contains("'"); + } - DOMNode parent = node.getParentNode(); - while (parent != null) { - next = parent.getNextSibling(); - if (next != null) - return next; - parent = parent.getParentNode(); + /** + * Find a stable subtree root that can be re-parsed. 
+ * + * A subtree is stable if: + * - It's a complete element (has both start and end tags) + * - The change is contained within the element's content + * - The change doesn't affect the element's tags + * + * @param affectedNode the node affected by the change + * @param changeOffset the offset where the change occurred + * @param oldLength the length of the old text + * @param newLength the length of the new text + * @return the subtree root element, or null if no stable subtree found + */ + private DOMElement findSubtreeRoot(DOMNode affectedNode, int changeOffset, int oldLength, int newLength) { + // Start from the affected node and go up the tree + DOMNode current = affectedNode; + + while (current != null && !(current instanceof DOMDocument)) { + if (current instanceof DOMElement) { + DOMElement element = (DOMElement) current; + + // For subtree reparse, we need at least a closed start tag + // The element doesn't need to have an end tag (can be unclosed) + if (element.getStartTagCloseOffset() == DOMNode.NULL_VALUE) { + // Start tag is not closed, try parent + current = current.parent; + continue; + } + + // Check if change is within element's content (not in tags) + int contentStart = element.getStartTagCloseOffset() + 1; + // For unclosed elements, use element.end as content end + int contentEnd = element.endTagOpenOffset != DOMNode.NULL_VALUE + ? 
element.endTagOpenOffset + : element.end; + + if (changeOffset >= contentStart && changeOffset < contentEnd) { + // Change is in content area - this element is a good candidate + // For insertions (oldLength=0), only check start position + // For replacements, check if the replaced range is within content + if (oldLength == 0) { + // Pure insertion - only check insertion point + return element; + } else { + // Replacement - check if replaced range is within content + int changeEnd = changeOffset + oldLength; + if (changeEnd <= contentEnd) { + return element; + } + } + } + } + + // Try parent + current = current.parent; } + + // No stable subtree found return null; } - private DOMDocument parseFull(TextDocument doc) { - return DOMParser.getInstance().parse(doc, null); + /** + * Re-parse a subtree and replace it in the document. + * + * This method uses DOMParser.parseFragment() which: + * - Does NOT use substring() - uses scanner with offsets directly + * - Creates nodes with correct ownerDocument references + * - Parses with absolute offsets (no adjustment needed) + * + * @param document the document + * @param subtreeRoot the root of the subtree to re-parse + * @param delta the change in length + * @param newTextDoc the updated text document + * @return SUBTREE strategy + */ + private UpdateStrategy reparseSubtree(DOMDocument document, DOMElement subtreeRoot, int delta, TextDocument newTextDoc) { + try { + // 1. Calculate the fragment boundaries (with delta applied) + int startOffset = subtreeRoot.getStart(); + int oldEndOffset = subtreeRoot.getEnd(); + int newEndOffset = oldEndOffset + delta; + + String newText = newTextDoc.getText(); + if (newEndOffset > newText.length()) { + return parseFull(document, newTextDoc); + } + + // 2. Parse the fragment using parseFragment() - NO SUBSTRING! 
+ // The scanner will parse directly from the full text using offsets + DOMElement newSubtreeRoot = DOMParser.getInstance().parseFragment( + newText, startOffset, newEndOffset, document); + + if (newSubtreeRoot == null) { + // Parsing failed, fall back to full parse + return parseFull(document, newTextDoc); + } + + // 3. Replace the old subtree with the new one + // No offset adjustment needed - parseFragment() uses absolute offsets! + replaceSubtree(document, subtreeRoot, newSubtreeRoot); + + // 4. Adjust parent offsets + adjustParentOffsets(newSubtreeRoot, delta); + + // 5. Adjust offsets of all nodes after the subtree + shiftOffsetsAfter(newSubtreeRoot, delta); + + return UpdateStrategy.SUBTREE; + + } catch (Exception e) { + // If anything goes wrong, fall back to full parse + return parseFull(document, newTextDoc); + } + } + + /** + * Replace an old subtree with a new one in the document. + */ + private void replaceSubtree(DOMDocument document, DOMElement oldRoot, DOMElement newRoot) { + DOMNode parent = oldRoot.getParentNode(); + + if (parent == null) { + // The subtree is the document root + document.getChildren().clear(); + document.getChildren().add(newRoot); + newRoot.parent = document; + } else { + // Replace in parent's children list + List siblings = parent.getChildren(); + int index = siblings.indexOf(oldRoot); + + if (index >= 0) { + siblings.set(index, newRoot); + newRoot.parent = parent; + } + } + } + + /** + * Fallback to full parse when incremental parsing is not possible + * + * @param document + * @return + */ + private UpdateStrategy parseFull(DOMDocument document, TextDocument doc) { + DOMDocument newDoc = DOMParser.getInstance().parse(doc, null); + document.getChildren().clear(); + document.getChildren().addAll(newDoc.getChildren()); + + // Update parent references for all children + for (DOMNode child : document.getChildren()) { + child.parent = document; + } + + // Update document end offset to match new content length + document.end = 
doc.getText().length(); + + return UpdateStrategy.FULL; } } \ No newline at end of file diff --git a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/IncrementalDOMParserTestGenerator.java b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/IncrementalDOMParserTestGenerator.java new file mode 100644 index 000000000..9905e5815 --- /dev/null +++ b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/dom/IncrementalDOMParserTestGenerator.java @@ -0,0 +1,319 @@ +package org.eclipse.lemminx.dom; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.eclipse.lemminx.commons.BadLocationException; +import org.eclipse.lemminx.commons.TextDocument; +import org.eclipse.lemminx.commons.TextDocumentChange; +import org.eclipse.lemminx.dom.IncrementalDOMParser.UpdateStrategy; +import org.eclipse.lsp4j.Position; + +/** + * Generates JUnit test code for incremental DOM parsing scenarios. + * This is useful for capturing real user editing scenarios and converting them into tests. + */ +public class IncrementalDOMParserTestGenerator { + + private static final Logger LOGGER = Logger.getLogger(IncrementalDOMParserTestGenerator.class.getName()); + private static final IncrementalDOMParserTestGenerator INSTANCE = new IncrementalDOMParserTestGenerator(); + private static final String TEST_OUTPUT_DIR = "src/test/java/org/eclipse/lemminx/dom/generated"; + + public static IncrementalDOMParserTestGenerator getInstance() { + return INSTANCE; + } + + /** + * Generate and save a JUnit test method from a document and its changes. + * The test will be saved to a file based on the document URI. 
+ * + * @param document the document after changes + * @param changes the list of changes applied + * @param oldText the original text before changes + */ + public void generateTest(DOMDocument document, List changes, String oldText) { + if (changes == null || changes.isEmpty()) { + LOGGER.log(Level.FINE, "No changes to generate test for"); + return; + } + + try { + String testCode = generateTestCode(document, changes, oldText); + saveTestToFile(document.getDocumentURI(), testCode); + } catch (Exception e) { + LOGGER.log(Level.SEVERE, "Error generating test", e); + } + } + + /** + * Generate a JUnit test method from a document and its changes. + * + * @param document the document after changes + * @param changes the list of changes applied + * @param oldText the original text before changes + * @return the generated test code as a string + */ + private String generateTestCode(DOMDocument document, List changes, String oldText) { + if (changes == null || changes.isEmpty()) { + return "// No changes to generate test for"; + } + + StringBuilder s = new StringBuilder(); + + // Generate test method header with timestamp + String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")); + s.append("\t@Test\n"); + s.append("\tpublic void testGenerated_").append(timestamp).append("() {\n"); + + // Generate XML variable with original text + s.append("\t\t// Original XML\n"); + s.append("\t\tString xml = "); + s.append(formatXmlString(oldText)); + s.append(";\n\n"); + + // Generate comment describing the change + TextDocumentChange firstChange = changes.get(0); + s.append("\t\t// "); + s.append(describeChange(firstChange, oldText)); + s.append("\n"); + + // Generate the change event + try { + TextDocument textDoc = new TextDocument(oldText, "test.xml"); + Position startPos = textDoc.positionAt(firstChange.getStartOffset()); + Position endPos = textDoc.positionAt(firstChange.getStartOffset() + firstChange.getOldLength()); + + s.append("\t\tList 
changes = e("); + s.append(startPos.getLine()).append(", "); + s.append(startPos.getCharacter()).append(", "); + if (startPos.getLine() == endPos.getLine()) { + s.append(endPos.getCharacter()); + } else { + s.append(endPos.getLine()).append(", "); + s.append(endPos.getCharacter()); + } + s.append(", "); + s.append(formatString(firstChange.getText())); + s.append(");\n"); + + } catch (BadLocationException e) { + s.append("\t\t// Error calculating positions: ").append(e.getMessage()).append("\n"); + s.append("\t\tList changes = null; // TODO: Fix positions\n"); + } + + // Determine expected strategy + UpdateStrategy expectedStrategy = determineExpectedStrategy(firstChange); + s.append("\t\tassertIncremental(xml, UpdateStrategy.").append(expectedStrategy).append(", changes);\n"); + + s.append("\t}\n"); + + return s.toString(); + } + + /** + * Save the generated test to a file based on the document URI. + * + * @param documentURI the URI of the document being edited + * @param testCode the generated test code + */ + private void saveTestToFile(String documentURI, String testCode) { + try { + // Create output directory if it doesn't exist + Path outputDir = Paths.get(TEST_OUTPUT_DIR); + if (!Files.exists(outputDir)) { + Files.createDirectories(outputDir); + } + + // Generate filename from document URI + String filename = generateFilename(documentURI); + Path outputFile = outputDir.resolve(filename); + + // Check if file exists and append or create + if (Files.exists(outputFile)) { + // Append to existing file + Files.writeString(outputFile, "\n" + testCode, StandardOpenOption.APPEND); + LOGGER.log(Level.INFO, "Appended test to: " + outputFile); + } else { + // Create new file with class structure + String fullContent = generateTestClass(filename, testCode); + Files.writeString(outputFile, fullContent, StandardOpenOption.CREATE); + LOGGER.log(Level.INFO, "Created test file: " + outputFile); + } + } catch (IOException e) { + LOGGER.log(Level.SEVERE, "Error saving test to 
file", e); + } + } + + /** + * Generate a filename from the document URI. + */ + private String generateFilename(String documentURI) { + if (documentURI == null || documentURI.isEmpty()) { + return "GeneratedIncrementalTest.java"; + } + + // Extract filename from URI + String filename = documentURI; + int lastSlash = filename.lastIndexOf('/'); + if (lastSlash >= 0) { + filename = filename.substring(lastSlash + 1); + } + + // Remove extension and sanitize + int lastDot = filename.lastIndexOf('.'); + if (lastDot > 0) { + filename = filename.substring(0, lastDot); + } + + // Convert to valid Java class name + filename = filename.replaceAll("[^a-zA-Z0-9]", "_"); + if (!filename.isEmpty() && Character.isDigit(filename.charAt(0))) { + filename = "Test_" + filename; + } + + return "Generated_" + filename + "_Test.java"; + } + + /** + * Generate a complete test class with the given test method. + */ + private String generateTestClass(String filename, String testCode) { + String className = filename.replace(".java", ""); + StringBuilder s = new StringBuilder(); + + s.append("package org.eclipse.lemminx.dom.generated;\n\n"); + s.append("import static org.eclipse.lemminx.dom.IncrementalParsingTestUtils.e;\n"); + s.append("import static org.eclipse.lemminx.XMLIncrementalParserAssert.assertIncremental;\n\n"); + s.append("import java.util.List;\n\n"); + s.append("import org.eclipse.lemminx.dom.IncrementalDOMParser.UpdateStrategy;\n"); + s.append("import org.eclipse.lsp4j.TextDocumentContentChangeEvent;\n"); + s.append("import org.junit.jupiter.api.Test;\n\n"); + s.append("/**\n"); + s.append(" * Auto-generated test class for incremental DOM parsing.\n"); + s.append(" * Generated from user editing scenarios.\n"); + s.append(" */\n"); + s.append("public class ").append(className).append(" {\n\n"); + s.append(testCode); + s.append("\n}\n"); + + return s.toString(); + } + + /** + * Format XML string for Java code with proper escaping and line breaks. 
+ */ + private String formatXmlString(String xml) { + if (xml == null || xml.isEmpty()) { + return "\"\""; + } + + String[] lines = xml.split("\n"); + if (lines.length == 1) { + return "\"" + escapeJavaString(xml) + "\""; + } + + StringBuilder result = new StringBuilder(); + for (int i = 0; i < lines.length; i++) { + if (i > 0) { + result.append(" + //\n\t\t\t\t"); + } + result.append("\"").append(escapeJavaString(lines[i])); + if (i < lines.length - 1) { + result.append("\\n"); + } + result.append("\""); + } + return result.toString(); + } + + /** + * Format a string for Java code with proper escaping. + */ + private String formatString(String str) { + if (str == null) { + return "\"\""; + } + return "\"" + escapeJavaString(str) + "\""; + } + + /** + * Escape special characters in a string for Java code. + */ + private String escapeJavaString(String str) { + if (str == null) { + return ""; + } + return str.replace("\\", "\\\\") + .replace("\"", "\\\"") + .replace("\n", "\\n") + .replace("\r", "\\r") + .replace("\t", "\\t"); + } + + /** + * Generate a human-readable description of the change. + */ + private String describeChange(TextDocumentChange change, String oldText) { + String text = change.getText(); + int oldLength = change.getOldLength(); + + if (oldLength == 0) { + // Insertion + if (text.contains("<") && text.contains(">")) { + return "Insert element/tag"; + } + return "Insert text: " + truncate(text, 30); + } else if (text.isEmpty()) { + // Deletion + String deleted = oldText.substring(change.getStartOffset(), + Math.min(change.getStartOffset() + oldLength, oldText.length())); + return "Delete: " + truncate(deleted, 30); + } else { + // Replacement + String oldContent = oldText.substring(change.getStartOffset(), + Math.min(change.getStartOffset() + oldLength, oldText.length())); + return "Replace '" + truncate(oldContent, 20) + "' with '" + truncate(text, 20) + "'"; + } + } + + /** + * Truncate a string to a maximum length with ellipsis. 
+ */ + private String truncate(String str, int maxLength) { + if (str == null || str.length() <= maxLength) { + return str; + } + return str.substring(0, maxLength) + "..."; + } + + /** + * Determine the expected update strategy based on the change. + */ + private UpdateStrategy determineExpectedStrategy(TextDocumentChange change) { + String text = change.getText(); + + // Check for structural changes + if (text.contains("<") || text.contains(">")) { + if (text.contains("")) { + return UpdateStrategy.SUBTREE; + } + return UpdateStrategy.FULL; + } + + // Simple text change + if (change.getOldLength() == text.length()) { + return UpdateStrategy.TEXT; + } + + // Default to FULL for safety + return UpdateStrategy.FULL; + } +} diff --git a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/settings/XMLGeneralClientSettings.java b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/settings/XMLGeneralClientSettings.java index bd513d404..c7174bd96 100644 --- a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/settings/XMLGeneralClientSettings.java +++ b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/settings/XMLGeneralClientSettings.java @@ -17,8 +17,8 @@ /** * Class to hold all settings from the client side. * - * See https://github.com/eclipse/lemminx/blob/main/docs/Configuration.md for more - * information. + * See https://github.com/eclipse/lemminx/blob/main/docs/Configuration.md for + * more information. * * This class is created through the deseralization of a JSON object. 
Each * internal setting must be represented by a class and have: @@ -54,7 +54,9 @@ public class XMLGeneralClientSettings { private XMLTelemetrySettings telemetry; private boolean linkedEditingEnabled; - + + private XMLIncrementalParserSettings incrementalParser; + public void setLogs(LogsSettings logs) { this.logs = logs; } @@ -146,11 +148,11 @@ public XMLCompletionSettings getCompletion() { public void setLinkedEditingEnabled(boolean linkedEditingEnabled) { this.linkedEditingEnabled = linkedEditingEnabled; } - + public boolean isLinkedEditingEnabled() { return linkedEditingEnabled; } - + /** * Returns the XML preferences * @@ -205,15 +207,24 @@ public void setTelemetry(XMLTelemetrySettings telemetry) { this.telemetry = telemetry; } + public void setIncrementalParser(XMLIncrementalParserSettings incrementalParser) { + this.incrementalParser = incrementalParser; + } + + public XMLIncrementalParserSettings getIncrementalParser() { + return incrementalParser; + } + /** - * Returns a new instance of XMLGeneralClientSettings - * with contents from initializationOptionsSettings + * Returns a new instance of XMLGeneralClientSettings with contents + * from initializationOptionsSettings * * @param initializationOptionsSettings - * @return a new instance of XMLGeneralClientSettings - * with contents from initializationOptionsSettings + * @return a new instance of XMLGeneralClientSettings with contents + * from initializationOptionsSettings */ public static XMLGeneralClientSettings getGeneralXMLSettings(Object initializationOptionsSettings) { return JSONUtility.toModel(initializationOptionsSettings, XMLGeneralClientSettings.class); } + } diff --git a/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/settings/XMLIncrementalParserSettings.java b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/settings/XMLIncrementalParserSettings.java new file mode 100644 index 000000000..b5776ee22 --- /dev/null +++ 
b/org.eclipse.lemminx/src/main/java/org/eclipse/lemminx/settings/XMLIncrementalParserSettings.java @@ -0,0 +1,20 @@ +package org.eclipse.lemminx.settings; + +public class XMLIncrementalParserSettings { + + private boolean enabled = false; + + private String generateTestWhen; + + public boolean isEnabled() { + return enabled; + } + + public void setEnabled(boolean enabled) { + this.enabled = enabled; + } + + public String getGenerateTestWhen() { + return generateTestWhen; + } +} diff --git a/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/XMLIncrementalParserAssert.java b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/XMLIncrementalParserAssert.java new file mode 100644 index 000000000..b0ceda130 --- /dev/null +++ b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/XMLIncrementalParserAssert.java @@ -0,0 +1,52 @@ +package org.eclipse.lemminx; + +import static org.eclipse.lemminx.XMLAssert.r; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.util.Collections; +import java.util.List; + +import org.eclipse.lemminx.commons.TextDocumentChange; +import org.eclipse.lemminx.dom.DOMDocument; +import org.eclipse.lemminx.dom.DOMParser; +import org.eclipse.lemminx.dom.IncrementalDOMParser; +import org.eclipse.lemminx.dom.IncrementalDOMParser.UpdateStrategy; +import org.eclipse.lsp4j.TextDocumentContentChangeEvent; + +public class XMLIncrementalParserAssert { + + public static DOMDocument assertIncremental(String xml, UpdateStrategy expectedStrategy, + List changes) { + DOMDocument document = DOMParser.getInstance().parse(xml, "test.xml", null); + document.getTextDocument().setIncremental(true); + + IncrementalDOMParser parser = IncrementalDOMParser.getInstance(); + List documentChanges = document.getTextDocument().update(changes); + UpdateStrategy strategy = parser.parseIncremental(document, documentChanges); + + assertEquals(expectedStrategy, strategy); + assertDOMEquals(document); + return document; + } + + /** + * Verify that 
incremental parsing produces the same DOM as full parsing + */ + public static void assertDOMEquals(DOMDocument incrementalDocument) { + DOMDocument fullDocument = DOMParser.getInstance().parse(incrementalDocument.getTextDocument(), null, true, + null); + assertEquals(fullDocument.toString(), incrementalDocument.toString(), + "Incremental DOM should match full parse DOM"); + } + + public static List e(int line, int startCharacter, int endCharacter, String text) { + return e(line, startCharacter, line, endCharacter, text); + } + + public static List e(int startLine, int startCharacter, int endLine, + int endCharacter, String text) { + TextDocumentContentChangeEvent event = new TextDocumentContentChangeEvent( + r(startLine, startCharacter, endLine, endCharacter), text); + return Collections.singletonList(event); + } +} diff --git a/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserAttributeTest.java b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserAttributeTest.java new file mode 100644 index 000000000..298de26e9 --- /dev/null +++ b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserAttributeTest.java @@ -0,0 +1,72 @@ +package org.eclipse.lemminx.dom; + +import static org.eclipse.lemminx.XMLIncrementalParserAssert.assertIncremental; +import static org.eclipse.lemminx.XMLIncrementalParserAssert.e; + +import java.util.List; + +import org.eclipse.lemminx.dom.IncrementalDOMParser.UpdateStrategy; +import org.eclipse.lsp4j.TextDocumentContentChangeEvent; +import org.junit.jupiter.api.Test; + +/** + * Tests for attribute modifications + */ +public class IncrementalDOMParserAttributeTest { + + @Test + public void replaceAttributeValue() { + String xml = "
\r\n" + // + " Text\r\n" + // + "
"; + + // Replace id="A" with id="B" + List changes = e(1, 14, 15, "B"); + assertIncremental(xml, UpdateStrategy.ATTR, changes); + } + + @Test + public void insertInAttributeValue() { + String xml = "
\r\n" + // + " Text\r\n" + // + "
"; + + // Insert "0" after "item1" -> "item10" + List changes = e(1, 19, 19, "0"); + assertIncremental(xml, UpdateStrategy.ATTR, changes); + } + + @Test + public void removeFromAttributeValue() { + String xml = "
\r\n" + // + " Text\r\n" + // + "
"; + + // Remove "Value" from "TestValue" -> "Test" + List changes = e(1, 20, 25, ""); + assertIncremental(xml, UpdateStrategy.ATTR, changes); + } + + @Test + public void multipleAttributes() { + String xml = "
\r\n" + // + " Text\r\n" + // + "
"; + + // Modify middle attribute "Test" -> "Updated" + List changes = e(1, 23, 27, "Updated"); + assertIncremental(xml, UpdateStrategy.ATTR, changes); + } + + @Test + public void replaceEntireAttributeValue() { + String xml = "
\r\n" + // + " Text\r\n" + // + "
"; + + // Replace entire value "old-class" -> "new-class" + List changes = e(1, 17, 26, "new-class"); + assertIncremental(xml, UpdateStrategy.ATTR, changes); + } + +} \ No newline at end of file diff --git a/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserEdgeCasesTest.java b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserEdgeCasesTest.java new file mode 100644 index 000000000..c3fbcaf2e --- /dev/null +++ b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserEdgeCasesTest.java @@ -0,0 +1,163 @@ +package org.eclipse.lemminx.dom; + +import static org.eclipse.lemminx.XMLIncrementalParserAssert.assertIncremental; +import static org.eclipse.lemminx.XMLIncrementalParserAssert.e; + +import java.util.List; + +import org.eclipse.lemminx.dom.IncrementalDOMParser.UpdateStrategy; +import org.eclipse.lsp4j.TextDocumentContentChangeEvent; +import org.junit.jupiter.api.Test; + +/** + * Tests for edge cases in incremental parsing + */ +public class IncrementalDOMParserEdgeCasesTest { + + @Test + public void testModificationAtElementBoundary() { + // Modification right at the boundary of an element + String xml = "\n" + // + " Text\n" + // + ""; + + // Insert right after > + List changes = e(1, 8, 8, "New"); + assertIncremental(xml, UpdateStrategy.TEXT, changes); + } + + @Test + public void testModificationInEmptyElement() { + // Insert text in an empty element + String xml = "\n" + // + " \n" + // + ""; + + // Insert text in empty element + List changes = e(1, 8, 8, "Content"); + assertIncremental(xml, UpdateStrategy.TEXT, changes); + } + + @Test + public void testDeeplyNestedModification() { + // Modification in deeply nested structure + StringBuilder xml = new StringBuilder("\n"); + for (int i = 1; i <= 10; i++) { + xml.append(" ".repeat(i)).append("\n"); + } + xml.append(" ".repeat(11)).append("Text\n"); + for (int i = 10; i >= 1; i--) { + xml.append(" ".repeat(i)).append("\n"); + } + 
xml.append(""); + + // Modify the deeply nested text + List changes = e(10, 22, 26, "Modified"); + assertIncremental(xml.toString(), UpdateStrategy.FULL, changes); + } + + @Test + public void testLargeSubtreeThreshold() { + // Test that very large subtrees fall back to FULL parse + StringBuilder xml = new StringBuilder("\n"); + + // Create a large subtree (> 100KB) + for (int i = 0; i < 5000; i++) { + xml.append(" This is item number ").append(i).append("\n"); + } + xml.append(""); + + // Modify first item + List changes = e(1, 8, 29, "Modified item"); + assertIncremental(xml.toString(), UpdateStrategy.TEXT, changes); + } + + @Test + public void testModificationInPreserveSpace() { + // Modification in element with xml:space="preserve" + String xml = "\n" + // + " Text \n" + // + ""; + + // Modify text (spaces should be preserved) + // Line 1, char 31-35 is "Text" (not 34-38 which would include the closing tag) + List changes = e(1, 31, 35, "More"); + assertIncremental(xml.toString(), UpdateStrategy.TEXT, changes); + } + + @Test + public void testMultipleSiblings() { + // Modification with many siblings + StringBuilder xml = new StringBuilder("\n"); + for (int i = 0; i < 100; i++) { + xml.append(" ").append(i).append("\n"); + } + xml.append(""); + + // Modify item in the middle + List changes = e(50, 8, 10, "Modified"); + assertIncremental(xml.toString(), UpdateStrategy.TEXT, changes); + } + + @Test + public void testModificationWithEntities() { + // Modification involving HTML entities + String xml = "\n" + // + " Text\n" + // + ""; + + // Add entity + List changes = e(1, 12, 12, " & More"); + assertIncremental(xml.toString(), UpdateStrategy.TEXT, changes); + } + + @Test + public void testModificationInAttributeWithQuotes() { + // Modification in attribute value containing quotes + String xml = "\n" + // + " Text\n" + // + ""; + + // Modify attribute value + List changes = e(1, 15, 19, "New Value"); + assertIncremental(xml.toString(), UpdateStrategy.ATTR, changes); 
+ } + + @Test + public void testEmptyDocument() { + // Start with empty document + String xml = ""; + + // Add root element + List changes = e(0, 0, 0, ""); + assertIncremental(xml, UpdateStrategy.SUBTREE, changes); + } + + @Test + public void testSingleCharacterChange() { + // Change a single character + String xml = "\n" + // + " A\n" + // + ""; + + // Change A to B + List changes = e(1, 8, 9, "B"); + assertIncremental(xml.toString(), UpdateStrategy.TEXT, changes); + } + + @Test + public void testInsertLessThanInElementWithIndentation() { + // Test inserting '<' in the middle of an element with indentation + // This is a critical case: inserting '<' creates invalid XML that needs proper + // handling + String xml = "\n" + // + " \n" + // + ""; + + // Insert '<' in the middle of the whitespace (after indentation) + // Line 1, position 4 is right after the 4 spaces of indentation + List changes = e(1, 4, 4, "<"); + assertIncremental(xml.toString(), UpdateStrategy.SUBTREE, changes); + } + +} diff --git a/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserFallbackTest.java b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserFallbackTest.java new file mode 100644 index 000000000..ade48f59a --- /dev/null +++ b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserFallbackTest.java @@ -0,0 +1,71 @@ +package org.eclipse.lemminx.dom; + +import static org.eclipse.lemminx.XMLIncrementalParserAssert.assertIncremental; +import static org.eclipse.lemminx.XMLIncrementalParserAssert.e; + +import java.util.List; + +import org.eclipse.lemminx.dom.IncrementalDOMParser.UpdateStrategy; +import org.eclipse.lsp4j.TextDocumentContentChangeEvent; +import org.junit.jupiter.api.Test; + +/** + * Tests that trigger full parse fallback + */ +public class IncrementalDOMParserFallbackTest { + + @Test + public void insertTag_shouldTriggerFullParse() { + String xml = "
\r\n" + // + " A\r\n" + // + "
"; + + // Insert a new tag C + List changes = e(1, 11, 11, "C"); + assertIncremental(xml, UpdateStrategy.SUBTREE, changes); + } + + @Test + public void insertOpeningBracket_shouldTriggerFullParse() { + String xml = "
\r\n" + // + " Text\r\n" + // + "
"; + + // Insert < in text + List changes = e(1, 12, 12, "<"); + assertIncremental(xml, UpdateStrategy.SUBTREE, changes); + } + + @Test + public void insertClosingBracket_shouldTriggerFullParse() { + String xml = "
\r\n" + // + " Text\r\n" + // + "
"; + + // Insert > in text + List changes = e(1, 12, 12, ">"); + assertIncremental(xml, UpdateStrategy.SUBTREE, changes); + } + + @Test + public void modifyTagName_shouldTriggerFullParse() { + String xml = "
\r\n" + // + " Text\r\n" + // + "
"; + + // Modify tag name "item" -> "items" + List changes = e(1, 5, 9, "items"); + assertIncremental(xml.toString(), UpdateStrategy.SUBTREE, changes); + } + + @Test + public void insertBothBrackets_shouldTriggerFullParse() { + String xml = "
\r\n" + // + " Text\r\n" + // + "
"; + + // Insert <> in text + List changes = e(1, 12, 12, "<>"); + assertIncremental(xml.toString(), UpdateStrategy.SUBTREE, changes); + } +} \ No newline at end of file diff --git a/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserPerformanceImprovedTest.java b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserPerformanceImprovedTest.java new file mode 100644 index 000000000..a3a4ccb68 --- /dev/null +++ b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserPerformanceImprovedTest.java @@ -0,0 +1,249 @@ +package org.eclipse.lemminx.dom; + +import static org.eclipse.lemminx.XMLIncrementalParserAssert.assertDOMEquals; +import static org.eclipse.lemminx.XMLIncrementalParserAssert.e; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.List; + +import org.eclipse.lemminx.commons.TextDocumentChange; +import org.eclipse.lemminx.dom.IncrementalDOMParser.UpdateStrategy; +import org.eclipse.lsp4j.TextDocumentContentChangeEvent; +import org.junit.jupiter.api.Test; + +/** + * Performance tests for improved incremental parsing with SUBTREE strategy + */ +public class IncrementalDOMParserPerformanceImprovedTest { + + @Test + public void testSubtreePerformance_mediumFile() { + // Test SUBTREE performance on medium file (1000 items) + int nbItems = 1000; + StringBuilder xml = new StringBuilder("\n"); + for (int i = 0; i < nbItems; i++) { + xml.append(" Text").append(i).append("\n"); + } + xml.append(""); + + DOMDocument document = DOMParser.getInstance().parse(xml.toString(), "test.xml", null); + document.getTextDocument().setIncremental(true); + + IncrementalDOMParser parser = IncrementalDOMParser.getInstance(); + + // Modify an item in the middle + List changes = e(500, 8, 15, "Modified"); + List documentChanges = document.getTextDocument().update(changes); + + long start = System.nanoTime(); + UpdateStrategy 
strategy = parser.parseIncremental(document, documentChanges); + long duration = (System.nanoTime() - start) / 1_000_000; + + System.out.println("=== Medium File (1000 items) ==="); + System.out.println("Strategy: " + strategy); + System.out.println("Incremental parse: " + duration + "ms"); + + // Should use TEXT and be fast + assertEquals(UpdateStrategy.TEXT, strategy); + assertTrue(duration < 100, "TEXT parsing should be < 100ms, was " + duration + "ms"); + + assertDOMEquals(document); + } + + @Test + public void testSubtreePerformance_largeFile() { + // Test SUBTREE performance on large file (10000 items) + int nbItems = 10000; + StringBuilder xml = new StringBuilder("\n"); + for (int i = 0; i < nbItems; i++) { + xml.append(" Text").append(i).append("\n"); + } + xml.append(""); + + DOMDocument document = DOMParser.getInstance().parse(xml.toString(), "test.xml", null); + document.getTextDocument().setIncremental(true); + + IncrementalDOMParser parser = IncrementalDOMParser.getInstance(); + + // Modify an item in the middle + List changes = e(5000, 8, 16, "Modified"); + List documentChanges = document.getTextDocument().update(changes); + + long start = System.nanoTime(); + UpdateStrategy strategy = parser.parseIncremental(document, documentChanges); + long duration = (System.nanoTime() - start) / 1_000_000; + + System.out.println("=== Large File (10000 items) ==="); + System.out.println("Strategy: " + strategy); + System.out.println("Incremental parse: " + duration + "ms"); + + // Should use FULL because root is too large, but still reasonable time + assertTrue(duration < 2000, "Parsing should be < 2s, was " + duration + "ms"); + + assertDOMEquals(document); + } + + @Test + public void testSubtreeVsFullParse_comparison() { + // Compare SUBTREE vs FULL parse performance + int nbItems = 5000; + StringBuilder xml = new StringBuilder("\n"); + for (int i = 0; i < nbItems; i++) { + xml.append(" Text").append(i).append("\n"); + } + xml.append(""); + + // Incremental 
parse + DOMDocument incrementalDoc = DOMParser.getInstance().parse(xml.toString(), "test.xml", null); + incrementalDoc.getTextDocument().setIncremental(true); + + List changes = e(2500, 8, 16, "Modified"); + List documentChanges = incrementalDoc.getTextDocument().update(changes); + + long startIncremental = System.nanoTime(); + UpdateStrategy strategy = IncrementalDOMParser.getInstance().parseIncremental(incrementalDoc, documentChanges); + long incrementalTime = (System.nanoTime() - startIncremental) / 1_000_000; + + // Full parse + long startFull = System.nanoTime(); + DOMDocument fullDoc = DOMParser.getInstance().parse(incrementalDoc.getText(), "test.xml", null); + long fullTime = (System.nanoTime() - startFull) / 1_000_000; + + System.out.println("=== SUBTREE vs FULL Comparison (5000 items) ==="); + System.out.println("Strategy: " + strategy); + System.out.println("Incremental parse: " + incrementalTime + "ms"); + System.out.println("Full parse: " + fullTime + "ms"); + System.out.println("Speedup: " + (fullTime / (double) incrementalTime) + "x"); + + // Incremental should be faster (unless it falls back to FULL) + if (strategy != UpdateStrategy.FULL) { + assertTrue(incrementalTime < fullTime, + "Incremental (" + incrementalTime + "ms) should be faster than full (" + fullTime + "ms)"); + } + + assertEquals(fullDoc.toString(), incrementalDoc.toString()); + } + + @Test + public void testNestedSubtreePerformance() { + // Test SUBTREE performance with nested structure + StringBuilder xml = new StringBuilder("\n"); + for (int i = 0; i < 100; i++) { + xml.append("
\n"); + for (int j = 0; j < 10; j++) { + xml.append(" Text").append(i).append("-").append(j).append("\n"); + } + xml.append("
\n"); + } + xml.append("
"); + + DOMDocument document = DOMParser.getInstance().parse(xml.toString(), "test.xml", null); + document.getTextDocument().setIncremental(true); + + IncrementalDOMParser parser = IncrementalDOMParser.getInstance(); + + // Modify an item in section 50, item 5 (line = 2 + 50*12 + 5 = 607) + // Replace "Text50-5" (positions 10-18) with "Modified" + List changes = e(607, 10, 18, "Modified"); + List documentChanges = document.getTextDocument().update(changes); + + long start = System.nanoTime(); + UpdateStrategy strategy = parser.parseIncremental(document, documentChanges); + long duration = (System.nanoTime() - start) / 1_000_000; + + System.out.println("=== Nested Structure (100 sections x 10 items) ==="); + System.out.println("Strategy: " + strategy); + System.out.println("Incremental parse: " + duration + "ms"); + + // Only the text of one item is replaced, so the TEXT strategy is expected + assertEquals(UpdateStrategy.TEXT, strategy); + assertTrue(duration < 50, "TEXT parsing should be < 50ms, was " + duration + "ms"); + + assertDOMEquals(document); + } + + @Test + public void testRapidSequentialChanges() { + // Simulate rapid typing with sequential changes + String xml = "\n" + " \n" + ""; + + DOMDocument document = DOMParser.getInstance().parse(xml, "test.xml", null); + document.getTextDocument().setIncremental(true); + + IncrementalDOMParser parser = IncrementalDOMParser.getInstance(); + + String textToType = "Hello World"; + long totalTime = 0; + int subtreeCount = 0; + int textCount = 0; + + for (int i = 0; i < textToType.length(); i++) { + char c = textToType.charAt(i); + long start = System.nanoTime(); + + List changes = e(1, 8 + i, 8 + i, String.valueOf(c)); + List documentChanges = document.getTextDocument().update(changes); + + UpdateStrategy strategy = parser.parseIncremental(document, documentChanges); + + totalTime += System.nanoTime() - start; + + if (strategy == UpdateStrategy.SUBTREE) { + subtreeCount++; + } else if (strategy == UpdateStrategy.TEXT) { + textCount++; + } + } + + long 
avgTime = (totalTime / textToType.length()) / 1_000_000; + + System.out.println("=== Rapid Sequential Changes ==="); + System.out.println("Total time for " + textToType.length() + " characters: " + totalTime / 1_000_000 + "ms"); + System.out.println("Average per character: " + avgTime + "ms"); + System.out.println("TEXT strategy: " + textCount + " times"); + System.out.println("SUBTREE strategy: " + subtreeCount + " times"); + + // Average should be very fast + assertTrue(avgTime < 10, "Average per character should be < 10ms, was " + avgTime + "ms"); + + assertDOMEquals(document); + } + + @Test + public void testAddMultipleElements_performance() { + // Test performance when adding multiple elements + String xml = "\n" + " A\n" + ""; + + DOMDocument document = DOMParser.getInstance().parse(xml, "test.xml", null); + document.getTextDocument().setIncremental(true); + + IncrementalDOMParser parser = IncrementalDOMParser.getInstance(); + + // Add 100 new items at once + StringBuilder newItems = new StringBuilder(); + for (int i = 0; i < 100; i++) { + newItems.append("\n ").append(i).append(""); + } + + // Insert after line 1 (after " A\n"), which is at line 2, + // character 0 + List changes = e(2, 0, 2, 0, newItems.toString()); + List documentChanges = document.getTextDocument().update(changes); + + long start = System.nanoTime(); + UpdateStrategy strategy = parser.parseIncremental(document, documentChanges); + long duration = (System.nanoTime() - start) / 1_000_000; + + System.out.println("=== Add 100 Elements ==="); + System.out.println("Strategy: " + strategy); + System.out.println("Incremental parse: " + duration + "ms"); + + // Should use SUBTREE + assertEquals(UpdateStrategy.SUBTREE, strategy); + assertTrue(duration < 100, "SUBTREE parsing should be < 100ms, was " + duration + "ms"); + + assertDOMEquals(document); + } + +} diff --git a/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserPerformanceTest.java 
b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserPerformanceTest.java new file mode 100644 index 000000000..9c415f4b2 --- /dev/null +++ b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserPerformanceTest.java @@ -0,0 +1,182 @@ +package org.eclipse.lemminx.dom; + +import static org.eclipse.lemminx.XMLAssert.r; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Collections; +import java.util.List; + +import org.eclipse.lemminx.commons.TextDocumentChange; +import org.eclipse.lemminx.dom.IncrementalDOMParser.UpdateStrategy; +import org.eclipse.lsp4j.TextDocumentContentChangeEvent; +import org.junit.jupiter.api.Test; + +/** + * Performance tests to measure incremental parsing efficiency + */ +public class IncrementalDOMParserPerformanceTest { + + @Test + public void performanceComparison_smallDocument() { + // Document with 100 items + StringBuilder xml = new StringBuilder("\r\n"); + for (int i = 0; i < 100; i++) { + xml.append(" Text").append(i).append("\r\n"); + } + xml.append(""); + + DOMDocument document = DOMParser.getInstance().parse(xml.toString(), "test.xml", null); + document.getTextDocument().setIncremental(true); + + IncrementalDOMParser incrementalParser = IncrementalDOMParser.getInstance(); + + // Measure: Incremental parse + long start = System.nanoTime(); + List changes = e(1, 10, 14, "Updated"); + List documentChanges = document.getTextDocument().update(changes); + + incrementalParser.parseIncremental(document, documentChanges); + long incrementalTime = System.nanoTime() - start; + + // Measure: Full parse + start = System.nanoTime(); + DOMDocument fullDoc = DOMParser.getInstance().parse(document.getText(), "test.xml", null); + long fullTime = System.nanoTime() - start; + + System.out.println("=== Small Document (100 items) ==="); + System.out.println("Incremental parse: " + incrementalTime / 1_000_000 + 
"ms"); + System.out.println("Full parse: " + fullTime / 1_000_000 + "ms"); + System.out.println("Speedup: " + (fullTime / (double) incrementalTime) + "x"); + + assertEquals(fullDoc.toString(), document.toString()); + + // Incremental should be faster + assertTrue(incrementalTime < fullTime, "Incremental parse should be faster than full parse"); + } + + @Test + public void performanceComparison_largeDocument() { + int nbItems = 1000000; + // Document with a lot of items + StringBuilder xml = new StringBuilder("\r\n"); + for (int i = 0; i < nbItems; i++) { + xml.append(" Text").append(i).append("\r\n"); + } + xml.append(""); + + DOMDocument document = DOMParser.getInstance().parse(xml.toString(), "test.xml", null); + document.getTextDocument().setIncremental(true); + + IncrementalDOMParser incrementalParser = IncrementalDOMParser.getInstance(); + + List changes = e(1, 10, 14, "Updated"); + List documentChanges = document.getTextDocument().update(changes); + + // Measure: Full parse + long start = System.nanoTime(); + DOMDocument fullDoc = DOMParser.getInstance().parse(document.getText(), "test.xml", null); + long fullTime = System.nanoTime() - start; + + // Measure: Incremental parse + start = System.nanoTime(); + UpdateStrategy strategy = incrementalParser.parseIncremental(document, documentChanges); + long incrementalTime = System.nanoTime() - start; + + System.out.println("=== Large Document (" + nbItems + " items) ==="); + System.out.println("Incremental parse: " + incrementalTime / 1_000_000 + "ms"); + System.out.println("Full parse: " + fullTime / 1_000_000 + "ms"); + System.out.println("Speedup: " + (fullTime / (double) incrementalTime) + "x"); + + assertEquals(UpdateStrategy.TEXT, strategy); + assertEquals(fullDoc.toString(), document.toString()); + + // Incremental should be significantly faster on large documents + assertTrue(incrementalTime < fullTime, "Incremental parse should be faster than full parse"); + } + + @Test + public void 
multipleSequentialChanges() { + // Load XML + DOMDocument document = DOMParser.getInstance().parse( + "
\r\n" + " A\r\n" + " B\r\n" + "
", "test.xml", null); + document.getTextDocument().setIncremental(true); + + IncrementalDOMParser parser = IncrementalDOMParser.getInstance(); + + long totalTime = 0; + + // Change 1: Insert C after A + long start = System.nanoTime(); + List changes1 = e(1, 11, 11, "C"); + List documentChanges1 = document.getTextDocument().update(changes1); + parser.parseIncremental(document, documentChanges1); + totalTime += System.nanoTime() - start; + + // Change 2: Insert D after B + start = System.nanoTime(); + List changes2 = e(2, 11, 11, "D"); + List documentChanges2 = document.getTextDocument().update(changes2); + parser.parseIncremental(document, documentChanges2); + totalTime += System.nanoTime() - start; + + // Change 3: Remove C + start = System.nanoTime(); + List changes3 = e(1, 11, 12, ""); + List documentChanges3 = document.getTextDocument().update(changes3); + + parser.parseIncremental(document, documentChanges3); + totalTime += System.nanoTime() - start; + + System.out.println("=== Multiple Sequential Changes ==="); + System.out.println("Total incremental time: " + totalTime / 1_000_000 + "ms"); + System.out.println("Average per change: " + (totalTime / 3) / 1_000_000 + "ms"); + + // Verify final result + DOMDocument fullDoc = DOMParser.getInstance().parse(document.getText(), "test.xml", null); + assertEquals(fullDoc.toString(), document.toString()); + } + + @Test + public void rapidTyping_simulation() { + // Simulate user typing "Hello World" character by character + DOMDocument document = DOMParser.getInstance().parse("
\r\n" + " \r\n" + "
", + "test.xml", null); + document.getTextDocument().setIncremental(true); + + IncrementalDOMParser parser = IncrementalDOMParser.getInstance(); + + String textToType = "Hello World"; + long totalTime = 0; + + for (int i = 0; i < textToType.length(); i++) { + char c = textToType.charAt(i); + long start = System.nanoTime(); + + List changes = e(1, 10 + i, 10 + i, String.valueOf(c)); + List documentChanges = document.getTextDocument().update(changes); + parser.parseIncremental(document, documentChanges); + + totalTime += System.nanoTime() - start; + } + + System.out.println("=== Rapid Typing Simulation ==="); + System.out.println("Total time for " + textToType.length() + " characters: " + totalTime / 1_000_000 + "ms"); + System.out.println("Average per character: " + (totalTime / textToType.length()) / 1_000_000 + "ms"); + + // Verify final result + DOMDocument fullDoc = DOMParser.getInstance().parse(document.getText(), "test.xml", null); + assertEquals(fullDoc.toString(), document.toString()); + } + + private static List e(int line, int startCharacter, int endCharacter, String text) { + return e(line, startCharacter, line, endCharacter, text); + } + + private static List e(int startLine, int startCharacter, int endLine, + int endCharacter, String text) { + TextDocumentContentChangeEvent event = new TextDocumentContentChangeEvent( + r(startLine, startCharacter, endLine, endCharacter), text); + return Collections.singletonList(event); + } +} \ No newline at end of file diff --git a/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserStructureTest.java b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserStructureTest.java new file mode 100644 index 000000000..3192a5d99 --- /dev/null +++ b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserStructureTest.java @@ -0,0 +1,119 @@ +package org.eclipse.lemminx.dom; + +import static org.eclipse.lemminx.XMLIncrementalParserAssert.assertIncremental; 
+import static org.eclipse.lemminx.XMLIncrementalParserAssert.e; + +import java.util.List; + +import org.eclipse.lemminx.dom.IncrementalDOMParser.UpdateStrategy; +import org.eclipse.lsp4j.TextDocumentContentChangeEvent; +import org.junit.jupiter.api.Test; + +/** + * Tests for complex XML structures + */ +public class IncrementalDOMParserStructureTest { + + @Test + public void nestedElements() { + String xml = "\r\n" + // + " \r\n" + // + " Text\r\n" + // + " \r\n" + // + ""; + + // Modify text in nested + List changes = e(2, 15, 19, "Updated"); + assertIncremental(xml, UpdateStrategy.TEXT, changes); + } + + @Test + public void deeplyNestedElements() { + String xml = "\r\n" + // + " \r\n" + // + " \r\n" + // + " DeepText\r\n" + // + " \r\n" + // + " \r\n" + // + ""; + + // Modify deeply nested text + List changes = e(3, 20, 28, "Modified"); + assertIncremental(xml, UpdateStrategy.TEXT, changes); + } + + @Test + public void emptyElement() { + String xml = "
\r\n" + // + " \r\n" + // + "
"; + + // Insert text in empty element + List changes = e(1, 10, 10, "New"); + assertIncremental(xml, UpdateStrategy.TEXT, changes); + } + + @Test + public void selfClosingTag() { + String xml = "
\r\n" + // + " \r\n" + // + " B\r\n" + // + "
"; + + // Modify B in second item + List changes = e(2, 10, 11, "C"); + assertIncremental(xml, UpdateStrategy.TEXT, changes); + } + + @Test + public void multipleSiblings() { + String xml = "\r\n" + // + " A\r\n" + // + " B\r\n" + // + " C\r\n" + // + " D\r\n" + // + ""; + + // Modify B (second item) + List changes = e(2, 10, 11, "Modified"); + assertIncremental(xml, UpdateStrategy.TEXT, changes); + } + + @Test + public void firstSibling() { + String xml = "\r\n" + // + " A\r\n" + // + " B\r\n" + // + " C\r\n" + // + ""; + + // Modify first item + List changes = e(1, 10, 11, "First"); + assertIncremental(xml, UpdateStrategy.TEXT, changes); + } + + @Test + public void lastSibling() { + String xml = "\r\n" + // + " A\r\n" + // + " B\r\n" + // + " C\r\n" + // + ""; + + // Modify last item + List changes = e(3, 10, 11, "Last"); + assertIncremental(xml, UpdateStrategy.TEXT, changes); + } + + @Test + public void closeStartTag() { + String xml = "\r\n" + // + " "; + + // Insert text: > + List changes = e(1, 8, 8, ">"); + assertIncremental(xml, UpdateStrategy.SUBTREE, changes); + } + +} \ No newline at end of file diff --git a/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserSubtreeTest.java b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserSubtreeTest.java new file mode 100644 index 000000000..e978bdbce --- /dev/null +++ b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserSubtreeTest.java @@ -0,0 +1,179 @@ +package org.eclipse.lemminx.dom; + +import static org.eclipse.lemminx.XMLIncrementalParserAssert.assertIncremental; +import static org.eclipse.lemminx.XMLIncrementalParserAssert.e; + +import java.util.List; + +import org.eclipse.lemminx.dom.IncrementalDOMParser.UpdateStrategy; +import org.eclipse.lsp4j.TextDocumentContentChangeEvent; +import org.junit.jupiter.api.Test; + +/** + * Tests for SUBTREE strategy - re-parsing subtrees instead of full document + */ +public class 
IncrementalDOMParserSubtreeTest { + + @Test + public void testAddElement() { + // Add a new element inside a parent + String xml = "\n" + // + " A\n" + // + ""; + + // Add new element after first item (at start of line 2, before
) + List changes = e(2, 0, 2, 0, "\n B"); + assertIncremental(xml, UpdateStrategy.SUBTREE, changes); + } + + @Test + public void testRemoveElement() { + // Remove an element + String xml = "\n" + // + " A\n" + // + " B\n" + // + ""; + // Remove second item + List changes = e(2, 0, 3, 0, ""); + assertIncremental(xml, UpdateStrategy.SUBTREE, changes); + } + + @Test + public void testModifyTagName() { + // Modify a tag name + String xml = "\n" + // + " Text\n" + // + ""; + + // Change "item" to "items" in opening tag + List changes = e(1, 3, 7, "items"); + assertIncremental(xml, UpdateStrategy.FULL, changes); + } + + @Test + public void testAddAttribute() { + // Add a new attribute to an element + String xml = "\n" + // + " Text\n" + // + ""; + + // Add id attribute + List changes = e(1, 7, 7, " id=\"1\""); + assertIncremental(xml, UpdateStrategy.ATTR, changes); + } + + @Test + public void testRemoveAttribute() { + // Remove an attribute + String xml = "\n" + // + " Text\n" + // + ""; + + // Remove name attribute + List changes = e(1, 15, 28, ""); + assertIncremental(xml, UpdateStrategy.SUBTREE, changes); + } + + @Test + public void testTransformToSelfClosing() { + // Transform element to self-closing + String xml = "\n" + // + " \n" + // + ""; + + // Change to self-closing + List changes = e(1, 7, 15, "/>"); + assertIncremental(xml, UpdateStrategy.SUBTREE, changes); + } + + @Test + public void testTransformFromSelfClosing() { + // Transform self-closing to regular element + String xml = "\n" + // + " \n" + // + ""; + + // Change to regular element + List changes = e(1, 7, 9, ">"); + assertIncremental(xml, UpdateStrategy.SUBTREE, changes); + } + + @Test + public void testAddComment() { + // Add a comment + String xml = "\n" + // + " A\n" + // + ""; + + // Add comment before item + List changes = e(1, 2, 2, "\n "); + assertIncremental(xml, UpdateStrategy.SUBTREE, changes); + } + + @Test + public void testAddCDATA() { + // Add CDATA section + String xml = "\n" + // + " 
Text\n" + // + ""; + + // Replace text with CDATA + List changes = e(1, 8, 12, "]]>"); + assertIncremental(xml, UpdateStrategy.SUBTREE, changes); + } + + @Test + public void testMultiLineModification() { + // Add multiple elements at once + String xml = "\n" + // + " A\n" + // + ""; + + // Add multiple items + String newItems = "\n B\n C\n D"; + List changes = e(1, 16, 16, newItems); + assertIncremental(xml, UpdateStrategy.SUBTREE, changes); + } + + @Test + public void testCopyPasteBlock() { + // Copy-paste a whole section + String xml = "\n" + // + " \n" + // + " A\n" + // + " \n" + // + ""; + + // Paste a new section + String newSection = "\n \n B\n "; + List changes = e(3, 13, 13, newSection); + assertIncremental(xml, UpdateStrategy.SUBTREE, changes); + } + + @Test + public void testNestedElementModification() { + // Modify deeply nested element + String xml = "\n" + // + " \n" + // + " \n" + // + " Text\n" + // + " \n" + // + " \n" + // + ""; + + // Add element in level3 + List changes = e(3, 22, 22, "New"); + assertIncremental(xml, UpdateStrategy.SUBTREE, changes); + } + + @Test + public void testIndentationChange() { + // Reformat/indent XML + String xml = "A"; + + // Add newlines and indentation + List changes = e(0, 6, 6, "\n "); + assertIncremental(xml, UpdateStrategy.SUBTREE, changes); + } + +} \ No newline at end of file diff --git a/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserTest.java b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserTest.java deleted file mode 100644 index 722cec91e..000000000 --- a/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserTest.java +++ /dev/null @@ -1,109 +0,0 @@ -package org.eclipse.lemminx.dom; - -import static org.eclipse.lemminx.XMLAssert.r; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import java.util.Collections; -import java.util.List; - -import org.eclipse.lsp4j.TextDocumentContentChangeEvent; -import 
org.junit.jupiter.api.Test; - -public class IncrementalDOMParserTest { - - @Test - public void replaceText() { - // Load XML - DOMDocument document = DOMParser.getInstance().parse("
\r\n" + // - " A\r\n" + // - " B\r\n" + // - "
", "test.xml", null); - document.getTextDocument().setIncremental(true); - - IncrementalDOMParser parser = new IncrementalDOMParser(); - - // Replace A with B - List changes = e(1, 10, 11, "C"); - document.getTextDocument().update(changes); - document = parser.parseIncremental(document, changes); - - assertIncrementalParser(document); - } - - @Test - public void insertSimpleText() { - // Load XML - DOMDocument document = DOMParser.getInstance().parse("
\r\n" + // - " A\r\n" + // - " B\r\n" + // - "
", "test.xml", null); - document.getTextDocument().setIncremental(true); - - IncrementalDOMParser parser = new IncrementalDOMParser(); - - // Insert C after A - List changes = e(1, 11, 11, "C"); - document.getTextDocument().update(changes); - document = parser.parseIncremental(document, changes); - - assertIncrementalParser(document); - } - - @Test - public void insertNewLine() { - // Load XML - DOMDocument document = DOMParser.getInstance().parse("
\r\n" + // - " A\r\n" + // - " B\r\n" + // - "
", "test.xml", null); - document.getTextDocument().setIncremental(true); - - IncrementalDOMParser parser = new IncrementalDOMParser(); - - // Insert 2 lines after B - List changes = e(2, 11, 11, "\r\n\r\n \r\n "); - document.getTextDocument().update(changes); - document = parser.parseIncremental(document, changes); - - assertIncrementalParser(document); - } - - @Test - public void removeText() { - // Load XML - DOMDocument document = DOMParser.getInstance().parse("
\r\n" + // - " AC\r\n" + // - " B\r\n" + // - "
", "test.xml", null); - document.getTextDocument().setIncremental(true); - - IncrementalDOMParser parser = new IncrementalDOMParser(); - - // Insert C after A - List changes = e(1, 11, 12, ""); - document.getTextDocument().update(changes); - document = parser.parseIncremental(document, changes); - - assertIncrementalParser(document); - } - - private static void assertIncrementalParser(DOMDocument incrementalDocument) { - String xml = incrementalDocument.getText(); - System.err.println(xml); - DOMDocument document = DOMParser.getInstance().parse(xml, "test.xml", null); - - // Compare the string representation of both documents - assertEquals(document.toString(), incrementalDocument.toString()); - } - - private static List e(int line, int startCharacter, int endCharacter, String text) { - return e(line, startCharacter, line, endCharacter, text); - } - - private static List e(int startLine, int startCharacter, int endLine, - int endCharacter, String text) { - TextDocumentContentChangeEvent event = new TextDocumentContentChangeEvent( - r(startLine, startCharacter, endLine, endCharacter), text); - return Collections.singletonList(event); - } -} \ No newline at end of file diff --git a/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserTextChangeTest.java b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserTextChangeTest.java new file mode 100644 index 000000000..f5bf92822 --- /dev/null +++ b/org.eclipse.lemminx/src/test/java/org/eclipse/lemminx/dom/IncrementalDOMParserTextChangeTest.java @@ -0,0 +1,156 @@ +package org.eclipse.lemminx.dom; + +import static org.eclipse.lemminx.XMLIncrementalParserAssert.assertIncremental; +import static org.eclipse.lemminx.XMLIncrementalParserAssert.e; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.util.List; + +import org.eclipse.lemminx.dom.IncrementalDOMParser.UpdateStrategy; +import org.eclipse.lsp4j.TextDocumentContentChangeEvent; +import 
org.junit.jupiter.api.Test; + +/** + * Tests for simple text modifications (replace, insert, remove) + */ +public class IncrementalDOMParserTextChangeTest { + + @Test + public void replaceText() { + String xml = "
\r\n" + // + " A\r\n" + // + " B\r\n" + // + "
"; + + // Replace A with C + List changes = e(1, 10, 11, "C"); + assertIncremental(xml, UpdateStrategy.TEXT, changes); + } + + @Test + public void insertSimpleText() { + String xml = "
\r\n" + // + " A\r\n" + // + " B\r\n" + // + "
"; + + // Insert C after A + List changes = e(1, 11, 11, "C"); + assertIncremental(xml, UpdateStrategy.TEXT, changes); + } + + @Test + public void insertNewLine() { + String xml = "
\r\n" + // + " A\r\n" + // + " B\r\n" + // + "
"; + + // Insert 2 lines after B + List changes = e(2, 11, 11, "\r\n\r\n \r\n "); + assertIncremental(xml, UpdateStrategy.TEXT, changes); + } + + @Test + public void removeText() { + String xml = "
\r\n" + // + " AC\r\n" + // + " B\r\n" + // + "
"; + + // Remove C + List changes = e(1, 11, 12, ""); + assertIncremental(xml, UpdateStrategy.TEXT, changes); + } + + @Test + public void removeMultipleCharacters() { + String xml = "
\r\n" + // + " ABCDEF\r\n" + // + "
"; + + // Remove BCDE (keep A and F) + List changes = e(1, 11, 15, ""); + assertIncremental(xml, UpdateStrategy.TEXT, changes); + } + + @Test + public void insertAtBeginning() { + String xml = "
\r\n" + // + " Text\r\n" + // + "
"; + + // Insert "Start" at beginning + List changes = e(1, 10, 10, "Start"); + assertIncremental(xml, UpdateStrategy.TEXT, changes); + } + + @Test + public void insertAtEnd() { + String xml = "
\r\n" + // + " Text\r\n" + // + "
"; + + // Insert "End" at end + List changes = e(1, 14, 14, "End"); + assertIncremental(xml, UpdateStrategy.TEXT, changes); + } + + @Test + public void replaceAllText() { + String xml = "
\r\n" + // + " OldText\r\n" + // + "
"; + + // Replace all text "OldText" -> "NewText" + List changes = e(1, 10, 17, "NewText"); + assertIncremental(xml, UpdateStrategy.TEXT, changes); + } + + @Test + public void insertNewLineInEmptyElement() { + String xml = "\n" + // + " \n" + // + " \n" + // + ""; + + // Insert newlines and spaces inside the second element + // Change to \n \n + // Position is after (line 2, character 9) + List changes = e(2, 9, 9, "\n \n "); + DOMDocument document = assertIncremental(xml, UpdateStrategy.TEXT, changes); + + // Verify the final structure + String expectedXml = "\n" + // + " \n" + // + " \n" + // + " \n" + // + " \n" + // + ""; + assertEquals(expectedXml, document.getText()); + } + + @Test + public void insertStructuralCharInEmptyElement() { + String xml = "\n" + // + " \n" + // + " \n" + // + ""; + + // Insert '<' inside the second element (structural character) + // Change to \n <\n + // Position is after (line 2, character 9) + List changes = e(2, 9, 9, "\n <\n "); + DOMDocument document = assertIncremental(xml, UpdateStrategy.SUBTREE, changes); + + // Verify the final structure + String expectedXml = "\n" + // + " \n" + // + " \n" + // + " <\n" + // + " \n" + // + ""; + assertEquals(expectedXml, document.getText()); + } + +} \ No newline at end of file