From 2805237bfdbb7cb1fa489a12c59aafad5b45a737 Mon Sep 17 00:00:00 2001 From: Steven Obiajulu Date: Thu, 11 Jun 2026 19:08:23 -0400 Subject: [PATCH 1/3] fix(docx-core): reuse paragraph mark insertion revisions Whole-paragraph insertion marking could add a second w:ins marker when another path had already placed a paragraph-mark insertion under the paragraph properties. That creates an invalid CT_ParaRPr shape and can also bypass the comparison revision context date and author. This searches the paragraph properties for an existing paragraph-mark marker before creating one, normalizes its author and date to the active context, and preserves the existing revision id when present. The regression covers the bypass shape and the #452 schema suppression is removed. Fixes: #452 --- coverage/emitted-schema-known-failures.json | 6 ---- .../atomizer/inPlaceModifier-wrappers.ts | 27 +++++++++++++-- .../atomizer/inPlaceModifier.test.ts | 33 +++++++++++++++++++ 3 files changed, 58 insertions(+), 8 deletions(-) diff --git a/coverage/emitted-schema-known-failures.json b/coverage/emitted-schema-known-failures.json index ee3e96a8..7abd5547 100644 --- a/coverage/emitted-schema-known-failures.json +++ b/coverage/emitted-schema-known-failures.json @@ -1,10 +1,4 @@ [ - { - "id": "duplicate-para-mark-ins", - "issue": "#452", - "match": "}ins': This element is not expected.", - "reason": "tracked paragraph insertion emits two paragraph-mark w:ins markers in w:pPr/w:rPr; CT_ParaRPr allows at most one" - }, { "id": "synthetic-table-missing-tblgrid", "issue": "#453", diff --git a/packages/docx-core/src/baselines/atomizer/inPlaceModifier-wrappers.ts b/packages/docx-core/src/baselines/atomizer/inPlaceModifier-wrappers.ts index 5c81380b..cf4d9a9d 100644 --- a/packages/docx-core/src/baselines/atomizer/inPlaceModifier-wrappers.ts +++ b/packages/docx-core/src/baselines/atomizer/inPlaceModifier-wrappers.ts @@ -181,6 +181,8 @@ export function addParagraphMarkRevisionMarker( paragraph.insertBefore(pPr, paragraph.firstChild); } + const existingMarker = findParagraphMarkRevisionMarker(pPr, markerTag); + // Find or create rPr within pPr (paragraph mark properties). let rPr = findChildByTagName(pPr, 'w:rPr'); if (!rPr) { @@ -197,8 +199,17 @@ export function addParagraphMarkRevisionMarker( } } - // Avoid duplicating markers. - if (findChildByTagName(rPr, markerTag)) return; + // Avoid duplicating markers. A legacy/bypass path may already have put the + // paragraph-mark marker in another w:rPr under the same pPr; keep that marker + // and normalize its revision context instead of adding a second CT_ParaRPr child. + if (existingMarker) { + existingMarker.setAttribute('w:author', author); + existingMarker.setAttribute('w:date', dateStr); + if (!existingMarker.getAttribute('w:id')) { + existingMarker.setAttribute('w:id', String(allocateRevisionId(state))); + } + return; + } const id = allocateRevisionId(state); const marker = createEl(markerTag, { @@ -211,6 +222,18 @@ export function addParagraphMarkRevisionMarker( rPr.insertBefore(marker, rPr.firstChild); } +function findParagraphMarkRevisionMarker( + pPr: Element, + markerTag: 'w:ins' | 'w:del' +): Element | null { + for (const child of childElements(pPr)) { + if (child.tagName !== 'w:rPr') continue; + const marker = findChildByTagName(child, markerTag); + if (marker) return marker; + } + return null; +} + // @lean-segment: field-wrapper-emission // Lean traceability anchor — cited by verification/lean/LeanSpike/Spec.lean and // packages/docx-core/src/integration/lean-spec-bridge.test.ts. These wrapping diff --git a/packages/docx-core/src/baselines/atomizer/inPlaceModifier.test.ts b/packages/docx-core/src/baselines/atomizer/inPlaceModifier.test.ts index 0a8e5983..8cf169c6 100644 --- a/packages/docx-core/src/baselines/atomizer/inPlaceModifier.test.ts +++ b/packages/docx-core/src/baselines/atomizer/inPlaceModifier.test.ts @@ -813,6 +813,39 @@ describe('inPlaceModifier', () => { expect(insMarker).toBeDefined(); }); }); + + test('reuses an existing paragraph-mark insertion and normalizes revision context', async ({ given, when, then }: AllureBddContext) => { + let pPr: Element, p: Element; + let state: ReturnType; + + await given('a paragraph with a pre-existing paragraph-mark w:ins from a bypass path', () => { + pPr = el('w:pPr', {}, [ + el('w:rPr', {}, [ + el('w:ins', { + 'w:id': '99', + 'w:author': 'Bypass', + 'w:date': '2020-01-01T00:00:00Z', + }), + ]), + ]); + p = el('w:p', {}, [pPr, el('w:r', {}, [el('w:t', {}, undefined, 'text')])]); + state = createRevisionIdState(); + }); + + await when('wrapParagraphAsInserted applies the comparison revision context', () => { + wrapParagraphAsInserted(p, author, dateStr, state); + }); + + await then('only one paragraph-mark w:ins remains with the comparison author and date', () => { + const markers = childElements(pPr) + .filter((c) => c.tagName === 'w:rPr') + .flatMap((rPr) => childElements(rPr).filter((c) => c.tagName === 'w:ins')); + expect(markers).toHaveLength(1); + expect(markers[0]!.getAttribute('w:id')).toBe('99'); + expect(markers[0]!.getAttribute('w:author')).toBe(author); + expect(markers[0]!.getAttribute('w:date')).toBe(dateStr); + }); + }); }); describe('addParagraphPropertyChange', () => { From c778152b40fb948bddecf2026419c218e4c7d23f Mon Sep 17 00:00:00 2001 From: Steven Obiajulu Date: Fri, 12 Jun 2026 02:38:30 -0400 Subject: [PATCH 2/3] fix(docx-core): place paragraph-mark revision markers in CT_ParaRPr order The reused paragraph-mark w:ins kept its legacy mid-sequence position, and a newly created w:del was always prepended even ahead of an existing w:ins, so the emitted-schema gate flagged w:ins after w:del/formatting children. CT_ParaRPr requires the tracked-change group (w:ins, w:del, w:moveFrom, w:moveTo) first and in that order. - addParagraphMarkRevisionMarker now routes both reused and new markers through placeParagraphMarkRevisionMarker: w:ins goes first in rPr, w:del goes right after an existing w:ins. - documentReconstructor.serializePPrWithParaRevisionMarker reuses a marker cloned from the source pPr instead of prepending a duplicate (the same issue #452 bug class on the reconstruction path), preserving the source revision's author/date/id. --- .../atomizer/documentReconstructor.ts | 22 +++++++---- .../atomizer/inPlaceModifier-wrappers.ts | 29 +++++++++++++- .../atomizer/inPlaceModifier.test.ts | 38 ++++++++++++++++++- 3 files changed, 79 insertions(+), 10 deletions(-) diff --git a/packages/docx-core/src/baselines/atomizer/documentReconstructor.ts b/packages/docx-core/src/baselines/atomizer/documentReconstructor.ts index b2b01e39..b8056d82 100644 --- a/packages/docx-core/src/baselines/atomizer/documentReconstructor.ts +++ b/packages/docx-core/src/baselines/atomizer/documentReconstructor.ts @@ -25,6 +25,7 @@ import { import { serializeToXml, cloneElement } from './xmlToWmlElement.js'; import { EMPTY_PARAGRAPH_TAG, isParagraphLevelLeaf, nearestHyperlinkAncestor } from '../../atomizer.js'; import { enforceConsumerCompatibility } from './consumerCompatibility.js'; +import { placeParagraphMarkRevisionMarker } from './inPlaceModifier-wrappers.js'; import { areRunPropertiesEqual } from '../../format-detection.js'; import { debug } from './debug.js'; @@ -751,13 +752,20 @@ function serializePPrWithParaRevisionMarker( } } - // Insert revision marker at start of rPr. - const marker = createEl(markerTag, { - 'w:id': String(id), - 'w:author': author, - 'w:date': dateStr, - }); - rPr.insertBefore(marker, rPr.firstChild); + // Reuse a pre-existing paragraph-mark marker of the same kind cloned from the + // source pPr (issue #452): CT_ParaRPr allows at most one of each tracked-change + // child, and the source revision's metadata (author/date/id) outranks a + // synthetic duplicate. Either way the marker is placed in its schema-correct + // slot ahead of formatting children. + const existingMarker = findChildByTagName(rPr, markerTag); + const marker = + existingMarker ?? + createEl(markerTag, { + 'w:id': String(id), + 'w:author': author, + 'w:date': dateStr, + }); + placeParagraphMarkRevisionMarker(rPr, marker, markerTag); // Append pPrChange at end if provided. if (pPrChangeEl) { diff --git a/packages/docx-core/src/baselines/atomizer/inPlaceModifier-wrappers.ts b/packages/docx-core/src/baselines/atomizer/inPlaceModifier-wrappers.ts index cf4d9a9d..1b53998b 100644 --- a/packages/docx-core/src/baselines/atomizer/inPlaceModifier-wrappers.ts +++ b/packages/docx-core/src/baselines/atomizer/inPlaceModifier-wrappers.ts @@ -208,6 +208,9 @@ export function addParagraphMarkRevisionMarker( if (!existingMarker.getAttribute('w:id')) { existingMarker.setAttribute('w:id', String(allocateRevisionId(state))); } + // The bypass path may have left the marker mid-sequence (or in another + // w:rPr); move it to the schema-correct slot in the canonical rPr. + placeParagraphMarkRevisionMarker(rPr, existingMarker, markerTag); return; } @@ -218,8 +221,30 @@ export function addParagraphMarkRevisionMarker( 'w:date': dateStr, }); - // Insert marker at the start of rPr for consistency with Aspose/Word patterns. - rPr.insertBefore(marker, rPr.firstChild); + placeParagraphMarkRevisionMarker(rPr, marker, markerTag); +} + +/** + * Position a paragraph-mark revision marker in its schema-correct rPr slot. + * + * CT_ParaRPr ordering: the tracked-change group (w:ins, w:del, w:moveFrom, + * w:moveTo — in that order) comes before every formatting child (w:rStyle, + * w:rFonts, ...). So w:ins always goes first, and w:del goes right after a + * w:ins sibling when one exists, else first. + */ +export function placeParagraphMarkRevisionMarker( + rPr: Element, + marker: Element, + markerTag: 'w:ins' | 'w:del' +): void { + const insSibling = markerTag === 'w:del' ? findChildByTagName(rPr, 'w:ins') : null; + if (insSibling) { + if (insSibling.nextSibling !== marker) { + insertAfterElement(insSibling, marker); + } + } else if (rPr.firstChild !== marker) { + rPr.insertBefore(marker, rPr.firstChild); + } } function findParagraphMarkRevisionMarker( diff --git a/packages/docx-core/src/baselines/atomizer/inPlaceModifier.test.ts b/packages/docx-core/src/baselines/atomizer/inPlaceModifier.test.ts index 8cf169c6..dabccfee 100644 --- a/packages/docx-core/src/baselines/atomizer/inPlaceModifier.test.ts +++ b/packages/docx-core/src/baselines/atomizer/inPlaceModifier.test.ts @@ -819,8 +819,11 @@ describe('inPlaceModifier', () => { let state: ReturnType; await given('a paragraph with a pre-existing paragraph-mark w:ins from a bypass path', () => { + // The bypass marker sits AFTER a formatting child — an invalid + // CT_ParaRPr sequence that reuse must repair, not preserve. pPr = el('w:pPr', {}, [ el('w:rPr', {}, [ + el('w:b'), el('w:ins', { 'w:id': '99', 'w:author': 'Bypass', @@ -836,7 +839,7 @@ describe('inPlaceModifier', () => { wrapParagraphAsInserted(p, author, dateStr, state); }); - await then('only one paragraph-mark w:ins remains with the comparison author and date', () => { + await then('only one paragraph-mark w:ins remains, first in rPr, with the comparison author and date', () => { const markers = childElements(pPr) .filter((c) => c.tagName === 'w:rPr') .flatMap((rPr) => childElements(rPr).filter((c) => c.tagName === 'w:ins')); @@ -844,6 +847,39 @@ describe('inPlaceModifier', () => { expect(markers[0]!.getAttribute('w:id')).toBe('99'); expect(markers[0]!.getAttribute('w:author')).toBe(author); expect(markers[0]!.getAttribute('w:date')).toBe(dateStr); + + // CT_ParaRPr puts the tracked-change group before formatting children, + // so the reused w:ins must be moved to the front of rPr. + const rPr = childElements(pPr).find((c) => c.tagName === 'w:rPr'); + expect(childElements(rPr!).map((c) => c.tagName)).toEqual(['w:ins', 'w:b']); + }); + }); + + test('keeps the paragraph-mark w:del after w:ins per CT_ParaRPr order', async ({ given, when, then }: AllureBddContext) => { + let pPr: Element, p: Element; + let state: ReturnType; + + await given('a paragraph whose mark already carries a w:ins marker', () => { + pPr = el('w:pPr', {}, [ + el('w:rPr', {}, [ + el('w:ins', { + 'w:id': '7', + 'w:author': 'Bypass', + 'w:date': '2020-01-01T00:00:00Z', + }), + ]), + ]); + p = el('w:p', {}, [pPr, el('w:r', {}, [el('w:t', {}, undefined, 'text')])]); + state = createRevisionIdState(); + }); + + await when('wrapParagraphAsDeleted marks the paragraph mark as deleted', () => { + wrapParagraphAsDeleted(p, author, dateStr, state); + }); + + await then('rPr orders the tracked-change group w:ins then w:del', () => { + const rPr = childElements(pPr).find((c) => c.tagName === 'w:rPr'); + expect(childElements(rPr!).map((c) => c.tagName)).toEqual(['w:ins', 'w:del']); }); }); }); From 86b91627a0a9fdb8343a7b94da416b2483219499 Mon Sep 17 00:00:00 2001 From: Steven Obiajulu Date: Fri, 12 Jun 2026 02:52:49 -0400 Subject: [PATCH 3/3] fix(docx-core): normalize paragraph mark revision reuse Reuse paragraph-mark revision markers across cloned paragraph properties in both in-place and reconstructor paths so CT_ParaRPr ordering remains valid and duplicate insertion markers are not emitted. Adds regression coverage for reconstructor reuse/ordering and updates the canonical insertion regression to reflect normalized comparison metadata. Fixes: #452 --- .../documentReconstructor-rpr.test.ts | 59 ++++++++++++++++++- .../atomizer/documentReconstructor.ts | 17 ++++-- .../atomizer/inPlaceModifier-wrappers.ts | 2 +- .../canonical-emission-regression.test.ts | 7 ++- 4 files changed, 76 insertions(+), 9 deletions(-) diff --git a/packages/docx-core/src/baselines/atomizer/documentReconstructor-rpr.test.ts b/packages/docx-core/src/baselines/atomizer/documentReconstructor-rpr.test.ts index 48186d92..44b05f2b 100644 --- a/packages/docx-core/src/baselines/atomizer/documentReconstructor-rpr.test.ts +++ b/packages/docx-core/src/baselines/atomizer/documentReconstructor-rpr.test.ts @@ -14,7 +14,7 @@ import { rejectAllChanges, } from './trackChangesAcceptorAst.js'; import { parseDocumentXml } from './xmlToWmlElement.js'; -import { findAllByTagName, findChildByTagName } from '../../primitives/index.js'; +import { childElements, findAllByTagName, findChildByTagName } from '../../primitives/index.js'; import { EMPTY_PARAGRAPH_TAG } from '../../atomizer.js'; import type { ComparisonUnitAtom, OpcPart } from '../../core-types.js'; import { CorrelationStatus } from '../../core-types.js'; @@ -535,6 +535,63 @@ describe('Step 7: Rebuild path emits pPrChange for inserted paragraphs', () => { expect(innerChildren.length).toBe(0); }); }); + + test('whole-paragraph insertion reuses and reorders an existing paragraph-mark w:ins', async ({ given, when, then }: AllureBddContext) => { + let atom: ComparisonUnitAtom; + let result: string; + + await given('an inserted atom whose source pPr already has a paragraph-mark insertion after w:del', () => { + const existingIns = el('w:ins', { + 'w:id': '77', + 'w:author': 'Source Author', + 'w:date': '2020-01-01T00:00:00Z', + }); + const rPr = el('w:rPr', {}, [ + el('w:del', { + 'w:id': '88', + 'w:author': 'Delete Author', + 'w:date': '2020-01-02T00:00:00Z', + }), + existingIns, + ]); + const pPrEl = el('w:pPr', {}, [rPr]); + const textEl = el('w:t', {}, undefined, 'preserved inserted paragraph'); + const run = el('w:r', {}, [textEl]); + const paragraph = el('w:p', {}, [pPrEl, run]); + + atom = { + sha1Hash: 'hash-existing-ins', + correlationStatus: CorrelationStatus.Inserted, + contentElement: textEl, + ancestorElements: [paragraph, run], + ancestorUnids: [], + part: PART, + paragraphIndex: 0, + rPr: null, + }; + }); + + await when('reconstructDocument serializes the paragraph-mark insertion', () => { + result = reconstructDocument([atom], MINIMAL_DOCXML, OPTS); + }); + + await then('the output has one normalized w:ins before w:del in pPr/rPr', () => { + const root = parseDocumentXml(result); + const pPr = findAllByTagName(root, 'w:pPr').find((candidate) => { + const rPr = findChildByTagName(candidate, 'w:rPr'); + return !!rPr && !!findChildByTagName(rPr, 'w:ins'); + }); + expect(pPr).toBeDefined(); + + const rPr = findChildByTagName(pPr!, 'w:rPr')!; + const markers = findAllByTagName(pPr!, 'w:ins'); + expect(markers).toHaveLength(1); + expect(markers[0]!.getAttribute('w:id')).toBe('77'); + expect(markers[0]!.getAttribute('w:author')).toBe('Test'); + expect(markers[0]!.getAttribute('w:date')).toBe('2025-01-01T00:00:00Z'); + expect(childElements(rPr).map((child) => child.tagName)).toEqual(['w:ins', 'w:del']); + }); + }); }); // ============================================================================= diff --git a/packages/docx-core/src/baselines/atomizer/documentReconstructor.ts b/packages/docx-core/src/baselines/atomizer/documentReconstructor.ts index b8056d82..146ec8f1 100644 --- a/packages/docx-core/src/baselines/atomizer/documentReconstructor.ts +++ b/packages/docx-core/src/baselines/atomizer/documentReconstructor.ts @@ -25,7 +25,10 @@ import { import { serializeToXml, cloneElement } from './xmlToWmlElement.js'; import { EMPTY_PARAGRAPH_TAG, isParagraphLevelLeaf, nearestHyperlinkAncestor } from '../../atomizer.js'; import { enforceConsumerCompatibility } from './consumerCompatibility.js'; -import { placeParagraphMarkRevisionMarker } from './inPlaceModifier-wrappers.js'; +import { + findParagraphMarkRevisionMarker, + placeParagraphMarkRevisionMarker, +} from './inPlaceModifier-wrappers.js'; import { areRunPropertiesEqual } from '../../format-detection.js'; import { debug } from './debug.js'; @@ -754,10 +757,9 @@ function serializePPrWithParaRevisionMarker( // Reuse a pre-existing paragraph-mark marker of the same kind cloned from the // source pPr (issue #452): CT_ParaRPr allows at most one of each tracked-change - // child, and the source revision's metadata (author/date/id) outranks a - // synthetic duplicate. Either way the marker is placed in its schema-correct + // child. Either way the marker is normalized and placed in its schema-correct // slot ahead of formatting children. - const existingMarker = findChildByTagName(rPr, markerTag); + const existingMarker = findParagraphMarkRevisionMarker(effectivePPr, markerTag); const marker = existingMarker ?? createEl(markerTag, { @@ -765,6 +767,13 @@ function serializePPrWithParaRevisionMarker( 'w:author': author, 'w:date': dateStr, }); + if (existingMarker) { + existingMarker.setAttribute('w:author', author); + existingMarker.setAttribute('w:date', dateStr); + if (!existingMarker.getAttribute('w:id')) { + existingMarker.setAttribute('w:id', String(id)); + } + } placeParagraphMarkRevisionMarker(rPr, marker, markerTag); // Append pPrChange at end if provided. diff --git a/packages/docx-core/src/baselines/atomizer/inPlaceModifier-wrappers.ts b/packages/docx-core/src/baselines/atomizer/inPlaceModifier-wrappers.ts index 1b53998b..e8e67ddf 100644 --- a/packages/docx-core/src/baselines/atomizer/inPlaceModifier-wrappers.ts +++ b/packages/docx-core/src/baselines/atomizer/inPlaceModifier-wrappers.ts @@ -247,7 +247,7 @@ export function placeParagraphMarkRevisionMarker( } } -function findParagraphMarkRevisionMarker( +export function findParagraphMarkRevisionMarker( pPr: Element, markerTag: 'w:ins' | 'w:del' ): Element | null { diff --git a/packages/docx-core/src/integration/canonical-emission-regression.test.ts b/packages/docx-core/src/integration/canonical-emission-regression.test.ts index 5920af91..5e79e208 100644 --- a/packages/docx-core/src/integration/canonical-emission-regression.test.ts +++ b/packages/docx-core/src/integration/canonical-emission-regression.test.ts @@ -556,7 +556,7 @@ describe('Round-trip with comparison', () => { }); }); - test('insert_paragraph round-trip retains the original AI paragraph-mark insertion alongside comparison output', async ({ + test('insert_paragraph round-trip keeps SafeDocX paragraph-mark insertion metadata normalized', async ({ given, when, then, @@ -585,9 +585,10 @@ describe('Round-trip with comparison', () => { }); }); - await then('comparison output keeps the original SafeDocX paragraph-mark metadata and avoids Comparison', () => { + await then('comparison output keeps SafeDocX metadata, avoids Comparison, and replaces the stale fixed date', () => { expectNoComparisonAuthor(comparedDocumentXml); - expect(comparedDocumentXml).toContain(`w:date="${FIXED_DATE}"`); + expect(comparedDocumentXml).not.toContain(`w:date="${FIXED_DATE}"`); + expect(revisionTuples(comparedDocumentXml, AI_AUTHOR).length).toBeGreaterThan(0); expect(comparedDocumentXml).toContain('Inserted paragraph'); }); });