From 1e6b86e06825e95b1d2fb10de4eb9a6cc90bcd0f Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Fri, 24 Oct 2025 12:52:35 -0500 Subject: [PATCH 1/9] refactor(web): implement SourcePath merging As with the prior PR, this moves correction-search path merging onto SourcePath, rather than expecting ContextToken to manage it when multiple paths to construct a token exists. Build-bot: skip build:web Test-bot: skip --- .../src/main/correction/context-token.ts | 59 +++---------------- .../main/correction/context-tokenization.ts | 2 +- .../src/main/correction/search-path.ts | 57 ++++++++++++++++++ .../src/main/correction/search-space.ts | 9 +++ .../context/context-token.tests.ts | 8 +-- 5 files changed, 80 insertions(+), 55 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts index 452ae142af0..03db28ef294 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts @@ -7,7 +7,6 @@ * in the context and associated correction-search progress and results. */ -import { buildMergedTransform } from "@keymanapp/models-templates"; import { LexicalModelTypes } from '@keymanapp/common-types'; import { deepCopy, KMWString } from "@keymanapp/web-utils"; @@ -183,59 +182,19 @@ export class ContextToken { * @param lexicalModel * @returns */ - static merge(tokensToMerge: ContextToken[], lexicalModel: LexicalModel): ContextToken { + static merge(tokensToMerge: ContextToken[]): ContextToken { + if(tokensToMerge.length < 1) { + return null; + } + // Assumption: if we're merging a token, it's not whitespace. // Thus, we don't set the .isWhitespace flag field. 
- const resultToken = new ContextToken(lexicalModel); - - let lastSourceInput: PathInputProperties; - let lastInputDistrib: Distribution; - for(const token of tokensToMerge) { - const inputCount = token.inputCount; - let startIndex = 0; - - if(inputCount == 0) { - continue; - } - - // Are we re-merging on a previously split transform? - if(lastSourceInput?.segment.trueTransform != token.inputSegments[0].segment.trueTransform) { - if(lastSourceInput) { - resultToken.addInput(lastSourceInput, lastInputDistrib); - } // else: there's nothing to add as input - } else { - // If so, re-merge it! - startIndex++; - - lastInputDistrib = lastInputDistrib?.map((entry, index) => { - return { - sample: buildMergedTransform(entry.sample, token.searchSpace.inputSequence[0][index].sample), - p: entry.p - } - }); - - // In case there's only one input that needs merging on both ends. - if(inputCount == 1) { - // There's potential that the next incoming token needs to merge with this. - continue; - } else { - resultToken.addInput(lastSourceInput, lastInputDistrib); - } - } - lastSourceInput = null; - lastInputDistrib = null; - - // Ignore the last entry for now - it may need to merge with a matching - // entry in the next token! 
- for(let i = startIndex; i < inputCount - 1; i++) { - resultToken.addInput(token.inputSegments[i], token.searchSpace.inputSequence[i]); - } - lastSourceInput = token.inputSegments[inputCount-1]; - lastInputDistrib = token.searchSpace.inputSequence[inputCount-1]; + const resultToken = new ContextToken(tokensToMerge.shift()); + while(tokensToMerge.length > 0) { + const next = tokensToMerge.shift(); + resultToken._searchSpace = resultToken._searchSpace.merge(next._searchSpace); } - resultToken.addInput(lastSourceInput, lastInputDistrib); - return resultToken; } diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts index 27239dd0506..3d1e04ced6a 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts @@ -526,7 +526,7 @@ export class ContextTokenization { // consider: move to ContextToken as class method. (static?) 
const merge = merges.shift(); const tokensToMerge = merge.inputs.map((m) => baseTokenization[m.index]); - const mergeResult = ContextToken.merge(tokensToMerge, lexicalModel); + const mergeResult = ContextToken.merge(tokensToMerge); tokenization.push(mergeResult); i = merge.inputs[merge.inputs.length - 1].index; continue; diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts index 71e9fa2be0c..b884a5437e9 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts @@ -10,6 +10,7 @@ import { QueueComparator as Comparator, KMWString, PriorityQueue } from '@keymanapp/web-utils'; import { LexicalModelTypes } from '@keymanapp/common-types'; +import { buildMergedTransform } from '@keymanapp/models-templates'; import { EDIT_DISTANCE_COST_SCALE, SearchNode, SearchResult } from './distance-modeler.js'; import { generateSpaceSeed, PathResult, SearchSpace, PathInputProperties } from './search-space.js'; @@ -259,6 +260,62 @@ export class SearchPath implements SearchSpace { this.selectionQueue = new PriorityQueue(QUEUE_NODE_COMPARATOR, entries); } + // spaces are in sequence here. + // `this` = head 'space'. + public merge(space: SearchSpace): SearchSpace { + // Head node for the incoming path is empty, so skip it. + if(space.parents.length == 0) { + return this; + } + + // Merge any parents first as a baseline. We have to come after their + // affects are merged in, anyway. + const parentMerges = space.parents?.length > 0 ? space.parents.map((p) => this.merge(p)) : [this]; + + // if parentMerges.length > 0, is a SearchCluster. + // const parentMerge = parentMerges.length > 0 ? 
new SearchCluster(parentMerges) : parentMerges[0]; + const parentMerge = parentMerges[0]; + + // Special case: if we've reached the head of the space to be merged, check + // for a split transform. + // - we return `this` from the root, so if that's what we received, we're + // on the first descendant - the first path component. + if(space instanceof SearchPath) { + if(parentMerge != this) { + return new SearchPath(parentMerge, space.inputs, space.inputSource); + } + + const localInputId = this.inputSource?.segment.transitionId; + const spaceInputId = space.inputSource?.segment.transitionId; + // The 'id' may be undefined in some unit tests and for tokens + // reconstructed after a backspace. In either case, we consider the + // related results as fully separate; our reconstructions are + // per-codepoint. + if(localInputId != spaceInputId || localInputId === undefined) { + return new SearchPath(parentMerge, space.inputs, space.inputSource); + } else { + // Get the twin halves that were split. + // Assumption: the two halves are in their original order, etc. + const localInputs = this.inputs; + const spaceInputs = space.inputs; + + // Merge them! + const mergedInputs = localInputs?.map((entry, index) => { + return { + sample: buildMergedTransform(entry.sample, spaceInputs[index].sample), + p: entry.p + } + }); + + // Now to re-merge the two halves. + return new SearchPath(this.parentSpace, mergedInputs, this.inputSource); + } + } else { + // If the parent was a cluster, the cluster itself is the merge. 
+ return parentMerge; + } + } + public split(charIndex: number): [SearchSpace, SearchPath] { const model = this.model; const internalSplitIndex = charIndex - (this.codepointLength - this.edgeLength); diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts index a3e9f22796e..8ef7b523ea7 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts @@ -225,6 +225,15 @@ export interface SearchSpace { */ get sourceRangeKey(): string; + /** + * Appends this SearchSpace with the provided SearchSpace's search properties, + * extending the represented search range accordingly. If this operation + * represents merging the result of a previous .split() call, the two halves + * of any split input components will be fully re-merged. + * @param space + */ + merge(space: SearchSpace): SearchSpace; + /** * Splits this SearchSpace into two halves at the specified codepoint index. 
* The 'head' component will maximally re-use existing cached data, while the diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts index ab5938b1608..5202e55e5c5 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts @@ -102,7 +102,7 @@ describe('ContextToken', function() { const token2 = new ContextToken(plainModel, "'"); const token3 = new ContextToken(plainModel, "t"); - const merged = ContextToken.merge([token1, token2, token3], plainModel); + const merged = ContextToken.merge([token1, token2, token3]); assert.equal(merged.exampleInput, "can't"); token1.inputSegments.forEach((entry) => assert.isTrue(merged.inputSegments.indexOf(entry) > -1)); token2.inputSegments.forEach((entry) => assert.isTrue(merged.inputSegments.indexOf(entry) > -1)); @@ -155,7 +155,7 @@ describe('ContextToken', function() { subsetId: srcSubsetId }, [{sample: {insert: 't', deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}]); - const merged = ContextToken.merge([token1, token2, token3], plainModel); + const merged = ContextToken.merge([token1, token2, token3]); assert.equal(merged.exampleInput, "can't"); assert.deepEqual(merged.inputSegments, [ { segment: { @@ -253,7 +253,7 @@ describe('ContextToken', function() { subsetId: srcSubsetIds[3] }, [{sample: srcTransforms[3], p: 1}]); - const merged = ContextToken.merge(tokensToMerge, plainModel); + const merged = ContextToken.merge(tokensToMerge); assert.equal(merged.exampleInput, "applesandsourgrapes"); assert.deepEqual(merged.inputSegments, srcTransforms.map((t, i) => ({ segment: { @@ -352,7 +352,7 @@ describe('ContextToken', function() { subsetId: srcSubsetIds[3] }, [{sample: srcTransforms[3], p: 1}]); - const merged = 
ContextToken.merge(tokensToMerge, plainModel); + const merged = ContextToken.merge(tokensToMerge); assert.equal(merged.exampleInput, toMathematicalSMP("applesandsourgrapes")); assert.deepEqual(merged.inputSegments, srcTransforms.map((t, i) => ({ segment: { From b6ef8d099ff1d022a839c17013226d61da4ffcfb Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Wed, 29 Oct 2025 13:43:52 -0500 Subject: [PATCH 2/9] feat(web): adds SearchPath merging unit tests --- .../correction-search/search-path.tests.ts | 475 +++++++++++++++++- 1 file changed, 474 insertions(+), 1 deletion(-) diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts index 4a832d6feaa..5569c9553a3 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts @@ -9,10 +9,13 @@ import { assert } from 'chai'; +import { LexicalModelTypes } from '@keymanapp/common-types'; import { KMWString } from '@keymanapp/web-utils'; import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; -import { models, SearchPath } from '@keymanapp/lm-worker/test-index'; +import { models, SearchPath, TokenInputSource } from '@keymanapp/lm-worker/test-index'; +import Distribution = LexicalModelTypes.Distribution; +import Transform = LexicalModelTypes.Transform; import TrieModel = models.TrieModel; const testModel = new TrieModel(jsonFixture('models/tries/english-1000')); @@ -1371,4 +1374,474 @@ describe('SearchPath', () => { assert.deepEqual((tail as SearchPath).inputSource, tailTarget.inputSource); }); }); + + // Placed after `split()` because many cases mock a reversal of split-test results. 
+ describe('merge()', () => { + /* + * To define: + * - merging a standard case + * - merging a split BMP case + * - merging a standard SMP case + * - merging a split SMP case + * - merging a case where the deleteLeft was split from the insert + * - splitIndex = 0, but the deleteLeft is (conceptually) before that. + * - this (empty) + param (full) + * - this (full) + param (empty) + * - merging with distributions (no split) + * - merging with distributions (and a definite split) + * + * - biglargetransform for single-input multi-split remerge + * - merging a three-way split should be associative (not dependent on order) so + * long as the relative positions are correct + * + * - "cello" case(s) covers... + * - deleteLeft split from insert + * - a straight-up split (mid-insert) + * - standard case (no distrib) + * - with head + tail index inclusion, the empty + full versions + * - SMP variant: the SMP cases. + * + * - then we may need a "merging with distributions" coverage + * - can prob make a simple BMP mocked version... + * - and a simple SMP mocked version + * - is actually pretty-much covered anyway... I believe. 
+ */ + + // Covers cases where a single "input" was split into more than two fragments + describe(`previously-split token comprised of single titanic transform: biglargetransform`, () => { + const buildPath = () => { + const distributions = [ + [{ sample: {insert: 'big', deleteLeft: 0, id: 11}, p: 1 }], + [{ sample: {insert: 'large', deleteLeft: 0, id: 11}, p: 1 }], + [{ sample: {insert: 'transform', deleteLeft: 0, id: 11}, p: 1 }] + ]; + + const originalInputBase: TokenInputSource = { + trueTransform: {insert: 'biglargetransform', deleteLeft: 0, id: 11}, + inputStartIndex: 0, + bestProbFromSet: 1 + }; + + const originalInputs = [0, 3, 8].map(n => ({...originalInputBase, inputStartIndex: n})); + + const paths = distributions.map((d, i) => new SearchPath(new SearchPath(testModel), d, originalInputs[i])); + + return { + paths, + distributions, + originalInputs + }; + } + + const checkFinalStateAssertions = (merged: SearchPath, originalInput: TokenInputSource) => { + assert.equal(merged.inputCount, 1); + assert.isTrue(merged instanceof SearchPath); + assert.deepEqual(merged.bestExample.text, "biglargetransform"); + assert.deepEqual((merged as SearchPath).inputs, [ + { sample: { insert: 'biglargetransform', deleteLeft: 0, id: 11 }, p: 1 } + ]); + assert.deepEqual((merged as SearchPath).inputSource, originalInput); + // TODO: check the 'source' input data (here and in callers) + } + + it('setup: constructs paths properly', () => { + const { paths, distributions, originalInputs } = buildPath(); + + assert.equal(paths.length, 3); + assert.equal(distributions.length, paths.length); + paths.forEach((p, i) => { + assert.equal(p.inputCount, 1); + assert.equal(distributions[i].length, p.inputCount); + assert.equal(p.codepointLength, KMWString.length(distributions[i][0].sample.insert)); + assert.deepEqual(p.bestExample, { + text: ['big', 'large', 'transform'][i], + p: 1 + }); + assert.equal(p.parents[0].inputCount, 0); + assert.isTrue(p.hasInputs([distributions[i]])); + }); + + 
originalInputs.forEach((original) => { + assert.deepEqual({...original, inputStartIndex: 0}, {...originalInputs[0], inputStartIndex: 0}); + }); + }); + + it('merging order: big + large, then + transform', () => { + const { paths, originalInputs } = buildPath(); + + const headMerge = paths[0].merge(paths[1]); + + // Assertions + assert.equal(headMerge.inputCount, 1); + assert.isTrue(headMerge instanceof SearchPath); + assert.deepEqual(headMerge.bestExample.text, "biglarge"); + assert.deepEqual((headMerge as SearchPath).inputs, [ + { sample: { insert: 'biglarge', deleteLeft: 0, id: 11 }, p: 1 } + ]); + assert.deepEqual((headMerge as SearchPath).inputSource, originalInputs[0]); + + const fullMerge = headMerge.merge(paths[2]); + checkFinalStateAssertions(fullMerge as SearchPath, originalInputs[0]); + }); + + it('merging order: large + transform, then + big', () => { + const { paths, originalInputs } = buildPath(); + + const tailMerge = paths[1].merge(paths[2]); + + // Assertions + assert.equal(tailMerge.inputCount, 1); + assert.isTrue(tailMerge instanceof SearchPath); + assert.deepEqual(tailMerge.bestExample.text, "largetransform"); + assert.deepEqual((tailMerge as SearchPath).inputs, [ + { sample: { insert: 'largetransform', deleteLeft: 0, id: 11 }, p: 1 } + ]); + assert.deepEqual((tailMerge as SearchPath).inputSource, originalInputs[1]); + + const fullMerge = paths[0].merge(tailMerge); + checkFinalStateAssertions(fullMerge as SearchPath, originalInputs[0]); + }); + }); + + // Covers many common aspects of SearchPath merging, though not merging of + // multi-member distributions. + describe(`previously-split token comprised of complex, rewriting transforms: cello`, () => { + const buildPath = (inputs: Distribution[], sources: TokenInputSource[], root?: SearchPath) => { + return inputs.reduce((path, input, index) => new SearchPath(path, input, sources[index]), root ?? 
new SearchPath(testModel)); + } + + const buildFixtures = () => { + const trueDistributions = [ + [ + { sample: {insert: 'ca', deleteLeft: 0, id: 11}, p: 1 } + ], [ + { sample: {insert: 'ent', deleteLeft: 1, id: 12}, p: 1 } + ], [ + { sample: {insert: 'llar', deleteLeft: 2, id: 13}, p: 1 } + ], [ + { sample: {insert: 'o', deleteLeft: 2, id: 14}, p: 1 } + ] + ]; + + const trueInputSources: TokenInputSource[] = trueDistributions.map((d) => { + return { + trueTransform: d[0].sample, + bestProbFromSet: d[0].p, + inputStartIndex: 0 + } + }) + + const commonRoot = new SearchPath(testModel); + const mergeTarget = buildPath(trueDistributions, trueInputSources, commonRoot); + + // Index: the position of the split. + const splits: [SearchPath, SearchPath][] = []; + + // Case 0: bare head path, reproduced token (on different root) + splits.push([ + commonRoot, buildPath(trueDistributions, trueInputSources) + ]); + + // Case 1: the split happens in token 2 (index 1), with the deleteLeft + // split from the insert. + splits.push([ + buildPath([ + trueDistributions[0], + [{ sample: {insert: '', deleteLeft: 1, id: 12}, p: 1 }] + ], trueInputSources.slice(0, 2), commonRoot), + buildPath([ + [{ sample: {insert: 'ent', deleteLeft: 0, id: 12}, p: 1 }], + ...trueDistributions.slice(2) + ], [ + {...trueInputSources[1], inputStartIndex: 0}, + ...trueInputSources.slice(2) + ]) + ]); + + // Case 2: the split happens in token 3 (index 2), with the deleteLeft + // split from the insert. + splits.push([ + buildPath([ + ...trueDistributions.slice(0, 2), + [{ sample: {insert: '', deleteLeft: 2, id: 13}, p: 1 }] + ], trueInputSources.slice(0, 3), commonRoot), + buildPath([ + [{ sample: {insert: 'llar', deleteLeft: 0, id: 13}, p: 1 }], + ...trueDistributions.slice(3) + ], [ + {...trueInputSources[2], inputStartIndex: 0}, + ...trueInputSources.slice(3) + ]) + ]); + + // Case 3: the split happens in token 3 (index 2), in the middle of the + // insert. 
+ splits.push([ + buildPath([ + ...trueDistributions.slice(0, 2), + [{ sample: {insert: 'l', deleteLeft: 2, id: 13}, p: 1 }] + ], trueInputSources.slice(0, 3), commonRoot), + buildPath([ + [{ sample: {insert: 'lar', deleteLeft: 0, id: 13}, p: 1 }], + ...trueDistributions.slice(3) + ], [ + {...trueInputSources[2], inputStartIndex: 1}, + ...trueInputSources.slice(3) + ]) + ]); + + // Case 4: the split happens in token 4 (index 3), with the deleteLeft + // split from the insert. + splits.push([ + buildPath([ + ...trueDistributions.slice(0, 3), + [{ sample: {insert: '', deleteLeft: 2, id: 14}, p: 1 }] + ], trueInputSources.slice(), commonRoot), + buildPath([ + [{ sample: {insert: 'o', deleteLeft: 0, id: 14}, p: 1 }] + ], [ + {...trueInputSources[3], inputStartIndex: 0}, + ]) + ]); + + // Case 5: the split happens at the token's end, leaving the tail + // as a fresh, empty token. + splits.push([ + buildPath(trueDistributions, trueInputSources, commonRoot), + new SearchPath(testModel) + ]); + + return { + mergeTarget, + splits, + trueDistributions + }; + } + + const runCommonAssertions = (splitIndex: number) => { + const { mergeTarget, splits, trueDistributions } = buildFixtures(); + const splitToTest = splits[splitIndex]; + + const remergedPath = splitToTest[0].merge(splitToTest[1]); + + assert.deepEqual(remergedPath.bestExample, mergeTarget.bestExample); + assert.equal(remergedPath.inputCount, mergeTarget.inputCount); + assert.equal(remergedPath.codepointLength, mergeTarget.codepointLength); + assert.sameDeepOrderedMembers(remergedPath.sourceIdentifiers, mergeTarget.sourceIdentifiers); + assert.isTrue(remergedPath.hasInputs(trueDistributions)); + } + + it('setup: constructs path properly', () => { + const { mergeTarget, splits } = buildFixtures(); + + const targetText = mergeTarget.bestExample.text; + + for(let i = 0; i < splits.length; i++) { + const splitSet = splits[i]; + + assert.equal(splitSet[0].codepointLength, i); + assert.equal(splitSet[0].bestExample.text, 
KMWString.substring(targetText, 0, i)); + assert.equal(splitSet[1].codepointLength, KMWString.length(targetText) - i); + assert.equal(splitSet[1].bestExample.text, KMWString.substring(targetText, i)); + } + }); + + it('splits properly at index 0', () => { + runCommonAssertions(0); + }); + + it('splits properly at index 1', () => { + runCommonAssertions(1); + }); + + it('splits properly at index 2', () => { + runCommonAssertions(2); + }); + + it('splits properly at index 3', () => { + runCommonAssertions(3); + }); + + it('splits properly at index 4', () => { + runCommonAssertions(4); + }); + + it('splits properly at index 5', () => { + runCommonAssertions(5); + }); + }); + + // Same as the prior set, but now with non-BMP text! + describe(`previously-split token comprised of complex, rewriting non-BMP transforms`, () => { + const buildPath = (inputs: Distribution[], sources: TokenInputSource[], root?: SearchPath) => { + return inputs.reduce((path, input, index) => new SearchPath(path, input, sources[index]), root ?? new SearchPath(testModel)); + } + + const buildFixtures = () => { + const trueDistributions = [ + [ + { sample: {insert: toMathematicalSMP('ca'), deleteLeft: 0, id: 11}, p: 1 } + ], [ + { sample: {insert: toMathematicalSMP('ent'), deleteLeft: 1, id: 12}, p: 1 } + ], [ + { sample: {insert: toMathematicalSMP('llar'), deleteLeft: 2, id: 13}, p: 1 } + ], [ + { sample: {insert: toMathematicalSMP('o'), deleteLeft: 2, id: 14}, p: 1 } + ] + ]; + + const trueInputSources: TokenInputSource[] = trueDistributions.map((d) => { + return { + trueTransform: d[0].sample, + bestProbFromSet: d[0].p, + inputStartIndex: 0 + } + }) + + const commonRoot = new SearchPath(testModel); + const mergeTarget = buildPath(trueDistributions, trueInputSources, commonRoot); + + // Index: the position of the split. 
+ const splits: [SearchPath, SearchPath][] = []; + + // Case 0: bare head path, reproduced token (on different root) + splits.push([ + commonRoot, buildPath(trueDistributions, trueInputSources) + ]); + + // Case 1: the split happens in token 2 (index 1), with the deleteLeft + // split from the insert. + splits.push([ + buildPath([ + trueDistributions[0], + [{ sample: {insert: toMathematicalSMP(''), deleteLeft: 1, id: 12}, p: 1 }] + ], trueInputSources.slice(0, 2), commonRoot), + buildPath([ + [{ sample: {insert: toMathematicalSMP('ent'), deleteLeft: 0, id: 12}, p: 1 }], + ...trueDistributions.slice(2) + ], [ + {...trueInputSources[1], inputStartIndex: 0}, + ...trueInputSources.slice(2) + ]) + ]); + + // Case 2: the split happens in token 3 (index 2), with the deleteLeft + // split from the insert. + splits.push([ + buildPath([ + ...trueDistributions.slice(0, 2), + [{ sample: {insert: toMathematicalSMP(''), deleteLeft: 2, id: 13}, p: 1 }] + ], trueInputSources.slice(0, 3), commonRoot), + buildPath([ + [{ sample: {insert: toMathematicalSMP('llar'), deleteLeft: 0, id: 13}, p: 1 }], + ...trueDistributions.slice(3) + ], [ + {...trueInputSources[2], inputStartIndex: 0}, + ...trueInputSources.slice(3) + ]) + ]); + + // Case 3: the split happens in token 3 (index 2), in the middle of the + // insert. + splits.push([ + buildPath([ + ...trueDistributions.slice(0, 2), + [{ sample: {insert: toMathematicalSMP('l'), deleteLeft: 2, id: 13}, p: 1 }] + ], trueInputSources.slice(0, 3), commonRoot), + buildPath([ + [{ sample: {insert: toMathematicalSMP('lar'), deleteLeft: 0, id: 13}, p: 1 }], + ...trueDistributions.slice(3) + ], [ + {...trueInputSources[2], inputStartIndex: 1}, + ...trueInputSources.slice(3) + ]) + ]); + + // Case 4: the split happens in token 4 (index 3), with the deleteLeft + // split from the insert. 
+ splits.push([ + buildPath([ + ...trueDistributions.slice(0, 3), + [{ sample: {insert: toMathematicalSMP(''), deleteLeft: 2, id: 14}, p: 1 }] + ], trueInputSources.slice(), commonRoot), + buildPath([ + [{ sample: {insert: toMathematicalSMP('o'), deleteLeft: 0, id: 14}, p: 1 }] + ], [ + {...trueInputSources[3], inputStartIndex: 0}, + ]) + ]); + + // Case 5: the split happens at the token's end, leaving the tail + // as a fresh, empty token. + splits.push([ + buildPath(trueDistributions, trueInputSources, commonRoot), + new SearchPath(testModel) + ]); + + return { + mergeTarget, + splits, + trueDistributions + }; + } + + const runCommonAssertions = (splitIndex: number) => { + const { mergeTarget, splits, trueDistributions } = buildFixtures(); + const splitToTest = splits[splitIndex]; + + const remergedPath = splitToTest[0].merge(splitToTest[1]); + + assert.deepEqual(remergedPath.bestExample, mergeTarget.bestExample); + assert.equal(remergedPath.inputCount, mergeTarget.inputCount); + assert.equal(remergedPath.codepointLength, mergeTarget.codepointLength); + assert.sameDeepOrderedMembers(remergedPath.sourceIdentifiers, mergeTarget.sourceIdentifiers); + assert.isTrue(remergedPath.hasInputs(trueDistributions)); + } + + it('setup: constructs path properly', () => { + // Validate that an SMP-conversion has occurred. 
+ assert.notEqual(toMathematicalSMP("cello"), "cello"); + assert.equal(toMathematicalSMP("cello").length, "cello".length * 2); + assert.equal(KMWString.length(toMathematicalSMP("cello")), KMWString.length("cello")); + + const { mergeTarget, splits } = buildFixtures(); + + const targetText = mergeTarget.bestExample.text; + assert.equal(targetText, toMathematicalSMP("cello")); + + for(let i = 0; i < splits.length; i++) { + const splitSet = splits[i]; + + assert.equal(splitSet[0].codepointLength, i); + assert.equal(splitSet[0].bestExample.text, KMWString.substring(targetText, 0, i)); + assert.equal(splitSet[1].codepointLength, KMWString.length(targetText) - i); + assert.equal(splitSet[1].bestExample.text, KMWString.substring(targetText, i)); + } + }); + + it('splits properly at index 0', () => { + runCommonAssertions(0); + }); + + it('splits properly at index 1', () => { + runCommonAssertions(1); + }); + + it('splits properly at index 2', () => { + runCommonAssertions(2); + }); + + it('splits properly at index 3', () => { + runCommonAssertions(3); + }); + + it('splits properly at index 4', () => { + runCommonAssertions(4); + }); + + it('splits properly at index 5', () => { + runCommonAssertions(5); + }); + }); + }); }); \ No newline at end of file From df5eefa5bd41a8a1a5cb09fc2f85b4f42f6193f1 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Wed, 29 Oct 2025 13:49:22 -0500 Subject: [PATCH 3/9] change(web): remove SearchPath.inputSequence --- .../src/main/correction/search-path.ts | 13 ------------- .../src/main/correction/search-space.ts | 9 --------- 2 files changed, 22 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts index b884a5437e9..e17016f4e9e 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts @@ 
-137,19 +137,6 @@ export class SearchPath implements SearchSpace { this.bestProbInEdge = 1; } - /** - * Retrieves the sequences of inputs that led to this SearchPath. - */ - public get inputSequence(): Distribution[] { - if(this.parents[0]) { - return [...this.parents[0].inputSequence, this.inputs]; - } else if(this.inputs) { - return [this.inputs]; - } else { - return []; - } - } - public get constituentPaths(): SearchPath[][] { const parentPaths = this.parents[0]?.constituentPaths ?? []; if(parentPaths.length > 0) { diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts index 8ef7b523ea7..3ff21b4ab77 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts @@ -190,15 +190,6 @@ export interface SearchSpace { */ readonly inputCount: number; - /** - * Retrieves the sequence of inputs that led to this SearchSpace. - * - * THIS WILL BE REMOVED SHORTLY in favor of `constituentPaths` below, which - * provides an improved view into the data and models multiple paths to the - * space when they exist. (Once SearchPath takes on merging & splitting) - */ - readonly inputSequence: Distribution[]; - /** * Reports the length in codepoints of corrected text represented by completed * paths from this instance. 
From c3dbc62b703d43afe8ce95c9deca2e2b2d553914 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Wed, 29 Oct 2025 13:57:55 -0500 Subject: [PATCH 4/9] fix(web): add safeguard for split-distribution merging --- .../src/main/correction/search-path.ts | 37 +++++++++++-------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts index e17016f4e9e..1ce98d79549 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts @@ -280,23 +280,28 @@ export class SearchPath implements SearchSpace { // per-codepoint. if(localInputId != spaceInputId || localInputId === undefined) { return new SearchPath(parentMerge, space.inputs, space.inputSource); - } else { - // Get the twin halves that were split. - // Assumption: the two halves are in their original order, etc. - const localInputs = this.inputs; - const spaceInputs = space.inputs; - - // Merge them! - const mergedInputs = localInputs?.map((entry, index) => { - return { - sample: buildMergedTransform(entry.sample, spaceInputs[index].sample), - p: entry.p - } - }); - - // Now to re-merge the two halves. - return new SearchPath(this.parentSpace, mergedInputs, this.inputSource); } + // Get the twin halves that were split. + // Assumption: the two halves are in their original order, etc. + const localInputs = this.inputs; + const spaceInputs = space.inputs; + + // Sanity check - ensure that the input distributions have the same length; + // if not, this shouldn't represent a SearchPath split! + if(localInputs.length != spaceInputs.length) { + return new SearchPath(parentMerge, space.inputs, space.inputSource); + } + + // Merge them! 
+ const mergedInputs = localInputs?.map((entry, index) => { + return { + sample: buildMergedTransform(entry.sample, spaceInputs[index].sample), + p: entry.p + } + }); + + // Now to re-merge the two halves. + return new SearchPath(this.parentSpace, mergedInputs, this.inputSource); } else { // If the parent was a cluster, the cluster itself is the merge. return parentMerge; From 9c56b6002912fa9bf7f03c7d530f3dd06bafffb1 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Wed, 29 Oct 2025 14:22:50 -0500 Subject: [PATCH 5/9] feat(web): adds split-distribution SearchPath re-merge unit test --- .../correction-search/search-path.tests.ts | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts index 5569c9553a3..2ce5047c225 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts @@ -1843,5 +1843,59 @@ describe('SearchPath', () => { runCommonAssertions(5); }); }); + + it('correctly merges paths previously split mid-input', () => { + let path = new SearchPath(testModel); + const startSample = {sample: { insert: 'a', deleteLeft: 0 }, p: 1} + path = new SearchPath(path, [startSample], startSample); + + const inputDistribution = [ + {sample: { insert: 'four', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 0.4}, + {sample: { insert: 'then', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 0.3}, + {sample: { insert: 'nine', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 0.2}, + {sample: { insert: 'what', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 0.06}, + {sample: { insert: 'cent', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 0.04} + ]; + + const mergeTarget = new SearchPath(path, inputDistribution, 
inputDistribution[0]); + assert.equal(mergeTarget.codepointLength, 4); + assert.equal(mergeTarget.inputCount, 2); + + // This test models a previous split at codepoint index 2, splitting + // the input distribution accordingly. (Note: deleteLeft = 1!) + const headDistributionSplit = [ + {sample: { insert: 'fo', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 0.4}, + {sample: { insert: 'th', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 0.3}, + {sample: { insert: 'ni', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 0.2}, + {sample: { insert: 'wh', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 0.06}, + {sample: { insert: 'ce', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 0.04} + ]; + const headPath = new SearchPath( + path, headDistributionSplit, inputDistribution[0] + ); + + const tailDistributionSplit = [ + {sample: { insert: 'ur', deleteLeft: 0, deleteRight: 0, id: 42 }, p: 0.4}, + {sample: { insert: 'en', deleteLeft: 0, deleteRight: 0, id: 42 }, p: 0.3}, + {sample: { insert: 'ne', deleteLeft: 0, deleteRight: 0, id: 42 }, p: 0.2}, + {sample: { insert: 'at', deleteLeft: 0, deleteRight: 0, id: 42 }, p: 0.06}, + {sample: { insert: 'nt', deleteLeft: 0, deleteRight: 0, id: 42 }, p: 0.04} + ]; + const tailPath = new SearchPath( + new SearchPath(testModel), tailDistributionSplit, { + trueTransform: inputDistribution[0].sample, + bestProbFromSet: inputDistribution[0].p, + inputStartIndex: 2 + } + ); + + const remerged = headPath.merge(tailPath); + + assert.deepEqual(remerged.bestExample, mergeTarget.bestExample); + assert.equal(remerged.inputCount, 2); + assert.isTrue(remerged instanceof SearchPath); + assert.deepEqual((remerged as SearchPath).inputs, inputDistribution); + assert.isTrue(remerged.hasInputs([[startSample], inputDistribution])); + }); }); }); \ No newline at end of file From e966b24526bfa0ff2261ce579ed3397680779b1e Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Mon, 10 Nov 2025 15:17:22 -0600 Subject: [PATCH 6/9] fix(web): patches up unit tests post-rebase --- 
.../correction-search/search-path.tests.ts | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts index 2ce5047c225..2a2d57668f1 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts @@ -12,7 +12,7 @@ import { assert } from 'chai'; import { LexicalModelTypes } from '@keymanapp/common-types'; import { KMWString } from '@keymanapp/web-utils'; import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; -import { models, SearchPath, TokenInputSource } from '@keymanapp/lm-worker/test-index'; +import { generateSubsetId, models, SearchPath, TokenInputSource } from '@keymanapp/lm-worker/test-index'; import Distribution = LexicalModelTypes.Distribution; import Transform = LexicalModelTypes.Transform; @@ -1419,7 +1419,8 @@ describe('SearchPath', () => { const originalInputBase: TokenInputSource = { trueTransform: {insert: 'biglargetransform', deleteLeft: 0, id: 11}, inputStartIndex: 0, - bestProbFromSet: 1 + bestProbFromSet: 1, + subsetId: generateSubsetId() }; const originalInputs = [0, 3, 8].map(n => ({...originalInputBase, inputStartIndex: n})); @@ -1527,7 +1528,8 @@ describe('SearchPath', () => { return { trueTransform: d[0].sample, bestProbFromSet: d[0].p, - inputStartIndex: 0 + inputStartIndex: 0, + subsetId: generateSubsetId() } }) @@ -1694,7 +1696,8 @@ describe('SearchPath', () => { return { trueTransform: d[0].sample, bestProbFromSet: d[0].p, - inputStartIndex: 0 + inputStartIndex: 0, + subsetId: generateSubsetId() } }) @@ -1871,7 +1874,12 @@ describe('SearchPath', () => { {sample: { insert: 'ce', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 
0.04} ]; const headPath = new SearchPath( - path, headDistributionSplit, inputDistribution[0] + path, headDistributionSplit, { + trueTransform: inputDistribution[0].sample, + bestProbFromSet: inputDistribution[0].p, + inputStartIndex: 0, + subsetId: mergeTarget.inputSource.subsetId + } ); const tailDistributionSplit = [ @@ -1885,7 +1893,8 @@ describe('SearchPath', () => { new SearchPath(testModel), tailDistributionSplit, { trueTransform: inputDistribution[0].sample, bestProbFromSet: inputDistribution[0].p, - inputStartIndex: 2 + inputStartIndex: 2, + subsetId: mergeTarget.inputSource.subsetId } ); From dc3965b3a4bff9fa0558494c90698640a9ed73a2 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Mon, 10 Nov 2025 16:20:50 -0600 Subject: [PATCH 7/9] feat(web): adds isSameSpace, enhances merging of previously-split tokens --- .../src/main/correction/search-path.ts | 48 ++++++++++++++++++- .../src/main/correction/search-space.ts | 2 + .../correction-search/search-path.tests.ts | 14 +++--- 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts index 1ce98d79549..823b2e4cdfc 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts @@ -260,7 +260,6 @@ export class SearchPath implements SearchSpace { const parentMerges = space.parents?.length > 0 ? space.parents.map((p) => this.merge(p)) : [this]; // if parentMerges.length > 0, is a SearchCluster. - // const parentMerge = parentMerges.length > 0 ? new SearchCluster(parentMerges) : parentMerges[0]; const parentMerge = parentMerges[0]; // Special case: if we've reached the head of the space to be merged, check @@ -301,7 +300,16 @@ export class SearchPath implements SearchSpace { }); // Now to re-merge the two halves. 
- return new SearchPath(this.parentSpace, mergedInputs, this.inputSource); + const mergedInputSource = { + ...this.inputSource, + inputSplitIndex: space.inputSource.inputSplitIndex + }; + + if(mergedInputSource.inputSplitIndex == undefined) { + delete mergedInputSource.inputSplitIndex; + } + + return new SearchPath(this.parentSpace, mergedInputs, mergedInputSource); } else { // If the parent was a cluster, the cluster itself is the merge. @@ -536,4 +544,40 @@ return components.join('+'); } + + isSameSpace(space: SearchSpace): boolean { + // Easiest cases: when the instances or their `spaceId` matches, we have + // a perfect match. + if(this == space || this.spaceId == space.spaceId) { + return true; + } + + // If it's falsy or a different SearchSpace type, that's an easy filter. + if(!space || !(space instanceof SearchPath)) { + return false; + } + + // If the most recent 'input source' was not triggered from the same input + // subset, it's not a match. + if(this.inputSource?.subsetId != space.inputSource?.subsetId) { + return false; + } + + // We check the indices of the input's split if one occurred. + if(this.inputSource?.inputSplitIndex != space.inputSource?.inputSplitIndex) { + return false; + } + + if(this.inputSource?.inputStartIndex != space.inputSource?.inputStartIndex) { + return false; + } + + return true; + + // Commented out b/c parentSpace-checks cause unit-test ID issues after... a... split. + // + // // Finally, we recursively verify that the parent matches. If there IS no parent, + // // we verify that _that_ aspect matches. + // return this.parentSpace?.isSameSpace(space.parentSpace) ?? 
this.parentSpace == space.parentSpace; + } } \ No newline at end of file diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts index 3ff21b4ab77..435ebfeca91 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts @@ -240,4 +240,6 @@ export interface SearchSpace { * Intended only for use during unit testing. */ readonly constituentPaths: SearchPath[][]; + + isSameSpace(space: SearchSpace): boolean; } \ No newline at end of file diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts index 2a2d57668f1..aa6e48afd24 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts @@ -1799,6 +1799,7 @@ describe('SearchPath', () => { assert.equal(remergedPath.codepointLength, mergeTarget.codepointLength); assert.sameDeepOrderedMembers(remergedPath.sourceIdentifiers, mergeTarget.sourceIdentifiers); assert.isTrue(remergedPath.hasInputs(trueDistributions)); + assert.isTrue(remergedPath.isSameSpace(mergeTarget)); } it('setup: constructs path properly', () => { @@ -1822,27 +1823,27 @@ describe('SearchPath', () => { } }); - it('splits properly at index 0', () => { + it('merges tokens previously split at index 0', () => { runCommonAssertions(0); }); - it('splits properly at index 1', () => { + it('merges tokens previously split at index 1', () => { runCommonAssertions(1); }); - it('splits properly at index 2', () => { + it('merges tokens previously split at index 2', () => { runCommonAssertions(2); }); - it('splits 
properly at index 3', () => { + it('merges tokens previously split at index 3', () => { runCommonAssertions(3); }); - it('splits properly at index 4', () => { + it('merges tokens previously split at index 4', () => { runCommonAssertions(4); }); - it('splits properly at index 5', () => { + it('merges tokens previously split at index 5', () => { runCommonAssertions(5); }); }); @@ -1905,6 +1906,7 @@ describe('SearchPath', () => { assert.isTrue(remerged instanceof SearchPath); assert.deepEqual((remerged as SearchPath).inputs, inputDistribution); assert.isTrue(remerged.hasInputs([[startSample], inputDistribution])); + assert.isTrue(remerged.isSameSpace(mergeTarget)); }); }); }); \ No newline at end of file From 4f1be8a4b7573f2bf1070b738067c62a88b8cbd9 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Tue, 11 Nov 2025 16:27:36 -0600 Subject: [PATCH 8/9] fix(web): post-rebase .merge() patchup --- .../src/main/correction/search-path.ts | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts index 823b2e4cdfc..07b00649506 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts @@ -302,11 +302,14 @@ export class SearchPath implements SearchSpace { // Now to re-merge the two halves. 
const mergedInputSource = { ...this.inputSource, - inputSplitIndex: space.inputSource.inputSplitIndex + segment: { + ...this.inputSource.segment, + end: space.inputSource.segment.end + } }; - if(mergedInputSource.inputSplitIndex == undefined) { - delete mergedInputSource.inputSplitIndex; + if(mergedInputSource.segment.end == undefined) { + delete mergedInputSource.segment.end; } return new SearchPath(this.parentSpace, mergedInputs, mergedInputSource); @@ -564,11 +567,11 @@ export class SearchPath implements SearchSpace { } // We check the indices of the input's split if one occurred. - if(this.inputSource?.inputSplitIndex != space.inputSource?.inputSplitIndex) { + if(this.inputSource?.segment.end != space.inputSource?.segment.end) { return false; } - if(this.inputSource?.inputStartIndex != space.inputSource?.inputStartIndex) { + if(this.inputSource?.segment.start != space.inputSource?.segment.start) { return false; } From 9b947b75161b0ef7551723ed59c74bf3123b10ff Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Tue, 11 Nov 2025 16:33:35 -0600 Subject: [PATCH 9/9] fix(web): post-rebase unit test patch-up --- .../correction-search/search-path.tests.ts | 69 +++++++++++-------- 1 file changed, 42 insertions(+), 27 deletions(-) diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts index aa6e48afd24..c62e15342aa 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts @@ -12,7 +12,7 @@ import { assert } from 'chai'; import { LexicalModelTypes } from '@keymanapp/common-types'; import { KMWString } from '@keymanapp/web-utils'; import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; -import { generateSubsetId, models, 
SearchPath, TokenInputSource } from '@keymanapp/lm-worker/test-index'; +import { generateSubsetId, models, SearchPath, PathInputProperties } from '@keymanapp/lm-worker/test-index'; import Distribution = LexicalModelTypes.Distribution; import Transform = LexicalModelTypes.Transform; @@ -1416,9 +1416,12 @@ describe('SearchPath', () => { [{ sample: {insert: 'transform', deleteLeft: 0, id: 11}, p: 1 }] ]; - const originalInputBase: TokenInputSource = { - trueTransform: {insert: 'biglargetransform', deleteLeft: 0, id: 11}, - inputStartIndex: 0, + const originalInputBase: PathInputProperties = { + segment: { + trueTransform: {insert: 'biglargetransform', deleteLeft: 0, id: 11}, + transitionId: 11, + start: 0 + }, bestProbFromSet: 1, subsetId: generateSubsetId() }; @@ -1434,7 +1437,7 @@ describe('SearchPath', () => { }; } - const checkFinalStateAssertions = (merged: SearchPath, originalInput: TokenInputSource) => { + const checkFinalStateAssertions = (merged: SearchPath, originalInput: PathInputProperties) => { assert.equal(merged.inputCount, 1); assert.isTrue(merged instanceof SearchPath); assert.deepEqual(merged.bestExample.text, "biglargetransform"); @@ -1507,7 +1510,7 @@ describe('SearchPath', () => { // Covers many common aspects of SearchPath merging, though not merging of // multi-member distributions. describe(`previously-split token comprised of complex, rewriting transforms: cello`, () => { - const buildPath = (inputs: Distribution[], sources: TokenInputSource[], root?: SearchPath) => { + const buildPath = (inputs: Distribution[], sources: PathInputProperties[], root?: SearchPath) => { return inputs.reduce((path, input, index) => new SearchPath(path, input, sources[index]), root ?? 
new SearchPath(testModel)); } @@ -1524,11 +1527,14 @@ describe('SearchPath', () => { ] ]; - const trueInputSources: TokenInputSource[] = trueDistributions.map((d) => { + const trueInputSources: PathInputProperties[] = trueDistributions.map((d) => { return { - trueTransform: d[0].sample, + segment: { + trueTransform: d[0].sample, + transitionId: d[0].sample.id, + start: 0 + }, bestProbFromSet: d[0].p, - inputStartIndex: 0, subsetId: generateSubsetId() } }) @@ -1555,7 +1561,7 @@ describe('SearchPath', () => { [{ sample: {insert: 'ent', deleteLeft: 0, id: 12}, p: 1 }], ...trueDistributions.slice(2) ], [ - {...trueInputSources[1], inputStartIndex: 0}, + {...trueInputSources[1], segment: {...trueInputSources[1].segment, start: 0}}, ...trueInputSources.slice(2) ]) ]); @@ -1571,7 +1577,7 @@ describe('SearchPath', () => { [{ sample: {insert: 'llar', deleteLeft: 0, id: 13}, p: 1 }], ...trueDistributions.slice(3) ], [ - {...trueInputSources[2], inputStartIndex: 0}, + {...trueInputSources[2], segment: {...trueInputSources[2].segment, start: 0}}, ...trueInputSources.slice(3) ]) ]); @@ -1587,7 +1593,7 @@ describe('SearchPath', () => { [{ sample: {insert: 'lar', deleteLeft: 0, id: 13}, p: 1 }], ...trueDistributions.slice(3) ], [ - {...trueInputSources[2], inputStartIndex: 1}, + {...trueInputSources[2], segment: {...trueInputSources[2].segment, start: 1}}, ...trueInputSources.slice(3) ]) ]); @@ -1602,7 +1608,7 @@ describe('SearchPath', () => { buildPath([ [{ sample: {insert: 'o', deleteLeft: 0, id: 14}, p: 1 }] ], [ - {...trueInputSources[3], inputStartIndex: 0}, + {...trueInputSources[3], segment: {...trueInputSources[3].segment, start: 0}}, ]) ]); @@ -1629,7 +1635,7 @@ describe('SearchPath', () => { assert.deepEqual(remergedPath.bestExample, mergeTarget.bestExample); assert.equal(remergedPath.inputCount, mergeTarget.inputCount); assert.equal(remergedPath.codepointLength, mergeTarget.codepointLength); - assert.sameDeepOrderedMembers(remergedPath.sourceIdentifiers, 
mergeTarget.sourceIdentifiers); + assert.sameDeepOrderedMembers(remergedPath.inputSegments, mergeTarget.inputSegments); assert.isTrue(remergedPath.hasInputs(trueDistributions)); } @@ -1675,7 +1681,7 @@ describe('SearchPath', () => { // Same as the prior set, but now with non-BMP text! describe(`previously-split token comprised of complex, rewriting non-BMP transforms`, () => { - const buildPath = (inputs: Distribution[], sources: TokenInputSource[], root?: SearchPath) => { + const buildPath = (inputs: Distribution[], sources: PathInputProperties[], root?: SearchPath) => { return inputs.reduce((path, input, index) => new SearchPath(path, input, sources[index]), root ?? new SearchPath(testModel)); } @@ -1692,11 +1698,14 @@ describe('SearchPath', () => { ] ]; - const trueInputSources: TokenInputSource[] = trueDistributions.map((d) => { + const trueInputSources: PathInputProperties[] = trueDistributions.map((d) => { return { - trueTransform: d[0].sample, + segment: { + trueTransform: d[0].sample, + transitionId: d[0].sample.id, + start: 0 + }, bestProbFromSet: d[0].p, - inputStartIndex: 0, subsetId: generateSubsetId() } }) @@ -1723,7 +1732,7 @@ describe('SearchPath', () => { [{ sample: {insert: toMathematicalSMP('ent'), deleteLeft: 0, id: 12}, p: 1 }], ...trueDistributions.slice(2) ], [ - {...trueInputSources[1], inputStartIndex: 0}, + {...trueInputSources[1], segment: {...trueInputSources[1].segment, start: 0}}, ...trueInputSources.slice(2) ]) ]); @@ -1739,7 +1748,7 @@ describe('SearchPath', () => { [{ sample: {insert: toMathematicalSMP('llar'), deleteLeft: 0, id: 13}, p: 1 }], ...trueDistributions.slice(3) ], [ - {...trueInputSources[2], inputStartIndex: 0}, + {...trueInputSources[2], segment: {...trueInputSources[2].segment, start: 0}}, ...trueInputSources.slice(3) ]) ]); @@ -1755,7 +1764,7 @@ describe('SearchPath', () => { [{ sample: {insert: toMathematicalSMP('lar'), deleteLeft: 0, id: 13}, p: 1 }], ...trueDistributions.slice(3) ], [ - {...trueInputSources[2], 
inputStartIndex: 1}, + {...trueInputSources[2], segment: {...trueInputSources[2].segment, start: 1}}, ...trueInputSources.slice(3) ]) ]); @@ -1770,7 +1779,7 @@ describe('SearchPath', () => { buildPath([ [{ sample: {insert: toMathematicalSMP('o'), deleteLeft: 0, id: 14}, p: 1 }] ], [ - {...trueInputSources[3], inputStartIndex: 0}, + {...trueInputSources[3], segment: {...trueInputSources[3].segment, start: 0}}, ]) ]); @@ -1797,7 +1806,7 @@ describe('SearchPath', () => { assert.deepEqual(remergedPath.bestExample, mergeTarget.bestExample); assert.equal(remergedPath.inputCount, mergeTarget.inputCount); assert.equal(remergedPath.codepointLength, mergeTarget.codepointLength); - assert.sameDeepOrderedMembers(remergedPath.sourceIdentifiers, mergeTarget.sourceIdentifiers); + assert.sameDeepOrderedMembers(remergedPath.inputSegments, mergeTarget.inputSegments); assert.isTrue(remergedPath.hasInputs(trueDistributions)); assert.isTrue(remergedPath.isSameSpace(mergeTarget)); } @@ -1876,9 +1885,12 @@ describe('SearchPath', () => { ]; const headPath = new SearchPath( path, headDistributionSplit, { - trueTransform: inputDistribution[0].sample, + segment: { + trueTransform: inputDistribution[0].sample, + transitionId: inputDistribution[0].sample.id, + start: 0 + }, bestProbFromSet: inputDistribution[0].p, - inputStartIndex: 0, subsetId: mergeTarget.inputSource.subsetId } ); @@ -1892,9 +1904,12 @@ describe('SearchPath', () => { ]; const tailPath = new SearchPath( new SearchPath(testModel), tailDistributionSplit, { - trueTransform: inputDistribution[0].sample, + segment: { + trueTransform: inputDistribution[0].sample, + transitionId: inputDistribution[0].sample.id, + start: 2 + }, bestProbFromSet: inputDistribution[0].p, - inputStartIndex: 2, subsetId: mergeTarget.inputSource.subsetId } );