Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
* in the context and associated correction-search progress and results.
*/

import { buildMergedTransform } from "@keymanapp/models-templates";
import { LexicalModelTypes } from '@keymanapp/common-types';
import { deepCopy, KMWString } from "@keymanapp/web-utils";

Expand Down Expand Up @@ -183,59 +182,19 @@ export class ContextToken {
* @param lexicalModel
* @returns
*/
static merge(tokensToMerge: ContextToken[], lexicalModel: LexicalModel): ContextToken {
static merge(tokensToMerge: ContextToken[]): ContextToken {
if(tokensToMerge.length < 1) {
return null;
}

// Assumption: if we're merging a token, it's not whitespace.
// Thus, we don't set the .isWhitespace flag field.
const resultToken = new ContextToken(lexicalModel);

let lastSourceInput: PathInputProperties;
let lastInputDistrib: Distribution<Transform>;
for(const token of tokensToMerge) {
const inputCount = token.inputCount;
let startIndex = 0;

if(inputCount == 0) {
continue;
}

// Are we re-merging on a previously split transform?
if(lastSourceInput?.segment.trueTransform != token.inputSegments[0].segment.trueTransform) {
if(lastSourceInput) {
resultToken.addInput(lastSourceInput, lastInputDistrib);
} // else: there's nothing to add as input
} else {
// If so, re-merge it!
startIndex++;

lastInputDistrib = lastInputDistrib?.map((entry, index) => {
return {
sample: buildMergedTransform(entry.sample, token.searchSpace.inputSequence[0][index].sample),
p: entry.p
}
});

// In case there's only one input that needs merging on both ends.
if(inputCount == 1) {
// There's potential that the next incoming token needs to merge with this.
continue;
} else {
resultToken.addInput(lastSourceInput, lastInputDistrib);
}
}
lastSourceInput = null;
lastInputDistrib = null;

// Ignore the last entry for now - it may need to merge with a matching
// entry in the next token!
for(let i = startIndex; i < inputCount - 1; i++) {
resultToken.addInput(token.inputSegments[i], token.searchSpace.inputSequence[i]);
}
lastSourceInput = token.inputSegments[inputCount-1];
lastInputDistrib = token.searchSpace.inputSequence[inputCount-1];
const resultToken = new ContextToken(tokensToMerge.shift());
while(tokensToMerge.length > 0) {
const next = tokensToMerge.shift();
resultToken._searchSpace = resultToken._searchSpace.merge(next._searchSpace);
}

resultToken.addInput(lastSourceInput, lastInputDistrib);

return resultToken;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ export class ContextTokenization {
// consider: move to ContextToken as class method. (static?)
const merge = merges.shift();
const tokensToMerge = merge.inputs.map((m) => baseTokenization[m.index]);
const mergeResult = ContextToken.merge(tokensToMerge, lexicalModel);
const mergeResult = ContextToken.merge(tokensToMerge);
tokenization.push(mergeResult);
i = merge.inputs[merge.inputs.length - 1].index;
continue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import { QueueComparator as Comparator, KMWString, PriorityQueue } from '@keymanapp/web-utils';
import { LexicalModelTypes } from '@keymanapp/common-types';
import { buildMergedTransform } from '@keymanapp/models-templates';

import { EDIT_DISTANCE_COST_SCALE, SearchNode, SearchResult } from './distance-modeler.js';
import { generateSpaceSeed, PathResult, SearchSpace, PathInputProperties } from './search-space.js';
Expand Down Expand Up @@ -148,19 +149,6 @@ export class SearchPath implements SearchSpace {
this.bestProbInEdge = 1;
}

/**
* Retrieves the sequences of inputs that led to this SearchPath.
*/
public get inputSequence(): Distribution<Transform>[] {
if(this.parents[0]) {
return [...this.parents[0].inputSequence, this.inputs];
} else if(this.inputs) {
return [this.inputs];
} else {
return [];
}
}

public get constituentPaths(): SearchPath[][] {
const parentPaths = this.parents[0]?.constituentPaths ?? [];
if(parentPaths.length > 0) {
Expand Down Expand Up @@ -256,6 +244,78 @@ export class SearchPath implements SearchSpace {
this.selectionQueue = new PriorityQueue<SearchNode>(QUEUE_NODE_COMPARATOR, entries);
}

/**
 * Appends the provided SearchSpace's search data to this instance, treating
 * `this` as the head of the combined path. Spaces are assumed to be in
 * sequence, with `this` as the head 'space'. When the adjacent inputs of the
 * two spaces originate from the same split transform (matching
 * `transitionId`s), the split halves are re-merged into a single input entry.
 * @param space The SearchSpace whose inputs should follow this instance's.
 * @returns A SearchSpace representing the combined search range.
 */
public merge(space: SearchSpace): SearchSpace {
  // Head node for the incoming path is empty, so skip it.
  if(space.parents.length == 0) {
    return this;
  }

  // Merge any parents first as a baseline. We have to come after their
  // effects are merged in, anyway.
  // NOTE(review): the `: [this]` fallback looks unreachable — the guard above
  // already returned when `space.parents` is empty. Confirm and simplify.
  const parentMerges = space.parents?.length > 0 ? space.parents.map((p) => this.merge(p)) : [this];

  // If `space` has multiple parents it is a SearchCluster; only the first
  // merge result is consumed below.
  const parentMerge = parentMerges[0];

  // Special case: if we've reached the head of the space to be merged, check
  // for a split transform.
  // - we return `this` from the root, so if that's what we received, we're
  //   on the first descendant - the first path component.
  if(space instanceof SearchPath) {
    if(parentMerge != this) {
      return new SearchPath(parentMerge, space.inputs, space.inputSource);
    }

    const localInputId = this.inputSource?.segment.transitionId;
    const spaceInputId = space.inputSource?.segment.transitionId;
    // The 'id' may be undefined in some unit tests and for tokens
    // reconstructed after a backspace. In either case, we consider the
    // related results as fully separate; our reconstructions are
    // per-codepoint.
    if(localInputId != spaceInputId || localInputId === undefined) {
      return new SearchPath(parentMerge, space.inputs, space.inputSource);
    }

    // Get the twin halves that were split.
    // Assumption: the two halves are in their original order, etc.
    const localInputs = this.inputs;
    const spaceInputs = space.inputs;

    // Sanity check - ensure that the input distributions have the same length;
    // if not, this shouldn't represent a SearchPath split!
    if(localInputs.length != spaceInputs.length) {
      return new SearchPath(parentMerge, space.inputs, space.inputSource);
    }

    // Merge them! Each local entry is rejoined pairwise with its twin from
    // the other half; the local half's probability (`entry.p`) is kept.
    const mergedInputs = localInputs?.map((entry, index) => {
      return {
        sample: buildMergedTransform(entry.sample, spaceInputs[index].sample),
        p: entry.p
      }
    });

    // Now to re-merge the two halves: keep this half's segment `start`, adopt
    // the other half's `end` so the segment spans the full original range.
    const mergedInputSource = {
      ...this.inputSource,
      segment: {
        ...this.inputSource.segment,
        end: space.inputSource.segment.end
      }
    };

    // Drop an `undefined` end entirely so the key's mere presence doesn't
    // trip deep-equality comparisons elsewhere (e.g. in unit tests).
    if(mergedInputSource.segment.end == undefined) {
      delete mergedInputSource.segment.end;
    }

    return new SearchPath(this.parentSpace, mergedInputs, mergedInputSource);
  } else {
    // If the parent was a cluster, the cluster itself is the merge.
    return parentMerge;
  }
}

public split(charIndex: number): [SearchSpace, SearchPath] {
const model = this.model;
const internalSplitIndex = charIndex - (this.codepointLength - this.edgeLength);
Expand Down Expand Up @@ -484,4 +544,40 @@ export class SearchPath implements SearchSpace {

return components.join('+');
}

/**
 * Determines whether the provided SearchSpace represents the same search
 * range as this instance, matching on identity, `spaceId`, or the most
 * recent input-source properties.
 * @param space The space to compare against; may be null/undefined.
 * @returns `true` if the two spaces are considered equivalent.
 */
isSameSpace(space: SearchSpace): boolean {
  // Guard first: a falsy `space` can never match, and the property accesses
  // below would throw on it. (The original checked this only after reading
  // `space.spaceId`, so `isSameSpace(null)` threw instead of returning false.)
  if(!space) {
    return false;
  }

  // Easiest cases: when the instances or their `spaceId`s match, we have
  // a perfect match.
  if(this == space || this.spaceId == space.spaceId) {
    return true;
  }

  // A different SearchSpace type is an easy filter.
  if(!(space instanceof SearchPath)) {
    return false;
  }

  // If the most recent 'input source' was not triggered from the same input
  // subset, it's not a match.
  if(this.inputSource?.subsetId != space.inputSource?.subsetId) {
    return false;
  }

  // We check the indices of the input's split if one occurred.
  if(this.inputSource?.segment.end != space.inputSource?.segment.end) {
    return false;
  }

  if(this.inputSource?.segment.start != space.inputSource?.segment.start) {
    return false;
  }

  return true;

  // Commented out b/c parentSpace-checks cause unit-test ID issues after... a... split.
  //
  // // Finally, we recursively verify that the parent matches. If there IS no parent,
  // // we verify that _that_ aspect matches.
  // return this.parentSpace?.isSameSpace(space.parentSpace) ?? this.parentSpace == space.parentSpace;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -190,15 +190,6 @@ export interface SearchSpace {
*/
readonly inputCount: number;

/**
* Retrieves the sequence of inputs that led to this SearchSpace.
*
* THIS WILL BE REMOVED SHORTLY in favor of `constituentPaths` below, which
* provides an improved view into the data and models multiple paths to the
* space when they exist. (Once SearchPath takes on merging & splitting)
*/
readonly inputSequence: Distribution<Transform>[];

/**
* Reports the length in codepoints of corrected text represented by completed
* paths from this instance.
Expand All @@ -225,6 +216,15 @@ export interface SearchSpace {
*/
get sourceRangeKey(): string;

/**
* Extends this SearchSpace with the provided SearchSpace's search properties,
* widening the represented search range accordingly. If this operation
* re-merges the result of a previous .split() call, the two halves
* of any split input components will be fully re-merged.
* @param space
*/
merge(space: SearchSpace): SearchSpace;

/**
* Splits this SearchSpace into two halves at the specified codepoint index.
* The 'head' component will maximally re-use existing cached data, while the
Expand All @@ -240,4 +240,6 @@ export interface SearchSpace {
* Intended only for use during unit testing.
*/
readonly constituentPaths: SearchPath[][];

isSameSpace(space: SearchSpace): boolean;
}
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ describe('ContextToken', function() {
const token2 = new ContextToken(plainModel, "'");
const token3 = new ContextToken(plainModel, "t");

const merged = ContextToken.merge([token1, token2, token3], plainModel);
const merged = ContextToken.merge([token1, token2, token3]);
assert.equal(merged.exampleInput, "can't");
token1.inputSegments.forEach((entry) => assert.isTrue(merged.inputSegments.indexOf(entry) > -1));
token2.inputSegments.forEach((entry) => assert.isTrue(merged.inputSegments.indexOf(entry) > -1));
Expand Down Expand Up @@ -155,7 +155,7 @@ describe('ContextToken', function() {
subsetId: srcSubsetId
}, [{sample: {insert: 't', deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}]);

const merged = ContextToken.merge([token1, token2, token3], plainModel);
const merged = ContextToken.merge([token1, token2, token3]);
assert.equal(merged.exampleInput, "can't");
assert.deepEqual(merged.inputSegments, [ {
segment: {
Expand Down Expand Up @@ -253,7 +253,7 @@ describe('ContextToken', function() {
subsetId: srcSubsetIds[3]
}, [{sample: srcTransforms[3], p: 1}]);

const merged = ContextToken.merge(tokensToMerge, plainModel);
const merged = ContextToken.merge(tokensToMerge);
assert.equal(merged.exampleInput, "applesandsourgrapes");
assert.deepEqual(merged.inputSegments, srcTransforms.map((t, i) => ({
segment: {
Expand Down Expand Up @@ -352,7 +352,7 @@ describe('ContextToken', function() {
subsetId: srcSubsetIds[3]
}, [{sample: srcTransforms[3], p: 1}]);

const merged = ContextToken.merge(tokensToMerge, plainModel);
const merged = ContextToken.merge(tokensToMerge);
assert.equal(merged.exampleInput, toMathematicalSMP("applesandsourgrapes"));
assert.deepEqual(merged.inputSegments, srcTransforms.map((t, i) => ({
segment: {
Expand Down
Loading