keymanapp · jahorton · Nov 11, 2025 · Nov 11, 2025 · Nov 12, 2025
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts
@@ -12,7 +12,7 @@ import { LexicalModelTypes } from '@keymanapp/common-types';
 import { deepCopy, KMWString } from "@keymanapp/web-utils";
 
 import { SearchPath } from "./search-path.js";
-import { SearchSpace, TokenInputSource } from "./search-space.js";
+import { SearchSpace, PathInputProperties } from "./search-space.js";
 import { TokenSplitMap } from "./context-tokenization.js";
 
 import Distribution = LexicalModelTypes.Distribution;
@@ -110,8 +110,11 @@ export class ContextToken {
 
       rawTransformDistributions.forEach((entry) => {
         searchSpace = new SearchPath(searchSpace, entry, {
-          trueTransform: entry[0].sample,
-          inputStartIndex: 0,
+          segment: {
+            trueTransform: entry[0].sample,
+            transitionId: entry[0].sample.id,
+            start: 0
+          },
           bestProbFromSet: 1
         });
       });
@@ -124,7 +127,7 @@ export class ContextToken {
    * Call this to record the original keystroke Transforms for the context range
    * corresponding to this token.
    */
-  addInput(inputSource: TokenInputSource, distribution: Distribution<Transform>) {
+  addInput(inputSource: PathInputProperties, distribution: Distribution<Transform>) {
     this._searchSpace = new SearchPath(this._searchSpace, distribution, inputSource);
   }
 
@@ -143,8 +146,8 @@ export class ContextToken {
    * Denotes the original keystroke Transforms comprising the range corresponding
    * to this token.
    */
-  get inputRange() {
-    return this.searchSpace.sourceIdentifiers;
+  get inputSegments() {
+    return this.searchSpace.inputSegments;
   }
 
   /**
@@ -163,15 +166,6 @@ export class ContextToken {
     return this.searchSpace.sourceRangeKey;
   }
 
-  /**
-   * Gets a simple, compact string-based representation of `inputRange`.
-   *
-   * This should only ever be used for debugging purposes.
-   */
-  get sourceText(): string {
-    return this.searchSpace.likeliestSourceText;
-  }
-
   /**
    * Generates text corresponding to the net effects of the most likely inputs
    * received that can correspond to the current instance.
@@ -192,7 +186,7 @@ export class ContextToken {
     // Thus, we don't set the .isWhitespace flag field.
     const resultToken = new ContextToken(lexicalModel);
 
-    let lastSourceInput: TokenInputSource;
+    let lastSourceInput: PathInputProperties;
     let lastInputDistrib: Distribution<Transform>;
     for(const token of tokensToMerge) {
       const inputCount = token.inputCount;
@@ -203,7 +197,7 @@ export class ContextToken {
       }
 
       // Are we re-merging on a previously split transform?
-      if(lastSourceInput?.trueTransform != token.inputRange[0].trueTransform) {
+      if(lastSourceInput?.segment.trueTransform != token.inputSegments[0].segment.trueTransform) {
         if(lastSourceInput) {
           resultToken.addInput(lastSourceInput, lastInputDistrib);
         } // else:  there's nothing to add as input
@@ -232,9 +226,9 @@ export class ContextToken {
       // Ignore the last entry for now - it may need to merge with a matching
       // entry in the next token!
       for(let i = startIndex; i < inputCount - 1; i++) {
-        resultToken.addInput(token.inputRange[i], token.searchSpace.inputSequence[i]);
+        resultToken.addInput(token.inputSegments[i], token.searchSpace.inputSequence[i]);
       }
-      lastSourceInput = token.inputRange[inputCount-1];
+      lastSourceInput = token.inputSegments[inputCount-1];
       lastInputDistrib = token.searchSpace.inputSequence[inputCount-1];
     }
 
@@ -257,7 +251,7 @@ export class ContextToken {
 
     // Build an alternate version of the transforms:  if we preprocess all deleteLefts,
     // what text remains from each?
-    const alteredSources = preprocessInputSources(this.inputRange);
+    const alteredSources = preprocessInputSources(this.inputSegments);
 
     const blankContext = { left: '', startOfBuffer: true, endOfBuffer: true };
     const splitSpecs = split.matches.slice();
@@ -313,15 +307,17 @@ export class ContextToken {
           };
         });
 
-        const priorSourceInput = overextendedToken.inputRange[lastInputIndex];
+        const priorSourceInput = overextendedToken.inputSegments[lastInputIndex];
         constructingToken.addInput(priorSourceInput, headDistribution);
         tokensFromSplit.push(constructingToken);
 
         constructingToken = new ContextToken(lexicalModel);
         backupToken = new ContextToken(constructingToken);
         constructingToken.addInput({
-          trueTransform: priorSourceInput.trueTransform,
-          inputStartIndex: priorSourceInput.inputStartIndex + extraCharsAdded,
+          segment: {
+            ...priorSourceInput.segment,
+            start: priorSourceInput.segment.start + extraCharsAdded
+          },
           bestProbFromSet: priorSourceInput.bestProbFromSet
         }, tailDistribution);
 
@@ -338,34 +334,34 @@ export class ContextToken {
 
       backupToken = new ContextToken(constructingToken);
       lenBeforeLastApply = KMWString.length(currentText.left);
-      currentText = applyTransform(alteredSources[transformIndex].trueTransform, currentText);
-      constructingToken.addInput(this.inputRange[transformIndex], this.searchSpace.inputSequence[transformIndex]);
+      currentText = applyTransform(alteredSources[transformIndex].segment.trueTransform, currentText);
+      constructingToken.addInput(this.inputSegments[transformIndex], this.searchSpace.inputSequence[transformIndex]);
       transformIndex++;
     }
 
     return tokensFromSplit;
   }
 }
 
-export function preprocessInputSources(inputSources: ReadonlyArray<TokenInputSource>) {
+export function preprocessInputSources(inputSources: ReadonlyArray<PathInputProperties>) {
   const alteredSources = deepCopy(inputSources);
   let trickledDeleteLeft = 0;
   for(let i = alteredSources.length - 1; i >= 0; i--) {
     const source = alteredSources[i];
     if(trickledDeleteLeft) {
-      const insLen = KMWString.length(source.trueTransform.insert);
+      const insLen = KMWString.length(source.segment.trueTransform.insert);
       if(insLen <= trickledDeleteLeft) {
-        source.trueTransform.insert = '';
+        source.segment.trueTransform.insert = '';
         trickledDeleteLeft -= insLen;
       } else {
-        source.trueTransform.insert = KMWString.substring(source.trueTransform.insert, 0, insLen - trickledDeleteLeft);
+        source.segment.trueTransform.insert = KMWString.substring(source.segment.trueTransform.insert, 0, insLen - trickledDeleteLeft);
         trickledDeleteLeft = 0;
       }
     }
-    trickledDeleteLeft += source.trueTransform.deleteLeft;
-    source.trueTransform.deleteLeft = 0;
+    trickledDeleteLeft += source.segment.trueTransform.deleteLeft;
+    source.segment.trueTransform.deleteLeft = 0;
   }
 
-  alteredSources[0].trueTransform.deleteLeft = trickledDeleteLeft;
+  alteredSources[0].segment.trueTransform.deleteLeft = trickledDeleteLeft;
   return alteredSources;
 }
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts
@@ -158,16 +158,6 @@ export class ContextTokenization {
     return this.tail.spaceId;
   }
 
-  /**
-   * Returns plain-text strings representing the most probable representation for all
-   * tokens represented by this tokenization instance.
-   *
-   * Intended for debugging use only.
-   */
-  get sourceText() {
-    return this.tokens.map(token => token.sourceText);
-  }
-
   /**
    * Returns a plain-text string representing the most probable representation for all
    * tokens represented by this tokenization instance.
@@ -596,8 +586,11 @@ export class ContextTokenization {
         distribution = distribution.map((mass) => ({sample: { ...mass.sample, deleteLeft: 0 }, p: mass.p }));
       }
       affectedToken.addInput({
-        trueTransform: sourceInput,
-        inputStartIndex: appliedLength,
+        segment: {
+          trueTransform: sourceInput,
+          transitionId: sourceInput.id,
+          start: appliedLength
+        },
         bestProbFromSet: bestProbFromSet
       }, distribution);
       appliedLength += KMWString.length(distribution[0].sample.insert);

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts
@@ -10,12 +10,10 @@
 
 import { QueueComparator as Comparator, KMWString, PriorityQueue } from '@keymanapp/web-utils';
 import { LexicalModelTypes } from '@keymanapp/common-types';
-import { applyTransform } from '@keymanapp/models-templates';
 
 import { EDIT_DISTANCE_COST_SCALE, SearchNode, SearchResult } from './distance-modeler.js';
-import { generateSpaceSeed, PathResult, SearchSpace, TokenInputSource } from './search-space.js';
+import { generateSpaceSeed, PathResult, SearchSpace, PathInputProperties } from './search-space.js';
 
-import Context = LexicalModelTypes.Context;
 import Distribution = LexicalModelTypes.Distribution;
 import LexicalModel = LexicalModelTypes.LexicalModel;
 import ProbabilityMass = LexicalModelTypes.ProbabilityMass;
@@ -30,7 +28,7 @@ export const QUEUE_NODE_COMPARATOR: Comparator<SearchNode> = function(arg1, arg2
 export class SearchPath implements SearchSpace {
   private selectionQueue: PriorityQueue<SearchNode> = new PriorityQueue(QUEUE_NODE_COMPARATOR);
   readonly inputs?: Distribution<Transform>;
-  readonly inputSource?: TokenInputSource;
+  readonly inputSource?: PathInputProperties;
 
   readonly parentSpace: SearchSpace;
   readonly spaceId: number;
@@ -79,31 +77,34 @@ export class SearchPath implements SearchSpace {
    * @param srcKeystroke Data about the actual context range represented by `inputs` and
    * its underlying keystroke.
    */
-  constructor(space: SearchSpace, inputs: Distribution<Transform>, srcKeystroke: TokenInputSource);
-  constructor(arg1: LexicalModel | SearchSpace, inputs?: Distribution<Transform>, inputSource?: TokenInputSource | ProbabilityMass<Transform>) {
+  constructor(space: SearchSpace, inputs: Distribution<Transform>, srcKeystroke: PathInputProperties);
+  constructor(arg1: LexicalModel | SearchSpace, inputs?: Distribution<Transform>, inputSource?: PathInputProperties | ProbabilityMass<Transform>) {
     // If we're taking in a pre-constructed search node, it's got an associated,
     // pre-assigned spaceID - so use that.
     const isExtending = (arg1 instanceof SearchPath);
     this.spaceId = generateSpaceSeed();
 
-    // Coerce inputSource to TokenInputSource format.
+    // Coerce inputSource to PathInputProperties format.
-    if(inputSource && (inputSource as TokenInputSource).trueTransform == undefined) {
+    if(inputSource && (inputSource as ProbabilityMass<Transform>).sample != undefined) {
       const keystroke = inputSource as ProbabilityMass<Transform>;
       inputSource = {
-        trueTransform: keystroke.sample,
-        bestProbFromSet: keystroke.p,
-        inputStartIndex: 0
+        segment: {
+          trueTransform: keystroke.sample,
+          transitionId: keystroke.sample.id,
+          start: 0
+        },
+        bestProbFromSet: keystroke.p
       }
     };
 
-    const inputSrc = inputSource as TokenInputSource;
+    const inputSrc = inputSource as PathInputProperties;
 
     if(isExtending) {
       const parentSpace = arg1 as SearchSpace;
       const logTierCost = -Math.log(inputSrc.bestProbFromSet);
 
       const transitionId = (inputs?.[0].sample.id);
-      if(transitionId !== undefined && inputSrc.trueTransform.id != transitionId) {
+      if(transitionId !== undefined && inputSrc.segment.transitionId != transitionId) {
         throw new Error("Input distribution and input-source transition IDs must match");
       }
 
@@ -198,23 +199,6 @@ export class SearchPath implements SearchSpace {
     }
   }
 
-  get likeliestSourceText(): string {
-    let prefixContext: Context = { left: this.parentSpace?.likeliestSourceText ?? '', startOfBuffer: true, endOfBuffer: true };
-    const inputTransform = this.inputSource?.trueTransform ?? { insert: '', deleteLeft: 0 };
-
-    const excessDeletes = inputTransform.deleteLeft - KMWString.length(prefixContext.left);
-    if(excessDeletes > 0) {
-      prefixContext = {
-        ...prefixContext,
-        // \u{2421} = ␡ (Unicode symbol for Delete)
-        left: '\u{2421}'.repeat(excessDeletes) + prefixContext.left
-      };
-    }
-
-    const result = applyTransform(inputTransform, prefixContext);
-    return result.left;
-  }
-
   get parents() {
     // The SearchPath class may only have a single parent.
     return this.parentSpace ? [this.parentSpace] : [];
@@ -362,15 +346,15 @@ export class SearchPath implements SearchSpace {
     return Object.values(this.returnedValues ?? {}).map(v => new SearchResult(v));
   }
 
-  public get sourceIdentifiers(): TokenInputSource[] {
+  public get inputSegments(): PathInputProperties[] {
     if(!this.parentSpace) {
       return [];
     }
 
-    const parentSources = this.parentSpace.sourceIdentifiers;
+    const parentSources = this.parentSpace.inputSegments;
     if(this.inputSource) {
-      const inputId = this.inputSource.trueTransform.id;
-      if(inputId && parentSources.length > 0 && parentSources[parentSources.length - 1].trueTransform.id == inputId) {
+      const inputId = this.inputSource.segment.transitionId;
+      if(inputId && parentSources.length > 0 && parentSources[parentSources.length - 1].segment.transitionId == inputId) {
         return parentSources;
       }
 
@@ -386,11 +370,11 @@ export class SearchPath implements SearchSpace {
    */
   get sourceRangeKey(): string {
     const components: string[] = [];
-    const sources = this.sourceIdentifiers;
+    const sources = this.inputSegments;
 
     for(const source of sources) {
-      const i = source.inputStartIndex;
-      components.push(`T${source.trueTransform.id}${i != 0 ? '@' + i : ''}`);
+      const i = source.segment.start;
+      components.push(`T${source.segment.transitionId}${i != 0 ? '@' + i : ''}`);
     }
 
     return components.join('+');

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts
@@ -38,26 +38,38 @@ type CompleteSearchPath = {
 
 export type PathResult = NullPath | IntermediateSearchPath | CompleteSearchPath;
 
-/**
- * Models the properties and portion of an input event applied by a SearchSpace for
- * correction-search purposes.
- */
-export interface TokenInputSource {
+export interface InputSegment {
   /**
    * The Transform corresponding to the keystroke applied to the true context
    * for this input event.
    *
-   * NOTE:  outside of use for .sourceText / .likeliestSourceText, the only part
-   * that should actually be referenced is the Transform / transition ID.
+   * @deprecated Slated for removal within epic/autocorrect.
    */
   trueTransform: Transform;
 
+  /**
+   * The transform / transition ID of the corresponding input event.
+   */
+  transitionId: number,
+
   /**
    * Marks the initial index (inclusive) within the insert strings for the
-   * corresponding transitions' Transforms that is applied by the corresponding
+   * corresponding transitions' Transforms that are applied by the corresponding
    * tokenized correction-search input.
    */
-  inputStartIndex: number;
+  start: number
+}
+
+/**
+ * Models the properties and portion of an input event applied by a SearchSpace for
+ * correction-search purposes.
+ */
+export interface PathInputProperties {
+  /**
+   * Denotes the portion of the ongoing input stream represented by the corresponding
+   * input distribution(s) of a SearchSpace.
+   */
+  segment: InputSegment;
 
   /**
    * Notes the highest probability found in the input event's transform
@@ -163,13 +175,13 @@ export interface SearchSpace {
    */
   readonly bestExample: { text: string, p: number };
 
-  readonly likeliestSourceText: string;
-
   /**
    * Gets components useful for building a string-based representation of the
    * keystroke range corrected by this search space.
+   *
+   * TODO: will return only the `segment` part of each entry in the future.
    */
-  readonly sourceIdentifiers: TokenInputSource[];
+  readonly inputSegments: PathInputProperties[];
 
   /**
-   * Gets a compact string-based representation of `inputRange` that
+   * Gets a compact string-based representation of `inputSegments` that