From 1e6b86e06825e95b1d2fb10de4eb9a6cc90bcd0f Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Fri, 24 Oct 2025 12:52:35 -0500 Subject: [PATCH 1/9] refactor(web): implement SourcePath merging As with the prior PR, this moves correction-search path merging onto SourcePath, rather than expecting ContextToken to manage it when multiple paths to construct a token exists. Build-bot: skip build:web Test-bot: skip --- .../src/main/correction/context-token.ts | 59 +++---------------- .../main/correction/context-tokenization.ts | 2 +- .../src/main/correction/search-path.ts | 57 ++++++++++++++++++ .../src/main/correction/search-space.ts | 9 +++ .../context/context-token.tests.ts | 8 +-- 5 files changed, 80 insertions(+), 55 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts index 452ae142af0..03db28ef294 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts @@ -7,7 +7,6 @@ * in the context and associated correction-search progress and results. */ -import { buildMergedTransform } from "@keymanapp/models-templates"; import { LexicalModelTypes } from '@keymanapp/common-types'; import { deepCopy, KMWString } from "@keymanapp/web-utils"; @@ -183,59 +182,19 @@ export class ContextToken { * @param lexicalModel * @returns */ - static merge(tokensToMerge: ContextToken[], lexicalModel: LexicalModel): ContextToken { + static merge(tokensToMerge: ContextToken[]): ContextToken { + if(tokensToMerge.length < 1) { + return null; + } + // Assumption: if we're merging a token, it's not whitespace. // Thus, we don't set the .isWhitespace flag field. 
- const resultToken = new ContextToken(lexicalModel); - - let lastSourceInput: PathInputProperties; - let lastInputDistrib: Distribution; - for(const token of tokensToMerge) { - const inputCount = token.inputCount; - let startIndex = 0; - - if(inputCount == 0) { - continue; - } - - // Are we re-merging on a previously split transform? - if(lastSourceInput?.segment.trueTransform != token.inputSegments[0].segment.trueTransform) { - if(lastSourceInput) { - resultToken.addInput(lastSourceInput, lastInputDistrib); - } // else: there's nothing to add as input - } else { - // If so, re-merge it! - startIndex++; - - lastInputDistrib = lastInputDistrib?.map((entry, index) => { - return { - sample: buildMergedTransform(entry.sample, token.searchSpace.inputSequence[0][index].sample), - p: entry.p - } - }); - - // In case there's only one input that needs merging on both ends. - if(inputCount == 1) { - // There's potential that the next incoming token needs to merge with this. - continue; - } else { - resultToken.addInput(lastSourceInput, lastInputDistrib); - } - } - lastSourceInput = null; - lastInputDistrib = null; - - // Ignore the last entry for now - it may need to merge with a matching - // entry in the next token! 
- for(let i = startIndex; i < inputCount - 1; i++) { - resultToken.addInput(token.inputSegments[i], token.searchSpace.inputSequence[i]); - } - lastSourceInput = token.inputSegments[inputCount-1]; - lastInputDistrib = token.searchSpace.inputSequence[inputCount-1]; + const resultToken = new ContextToken(tokensToMerge.shift()); + while(tokensToMerge.length > 0) { + const next = tokensToMerge.shift(); + resultToken._searchSpace = resultToken._searchSpace.merge(next._searchSpace); } - resultToken.addInput(lastSourceInput, lastInputDistrib); - return resultToken; } diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts index 27239dd0506..3d1e04ced6a 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts @@ -526,7 +526,7 @@ export class ContextTokenization { // consider: move to ContextToken as class method. (static?) 
const merge = merges.shift(); const tokensToMerge = merge.inputs.map((m) => baseTokenization[m.index]); - const mergeResult = ContextToken.merge(tokensToMerge, lexicalModel); + const mergeResult = ContextToken.merge(tokensToMerge); tokenization.push(mergeResult); i = merge.inputs[merge.inputs.length - 1].index; continue; diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts index 71e9fa2be0c..b884a5437e9 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts @@ -10,6 +10,7 @@ import { QueueComparator as Comparator, KMWString, PriorityQueue } from '@keymanapp/web-utils'; import { LexicalModelTypes } from '@keymanapp/common-types'; +import { buildMergedTransform } from '@keymanapp/models-templates'; import { EDIT_DISTANCE_COST_SCALE, SearchNode, SearchResult } from './distance-modeler.js'; import { generateSpaceSeed, PathResult, SearchSpace, PathInputProperties } from './search-space.js'; @@ -259,6 +260,62 @@ export class SearchPath implements SearchSpace { this.selectionQueue = new PriorityQueue(QUEUE_NODE_COMPARATOR, entries); } + // spaces are in sequence here. + // `this` = head 'space'. + public merge(space: SearchSpace): SearchSpace { + // Head node for the incoming path is empty, so skip it. + if(space.parents.length == 0) { + return this; + } + + // Merge any parents first as a baseline. We have to come after their + // affects are merged in, anyway. + const parentMerges = space.parents?.length > 0 ? space.parents.map((p) => this.merge(p)) : [this]; + + // if parentMerges.length > 0, is a SearchCluster. + // const parentMerge = parentMerges.length > 0 ? 
new SearchCluster(parentMerges) : parentMerges[0]; + const parentMerge = parentMerges[0]; + + // Special case: if we've reached the head of the space to be merged, check + // for a split transform. + // - we return `this` from the root, so if that's what we received, we're + // on the first descendant - the first path component. + if(space instanceof SearchPath) { + if(parentMerge != this) { + return new SearchPath(parentMerge, space.inputs, space.inputSource); + } + + const localInputId = this.inputSource?.segment.transitionId; + const spaceInputId = space.inputSource?.segment.transitionId; + // The 'id' may be undefined in some unit tests and for tokens + // reconstructed after a backspace. In either case, we consider the + // related results as fully separate; our reconstructions are + // per-codepoint. + if(localInputId != spaceInputId || localInputId === undefined) { + return new SearchPath(parentMerge, space.inputs, space.inputSource); + } else { + // Get the twin halves that were split. + // Assumption: the two halves are in their original order, etc. + const localInputs = this.inputs; + const spaceInputs = space.inputs; + + // Merge them! + const mergedInputs = localInputs?.map((entry, index) => { + return { + sample: buildMergedTransform(entry.sample, spaceInputs[index].sample), + p: entry.p + } + }); + + // Now to re-merge the two halves. + return new SearchPath(this.parentSpace, mergedInputs, this.inputSource); + } + } else { + // If the parent was a cluster, the cluster itself is the merge. 
+ return parentMerge; + } + } + public split(charIndex: number): [SearchSpace, SearchPath] { const model = this.model; const internalSplitIndex = charIndex - (this.codepointLength - this.edgeLength); diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts index a3e9f22796e..8ef7b523ea7 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts @@ -225,6 +225,15 @@ export interface SearchSpace { */ get sourceRangeKey(): string; + /** + * Appends this SearchSpace with the provided SearchSpace's search properties, + * extending the represented search range accordingly. If this operation + * represents merging the result of a previous .split() call, the two halves + * of any split input components will be fully re-merged. + * @param space + */ + merge(space: SearchSpace): SearchSpace; + /** * Splits this SearchSpace into two halves at the specified codepoint index. 
* The 'head' component will maximally re-use existing cached data, while the diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts index ab5938b1608..5202e55e5c5 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts @@ -102,7 +102,7 @@ describe('ContextToken', function() { const token2 = new ContextToken(plainModel, "'"); const token3 = new ContextToken(plainModel, "t"); - const merged = ContextToken.merge([token1, token2, token3], plainModel); + const merged = ContextToken.merge([token1, token2, token3]); assert.equal(merged.exampleInput, "can't"); token1.inputSegments.forEach((entry) => assert.isTrue(merged.inputSegments.indexOf(entry) > -1)); token2.inputSegments.forEach((entry) => assert.isTrue(merged.inputSegments.indexOf(entry) > -1)); @@ -155,7 +155,7 @@ describe('ContextToken', function() { subsetId: srcSubsetId }, [{sample: {insert: 't', deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}]); - const merged = ContextToken.merge([token1, token2, token3], plainModel); + const merged = ContextToken.merge([token1, token2, token3]); assert.equal(merged.exampleInput, "can't"); assert.deepEqual(merged.inputSegments, [ { segment: { @@ -253,7 +253,7 @@ describe('ContextToken', function() { subsetId: srcSubsetIds[3] }, [{sample: srcTransforms[3], p: 1}]); - const merged = ContextToken.merge(tokensToMerge, plainModel); + const merged = ContextToken.merge(tokensToMerge); assert.equal(merged.exampleInput, "applesandsourgrapes"); assert.deepEqual(merged.inputSegments, srcTransforms.map((t, i) => ({ segment: { @@ -352,7 +352,7 @@ describe('ContextToken', function() { subsetId: srcSubsetIds[3] }, [{sample: srcTransforms[3], p: 1}]); - const merged = 
ContextToken.merge(tokensToMerge, plainModel); + const merged = ContextToken.merge(tokensToMerge); assert.equal(merged.exampleInput, toMathematicalSMP("applesandsourgrapes")); assert.deepEqual(merged.inputSegments, srcTransforms.map((t, i) => ({ segment: { From b6ef8d099ff1d022a839c17013226d61da4ffcfb Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Wed, 29 Oct 2025 13:43:52 -0500 Subject: [PATCH 2/9] feat(web): adds SearchPath merging unit tests --- .../correction-search/search-path.tests.ts | 475 +++++++++++++++++- 1 file changed, 474 insertions(+), 1 deletion(-) diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts index 4a832d6feaa..5569c9553a3 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts @@ -9,10 +9,13 @@ import { assert } from 'chai'; +import { LexicalModelTypes } from '@keymanapp/common-types'; import { KMWString } from '@keymanapp/web-utils'; import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; -import { models, SearchPath } from '@keymanapp/lm-worker/test-index'; +import { models, SearchPath, TokenInputSource } from '@keymanapp/lm-worker/test-index'; +import Distribution = LexicalModelTypes.Distribution; +import Transform = LexicalModelTypes.Transform; import TrieModel = models.TrieModel; const testModel = new TrieModel(jsonFixture('models/tries/english-1000')); @@ -1371,4 +1374,474 @@ describe('SearchPath', () => { assert.deepEqual((tail as SearchPath).inputSource, tailTarget.inputSource); }); }); + + // Placed after `split()` because many cases mock a reversal of split-test results. 
+ describe('merge()', () => { + /* + * To define: + * - merging a standard case + * - merging a split BMP case + * - merging a standard SMP case + * - merging a split SMP case + * - merging a case where the deleteLeft was split from the insert + * - splitIndex = 0, but the deleteLeft is (conceptually) before that. + * - this (empty) + param (full) + * - this (full) + param (empty) + * - merging with distributions (no split) + * - merging with distributions (and a definite split) + * + * - biglargetransform for single-input multi-split remerge + * - merging a three-way split should be associative (not dependent on order) so + * long as the relative positions are correct + * + * - "cello" case(s) covers... + * - deleteLeft split from insert + * - a straight-up split (mid-insert) + * - standard case (no distrib) + * - with head + tail index inclusion, the empty + full versions + * - SMP variant: the SMP cases. + * + * - then we may need a "merging with distributions" coverage + * - can prob make a simple BMP mocked version... + * - and a simple SMP mocked version + * - is actually pretty-much covered anyway... I believe. 
+ */ + + // Covers cases where a single "input" was split into more than two fragments + describe(`previously-split token comprised of single titanic transform: biglargetransform`, () => { + const buildPath = () => { + const distributions = [ + [{ sample: {insert: 'big', deleteLeft: 0, id: 11}, p: 1 }], + [{ sample: {insert: 'large', deleteLeft: 0, id: 11}, p: 1 }], + [{ sample: {insert: 'transform', deleteLeft: 0, id: 11}, p: 1 }] + ]; + + const originalInputBase: TokenInputSource = { + trueTransform: {insert: 'biglargetransform', deleteLeft: 0, id: 11}, + inputStartIndex: 0, + bestProbFromSet: 1 + }; + + const originalInputs = [0, 3, 8].map(n => ({...originalInputBase, inputStartIndex: n})); + + const paths = distributions.map((d, i) => new SearchPath(new SearchPath(testModel), d, originalInputs[i])); + + return { + paths, + distributions, + originalInputs + }; + } + + const checkFinalStateAssertions = (merged: SearchPath, originalInput: TokenInputSource) => { + assert.equal(merged.inputCount, 1); + assert.isTrue(merged instanceof SearchPath); + assert.deepEqual(merged.bestExample.text, "biglargetransform"); + assert.deepEqual((merged as SearchPath).inputs, [ + { sample: { insert: 'biglargetransform', deleteLeft: 0, id: 11 }, p: 1 } + ]); + assert.deepEqual((merged as SearchPath).inputSource, originalInput); + // TODO: check the 'source' input data (here and in callers) + } + + it('setup: constructs paths properly', () => { + const { paths, distributions, originalInputs } = buildPath(); + + assert.equal(paths.length, 3); + assert.equal(distributions.length, paths.length); + paths.forEach((p, i) => { + assert.equal(p.inputCount, 1); + assert.equal(distributions[i].length, p.inputCount); + assert.equal(p.codepointLength, KMWString.length(distributions[i][0].sample.insert)); + assert.deepEqual(p.bestExample, { + text: ['big', 'large', 'transform'][i], + p: 1 + }); + assert.equal(p.parents[0].inputCount, 0); + assert.isTrue(p.hasInputs([distributions[i]])); + }); + + 
originalInputs.forEach((original) => { + assert.deepEqual({...original, inputStartIndex: 0}, {...originalInputs[0], inputStartIndex: 0}); + }); + }); + + it('merging order: big + large, then + transform', () => { + const { paths, originalInputs } = buildPath(); + + const headMerge = paths[0].merge(paths[1]); + + // Assertions + assert.equal(headMerge.inputCount, 1); + assert.isTrue(headMerge instanceof SearchPath); + assert.deepEqual(headMerge.bestExample.text, "biglarge"); + assert.deepEqual((headMerge as SearchPath).inputs, [ + { sample: { insert: 'biglarge', deleteLeft: 0, id: 11 }, p: 1 } + ]); + assert.deepEqual((headMerge as SearchPath).inputSource, originalInputs[0]); + + const fullMerge = headMerge.merge(paths[2]); + checkFinalStateAssertions(fullMerge as SearchPath, originalInputs[0]); + }); + + it('merging order: large + transform, then + big', () => { + const { paths, originalInputs } = buildPath(); + + const tailMerge = paths[1].merge(paths[2]); + + // Assertions + assert.equal(tailMerge.inputCount, 1); + assert.isTrue(tailMerge instanceof SearchPath); + assert.deepEqual(tailMerge.bestExample.text, "largetransform"); + assert.deepEqual((tailMerge as SearchPath).inputs, [ + { sample: { insert: 'largetransform', deleteLeft: 0, id: 11 }, p: 1 } + ]); + assert.deepEqual((tailMerge as SearchPath).inputSource, originalInputs[1]); + + const fullMerge = paths[0].merge(tailMerge); + checkFinalStateAssertions(fullMerge as SearchPath, originalInputs[0]); + }); + }); + + // Covers many common aspects of SearchPath merging, though not merging of + // multi-member distributions. + describe(`previously-split token comprised of complex, rewriting transforms: cello`, () => { + const buildPath = (inputs: Distribution[], sources: TokenInputSource[], root?: SearchPath) => { + return inputs.reduce((path, input, index) => new SearchPath(path, input, sources[index]), root ?? 
new SearchPath(testModel)); + } + + const buildFixtures = () => { + const trueDistributions = [ + [ + { sample: {insert: 'ca', deleteLeft: 0, id: 11}, p: 1 } + ], [ + { sample: {insert: 'ent', deleteLeft: 1, id: 12}, p: 1 } + ], [ + { sample: {insert: 'llar', deleteLeft: 2, id: 13}, p: 1 } + ], [ + { sample: {insert: 'o', deleteLeft: 2, id: 14}, p: 1 } + ] + ]; + + const trueInputSources: TokenInputSource[] = trueDistributions.map((d) => { + return { + trueTransform: d[0].sample, + bestProbFromSet: d[0].p, + inputStartIndex: 0 + } + }) + + const commonRoot = new SearchPath(testModel); + const mergeTarget = buildPath(trueDistributions, trueInputSources, commonRoot); + + // Index: the position of the split. + const splits: [SearchPath, SearchPath][] = []; + + // Case 0: bare head path, reproduced token (on different root) + splits.push([ + commonRoot, buildPath(trueDistributions, trueInputSources) + ]); + + // Case 1: the split happens in token 2 (index 1), with the deleteLeft + // split from the insert. + splits.push([ + buildPath([ + trueDistributions[0], + [{ sample: {insert: '', deleteLeft: 1, id: 12}, p: 1 }] + ], trueInputSources.slice(0, 2), commonRoot), + buildPath([ + [{ sample: {insert: 'ent', deleteLeft: 0, id: 12}, p: 1 }], + ...trueDistributions.slice(2) + ], [ + {...trueInputSources[1], inputStartIndex: 0}, + ...trueInputSources.slice(2) + ]) + ]); + + // Case 2: the split happens in token 3 (index 2), with the deleteLeft + // split from the insert. + splits.push([ + buildPath([ + ...trueDistributions.slice(0, 2), + [{ sample: {insert: '', deleteLeft: 2, id: 13}, p: 1 }] + ], trueInputSources.slice(0, 3), commonRoot), + buildPath([ + [{ sample: {insert: 'llar', deleteLeft: 0, id: 13}, p: 1 }], + ...trueDistributions.slice(3) + ], [ + {...trueInputSources[2], inputStartIndex: 0}, + ...trueInputSources.slice(3) + ]) + ]); + + // Case 3: the split happens in token 3 (index 2), in the middle of the + // insert. 
+ splits.push([ + buildPath([ + ...trueDistributions.slice(0, 2), + [{ sample: {insert: 'l', deleteLeft: 2, id: 13}, p: 1 }] + ], trueInputSources.slice(0, 3), commonRoot), + buildPath([ + [{ sample: {insert: 'lar', deleteLeft: 0, id: 13}, p: 1 }], + ...trueDistributions.slice(3) + ], [ + {...trueInputSources[2], inputStartIndex: 1}, + ...trueInputSources.slice(3) + ]) + ]); + + // Case 4: the split happens in token 4 (index 3), with the deleteLeft + // split from the insert. + splits.push([ + buildPath([ + ...trueDistributions.slice(0, 3), + [{ sample: {insert: '', deleteLeft: 2, id: 14}, p: 1 }] + ], trueInputSources.slice(), commonRoot), + buildPath([ + [{ sample: {insert: 'o', deleteLeft: 0, id: 14}, p: 1 }] + ], [ + {...trueInputSources[3], inputStartIndex: 0}, + ]) + ]); + + // Case 5: the split happens at the token's end, leaving the tail + // as a fresh, empty token. + splits.push([ + buildPath(trueDistributions, trueInputSources, commonRoot), + new SearchPath(testModel) + ]); + + return { + mergeTarget, + splits, + trueDistributions + }; + } + + const runCommonAssertions = (splitIndex: number) => { + const { mergeTarget, splits, trueDistributions } = buildFixtures(); + const splitToTest = splits[splitIndex]; + + const remergedPath = splitToTest[0].merge(splitToTest[1]); + + assert.deepEqual(remergedPath.bestExample, mergeTarget.bestExample); + assert.equal(remergedPath.inputCount, mergeTarget.inputCount); + assert.equal(remergedPath.codepointLength, mergeTarget.codepointLength); + assert.sameDeepOrderedMembers(remergedPath.sourceIdentifiers, mergeTarget.sourceIdentifiers); + assert.isTrue(remergedPath.hasInputs(trueDistributions)); + } + + it('setup: constructs path properly', () => { + const { mergeTarget, splits } = buildFixtures(); + + const targetText = mergeTarget.bestExample.text; + + for(let i = 0; i < splits.length; i++) { + const splitSet = splits[i]; + + assert.equal(splitSet[0].codepointLength, i); + assert.equal(splitSet[0].bestExample.text, 
KMWString.substring(targetText, 0, i)); + assert.equal(splitSet[1].codepointLength, KMWString.length(targetText) - i); + assert.equal(splitSet[1].bestExample.text, KMWString.substring(targetText, i)); + } + }); + + it('splits properly at index 0', () => { + runCommonAssertions(0); + }); + + it('splits properly at index 1', () => { + runCommonAssertions(1); + }); + + it('splits properly at index 2', () => { + runCommonAssertions(2); + }); + + it('splits properly at index 3', () => { + runCommonAssertions(3); + }); + + it('splits properly at index 4', () => { + runCommonAssertions(4); + }); + + it('splits properly at index 5', () => { + runCommonAssertions(5); + }); + }); + + // Same as the prior set, but now with non-BMP text! + describe(`previously-split token comprised of complex, rewriting non-BMP transforms`, () => { + const buildPath = (inputs: Distribution[], sources: TokenInputSource[], root?: SearchPath) => { + return inputs.reduce((path, input, index) => new SearchPath(path, input, sources[index]), root ?? new SearchPath(testModel)); + } + + const buildFixtures = () => { + const trueDistributions = [ + [ + { sample: {insert: toMathematicalSMP('ca'), deleteLeft: 0, id: 11}, p: 1 } + ], [ + { sample: {insert: toMathematicalSMP('ent'), deleteLeft: 1, id: 12}, p: 1 } + ], [ + { sample: {insert: toMathematicalSMP('llar'), deleteLeft: 2, id: 13}, p: 1 } + ], [ + { sample: {insert: toMathematicalSMP('o'), deleteLeft: 2, id: 14}, p: 1 } + ] + ]; + + const trueInputSources: TokenInputSource[] = trueDistributions.map((d) => { + return { + trueTransform: d[0].sample, + bestProbFromSet: d[0].p, + inputStartIndex: 0 + } + }) + + const commonRoot = new SearchPath(testModel); + const mergeTarget = buildPath(trueDistributions, trueInputSources, commonRoot); + + // Index: the position of the split. 
+ const splits: [SearchPath, SearchPath][] = []; + + // Case 0: bare head path, reproduced token (on different root) + splits.push([ + commonRoot, buildPath(trueDistributions, trueInputSources) + ]); + + // Case 1: the split happens in token 2 (index 1), with the deleteLeft + // split from the insert. + splits.push([ + buildPath([ + trueDistributions[0], + [{ sample: {insert: toMathematicalSMP(''), deleteLeft: 1, id: 12}, p: 1 }] + ], trueInputSources.slice(0, 2), commonRoot), + buildPath([ + [{ sample: {insert: toMathematicalSMP('ent'), deleteLeft: 0, id: 12}, p: 1 }], + ...trueDistributions.slice(2) + ], [ + {...trueInputSources[1], inputStartIndex: 0}, + ...trueInputSources.slice(2) + ]) + ]); + + // Case 2: the split happens in token 3 (index 2), with the deleteLeft + // split from the insert. + splits.push([ + buildPath([ + ...trueDistributions.slice(0, 2), + [{ sample: {insert: toMathematicalSMP(''), deleteLeft: 2, id: 13}, p: 1 }] + ], trueInputSources.slice(0, 3), commonRoot), + buildPath([ + [{ sample: {insert: toMathematicalSMP('llar'), deleteLeft: 0, id: 13}, p: 1 }], + ...trueDistributions.slice(3) + ], [ + {...trueInputSources[2], inputStartIndex: 0}, + ...trueInputSources.slice(3) + ]) + ]); + + // Case 3: the split happens in token 3 (index 2), in the middle of the + // insert. + splits.push([ + buildPath([ + ...trueDistributions.slice(0, 2), + [{ sample: {insert: toMathematicalSMP('l'), deleteLeft: 2, id: 13}, p: 1 }] + ], trueInputSources.slice(0, 3), commonRoot), + buildPath([ + [{ sample: {insert: toMathematicalSMP('lar'), deleteLeft: 0, id: 13}, p: 1 }], + ...trueDistributions.slice(3) + ], [ + {...trueInputSources[2], inputStartIndex: 1}, + ...trueInputSources.slice(3) + ]) + ]); + + // Case 4: the split happens in token 4 (index 3), with the deleteLeft + // split from the insert. 
+ splits.push([ + buildPath([ + ...trueDistributions.slice(0, 3), + [{ sample: {insert: toMathematicalSMP(''), deleteLeft: 2, id: 14}, p: 1 }] + ], trueInputSources.slice(), commonRoot), + buildPath([ + [{ sample: {insert: toMathematicalSMP('o'), deleteLeft: 0, id: 14}, p: 1 }] + ], [ + {...trueInputSources[3], inputStartIndex: 0}, + ]) + ]); + + // Case 5: the split happens at the token's end, leaving the tail + // as a fresh, empty token. + splits.push([ + buildPath(trueDistributions, trueInputSources, commonRoot), + new SearchPath(testModel) + ]); + + return { + mergeTarget, + splits, + trueDistributions + }; + } + + const runCommonAssertions = (splitIndex: number) => { + const { mergeTarget, splits, trueDistributions } = buildFixtures(); + const splitToTest = splits[splitIndex]; + + const remergedPath = splitToTest[0].merge(splitToTest[1]); + + assert.deepEqual(remergedPath.bestExample, mergeTarget.bestExample); + assert.equal(remergedPath.inputCount, mergeTarget.inputCount); + assert.equal(remergedPath.codepointLength, mergeTarget.codepointLength); + assert.sameDeepOrderedMembers(remergedPath.sourceIdentifiers, mergeTarget.sourceIdentifiers); + assert.isTrue(remergedPath.hasInputs(trueDistributions)); + } + + it('setup: constructs path properly', () => { + // Validate that an SMP-conversion has occurred. 
+ assert.notEqual(toMathematicalSMP("cello"), "cello"); + assert.equal(toMathematicalSMP("cello").length, "cello".length * 2); + assert.equal(KMWString.length(toMathematicalSMP("cello")), KMWString.length("cello")); + + const { mergeTarget, splits } = buildFixtures(); + + const targetText = mergeTarget.bestExample.text; + assert.equal(targetText, toMathematicalSMP("cello")); + + for(let i = 0; i < splits.length; i++) { + const splitSet = splits[i]; + + assert.equal(splitSet[0].codepointLength, i); + assert.equal(splitSet[0].bestExample.text, KMWString.substring(targetText, 0, i)); + assert.equal(splitSet[1].codepointLength, KMWString.length(targetText) - i); + assert.equal(splitSet[1].bestExample.text, KMWString.substring(targetText, i)); + } + }); + + it('splits properly at index 0', () => { + runCommonAssertions(0); + }); + + it('splits properly at index 1', () => { + runCommonAssertions(1); + }); + + it('splits properly at index 2', () => { + runCommonAssertions(2); + }); + + it('splits properly at index 3', () => { + runCommonAssertions(3); + }); + + it('splits properly at index 4', () => { + runCommonAssertions(4); + }); + + it('splits properly at index 5', () => { + runCommonAssertions(5); + }); + }); + }); }); \ No newline at end of file From df5eefa5bd41a8a1a5cb09fc2f85b4f42f6193f1 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Wed, 29 Oct 2025 13:49:22 -0500 Subject: [PATCH 3/9] change(web): remove SearchPath.inputSequence --- .../src/main/correction/search-path.ts | 13 ------------- .../src/main/correction/search-space.ts | 9 --------- 2 files changed, 22 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts index b884a5437e9..e17016f4e9e 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts @@ 
-137,19 +137,6 @@ export class SearchPath implements SearchSpace { this.bestProbInEdge = 1; } - /** - * Retrieves the sequences of inputs that led to this SearchPath. - */ - public get inputSequence(): Distribution[] { - if(this.parents[0]) { - return [...this.parents[0].inputSequence, this.inputs]; - } else if(this.inputs) { - return [this.inputs]; - } else { - return []; - } - } - public get constituentPaths(): SearchPath[][] { const parentPaths = this.parents[0]?.constituentPaths ?? []; if(parentPaths.length > 0) { diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts index 8ef7b523ea7..3ff21b4ab77 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts @@ -190,15 +190,6 @@ export interface SearchSpace { */ readonly inputCount: number; - /** - * Retrieves the sequence of inputs that led to this SearchSpace. - * - * THIS WILL BE REMOVED SHORTLY in favor of `constituentPaths` below, which - * provides an improved view into the data and models multiple paths to the - * space when they exist. (Once SearchPath takes on merging & splitting) - */ - readonly inputSequence: Distribution[]; - /** * Reports the length in codepoints of corrected text represented by completed * paths from this instance. 
From c3dbc62b703d43afe8ce95c9deca2e2b2d553914 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Wed, 29 Oct 2025 13:57:55 -0500 Subject: [PATCH 4/9] fix(web): add safeguard for split-distribution merging --- .../src/main/correction/search-path.ts | 37 +++++++++++-------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts index e17016f4e9e..1ce98d79549 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts @@ -280,23 +280,28 @@ export class SearchPath implements SearchSpace { // per-codepoint. if(localInputId != spaceInputId || localInputId === undefined) { return new SearchPath(parentMerge, space.inputs, space.inputSource); - } else { - // Get the twin halves that were split. - // Assumption: the two halves are in their original order, etc. - const localInputs = this.inputs; - const spaceInputs = space.inputs; - - // Merge them! - const mergedInputs = localInputs?.map((entry, index) => { - return { - sample: buildMergedTransform(entry.sample, spaceInputs[index].sample), - p: entry.p - } - }); - - // Now to re-merge the two halves. - return new SearchPath(this.parentSpace, mergedInputs, this.inputSource); } + // Get the twin halves that were split. + // Assumption: the two halves are in their original order, etc. + const localInputs = this.inputs; + const spaceInputs = space.inputs; + + // Sanity check - ensure that the input distributions have the same length; + // if not, this shouldn't represent a SearchPath split! + if(localInputs.length != spaceInputs.length) { + return new SearchPath(parentMerge, space.inputs, space.inputSource); + } + + // Merge them! 
+ const mergedInputs = localInputs?.map((entry, index) => { + return { + sample: buildMergedTransform(entry.sample, spaceInputs[index].sample), + p: entry.p + } + }); + + // Now to re-merge the two halves. + return new SearchPath(this.parentSpace, mergedInputs, this.inputSource); } else { // If the parent was a cluster, the cluster itself is the merge. return parentMerge; From 9c56b6002912fa9bf7f03c7d530f3dd06bafffb1 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Wed, 29 Oct 2025 14:22:50 -0500 Subject: [PATCH 5/9] feat(web): adds split-distribution SearchPath re-merge unit test --- .../correction-search/search-path.tests.ts | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts index 5569c9553a3..2ce5047c225 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts @@ -1843,5 +1843,59 @@ describe('SearchPath', () => { runCommonAssertions(5); }); }); + + it('correctly merges paths previously split mid-input', () => { + let path = new SearchPath(testModel); + const startSample = {sample: { insert: 'a', deleteLeft: 0 }, p: 1} + path = new SearchPath(path, [startSample], startSample); + + const inputDistribution = [ + {sample: { insert: 'four', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 0.4}, + {sample: { insert: 'then', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 0.3}, + {sample: { insert: 'nine', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 0.2}, + {sample: { insert: 'what', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 0.06}, + {sample: { insert: 'cent', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 0.04} + ]; + + const mergeTarget = new SearchPath(path, inputDistribution, 
inputDistribution[0]); + assert.equal(mergeTarget.codepointLength, 4); + assert.equal(mergeTarget.inputCount, 2); + + // This test models a previous split at codepoint index 2, splitting + // the input distribution accordingly. (Note: deleteLeft = 1!) + const headDistributionSplit = [ + {sample: { insert: 'fo', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 0.4}, + {sample: { insert: 'th', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 0.3}, + {sample: { insert: 'ni', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 0.2}, + {sample: { insert: 'wh', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 0.06}, + {sample: { insert: 'ce', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 0.04} + ]; + const headPath = new SearchPath( + path, headDistributionSplit, inputDistribution[0] + ); + + const tailDistributionSplit = [ + {sample: { insert: 'ur', deleteLeft: 0, deleteRight: 0, id: 42 }, p: 0.4}, + {sample: { insert: 'en', deleteLeft: 0, deleteRight: 0, id: 42 }, p: 0.3}, + {sample: { insert: 'ne', deleteLeft: 0, deleteRight: 0, id: 42 }, p: 0.2}, + {sample: { insert: 'at', deleteLeft: 0, deleteRight: 0, id: 42 }, p: 0.06}, + {sample: { insert: 'nt', deleteLeft: 0, deleteRight: 0, id: 42 }, p: 0.04} + ]; + const tailPath = new SearchPath( + new SearchPath(testModel), tailDistributionSplit, { + trueTransform: inputDistribution[0].sample, + bestProbFromSet: inputDistribution[0].p, + inputStartIndex: 2 + } + ); + + const remerged = headPath.merge(tailPath); + + assert.deepEqual(remerged.bestExample, mergeTarget.bestExample); + assert.equal(remerged.inputCount, 2); + assert.isTrue(remerged instanceof SearchPath); + assert.deepEqual((remerged as SearchPath).inputs, inputDistribution); + assert.isTrue(remerged.hasInputs([[startSample], inputDistribution])); + }); }); }); \ No newline at end of file From e966b24526bfa0ff2261ce579ed3397680779b1e Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Mon, 10 Nov 2025 15:17:22 -0600 Subject: [PATCH 6/9] fix(web): patches up unit tests post-rebase --- 
.../correction-search/search-path.tests.ts | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts index 2ce5047c225..2a2d57668f1 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts @@ -12,7 +12,7 @@ import { assert } from 'chai'; import { LexicalModelTypes } from '@keymanapp/common-types'; import { KMWString } from '@keymanapp/web-utils'; import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; -import { models, SearchPath, TokenInputSource } from '@keymanapp/lm-worker/test-index'; +import { generateSubsetId, models, SearchPath, TokenInputSource } from '@keymanapp/lm-worker/test-index'; import Distribution = LexicalModelTypes.Distribution; import Transform = LexicalModelTypes.Transform; @@ -1419,7 +1419,8 @@ describe('SearchPath', () => { const originalInputBase: TokenInputSource = { trueTransform: {insert: 'biglargetransform', deleteLeft: 0, id: 11}, inputStartIndex: 0, - bestProbFromSet: 1 + bestProbFromSet: 1, + subsetId: generateSubsetId() }; const originalInputs = [0, 3, 8].map(n => ({...originalInputBase, inputStartIndex: n})); @@ -1527,7 +1528,8 @@ describe('SearchPath', () => { return { trueTransform: d[0].sample, bestProbFromSet: d[0].p, - inputStartIndex: 0 + inputStartIndex: 0, + subsetId: generateSubsetId() } }) @@ -1694,7 +1696,8 @@ describe('SearchPath', () => { return { trueTransform: d[0].sample, bestProbFromSet: d[0].p, - inputStartIndex: 0 + inputStartIndex: 0, + subsetId: generateSubsetId() } }) @@ -1871,7 +1874,12 @@ describe('SearchPath', () => { {sample: { insert: 'ce', deleteLeft: 1, deleteRight: 0, id: 42 }, p: 
0.04} ]; const headPath = new SearchPath( - path, headDistributionSplit, inputDistribution[0] + path, headDistributionSplit, { + trueTransform: inputDistribution[0].sample, + bestProbFromSet: inputDistribution[0].p, + inputStartIndex: 0, + subsetId: mergeTarget.inputSource.subsetId + } ); const tailDistributionSplit = [ @@ -1885,7 +1893,8 @@ describe('SearchPath', () => { new SearchPath(testModel), tailDistributionSplit, { trueTransform: inputDistribution[0].sample, bestProbFromSet: inputDistribution[0].p, - inputStartIndex: 2 + inputStartIndex: 2, + subsetId: mergeTarget.inputSource.subsetId } ); From dc3965b3a4bff9fa0558494c90698640a9ed73a2 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Mon, 10 Nov 2025 16:20:50 -0600 Subject: [PATCH 7/9] feat(web): adds isSameSpace, enhances merging of previously-split tokens --- .../src/main/correction/search-path.ts | 48 ++++++++++++++++++- .../src/main/correction/search-space.ts | 2 + .../correction-search/search-path.tests.ts | 14 +++--- 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts index 1ce98d79549..823b2e4cdfc 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts @@ -260,7 +260,6 @@ export class SearchPath implements SearchSpace { const parentMerges = space.parents?.length > 0 ? space.parents.map((p) => this.merge(p)) : [this]; // if parentMerges.length > 0, is a SearchCluster. - // const parentMerge = parentMerges.length > 0 ? new SearchCluster(parentMerges) : parentMerges[0]; const parentMerge = parentMerges[0]; // Special case: if we've reached the head of the space to be merged, check @@ -301,7 +300,16 @@ export class SearchPath implements SearchSpace { }); // Now to re-merge the two halves. 
- return new SearchPath(this.parentSpace, mergedInputs, this.inputSource); + const mergedInputSource = { + ...this.inputSource, + inputSplitIndex: space.inputSource.inputSplitIndex + }; + + if(mergedInputSource.inputSplitIndex == undefined) { + delete mergedInputSource.inputSplitIndex; + } + + return new SearchPath(this.parentSpace, mergedInputs, mergedInputSource); } else { // If the parent was a cluster, the cluster itself is the merge. @@ -536,4 +544,40 @@ return components.join('+'); } + + isSameSpace(space: SearchSpace): boolean { + // Easiest cases: when the instances or their `spaceId` matches, we have + // a perfect match. + if(this == space || this.spaceId == space.spaceId) { + return true; + } + + // If it's falsy or a different SearchSpace type, that's an easy filter. + if(!space || !(space instanceof SearchPath)) { + return false; + } + + // If the most recent 'input source' was not triggered from the same input + // subset, it's not a match. + if(this.inputSource?.subsetId != space.inputSource?.subsetId) { + return false; + } + + // We check the indices of the input's split if one occurred. + if(this.inputSource?.inputSplitIndex != space.inputSource?.inputSplitIndex) { + return false; + } + + if(this.inputSource?.inputStartIndex != space.inputSource?.inputStartIndex) { + return false; + } + + return true; + + // Commented out b/c parentSpace-checks cause unit-test ID issues after... a... split. + // + // // Finally, we recursively verify that the parent matches. If there IS no parent, + // // we verify that _that_ aspect matches. + // return this.parentSpace?.isSameSpace(space.parentSpace) ?? 
this.parentSpace == space.parentSpace; + } } \ No newline at end of file diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts index 3ff21b4ab77..435ebfeca91 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts @@ -240,4 +240,6 @@ export interface SearchSpace { * Intended only for use during unit testing. */ readonly constituentPaths: SearchPath[][]; + + isSameSpace(space: SearchSpace): boolean; } \ No newline at end of file diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts index 2a2d57668f1..aa6e48afd24 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts @@ -1799,6 +1799,7 @@ describe('SearchPath', () => { assert.equal(remergedPath.codepointLength, mergeTarget.codepointLength); assert.sameDeepOrderedMembers(remergedPath.sourceIdentifiers, mergeTarget.sourceIdentifiers); assert.isTrue(remergedPath.hasInputs(trueDistributions)); + assert.isTrue(remergedPath.isSameSpace(mergeTarget)); } it('setup: constructs path properly', () => { @@ -1822,27 +1823,27 @@ describe('SearchPath', () => { } }); - it('splits properly at index 0', () => { + it('merges tokens previously split at index 0', () => { runCommonAssertions(0); }); - it('splits properly at index 1', () => { + it('merges tokens previously split at index 1', () => { runCommonAssertions(1); }); - it('splits properly at index 2', () => { + it('merges tokens previously split at index 2', () => { runCommonAssertions(2); }); - it('splits 
properly at index 3', () => { + it('merges tokens previously split at index 3', () => { runCommonAssertions(3); }); - it('splits properly at index 4', () => { + it('merges tokens previously split at index 4', () => { runCommonAssertions(4); }); - it('splits properly at index 5', () => { + it('merges tokens previously split at index 5', () => { runCommonAssertions(5); }); }); @@ -1905,6 +1906,7 @@ describe('SearchPath', () => { assert.isTrue(remerged instanceof SearchPath); assert.deepEqual((remerged as SearchPath).inputs, inputDistribution); assert.isTrue(remerged.hasInputs([[startSample], inputDistribution])); + assert.isTrue(remerged.isSameSpace(mergeTarget)); }); }); }); \ No newline at end of file From 4f1be8a4b7573f2bf1070b738067c62a88b8cbd9 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Tue, 11 Nov 2025 16:27:36 -0600 Subject: [PATCH 8/9] fix(web): post-rebase .merge() patchup --- .../src/main/correction/search-path.ts | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts index 823b2e4cdfc..07b00649506 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts @@ -302,11 +302,14 @@ export class SearchPath implements SearchSpace { // Now to re-merge the two halves. 
const mergedInputSource = { ...this.inputSource, - inputSplitIndex: space.inputSource.inputSplitIndex + segment: { + ...this.inputSource.segment, + end: space.inputSource.segment.end + } }; - if(mergedInputSource.inputSplitIndex == undefined) { - delete mergedInputSource.inputSplitIndex; + if(mergedInputSource.segment.end == undefined) { + delete mergedInputSource.segment.end; } return new SearchPath(this.parentSpace, mergedInputs, mergedInputSource); @@ -564,11 +567,11 @@ export class SearchPath implements SearchSpace { } // We check the indices of the input's split if one occurred. - if(this.inputSource?.inputSplitIndex != space.inputSource?.inputSplitIndex) { + if(this.inputSource?.segment.end != space.inputSource?.segment.end) { return false; } - if(this.inputSource?.inputStartIndex != space.inputSource?.inputStartIndex) { + if(this.inputSource?.segment.start != space.inputSource?.segment.start) { return false; } From 9b947b75161b0ef7551723ed59c74bf3123b10ff Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Tue, 11 Nov 2025 16:33:35 -0600 Subject: [PATCH 9/9] fix(web): post-rebase unit test patch-up --- .../correction-search/search-path.tests.ts | 69 +++++++++++-------- 1 file changed, 42 insertions(+), 27 deletions(-) diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts index aa6e48afd24..c62e15342aa 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-path.tests.ts @@ -12,7 +12,7 @@ import { assert } from 'chai'; import { LexicalModelTypes } from '@keymanapp/common-types'; import { KMWString } from '@keymanapp/web-utils'; import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; -import { generateSubsetId, models, 
SearchPath, TokenInputSource } from '@keymanapp/lm-worker/test-index'; +import { generateSubsetId, models, SearchPath, PathInputProperties } from '@keymanapp/lm-worker/test-index'; import Distribution = LexicalModelTypes.Distribution; import Transform = LexicalModelTypes.Transform; @@ -1416,9 +1416,12 @@ describe('SearchPath', () => { [{ sample: {insert: 'transform', deleteLeft: 0, id: 11}, p: 1 }] ]; - const originalInputBase: TokenInputSource = { - trueTransform: {insert: 'biglargetransform', deleteLeft: 0, id: 11}, - inputStartIndex: 0, + const originalInputBase: PathInputProperties = { + segment: { + trueTransform: {insert: 'biglargetransform', deleteLeft: 0, id: 11}, + transitionId: 11, + start: 0 + }, bestProbFromSet: 1, subsetId: generateSubsetId() }; @@ -1434,7 +1437,7 @@ describe('SearchPath', () => { }; } - const checkFinalStateAssertions = (merged: SearchPath, originalInput: TokenInputSource) => { + const checkFinalStateAssertions = (merged: SearchPath, originalInput: PathInputProperties) => { assert.equal(merged.inputCount, 1); assert.isTrue(merged instanceof SearchPath); assert.deepEqual(merged.bestExample.text, "biglargetransform"); @@ -1507,7 +1510,7 @@ describe('SearchPath', () => { // Covers many common aspects of SearchPath merging, though not merging of // multi-member distributions. describe(`previously-split token comprised of complex, rewriting transforms: cello`, () => { - const buildPath = (inputs: Distribution[], sources: TokenInputSource[], root?: SearchPath) => { + const buildPath = (inputs: Distribution[], sources: PathInputProperties[], root?: SearchPath) => { return inputs.reduce((path, input, index) => new SearchPath(path, input, sources[index]), root ?? 
new SearchPath(testModel)); } @@ -1524,11 +1527,14 @@ describe('SearchPath', () => { ] ]; - const trueInputSources: TokenInputSource[] = trueDistributions.map((d) => { + const trueInputSources: PathInputProperties[] = trueDistributions.map((d) => { return { - trueTransform: d[0].sample, + segment: { + trueTransform: d[0].sample, + transitionId: d[0].sample.id, + start: 0 + }, bestProbFromSet: d[0].p, - inputStartIndex: 0, subsetId: generateSubsetId() } }) @@ -1555,7 +1561,7 @@ describe('SearchPath', () => { [{ sample: {insert: 'ent', deleteLeft: 0, id: 12}, p: 1 }], ...trueDistributions.slice(2) ], [ - {...trueInputSources[1], inputStartIndex: 0}, + {...trueInputSources[1], segment: {...trueInputSources[1].segment, start: 0}}, ...trueInputSources.slice(2) ]) ]); @@ -1571,7 +1577,7 @@ describe('SearchPath', () => { [{ sample: {insert: 'llar', deleteLeft: 0, id: 13}, p: 1 }], ...trueDistributions.slice(3) ], [ - {...trueInputSources[2], inputStartIndex: 0}, + {...trueInputSources[2], segment: {...trueInputSources[2].segment, start: 0}}, ...trueInputSources.slice(3) ]) ]); @@ -1587,7 +1593,7 @@ describe('SearchPath', () => { [{ sample: {insert: 'lar', deleteLeft: 0, id: 13}, p: 1 }], ...trueDistributions.slice(3) ], [ - {...trueInputSources[2], inputStartIndex: 1}, + {...trueInputSources[2], segment: {...trueInputSources[2].segment, start: 1}}, ...trueInputSources.slice(3) ]) ]); @@ -1602,7 +1608,7 @@ describe('SearchPath', () => { buildPath([ [{ sample: {insert: 'o', deleteLeft: 0, id: 14}, p: 1 }] ], [ - {...trueInputSources[3], inputStartIndex: 0}, + {...trueInputSources[3], segment: {...trueInputSources[3].segment, start: 0}}, ]) ]); @@ -1629,7 +1635,7 @@ describe('SearchPath', () => { assert.deepEqual(remergedPath.bestExample, mergeTarget.bestExample); assert.equal(remergedPath.inputCount, mergeTarget.inputCount); assert.equal(remergedPath.codepointLength, mergeTarget.codepointLength); - assert.sameDeepOrderedMembers(remergedPath.sourceIdentifiers, 
mergeTarget.sourceIdentifiers); + assert.sameDeepOrderedMembers(remergedPath.inputSegments, mergeTarget.inputSegments); assert.isTrue(remergedPath.hasInputs(trueDistributions)); } @@ -1675,7 +1681,7 @@ describe('SearchPath', () => { // Same as the prior set, but now with non-BMP text! describe(`previously-split token comprised of complex, rewriting non-BMP transforms`, () => { - const buildPath = (inputs: Distribution[], sources: TokenInputSource[], root?: SearchPath) => { + const buildPath = (inputs: Distribution[], sources: PathInputProperties[], root?: SearchPath) => { return inputs.reduce((path, input, index) => new SearchPath(path, input, sources[index]), root ?? new SearchPath(testModel)); } @@ -1692,11 +1698,14 @@ describe('SearchPath', () => { ] ]; - const trueInputSources: TokenInputSource[] = trueDistributions.map((d) => { + const trueInputSources: PathInputProperties[] = trueDistributions.map((d) => { return { - trueTransform: d[0].sample, + segment: { + trueTransform: d[0].sample, + transitionId: d[0].sample.id, + start: 0 + }, bestProbFromSet: d[0].p, - inputStartIndex: 0, subsetId: generateSubsetId() } }) @@ -1723,7 +1732,7 @@ describe('SearchPath', () => { [{ sample: {insert: toMathematicalSMP('ent'), deleteLeft: 0, id: 12}, p: 1 }], ...trueDistributions.slice(2) ], [ - {...trueInputSources[1], inputStartIndex: 0}, + {...trueInputSources[1], segment: {...trueInputSources[1].segment, start: 0}}, ...trueInputSources.slice(2) ]) ]); @@ -1739,7 +1748,7 @@ describe('SearchPath', () => { [{ sample: {insert: toMathematicalSMP('llar'), deleteLeft: 0, id: 13}, p: 1 }], ...trueDistributions.slice(3) ], [ - {...trueInputSources[2], inputStartIndex: 0}, + {...trueInputSources[2], segment: {...trueInputSources[2].segment, start: 0}}, ...trueInputSources.slice(3) ]) ]); @@ -1755,7 +1764,7 @@ describe('SearchPath', () => { [{ sample: {insert: toMathematicalSMP('lar'), deleteLeft: 0, id: 13}, p: 1 }], ...trueDistributions.slice(3) ], [ - {...trueInputSources[2], 
inputStartIndex: 1}, + {...trueInputSources[2], segment: {...trueInputSources[2].segment, start: 1}}, ...trueInputSources.slice(3) ]) ]); @@ -1770,7 +1779,7 @@ describe('SearchPath', () => { buildPath([ [{ sample: {insert: toMathematicalSMP('o'), deleteLeft: 0, id: 14}, p: 1 }] ], [ - {...trueInputSources[3], inputStartIndex: 0}, + {...trueInputSources[3], segment: {...trueInputSources[3].segment, start: 0}}, ]) ]); @@ -1797,7 +1806,7 @@ describe('SearchPath', () => { assert.deepEqual(remergedPath.bestExample, mergeTarget.bestExample); assert.equal(remergedPath.inputCount, mergeTarget.inputCount); assert.equal(remergedPath.codepointLength, mergeTarget.codepointLength); - assert.sameDeepOrderedMembers(remergedPath.sourceIdentifiers, mergeTarget.sourceIdentifiers); + assert.sameDeepOrderedMembers(remergedPath.inputSegments, mergeTarget.inputSegments); assert.isTrue(remergedPath.hasInputs(trueDistributions)); assert.isTrue(remergedPath.isSameSpace(mergeTarget)); } @@ -1876,9 +1885,12 @@ describe('SearchPath', () => { ]; const headPath = new SearchPath( path, headDistributionSplit, { - trueTransform: inputDistribution[0].sample, + segment: { + trueTransform: inputDistribution[0].sample, + transitionId: inputDistribution[0].sample.id, + start: 0 + }, bestProbFromSet: inputDistribution[0].p, - inputStartIndex: 0, subsetId: mergeTarget.inputSource.subsetId } ); @@ -1892,9 +1904,12 @@ describe('SearchPath', () => { ]; const tailPath = new SearchPath( new SearchPath(testModel), tailDistributionSplit, { - trueTransform: inputDistribution[0].sample, + segment: { + trueTransform: inputDistribution[0].sample, + transitionId: inputDistribution[0].sample.id, + start: 2 + }, bestProbFromSet: inputDistribution[0].p, - inputStartIndex: 2, subsetId: mergeTarget.inputSource.subsetId } );