@@ -19,6 +19,7 @@ import { TokenizationPath } from './tokenization-subsets.js';

import LexicalModel = LexicalModelTypes.LexicalModel;
import Transform = LexicalModelTypes.Transform;
import { PathInputProperties } from './search-space.js';

// May be able to "get away" with 2 & 5 or so, but having extra will likely help
// with edit path stability.
@@ -561,7 +562,8 @@ export class ContextTokenization {
}

let appliedLength = 0;
for(let tailRelativeIndex of inputTransformKeys) {
for(let i = 0; i < inputTransformKeys.length; i++) {
const tailRelativeIndex = inputTransformKeys[i];
let distribution = inputs.map((i) => ({sample: i.sample.get(tailRelativeIndex), p: i.p}));
const tokenIndex = (tokenization.length - 1) + tailRelativeIndex;

@@ -585,16 +587,22 @@
if(affectedToken.inputCount == 0 && distribution[0].sample.deleteLeft != 0) {
distribution = distribution.map((mass) => ({sample: { ...mass.sample, deleteLeft: 0 }, p: mass.p }));
}
affectedToken.addInput({

const inputSource: PathInputProperties = {
segment: {
trueTransform: sourceInput,
transitionId: sourceInput.id,
start: appliedLength
},
bestProbFromSet: bestProbFromSet,
subsetId: tokenizationPath.inputSubsetId
}, distribution);
};
appliedLength += KMWString.length(distribution[0].sample.insert);
if(i + 1 < inputTransformKeys.length) {
inputSource.segment.end = appliedLength;
}

affectedToken.addInput(inputSource, distribution);

const tokenize = determineModelTokenizer(lexicalModel);
affectedToken.isWhitespace = tokenize({left: affectedToken.exampleInput, startOfBuffer: false, endOfBuffer: false}).left[0]?.isWhitespace ?? false;
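The bookkeeping in the hunk above advances `appliedLength` as each slice of the source transform's insert string is consumed, and records an exclusive `end` only when another slice follows. A minimal standalone sketch of that invariant, using a hypothetical `Segment` stand-in rather than the worker's `InputSegment` type and plain string lengths rather than `KMWString.length`:

```ts
// Sketch only: shows how `start`/`end` offsets are expected to line up when one
// transform's insert string is consumed by successive token inputs.
interface Segment { start: number; end?: number; }

function segmentInsert(sliceLengths: number[]): Segment[] {
  const segments: Segment[] = [];
  let appliedLength = 0;
  for (let i = 0; i < sliceLengths.length; i++) {
    const segment: Segment = { start: appliedLength };
    appliedLength += sliceLengths[i];
    // Only non-final slices record an exclusive `end`; the last slice runs to
    // the end of the insert string, so its `end` stays undefined.
    if (i + 1 < sliceLengths.length) {
      segment.end = appliedLength;
    }
    segments.push(segment);
  }
  return segments;
}

// 'biglargetransform' tokenized as 'big' + 'large' + 'transform':
// segmentInsert([3, 5, 9]) -> [{ start: 0, end: 3 }, { start: 3, end: 8 }, { start: 8 }]
```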
@@ -256,7 +256,7 @@ export class SearchPath implements SearchSpace {
this.selectionQueue = new PriorityQueue<SearchNode>(QUEUE_NODE_COMPARATOR, entries);
}

public split(charIndex: number): [SearchSpace, SearchSpace] {
public split(charIndex: number): [SearchSpace, SearchPath] {
const model = this.model;
const internalSplitIndex = charIndex - (this.codepointLength - this.edgeLength);

@@ -295,7 +295,13 @@
// don't append any part of it to the parent; it's actually clean.
const hasActualSplit = internalSplitIndex > 0 || this.inputs?.[0].sample.deleteLeft > 0;
const parent = hasActualSplit
? new SearchPath(this.parentSpace, firstSet, this.inputSource)
? new SearchPath(this.parentSpace, firstSet, {
...this.inputSource,
segment: {
...this.inputSource.segment,
end: this.inputSource.segment.start + internalSplitIndex
}
})
: this.parentSpace;
// construct two SearchPath instances based on the two sets!
return [
@@ -468,7 +474,12 @@

for(const source of sources) {
const i = source.segment.start;
components.push(`T${source.segment.transitionId}${i != 0 ? '@' + i : ''}`);
const j = source.segment.end;
let component = (`T${source.segment.transitionId}${i != 0 || j !== undefined ? '@' + i : ''}`);
if(j) {
component = component + '-' + j;
}
components.push(component);
}

return components.join('+');
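For illustration, the identifier components built above follow a simple pattern: `@start` appears when the segment either starts past index 0 or carries an `end`, and `-end` follows when an `end` is recorded; components for a path are then joined with `+`. A standalone sketch that mirrors the formatting logic (not the class's actual method):

```ts
// Standalone sketch mirroring the component formatting above; illustration only.
function formatComponent(transitionId: number, start: number, end?: number): string {
  let component = `T${transitionId}${start != 0 || end !== undefined ? '@' + start : ''}`;
  if (end) {
    component = component + '-' + end;
  }
  return component;
}

// formatComponent(13, 0)    -> 'T13'      (whole transform, no split)
// formatComponent(13, 0, 3) -> 'T13@0-3'  (left-hand piece of a split)
// formatComponent(13, 3, 8) -> 'T13@3-8'  (middle piece)
// formatComponent(13, 8)    -> 'T13@8'    (trailing remainder)
```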
@@ -60,6 +60,17 @@ export interface InputSegment {
* tokenized correction-search input.
*/
start: number

/**
* Marks the final index (exclusive) within the insert strings for the
* corresponding transitions' Transforms that are applied by the corresponding
* tokenized correction-search input.
*
* If undefined, no portion of the input-source transform has been split off on
* the right-hand side. Otherwise, this value should match the `start` value of
* the _next_ split-off component of the input source.
*/
end?: number;
}
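To make the relationship between `start` and `end` concrete: the docstring above requires that whenever a segment records an `end`, it equals the `start` of the next split-off component of the same source transform. A minimal sketch of that check, assuming only the fields shown here:

```ts
// Illustrative only: checks the documented invariant that a recorded `end`
// matches the `start` of the next split-off segment of the same source.
function segmentsAreContiguous(segments: { start: number, end?: number }[]): boolean {
  for (let i = 0; i + 1 < segments.length; i++) {
    if (segments[i].end !== undefined && segments[i].end !== segments[i + 1].start) {
      return false;
    }
  }
  return true;
}

// e.g. segmentsAreContiguous([{ start: 0, end: 3 }, { start: 3, end: 8 }, { start: 8 }]) -> true
```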

/**
@@ -6,6 +6,7 @@ export { ContextTracker } from './correction/context-tracker.js';
export { ContextTransition } from './correction/context-transition.js';
export * from './correction/distance-modeler.js';
export * from './correction/search-path.js';
export * from './correction/search-space.js';
export { ExtendedEditOperation, SegmentableDistanceCalculation } from './correction/segmentable-calculation.js';
export * from './correction/tokenization-subsets.js';
export * as correction from './correction/index.js';
@@ -14,7 +14,7 @@ import { default as defaultBreaker } from '@keymanapp/models-wordbreakers';
import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs';
import { LexicalModelTypes } from '@keymanapp/common-types';

import { ContextToken, correction, generateSubsetId, getBestMatches, models, preprocessInputSources, SearchPath } from '@keymanapp/lm-worker/test-index';
import { ContextToken, correction, generateSubsetId, getBestMatches, models, PathInputProperties, preprocessInputSources, SearchPath } from '@keymanapp/lm-worker/test-index';

import Distribution = LexicalModelTypes.Distribution;
import ExecutionTimer = correction.ExecutionTimer;
@@ -464,20 +464,29 @@ describe('ContextToken', function() {

assert.equal(resultsOfSplit.length, 3);
assert.sameOrderedMembers(resultsOfSplit.map(t => t.exampleInput), splitTextArray);
assert.sameDeepOrderedMembers(resultsOfSplit.map(t => t.inputSegments[0]), [0, 3, 8].map(i => ({
segment: {
trueTransform: {
insert: 'biglargetransform',
id: 13,
deleteLeft: 0,
deleteRight: 0
const offsets = [0, 3, 8];
assert.sameDeepOrderedMembers(resultsOfSplit.map(t => t.inputSegments[0]), [0, 1, 2].map(i => {
const inputSource: PathInputProperties = {
segment: {
trueTransform: {
insert: 'biglargetransform',
id: 13,
deleteLeft: 0,
deleteRight: 0
},
transitionId: 13,
start: offsets[i]
},
transitionId: 13,
start: i
},
bestProbFromSet: 1,
subsetId
})));
bestProbFromSet: 1,
subsetId
};

if(offsets[i+1] !== undefined) {
inputSource.segment.end = offsets[i+1];
}

return inputSource;
}));

for(let i = 0; i < resultsOfSplit.length; i++) {
assert.isTrue(resultsOfSplit[i].searchSpace.hasInputs([
@@ -549,7 +558,8 @@ describe('ContextToken', function() {
segment: {
trueTransform: keystrokeDistributions[1][0].sample,
transitionId: keystrokeDistributions[1][0].sample.id,
start: 0
start: 0,
end: 'arge'.length
},
bestProbFromSet: 1,
subsetId: subsetIds[1]
@@ -568,7 +578,8 @@
segment: {
trueTransform: keystrokeDistributions[2][0].sample,
transitionId: keystrokeDistributions[2][0].sample.id,
start: 0
start: 0,
end: 'ng'.length
},
bestProbFromSet: 1,
subsetId: subsetIds[2]
@@ -693,7 +704,8 @@ describe('ContextToken', function() {
segment: {
trueTransform: keystrokeDistributions[1][0].sample,
transitionId: keystrokeDistributions[1][0].sample.id,
start: 0
start: 0,
end: 'arge'.length
},
bestProbFromSet: 1,
subsetId: subsetIds[1]
@@ -711,7 +723,8 @@
segment: {
trueTransform: keystrokeDistributions[2][0].sample,
transitionId: keystrokeDistributions[2][0].sample.id,
start: 0
start: 0,
end: 'ng'.length
},
bestProbFromSet: 1,
subsetId: subsetIds[2]
@@ -465,7 +465,8 @@ describe('ContextTokenization', function() {
segment: {
trueTransform: inputTransform,
transitionId: inputTransform.id,
start: 0
start: 0,
end: 0
}, bestProbFromSet: 1,
subsetId
});
@@ -476,7 +477,8 @@
segment: {
trueTransform: inputTransform,
transitionId: inputTransform.id,
start: 0
start: 0,
end: 1 // captured the leading whitespace insert
}, bestProbFromSet: 1,
subsetId
}]);
@@ -1361,7 +1361,13 @@ describe('SearchPath', () => {
assert.isTrue(tail instanceof SearchPath);
assert.deepEqual((head as SearchPath).inputs, headTarget.inputs);
assert.deepEqual((tail as SearchPath).inputs, tailTarget.inputs);
assert.deepEqual((head as SearchPath).inputSource, headTarget.inputSource);
assert.deepEqual((head as SearchPath).inputSource, {
...headTarget.inputSource,
segment: {
...headTarget.inputSource.segment,
end: 2
}
});
assert.deepEqual((tail as SearchPath).inputSource, tailTarget.inputSource);
});
});