Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import { LexicalModelTypes } from '@keymanapp/common-types';
import { deepCopy, KMWString } from "@keymanapp/web-utils";

import { SearchPath } from "./search-path.js";
import { SearchSpace, TokenInputSource } from "./search-space.js";
import { SearchSpace, PathInputProperties } from "./search-space.js";
import { TokenSplitMap } from "./context-tokenization.js";

import Distribution = LexicalModelTypes.Distribution;
Expand Down Expand Up @@ -110,8 +110,11 @@ export class ContextToken {

rawTransformDistributions.forEach((entry) => {
searchSpace = new SearchPath(searchSpace, entry, {
trueTransform: entry[0].sample,
inputStartIndex: 0,
segment: {
trueTransform: entry[0].sample,
transitionId: entry[0].sample.id,
start: 0
},
bestProbFromSet: 1
});
});
Expand All @@ -124,7 +127,7 @@ export class ContextToken {
* Call this to record the original keystroke Transforms for the context range
* corresponding to this token.
*/
addInput(inputSource: TokenInputSource, distribution: Distribution<Transform>) {
addInput(inputSource: PathInputProperties, distribution: Distribution<Transform>) {
this._searchSpace = new SearchPath(this._searchSpace, distribution, inputSource);
}

Expand All @@ -143,8 +146,8 @@ export class ContextToken {
* Denotes the original keystroke Transforms comprising the range corresponding
* to this token.
*/
get inputRange() {
return this.searchSpace.sourceIdentifiers;
get inputSegments() {
return this.searchSpace.inputSegments;
}

/**
Expand All @@ -163,15 +166,6 @@ export class ContextToken {
return this.searchSpace.sourceRangeKey;
}

/**
* Gets a simple, compact string-based representation of `inputRange`.
*
* This should only ever be used for debugging purposes.
*/
get sourceText(): string {
return this.searchSpace.likeliestSourceText;
}

/**
* Generates text corresponding to the net effects of the most likely inputs
* received that can correspond to the current instance.
Expand All @@ -192,7 +186,7 @@ export class ContextToken {
// Thus, we don't set the .isWhitespace flag field.
const resultToken = new ContextToken(lexicalModel);

let lastSourceInput: TokenInputSource;
let lastSourceInput: PathInputProperties;
let lastInputDistrib: Distribution<Transform>;
for(const token of tokensToMerge) {
const inputCount = token.inputCount;
Expand All @@ -203,7 +197,7 @@ export class ContextToken {
}

// Are we re-merging on a previously split transform?
if(lastSourceInput?.trueTransform != token.inputRange[0].trueTransform) {
if(lastSourceInput?.segment.trueTransform != token.inputSegments[0].segment.trueTransform) {
if(lastSourceInput) {
resultToken.addInput(lastSourceInput, lastInputDistrib);
} // else: there's nothing to add as input
Expand Down Expand Up @@ -232,9 +226,9 @@ export class ContextToken {
// Ignore the last entry for now - it may need to merge with a matching
// entry in the next token!
for(let i = startIndex; i < inputCount - 1; i++) {
resultToken.addInput(token.inputRange[i], token.searchSpace.inputSequence[i]);
resultToken.addInput(token.inputSegments[i], token.searchSpace.inputSequence[i]);
}
lastSourceInput = token.inputRange[inputCount-1];
lastSourceInput = token.inputSegments[inputCount-1];
lastInputDistrib = token.searchSpace.inputSequence[inputCount-1];
}

Expand All @@ -257,7 +251,7 @@ export class ContextToken {

// Build an alternate version of the transforms: if we preprocess all deleteLefts,
// what text remains from each?
const alteredSources = preprocessInputSources(this.inputRange);
const alteredSources = preprocessInputSources(this.inputSegments);

const blankContext = { left: '', startOfBuffer: true, endOfBuffer: true };
const splitSpecs = split.matches.slice();
Expand Down Expand Up @@ -313,15 +307,17 @@ export class ContextToken {
};
});

const priorSourceInput = overextendedToken.inputRange[lastInputIndex];
const priorSourceInput = overextendedToken.inputSegments[lastInputIndex];
constructingToken.addInput(priorSourceInput, headDistribution);
tokensFromSplit.push(constructingToken);

constructingToken = new ContextToken(lexicalModel);
backupToken = new ContextToken(constructingToken);
constructingToken.addInput({
trueTransform: priorSourceInput.trueTransform,
inputStartIndex: priorSourceInput.inputStartIndex + extraCharsAdded,
segment: {
...priorSourceInput.segment,
start: priorSourceInput.segment.start + extraCharsAdded
},
bestProbFromSet: priorSourceInput.bestProbFromSet
}, tailDistribution);

Expand All @@ -338,34 +334,34 @@ export class ContextToken {

backupToken = new ContextToken(constructingToken);
lenBeforeLastApply = KMWString.length(currentText.left);
currentText = applyTransform(alteredSources[transformIndex].trueTransform, currentText);
constructingToken.addInput(this.inputRange[transformIndex], this.searchSpace.inputSequence[transformIndex]);
currentText = applyTransform(alteredSources[transformIndex].segment.trueTransform, currentText);
constructingToken.addInput(this.inputSegments[transformIndex], this.searchSpace.inputSequence[transformIndex]);
transformIndex++;
}

return tokensFromSplit;
}
}

/**
 * Builds an altered copy of `inputSources` in which each entry's `deleteLeft`
 * has been pre-applied to the `insert` text of the entries before it.
 *
 * Entries are visited from last to first. Deletions still pending from later
 * entries trim characters off the end of the current entry's `insert`; the
 * entry's own `deleteLeft` is then added to the pending count and reset to 0.
 * Whatever is still pending once the loop finishes is stored as the
 * `deleteLeft` of the entry at index 0.
 *
 * All edits are made on the value returned by `deepCopy(inputSources)`.
 * NOTE(review): presumably this leaves the caller's entries unmodified -
 * confirm against `deepCopy`.
 *
 * NOTE(review): index 0 is written unconditionally after the loop, so this
 * appears to assume `inputSources` is non-empty - confirm with callers.
 *
 * @param inputSources  The input-source entries to preprocess, in array order.
 * @returns The altered copy, in which only the first entry may still carry a
 * non-zero `deleteLeft`.
 */
export function preprocessInputSources(inputSources: ReadonlyArray<TokenInputSource>) {
export function preprocessInputSources(inputSources: ReadonlyArray<PathInputProperties>) {
const alteredSources = deepCopy(inputSources);
// Count of deletions from later entries that have not yet consumed any text.
let trickledDeleteLeft = 0;
// Walk backward so that deletions propagate toward lower indices.
for(let i = alteredSources.length - 1; i >= 0; i--) {
const source = alteredSources[i];
if(trickledDeleteLeft) {
const insLen = KMWString.length(source.trueTransform.insert);
const insLen = KMWString.length(source.segment.trueTransform.insert);
if(insLen <= trickledDeleteLeft) {
// The whole insert is consumed; the remainder keeps trickling.
source.trueTransform.insert = '';
source.segment.trueTransform.insert = '';
trickledDeleteLeft -= insLen;
} else {
// Only the tail of the insert is consumed; nothing is left pending.
source.trueTransform.insert = KMWString.substring(source.trueTransform.insert, 0, insLen - trickledDeleteLeft);
source.segment.trueTransform.insert = KMWString.substring(source.segment.trueTransform.insert, 0, insLen - trickledDeleteLeft);
trickledDeleteLeft = 0;
}
}
// This entry's own deletions now become pending for the entries before it.
trickledDeleteLeft += source.trueTransform.deleteLeft;
source.trueTransform.deleteLeft = 0;
trickledDeleteLeft += source.segment.trueTransform.deleteLeft;
source.segment.trueTransform.deleteLeft = 0;
}

// Any deletions left over are recorded on the first entry.
alteredSources[0].trueTransform.deleteLeft = trickledDeleteLeft;
alteredSources[0].segment.trueTransform.deleteLeft = trickledDeleteLeft;
return alteredSources;
}
Original file line number Diff line number Diff line change
Expand Up @@ -158,16 +158,6 @@ export class ContextTokenization {
return this.tail.spaceId;
}

/**
* Returns plain-text strings representing the most probable representation for all
* tokens represented by this tokenization instance.
*
* Intended for debugging use only.
*/
get sourceText() {
return this.tokens.map(token => token.sourceText);
}

/**
* Returns a plain-text string representing the most probable representation for all
* tokens represented by this tokenization instance.
Expand Down Expand Up @@ -596,8 +586,11 @@ export class ContextTokenization {
distribution = distribution.map((mass) => ({sample: { ...mass.sample, deleteLeft: 0 }, p: mass.p }));
}
affectedToken.addInput({
trueTransform: sourceInput,
inputStartIndex: appliedLength,
segment: {
trueTransform: sourceInput,
transitionId: sourceInput.id,
start: appliedLength
},
bestProbFromSet: bestProbFromSet
}, distribution);
appliedLength += KMWString.length(distribution[0].sample.insert);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,10 @@

import { QueueComparator as Comparator, KMWString, PriorityQueue } from '@keymanapp/web-utils';
import { LexicalModelTypes } from '@keymanapp/common-types';
import { applyTransform } from '@keymanapp/models-templates';

import { EDIT_DISTANCE_COST_SCALE, SearchNode, SearchResult } from './distance-modeler.js';
import { generateSpaceSeed, PathResult, SearchSpace, TokenInputSource } from './search-space.js';
import { generateSpaceSeed, PathResult, SearchSpace, PathInputProperties } from './search-space.js';

import Context = LexicalModelTypes.Context;
import Distribution = LexicalModelTypes.Distribution;
import LexicalModel = LexicalModelTypes.LexicalModel;
import ProbabilityMass = LexicalModelTypes.ProbabilityMass;
Expand All @@ -30,7 +28,7 @@ export const QUEUE_NODE_COMPARATOR: Comparator<SearchNode> = function(arg1, arg2
export class SearchPath implements SearchSpace {
private selectionQueue: PriorityQueue<SearchNode> = new PriorityQueue(QUEUE_NODE_COMPARATOR);
readonly inputs?: Distribution<Transform>;
readonly inputSource?: TokenInputSource;
readonly inputSource?: PathInputProperties;

readonly parentSpace: SearchSpace;
readonly spaceId: number;
Expand Down Expand Up @@ -79,31 +77,34 @@ export class SearchPath implements SearchSpace {
* @param srcKeystroke Data about the actual context range represented by `inputs` and
* its underlying keystroke.
*/
constructor(space: SearchSpace, inputs: Distribution<Transform>, srcKeystroke: TokenInputSource);
constructor(arg1: LexicalModel | SearchSpace, inputs?: Distribution<Transform>, inputSource?: TokenInputSource | ProbabilityMass<Transform>) {
constructor(space: SearchSpace, inputs: Distribution<Transform>, srcKeystroke: PathInputProperties);
constructor(arg1: LexicalModel | SearchSpace, inputs?: Distribution<Transform>, inputSource?: PathInputProperties | ProbabilityMass<Transform>) {
// If we're taking in a pre-constructed search node, it's got an associated,
// pre-assigned spaceID - so use that.
const isExtending = (arg1 instanceof SearchPath);
this.spaceId = generateSpaceSeed();

// Coerce inputSource to PathInputProperties format.
if(inputSource && (inputSource as TokenInputSource).trueTransform == undefined) {
if(inputSource && (inputSource as ProbabilityMass<Transform>).sample != undefined) {
const keystroke = inputSource as ProbabilityMass<Transform>;
inputSource = {
trueTransform: keystroke.sample,
bestProbFromSet: keystroke.p,
inputStartIndex: 0
segment: {
trueTransform: keystroke.sample,
transitionId: keystroke.sample.id,
start: 0
},
bestProbFromSet: keystroke.p
}
};

const inputSrc = inputSource as TokenInputSource;
const inputSrc = inputSource as PathInputProperties;

if(isExtending) {
const parentSpace = arg1 as SearchSpace;
const logTierCost = -Math.log(inputSrc.bestProbFromSet);

const transitionId = (inputs?.[0].sample.id);
if(transitionId !== undefined && inputSrc.trueTransform.id != transitionId) {
if(transitionId !== undefined && inputSrc.segment.transitionId != transitionId) {
throw new Error("Input distribution and input-source transition IDs must match");
}

Expand Down Expand Up @@ -198,23 +199,6 @@ export class SearchPath implements SearchSpace {
}
}

get likeliestSourceText(): string {
let prefixContext: Context = { left: this.parentSpace?.likeliestSourceText ?? '', startOfBuffer: true, endOfBuffer: true };
const inputTransform = this.inputSource?.trueTransform ?? { insert: '', deleteLeft: 0 };

const excessDeletes = inputTransform.deleteLeft - KMWString.length(prefixContext.left);
if(excessDeletes > 0) {
prefixContext = {
...prefixContext,
// \u{2421} = ␡ (Unicode symbol for Delete)
left: '\u{2421}'.repeat(excessDeletes) + prefixContext.left
};
}

const result = applyTransform(inputTransform, prefixContext);
return result.left;
}

get parents() {
// The SearchPath class may only have a single parent.
return this.parentSpace ? [this.parentSpace] : [];
Expand Down Expand Up @@ -362,15 +346,15 @@ export class SearchPath implements SearchSpace {
return Object.values(this.returnedValues ?? {}).map(v => new SearchResult(v));
}

public get sourceIdentifiers(): TokenInputSource[] {
public get inputSegments(): PathInputProperties[] {
if(!this.parentSpace) {
return [];
}

const parentSources = this.parentSpace.sourceIdentifiers;
const parentSources = this.parentSpace.inputSegments;
if(this.inputSource) {
const inputId = this.inputSource.trueTransform.id;
if(inputId && parentSources.length > 0 && parentSources[parentSources.length - 1].trueTransform.id == inputId) {
const inputId = this.inputSource.segment.transitionId;
if(inputId && parentSources.length > 0 && parentSources[parentSources.length - 1].segment.transitionId == inputId) {
return parentSources;
}

Expand All @@ -386,11 +370,11 @@ export class SearchPath implements SearchSpace {
*/
get sourceRangeKey(): string {
const components: string[] = [];
const sources = this.sourceIdentifiers;
const sources = this.inputSegments;

for(const source of sources) {
const i = source.inputStartIndex;
components.push(`T${source.trueTransform.id}${i != 0 ? '@' + i : ''}`);
const i = source.segment.start;
components.push(`T${source.segment.transitionId}${i != 0 ? '@' + i : ''}`);
}

return components.join('+');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,26 +38,38 @@ type CompleteSearchPath = {

export type PathResult = NullPath | IntermediateSearchPath | CompleteSearchPath;

/**
* Models the properties and portion of an input event applied by a SearchSpace for
* correction-search purposes.
*/
export interface TokenInputSource {
export interface InputSegment {
/**
* The Transform corresponding to the keystroke applied to the true context
* for this input event.
*
* NOTE: outside of use for .sourceText / .likeliestSourceText, the only part
* that should actually be referenced is the Transform / transition ID.
* @deprecated Slated for removal within epic/autocorrect.
*/
trueTransform: Transform;

/**
* The transform / transition ID of the corresponding input event.
*/
transitionId: number,

/**
* Marks the initial index (inclusive) within the insert strings for the
* corresponding transitions' Transforms that is applied by the corresponding
* corresponding transitions' Transforms that are applied by the corresponding
* tokenized correction-search input.
*/
inputStartIndex: number;
start: number
}

/**
* Models the properties and portion of an input event applied by a SearchSpace for
* correction-search purposes.
*/
export interface PathInputProperties {
/**
* Denotes the portion of the ongoing input stream represented by the corresponding
* input distribution(s) of a SearchSpace.
*/
segment: InputSegment;

/**
* Notes the highest probability found in the input event's transform
Expand Down Expand Up @@ -163,13 +175,13 @@ export interface SearchSpace {
*/
readonly bestExample: { text: string, p: number };

readonly likeliestSourceText: string;

/**
* Gets components useful for building a string-based representation of the
* keystroke range corrected by this search space.
*
* TODO: will return only the `segment` (`InputSegment`) part of each entry in the future.
*/
readonly sourceIdentifiers: TokenInputSource[];
readonly inputSegments: PathInputProperties[];

/**
* Gets a compact string-based representation of `inputSegments` that
Expand Down
Loading