diff --git a/README.md b/README.md
index ad9de90..ba247f3 100644
--- a/README.md
+++ b/README.md
@@ -229,6 +229,27 @@ const hits = query(kb, 'react native bridge throttling', {
 });
 ```
 
+## Semantic sidecar workflow (Ollama, optional)
+
+Lexical retrieval remains the default first pass. A semantic sidecar adds optional local reranking over the lexical top-N candidates (no vector DB, no `.knolo` format migration).
+
+```bash
+# 1) Build deterministic lexical pack
+knolo build
+
+# 2) Generate local semantic sidecar (requires Ollama running)
+knolo semantic:index --pack ./dist/knowledge.knolo --out ./dist/knowledge.knolo.semantic.json --model qwen3-embedding:4b
+
+# 3) Inspect and validate sidecar before query-time use
+knolo semantic:inspect --sidecar ./dist/knowledge.knolo.semantic.json
+knolo semantic:validate --pack ./dist/knowledge.knolo --sidecar ./dist/knowledge.knolo.semantic.json --model qwen3-embedding:4b
+```
+
+Troubleshooting:
+- If Ollama is not running, start it and confirm `http://localhost:11434` is reachable.
+- If the model is missing, run `ollama pull qwen3-embedding:4b`.
+- If validation fails with a fingerprint or model mismatch, regenerate the sidecar against the current pack with the exact model used at query time.
+
 ---
 
 # 🧠 Optional: Agent Metadata & Routing
@@ -443,4 +464,3 @@ const hits = query(pack, 'knolo determinism', {
 # 📄 License
 
 Apache-2.0 — see `LICENSE`
-
diff --git a/packages/cli/bin/knolo.mjs b/packages/cli/bin/knolo.mjs
index b6ab578..35577ed 100755
--- a/packages/cli/bin/knolo.mjs
+++ b/packages/cli/bin/knolo.mjs
@@ -27,7 +27,7 @@ const DEFAULT_CONFIG = {
 };
 const SUPPORTED_EXTENSIONS = new Set(['.md', '.txt', '.json']);
 const SKIP_DIRS = new Set(['node_modules', 'dist', '.git']);
-const SUBCOMMANDS = new Set(['init', 'add', 'build', 'query', 'dev']);
+const SUBCOMMANDS = new Set(['init', 'add', 'build', 'query', 'dev', 'semantic:index', 'semantic:inspect', 'semantic:validate']);
 
 function createError(message) {
   return new Error(message);
@@ -87,6 +87,9 @@ function printCommandHelp(command) {
     build: 'Usage: knolo build',
     query: 'Usage: knolo query [--pack <path>] [--k <n>] [--json]',
     dev: 'Usage: knolo dev',
+    'semantic:index': 'Usage: knolo semantic:index --pack <path> [--out <path>] [--model <id>] [--endpoint <url>]',
+    'semantic:inspect': 'Usage: knolo semantic:inspect --sidecar <path>',
+    'semantic:validate': 'Usage: knolo semantic:validate --pack <path> --sidecar <path> --model <id>',
   };
   console.log(help[command] ?? 'Unknown command.');
 }
@@ -313,6 +316,79 @@ async function cmdQuery(core, args) {
   });
 }
 
+function parseKeyValueArgs(args) {
+  const out = {};
+  for (let i = 0; i < args.length; i++) {
+    const key = args[i];
+    if (!key.startsWith('--')) throw createError(`Unexpected argument: ${key}`);
+    out[key.slice(2)] = args[++i];
+  }
+  return out;
+}
+
+async function loadOllamaProvider() {
+  const mod = await tryImport(path.resolve(__dirname, '../../semantic-ollama/dist/index.js'));
+  if (mod?.OllamaEmbeddingProvider) return mod.OllamaEmbeddingProvider;
+  const pkg = await tryImport('@knolo/semantic-ollama');
+  if (pkg?.OllamaEmbeddingProvider) return pkg.OllamaEmbeddingProvider;
+  throw createError('Could not load @knolo/semantic-ollama. Build packages/semantic-ollama first.');
+}
+
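+// Embeds every pack block with the local Ollama model and writes a JSON
+// sidecar next to the pack. The sidecar records the pack fingerprint and
+// embedding model id so that semantic:validate can later reject stale or
+// mismatched embeddings.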
+async function cmdSemanticIndex(core, args) {
+  const flags = parseKeyValueArgs(args);
+  const packPath = path.resolve(process.cwd(), flags.pack || 'dist/knowledge.knolo');
+  const outPath = path.resolve(process.cwd(), flags.out || `${packPath}.semantic.json`);
+  const modelId = flags.model || 'qwen3-embedding:4b';
+  const endpoint = flags.endpoint || 'http://localhost:11434';
+  if (!existsSync(packPath)) throw createError(`Pack file not found at ${path.relative(process.cwd(), packPath)}.`);
+
+  const bytes = Uint8Array.from(readFileSync(packPath));
+  const pack = await mountPackFromBytes(core, bytes);
+  const OllamaEmbeddingProvider = await loadOllamaProvider();
+  const provider = new OllamaEmbeddingProvider({ modelId, endpoint });
+  const vectors = await provider.embedTexts(pack.blocks);
+  const sidecar = {
+    version: 1,
+    packFingerprint: core.createPackFingerprint(pack),
+    modelId: provider.modelId,
+    dimension: vectors[0]?.length ?? 0,
+    metric: 'cosine',
+    createdAt: new Date().toISOString(),
+    blocks: vectors.map((vector, blockId) => ({ blockId, vector: Array.from(core.normalizeVector(vector)) })),
+  };
+  writeFileSync(outPath, core.serializeSidecar(sidecar));
+  console.log(`✔ wrote ${path.relative(process.cwd(), outPath)}`);
+}
+
+async function cmdSemanticInspect(core, args) {
+  const flags = parseKeyValueArgs(args);
+  const sidecarPath = path.resolve(process.cwd(), flags.sidecar);
+  const sidecar = core.parseSidecar(readFileSync(sidecarPath, 'utf8'));
+  console.log(JSON.stringify({
+    version: sidecar.version,
+    packFingerprint: sidecar.packFingerprint,
+    modelId: sidecar.modelId,
+    dimension: sidecar.dimension,
+    metric: sidecar.metric,
+    createdAt: sidecar.createdAt,
+    blocks: sidecar.blocks.length,
+  }, null, 2));
+}
+
+async function cmdSemanticValidate(core, args) {
+  const flags = parseKeyValueArgs(args);
+  const packPath = path.resolve(process.cwd(), flags.pack || 'dist/knowledge.knolo');
+  const sidecarPath = path.resolve(process.cwd(), flags.sidecar);
+  const modelId = flags.model;
+  if (!modelId) throw createError('semantic:validate requires --model <id>.');
+  const pack = await mountPackFromBytes(core, Uint8Array.from(readFileSync(packPath)));
+  const sidecar = core.parseSidecar(readFileSync(sidecarPath, 'utf8'));
+  core.validateSidecarForPack({ sidecar, pack, modelId });
+  if (sidecar.blocks.length !== pack.blocks.length) throw createError(`Semantic block count mismatch: sidecar=${sidecar.blocks.length}, pack=${pack.blocks.length}`);
+  if (sidecar.dimension <= 0) throw createError('Semantic sidecar dimension must be > 0.');
+  console.log('✔ semantic sidecar validation passed');
+}
+
 async function mountPackFromBytes(core, bytes) {
   try {
     return await core.mountPack({ bytes });
@@ -598,6 +674,9 @@ async function main() {
   if (command === 'build') return await cmdBuild(core);
   if (command === 'query') return await cmdQuery(core, commandArgs);
   if (command === 'dev') return await cmdDev(core);
+  if (command === 'semantic:index') return await cmdSemanticIndex(core, commandArgs);
+  if (command === 'semantic:inspect') return await cmdSemanticInspect(core, commandArgs);
+  if (command === 'semantic:validate') return await cmdSemanticValidate(core, commandArgs);
 }
 
 if (command.startsWith('-')) throw createError(`Unknown option: ${command}`);
diff --git a/packages/cli/test/cli.test.mjs b/packages/cli/test/cli.test.mjs
index f94d86f..5ce0d11 100644
--- a/packages/cli/test/cli.test.mjs
+++ b/packages/cli/test/cli.test.mjs
@@ -4,6 +4,7 @@ 
import { mkdtempSync, existsSync, mkdirSync, writeFileSync, readFileSync } from import { tmpdir } from 'node:os'; import path from 'node:path'; import { execFileSync } from 'node:child_process'; +import { pathToFileURL } from 'node:url'; const cliPath = path.resolve(process.cwd(), 'bin/knolo.mjs'); const cliPackageJson = JSON.parse( @@ -91,3 +92,33 @@ test('add updates existing source path', () => { const config = JSON.parse(readFileSync(path.join(cwd, 'knolo.config.json'), 'utf8')); assert.equal(config.sources[0].path, './knowledge-base'); }); + +test('semantic:validate succeeds for matching pack/model and fails on mismatch', async () => { + const cwd = mkdtempSync(path.join(tmpdir(), 'knolo-cli-sem-validate-')); + runCli(['init'], cwd); + runCli(['build'], cwd); + + const coreModule = await import(pathToFileURL(path.resolve(process.cwd(), '../core/dist/index.js')).href); + const packPath = path.join(cwd, 'dist/knowledge.knolo'); + const packBytes = readFileSync(packPath); + const pack = await coreModule.mountPack({ src: Uint8Array.from(packBytes) }); + const sidecarPath = path.join(cwd, 'dist/knowledge.knolo.semantic.json'); + const sidecar = { + version: 1, + packFingerprint: coreModule.createPackFingerprint(pack), + modelId: 'qwen3-embedding:4b', + dimension: 3, + metric: 'cosine', + createdAt: new Date().toISOString(), + blocks: pack.blocks.map((_, blockId) => ({ blockId, vector: [1, 0, 0] })), + }; + writeFileSync(sidecarPath, coreModule.serializeSidecar(sidecar), 'utf8'); + + const output = runCli(['semantic:validate', '--pack', './dist/knowledge.knolo', '--sidecar', './dist/knowledge.knolo.semantic.json', '--model', 'qwen3-embedding:4b'], cwd); + assert.match(output, /validation passed/); + + assert.throws( + () => runCli(['semantic:validate', '--pack', './dist/knowledge.knolo', '--sidecar', './dist/knowledge.knolo.semantic.json', '--model', 'other-model'], cwd), + /Semantic model mismatch/ + ); +}); diff --git a/packages/core/scripts/test.mjs b/packages/core/scripts/test.mjs index 25b7dae..a3432a1 100644 --- a/packages/core/scripts/test.mjs +++ b/packages/core/scripts/test.mjs @@ -37,6 +37,12 @@ import { mergeClaimGraphLogs, applyClaimGraphLog, expandQueryWithGraph, + cosineSimilarity, + normalizeVector, + createPackFingerprint, + serializeSidecar, + parseSidecar, + validateSidecarForPack, } from '../dist/index.js'; import { mountPack as mountPackNode } from '../dist/node.js'; @@ -613,6 +619,132 @@ async function testSemanticRerankErrorAndDefaults() { ); } +async function testSemanticSidecarRerankAndValidation() { + const docs = [ + { id: 'a', text: 'alpha beta alpha beta alpha beta river stone' }, + { id: 'b', text: 'alpha beta solar wind' }, + ]; + const pack = await mountPack({ src: await buildPack(docs) }); + const sidecar = { + version: 1, + packFingerprint: createPackFingerprint(pack), + modelId: 'qwen3-embedding:4b', + dimension: 2, + metric: 'cosine', + createdAt: new Date().toISOString(), + blocks: [ + { blockId: 0, vector: [1, 0] }, + { blockId: 1, vector: [0, 1] }, + ], + }; + + const lexical = query(pack, 'alpha beta', { topK: 2, queryExpansion: { enabled: false } }); + const reranked = query(pack, 'alpha beta', { + topK: 2, + queryExpansion: { enabled: false }, + semantic: { + enabled: true, + sidecarPath: serializeSidecar(sidecar), + provider: { type: 'ollama', modelId: 'qwen3-embedding:4b' }, + queryEmbedding: new Float32Array([0, 1]), + force: true, + blend: { enabled: false }, + }, + }); + + assert.notEqual(reranked[0]?.source, lexical[0]?.source, 'expected 
sidecar rerank to update ordering'); + assert.equal(reranked[0]?.evidence?.retrieval, 'hybrid'); + + assert.throws( + () => validateSidecarForPack({ sidecar: { ...sidecar, modelId: 'other' }, pack, modelId: 'qwen3-embedding:4b' }), + /Semantic model mismatch/ + ); + assert.throws( + () => validateSidecarForPack({ sidecar: { ...sidecar, packFingerprint: 'fnv1a-deadbeef' }, pack, modelId: 'qwen3-embedding:4b' }), + /pack fingerprint mismatch/ + ); + + const loaded = parseSidecar(serializeSidecar(sidecar)); + assert.deepEqual(loaded, sidecar, 'expected semantic sidecar round trip to remain stable'); +} + +async function testSemanticEvidenceScoresRemainCorrectAfterRerank() { + const docs = [ + { id: 'lex-a', text: 'alpha beta alpha beta alpha beta river stone' }, + { id: 'lex-b', text: 'alpha beta solar wind' }, + ]; + const pack = await mountPack({ + src: await buildPack(docs, { + semantic: { + enabled: true, + modelId: 'test-model', + embeddings: [new Float32Array([1, 0]), new Float32Array([0, 1])], + quantization: { type: 'int8_l2norm', perVectorScale: true }, + }, + }), + }); + + const lexical = query(pack, 'alpha beta', { + topK: 2, + queryExpansion: { enabled: false }, + }); + const lexicalScores = new Map(lexical.map((h) => [h.blockId, h.evidence?.lexicalScore ?? h.score])); + const reranked = query(pack, 'alpha beta', { + topK: 2, + queryExpansion: { enabled: false }, + semantic: { + enabled: true, + queryEmbedding: new Float32Array([0, 1]), + force: true, + blend: { enabled: true, wLex: 0.5, wSem: 0.5 }, + }, + }); + + assert.notEqual( + reranked[0]?.source, + lexical[0]?.source, + 'expected semantic rerank to change ordering' + ); + for (const hit of reranked) { + const before = lexicalScores.get(hit.blockId); + assert.equal( + hit.evidence?.lexicalScore, + before, + 'expected evidence.lexicalScore to preserve pre-rerank lexical score' + ); + assert.equal(hit.evidence?.retrieval, 'hybrid'); + assert.equal(typeof hit.evidence?.semanticScore, 'number'); + assert.equal(typeof hit.evidence?.blendedScore, 'number'); + } +} + +async function testLexicalOnlyEvidenceRemainsUnchanged() { + const docs = [ + { id: 'a', text: 'alpha beta gamma' }, + { id: 'b', text: 'alpha beta delta' }, + ]; + const pack = await mountPack({ src: await buildPack(docs) }); + const hits = query(pack, 'alpha beta', { + topK: 2, + queryExpansion: { enabled: false }, + }); + assert.ok(hits.length > 0, 'expected lexical query to return hits'); + for (const hit of hits) { + assert.equal(hit.evidence?.retrieval, 'lexical'); + assert.equal(typeof hit.evidence?.lexicalScore, 'number'); + assert.equal(hit.evidence?.semanticScore, undefined); + assert.equal(hit.evidence?.blendedScore, undefined); + } +} + +async function testCosineHelpers() { + const a = normalizeVector(new Float32Array([3, 4])); + const b = normalizeVector(new Float32Array([3, 4])); + const c = normalizeVector(new Float32Array([4, -3])); + assert.ok(Math.abs(cosineSimilarity(a, b) - 1) < 1e-6, 'expected same vector cosine to be 1'); + assert.ok(Math.abs(cosineSimilarity(a, c)) < 1e-6, 'expected orthogonal vector cosine to be ~0'); +} + async function testSemanticFixtureAndHelpers() { const pack = await buildSemanticFixturePack(); assert.ok( @@ -1638,6 +1770,10 @@ await testLexConfidenceDeterministic(); await testSemanticRerankLowConfidence(); await testSemanticRerankRespectsConfidenceAndForce(); await testSemanticRerankErrorAndDefaults(); +await testSemanticSidecarRerankAndValidation(); +await testSemanticEvidenceScoresRemainCorrectAfterRerank(); +await 
testLexicalOnlyEvidenceRemainsUnchanged(); +await testCosineHelpers(); await testSmartQuotePhrase(); await testFirstBlockRetrieval(); await testNearDuplicateDedupe(); diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 4999794..738fee1 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -13,6 +13,15 @@ export { encodeScaleF16, decodeScaleF16, } from './semantic.js'; +export { cosineSimilarity, normalizeVector } from './semantic/cosine.js'; +export { + createPackFingerprint, + serializeSidecar, + parseSidecar, + validateSidecarForPack, +} from './semantic/sidecar.js'; +export { rerankCandidates } from './semantic/rerank.js'; +export { assertProviderCompatible, ensureProviderModelId } from './semantic/provider.js'; export { listAgents, getAgent, @@ -39,6 +48,7 @@ export { export { expandQueryWithGraph } from './graph/query_expand.js'; export type { MountOptions, PackMeta, Pack } from './pack.runtime.js'; export type { QueryOptions, Hit } from './query.js'; +export type { EmbeddingProvider, SemanticSidecar, SemanticQueryOptions, RetrievalEvidence } from './semantic/types.js'; export type { ContextPatch } from './patch.js'; export type { BuildInputDoc, BuildPackOptions } from './builder.js'; export type { diff --git a/packages/core/src/query.ts b/packages/core/src/query.ts index fdd108d..12831af 100644 --- a/packages/core/src/query.ts +++ b/packages/core/src/query.ts @@ -17,6 +17,9 @@ import { diversifyAndDedupe } from "./quality/diversify.js"; import { knsSignature, knsDistance } from "./quality/signature.js"; import { decodeScaleF16, quantizeEmbeddingInt8L2Norm } from "./semantic.js"; import { expandQueryWithGraph } from "./graph/query_expand.js"; +import type { RetrievalEvidence, SemanticSidecar } from "./semantic/types.js"; +import { rerankCandidates } from "./semantic/rerank.js"; +import { parseSidecar } from "./semantic/sidecar.js"; export type QueryOptions = { topK?: number; @@ -47,6 +50,14 @@ export type QueryOptions = { wSem?: number; }; queryEmbedding?: Float32Array; + sidecar?: SemanticSidecar; + provider?: { + type: "ollama"; + modelId: string; + endpoint?: string; + }; + sidecarPath?: string; + minSemanticScore?: number; force?: boolean; }; }; @@ -113,6 +124,18 @@ export function validateSemanticQueryOptions(options?: QueryOptions["semantic"]) if (options.queryEmbedding !== undefined && !(options.queryEmbedding instanceof Float32Array)) { throw new Error("query(...): semantic.queryEmbedding must be a Float32Array."); } + if (options.sidecarPath !== undefined && typeof options.sidecarPath !== "string") { + throw new Error("query(...): semantic.sidecarPath must be a string when provided."); + } + if (options.minSemanticScore !== undefined && (!Number.isFinite(options.minSemanticScore) || options.minSemanticScore < 0 || options.minSemanticScore > 1)) { + throw new Error("query(...): semantic.minSemanticScore must be a finite number between 0 and 1."); + } + if (options.provider) { + if (options.provider.type !== "ollama") throw new Error('query(...): semantic.provider.type must be "ollama".'); + if (typeof options.provider.modelId !== "string" || !options.provider.modelId.trim()) { + throw new Error("query(...): semantic.provider.modelId must be a non-empty string."); + } + } if (options.blend) { if (options.blend.enabled !== undefined && typeof options.blend.enabled !== "boolean") { throw new Error("query(...): semantic.blend.enabled must be a boolean when provided."); @@ -135,6 +158,7 @@ export type Hit = { text: string; source?: 
string;
   namespace?: string;
+  evidence?: RetrievalEvidence;
 };
 
 export function query(pack: Pack, q: string, opts: QueryOptions = {}): Hit[] {
@@ -159,6 +183,9 @@ export function query(pack: Pack, q: string, opts: QueryOptions = {}): Hit[] {
       wSem: Math.max(0, opts.semantic?.blend?.wSem ?? 0.25),
     },
     queryEmbedding: opts.semantic?.queryEmbedding,
+    sidecar: resolveSemanticSidecar(opts.semantic?.sidecar, opts.semantic?.sidecarPath),
+    provider: opts.semantic?.provider,
+    minSemanticScore: opts.semantic?.minSemanticScore,
     force: opts.semantic?.force ?? false,
   };
 
@@ -353,10 +380,18 @@ export function query(pack: Pack, q: string, opts: QueryOptions = {}): Hit[] {
   }
 
   const confidence = lexConfidence(prelim);
+  let semanticScores: Map<number, number> | undefined;
+  let blendedScores: Map<number, number> | undefined;
+  const originalLexicalScores = new Map(prelim.map((item) => [item.blockId, item.score]));
   if (shouldRerankWithSemantic(pack, semanticOpts, confidence)) {
-    prelim = rerankLexicalHitsWithSemantic(pack, prelim, semanticOpts);
+    const semanticResult = rerankLexicalHitsWithSemantic(pack, prelim, semanticOpts);
+    prelim = semanticResult.hits;
+    semanticScores = semanticResult.semanticScores;
+    blendedScores = semanticResult.blendedScores;
   }
 
+  const retrievalMode = semanticScores ? "hybrid" : "lexical";
+
   // --- KNS tie-breaker + de-dup/MMR
   const qSig = knsSignature(normalize(q));
   const pool = prelim.slice(0, topK * 5).map((r) => {
@@ -368,6 +403,13 @@ export function query(pack: Pack, q: string, opts: QueryOptions = {}): Hit[] {
       text,
       source: pack.docIds?.[r.blockId] ?? undefined,
       namespace: pack.namespaces?.[r.blockId] ?? undefined,
+      evidence: {
+        retrieval: retrievalMode,
+        lexicalScore: originalLexicalScores.get(r.blockId) ?? r.score,
+        semanticScore: semanticScores?.get(r.blockId),
+        blendedScore: blendedScores?.get(r.blockId),
+        modelId: semanticOpts.provider?.modelId ?? semanticOpts.sidecar?.modelId,
+      },
     };
   });
 
@@ -391,26 +433,77 @@ type ResolvedSemanticOpts = {
   minLexConfidence: number;
   blend: { enabled: boolean; wLex: number; wSem: number };
   queryEmbedding?: Float32Array;
+  sidecar?: SemanticSidecar;
+  provider?: { type: "ollama"; modelId: string; endpoint?: string };
+  minSemanticScore?: number;
   force: boolean;
 };
 
 function shouldRerankWithSemantic(pack: Pack, opts: ResolvedSemanticOpts, confidence: number): boolean {
   if (!opts.enabled || opts.mode !== "rerank") return false;
-  if (!pack.semantic) return false;
+  if (!pack.semantic && !opts.sidecar) return false;
   if (!opts.queryEmbedding) {
     throw new Error("query(...): semantic.queryEmbedding (Float32Array) is required when semantic.enabled=true.");
   }
   return opts.force || confidence < opts.minLexConfidence;
 }
 
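+// Note: a plain filesystem path is not read from disk here. Callers pass the
+// sidecar object itself, its JSON contents, or a data: URI, which keeps
+// @knolo/core free of filesystem dependencies.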
+function resolveSemanticSidecar(sidecar?: SemanticSidecar, sidecarPath?: string): SemanticSidecar | undefined {
+  if (sidecar) return sidecar;
+  if (!sidecarPath) return undefined;
+  const raw = sidecarPath.trim();
+  if (!raw) return undefined;
+
+  if (raw.startsWith("{")) {
+    return parseSidecar(raw);
+  }
+
+  if (raw.startsWith("data:")) {
+    const comma = raw.indexOf(",");
+    if (comma <= 0) return undefined;
+    const meta = raw.slice(5, comma).toLowerCase();
+    const payload = raw.slice(comma + 1);
+    const decoded = meta.includes(";base64")
+      ? decodeBase64(payload)
+      : decodeURIComponent(payload);
+    if (!decoded.trim()) return undefined;
+    return parseSidecar(decoded);
+  }
+
+  return undefined;
+}
+
+function decodeBase64(input: string): string {
+  const normalized = input.replace(/\s+/g, "");
+  const atobFn = (globalThis as { atob?: (s: string) => string }).atob;
+  if (typeof atobFn === "function") return atobFn(normalized);
+
+  const maybeBufferCtor = (globalThis as { Buffer?: { from: (s: string, enc: string) => { toString: (enc: string) => string } } }).Buffer;
+  if (maybeBufferCtor?.from) return maybeBufferCtor.from(normalized, "base64").toString("utf8");
+
+  throw new Error("query(...): Unable to decode semantic.sidecarPath base64 payload in this runtime.");
+}
+
 function rerankLexicalHitsWithSemantic(
   pack: Pack,
   prelim: Array<{ blockId: number; score: number }>,
   opts: ResolvedSemanticOpts
-): Array<{ blockId: number; score: number }> {
+): { hits: Array<{ blockId: number; score: number }>; semanticScores?: Map<number, number>; blendedScores?: Map<number, number> } {
+  if (opts.sidecar && opts.queryEmbedding) {
+    const sidecarResult = rerankCandidates({
+      lexical: prelim,
+      sidecar: opts.sidecar,
+      queryEmbedding: opts.queryEmbedding,
+      topN: opts.topN,
+      blend: opts.blend,
+      minSemanticScore: opts.minSemanticScore,
+    });
+    return { hits: sidecarResult.reranked, semanticScores: sidecarResult.semanticScores, blendedScores: sidecarResult.blendedScores };
+  }
+
   const sem = pack.semantic;
-  if (!sem || !opts.queryEmbedding) return prelim;
-  if (sem.dims <= 0 || sem.vecs.length === 0 || sem.dims !== opts.queryEmbedding.length) return prelim;
+  if (!sem || !opts.queryEmbedding) return { hits: prelim };
+  if (sem.dims <= 0 || sem.vecs.length === 0 || sem.dims !== opts.queryEmbedding.length) return { hits: prelim };
 
   const topN = Math.min(opts.topN, prelim.length);
   const rerankSlice = prelim.slice(0, topN);
@@ -427,16 +520,20 @@
   const wSem = denom > 0 ? opts.blend.wSem / denom : 0.5;
 
   const reranked = new Array<{ blockId: number; score: number }>(topN);
+  const semanticScores = new Map<number, number>();
+  const blendedScores = new Map<number, number>();
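+  // Record per-block semantic and blended scores so query() can surface them
+  // as retrieval evidence; the blended score becomes the new ranking key.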
   for (let i = 0; i < topN; i++) {
     const hit = rerankSlice[i];
+    semanticScores.set(hit.blockId, normSem[i]);
+    blendedScores.set(hit.blockId, opts.blend.enabled ? wLex * normLex[i] + wSem * normSem[i] : semScores[i]);
     reranked[i] = {
       blockId: hit.blockId,
-      score: opts.blend.enabled ? wLex * normLex[i] + wSem * normSem[i] : semScores[i],
+      score: blendedScores.get(hit.blockId) ?? hit.score,
     };
   }
 
   reranked.sort((a, b) => b.score - a.score || a.blockId - b.blockId);
-  return [...reranked, ...tail];
+  return { hits: [...reranked, ...tail], semanticScores, blendedScores };
 }
 
 function scoreSemanticInt8(
diff --git a/packages/core/src/semantic/cosine.ts b/packages/core/src/semantic/cosine.ts
new file mode 100644
index 0000000..4cec69c
--- /dev/null
+++ b/packages/core/src/semantic/cosine.ts
@@ -0,0 +1,16 @@
+export function normalizeVector(vector: Float32Array): Float32Array {
+  let normSq = 0;
+  for (let i = 0; i < vector.length; i++) normSq += vector[i] * vector[i];
+  const norm = Math.sqrt(normSq);
+  if (!norm) return new Float32Array(vector.length);
+  const out = new Float32Array(vector.length);
+  for (let i = 0; i < vector.length; i++) out[i] = vector[i] / norm;
+  return out;
+}
+
+// Assumes both inputs are already L2-normalized; returns the raw dot product.
+export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
+  if (a.length !== b.length || a.length === 0) return 0;
+  let dot = 0;
+  for (let i = 0; i < a.length; i++) dot += a[i] * b[i];
+  return dot;
+}
diff --git a/packages/core/src/semantic/provider.ts b/packages/core/src/semantic/provider.ts
new file mode 100644
index 0000000..b5173ae
--- /dev/null
+++ b/packages/core/src/semantic/provider.ts
@@ -0,0 +1,17 @@
+import type { EmbeddingProvider, SemanticQueryOptions } from './types.js';
+
+export function ensureProviderModelId(options?: SemanticQueryOptions): string | undefined {
+  return options?.provider?.modelId;
+}
+
+export function assertProviderCompatible(options?: SemanticQueryOptions, provider?: EmbeddingProvider): void {
+  if (!options?.enabled) return;
+  if (!provider && !options.queryEmbedding) {
+    throw new Error('semantic.enabled=true requires either semantic.queryEmbedding or an EmbeddingProvider.');
+  }
+  if (provider && options.provider?.modelId && options.provider.modelId !== provider.modelId) {
+    throw new Error(
+      `Semantic provider model mismatch: options requested ${options.provider.modelId}, provider exposes ${provider.modelId}.`
+    );
+  }
+}
diff --git a/packages/core/src/semantic/rerank.ts b/packages/core/src/semantic/rerank.ts
new file mode 100644
index 0000000..c6a6e4a
--- /dev/null
+++ b/packages/core/src/semantic/rerank.ts
@@ -0,0 +1,58 @@
+import type { SemanticSidecar } from './types.js';
+import { cosineSimilarity, normalizeVector } from './cosine.js';
+
+export function rerankCandidates(params: {
+  lexical: Array<{ blockId: number; score: number }>;
+  sidecar: SemanticSidecar;
+  queryEmbedding: Float32Array;
+  topN: number;
+  blend: { enabled: boolean; wLex: number; wSem: number };
+  minSemanticScore?: number;
+}): {
+  reranked: Array<{ blockId: number; score: number }>;
+  semanticScores: Map<number, number>;
+  blendedScores: Map<number, number>;
+} {
+  const topN = Math.min(params.topN, params.lexical.length);
+  const head = params.lexical.slice(0, topN);
+  const tail = params.lexical.slice(topN);
+  const q = normalizeVector(params.queryEmbedding);
+  const semanticScores = new Map<number, number>();
+  const blendedScores = new Map<number, number>();
+
+  const lexNorm = minMax(head.map((h) => h.score));
+  const semRaw: number[] = [];
+  for (const item of head) {
+    const rec = params.sidecar.blocks.find((b) => b.blockId === item.blockId);
+    const vec = rec ? Float32Array.from(rec.vector) : new Float32Array(q.length);
+    semRaw.push(cosineSimilarity(q, vec));
+  }
+  const semNorm = minMax(semRaw);
+
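+  // Weights are renormalized to sum to 1; if both are zero, the split
+  // defaults to 0.7 lexical / 0.3 semantic.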
+  const denom = params.blend.wLex + params.blend.wSem;
+  const wLex = denom > 0 ? params.blend.wLex / denom : 0.7;
+  const wSem = denom > 0 ? params.blend.wSem / denom : 0.3;
+
+  const reranked = head.map((item, idx) => {
+    const sem = semNorm[idx];
+    semanticScores.set(item.blockId, sem);
+    if ((params.minSemanticScore ?? 0) > sem) {
+      blendedScores.set(item.blockId, lexNorm[idx]);
+      return { blockId: item.blockId, score: lexNorm[idx] };
+    }
+    const blended = params.blend.enabled ? wLex * lexNorm[idx] + wSem * sem : sem;
+    blendedScores.set(item.blockId, blended);
+    return { blockId: item.blockId, score: blended };
+  });
+
+  reranked.sort((a, b) => b.score - a.score || a.blockId - b.blockId);
+  return { reranked: [...reranked, ...tail], semanticScores, blendedScores };
+}
+
+function minMax(values: number[]): number[] {
+  if (values.length === 0) return values;
+  const min = Math.min(...values);
+  const max = Math.max(...values);
+  if (!Number.isFinite(min) || !Number.isFinite(max) || max <= min) return values.map(() => 1);
+  return values.map((v) => Math.min(1, Math.max(0, (v - min) / (max - min))));
+}
diff --git a/packages/core/src/semantic/sidecar.ts b/packages/core/src/semantic/sidecar.ts
new file mode 100644
index 0000000..80e99d8
--- /dev/null
+++ b/packages/core/src/semantic/sidecar.ts
@@ -0,0 +1,44 @@
+import type { Pack } from '../pack.runtime.js';
+import type { SemanticSidecar } from './types.js';
+
+export function createPackFingerprint(pack: Pick<Pack, 'meta' | 'docIds' | 'blocks'>): string {
+  let hash = 2166136261;
+  const parts = [String(pack.meta?.version ?? 0), ...(pack.docIds ?? []), ...pack.blocks];
+  for (const part of parts) {
+    const text = String(part ?? '');
+    for (let i = 0; i < text.length; i++) {
+      hash ^= text.charCodeAt(i);
+      hash = Math.imul(hash, 16777619);
+    }
+  }
+  return `fnv1a-${(hash >>> 0).toString(16).padStart(8, '0')}`;
+}
+
+export function serializeSidecar(sidecar: SemanticSidecar): string {
+  return `${JSON.stringify(sidecar, null, 2)}\n`;
+}
+
+export function parseSidecar(raw: string): SemanticSidecar {
+  const parsed = JSON.parse(raw) as SemanticSidecar;
+  if (parsed.version !== 1) throw new Error(`Unsupported semantic sidecar version: ${parsed.version}`);
+  if (parsed.metric !== 'cosine') throw new Error(`Unsupported semantic metric: ${parsed.metric}`);
+  return parsed;
+}
+
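+// Guards against silent drift: a sidecar built from a different pack, or with
+// a different embedding model, fails fast instead of degrading results.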
+export function validateSidecarForPack(input: {
+  sidecar: SemanticSidecar;
+  pack: Pick<Pack, 'meta' | 'docIds' | 'blocks'>;
+  modelId: string;
+}): void {
+  const expectedFingerprint = createPackFingerprint(input.pack);
+  if (input.sidecar.packFingerprint !== expectedFingerprint) {
+    throw new Error(
+      `Semantic sidecar pack fingerprint mismatch: expected ${expectedFingerprint}, got ${input.sidecar.packFingerprint}. Regenerate the sidecar for this pack.`
+    );
+  }
+  if (input.sidecar.modelId !== input.modelId) {
+    throw new Error(
+      `Semantic model mismatch: sidecar model is ${input.sidecar.modelId}, but query provider is ${input.modelId}. Use the same embedding model or regenerate the sidecar.`
+    );
+  }
+}
diff --git a/packages/core/src/semantic/types.ts b/packages/core/src/semantic/types.ts
new file mode 100644
index 0000000..86e2653
--- /dev/null
+++ b/packages/core/src/semantic/types.ts
@@ -0,0 +1,47 @@
+export interface EmbeddingProvider {
+  readonly modelId: string;
+  embedQuery(text: string): Promise<Float32Array>;
+  embedTexts(texts: string[]): Promise<Float32Array[]>;
+}
+
+export interface SemanticSidecar {
+  version: 1;
+  packFingerprint: string;
+  modelId: string;
+  dimension: number;
+  metric: 'cosine';
+  createdAt: string;
+  blocks: Array<{
+    blockId: number;
+    vector: number[];
+  }>;
+}
+
+export type SemanticQueryOptions = {
+  enabled?: boolean;
+  mode?: 'rerank';
+  topN?: number;
+  minLexConfidence?: number;
+  minSemanticScore?: number;
+  blend?: {
+    enabled?: boolean;
+    wLex?: number;
+    wSem?: number;
+  };
+  provider?: {
+    type: 'ollama';
+    modelId: string;
+    endpoint?: string;
+  };
+  sidecarPath?: string;
+  queryEmbedding?: Float32Array;
+  force?: boolean;
+};
+
+export type RetrievalEvidence = {
+  retrieval: 'lexical' | 'hybrid';
+  lexicalScore?: number;
+  semanticScore?: number;
+  blendedScore?: number;
+  modelId?: string;
+};
diff --git a/packages/semantic-ollama/package.json b/packages/semantic-ollama/package.json
new file mode 100644
index 0000000..eb8e891
--- /dev/null
+++ b/packages/semantic-ollama/package.json
@@ -0,0 +1,17 @@
+{
+  "name": "@knolo/semantic-ollama",
+  "version": "0.1.0",
+  "type": "module",
+  "main": "./dist/index.js",
+  "types": "./dist/index.d.ts",
+  "files": ["dist"],
+  "scripts": {
+    "build": "tsc -p tsconfig.json"
+  },
+  "dependencies": {
+    "@knolo/core": "^3.2.1"
+  },
+  "devDependencies": {
+    "typescript": "^5.5.0"
+  }
+}
diff --git a/packages/semantic-ollama/src/index.ts b/packages/semantic-ollama/src/index.ts
new file mode 100644
index 0000000..51222ff
--- /dev/null
+++ b/packages/semantic-ollama/src/index.ts
@@ -0,0 +1,62 @@
+import type { EmbeddingProvider } from '@knolo/core';
+
+export type OllamaProviderOptions = {
+  endpoint?: string;
+  modelId?: string;
+  timeoutMs?: number;
+  batchSize?: number;
+};
+
+export class OllamaEmbeddingProvider implements EmbeddingProvider {
+  readonly modelId: string;
+  readonly endpoint: string;
+  readonly timeoutMs: number;
+  readonly batchSize: number;
+
+  constructor(opts: OllamaProviderOptions = {}) {
+    this.modelId = opts.modelId ?? 'qwen3-embedding:4b';
+    this.endpoint = opts.endpoint ?? 'http://localhost:11434';
+    this.timeoutMs = opts.timeoutMs ?? 30_000;
+    this.batchSize = Math.max(1, opts.batchSize ?? 32);
+  }
+
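+  // Requests go to /api/embeddings one prompt at a time; batchSize only
+  // bounds how many texts are sliced per outer loop iteration.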
+  async embedQuery(text: string): Promise<Float32Array> {
+    const [vec] = await this.embedTexts([text]);
+    return vec;
+  }
+
+  async embedTexts(texts: string[]): Promise<Float32Array[]> {
+    const out: Float32Array[] = [];
+    for (let i = 0; i < texts.length; i += this.batchSize) {
+      const batch = texts.slice(i, i + this.batchSize);
+      for (const text of batch) out.push(await this.requestEmbedding(text));
+    }
+    return out;
+  }
+
+  private async requestEmbedding(text: string): Promise<Float32Array> {
+    const controller = new AbortController();
+    const timer = setTimeout(() => controller.abort(), this.timeoutMs);
+    try {
+      const res = await fetch(`${this.endpoint}/api/embeddings`, {
+        method: 'POST',
+        headers: { 'content-type': 'application/json' },
+        body: JSON.stringify({ model: this.modelId, prompt: text }),
+        signal: controller.signal,
+      });
+      if (!res.ok) {
+        throw new Error(`Ollama embeddings failed (${res.status}): ${await res.text()}`);
+      }
+      const json = (await res.json()) as { embedding?: number[] };
+      if (!Array.isArray(json.embedding) || json.embedding.length === 0) {
+        throw new Error('Ollama embeddings response missing embedding vector.');
+      }
+      return Float32Array.from(json.embedding);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      throw new Error(`Failed to embed text with Ollama at ${this.endpoint}: ${message}`);
+    } finally {
+      clearTimeout(timer);
+    }
+  }
+}
diff --git a/packages/semantic-ollama/tsconfig.json b/packages/semantic-ollama/tsconfig.json
new file mode 100644
index 0000000..d946efe
--- /dev/null
+++ b/packages/semantic-ollama/tsconfig.json
@@ -0,0 +1,8 @@
+{
+  "extends": "../core/tsconfig.json",
+  "compilerOptions": {
+    "outDir": "dist",
+    "rootDir": "src"
+  },
+  "include": ["src/**/*.ts"]
+}