From 9bde1c3791200da117368e0635ba5cfe84d6f16b Mon Sep 17 00:00:00 2001 From: Rahil P Date: Tue, 9 Jun 2026 22:25:19 -0400 Subject: [PATCH 1/7] =?UTF-8?q?feat:=20add=20inferQueryTags=20=E2=80=94=20?= =?UTF-8?q?hashtag/keyword/LLM=20tag=20inference=20from=20recall=20query?= =?UTF-8?q?=20(#142)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/index.ts | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/index.ts b/src/index.ts index 69e1ae8..d5ebce0 100644 --- a/src/index.ts +++ b/src/index.ts @@ -62,6 +62,8 @@ const VECTORIZE_TOP_K_MULTIPLIER = 3; const VECTORIZE_GET_BY_IDS_BATCH = 20; // D1 allows at most 100 bound parameters per query const D1_MAX_BOUND_PARAMS = 100; +const TAG_BOOST_STEP = 0.15; +const TAG_BOOST_MAX = 1.5; // ─── Runtime state ──────────────────────────────────────────────────────────── @@ -520,6 +522,43 @@ export function extractHashtags(content: string): { cleanContent: string; hashta return { cleanContent, hashtags }; } +// ─── Query tag inference ────────────────────────────────────────────────────── + +export async function inferQueryTags(query: string, env: Env): Promise { + const { hashtags } = extractHashtags(query); + + const { results: tagRows } = await env.DB.prepare(`SELECT tags FROM entries`).all(); + const knownTags = [...new Set( + (tagRows as any[]).flatMap(r => JSON.parse((r.tags as string) ?? "[]") as string[]) + )]; + + const lowerQuery = query.toLowerCase(); + const keywordMatches = knownTags.filter(t => + new RegExp(`\\b${t.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\b`, "i").test(lowerQuery) + ); + + const combined = [...new Set([...hashtags, ...keywordMatches])]; + if (combined.length) return combined; + + if (!knownTags.length) return []; + + try { + const stream = await env.AI.run(LLM_MODEL as any, { + messages: [{ + role: "user", + content: `From this list of tags: ${knownTags.slice(0, 50).join(", ")}\n\nWhich tags best match this query? Reply with only a comma-separated list of matching tag names from the list, or nothing if none apply.\n\nQuery: ${query.slice(0, 300)}`, + }], + max_tokens: 100, + stream: true, + }); + const text = await readStreamText(stream as ReadableStream); + const knownSet = new Set(knownTags); + return text.split(",").map(t => t.trim().toLowerCase()).filter(t => t && knownSet.has(t)); + } catch { + return []; + } +} + // ─── Shared entry-listing filter builder ───────────────────────────────────── // Builds the WHERE/ORDER/LIMIT clause shared by list_recent and GET /list so // both stay in sync on which filters (tag, after, before) are supported. From 329767f2306ea5c8a131343c5162e6933b9abe70 Mon Sep 17 00:00:00 2001 From: Rahil P Date: Tue, 9 Jun 2026 22:32:30 -0400 Subject: [PATCH 2/7] =?UTF-8?q?fix:=20inferQueryTags=20=E2=80=94=20early?= =?UTF-8?q?=20hashtag=20exit,=20json=5Feach=20for=20known=20tags,=20thresh?= =?UTF-8?q?old=20constants=20placement?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/index.ts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/index.ts b/src/index.ts index d5ebce0..a50c5e6 100644 --- a/src/index.ts +++ b/src/index.ts @@ -36,6 +36,8 @@ function graceMs(env: Env): number { const DUPLICATE_BLOCK_THRESHOLD = 0.95; const DUPLICATE_FLAG_THRESHOLD = 0.85; const CANDIDATE_SCORE_THRESHOLD = 0.45; +const TAG_BOOST_STEP = 0.15; +const TAG_BOOST_MAX = 1.5; // ─── Model constants ────────────────────────────────────────────────────────── @@ -62,8 +64,6 @@ const VECTORIZE_TOP_K_MULTIPLIER = 3; const VECTORIZE_GET_BY_IDS_BATCH = 20; // D1 allows at most 100 bound parameters per query const D1_MAX_BOUND_PARAMS = 100; -const TAG_BOOST_STEP = 0.15; -const TAG_BOOST_MAX = 1.5; // ─── Runtime state ──────────────────────────────────────────────────────────── @@ -526,19 +526,19 @@ export function extractHashtags(content: string): { cleanContent: string; hashta export async function inferQueryTags(query: string, env: Env): Promise { const { hashtags } = extractHashtags(query); + if (hashtags.length) return hashtags; - const { results: tagRows } = await env.DB.prepare(`SELECT tags FROM entries`).all(); - const knownTags = [...new Set( - (tagRows as any[]).flatMap(r => JSON.parse((r.tags as string) ?? "[]") as string[]) - )]; + const { results: tagRows } = await env.DB.prepare( + `SELECT DISTINCT value FROM entries, json_each(entries.tags) ORDER BY value` + ).all(); + const knownTags = (tagRows as { value: string }[]).map(r => r.value); const lowerQuery = query.toLowerCase(); const keywordMatches = knownTags.filter(t => new RegExp(`\\b${t.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\b`, "i").test(lowerQuery) ); - const combined = [...new Set([...hashtags, ...keywordMatches])]; - if (combined.length) return combined; + if (keywordMatches.length) return keywordMatches; if (!knownTags.length) return []; From bda92fdd7aea45234bbf5bc036b49a84015ebc2b Mon Sep 17 00:00:00 2001 From: Rahil P Date: Tue, 9 Jun 2026 22:35:14 -0400 Subject: [PATCH 3/7] =?UTF-8?q?test:=20unit=20tests=20for=20inferQueryTags?= =?UTF-8?q?=20=E2=80=94=20hashtag,=20keyword,=20LLM=20fallback,=20error=20?= =?UTF-8?q?handling=20(#142)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/unit/infer-query-tags.test.ts | 88 ++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 test/unit/infer-query-tags.test.ts diff --git a/test/unit/infer-query-tags.test.ts b/test/unit/infer-query-tags.test.ts new file mode 100644 index 0000000..501d072 --- /dev/null +++ b/test/unit/infer-query-tags.test.ts @@ -0,0 +1,88 @@ +import { describe, it, expect, vi } from "vitest"; +import { inferQueryTags } from "../../src/index"; +import { makeTestEnv, makeTestDb } from "../helpers/make-env"; + +function makeSseStream(response: string) { + return new ReadableStream({ + start(c) { + c.enqueue(new TextEncoder().encode(`data: {"response":${JSON.stringify(response)}}\n\n`)); + c.enqueue(new TextEncoder().encode("data: [DONE]\n\n")); + c.close(); + }, + }); +} + +describe("inferQueryTags", () => { + it("returns hashtags extracted from the query without hitting the DB", async () => { + const db = makeTestDb(); + const aiRun = vi.fn(); + const dbPrepareSpy = vi.spyOn(db, "prepare"); + const env = makeTestEnv(db, { AI: { run: aiRun } as unknown as Ai }); + const tags = await inferQueryTags("what did I decide about #work today?", env); + expect(tags).toEqual(["work"]); + // Early return — no DB or LLM call + expect(dbPrepareSpy).not.toHaveBeenCalled(); + expect(aiRun).not.toHaveBeenCalled(); + }); + + it("returns keyword-matched known tags (whole-word match, case-insensitive)", async () => { + const db = makeTestDb(); + db.entries.push({ id: "e1", content: "Office lease note", tags: '["work","legal"]', source: "api", created_at: 1000, vector_ids: "[]", recall_count: 0, importance_score: 0 }); + const env = makeTestEnv(db); + const tags = await inferQueryTags("what work and legal things did I decide?", env); + expect(tags).toContain("work"); + expect(tags).toContain("legal"); + }); + + it("does not call the LLM when keyword matches are found", async () => { + const db = makeTestDb(); + db.entries.push({ id: "e1", content: "Note", tags: '["work"]', source: "api", created_at: 1000, vector_ids: "[]", recall_count: 0, importance_score: 0 }); + const aiRun = vi.fn().mockResolvedValue(makeSseStream("work")); + const env = makeTestEnv(db, { AI: { run: aiRun } as unknown as Ai }); + await inferQueryTags("work meeting notes", env); + expect(aiRun).not.toHaveBeenCalled(); + }); + + it("calls the LLM and intersects with known tags when cheap inference finds nothing", async () => { + const db = makeTestDb(); + db.entries.push({ id: "e1", content: "Note", tags: '["work","personal"]', source: "api", created_at: 1000, vector_ids: "[]", recall_count: 0, importance_score: 0 }); + const aiRun = vi.fn().mockResolvedValue(makeSseStream("work, personal")); + const env = makeTestEnv(db, { AI: { run: aiRun } as unknown as Ai }); + const tags = await inferQueryTags("quarterly planning session", env); + expect(tags).toEqual(expect.arrayContaining(["work", "personal"])); + expect(aiRun).toHaveBeenCalledTimes(1); + }); + + it("filters out unknown tags returned by the LLM (intersects with known set)", async () => { + const db = makeTestDb(); + db.entries.push({ id: "e1", content: "Note", tags: '["work"]', source: "api", created_at: 1000, vector_ids: "[]", recall_count: 0, importance_score: 0 }); + const aiRun = vi.fn().mockResolvedValue(makeSseStream("work, invented-tag, random")); + const env = makeTestEnv(db, { AI: { run: aiRun } as unknown as Ai }); + const tags = await inferQueryTags("quarterly planning session", env); + expect(tags).toEqual(["work"]); + }); + + it("returns empty array when the LLM throws — never propagates error", async () => { + const db = makeTestDb(); + db.entries.push({ id: "e1", content: "Note", tags: '["work"]', source: "api", created_at: 1000, vector_ids: "[]", recall_count: 0, importance_score: 0 }); + const aiRun = vi.fn().mockRejectedValue(new Error("AI unavailable")); + const env = makeTestEnv(db, { AI: { run: aiRun } as unknown as Ai }); + await expect(inferQueryTags("quarterly planning session", env)).resolves.toEqual([]); + }); + + it("returns empty array when DB has no entries (no vocabulary to match against)", async () => { + const db = makeTestDb(); + const env = makeTestEnv(db); + const tags = await inferQueryTags("quarterly planning session", env); + expect(tags).toEqual([]); + }); + + it("does not partially match — 'networking' does not match tag 'net'", async () => { + const db = makeTestDb(); + db.entries.push({ id: "e1", content: "Note", tags: '["net"]', source: "api", created_at: 1000, vector_ids: "[]", recall_count: 0, importance_score: 0 }); + const env = makeTestEnv(db); + // \bnet\b must not match "networking" (word boundary after 't' is before 'w', not end of word) + const tags = await inferQueryTags("networking event", env); + expect(tags).not.toContain("net"); + }); +}); From 952e075c8737050f113e4bd6ca3383495660cd54 Mon Sep 17 00:00:00 2001 From: Rahil P Date: Tue, 9 Jun 2026 22:38:00 -0400 Subject: [PATCH 4/7] =?UTF-8?q?test:=20tighten=20infer-query-tags=20assert?= =?UTF-8?q?ions=20=E2=80=94=20length=20checks=20and=20return-value=20verif?= =?UTF-8?q?ication?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/unit/infer-query-tags.test.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/unit/infer-query-tags.test.ts b/test/unit/infer-query-tags.test.ts index 501d072..2cb6837 100644 --- a/test/unit/infer-query-tags.test.ts +++ b/test/unit/infer-query-tags.test.ts @@ -30,8 +30,8 @@ describe("inferQueryTags", () => { db.entries.push({ id: "e1", content: "Office lease note", tags: '["work","legal"]', source: "api", created_at: 1000, vector_ids: "[]", recall_count: 0, importance_score: 0 }); const env = makeTestEnv(db); const tags = await inferQueryTags("what work and legal things did I decide?", env); - expect(tags).toContain("work"); - expect(tags).toContain("legal"); + expect(tags).toHaveLength(2); + expect(tags).toEqual(expect.arrayContaining(["work", "legal"])); }); it("does not call the LLM when keyword matches are found", async () => { @@ -39,7 +39,8 @@ describe("inferQueryTags", () => { db.entries.push({ id: "e1", content: "Note", tags: '["work"]', source: "api", created_at: 1000, vector_ids: "[]", recall_count: 0, importance_score: 0 }); const aiRun = vi.fn().mockResolvedValue(makeSseStream("work")); const env = makeTestEnv(db, { AI: { run: aiRun } as unknown as Ai }); - await inferQueryTags("work meeting notes", env); + const tags = await inferQueryTags("work meeting notes", env); + expect(tags).toContain("work"); expect(aiRun).not.toHaveBeenCalled(); }); @@ -49,6 +50,7 @@ describe("inferQueryTags", () => { const aiRun = vi.fn().mockResolvedValue(makeSseStream("work, personal")); const env = makeTestEnv(db, { AI: { run: aiRun } as unknown as Ai }); const tags = await inferQueryTags("quarterly planning session", env); + expect(tags).toHaveLength(2); expect(tags).toEqual(expect.arrayContaining(["work", "personal"])); expect(aiRun).toHaveBeenCalledTimes(1); }); From b0c98fbd9d8e040d84d80b46720ed5c4f785a04e Mon Sep 17 00:00:00 2001 From: Rahil P Date: Tue, 9 Jun 2026 22:40:00 -0400 Subject: [PATCH 5/7] feat: add queryTags soft boost to rerankWithTimeDecay (#142) --- src/index.ts | 10 ++++++++-- test/unit/rerank.test.ts | 15 +++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/index.ts b/src/index.ts index a50c5e6..b02bb7f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -413,7 +413,8 @@ export function cosineSim(a: ArrayLike, b: ArrayLike): number { export function rerankWithTimeDecay( matches: VectorizeMatch[], recallCounts: Map = new Map(), - importanceScores: Map = new Map() + importanceScores: Map = new Map(), + queryTags: string[] = [] ): VectorizeMatch[] { const now = Date.now(); @@ -442,7 +443,12 @@ export function rerankWithTimeDecay( const imp = importanceScores.get(parentId) ?? 0; const importanceMultiplier = imp === 0 ? 1.0 : 0.8 + (imp / 5) * 0.4; - return { ...match, score: match.score * combinedMultiplier * appendPenalty * rolledUpPenalty * importanceMultiplier }; + // Tag boost: applied outside the recency ≤1.0 cap so a tag-relevant memory can + // surface above a marginally-closer but irrelevant one. + const overlap = queryTags.length ? tags.filter(t => queryTags.includes(t)).length : 0; + const tagBoost = overlap ? Math.min(TAG_BOOST_MAX, 1 + overlap * TAG_BOOST_STEP) : 1.0; + + return { ...match, score: match.score * combinedMultiplier * appendPenalty * rolledUpPenalty * importanceMultiplier * tagBoost }; }) .sort((a, b) => b.score - a.score); } diff --git a/test/unit/rerank.test.ts b/test/unit/rerank.test.ts index 84ce068..78d1704 100644 --- a/test/unit/rerank.test.ts +++ b/test/unit/rerank.test.ts @@ -106,4 +106,19 @@ describe("rerankWithTimeDecay", () => { const result = rerankWithTimeDecay([old, fresh], new Map(), importance); expect(result[0].id).toBe("old"); }); + + it("tag-overlapping entry outranks equal-vector-score entry without matching tag", () => { + const withTag = match("tagged", 0.9, NOW - 5 * MS_DAY, ["work"]); + const withoutTag = match("untagged", 0.9, NOW - 5 * MS_DAY, ["personal"]); + const result = rerankWithTimeDecay([withoutTag, withTag], new Map(), new Map(), ["work"]); + expect(result[0].id).toBe("tagged"); + expect(result[0].score).toBeGreaterThan(result[1].score); + }); + + it("queryTags=[] produces identical scores to no queryTags argument (backward compat)", () => { + const m = match("entry", 0.9, NOW - 5 * MS_DAY, ["work"]); + const [withEmpty] = rerankWithTimeDecay([m], new Map(), new Map(), []); + const [withDefault] = rerankWithTimeDecay([m]); + expect(withEmpty.score).toBeCloseTo(withDefault.score, 6); + }); }); From 2f8f40cbe41714b6933fbd047b022f0f26ce597d Mon Sep 17 00:00:00 2001 From: Rahil P Date: Tue, 9 Jun 2026 22:44:39 -0400 Subject: [PATCH 6/7] =?UTF-8?q?feat:=20wire=20inferQueryTags=20into=20reca?= =?UTF-8?q?llEntries=20=E2=80=94=20parallel=20embed=20+=20tag=20inference?= =?UTF-8?q?=20(#142)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/index.ts | 7 ++-- test/integration/recall.test.ts | 58 +++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/src/index.ts b/src/index.ts index b02bb7f..19ebc4d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -977,7 +977,10 @@ export async function recallEntries( embedQuery = parsed.cleanQuery; } - const values = await embed(embedQuery, env); + const [values, queryTags] = await Promise.all([ + embed(embedQuery, env), + inferQueryTags(embedQuery, env), + ]); let results: { matches: VectorizeMatch[] }; if (tag) { @@ -1041,7 +1044,7 @@ export async function recallEntries( const recallCounts = new Map(rcRows.map(r => [r.id, r.recall_count ?? 0])); const importanceScores = new Map(rcRows.map(r => [r.id, r.importance_score ?? 0])); - const reranked = rerankWithTimeDecay(results.matches as VectorizeMatch[], recallCounts, importanceScores); + const reranked = rerankWithTimeDecay(results.matches as VectorizeMatch[], recallCounts, importanceScores, queryTags); const seen = new Set(); const deduped = reranked.filter((m) => { diff --git a/test/integration/recall.test.ts b/test/integration/recall.test.ts index 652b376..e42022c 100644 --- a/test/integration/recall.test.ts +++ b/test/integration/recall.test.ts @@ -278,4 +278,62 @@ describe("GET /recall", () => { const scoringCalls = prepareSpy.mock.calls.filter(([sql]) => sql.includes("recall_count, importance_score")); expect(scoringCalls).toHaveLength(2); }); + + it("hashtag or keyword in query skips the LLM during tag inference", async () => { + db.entries.push( + { id: "entry-1", content: "Work meeting notes", tags: '["work"]', source: "api", created_at: 1000, vector_ids: '["entry-1"]', recall_count: 0, importance_score: 0 }, + ); + const aiRun = vi.fn().mockImplementation(async (model: string) => { + if (model === "@cf/baai/bge-small-en-v1.5") return { data: [new Array(384).fill(0.1)] }; + return new ReadableStream({ + start(c) { + c.enqueue(new TextEncoder().encode('data: {"response":"work"}\n\n')); + c.enqueue(new TextEncoder().encode("data: [DONE]\n\n")); + c.close(); + }, + }); + }); + env = makeTestEnv(db, { + AI: { run: aiRun } as unknown as Ai, + VECTORIZE: makeVectorizeMock({ + query: vi.fn().mockResolvedValue({ matches: [makeMatch("entry-1", 0.9)] }), + }), + }); + + const res = await worker.fetch(req("GET", "/recall?query=work+meeting"), env, ctx); + expect(res.status).toBe(200); + // "work" is a known tag AND appears as a keyword in the query → LLM not called for inference + // (embed call uses BGE model; only LLM calls use other models) + const llmCalls = aiRun.mock.calls.filter((args: any[]) => args[0] !== "@cf/baai/bge-small-en-v1.5"); + expect(llmCalls).toHaveLength(0); + }); + + it("query with no matching keywords exercises the LLM fallback for tag inference", async () => { + db.entries.push( + { id: "entry-1", content: "Office lease renewal", tags: '["work"]', source: "api", created_at: 1000, vector_ids: '["entry-1"]', recall_count: 0, importance_score: 0 }, + ); + const aiRun = vi.fn().mockImplementation(async (model: string) => { + if (model === "@cf/baai/bge-small-en-v1.5") return { data: [new Array(384).fill(0.1)] }; + return new ReadableStream({ + start(c) { + c.enqueue(new TextEncoder().encode('data: {"response":"work"}\n\n')); + c.enqueue(new TextEncoder().encode("data: [DONE]\n\n")); + c.close(); + }, + }); + }); + env = makeTestEnv(db, { + AI: { run: aiRun } as unknown as Ai, + VECTORIZE: makeVectorizeMock({ + query: vi.fn().mockResolvedValue({ matches: [makeMatch("entry-1", 0.9)] }), + }), + }); + + // "quarterly planning" — no hashtags, "work" is not a whole word in this query + const res = await worker.fetch(req("GET", "/recall?query=quarterly+planning"), env, ctx); + expect(res.status).toBe(200); + // LLM called at least once (for tag inference); embedding uses BGE model (not counted) + const llmCalls = aiRun.mock.calls.filter((args: any[]) => args[0] !== "@cf/baai/bge-small-en-v1.5"); + expect(llmCalls.length).toBeGreaterThanOrEqual(1); + }); }); From 76a206dbd92eb4a62def37a2bebfd4d05176253d Mon Sep 17 00:00:00 2001 From: Rahil P Date: Tue, 9 Jun 2026 22:51:28 -0400 Subject: [PATCH 7/7] fix: use lookahead/lookbehind for hyphenated-tag keyword matching (#142) --- src/index.ts | 2 +- test/unit/infer-query-tags.test.ts | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/index.ts b/src/index.ts index 19ebc4d..616eb07 100644 --- a/src/index.ts +++ b/src/index.ts @@ -541,7 +541,7 @@ export async function inferQueryTags(query: string, env: Env): Promise const lowerQuery = query.toLowerCase(); const keywordMatches = knownTags.filter(t => - new RegExp(`\\b${t.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\b`, "i").test(lowerQuery) + new RegExp(`(? { const db = makeTestDb(); db.entries.push({ id: "e1", content: "Note", tags: '["net"]', source: "api", created_at: 1000, vector_ids: "[]", recall_count: 0, importance_score: 0 }); const env = makeTestEnv(db); - // \bnet\b must not match "networking" (word boundary after 't' is before 'w', not end of word) const tags = await inferQueryTags("networking event", env); expect(tags).not.toContain("net"); }); + + it("does not match a hyphenated compound — 'my-claude-response-thing' does not match tag 'claude-response'", async () => { + const db = makeTestDb(); + db.entries.push({ id: "e1", content: "Note", tags: '["claude-response"]', source: "api", created_at: 1000, vector_ids: "[]", recall_count: 0, importance_score: 0 }); + const env = makeTestEnv(db); + const tags = await inferQueryTags("my-claude-response-thing happened", env); + expect(tags).not.toContain("claude-response"); + }); + + it("matches a hyphenated tag that appears standalone in the query", async () => { + const db = makeTestDb(); + db.entries.push({ id: "e1", content: "Note", tags: '["claude-response"]', source: "api", created_at: 1000, vector_ids: "[]", recall_count: 0, importance_score: 0 }); + const env = makeTestEnv(db); + const tags = await inferQueryTags("what claude-response notes do I have", env); + expect(tags).toContain("claude-response"); + }); });