diff --git a/README.md b/README.md index fd451e8..8a6a44a 100644 --- a/README.md +++ b/README.md @@ -120,7 +120,7 @@ The **Deploy to Cloudflare** button provisions it automatically. - [Capture from Anywhere](../../wiki/Capture-from-Anywhere) — browser extension, bookmarklet, iOS Shortcuts, share sheet - [Web UI](../../wiki/Web-UI) — dashboard and mobile interface - [Obsidian Plugin](../../wiki/Obsidian-Plugin) — install, configure, sync modes -- [API Reference](../../wiki/API-Reference) — /capture, /append, /update, /list, /count, /tags, /stats, /chat, /mcp endpoints +- [API Reference](../../wiki/API-Reference) — /capture, /append, /update, /list, /recall, /forget, /count, /tags, /stats, /chat, /digest, /mcp endpoints ----- diff --git a/src/index.ts b/src/index.ts index 3bab33d..5ddbcd2 100644 --- a/src/index.ts +++ b/src/index.ts @@ -88,6 +88,13 @@ function json(data: unknown, status = 200): Response { }); } +// Returns a 401 Response if the request lacks a valid token, otherwise null — +// lets routes early-return with `const authErr = requireAuth(...); if (authErr) return authErr;` +function requireAuth(request: Request, env: Env): Response | null { + if (isAuthorized(request, env)) return null; + return json({ ok: false, error: "Unauthorized" }, 401); +} + // Hosted OAuth login page. Styled to match the dashboard's token-entry card // (#auth-overlay in public/index.html) — same fonts, palette, and layout. function loginHtml(error?: string): string { @@ -489,6 +496,30 @@ export function extractHashtags(content: string): { cleanContent: string; hashta return { cleanContent, hashtags }; } +// ─── Shared entry-listing filter builder ───────────────────────────────────── +// Builds the WHERE/ORDER/LIMIT clause shared by list_recent and GET /list so +// both stay in sync on which filters (tag, after, before) are supported. 
+ +export function buildEntryFilterQuery(params: { + n: number; + tag?: string; + after?: number; + before?: number; +}): { sql: string; bindings: (string | number)[] } { + const conds: string[] = []; + const bindings: (string | number)[] = []; + if (params.tag) { conds.push(`tags LIKE ?`); bindings.push(`%"${params.tag}"%`); } + if (params.after !== undefined) { conds.push(`created_at >= ?`); bindings.push(params.after); } + if (params.before !== undefined) { conds.push(`created_at <= ?`); bindings.push(params.before); } + + let sql = `SELECT id, content, tags, source, created_at FROM entries`; + if (conds.length) sql += ` WHERE ` + conds.join(` AND `); + sql += ` ORDER BY created_at DESC LIMIT ?`; + bindings.push(params.n); + + return { sql, bindings }; +} + // ─── Store entry (full embed + chunk) ──────────────────────────────────────── // Returns the list of vector IDs inserted so forget() can clean up exactly. @@ -830,6 +861,159 @@ async function runNightlyCompression(env: Env, ctx: ExecutionContext): Promise { + const { query, topK } = params; + let { tag, after, before } = params; + const now = Date.now(); + + let embedQuery = query; + if (after === undefined && before === undefined) { + const parsed = parseTimePhrase(query, now); + after = parsed.after; + before = parsed.before; + embedQuery = parsed.cleanQuery; + } + + const values = await embed(embedQuery, env); + + // If tag filter, resolve matching IDs from D1 first (D1 is source of truth for tags) + let tagFilterIds: Set | null = null; + if (tag) { + const { results: tagRows } = await env.DB.prepare( + `SELECT id FROM entries WHERE tags LIKE ?` + ).bind(`%"${tag}"%`).all(); + tagFilterIds = new Set((tagRows as any[]).map(r => r.id as string)); + if (tagFilterIds.size === 0) return { matches: [], insight: "" }; + } + + // Query Vectorize without filter — tag filtering happens in-memory below + // Cloudflare Vectorize caps topK at 50 when returnMetadata="all" (error 40025) + const vectorizeTopK = 
Math.min(topK * VECTORIZE_TOP_K_MULTIPLIER, 50); + let results = await env.VECTORIZE.query(values, { + topK: vectorizeTopK, + returnMetadata: "all", + }); + + if (results.matches.length && results.matches[0].score < DUPLICATE_FLAG_THRESHOLD) { + results = await env.VECTORIZE.query(values, { + topK: 50, + returnMetadata: "all", + }); + } + + if (!results.matches.length) return { matches: [], insight: "" }; + + // Fetch recall_count and importance_score for all candidates to use in scoring + const candidateIds = [...new Set(results.matches.map(m => (m.metadata as any)?.parentId ?? m.id))] as string[]; + const rcPlaceholders = candidateIds.map(() => "?").join(", "); + const { results: rcRows } = await env.DB.prepare( + `SELECT id, recall_count, importance_score FROM entries WHERE id IN (${rcPlaceholders})` + ).bind(...candidateIds).all() as { results: { id: string; recall_count: number; importance_score: number }[] }; + const recallCounts = new Map(rcRows.map(r => [r.id, r.recall_count ?? 0])); + const importanceScores = new Map(rcRows.map(r => [r.id, r.importance_score ?? 0])); + + const reranked = rerankWithTimeDecay(results.matches as VectorizeMatch[], recallCounts, importanceScores); + + const seen = new Set(); + const deduped = reranked.filter((m) => { + const parentId = (m.metadata as any)?.parentId ?? m.id; + if (seen.has(parentId)) return false; + // Apply tag filter against D1-resolved IDs + if (tagFilterIds && !tagFilterIds.has(parentId)) return false; + seen.add(parentId); + return true; + }).slice(0, topK); + + if (!deduped.length) return { matches: [], insight: "" }; + + // Fetch full content from D1 for all matched parent IDs, applying time filter if set + const parentIds = deduped.map((m) => (m.metadata as any)?.parentId ?? 
m.id); + const placeholders = parentIds.map(() => "?").join(", "); + const d1Bindings: (string | number)[] = [...parentIds]; + let d1Sql = `SELECT id, content, tags, source, created_at FROM entries WHERE id IN (${placeholders}) AND tags NOT LIKE '%"auto-pattern"%'`; + if (after !== undefined) { d1Sql += ` AND created_at >= ?`; d1Bindings.push(after); } + if (before !== undefined) { d1Sql += ` AND created_at <= ?`; d1Bindings.push(before); } + const { results: d1Rows } = await env.DB.prepare(d1Sql).bind(...d1Bindings).all() as { results: Record[] }; + + const d1Map = new Map(d1Rows.map((r) => [r.id as string, r])); + + // Increment recall_count for entries actually shown + ctx.waitUntil( + Promise.all( + [...d1Map.keys()].map(id => + env.DB.prepare(`UPDATE entries SET recall_count = recall_count + 1 WHERE id = ?`).bind(id).run() + ) + ).catch(e => console.error("recall_count update failed (non-fatal):", e)) + ); + + const matches: RecallMatch[] = deduped.map((m) => { + const meta = m.metadata as Record; + const parentId = (meta?.parentId ?? m.id) as string; + const row = d1Map.get(parentId); + const isUpdate = !!meta?.isUpdate; + + if (row) { + return { + id: parentId, + content: row.content as string, + score: m.score, + createdAt: row.created_at as number, + tags: JSON.parse(row.tags ?? "[]"), + source: row.source as string, + isUpdate, + }; + } + + // Fallback to metadata if D1 row not found (shouldn't happen) + return { + id: parentId, + content: (meta?.content as string) ?? "", + score: m.score, + createdAt: (meta?.created_at as number) ?? now, + tags: Array.isArray(meta?.tags) ? (meta.tags as string[]) : [], + source: (meta?.source as string) ?? "", + isUpdate, + }; + }); + + const insight = d1Rows.length > 1 + ? 
await synthesizeInsight(embedQuery, d1Rows as { id: string; content: string }[], env) + : ""; + + if (d1Rows.length >= 5) { + ctx.waitUntil( + derivePattern(d1Rows as { id: string; content: string }[], env, ctx) + .catch(e => console.error("derivePattern failed (non-fatal):", e)) + ); + } + + return { matches, insight }; +} + // ─── Shared write path ──────────────────────────────────────────────────────── export type CaptureResult = @@ -940,6 +1124,37 @@ export async function captureEntry( return { status: "stored", id }; } +// ─── Shared delete path ─────────────────────────────────────────────────────── +// Used by both the `forget` MCP tool and POST /forget so the cleanup logic +// (D1 row + tracked Vectorize IDs) lives in exactly one place. + +export type ForgetResult = + | { status: "not_found" } + | { status: "deleted"; vectorCount: number }; + +export async function forgetEntry(id: string, env: Env): Promise { + const row = await env.DB.prepare( + `SELECT vector_ids FROM entries WHERE id = ?` + ).bind(id).first() as Record | null; + + if (!row) return { status: "not_found" }; + + const vectorIds: string[] = JSON.parse(row.vector_ids ?? 
"[]"); + + await env.DB.prepare(`DELETE FROM entries WHERE id = ?`).bind(id).run(); + + try { + if (vectorIds.length) { + // Delete exact IDs — no guessing, no leaks + await env.VECTORIZE.deleteByIds(vectorIds); + } + } catch (e) { + console.error("Vectorize delete failed (non-fatal):", e); + } + + return { status: "deleted", vectorCount: vectorIds.length }; +} + // ─── MCP Server ─────────────────────────────────────────────────────────────── function buildMcpServer(env: Env, ctx: ExecutionContext): McpServer { @@ -1096,126 +1311,21 @@ function buildMcpServer(env: Env, ctx: ExecutionContext): McpServer { }, }, async ({ query, topK, tag, after, before }) => { - const now = Date.now(); - let embedQuery = query; - if (after === undefined && before === undefined) { - const parsed = parseTimePhrase(query, now); - after = parsed.after; - before = parsed.before; - embedQuery = parsed.cleanQuery; - } + const { matches, insight } = await recallEntries({ query, topK, tag, after, before }, env, ctx); - const values = await embed(embedQuery, env); - - // If tag filter, resolve matching IDs from D1 first (D1 is source of truth for tags) - let tagFilterIds: Set | null = null; - if (tag) { - const { results: tagRows } = await env.DB.prepare( - `SELECT id FROM entries WHERE tags LIKE ?` - ).bind(`%"${tag}"%`).all(); - tagFilterIds = new Set((tagRows as any[]).map(r => r.id as string)); - if (tagFilterIds.size === 0) { - return { content: [{ type: "text", text: "Nothing found matching that query." 
}] }; - } - } - - // Query Vectorize without filter — tag filtering happens in-memory below - // Cloudflare Vectorize caps topK at 50 when returnMetadata="all" (error 40025) - const vectorizeTopK = Math.min(topK * VECTORIZE_TOP_K_MULTIPLIER, 50); - let results = await env.VECTORIZE.query(values, { - topK: vectorizeTopK, - returnMetadata: "all", - }); - - if (results.matches.length && results.matches[0].score < DUPLICATE_FLAG_THRESHOLD) { - results = await env.VECTORIZE.query(values, { - topK: 50, - returnMetadata: "all", - }); - } - - if (!results.matches.length) { - return { content: [{ type: "text", text: "Nothing found matching that query." }] }; - } - - // Fetch recall_count and importance_score for all candidates to use in scoring - const candidateIds = [...new Set(results.matches.map(m => (m.metadata as any)?.parentId ?? m.id))] as string[]; - const rcPlaceholders = candidateIds.map(() => "?").join(", "); - const { results: rcRows } = await env.DB.prepare( - `SELECT id, recall_count, importance_score FROM entries WHERE id IN (${rcPlaceholders})` - ).bind(...candidateIds).all() as { results: { id: string; recall_count: number; importance_score: number }[] }; - const recallCounts = new Map(rcRows.map(r => [r.id, r.recall_count ?? 0])); - const importanceScores = new Map(rcRows.map(r => [r.id, r.importance_score ?? 0])); - - const reranked = rerankWithTimeDecay(results.matches as VectorizeMatch[], recallCounts, importanceScores); - - const seen = new Set(); - const deduped = reranked.filter((m) => { - const parentId = (m.metadata as any)?.parentId ?? m.id; - if (seen.has(parentId)) return false; - // Apply tag filter against D1-resolved IDs - if (tagFilterIds && !tagFilterIds.has(parentId)) return false; - seen.add(parentId); - return true; - }).slice(0, topK); - - if (!deduped.length) { + if (!matches.length) { return { content: [{ type: "text", text: "Nothing found matching that query." 
}] }; } - // Fetch full content from D1 for all matched parent IDs, applying time filter if set - const parentIds = deduped.map((m) => (m.metadata as any)?.parentId ?? m.id); - const placeholders = parentIds.map(() => "?").join(", "); - const d1Bindings: (string | number)[] = [...parentIds]; - let d1Sql = `SELECT id, content, tags, source, created_at FROM entries WHERE id IN (${placeholders}) AND tags NOT LIKE '%"auto-pattern"%'`; - if (after !== undefined) { d1Sql += ` AND created_at >= ?`; d1Bindings.push(after); } - if (before !== undefined) { d1Sql += ` AND created_at <= ?`; d1Bindings.push(before); } - const { results: d1Rows } = await env.DB.prepare(d1Sql).bind(...d1Bindings).all() as { results: Record[] }; - - const d1Map = new Map(d1Rows.map((r) => [r.id as string, r])); - - // Increment recall_count for entries actually shown - ctx.waitUntil( - Promise.all( - [...d1Map.keys()].map(id => - env.DB.prepare(`UPDATE entries SET recall_count = recall_count + 1 WHERE id = ?`).bind(id).run() - ) - ).catch(e => console.error("recall_count update failed (non-fatal):", e)) - ); - - const text = deduped.map((m, i) => { - const meta = m.metadata as Record; - const parentId = (meta?.parentId ?? m.id) as string; - const row = d1Map.get(parentId); + const text = matches.map((m, i) => { + const date = new Date(m.createdAt).toLocaleDateString(); + const tagList = m.tags.length ? ` [${m.tags.join(", ")}]` : ""; + const src = m.source ? ` · ${m.source}` : ""; const score = (m.score * 100).toFixed(0); - const updateLabel = meta?.isUpdate ? " [updated]" : ""; - - if (row) { - const date = new Date(row.created_at as number).toLocaleDateString(); - const tags: string[] = JSON.parse(row.tags ?? "[]"); - const tagList = tags.length ? ` [${tags.join(", ")}]` : ""; - const src = row.source ? ` · ${row.source}` : ""; - return `${i + 1}. 
[${date}${src}${tagList}] (${score}% match)${updateLabel}\n${row.content}`; - } - - // Fallback to metadata if D1 row not found (shouldn't happen) - const date = meta?.created_at ? new Date(meta.created_at as number).toLocaleDateString() : "?"; - const tagList = Array.isArray(meta?.tags) && meta.tags.length ? ` [${(meta.tags as string[]).join(", ")}]` : ""; - const src = meta?.source ? ` · ${meta.source}` : ""; - return `${i + 1}. [${date}${src}${tagList}] (${score}% match)${updateLabel}\n${meta?.content ?? ""}`; + const updateLabel = m.isUpdate ? " [updated]" : ""; + return `${i + 1}. [${date}${src}${tagList}] (${score}% match)${updateLabel}\n${m.content}`; }).join("\n\n"); - const insight = d1Rows.length > 1 - ? await synthesizeInsight(embedQuery, d1Rows as { id: string; content: string }[], env) - : ""; - - if (d1Rows.length >= 5) { - ctx.waitUntil( - derivePattern(d1Rows as { id: string; content: string }[], env, ctx) - .catch(e => console.error("derivePattern failed (non-fatal):", e)) - ); - } - const finalText = insight ? 
`**Insight:** ${insight}\n\n---\n\n${text}` : text; return { content: [{ type: "text", text: finalText }] }; } @@ -1234,16 +1344,8 @@ function buildMcpServer(env: Env, ctx: ExecutionContext): McpServer { }, }, async ({ n, tag, after, before }) => { - const conds: string[] = []; - const p: (string | number)[] = []; - if (tag) { conds.push(`tags LIKE ?`); p.push(`%"${tag}"%`); } - if (after !== undefined) { conds.push(`created_at >= ?`); p.push(after); } - if (before !== undefined) { conds.push(`created_at <= ?`); p.push(before); } - let q = `SELECT id, content, tags, source, created_at FROM entries`; - if (conds.length) q += ` WHERE ` + conds.join(` AND `); - q += ` ORDER BY created_at DESC LIMIT ?`; p.push(n); - - const { results } = await env.DB.prepare(q).bind(...p).all(); + const { sql, bindings } = buildEntryFilterQuery({ n, tag, after, before }); + const { results } = await env.DB.prepare(sql).bind(...bindings).all(); if (!results.length) { return { content: [{ type: "text", text: "No entries found." }] }; @@ -1270,25 +1372,11 @@ function buildMcpServer(env: Env, ctx: ExecutionContext): McpServer { }, }, async ({ id }) => { - // Fetch tracked vector IDs before deleting the D1 row - const row = await env.DB.prepare( - `SELECT vector_ids FROM entries WHERE id = ?` - ).bind(id).first() as Record | null; - - const vectorIds: string[] = JSON.parse(row?.vector_ids ?? 
"[]"); - - await env.DB.prepare(`DELETE FROM entries WHERE id = ?`).bind(id).run(); - - try { - if (vectorIds.length) { - // Delete exact IDs — no guessing, no leaks - await env.VECTORIZE.deleteByIds(vectorIds); - } - } catch (e) { - console.error("Vectorize delete failed (non-fatal):", e); + const result = await forgetEntry(id, env); + if (result.status === "not_found") { + return { content: [{ type: "text", text: `No entry found with ID: ${id}` }] }; } - - return { content: [{ type: "text", text: `Deleted entry ${id} and ${vectorIds.length} vector(s)` }] }; + return { content: [{ type: "text", text: `Deleted entry ${id} and ${result.vectorCount} vector(s)` }] }; } ); @@ -1358,11 +1446,12 @@ const defaultHandler = { // POST /capture if (url.pathname === "/capture" && request.method === "POST") { - if (!isAuthorized(request, env)) return json({ error: "Unauthorized" }, 401); + const authErr = requireAuth(request, env); + if (authErr) return authErr; let body: { content?: string; tags?: string[]; source?: string }; - try { body = await request.json(); } catch { return json({ error: "Invalid JSON" }, 400); } - if (!body.content?.trim()) return json({ error: "content is required" }, 400); + try { body = await request.json(); } catch { return json({ ok: false, error: "Invalid JSON" }, 400); } + if (!body.content?.trim()) return json({ ok: false, error: "content is required" }, 400); const result = await captureEntry(body.content, body.tags ?? [], body.source ?? 
"api", env, ctx); @@ -1399,12 +1488,13 @@ const defaultHandler = { // POST /append if (url.pathname === "/append" && request.method === "POST") { - if (!isAuthorized(request, env)) return json({ error: "Unauthorized" }, 401); + const authErr = requireAuth(request, env); + if (authErr) return authErr; let body: { id?: string; addition?: string }; - try { body = await request.json(); } catch { return json({ error: "Invalid JSON" }, 400); } - if (!body.id?.trim()) return json({ error: "id is required" }, 400); - if (!body.addition?.trim()) return json({ error: "addition is required" }, 400); + try { body = await request.json(); } catch { return json({ ok: false, error: "Invalid JSON" }, 400); } + if (!body.id?.trim()) return json({ ok: false, error: "id is required" }, 400); + if (!body.addition?.trim()) return json({ ok: false, error: "addition is required" }, 400); const id = body.id.trim(); const addition = body.addition.trim(); @@ -1436,12 +1526,13 @@ const defaultHandler = { // POST /update if (url.pathname === "/update" && request.method === "POST") { - if (!isAuthorized(request, env)) return json({ error: "Unauthorized" }, 401); + const authErr = requireAuth(request, env); + if (authErr) return authErr; let body: { id?: string; content?: string }; - try { body = await request.json(); } catch { return json({ error: "Invalid JSON" }, 400); } - if (!body.id?.trim()) return json({ error: "id is required" }, 400); - if (!body.content?.trim()) return json({ error: "content is required" }, 400); + try { body = await request.json(); } catch { return json({ ok: false, error: "Invalid JSON" }, 400); } + if (!body.id?.trim()) return json({ ok: false, error: "id is required" }, 400); + if (!body.content?.trim()) return json({ ok: false, error: "content is required" }, 400); const id = body.id.trim(); const newContent = body.content.trim(); @@ -1481,14 +1572,16 @@ const defaultHandler = { // GET /count if (url.pathname === "/count" && request.method === "GET") { - if 
(!isAuthorized(request, env)) return json({ error: "Unauthorized" }, 401); + const authErr = requireAuth(request, env); + if (authErr) return authErr; const row = await env.DB.prepare(`SELECT COUNT(*) as count FROM entries`).first() as Record | null; return json({ count: (row?.count as number) ?? 0 }); } // GET /tags if (url.pathname === "/tags" && request.method === "GET") { - if (!isAuthorized(request, env)) return json({ error: "Unauthorized" }, 401); + const authErr = requireAuth(request, env); + if (authErr) return authErr; const { results } = await env.DB.prepare( `SELECT DISTINCT value FROM entries, json_each(entries.tags) ORDER BY value` ).all(); @@ -1497,7 +1590,8 @@ const defaultHandler = { // GET /stats if (url.pathname === "/stats" && request.method === "GET") { - if (!isAuthorized(request, env)) return json({ error: "Unauthorized" }, 401); + const authErr = requireAuth(request, env); + if (authErr) return authErr; const [summary, tagRows, candidateRows] = await Promise.all([ env.DB.prepare(`SELECT COUNT(*) as count, AVG(importance_score) as avg_importance FROM entries`).first() as Promise | null>, env.DB.prepare(`SELECT value, COUNT(*) as n FROM entries, json_each(entries.tags) GROUP BY value ORDER BY n DESC LIMIT 5`).all(), @@ -1535,21 +1629,79 @@ const defaultHandler = { // GET /list if (url.pathname === "/list" && request.method === "GET") { - if (!isAuthorized(request, env)) return json({ error: "Unauthorized" }, 401); + const authErr = requireAuth(request, env); + if (authErr) return authErr; const n = Math.min(parseInt(url.searchParams.get("n") ?? "20", 10), 100); - const { results } = await env.DB.prepare( - `SELECT id, content, tags, source, created_at FROM entries ORDER BY created_at DESC LIMIT ?` - ).bind(n).all(); + const tag = url.searchParams.get("tag")?.trim() || undefined; + const after = url.searchParams.has("after") ? parseInt(url.searchParams.get("after")!, 10) : undefined; + const before = url.searchParams.has("before") ? 
parseInt(url.searchParams.get("before")!, 10) : undefined; + + const { sql, bindings } = buildEntryFilterQuery({ n, tag, after, before }); + const { results } = await env.DB.prepare(sql).bind(...bindings).all(); return json(results); } + // GET /recall — semantic search, mirrors the MCP `recall` tool + if (url.pathname === "/recall" && request.method === "GET") { + const authErr = requireAuth(request, env); + if (authErr) return authErr; + + const query = url.searchParams.get("query")?.trim(); + if (!query) return json({ ok: false, error: "query is required" }, 400); + + const topK = Math.min(Math.max(parseInt(url.searchParams.get("topK") ?? "5", 10), 1), 20); + const tag = url.searchParams.get("tag")?.trim() || undefined; + const after = url.searchParams.has("after") ? parseInt(url.searchParams.get("after")!, 10) : undefined; + const before = url.searchParams.has("before") ? parseInt(url.searchParams.get("before")!, 10) : undefined; + + const { matches, insight } = await recallEntries({ query, topK, tag, after, before }, env, ctx); + + if (!matches.length) { + return json({ ok: true, results: [], message: "Nothing found matching that query." 
}); + } + + return json({ + ok: true, + results: matches.map(m => ({ + id: m.id, + content: m.content, + score: parseFloat((m.score * 100).toFixed(1)), + tags: m.tags, + source: m.source, + created_at: m.createdAt, + updated: m.isUpdate, + })), + insight: insight || null, + }); + } + + // POST /forget — delete-by-id, mirrors the MCP `forget` tool + if (url.pathname === "/forget" && request.method === "POST") { + const authErr = requireAuth(request, env); + if (authErr) return authErr; + + let body: { id?: string }; + try { body = await request.json(); } catch { return json({ ok: false, error: "Invalid JSON" }, 400); } + if (!body.id?.trim()) return json({ ok: false, error: "id is required" }, 400); + + const id = body.id.trim(); + const result = await forgetEntry(id, env); + + if (result.status === "not_found") { + return json({ ok: false, error: `No entry found with ID: ${id}` }, 404); + } + + return json({ ok: true, id, deletedVectors: result.vectorCount }); + } + // POST /chat if (url.pathname === "/chat" && request.method === "POST") { - if (!isAuthorized(request, env)) return json({ error: "Unauthorized" }, 401); + const authErr = requireAuth(request, env); + if (authErr) return authErr; let body: { query?: string; memories?: string }; - try { body = await request.json(); } catch { return json({ error: "Invalid JSON" }, 400); } - if (!body.query?.trim()) return json({ error: "query is required" }, 400); + try { body = await request.json(); } catch { return json({ ok: false, error: "Invalid JSON" }, 400); } + if (!body.query?.trim()) return json({ ok: false, error: "query is required" }, 400); const systemPrompt = `You are a personal memory assistant. Answer the user's question using ONLY the memories provided. Even if the match scores are low, extract any relevant facts and answer directly. Never say you don't have enough information if the answer exists anywhere in the memories. 
Be concise.`; @@ -1571,9 +1723,10 @@ const defaultHandler = { // GET /digest if (url.pathname === "/digest" && request.method === "GET") { - if (!isAuthorized(request, env)) return json({ error: "Unauthorized" }, 401); + const authErr = requireAuth(request, env); + if (authErr) return authErr; const tag = url.searchParams.get("tag")?.trim(); - if (!tag) return json({ error: "tag parameter is required" }, 400); + if (!tag) return json({ ok: false, error: "tag parameter is required" }, 400); const result = await compressTag(tag, env, ctx); diff --git a/test/helpers/d1-mock.ts b/test/helpers/d1-mock.ts index 995d0e4..5e91b19 100644 --- a/test/helpers/d1-mock.ts +++ b/test/helpers/d1-mock.ts @@ -116,10 +116,39 @@ export class D1Mock { return null; }, async all() { - if (s.includes("recall_count FROM entries")) { + if (s === "SELECT id FROM entries WHERE tags LIKE ?") { + const pattern = String(args[0]); + const tag = pattern.replace(/%"/g, "").replace(/"%/g, ""); + const results = db.entries + .filter((e: any) => (JSON.parse(e.tags ?? "[]") as string[]).includes(tag)) + .map((e: any) => ({ id: e.id })); + return { results }; + } + if (s.includes("SELECT id, recall_count, importance_score FROM entries")) { const results = db.entries .filter((e: any) => args.includes(e.id)) - .map((e: any) => ({ id: e.id, recall_count: e.recall_count ?? 0 })); + .map((e: any) => ({ id: e.id, recall_count: e.recall_count ?? 0, importance_score: e.importance_score ?? 0 })); + return { results }; + } + if (s.includes("FROM entries WHERE id IN") && s.includes("tags NOT LIKE")) { + // recallEntries D1 hydration — filter by IDs, exclude auto-pattern entries, apply after/before + const inMatch = s.match(/WHERE id IN \(([^)]*)\)/); + const idCount = inMatch ? inMatch[1].split(",").length : 0; + const ids = args.slice(0, idCount); + const rest = args.slice(idCount); + let argIdx = 0; + let rows = db.entries.filter((e: any) => + ids.includes(e.id) && !(JSON.parse(e.tags ?? 
"[]") as string[]).includes("auto-pattern") + ); + if (s.includes("created_at >= ?")) { + const after = Number(rest[argIdx++]); + rows = rows.filter((e: any) => e.created_at >= after); + } + if (s.includes("created_at <= ?")) { + const before = Number(rest[argIdx++]); + rows = rows.filter((e: any) => e.created_at <= before); + } + const results = rows.map((e: any) => ({ id: e.id, content: e.content, tags: e.tags, source: e.source, created_at: e.created_at })); return { results }; } if (s.includes("SELECT id, content FROM entries") && s.includes("WHERE tags LIKE") && s.includes("ORDER BY created_at DESC")) { @@ -167,7 +196,23 @@ export class D1Mock { } if (s.includes("ORDER BY created_at DESC LIMIT")) { const limit = Number(args[args.length - 1]); - const rows = [...db.entries].sort((a: any, b: any) => b.created_at - a.created_at); + const filterArgs = args.slice(0, -1); + let argIdx = 0; + let rows = [...db.entries]; + if (s.includes("tags LIKE ?")) { + const pattern = String(filterArgs[argIdx++]); + const tag = pattern.replace(/%"/g, "").replace(/"%/g, ""); + rows = rows.filter((e: any) => (JSON.parse(e.tags ?? 
"[]") as string[]).includes(tag)); + } + if (s.includes("created_at >= ?")) { + const after = Number(filterArgs[argIdx++]); + rows = rows.filter((e: any) => e.created_at >= after); + } + if (s.includes("created_at <= ?")) { + const before = Number(filterArgs[argIdx++]); + rows = rows.filter((e: any) => e.created_at <= before); + } + rows.sort((a: any, b: any) => b.created_at - a.created_at); return { results: rows.slice(0, limit) }; } return { results: [] }; diff --git a/test/integration/auth.test.ts b/test/integration/auth.test.ts index ff19cab..f250956 100644 --- a/test/integration/auth.test.ts +++ b/test/integration/auth.test.ts @@ -11,6 +11,8 @@ const PROTECTED_ROUTES: Array<[string, string, unknown?]> = [ ["POST", "/append", { id: "abc", addition: "update" }], ["GET", "/list", undefined], ["GET", "/tags", undefined], + ["GET", "/recall?query=test", undefined], + ["POST", "/forget", { id: "abc" }], ["POST", "/chat", { query: "what?" }], ["POST", "/mcp", undefined], ]; diff --git a/test/integration/forget.test.ts b/test/integration/forget.test.ts new file mode 100644 index 0000000..be29cfb --- /dev/null +++ b/test/integration/forget.test.ts @@ -0,0 +1,111 @@ +import { describe, it, expect, beforeEach, vi } from "vitest"; +import worker from "../../src/index"; +import { makeTestEnv, makeTestDb, makeVectorizeMock } from "../helpers/make-env"; +import { req } from "../helpers/make-request"; +import type { Env } from "../../src/index"; +import { D1Mock } from "../helpers/d1-mock"; + +const ctx = { waitUntil: (_: Promise) => {} } as any; + +describe("POST /forget", () => { + let env: Env; + let db: D1Mock; + + beforeEach(() => { + db = makeTestDb(); + env = makeTestEnv(db); + }); + + it("returns 400 when body is invalid JSON", async () => { + const res = await worker.fetch( + new Request("http://localhost/forget", { + method: "POST", + headers: { "Content-Type": "application/json", Authorization: "Bearer test-token" }, + body: "{not json", + }), + env, + ctx + ); + 
expect(res.status).toBe(400); + const data = await res.json() as any; + expect(data.ok).toBe(false); + }); + + it("returns 400 when id is missing", async () => { + const res = await worker.fetch(req("POST", "/forget", { body: {} }), env, ctx); + expect(res.status).toBe(400); + const data = await res.json() as any; + expect(data.ok).toBe(false); + expect(data.error).toBe("id is required"); + }); + + it("returns 404 for non-existent id", async () => { + const res = await worker.fetch(req("POST", "/forget", { body: { id: "no-such-id" } }), env, ctx); + expect(res.status).toBe(404); + const data = await res.json() as any; + expect(data.ok).toBe(false); + }); + + it("deletes an existing entry and its vectors", async () => { + const deleteByIdsMock = vi.fn().mockResolvedValue({ mutationId: "m" }); + env = makeTestEnv(db, { + VECTORIZE: makeVectorizeMock({ deleteByIds: deleteByIdsMock }), + }); + db.entries.push({ + id: "entry-1", + content: "Some content", + tags: "[]", + source: "api", + created_at: Date.now(), + vector_ids: '["entry-1","entry-1-update-111"]', + }); + + const res = await worker.fetch(req("POST", "/forget", { body: { id: "entry-1" } }), env, ctx); + expect(res.status).toBe(200); + const data = await res.json() as any; + expect(data.ok).toBe(true); + expect(data.id).toBe("entry-1"); + expect(data.deletedVectors).toBe(2); + + expect(db.entries.find((e: any) => e.id === "entry-1")).toBeUndefined(); + expect(deleteByIdsMock).toHaveBeenCalledWith(["entry-1", "entry-1-update-111"]); + }); + + it("trims whitespace from id before lookup", async () => { + db.entries.push({ + id: "entry-1", + content: "Some content", + tags: "[]", + source: "api", + created_at: Date.now(), + vector_ids: "[]", + }); + + const res = await worker.fetch(req("POST", "/forget", { body: { id: " entry-1 " } }), env, ctx); + expect(res.status).toBe(200); + const data = await res.json() as any; + expect(data.id).toBe("entry-1"); + }); + + it("is non-fatal when Vectorize delete fails", async 
() => { + env = makeTestEnv(db, { + VECTORIZE: makeVectorizeMock({ + deleteByIds: vi.fn().mockRejectedValue(new Error("Vectorize down")), + }), + }); + db.entries.push({ + id: "entry-1", + content: "Some content", + tags: "[]", + source: "api", + created_at: Date.now(), + vector_ids: '["entry-1"]', + }); + + const res = await worker.fetch(req("POST", "/forget", { body: { id: "entry-1" } }), env, ctx); + expect(res.status).toBe(200); + const data = await res.json() as any; + expect(data.ok).toBe(true); + expect(db.entries.find((e: any) => e.id === "entry-1")).toBeUndefined(); + }); +}); diff --git a/test/integration/list.test.ts b/test/integration/list.test.ts index 5cba5f8..a02e13a 100644 --- a/test/integration/list.test.ts +++ b/test/integration/list.test.ts @@ -64,4 +64,60 @@ describe("GET /list", () => { const data = await res.json(); expect(Array.isArray(data)).toBe(true); }); + + // ── Filter parity with list_recent (?tag, ?after, ?before) ────────────────── + + it("filters by ?tag=", async () => { + db.entries.push( + { id: "work-1", content: "Work note", tags: '["work"]', source: "api", created_at: 1000, vector_ids: "[]" }, + { id: "idea-1", content: "Idea note", tags: '["idea"]', source: "api", created_at: 2000, vector_ids: "[]" }, + ); + + const res = await worker.fetch(req("GET", "/list?tag=work"), env, ctx); + expect(res.status).toBe(200); + const data = await res.json() as any[]; + expect(data).toHaveLength(1); + expect(data[0].id).toBe("work-1"); + }); + + it("filters by ?after=", async () => { + db.entries.push( + { id: "old", content: "Old", tags: "[]", source: "api", created_at: 1000, vector_ids: "[]" }, + { id: "new", content: "New", tags: "[]", source: "api", created_at: 2000, vector_ids: "[]" }, + ); + + const res = await worker.fetch(req("GET", "/list?after=1500"), env, ctx); + expect(res.status).toBe(200); + const data = await res.json() as any[]; + expect(data).toHaveLength(1); + expect(data[0].id).toBe("new"); + }); + + it("filters by 
?before=", async () => { + db.entries.push( + { id: "old", content: "Old", tags: "[]", source: "api", created_at: 1000, vector_ids: "[]" }, + { id: "new", content: "New", tags: "[]", source: "api", created_at: 2000, vector_ids: "[]" }, + ); + + const res = await worker.fetch(req("GET", "/list?before=1500"), env, ctx); + expect(res.status).toBe(200); + const data = await res.json() as any[]; + expect(data).toHaveLength(1); + expect(data[0].id).toBe("old"); + }); + + it("combines ?tag=, ?after= and ?before=", async () => { + db.entries.push( + { id: "work-old", content: "Work old", tags: '["work"]', source: "api", created_at: 1000, vector_ids: "[]" }, + { id: "work-mid", content: "Work mid", tags: '["work"]', source: "api", created_at: 2000, vector_ids: "[]" }, + { id: "work-new", content: "Work new", tags: '["work"]', source: "api", created_at: 3000, vector_ids: "[]" }, + { id: "idea-mid", content: "Idea mid", tags: '["idea"]', source: "api", created_at: 2000, vector_ids: "[]" }, + ); + + const res = await worker.fetch(req("GET", "/list?tag=work&after=1500&before=2500"), env, ctx); + expect(res.status).toBe(200); + const data = await res.json() as any[]; + expect(data).toHaveLength(1); + expect(data[0].id).toBe("work-mid"); + }); }); diff --git a/test/integration/recall.test.ts b/test/integration/recall.test.ts new file mode 100644 index 0000000..22da349 --- /dev/null +++ b/test/integration/recall.test.ts @@ -0,0 +1,129 @@ +import { describe, it, expect, beforeEach, vi } from "vitest"; +import worker from "../../src/index"; +import { makeTestEnv, makeTestDb, makeVectorizeMock } from "../helpers/make-env"; +import { req } from "../helpers/make-request"; +import type { Env } from "../../src/index"; +import { D1Mock } from "../helpers/d1-mock"; + +const ctx = { waitUntil: (_: Promise) => {} } as any; + +function makeMatch(id: string, score: number, overrides: Record = {}) { + return { + id, + score, + metadata: { parentId: id, isUpdate: false, ...overrides }, + }; +} + 
+describe("GET /recall", () => { // integration tests: drive worker.fetch against a mocked D1 + Vectorize env
+  let env: Env;
+  let db: D1Mock;
+
+  beforeEach(() => { // fresh DB and env for every test
+    db = makeTestDb();
+    env = makeTestEnv(db);
+  });
+
+  it("returns 400 when query is missing", async () => {
+    const res = await worker.fetch(req("GET", "/recall"), env, ctx);
+    expect(res.status).toBe(400);
+    const data = await res.json() as any;
+    expect(data.ok).toBe(false);
+    expect(data.error).toBe("query is required");
+  });
+
+  it("returns an empty result set with a message when nothing matches", async () => {
+    env = makeTestEnv(db, {
+      VECTORIZE: makeVectorizeMock({ query: vi.fn().mockResolvedValue({ matches: [] }) }),
+    });
+
+    const res = await worker.fetch(req("GET", "/recall?query=anything"), env, ctx);
+    expect(res.status).toBe(200);
+    const data = await res.json() as any;
+    expect(data.ok).toBe(true);
+    expect(data.results).toEqual([]);
+    expect(data.message).toBe("Nothing found matching that query.");
+  });
+
+  it("returns ranked matches hydrated from D1", async () => {
+    db.entries.push(
+      { id: "entry-1", content: "First memory", tags: '["work"]', source: "api", created_at: 1000, vector_ids: "[]", recall_count: 0, importance_score: 0 },
+      { id: "entry-2", content: "Second memory", tags: '["idea"]', source: "api", created_at: 2000, vector_ids: "[]", recall_count: 0, importance_score: 0 },
+    );
+    env = makeTestEnv(db, {
+      VECTORIZE: makeVectorizeMock({
+        query: vi.fn().mockResolvedValue({
+          matches: [makeMatch("entry-1", 0.9), makeMatch("entry-2", 0.8)],
+        }),
+      }),
+    });
+
+    const res = await worker.fetch(req("GET", "/recall?query=memory"), env, ctx);
+    expect(res.status).toBe(200);
+    const data = await res.json() as any;
+    expect(data.ok).toBe(true);
+    expect(data.results).toHaveLength(2);
+    expect(data.results[0]).toMatchObject({ id: "entry-1", content: "First memory", tags: ["work"], source: "api" }); // tags come back as a parsed array, not the stored JSON string
+    expect(data.results[0].score).toBeCloseTo(90, 0); // the raw 0.9 match score is expected back as roughly 90
+    expect(data.results[1]).toMatchObject({ id: "entry-2", content: "Second memory" });
+    expect(typeof data.insight === "string" || data.insight === null).toBe(true); // insight may be a string or null
+  });
+
+  it("dedupes matches that share the same parentId", async () => {
+    db.entries.push(
+      { id: "entry-1", content: "Chunked memory", tags: "[]", source: "api", created_at: 1000, vector_ids: "[]", recall_count: 0, importance_score: 0 },
+    );
+    env = makeTestEnv(db, {
+      VECTORIZE: makeVectorizeMock({
+        query: vi.fn().mockResolvedValue({
+          matches: [makeMatch("entry-1", 0.9), makeMatch("entry-1-update-1", 0.85, { parentId: "entry-1", isUpdate: true })], // second vector points at the same parent entry
+        }),
+      }),
+    });
+
+    const res = await worker.fetch(req("GET", "/recall?query=memory"), env, ctx);
+    const data = await res.json() as any;
+    expect(data.results).toHaveLength(1);
+    expect(data.results[0].id).toBe("entry-1");
+  });
+
+  it("filters out matches whose parent entry doesn't carry the requested tag", async () => {
+    db.entries.push(
+      { id: "entry-1", content: "Work memory", tags: '["work"]', source: "api", created_at: 1000, vector_ids: "[]", recall_count: 0, importance_score: 0 },
+      { id: "entry-2", content: "Idea memory", tags: '["idea"]', source: "api", created_at: 2000, vector_ids: "[]", recall_count: 0, importance_score: 0 },
+    );
+    env = makeTestEnv(db, {
+      VECTORIZE: makeVectorizeMock({
+        query: vi.fn().mockResolvedValue({
+          matches: [makeMatch("entry-1", 0.9), makeMatch("entry-2", 0.85)], // Vectorize returns both; only entry-1 is tagged "work" in D1
+        }),
+      }),
+    });
+
+    const res = await worker.fetch(req("GET", "/recall?query=memory&tag=work"), env, ctx);
+    const data = await res.json() as any;
+    expect(data.results).toHaveLength(1);
+    expect(data.results[0].id).toBe("entry-1");
+  });
+
+  it("returns empty results immediately when the tag has no matching entries", async () => {
+    const queryMock = vi.fn().mockResolvedValue({ matches: [makeMatch("entry-1", 0.9)] });
+    env = makeTestEnv(db, { VECTORIZE: makeVectorizeMock({ query: queryMock }) });
+
+    const res = await worker.fetch(req("GET", "/recall?query=memory&tag=nonexistent"), env, ctx);
+    const data = await res.json() as any;
+    expect(data.ok).toBe(true);
+    expect(data.results).toEqual([]);
+    // Short-circuits before hitting Vectorize since the tag resolves to no IDs in D1
+    expect(queryMock).not.toHaveBeenCalled();
+  });
+
+  it("never asks Vectorize for more than 50 matches when ?topK= is oversized", async () => {
+    const queryMock = vi.fn().mockResolvedValue({ matches: [] });
+    env = makeTestEnv(db, { VECTORIZE: makeVectorizeMock({ query: queryMock }) });
+    await worker.fetch(req("GET", "/recall?query=memory&topK=999"), env, ctx);
+    expect(queryMock).toHaveBeenCalled(); // otherwise calls[0] below is undefined and the failure is an opaque TypeError
+    expect(queryMock.mock.calls[0][1].topK).toBeGreaterThanOrEqual(1);
+    expect(queryMock.mock.calls[0][1].topK).toBeLessThanOrEqual(50); // bounds the Vectorize request only; the route's own ?topK= clamp is not observable through this mock
+  });
+});