diff --git a/.gitignore b/.gitignore index 01443aa..0d1373b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ assets/*.gif node_modules/ coverage/ -.wrangler/ \ No newline at end of file +.wrangler/ +docs/ \ No newline at end of file diff --git a/public/index.html b/public/index.html index 790597d..59cf32a 100644 --- a/public/index.html +++ b/public/index.html @@ -978,6 +978,29 @@ font-weight: 500; } + .vec-chip { + display: inline-flex; + align-items: center; + gap: 4px; + font-size: 11px; + padding: 3px 8px; + border-radius: var(--radius-tag); + white-space: nowrap; + } + .vec-chip--on { + background: var(--accent-soft); + color: var(--accent-ink); + } + .vec-chip--pending { + background: var(--surface-2); + color: var(--text-tag); + } + .vec-chip--off { + background: color-mix(in srgb, var(--danger) 15%, transparent); + color: var(--danger); + font-weight: 500; + } + .match-line { display: flex; align-items: center; @@ -2468,6 +2491,7 @@

Second Brain

+
Appearance
@@ -2503,6 +2527,7 @@

Second Brain

selectedTag = '', selectedTimeRange = '' let currentCount = 0 + let vectorizeGraceMs = 300000 function init() { applyTheme() @@ -2888,19 +2913,33 @@

Second Brain

} catch {} const isSynthesized = tags.includes('synthesized') const isRolledUp = tags.includes('rolled-up') + + let vectorIds = [] + try { vectorIds = JSON.parse(entry.vector_ids || '[]') } catch {} + const vectorized = vectorIds.length > 0 + // Pending state is computed at render time; won't auto-flip — reload required + const pending = !vectorized && (Date.now() - (entry.created_at || 0) < vectorizeGraceMs) + const vec = vectorized ? 'on' : (pending ? 'pending' : 'off') + + const vecChip = vec === 'on' + ? `` + : vec === 'pending' + ? `` + : `Not indexed` + const card = document.createElement('div') card.className = 'memory-card' + (isSynthesized ? ' card--synthesized' : '') + (isRolledUp ? ' card--rolled-up' : '') card.dataset.id = entry.id card.innerHTML = ` -
${escHtml(entry.content)}
- ` +
${escHtml(entry.content)}
+` card.querySelector('.card-content').onclick = () => openView({ id: entry.id, content: entry.content, tags }, card) card.querySelector('.edit-btn').onclick = () => openEdit(entry.id, entry.content, tags) return card @@ -3090,7 +3129,9 @@

Second Brain

tagsEl.innerHTML = data.top_tags?.length ? data.top_tags.map((t) => `${escHtml(t)}`).join('') : 'No tags yet' + vectorizeGraceMs = data.vectorize_grace_ms ?? vectorizeGraceMs renderDigestSection(data.digest_candidates ?? []) + renderVectorizeSection(data.unvectorized ?? 0) } catch {} } @@ -3152,6 +3193,52 @@

Second Brain

} } + function renderVectorizeSection(count) { + const el = document.getElementById('vectorize-section') + if (!count) { el.style.display = 'none'; return } + el.style.display = '' + el.innerHTML = ` + +

${count} ${count === 1 ? 'memory' : 'memories'} failed to embed and won't appear in recall.

+ + ` + } + + async function runVectorize(btn) { + btn.disabled = true + btn.classList.add('digest-btn--loading') + btn.innerHTML = ' Working…' + try { + let remaining = 1 + let totalProcessed = 0 + while (remaining > 0) { + const res = await fetch(`${WORKER_URL}/vectorize-pending`, { + method: 'POST', + headers: { Authorization: `Bearer ${AUTH_TOKEN}` } + }) + if (!res.ok) throw new Error(`Server error: ${res.status}`) + const data = await res.json() + remaining = data.remaining ?? 0 + totalProcessed += data.processed ?? 0 + if ((data.processed ?? 0) === 0 && remaining > 0) break + } + btn.classList.remove('digest-btn--loading') + btn.innerHTML = ` Done — ${totalProcessed} re-indexed` + btn.style.color = 'var(--good)' + await loadMenuStats() + loadRecent() + } catch { + btn.classList.remove('digest-btn--loading') + btn.innerHTML = ' Request failed' + btn.style.color = 'var(--danger)' + setTimeout(() => { + btn.disabled = false + btn.innerHTML = 'Vectorize now →' + btn.style.color = '' + }, 3000) + } + } + async function exportMemories(format) { closeMenu() try { diff --git a/src/index.ts b/src/index.ts index 1d9c4a2..e0a2ad3 100644 --- a/src/index.ts +++ b/src/index.ts @@ -14,6 +14,7 @@ export interface Env { AI: Ai; AUTH_TOKEN: string; OAUTH_KV: KVNamespace; + VECTORIZE_GRACE_MS?: string; } const LLM_MODEL = "@cf/meta/llama-4-scout-17b-16e-instruct"; @@ -26,6 +27,10 @@ const CORS_HEADERS = { "Access-Control-Allow-Headers": "Content-Type, Authorization, Accept", }; +function graceMs(env: Env): number { + return parseInt(env.VECTORIZE_GRACE_MS ?? "300000", 10) || 300000; +} + // ─── Thresholds ─────────────────────────────────────────────────────────────── const DUPLICATE_BLOCK_THRESHOLD = 0.95; @@ -512,7 +517,7 @@ export function buildEntryFilterQuery(params: { if (params.after !== undefined) { conds.push(`created_at >= ?`); bindings.push(params.after); } if (params.before !== undefined) { conds.push(`created_at <= ?`); bindings.push(params.before); } - let sql = `SELECT id, content, tags, source, created_at FROM entries`; + let sql = `SELECT id, content, tags, source, created_at, vector_ids FROM entries`; if (conds.length) sql += ` WHERE ` + conds.join(` AND `); sql += ` ORDER BY created_at DESC LIMIT ?`; bindings.push(params.n); @@ -1603,8 +1608,13 @@ const defaultHandler = { if (url.pathname === "/stats" && request.method === "GET") { const authErr = requireAuth(request, env); if (authErr) return authErr; + const graceCutoff = Date.now() - graceMs(env); const [summary, tagRows, candidateRows] = await Promise.all([ - env.DB.prepare(`SELECT COUNT(*) as count, AVG(importance_score) as avg_importance FROM entries`).first() as Promise | null>, + env.DB.prepare( + `SELECT COUNT(*) as count, AVG(importance_score) as avg_importance, + SUM(CASE WHEN vector_ids = '[]' AND created_at < ? THEN 1 ELSE 0 END) as unvectorized + FROM entries` + ).bind(graceCutoff).first() as Promise | null>, env.DB.prepare(`SELECT value, COUNT(*) as n FROM entries, json_each(entries.tags) GROUP BY value ORDER BY n DESC LIMIT 5`).all(), env.DB.prepare(` SELECT value as tag, COUNT(*) as count @@ -1635,6 +1645,8 @@ const defaultHandler = { avg_importance: summary?.avg_importance != null ? Math.round((summary.avg_importance as number) * 10) / 10 : null, top_tags: (tagRows.results as any[]).map(r => r.value as string), digest_candidates: digestCandidates, + unvectorized: (summary?.unvectorized as number) ?? 0, + vectorize_grace_ms: graceMs(env), }); } @@ -1748,6 +1760,46 @@ const defaultHandler = { return json({ tag, synthesis: result.text, entry_id: result.synthesizedId, source_count: result.entriesUsed }); } + // POST /vectorize-pending + if (url.pathname === "/vectorize-pending" && request.method === "POST") { + const authErr = requireAuth(request, env); + if (authErr) return authErr; + + const graceCutoff = Date.now() - graceMs(env); + + const { results: toProcess } = await env.DB.prepare( + `SELECT id, content, tags, source, created_at FROM entries + WHERE vector_ids = '[]' AND created_at < ? + ORDER BY created_at DESC LIMIT 25` + ).bind(graceCutoff).all(); + + let processed = 0; + let failed = 0; + + for (const row of toProcess as Record[]) { + try { + await storeEntry( + env, + row.id as string, + row.content as string, + JSON.parse(row.tags as string), + row.source as string, + row.created_at as number + ); + processed++; + } catch (e) { + console.error("Re-embed failed for entry", row.id, e); + failed++; + } + } + + const remaining = await env.DB.prepare( + `SELECT COUNT(*) as count FROM entries WHERE vector_ids = '[]' AND created_at < ?` + ).bind(graceCutoff).first() as Record | null; + + return json({ processed, failed, remaining: (remaining?.count as number) ?? 0 }); + } + return new Response("Not found", { status: 404 }); }, }; diff --git a/test/helpers/d1-mock.ts b/test/helpers/d1-mock.ts index 5e91b19..6394e13 100644 --- a/test/helpers/d1-mock.ts +++ b/test/helpers/d1-mock.ts @@ -88,7 +88,16 @@ export class D1Mock { const avg_importance = scored.length > 0 ? scored.reduce((sum: number, e: any) => sum + e.importance_score, 0) / scored.length : null; - return { count, avg_importance }; + const cutoff = args.length > 0 ? Number(args[0]) : undefined; + const unvectorized = cutoff !== undefined + ? db.entries.filter((e: any) => e.vector_ids === '[]' && e.created_at < cutoff).length + : 0; + return { count, avg_importance, unvectorized }; + } + if (s.includes("COUNT(*) as count") && s.includes("vector_ids = '[]'") && s.includes("created_at <")) { + const cutoff = Number(args[0]); + const count = db.entries.filter((e: any) => e.vector_ids === '[]' && e.created_at < cutoff).length; + return { count }; } if (s.includes("COUNT(*) as count")) { return { count: db.entries.length }; @@ -194,6 +203,17 @@ export class D1Mock { }); return { results: [...tags].sort().map(t => ({ value: t })) }; } + if (s.includes("vector_ids = '[]' AND created_at <") && s.includes("ORDER BY created_at DESC LIMIT")) { + const cutoff = Number(args[0]); + const limitMatch = s.match(/LIMIT\s+(\d+)/i); + const limit = limitMatch ? parseInt(limitMatch[1], 10) : 25; + const rows = [...db.entries] + .filter((e: any) => e.vector_ids === '[]' && e.created_at < cutoff) + .sort((a: any, b: any) => b.created_at - a.created_at) + .slice(0, limit) + .map((e: any) => ({ id: e.id, content: e.content, tags: e.tags, source: e.source, created_at: e.created_at })); + return { results: rows }; + } if (s.includes("ORDER BY created_at DESC LIMIT")) { const limit = Number(args[args.length - 1]); const filterArgs = args.slice(0, -1); diff --git a/test/integration/list.test.ts b/test/integration/list.test.ts index a02e13a..d2be13b 100644 --- a/test/integration/list.test.ts +++ b/test/integration/list.test.ts @@ -120,4 +120,22 @@ describe("GET /list", () => { expect(data).toHaveLength(1); expect(data[0].id).toBe("work-mid"); }); + + it("includes vector_ids field in each entry", async () => { + db.entries.push({ + id: "v1", content: "Vectorized note", tags: "[]", source: "api", + created_at: 1000, vector_ids: '["v1"]', + }); + db.entries.push({ + id: "v2", content: "Unvectorized note", tags: "[]", source: "api", + created_at: 2000, vector_ids: "[]", + }); + + const res = await worker.fetch(req("GET", "/list"), env, ctx); + const data = await res.json() as any[]; + const v1 = data.find((e: any) => e.id === "v1"); + const v2 = data.find((e: any) => e.id === "v2"); + expect(v1.vector_ids).toBe('["v1"]'); + expect(v2.vector_ids).toBe("[]"); + }); }); diff --git a/test/integration/stats.test.ts b/test/integration/stats.test.ts index d6fda6e..9e50a53 100644 --- a/test/integration/stats.test.ts +++ b/test/integration/stats.test.ts @@ -68,3 +68,64 @@ describe("GET /stats", () => { expect(data.top_tags.length).toBeLessThanOrEqual(5); }); }); + +describe("GET /stats — vectorization fields", () => { + let env: Env; + let db: D1Mock; + + beforeEach(() => { + db = makeTestDb(); + env = makeTestEnv(db); + }); + + it("returns unvectorized: 0 when all entries are vectorized", async () => { + db.entries.push({ + id: "a", content: "content", tags: "[]", source: "api", + created_at: Date.now() - 600000, vector_ids: '["a"]', recall_count: 0, importance_score: 0, + }); + const res = await worker.fetch(req("GET", "/stats"), env, ctx); + const data = await res.json() as any; + expect(data.unvectorized).toBe(0); + }); + + it("returns unvectorized: 0 for entries within the grace window (pending)", async () => { + // created_at = now → within 5-minute grace window → not counted as failed + db.entries.push({ + id: "b", content: "content", tags: "[]", source: "api", + created_at: Date.now(), vector_ids: "[]", recall_count: 0, importance_score: 0, + }); + const res = await worker.fetch(req("GET", "/stats"), env, ctx); + const data = await res.json() as any; + expect(data.unvectorized).toBe(0); + }); + + it("counts past-grace entries with vector_ids=[] as unvectorized", async () => { + db.entries.push( + { id: "old-1", content: "c1", tags: "[]", source: "api", created_at: Date.now() - 600000, vector_ids: "[]", recall_count: 0, importance_score: 0 }, + { id: "old-2", content: "c2", tags: "[]", source: "api", created_at: Date.now() - 700000, vector_ids: "[]", recall_count: 0, importance_score: 0 }, + { id: "vec", content: "c3", tags: "[]", source: "api", created_at: Date.now() - 600000, vector_ids: '["vec"]', recall_count: 0, importance_score: 0 }, + ); + const res = await worker.fetch(req("GET", "/stats"), env, ctx); + const data = await res.json() as any; + expect(data.unvectorized).toBe(2); + }); + + it("returns vectorize_grace_ms in response", async () => { + const res = await worker.fetch(req("GET", "/stats"), env, ctx); + const data = await res.json() as any; + expect(data.vectorize_grace_ms).toBe(300000); + }); + + it("uses VECTORIZE_GRACE_MS env var when set", async () => { + env = makeTestEnv(db, { VECTORIZE_GRACE_MS: "60000" }); + // entry that is 90 seconds old — past the 60s grace but within default 300s + db.entries.push({ + id: "x", content: "c", tags: "[]", source: "api", + created_at: Date.now() - 90000, vector_ids: "[]", recall_count: 0, importance_score: 0, + }); + const res = await worker.fetch(req("GET", "/stats"), env, ctx); + const data = await res.json() as any; + expect(data.unvectorized).toBe(1); + expect(data.vectorize_grace_ms).toBe(60000); + }); +}); diff --git a/test/integration/vectorize-pending.test.ts b/test/integration/vectorize-pending.test.ts new file mode 100644 index 0000000..b675927 --- /dev/null +++ b/test/integration/vectorize-pending.test.ts @@ -0,0 +1,132 @@ +import { describe, it, expect, beforeEach, vi } from "vitest"; +import worker from "../../src/index"; +import { makeTestEnv, makeTestDb, makeVectorizeMock } from "../helpers/make-env"; +import { req } from "../helpers/make-request"; +import type { Env } from "../../src/index"; +import { D1Mock } from "../helpers/d1-mock"; + +const ctx = { waitUntil: (_: Promise) => {} } as any; + +function pastGraceEntry(id: string) { + return { + id, + content: `Content for ${id}`, + tags: '["work"]', + source: "api", + created_at: Date.now() - 600000, // 10 minutes ago — past default 5-min grace + vector_ids: "[]", + recall_count: 0, + importance_score: 0, + }; +} + +describe("POST /vectorize-pending", () => { + let env: Env; + let db: D1Mock; + + beforeEach(() => { + db = makeTestDb(); + env = makeTestEnv(db); + }); + + it("returns 401 without auth", async () => { + const res = await worker.fetch(req("POST", "/vectorize-pending", { token: null }), env, ctx); + expect(res.status).toBe(401); + }); + + it("returns { processed: 0, failed: 0, remaining: 0 } when no past-grace entries", async () => { + const res = await worker.fetch(req("POST", "/vectorize-pending"), env, ctx); + expect(res.status).toBe(200); + const data = await res.json() as any; + expect(data.processed).toBe(0); + expect(data.failed).toBe(0); + expect(data.remaining).toBe(0); + }); + + it("processes past-grace entries and returns correct counts", async () => { + db.entries.push(pastGraceEntry("e1"), pastGraceEntry("e2")); + const res = await worker.fetch(req("POST", "/vectorize-pending"), env, ctx); + const data = await res.json() as any; + expect(data.processed).toBe(2); + expect(data.failed).toBe(0); + expect(data.remaining).toBe(0); + }); + + it("updates vector_ids in D1 after successful re-embed", async () => { + db.entries.push(pastGraceEntry("fix-me")); + await worker.fetch(req("POST", "/vectorize-pending"), env, ctx); + const updated = db.entries.find((e: any) => e.id === "fix-me"); + const ids = JSON.parse(updated.vector_ids); + expect(ids.length).toBeGreaterThan(0); + }); + + it("skips entries within the grace window (vector_ids=[] but recent)", async () => { + db.entries.push({ + id: "pending", + content: "Just captured", + tags: "[]", + source: "api", + created_at: Date.now(), // within grace window + vector_ids: "[]", + recall_count: 0, + importance_score: 0, + }); + const res = await worker.fetch(req("POST", "/vectorize-pending"), env, ctx); + const data = await res.json() as any; + expect(data.processed).toBe(0); + expect(data.remaining).toBe(0); + }); + + it("skips entries that already have vector_ids populated", async () => { + db.entries.push({ + id: "already-done", + content: "Already vectorized", + tags: "[]", + source: "api", + created_at: Date.now() - 600000, + vector_ids: '["already-done"]', + recall_count: 0, + importance_score: 0, + }); + const res = await worker.fetch(req("POST", "/vectorize-pending"), env, ctx); + const data = await res.json() as any; + expect(data.processed).toBe(0); + }); + + it("counts failed and continues when storeEntry throws for one entry", async () => { + db.entries.push(pastGraceEntry("bad"), pastGraceEntry("good")); + let callCount = 0; + env = makeTestEnv(db, { + VECTORIZE: makeVectorizeMock({ + insert: vi.fn().mockImplementation(() => { + callCount++; + if (callCount === 1) throw new Error("Vectorize error"); + return Promise.resolve({ mutationId: "m" }); + }), + }), + }); + const res = await worker.fetch(req("POST", "/vectorize-pending"), env, ctx); + const data = await res.json() as any; + expect(data.processed).toBe(1); + expect(data.failed).toBe(1); + expect(data.remaining).toBe(1); + }); + + it("respects VECTORIZE_GRACE_MS env var", async () => { + // entry 90s old — past 60s grace but within default 300s + db.entries.push({ + id: "e90", + content: "90-second-old memory", + tags: "[]", + source: "api", + created_at: Date.now() - 90000, + vector_ids: "[]", + recall_count: 0, + importance_score: 0, + }); + env = makeTestEnv(db, { VECTORIZE_GRACE_MS: "60000" }); + const res = await worker.fetch(req("POST", "/vectorize-pending"), env, ctx); + const data = await res.json() as any; + expect(data.processed).toBe(1); + }); +}); diff --git a/wrangler.toml b/wrangler.toml index 7bd073c..5f16be3 100644 --- a/wrangler.toml +++ b/wrangler.toml @@ -29,6 +29,9 @@ id = "" # Create manually: wrangler kv namespace create OAUTH_KV # then paste the returned id above. +[vars] +VECTORIZE_GRACE_MS = "300000" + [assets] directory = "./public"