diff --git a/AGENT_GUIDE.md b/AGENT_GUIDE.md index cde84e5..863aafa 100644 --- a/AGENT_GUIDE.md +++ b/AGENT_GUIDE.md @@ -149,16 +149,18 @@ Find documents by metadata criteria without a text search query. | Parameter | Required | Description | |-----------|----------|-------------| -| `metadata_filter` | Yes | JSON key-value pairs (AND semantics). Example: `{"type": "decision-log"}` | -| `project_name` | No | Restrict to a project. | +| `metadata_filter` | No† | JSON key-value pairs (AND semantics). Example: `{"type": "decision-log"}` | +| `project_name` | No† | Restrict to a project. Sufficient on its own to **list that project's documents**. | | `include_content` | No | Include full text (default false). | | `limit` | No | Max results (default 10). | -| `updated_since` | No | ISO-8601 timestamp. Only docs updated on/after. | -| `created_since` | No | ISO-8601 timestamp. Only docs created on/after. | +| `updated_since` | No† | ISO-8601 timestamp. Only docs updated on/after. | +| `created_since` | No† | ISO-8601 timestamp. Only docs created on/after. | | `max_bytes` | No | Response size budget when include_content is true. | | `requestor` | No | Your agent name. | -Use for browsing by category, catching up on recent changes (`updated_since`), or finding all documents of a specific type. +† **At least one** of `metadata_filter`, `project_name`, `updated_since`, or `created_since` must be supplied (so this never becomes an unbounded whole-KB dump). An empty `metadata_filter` plus `project_name` lists that project's documents. + +Use for browsing by category, catching up on recent changes (`updated_since`), listing all documents in a project (`project_name` alone), or finding all documents of a specific type. Results are ordered newest-updated first. --- @@ -240,7 +242,7 @@ These two tools have **different contracts**. Picking the wrong one is the most | Top-N ranked hits are enough to answer | You need a complete, exhaustive set (e.g. 
an inventory or a catch-up) | - **`cerefox_search` is relevance-ranked top-N.** It returns the best `match_count` matches (**default 5** — raise it via `match_count`). It is **not** an enumeration tool: if more docs match than `match_count`, the rest sit silently below the cutoff — and the one you most want (e.g. the *newest*) may be exactly the one dropped. -- **`cerefox_metadata_search` is exhaustive enumeration by criteria.** No text query. Filters by `metadata_filter` (plus `project_name`, `updated_since` / `created_since`). It returns **metadata only by default** (`include_content=false`) — ids + titles + tags, which is cheap — so raise `limit` (**default 10**) freely to get the whole set. Discover available keys with `cerefox_list_metadata_keys`. +- **`cerefox_metadata_search` is exhaustive enumeration by criteria.** No text query. Filters by `metadata_filter`, `project_name`, `updated_since` / `created_since` — supply **at least one** (an empty `metadata_filter` plus `project_name` lists that project's documents). It returns **metadata only by default** (`include_content=false`) — ids + titles + tags, which is cheap — so raise `limit` (**default 10**) freely to get the whole set. Discover available keys with `cerefox_list_metadata_keys`. ### Examples @@ -248,6 +250,7 @@ These two tools have **different contracts**. 
Picking the wrong one is the most - *"List every decision-log doc"* (enumeration) → `cerefox_metadata_search(metadata_filter={"type":"decision-log"}, limit=50, include_content=false)` - *"What changed since I last looked?"* → `cerefox_metadata_search(metadata_filter={"type":"decision-log"}, updated_since="2026-05-01T00:00:00Z")` - *"Just the ids of all active research docs"* → `cerefox_metadata_search(metadata_filter={"type":"research","status":"active"}, limit=100)` +- *"List everything in the Cerefox project"* → `cerefox_metadata_search(project_name="Cerefox", limit=100)` (no `metadata_filter` needed) ### Pattern: finding the newest item in a growing series @@ -420,6 +423,7 @@ The legacy Python `uv run cerefox` is a frozen husk as of v0.9 — only `uv run | `cerefox_get_document(document_id, version_id, requestor)` | `cerefox document get --version-id --requestor ` | | `cerefox_list_versions(document_id, requestor)` | `cerefox document version list --requestor ` | | `cerefox_list_projects(requestor)` | `cerefox project list --requestor ` | +| `cerefox_set_document_projects(document_id, project_names, author)` | `cerefox document set-projects --author --author-type user\|agent` (or `--clear` to remove all) | | `cerefox_list_metadata_keys()` | `cerefox metadata keys` | | `cerefox_metadata_search(metadata_filter, project_name, updated_since, created_since, limit, include_content, requestor)` | `cerefox metadata search --metadata-filter '' --project-name --updated-since --created-since --limit N --include-content --requestor ` | | `cerefox_get_audit_log(document_id, author, operation, since, until, limit, requestor)` | `cerefox audit list --document-id --author --operation --since --until --limit N --json --requestor ` | diff --git a/AGENT_QUICK_REFERENCE.md b/AGENT_QUICK_REFERENCE.md index 8aedbb5..541b524 100644 --- a/AGENT_QUICK_REFERENCE.md +++ b/AGENT_QUICK_REFERENCE.md @@ -10,7 +10,7 @@ Cerefox is a persistent, shared knowledge base. 
You have **10 MCP tools** (9 of | `cerefox_ingest` | Save or update a document | `title`, `content` (required), `document_id` (update by ID), `update_if_exists`, `project_name` (single, non-destructive add on update), `project_names` (list, destructive replace on update), `metadata`, `author` | | `cerefox_get_document` | Get full document by ID | `document_id` (required) | | `cerefox_list_versions` | Version history of a document | `document_id` (required) | -| `cerefox_metadata_search` | Find docs by metadata (no text query) | `metadata_filter` (required), `include_content`, `updated_since` | +| `cerefox_metadata_search` | Find or list docs by metadata, project, or time (no text query) | `metadata_filter`, `project_name` (list a project's docs), `updated_since`, `include_content` — **at least one** of metadata_filter/project_name/updated_since/created_since | | `cerefox_list_metadata_keys` | Discover available metadata keys | (none required) | | `cerefox_list_projects` | List all projects | (none required) | | `cerefox_set_document_projects` | Set doc's project memberships to exactly the given list (destructive replace; metadata-only, no content change) | `document_id`, `project_names` (required) | @@ -64,8 +64,8 @@ Same operations, same conventions. Full reference: [`docs/guides/cli.md`](docs/g | `cerefox_list_versions` | `cerefox document version list <id> --requestor "<your-name>"` | | `cerefox_list_projects` | `cerefox project list --requestor "<your-name>"` | | `cerefox_list_metadata_keys` | `cerefox metadata keys` | -| `cerefox_metadata_search` | `cerefox metadata search --metadata-filter '<json>' --requestor "<your-name>"` | -| `cerefox_set_document_projects` | _MCP-only; a CLI command will be added in a future release.
Until then, run via MCP if available._ | +| `cerefox_metadata_search` | `cerefox metadata search --metadata-filter '<json>' --requestor "<your-name>"` (list a project: `cerefox document list --project <name>`) | +| `cerefox_set_document_projects` | `cerefox document set-projects <id> <name...> --author "<your-name>" --author-type agent` (or `--clear` to remove all) | | `cerefox_get_audit_log` | `cerefox audit list --requestor "<your-name>"` (add `--json` for scripted access) | | `cerefox_get_help` | `cerefox guides show agent-quick-reference` (or `cerefox guides list` for the full bundled-docs index) | diff --git a/CHANGELOG.md b/CHANGELOG.md index fb5bac3..992c660 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,26 @@ Versioning: [Semantic Versioning](https://semver.org/spec/v2.0.0.html) — all ` ## [Unreleased] -Open roadmap. +### Added + +- **`cerefox_metadata_search` can now list a project's documents** — closing a CLI↔MCP + parity gap (the CLI's `cerefox document list --project <name>` had no MCP equivalent). + `metadata_filter` is now **optional**: supply `project_name` (and/or `updated_since` / + `created_since`) alone to list documents by scope, ordered newest-updated first. At + least one of `metadata_filter` / `project_name` / `updated_since` / `created_since` is + still required, so the tool never becomes an unbounded whole-KB dump. Backward + compatible — existing non-empty-filter callers are unaffected. The twin + `cerefox-metadata-search` Edge Function and the GPT Actions OpenAPI block + (`info.version` → 1.9.0) were relaxed in lockstep. A new **CLI ↔ MCP parity matrix** in + [`docs/guides/cli.md`](docs/guides/cli.md) documents the full surface and the remaining + (intentional vs. actionable) gaps. +- **`cerefox document set-projects <id> [names…]`** — new CLI command + closing the reverse parity gap (the `cerefox_set_document_projects` MCP tool + had no CLI form).
Full-set replace of a document's project memberships + (`--clear` removes all); created-if-missing, case-insensitively de-duplicated, + logged as an `update-metadata` audit entry. Shares the membership-replace core + with the MCP tool (`_shared/mcp-tools/_projects.ts → replaceDocumentProjects`) + so both behave identically. --- diff --git a/CLAUDE.md b/CLAUDE.md index ad28b0b..a42fa54 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -65,7 +65,7 @@ cerefox/ │ └── memory/ # @cerefox/memory npm package — both bins (v0.5+) │ ├── src/ │ │ ├── bin/cerefox.ts # single bin (v0.5.1+); commander dispatch + error handler -│ │ ├── cli/ # commander program + 28 subcommand files +│ │ ├── cli/ # commander program + 35 subcommand files │ │ │ ├── commands/ # one file per subcommand (including `mcp` which runs buildServer()) │ │ │ └── util/ # checks, mcp-config-writers, bundled-docs │ │ ├── server.ts # buildServer() factory (called by the `mcp` subcommand) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f3fbb3f..375a98a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -186,7 +186,7 @@ packages/memory/ bin/cerefox.ts the package's bin — top-level error handler + commander dispatch cli/ program.ts commander program assembly; one registerXyz() per subcommand - commands/ 28 subcommand files (including `mcp` which runs buildServer()) + commands/ 35 subcommand files (including `mcp` which runs buildServer()) util/ checks (doctor/status), mcp-config-writers, bundled-docs, client, embed test/ stdio-smoke.test.ts spawn `cerefox mcp` and walk an MCP handshake diff --git a/README.md b/README.md index f51ea8c..263fad9 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ See [Connecting AI agents](#connecting-ai-agents) for the how-to per client. | **Review status** | Schema-level `review_status` on documents (`approved` / `pending_review`). Auto-transitions based on author_type. 
Filterable on search | | **Version governance** | Version archival (protect specific versions from cleanup), configurable retention (`CEREFOX_VERSION_CLEANUP_ENABLED`), version diff viewer | | **Usage tracking** | Opt-in logging of all operations (reads and writes) across all access paths. Tracks operation type, access path (remote-mcp, local-mcp, edge-function, webapp, cli), requestor identity, query text, and result count. Controlled via `cerefox config set usage_tracking_enabled true/false` -- no redeploy needed | -| **Analytics dashboard** | `/app/analytics` -- 7 interactive charts: calls per day, access path breakdown, top documents, top readers, operations donut, reader word cloud, and reader-to-document access pattern visualization (HEB). Date range + project + path filters. CSV export. | +| **Analytics dashboard** | `/app/analytics` -- 8 interactive charts: calls per day, access path breakdown, top documents, top readers, operations donut, requestor word cloud, requestor→document access patterns (HEB), and requestor→operation patterns (HEB). Date range + project + path filters. CSV export. 
| --- diff --git a/_shared/__tests__/mcp_tools.test.ts b/_shared/__tests__/mcp_tools.test.ts index 0231fbb..d13fb07 100644 --- a/_shared/__tests__/mcp_tools.test.ts +++ b/_shared/__tests__/mcp_tools.test.ts @@ -181,7 +181,7 @@ describe("input validation throws McpInvalidParams", () => { expect(err).toBeInstanceOf(McpInvalidParams); }); - test("cerefox_metadata_search rejects missing metadata_filter", async () => { + test("cerefox_metadata_search rejects no criteria at all (no filter, no scope)", async () => { const tool = TOOLS_BY_NAME["cerefox_metadata_search"]; let err: unknown; try { @@ -192,7 +192,7 @@ describe("input validation throws McpInvalidParams", () => { expect(err).toBeInstanceOf(McpInvalidParams); }); - test("cerefox_metadata_search rejects empty metadata_filter", async () => { + test("cerefox_metadata_search rejects empty metadata_filter with no other scope", async () => { const tool = TOOLS_BY_NAME["cerefox_metadata_search"]; let err: unknown; try { @@ -203,6 +203,16 @@ describe("input validation throws McpInvalidParams", () => { expect(err).toBeInstanceOf(McpInvalidParams); }); + test("cerefox_metadata_search rejects a non-object metadata_filter", async () => { + const tool = TOOLS_BY_NAME["cerefox_metadata_search"]; + let err: unknown; + try { + await tool.handler(noopClient(), { metadata_filter: "nope" }, FAKE_CTX); + } catch (e) { + err = e; + } + expect(err).toBeInstanceOf(McpInvalidParams); + }); test("cerefox_set_document_projects rejects missing document_id", async () => { const tool = TOOLS_BY_NAME["cerefox_set_document_projects"]; let err: unknown; @@ -226,6 +236,41 @@ describe("input validation throws McpInvalidParams", () => { }); }); +describe("cerefox_metadata_search listing (empty filter + scope)", () => { + // A mock client that resolves any project name → "proj-1" and records the + // params passed to the cerefox_metadata_search RPC. 
+ function listingClient(captured: { params?: Record }): SupabaseClient { + const projectChain = { + select: () => projectChain, + ilike: () => projectChain, + limit: () => ({ data: [{ id: "proj-1" }], error: null }), + }; + return { + rpc: (name: string, params: Record) => { + if (name === "cerefox_metadata_search") captured.params = params; + return { data: [], error: null }; + }, + from: () => projectChain, + } as unknown as SupabaseClient; + } + + test("project_name alone lists docs (empty filter → RPC gets {} + resolved project_id)", async () => { + const tool = TOOLS_BY_NAME["cerefox_metadata_search"]; + const captured: { params?: Record } = {}; + await tool.handler(listingClient(captured), { project_name: "Cerefox" }, FAKE_CTX); + expect(captured.params?.p_metadata_filter).toEqual({}); + expect(captured.params?.p_project_id).toBe("proj-1"); + }); + + test("updated_since alone is a sufficient scope (no throw)", async () => { + const tool = TOOLS_BY_NAME["cerefox_metadata_search"]; + const captured: { params?: Record } = {}; + await tool.handler(listingClient(captured), { updated_since: "2026-01-01" }, FAKE_CTX); + expect(captured.params?.p_metadata_filter).toEqual({}); + expect(captured.params?.p_updated_since).toBe("2026-01-01"); + }); +}); + describe("chunker (used by ingest)", () => { test("short content → 1 chunk", async () => { const { chunkMarkdown } = await import("../mcp-tools/_chunker.ts"); diff --git a/_shared/mcp-tools/_projects.ts b/_shared/mcp-tools/_projects.ts index 24d1ae3..3827a29 100644 --- a/_shared/mcp-tools/_projects.ts +++ b/_shared/mcp-tools/_projects.ts @@ -21,7 +21,9 @@ * prevents drift. */ -import type { MCPSupabaseClient } from "./types.ts"; +import type { AccessPath, MCPSupabaseClient } from "./types.ts"; + +import { logUsage } from "./_utils.ts"; /** Ensure `(documentId, project)` exists. Resolves project by name * (case-insensitive); creates the project if missing. Idempotent. 
@@ -104,6 +106,120 @@ export async function setDocumentProjectsByName( return projectIds; } +export interface ReplaceDocumentProjectsResult { + documentTitle: string; + /** Names after stripping blanks + case-insensitive dedup, in input order. */ + cleanNames: string[]; + projectIds: string[]; +} + +/** + * Full-set replace of a document's project memberships, with audit + usage + * logging. The shared core behind both the `cerefox_set_document_projects` + * MCP tool and the `cerefox document set-projects` CLI command, so the two + * behave identically. + * + * Cleans the incoming names (strip blanks, preserve order, case-insensitive + * dedup), verifies the document exists and isn't soft-deleted, resolves each + * name → project_id (creating the project if absent), then DELETE-then-INSERT + * replaces the membership set. An empty (or all-blank) list clears all + * memberships. Writes an `update-metadata` audit entry (content is untouched) + * and a usage-log entry. + * + * Throws if the document is missing or soft-deleted. Callers validate + * argument *shape* (e.g. that `projectNames` is an array of strings). + */ +export async function replaceDocumentProjects( + supabase: MCPSupabaseClient, + opts: { + documentId: string; + projectNames: string[]; + author: string; + authorType: string; + accessPath: AccessPath; + }, +): Promise { + const { documentId, projectNames, author, authorType, accessPath } = opts; + + // Strip empties; preserve order; dedup case-insensitively. + const seenLower = new Set(); + const cleanNames: string[] = []; + for (const n of projectNames) { + const stripped = (n ?? "").trim(); + if (!stripped) continue; + const key = stripped.toLowerCase(); + if (seenLower.has(key)) continue; + seenLower.add(key); + cleanNames.push(stripped); + } + + // Verify the document exists and isn't soft-deleted. 
+ const { data: doc } = await supabase + .from("cerefox_documents") + .select("id, title") + .eq("id", documentId) + .is("deleted_at", null) + .limit(1); + if (!doc?.length) { + throw new Error(`Document not found (or soft-deleted): ${documentId}`); + } + + // Resolve each name → project_id (create if absent). Preserve order. + const projectIds: string[] = []; + for (const name of cleanNames) { + const { data: proj } = await supabase + .from("cerefox_projects") + .select("id") + .ilike("name", name) + .limit(1); + if (proj?.length) { + projectIds.push(proj[0].id); + } else { + const { data: newProj } = await supabase + .from("cerefox_projects") + .insert({ name }) + .select("id"); + if (newProj?.[0]?.id) projectIds.push(newProj[0].id); + } + } + + // DELETE-then-INSERT replace (matches Python assign_document_projects). + await supabase.from("cerefox_document_projects").delete().eq("document_id", documentId); + if (projectIds.length > 0) { + const rows = projectIds.map((pid) => ({ document_id: documentId, project_id: pid })); + await supabase.from("cerefox_document_projects").insert(rows); + } + + // Audit entry — project membership is metadata, not content. + try { + await supabase.rpc("cerefox_create_audit_entry", { + p_document_id: documentId, + p_version_id: null, + p_operation: "update-metadata", + p_author: author, + p_author_type: authorType, + p_size_before: null, + p_size_after: null, + p_description: + cleanNames.length > 0 + ? `Set document projects to [${cleanNames.join(", ")}]` + : "Cleared all project memberships", + }); + } catch (err) { + console.warn("replaceDocumentProjects: audit entry failed", err); + } + + logUsage(supabase, { + operation: "set-document-projects", + accessPath, + requestor: author, + document_id: documentId, + result_count: projectIds.length, + }); + + return { documentTitle: doc[0].title as string, cleanNames, projectIds }; +} + /** Resolve a project name → project_id (case-insensitive), or `null` if * not found. 
Does NOT create. Used by search / metadata-search to translate * `project_name` parameters to UUIDs. */ diff --git a/_shared/mcp-tools/get-help-content.ts b/_shared/mcp-tools/get-help-content.ts index d55bef2..89f2d90 100644 --- a/_shared/mcp-tools/get-help-content.ts +++ b/_shared/mcp-tools/get-help-content.ts @@ -11,16 +11,16 @@ * docs/specs/polish-and-distribution-design.md §10d. */ -export const HELP_FULL = "# Cerefox Knowledge Base -- Agent Quick Reference\n\nCerefox is a persistent, shared knowledge base. You have **10 MCP tools** (9 of them have CLI equivalents — `cerefox_get_help` is MCP-only). For the full guide, search Cerefox for \"How AI Agents Use Cerefox\" or call `cerefox_get_help` to retrieve this content over MCP.\n\n## Tools\n\n| Tool | Purpose | Key params |\n|------|---------|------------|\n| `cerefox_search` | Find documents (hybrid FTS + semantic) | `query` (required), `project_name`, `metadata_filter`, `requestor` |\n| `cerefox_ingest` | Save or update a document | `title`, `content` (required), `document_id` (update by ID), `update_if_exists`, `project_name` (single, non-destructive add on update), `project_names` (list, destructive replace on update), `metadata`, `author` |\n| `cerefox_get_document` | Get full document by ID | `document_id` (required) |\n| `cerefox_list_versions` | Version history of a document | `document_id` (required) |\n| `cerefox_metadata_search` | Find docs by metadata (no text query) | `metadata_filter` (required), `include_content`, `updated_since` |\n| `cerefox_list_metadata_keys` | Discover available metadata keys | (none required) |\n| `cerefox_list_projects` | List all projects | (none required) |\n| `cerefox_set_document_projects` | Set doc's project memberships to exactly the given list (destructive replace; metadata-only, no content change) | `document_id`, `project_names` (required) |\n| `cerefox_get_audit_log` | Query write operation history | `document_id`, `author`, `operation`, `since` |\n| 
`cerefox_get_help` | Retrieve Cerefox conventions (this reference) over MCP. **Call this whenever uncertain.** | `topic` (optional, case-insensitive H2 substring match) |\n\n## Essential Rules\n\n1. **Search before ingesting** -- check if the document exists first.\n2. **Prefer ID-based updates** -- pass `document_id` from search results for deterministic updates. Falls back to title-matching with `update_if_exists: true`.\n3. **Set `author`/`requestor`** to your name on every call (e.g., \"Claude Code\", \"archiver\"). On MCP, pass as parameters. On CLI, pass `--author`/`--author-type`/`--requestor` flags, or rely on `CEREFOX_AUTHOR_NAME`/`CEREFOX_AUTHOR_TYPE`/`CEREFOX_REQUESTOR_NAME` env vars set in the user's `.env`.\n4. **Use `document_id` from search results** `[id: uuid]` for get_document and list_versions.\n5. **Add metadata** -- at minimum `type` (\"decision-log\", \"research\", \"design-doc\") and `status` (\"active\", \"draft\").\n6. **Write structured Markdown** with H1/H2/H3 headings for good chunking and search.\n7. **Deletes are soft (recoverable); purge is web-UI-only.** If you decide to delete, surface it to the user (`I soft-deleted X — recoverable from the Cerefox web UI trash`). You cannot un-do your own delete from agent code by design.\n8. **Cross-doc links inside content**: **always use `[Text](document-uuid)`.** UUIDs are the only fully reliable link form — stable across title changes, never ambiguous, no encoding gotchas. Every `cerefox_search` result shows `[id: ]` after the title; grab it and use it. Title-based linking (`[Text]()`) is fragile (breaks on colons, parens, ampersands, brackets — silently navigates to wrong page) — **don't write title-based links**; do an extra search to get the UUID instead. Repo-path forms (`[Text](docs/path.md)`) exist for repo-ingested files; don't construct manually. See `AGENT_GUIDE.md → Writing linkable content` for the full rule.\n9. 
**Project memberships — non-destructive by default**: on `cerefox_ingest` updates, **`project_name` (singular) is a non-destructive add** (ensures membership, preserves others). Use **`project_names` (list)** when you want to set the doc's full project set in one call (destructive replace). For metadata-only project changes without writing content, use **`cerefox_set_document_projects(document_id, project_names)`** — that tool is the destructive-replace contract made explicit. Never call `cerefox_set_document_projects` with a single name when you mean \"add\" — that would REMOVE the doc from all other projects. When in doubt, use `cerefox_ingest` with singular `project_name`.\n\n## Update Workflow (ID-based -- preferred)\n\n```\nsearch(\"topic\") -> find doc [id: abc123] -> get_document(abc123) -> modify ->\ningest(title=\"Same Title\", content=\"...\", document_id=\"abc123\", author=\"my-agent\")\n```\n\n## Update Workflow (title-based -- fallback)\n\n```\nsearch(\"topic\") -> find doc -> modify ->\ningest(title=\"Same Title\", content=\"...\", update_if_exists=true, author=\"my-agent\")\n```\n\n## Catch-Up Workflow\n\n```\nmetadata_search(metadata_filter={\"type\": \"decision-log\"}, updated_since=\"2026-03-28T00:00:00Z\")\n```\n\n## CLI fallback (when MCP is unavailable)\n\nIf `cerefox_search` is not in your tool list, your user has likely installed the Cerefox CLI. The canonical invocation is plain **`cerefox <subcommand>`** (the TypeScript CLI, installed via `npm install -g @cerefox/memory`). It uses a resource-verb shape (`cerefox document get`, `cerefox project list`, …). The legacy Python `uv run cerefox` is now a frozen husk as of v0.9 — only `uv run cerefox mcp` still works.\n\nSame operations, same conventions. Full reference: [`docs/guides/cli.md`](docs/guides/cli.md). CLI flag names match MCP parameter names exactly (e.g. 
`metadata_filter` ↔ `--metadata-filter`); common flags also have single-letter short forms (`-f`, `-p`, `-c`, `-m`, `-u`, `-a`, `-r`). Use the canonical long name (what `--help` shows) or its short form — there are no long-form aliases like `--filter` or `--count`.\n\n| MCP tool | CLI |\n|---|---|\n| `cerefox_search` | `cerefox search \"<q>\" --requestor \"<your-name>\"` |\n| `cerefox_ingest` (paste) | `printf '...' \\| cerefox document ingest --paste --title \"<t>\" --author \"<your-name>\" --author-type agent` |\n| `cerefox_ingest` (update by ID) | `printf '...' \\| cerefox document ingest --paste --title \"<t>\" --document-id \"<uuid>\" --author \"<your-name>\" --author-type agent` |\n| `cerefox_get_document` | `cerefox document get <id> --version-id <vid> --requestor \"<your-name>\"` |\n| `cerefox_list_versions` | `cerefox document version list <id> --requestor \"<your-name>\"` |\n| `cerefox_list_projects` | `cerefox project list --requestor \"<your-name>\"` |\n| `cerefox_list_metadata_keys` | `cerefox metadata keys` |\n| `cerefox_metadata_search` | `cerefox metadata search --metadata-filter '<json>' --requestor \"<your-name>\"` |\n| `cerefox_set_document_projects` | _MCP-only; a CLI command will be added in a future release. 
Until then, run via MCP if available._ |\n| `cerefox_get_audit_log` | `cerefox audit list --requestor \"<your-name>\"` (add `--json` for scripted access) |\n| `cerefox_get_help` | `cerefox guides show agent-quick-reference` (or `cerefox guides list` for the full bundled-docs index) |\n\n**Set identity on every call**, exactly as you would on MCP:\n- Writes (`document ingest`, `document ingest-dir`): `--author \"<your-name>\" --author-type agent`\n- Reads: `--requestor \"<your-name>\"`\n\nOr have your user set `CEREFOX_AUTHOR_NAME` / `CEREFOX_AUTHOR_TYPE` / `CEREFOX_REQUESTOR_NAME` in their `.env` to apply defaults once.\n"; +export const HELP_FULL = "# Cerefox Knowledge Base -- Agent Quick Reference\n\nCerefox is a persistent, shared knowledge base. You have **10 MCP tools** (9 of them have CLI equivalents — `cerefox_get_help` is MCP-only). For the full guide, search Cerefox for \"How AI Agents Use Cerefox\" or call `cerefox_get_help` to retrieve this content over MCP.\n\n## Tools\n\n| Tool | Purpose | Key params |\n|------|---------|------------|\n| `cerefox_search` | Find documents (hybrid FTS + semantic) | `query` (required), `project_name`, `metadata_filter`, `requestor` |\n| `cerefox_ingest` | Save or update a document | `title`, `content` (required), `document_id` (update by ID), `update_if_exists`, `project_name` (single, non-destructive add on update), `project_names` (list, destructive replace on update), `metadata`, `author` |\n| `cerefox_get_document` | Get full document by ID | `document_id` (required) |\n| `cerefox_list_versions` | Version history of a document | `document_id` (required) |\n| `cerefox_metadata_search` | Find or list docs by metadata, project, or time (no text query) | `metadata_filter`, `project_name` (list a project's docs), `updated_since`, `include_content` — **at least one** of metadata_filter/project_name/updated_since/created_since |\n| `cerefox_list_metadata_keys` | Discover available metadata keys | (none required) |\n| 
`cerefox_list_projects` | List all projects | (none required) |\n| `cerefox_set_document_projects` | Set doc's project memberships to exactly the given list (destructive replace; metadata-only, no content change) | `document_id`, `project_names` (required) |\n| `cerefox_get_audit_log` | Query write operation history | `document_id`, `author`, `operation`, `since` |\n| `cerefox_get_help` | Retrieve Cerefox conventions (this reference) over MCP. **Call this whenever uncertain.** | `topic` (optional, case-insensitive H2 substring match) |\n\n## Essential Rules\n\n1. **Search before ingesting** -- check if the document exists first.\n2. **Prefer ID-based updates** -- pass `document_id` from search results for deterministic updates. Falls back to title-matching with `update_if_exists: true`.\n3. **Set `author`/`requestor`** to your name on every call (e.g., \"Claude Code\", \"archiver\"). On MCP, pass as parameters. On CLI, pass `--author`/`--author-type`/`--requestor` flags, or rely on `CEREFOX_AUTHOR_NAME`/`CEREFOX_AUTHOR_TYPE`/`CEREFOX_REQUESTOR_NAME` env vars set in the user's `.env`.\n4. **Use `document_id` from search results** `[id: uuid]` for get_document and list_versions.\n5. **Add metadata** -- at minimum `type` (\"decision-log\", \"research\", \"design-doc\") and `status` (\"active\", \"draft\").\n6. **Write structured Markdown** with H1/H2/H3 headings for good chunking and search.\n7. **Deletes are soft (recoverable); purge is web-UI-only.** If you decide to delete, surface it to the user (`I soft-deleted X — recoverable from the Cerefox web UI trash`). You cannot un-do your own delete from agent code by design.\n8. **Cross-doc links inside content**: **always use `[Text](document-uuid)`.** UUIDs are the only fully reliable link form — stable across title changes, never ambiguous, no encoding gotchas. Every `cerefox_search` result shows `[id: <uuid>]` after the title; grab it and use it. 
Title-based linking (`[Text](<Title With Spaces>)`) is fragile (breaks on colons, parens, ampersands, brackets — silently navigates to wrong page) — **don't write title-based links**; do an extra search to get the UUID instead. Repo-path forms (`[Text](docs/path.md)`) exist for repo-ingested files; don't construct manually. See `AGENT_GUIDE.md → Writing linkable content` for the full rule.\n9. **Project memberships — non-destructive by default**: on `cerefox_ingest` updates, **`project_name` (singular) is a non-destructive add** (ensures membership, preserves others). Use **`project_names` (list)** when you want to set the doc's full project set in one call (destructive replace). For metadata-only project changes without writing content, use **`cerefox_set_document_projects(document_id, project_names)`** — that tool is the destructive-replace contract made explicit. Never call `cerefox_set_document_projects` with a single name when you mean \"add\" — that would REMOVE the doc from all other projects. When in doubt, use `cerefox_ingest` with singular `project_name`.\n\n## Update Workflow (ID-based -- preferred)\n\n```\nsearch(\"topic\") -> find doc [id: abc123] -> get_document(abc123) -> modify ->\ningest(title=\"Same Title\", content=\"...\", document_id=\"abc123\", author=\"my-agent\")\n```\n\n## Update Workflow (title-based -- fallback)\n\n```\nsearch(\"topic\") -> find doc -> modify ->\ningest(title=\"Same Title\", content=\"...\", update_if_exists=true, author=\"my-agent\")\n```\n\n## Catch-Up Workflow\n\n```\nmetadata_search(metadata_filter={\"type\": \"decision-log\"}, updated_since=\"2026-03-28T00:00:00Z\")\n```\n\n## CLI fallback (when MCP is unavailable)\n\nIf `cerefox_search` is not in your tool list, your user has likely installed the Cerefox CLI. The canonical invocation is plain **`cerefox <subcommand>`** (the TypeScript CLI, installed via `npm install -g @cerefox/memory`). 
It uses a resource-verb shape (`cerefox document get`, `cerefox project list`, …). The legacy Python `uv run cerefox` is now a frozen husk as of v0.9 — only `uv run cerefox mcp` still works.\n\nSame operations, same conventions. Full reference: [`docs/guides/cli.md`](docs/guides/cli.md). CLI flag names match MCP parameter names exactly (e.g. `metadata_filter` ↔ `--metadata-filter`); common flags also have single-letter short forms (`-f`, `-p`, `-c`, `-m`, `-u`, `-a`, `-r`). Use the canonical long name (what `--help` shows) or its short form — there are no long-form aliases like `--filter` or `--count`.\n\n| MCP tool | CLI |\n|---|---|\n| `cerefox_search` | `cerefox search \"<q>\" --requestor \"<your-name>\"` |\n| `cerefox_ingest` (paste) | `printf '...' \\| cerefox document ingest --paste --title \"<t>\" --author \"<your-name>\" --author-type agent` |\n| `cerefox_ingest` (update by ID) | `printf '...' \\| cerefox document ingest --paste --title \"<t>\" --document-id \"<uuid>\" --author \"<your-name>\" --author-type agent` |\n| `cerefox_get_document` | `cerefox document get <id> --version-id <vid> --requestor \"<your-name>\"` |\n| `cerefox_list_versions` | `cerefox document version list <id> --requestor \"<your-name>\"` |\n| `cerefox_list_projects` | `cerefox project list --requestor \"<your-name>\"` |\n| `cerefox_list_metadata_keys` | `cerefox metadata keys` |\n| `cerefox_metadata_search` | `cerefox metadata search --metadata-filter '<json>' --requestor \"<your-name>\"` (list a project: `cerefox document list --project <name>`) |\n| `cerefox_set_document_projects` | `cerefox document set-projects <id> <name...> --author \"<your-name>\" --author-type agent` (or `--clear` to remove all) |\n| `cerefox_get_audit_log` | `cerefox audit list --requestor \"<your-name>\"` (add `--json` for scripted access) |\n| `cerefox_get_help` | `cerefox guides show agent-quick-reference` (or `cerefox guides list` for the full bundled-docs index) |\n\n**Set identity on every call**, 
exactly as you would on MCP:\n- Writes (`document ingest`, `document ingest-dir`): `--author \"<your-name>\" --author-type agent`\n- Reads: `--requestor \"<your-name>\"`\n\nOr have your user set `CEREFOX_AUTHOR_NAME` / `CEREFOX_AUTHOR_TYPE` / `CEREFOX_REQUESTOR_NAME` in their `.env` to apply defaults once.\n"; /** Sections keyed by their H2 heading text (lower-cased for matching). */ export const HELP_SECTIONS: Record<string, string> = { - "Tools": "## Tools\n\n| Tool | Purpose | Key params |\n|------|---------|------------|\n| `cerefox_search` | Find documents (hybrid FTS + semantic) | `query` (required), `project_name`, `metadata_filter`, `requestor` |\n| `cerefox_ingest` | Save or update a document | `title`, `content` (required), `document_id` (update by ID), `update_if_exists`, `project_name` (single, non-destructive add on update), `project_names` (list, destructive replace on update), `metadata`, `author` |\n| `cerefox_get_document` | Get full document by ID | `document_id` (required) |\n| `cerefox_list_versions` | Version history of a document | `document_id` (required) |\n| `cerefox_metadata_search` | Find docs by metadata (no text query) | `metadata_filter` (required), `include_content`, `updated_since` |\n| `cerefox_list_metadata_keys` | Discover available metadata keys | (none required) |\n| `cerefox_list_projects` | List all projects | (none required) |\n| `cerefox_set_document_projects` | Set doc's project memberships to exactly the given list (destructive replace; metadata-only, no content change) | `document_id`, `project_names` (required) |\n| `cerefox_get_audit_log` | Query write operation history | `document_id`, `author`, `operation`, `since` |\n| `cerefox_get_help` | Retrieve Cerefox conventions (this reference) over MCP. 
**Call this whenever uncertain.** | `topic` (optional, case-insensitive H2 substring match) |", + "Tools": "## Tools\n\n| Tool | Purpose | Key params |\n|------|---------|------------|\n| `cerefox_search` | Find documents (hybrid FTS + semantic) | `query` (required), `project_name`, `metadata_filter`, `requestor` |\n| `cerefox_ingest` | Save or update a document | `title`, `content` (required), `document_id` (update by ID), `update_if_exists`, `project_name` (single, non-destructive add on update), `project_names` (list, destructive replace on update), `metadata`, `author` |\n| `cerefox_get_document` | Get full document by ID | `document_id` (required) |\n| `cerefox_list_versions` | Version history of a document | `document_id` (required) |\n| `cerefox_metadata_search` | Find or list docs by metadata, project, or time (no text query) | `metadata_filter`, `project_name` (list a project's docs), `updated_since`, `include_content` — **at least one** of metadata_filter/project_name/updated_since/created_since |\n| `cerefox_list_metadata_keys` | Discover available metadata keys | (none required) |\n| `cerefox_list_projects` | List all projects | (none required) |\n| `cerefox_set_document_projects` | Set doc's project memberships to exactly the given list (destructive replace; metadata-only, no content change) | `document_id`, `project_names` (required) |\n| `cerefox_get_audit_log` | Query write operation history | `document_id`, `author`, `operation`, `since` |\n| `cerefox_get_help` | Retrieve Cerefox conventions (this reference) over MCP. **Call this whenever uncertain.** | `topic` (optional, case-insensitive H2 substring match) |", "Essential Rules": "## Essential Rules\n\n1. **Search before ingesting** -- check if the document exists first.\n2. **Prefer ID-based updates** -- pass `document_id` from search results for deterministic updates. Falls back to title-matching with `update_if_exists: true`.\n3. 
**Set `author`/`requestor`** to your name on every call (e.g., \"Claude Code\", \"archiver\"). On MCP, pass as parameters. On CLI, pass `--author`/`--author-type`/`--requestor` flags, or rely on `CEREFOX_AUTHOR_NAME`/`CEREFOX_AUTHOR_TYPE`/`CEREFOX_REQUESTOR_NAME` env vars set in the user's `.env`.\n4. **Use `document_id` from search results** `[id: uuid]` for get_document and list_versions.\n5. **Add metadata** -- at minimum `type` (\"decision-log\", \"research\", \"design-doc\") and `status` (\"active\", \"draft\").\n6. **Write structured Markdown** with H1/H2/H3 headings for good chunking and search.\n7. **Deletes are soft (recoverable); purge is web-UI-only.** If you decide to delete, surface it to the user (`I soft-deleted X — recoverable from the Cerefox web UI trash`). You cannot un-do your own delete from agent code by design.\n8. **Cross-doc links inside content**: **always use `[Text](document-uuid)`.** UUIDs are the only fully reliable link form — stable across title changes, never ambiguous, no encoding gotchas. Every `cerefox_search` result shows `[id: <uuid>]` after the title; grab it and use it. Title-based linking (`[Text](<Title With Spaces>)`) is fragile (breaks on colons, parens, ampersands, brackets — silently navigates to wrong page) — **don't write title-based links**; do an extra search to get the UUID instead. Repo-path forms (`[Text](docs/path.md)`) exist for repo-ingested files; don't construct manually. See `AGENT_GUIDE.md → Writing linkable content` for the full rule.\n9. **Project memberships — non-destructive by default**: on `cerefox_ingest` updates, **`project_name` (singular) is a non-destructive add** (ensures membership, preserves others). Use **`project_names` (list)** when you want to set the doc's full project set in one call (destructive replace). 
For metadata-only project changes without writing content, use **`cerefox_set_document_projects(document_id, project_names)`** — that tool is the destructive-replace contract made explicit. Never call `cerefox_set_document_projects` with a single name when you mean \"add\" — that would REMOVE the doc from all other projects. When in doubt, use `cerefox_ingest` with singular `project_name`.", "Update Workflow (ID-based -- preferred)": "## Update Workflow (ID-based -- preferred)\n\n```\nsearch(\"topic\") -> find doc [id: abc123] -> get_document(abc123) -> modify ->\ningest(title=\"Same Title\", content=\"...\", document_id=\"abc123\", author=\"my-agent\")\n```", "Update Workflow (title-based -- fallback)": "## Update Workflow (title-based -- fallback)\n\n```\nsearch(\"topic\") -> find doc -> modify ->\ningest(title=\"Same Title\", content=\"...\", update_if_exists=true, author=\"my-agent\")\n```", "Catch-Up Workflow": "## Catch-Up Workflow\n\n```\nmetadata_search(metadata_filter={\"type\": \"decision-log\"}, updated_since=\"2026-03-28T00:00:00Z\")\n```", - "CLI fallback (when MCP is unavailable)": "## CLI fallback (when MCP is unavailable)\n\nIf `cerefox_search` is not in your tool list, your user has likely installed the Cerefox CLI. The canonical invocation is plain **`cerefox <subcommand>`** (the TypeScript CLI, installed via `npm install -g @cerefox/memory`). It uses a resource-verb shape (`cerefox document get`, `cerefox project list`, …). The legacy Python `uv run cerefox` is now a frozen husk as of v0.9 — only `uv run cerefox mcp` still works.\n\nSame operations, same conventions. Full reference: [`docs/guides/cli.md`](docs/guides/cli.md). CLI flag names match MCP parameter names exactly (e.g. `metadata_filter` ↔ `--metadata-filter`); common flags also have single-letter short forms (`-f`, `-p`, `-c`, `-m`, `-u`, `-a`, `-r`). 
Use the canonical long name (what `--help` shows) or its short form — there are no long-form aliases like `--filter` or `--count`.\n\n| MCP tool | CLI |\n|---|---|\n| `cerefox_search` | `cerefox search \"<q>\" --requestor \"<your-name>\"` |\n| `cerefox_ingest` (paste) | `printf '...' \\| cerefox document ingest --paste --title \"<t>\" --author \"<your-name>\" --author-type agent` |\n| `cerefox_ingest` (update by ID) | `printf '...' \\| cerefox document ingest --paste --title \"<t>\" --document-id \"<uuid>\" --author \"<your-name>\" --author-type agent` |\n| `cerefox_get_document` | `cerefox document get <id> --version-id <vid> --requestor \"<your-name>\"` |\n| `cerefox_list_versions` | `cerefox document version list <id> --requestor \"<your-name>\"` |\n| `cerefox_list_projects` | `cerefox project list --requestor \"<your-name>\"` |\n| `cerefox_list_metadata_keys` | `cerefox metadata keys` |\n| `cerefox_metadata_search` | `cerefox metadata search --metadata-filter '<json>' --requestor \"<your-name>\"` |\n| `cerefox_set_document_projects` | _MCP-only; a CLI command will be added in a future release. Until then, run via MCP if available._ |\n| `cerefox_get_audit_log` | `cerefox audit list --requestor \"<your-name>\"` (add `--json` for scripted access) |\n| `cerefox_get_help` | `cerefox guides show agent-quick-reference` (or `cerefox guides list` for the full bundled-docs index) |\n\n**Set identity on every call**, exactly as you would on MCP:\n- Writes (`document ingest`, `document ingest-dir`): `--author \"<your-name>\" --author-type agent`\n- Reads: `--requestor \"<your-name>\"`\n\nOr have your user set `CEREFOX_AUTHOR_NAME` / `CEREFOX_AUTHOR_TYPE` / `CEREFOX_REQUESTOR_NAME` in their `.env` to apply defaults once.", + "CLI fallback (when MCP is unavailable)": "## CLI fallback (when MCP is unavailable)\n\nIf `cerefox_search` is not in your tool list, your user has likely installed the Cerefox CLI. 
The canonical invocation is plain **`cerefox <subcommand>`** (the TypeScript CLI, installed via `npm install -g @cerefox/memory`). It uses a resource-verb shape (`cerefox document get`, `cerefox project list`, …). The legacy Python `uv run cerefox` is now a frozen husk as of v0.9 — only `uv run cerefox mcp` still works.\n\nSame operations, same conventions. Full reference: [`docs/guides/cli.md`](docs/guides/cli.md). CLI flag names match MCP parameter names exactly (e.g. `metadata_filter` ↔ `--metadata-filter`); common flags also have single-letter short forms (`-f`, `-p`, `-c`, `-m`, `-u`, `-a`, `-r`). Use the canonical long name (what `--help` shows) or its short form — there are no long-form aliases like `--filter` or `--count`.\n\n| MCP tool | CLI |\n|---|---|\n| `cerefox_search` | `cerefox search \"<q>\" --requestor \"<your-name>\"` |\n| `cerefox_ingest` (paste) | `printf '...' \\| cerefox document ingest --paste --title \"<t>\" --author \"<your-name>\" --author-type agent` |\n| `cerefox_ingest` (update by ID) | `printf '...' 
\\| cerefox document ingest --paste --title \"<t>\" --document-id \"<uuid>\" --author \"<your-name>\" --author-type agent` |\n| `cerefox_get_document` | `cerefox document get <id> --version-id <vid> --requestor \"<your-name>\"` |\n| `cerefox_list_versions` | `cerefox document version list <id> --requestor \"<your-name>\"` |\n| `cerefox_list_projects` | `cerefox project list --requestor \"<your-name>\"` |\n| `cerefox_list_metadata_keys` | `cerefox metadata keys` |\n| `cerefox_metadata_search` | `cerefox metadata search --metadata-filter '<json>' --requestor \"<your-name>\"` (list a project: `cerefox document list --project <name>`) |\n| `cerefox_set_document_projects` | `cerefox document set-projects <id> <name...> --author \"<your-name>\" --author-type agent` (or `--clear` to remove all) |\n| `cerefox_get_audit_log` | `cerefox audit list --requestor \"<your-name>\"` (add `--json` for scripted access) |\n| `cerefox_get_help` | `cerefox guides show agent-quick-reference` (or `cerefox guides list` for the full bundled-docs index) |\n\n**Set identity on every call**, exactly as you would on MCP:\n- Writes (`document ingest`, `document ingest-dir`): `--author \"<your-name>\" --author-type agent`\n- Reads: `--requestor \"<your-name>\"`\n\nOr have your user set `CEREFOX_AUTHOR_NAME` / `CEREFOX_AUTHOR_TYPE` / `CEREFOX_REQUESTOR_NAME` in their `.env` to apply defaults once.", }; export const HELP_SECTION_HEADINGS: string[] = ["Tools", "Essential Rules", "Update Workflow (ID-based -- preferred)", "Update Workflow (title-based -- fallback)", "Catch-Up Workflow", "CLI fallback (when MCP is unavailable)"]; diff --git a/_shared/mcp-tools/metadata-search.ts b/_shared/mcp-tools/metadata-search.ts index a23ff3d..8fd28a8 100644 --- a/_shared/mcp-tools/metadata-search.ts +++ b/_shared/mcp-tools/metadata-search.ts @@ -24,11 +24,18 @@ async function handler( const include_content = (args.include_content as boolean | undefined) ?? 
false; const requested_max_bytes = args.max_bytes as number | undefined; - if (!metadata_filter || typeof metadata_filter !== "object" || Array.isArray(metadata_filter)) { - throw new McpInvalidParams("metadata_filter is required and must be a JSON object"); + if (metadata_filter !== undefined && (typeof metadata_filter !== "object" || Array.isArray(metadata_filter))) { + throw new McpInvalidParams("metadata_filter must be a JSON object when provided"); } - if (Object.keys(metadata_filter).length === 0) { - throw new McpInvalidParams("metadata_filter must contain at least one key-value pair"); + const has_metadata = !!metadata_filter && Object.keys(metadata_filter).length > 0; + // metadata_filter is optional, but at least one narrowing criterion is + // required so this can never become an unbounded whole-KB dump. An empty + // filter + project_name lists a project's documents: the RPC's + // `metadata @> '{}'` matches every row and the project predicate narrows it. + if (!has_metadata && !project_name && !updated_since && !created_since) { + throw new McpInvalidParams( + "Provide at least one of: metadata_filter, project_name, updated_since, or created_since.", + ); } // Resolve project name to UUID if provided @@ -45,7 +52,7 @@ async function handler( : null; const params: Record<string, unknown> = { - p_metadata_filter: metadata_filter, + p_metadata_filter: metadata_filter ?? {}, p_project_id: projectId, p_updated_since: updated_since ?? null, p_created_since: created_since ?? null, @@ -78,12 +85,12 @@ async function handler( operation: "metadata_search", accessPath: ctx.accessPath, requestor: args.requestor as string | undefined, - query_text: JSON.stringify(metadata_filter), + query_text: JSON.stringify(metadata_filter ?? 
{}), project_id: projectId, result_count: rows.length, }); - if (rows.length === 0) return "No documents match the metadata filter."; + if (rows.length === 0) return "No documents match the given criteria."; // Note: when include_content is true the RPC already respects p_max_bytes // server-side. The applyByteBudget helper is retained here only for @@ -114,18 +121,17 @@ async function handler( export const metadataSearchTool: ToolDefinition = { name: "cerefox_metadata_search", description: - "Find documents by metadata key-value criteria without a text search term. Use to discover documents tagged with specific attributes, browse by taxonomy, or retrieve messages/tasks by type and status.", + "Find or list documents by metadata key-value criteria without a text search term. Use to discover documents tagged with specific attributes, browse by taxonomy, retrieve messages/tasks by type and status, or list all documents in a project (pass project_name alone). At least one of metadata_filter, project_name, updated_since, or created_since must be supplied; results are ordered newest-updated first.", inputSchema: { type: "object", - required: ["metadata_filter"], properties: { metadata_filter: { type: "object", description: - 'Key-value pairs; ALL must match (AND semantics). Example: {"type": "decision", "status": "active"}. Call cerefox_list_metadata_keys first to discover available keys.', + 'Key-value pairs; ALL must match (AND semantics). Example: {"type": "decision", "status": "active"}. Call cerefox_list_metadata_keys first to discover available keys. Optional — omit (or pass {}) to list by project_name / time range alone.', additionalProperties: { type: "string" }, }, - project_name: { type: "string", description: "Restrict to a project by name (optional)" }, + project_name: { type: "string", description: "Restrict to a project by name. Sufficient on its own to list that project's documents (optional)." 
}, updated_since: { type: "string", description: "ISO-8601 timestamp; only docs updated on/after (optional)", diff --git a/_shared/mcp-tools/set-document-projects.ts b/_shared/mcp-tools/set-document-projects.ts index eea5a5e..e5bb9c3 100644 --- a/_shared/mcp-tools/set-document-projects.ts +++ b/_shared/mcp-tools/set-document-projects.ts @@ -11,7 +11,7 @@ import type { MCPSupabaseClient } from "./types.ts"; -import { logUsage } from "./_utils.ts"; +import { replaceDocumentProjects } from "./_projects.ts"; import { McpInvalidParams, type ToolContext, type ToolDefinition } from "./types.ts"; async function handler( @@ -42,80 +42,15 @@ async function handler( throw new McpInvalidParams("project_names must contain only strings."); } - // Strip empties; preserve order; dedup case-insensitively. - const seenLower = new Set<string>(); - const cleanNames: string[] = []; - for (const n of project_names_raw as string[]) { - const stripped = n.trim(); - if (!stripped) continue; - const key = stripped.toLowerCase(); - if (seenLower.has(key)) continue; - seenLower.add(key); - cleanNames.push(stripped); - } - - // Verify the document exists and isn't soft-deleted. - const { data: doc } = await supabase - .from("cerefox_documents") - .select("id, title") - .eq("id", document_id) - .is("deleted_at", null) - .limit(1); - if (!doc?.length) { - throw new Error(`Document not found (or soft-deleted): ${document_id}`); - } - - // Resolve each name → project_id (create if absent). Preserve order. 
- const projectIds: string[] = []; - for (const name of cleanNames) { - const { data: proj } = await supabase - .from("cerefox_projects") - .select("id") - .ilike("name", name) - .limit(1); - if (proj?.length) { - projectIds.push(proj[0].id); - } else { - const { data: newProj } = await supabase - .from("cerefox_projects") - .insert({ name }) - .select("id"); - if (newProj?.[0]?.id) projectIds.push(newProj[0].id); - } - } - - // DELETE-then-INSERT replace (matches Python assign_document_projects). - await supabase.from("cerefox_document_projects").delete().eq("document_id", document_id); - if (projectIds.length > 0) { - const rows = projectIds.map((pid) => ({ document_id, project_id: pid })); - await supabase.from("cerefox_document_projects").insert(rows); - } - - // Audit entry — project membership is metadata, not content. - try { - await supabase.rpc("cerefox_create_audit_entry", { - p_document_id: document_id, - p_version_id: null, - p_operation: "update-metadata", - p_author: author, - p_author_type: "agent", - p_size_before: null, - p_size_after: null, - p_description: - cleanNames.length > 0 - ? `Set document projects to [${cleanNames.join(", ")}]` - : "Cleared all project memberships", - }); - } catch (err) { - console.warn("set-document-projects: audit entry failed", err); - } - - logUsage(supabase, { - operation: "set-document-projects", + // The clean/dedup, existence check, name→id resolution, DELETE-then-INSERT + // replace, audit entry, and usage log all live in the shared core so the + // `cerefox document set-projects` CLI command behaves identically. 
+ const { cleanNames, projectIds } = await replaceDocumentProjects(supabase, { + documentId: document_id, + projectNames: project_names_raw as string[], + author, + authorType: "agent", accessPath: ctx.accessPath, - requestor: author, - document_id, - result_count: projectIds.length, }); if (cleanNames.length === 0) { diff --git a/docs/guides/access-paths.md b/docs/guides/access-paths.md index 3289560..dd9db4f 100644 --- a/docs/guides/access-paths.md +++ b/docs/guides/access-paths.md @@ -209,7 +209,7 @@ paths. | Tier | Operations | Reversible? | Where exposed | |---|---|---|---| | 1. Reads + soft mutations | search, get, list-*, ingest (create/update), metadata-search, get-audit-log | n/a (reads) / yes (versioned) | All paths — MCP, Edge Functions, CLI, web UI | -| 2. Soft-destructive | `delete_document` (soft delete to trash), `set_review_status` | yes — restorable via web UI | All paths (CLI: `cerefox document delete`; web UI; Python; **not** MCP or Edge Functions today) | +| 2. Soft-destructive | `delete_document` (soft delete to trash), `set_review_status` | yes — restorable via web UI | All paths (CLI: `cerefox document delete`; web UI; **not** MCP or Edge Functions today) | | 3. **Hard-destructive** | `purge_document` (permanent), `restore_document` (un-trash), `set_version_archived` (toggle version retention) | no (purge) / yes (restore, but recovers from a destructive action) | **Web UI only** | ### Why purge / restore are web-UI-only diff --git a/docs/guides/agent-coordination.md b/docs/guides/agent-coordination.md index 47feb87..8b7e0e7 100644 --- a/docs/guides/agent-coordination.md +++ b/docs/guides/agent-coordination.md @@ -53,7 +53,7 @@ A living document where agents record decisions, experiment outcomes, and lesson **Example**: A coding agent working on a project records "Chose PostgreSQL RPC approach over application-level logic because..." in a decision log document. 
Next week, a different agent working on a related feature searches Cerefox, finds the decision log, and understands the rationale without re-deriving it. -**How it works**: Create a document with a structured format (date, context, decision, outcome). Use a consistent title or project tag so agents can find it. Use `update_if_exists: true` to append new entries. +**How it works**: Create a document with a structured format (date, context, decision, outcome). Use a consistent title or project tag so agents can find it. To add entries over time, re-ingest with `update_if_exists: true` (or `document_id`) — this replaces the document in place, so build the new full content by appending to the prior content you fetched. **Best for**: Project-level institutional memory, avoiding repeated decisions, onboarding new agent sessions. diff --git a/docs/guides/cli.md b/docs/guides/cli.md index 4ca5c2d..c27aabe 100644 --- a/docs/guides/cli.md +++ b/docs/guides/cli.md @@ -207,7 +207,9 @@ cerefox document list [OPTIONS] | `--deleted` | flag | off | List soft-deleted (trashed) documents instead of active ones, newest-deleted first. Pair the ids with `cerefox document restore` / `cerefox document delete`. | | `--json` | flag | off | Machine-readable JSON output. | -**Output**: tabular `id | title | source | status | updated_at` listing (or `deleted_at` with `--deleted`). CLI-only — there is no MCP equivalent. +**Output**: tabular `id | title | source | status | updated_at` listing (or `deleted_at` with `--deleted`). + +**MCP equivalent**: scope-by-project / metadata / time listing maps to [`cerefox_metadata_search`](../../AGENT_GUIDE.md) — e.g. `cerefox_metadata_search(project_name="research")` lists that project's documents (the `metadata_filter` may be empty when another scope is supplied). The `--deleted` (trash) view and unscoped whole-KB listing remain CLI-only. 
--- @@ -240,6 +242,43 @@ cerefox document edit <doc-id> --set-meta status=archived --unset-meta draft --- +### `cerefox document set-projects` + +**Purpose**: replace a document's project memberships with **exactly** the given set (full-set replace — any project not listed is removed). This is the CLI equivalent of the `cerefox_set_document_projects` MCP tool; both share one membership-replace core, so they behave identically. Content is untouched; the change is logged as an `update-metadata` audit entry. + +**Synopsis**: +``` +cerefox document set-projects [OPTIONS] DOCUMENT_ID [PROJECT_NAMES...] +``` + +**Options**: + +| Flag | Type | Default | Description | +|---|---|---|---| +| `[project-names...]` | variadic args | _none_ | One or more project names. Each is created if missing; order preserved; case-insensitively de-duplicated. | +| `--clear` | flag | off | Remove the document from **all** projects. Mutually exclusive with passing names. | +| `--author <name>` (`-a`) | str | `CEREFOX_AUTHOR_NAME` or `unknown` | Identity recorded in the audit log. | +| `--author-type <type>` | `user`\|`agent` | `user` | Caller type recorded in the audit log. | + +To update content **and** adjust memberships in one shot, use `cerefox document ingest --document-id <id> --project-name …` instead — note that singular `--project-name` is a non-destructive add (it ensures membership and preserves the document's other projects), not a full-set replace. Use this command when you only need to change membership. + +**Examples**: +```bash +# Set the document to belong to exactly these two projects (replaces any others) +cerefox document set-projects <doc-id> research archive + +# Remove the document from all projects +cerefox document set-projects <doc-id> --clear +``` + +**Output**: a confirmation line with the document title and the resulting project set (or a "cleared all memberships" line), plus a reminder that the previous set was replaced. + +**Exit codes**: `0` on success; `1` on validation error (no names and no `--clear`, or both) or if the document is missing / soft-deleted. 
+ +**MCP equivalent**: [`cerefox_set_document_projects`](../../AGENT_GUIDE.md). + +--- + ### `cerefox document restore` **Purpose**: restore a soft-deleted (trashed) document back to active. @@ -599,10 +638,59 @@ Every MCP parameter has an exact-name CLI flag (kebab-cased). Short forms exist | `cerefox_get_document(document_id, version_id, requestor)` | `cerefox document get <id> --version-id <vid> --requestor <name>` | | `cerefox_list_versions(document_id, requestor)` | `cerefox document version list <id> --requestor <name>` | | `cerefox_list_projects(requestor)` | `cerefox project list --requestor <name>` | +| `cerefox_set_document_projects(document_id, project_names, author)` | `cerefox document set-projects <id> <name...> --author <a> --author-type <t>` (or `--clear` to remove all) | | `cerefox_list_metadata_keys()` | `cerefox metadata keys` | | `cerefox_metadata_search(metadata_filter, project_name, updated_since, created_since, limit, include_content, requestor)` | `cerefox metadata search --metadata-filter '<json>' --project-name <n> --updated-since <iso> --created-since <iso> --limit N --include-content --requestor <name>` | | `cerefox_get_audit_log(document_id, author, operation, since, until, limit, requestor)` | `cerefox audit list --document-id <id> --author <a> --operation <op> --since <iso> --until <iso> --limit N --requestor <name>` | +## CLI ↔ MCP parity matrix + +The table above is MCP-first (it lists the tools that *have* a CLI form). This +one is **CLI-first** — every `cerefox` command, with its MCP equivalent or an +explicit reason it has none. It exists to make parity gaps visible. Legend: +**✅ mapped** · **⚠️ gap** (a capability one surface has and the other lacks, +arguably worth closing) · **🔒 intentional** (deliberately not on the MCP/agent +surface). 
+ +| CLI command | MCP equivalent | Status | +|---|---|---| +| `document ingest` | `cerefox_ingest` | ✅ | +| `document ingest-dir` | — (agents loop `cerefox_ingest`) | 🔒 bulk filesystem walk; no server-side dir access from MCP | +| `search` | `cerefox_search` | ✅ (CLI adds `--mode`/`--alpha`/`--min-score`/`--only-metadata`) | +| `document get` | `cerefox_get_document` | ✅ | +| `document list` | `cerefox_metadata_search` (scope by `project_name` / metadata / time) | ✅ as of this change. Unscoped whole-KB listing has no MCP path by design (scope it) | +| `document edit` (title / metadata in place) | — | 🔒 intentional: a human/web-parity convenience. Agents update title+metadata deterministically via `cerefox_ingest` (with `document_id`); a metadata-only edit isn't a needed agent primitive | +| `document delete` (soft-delete) | — | 🔒 destructive; trust model keeps delete/restore on CLI + web only | +| `document restore` | — | 🔒 trust model (CLI + web only) | +| `document version list` | `cerefox_list_versions` | ✅ | +| `document version archive` / `unarchive` | — | 🔒 intentional: version-retention protection is exposed only to CLI + web (a maintenance concern, not an agent primitive) | +| `document set-projects` | `cerefox_set_document_projects` | ✅ full-set replace of a document's project memberships (shared core; `--clear` to remove all) | +| `project list` | `cerefox_list_projects` | ✅ | +| `project create` / `edit` / `delete` | — | 🔒 project mutations CLI + web only | +| `metadata keys` | `cerefox_list_metadata_keys` | ✅ | +| `metadata search` | `cerefox_metadata_search` | ✅ | +| `audit list` | `cerefox_get_audit_log` | ✅ | +| `guides list` / `show` / `open` / `ingest` | `cerefox_get_help` (partial) | ✅ (partial) `get_help` returns the bundled quick-reference; `guides` is the richer CLI form | +| `server deploy` / `server reindex` | — | 🔒 operator/deploy surface | +| `config list` / `get` / `set` | — | 🔒 runtime config; operator surface | +| `web` / `mcp` | — | 🔒 lifecycle 
(`mcp` *is* the MCP server) | +| `init` / `doctor` / `status` / `configure-agent` / `self-update` / `completion` / `backup *` | — | 🔒 install / health / ops | + +**Gap status** (the 🔒 rows are deliberate and out of scope): + +1. `document list` → **closed**: project/metadata/time-scoped listing now routes + through `cerefox_metadata_search` (it accepts an empty `metadata_filter` when + another scope is supplied). +2. `cerefox_set_document_projects` → **closed**: added `cerefox document + set-projects` (full-set replace, `--clear` to remove all), sharing the + membership-replace core with the MCP tool. +3. `document edit` (metadata/title-only edit) → **intentional non-gap**: a + human/web-parity convenience; agents use `cerefox_ingest` for content+metadata + updates. Revisit only if a concrete agent workflow needs metadata-only edits. +4. `document version archive` / `unarchive` → **intentional non-gap**: version-retention + protection is exposed only to CLI + web (a maintenance concern, deliberately not on + the MCP/agent surface). + ## Known issues None outstanding as of v0.1.17 (cerefox#27 — the `cerefox search` NameError — is resolved). When new bugs surface, they are tracked in the GitHub issues list; check there before relying on a behaviour the docs imply. diff --git a/docs/guides/connect-agents.md b/docs/guides/connect-agents.md index 7fb948f..1beaed4 100644 --- a/docs/guides/connect-agents.md +++ b/docs/guides/connect-agents.md @@ -604,7 +604,7 @@ In the action editor, paste this schema (replace `<your-project-ref>`): openapi: 3.1.0 info: title: Cerefox Knowledge Base - version: 1.8.0 + version: 1.9.0 servers: - url: https://<your-project-ref>.supabase.co/functions/v1 paths: @@ -895,15 +895,17 @@ paths: post: operationId: metadataSearch summary: > - Find documents by metadata key-value criteria without a text search term. - Use to discover documents tagged with specific attributes or browse by taxonomy. 
+ Find or list documents by metadata key-value criteria without a text + search term. Use to discover documents tagged with specific attributes, + browse by taxonomy, or list a project's documents (pass project_id alone). + At least one of metadata_filter, project_id, updated_since, or + created_since must be supplied. requestBody: required: true content: application/json: schema: type: object - required: [metadata_filter] properties: metadata_filter: type: object @@ -912,10 +914,14 @@ paths: description: > Key-value pairs; ALL must match (AND semantics). Example: {"type": "decision", "status": "active"}. + Optional — omit (or pass {}) to list by project_id / time + range alone. At least one filter (metadata_filter, project_id, + updated_since, or created_since) is required. project_id: type: string description: > - Filter by project UUID (optional). NOTE: this is the project + Filter by project UUID (optional). Sufficient on its own to + list that project's documents. NOTE: this is the project UUID, not its name — unlike searchKnowledgeBase / ingestNote which take project_name. Get UUIDs from listProjects. updated_since: @@ -1148,6 +1154,7 @@ The agent docs are written around MCP tool names. 
**CLI flag names match MCP par | `cerefox_get_document` | `cerefox document get <document-id> --version-id <vid> --requestor <name>` | | `cerefox_list_versions` | `cerefox document version list <document-id> --requestor <name>` | | `cerefox_list_projects` | `cerefox project list --requestor <name>` | +| `cerefox_set_document_projects` | `cerefox document set-projects <document-id> <name...> --author <a> --author-type user\|agent` (or `--clear`) | | `cerefox_list_metadata_keys` | `cerefox metadata keys` | | `cerefox_metadata_search` | `cerefox metadata search --metadata-filter '<json>' --project-name <n> --requestor <name>` | | `cerefox_get_audit_log` | `cerefox audit list --document-id <id> --author <a> --operation <op> --since <iso> --until <iso> --limit N --json --requestor <name>` | diff --git a/docs/plan.md b/docs/plan.md index 2085459..73ec9d6 100644 --- a/docs/plan.md +++ b/docs/plan.md @@ -3449,30 +3449,45 @@ in-place supervise-restart) instead of relying on the Docker restart cycle. ## Current Focus -**Status (2026-06-02, `main` at v0.9.10):** the resource-verb CLI shipped; Python is -a husk (`uv run cerefox mcp` only); the entire runtime is TypeScript in -`@cerefox/memory`. Since the v0.9.3 doc-accuracy release, v0.9.4–v0.9.10 patches -landed — notably the web-UI redesign + pagination, the schema-version release gate -(`cut_release.ts` now fails a `db/` change without a lockstep `schema_version` bump), -in-place archived-version viewing, and an installer/`self-update` fix that bypasses -stale package-manager **manifest** caches (`--no-cache` for bun, `--prefer-online` -for npm) so re-installs always resolve the newest published version. - -**Two near-term tracks** (iteration numbers are planning IDs, **not** ship order; -ship order by version: **iter-30 `v0.10.0` → iter-28 `v1.0` → iter-29 `v1.1`**): -1. **Iteration 30 — Local / Self-Hosted Cerefox Backend (D1)**, target **v0.10.0**. 
- Design of record: [`docs/research/local-cerefox-design.md`](research/local-cerefox-design.md). - On `feat/local-cerefox`: P0 spike, the all-in-one s6 image, the `/rest/v1` proxy, and - the **ghcr multi-arch publish** are ✅ done + validated. **Next (this is the remaining - v0.10.0 work — see "P2 finalized" above): the World-B user workflow** — the - `cerefox-local` host script (lifecycle + KB-proxy via `docker exec`), container - self-generated JWT (drop host minting), `CEREFOX_PROG_NAME` in the bin, simplified - `install-local.sh` as a Release asset, `cut_release.ts` asset upload, and the rewritten - `setup-local.md` (bundled). Then **cut v0.10.0** (publishes npm + ghcr) and test **both** - one-liners on a clean machine. +**Update (2026-06-09, `main` at v0.10.3):** Iteration 30 (Local / Self-Hosted Cerefox, +World B) shipped across v0.10.0–v0.10.2; v0.10.3 fixed the `cerefox server deploy` Edge +Function bundler (`--use-api`, issue #84). Two active branches: +- **`feat/local-embedder`** — design for a local ONNX embedder (fully-offline World B), + target **v0.11.0**. Design committed; implementation pending review. +- **`feat/mcp-list-documents`** — closes the CLI↔MCP parity gap where + `cerefox document list --project` had no MCP form. `cerefox_metadata_search` now accepts + an empty `metadata_filter` when another scope (`project_name` / time) is supplied, so it + lists a project's documents. Handler + EF twin + GPT Actions OpenAPI (v1.9.0) relaxed in + lockstep; **no `schema_version` bump** (the RPC's `metadata @> '{}'` already matches every row, + so no `rpcs.sql` change). Added a CLI↔MCP parity matrix to `docs/guides/cli.md` that + surfaced two further gaps: `cerefox_set_document_projects` had no CLI verb (**now + closed** — added `cerefox document set-projects`, sharing a `replaceDocumentProjects` + core with the MCP tool), and metadata-only `document edit` has no MCP tool (left as an + intentional non-gap — a human/web convenience; agents use `cerefox_ingest`). 
+ +**Baseline:** the resource-verb CLI shipped; Python is a husk (`uv run cerefox mcp` +only); the entire runtime is TypeScript in `@cerefox/memory`. **Iteration 30 — +Local / Self-Hosted Cerefox Backend (World B) is ✅ DONE and shipped** across +v0.10.0–v0.10.2: the all-in-one s6 image, the `/rest/v1` proxy, ghcr multi-arch +publish, the `cerefox-local` host script (lifecycle + KB-proxy via `docker exec`), +container self-generated JWT, `install-local.sh` as a Release asset, completion + +`configure-agent` for both bins, port auto-selection, and the World-B guide rewrite +all landed and were validated. v0.10.3 then fixed the `cerefox server deploy` EF +bundler (`--use-api`, issue #84). Design of record: +[`docs/research/local-cerefox-design.md`](research/local-cerefox-design.md). + +**Near-term tracks** (iteration numbers are planning IDs, not ship order): +1. **Iteration 31 — Local ONNX embedder** (fully-offline World B), target **v0.11.0**, + on `feat/local-embedder`. **This is the only remaining Iteration-30-family build.** + Design committed; P0 implementation pending review. See iter-31 in the log above. 2. **Iteration 28 — v1.0**, the stability commitment (strict SemVer becomes binding) - + security audit. Trigger: ~2–3 months of v0.9 in the wild + an outside user + + security audit. Trigger: ~2–3 months of v0.10 in the wild + an outside user installing unaided. +3. **Iteration 29 — Document Relations & Semantic Graph** (post-v1.0, target **v1.1+**), + pending — design only. Design of record: + [`docs/research/document-relations-and-semantic-graph.md`](research/document-relations-and-semantic-graph.md). + (The early semantic-graph exploration branch was already merged to main; + implementation is future work.) Release history lives in [`CHANGELOG.md`](../CHANGELOG.md); the design-of-record for the polish arc is [`docs/specs/polish-and-distribution-design.md`](specs/polish-and-distribution-design.md). 
diff --git a/docs/requirements-and-specs.md b/docs/requirements-and-specs.md index cefcd8a..a9a66ca 100644 --- a/docs/requirements-and-specs.md +++ b/docs/requirements-and-specs.md @@ -63,7 +63,7 @@ Projects and categories are created, renamed, and deleted by the user at any tim | FR-1.1 | Ingest Markdown files (.md) | P0 | | FR-1.2 | Ingest pasted text (treated as markdown) | P0 | | FR-1.3 | Convert PDF to markdown before ingestion (dropped in v0.7; Markdown/.txt only) | P1 | -| FR-1.4 | Convert DOCX to markdown before ingestion (dropped in v0.7; Markdown/.txt only) | P1 | +| FR-1.4 | Convert DOCX to markdown before ingestion (supported, beta — converted via mammoth on ingest) | P1 | | FR-1.5 | Deduplicate content by hash (skip re-ingestion of identical files) | P0 | | FR-1.6 | Associate ingested content with a project | P0 | | FR-1.7 | Attach metadata (tags, importance, custom fields) on ingest | P0 | @@ -433,6 +433,6 @@ All parameters use `CEREFOX_` prefix and can be set via environment variables or | `CEREFOX_BACKUP_DIR` | `./backups` | Directory for file system backups | | `CEREFOX_VECTOR_DIMENSIONS` | `768` | Embedding vector dimensions | | `CEREFOX_LOG_LEVEL` | `INFO` | Logging level | -| `CEREFOX_SMALL_TO_BIG_THRESHOLD` | `40000` | Doc size (chars) above which search returns chunks + neighbors instead of full document | +| `CEREFOX_SMALL_TO_BIG_THRESHOLD` | `20000` | Doc size (chars) above which search returns chunks + neighbors instead of full document | | `CEREFOX_CONTEXT_WINDOW` | `1` | Number of neighbor chunks on each side of matched chunks in small-to-big retrieval | | `CEREFOX_VERSION_RETENTION_HOURS` | `48` | Hours to retain all document versions before lazy cleanup | diff --git a/docs/research/cli-web-parity-audit.md b/docs/research/cli-web-parity-audit.md index 0f4f586..780a2c8 100644 --- a/docs/research/cli-web-parity-audit.md +++ b/docs/research/cli-web-parity-audit.md @@ -1,5 +1,13 @@ # CLI ↔ Web parity audit (iter-27 / v0.9.0) +> **Historical 
snapshot (v0.9.0).** Parity as of the v0.9.0 CLI rename. The gaps +> deferred below have since **shipped**: `cerefox document edit`, `cerefox document +> restore`, and `cerefox project create/edit/delete` (v0.9.1), and later `cerefox +> document set-projects` (CLI parity with the `cerefox_set_document_projects` MCP +> tool); `cerefox_metadata_search` can now also list a project's documents. Treat +> the tables below as a point-in-time record — the live parity reference is the +> CLI↔MCP matrix in [`docs/guides/cli.md`](../guides/cli.md). + Produced for the v0.9.0 rename-only CLI redesign. Maps every web-UI surface to its CLI equivalent and vice-versa, and records the gaps. Conclusion up front: **the only real web→CLI gaps map to the new commands already deferred to diff --git a/docs/solution-design.md b/docs/solution-design.md index 44d920b..6c3fb5f 100644 --- a/docs/solution-design.md +++ b/docs/solution-design.md @@ -436,7 +436,7 @@ Returns: list of version rows — `id`, `version_number`, `total_chars`, `chunk_ **REST API**: `GET /api/v1/documents/{id}` and `GET /api/v1/documents/{id}/versions` — same semantics as the MCP tools, served by the TS web server (`cerefox web`) for the web UI and scripting. -**CLI** (resource-verb shape, v0.9.0+): `cerefox document get <id>` (current content) and `cerefox document get <id> --version <version-id>` (specific version). `cerefox version list <id>` lists version history. +**CLI** (resource-verb shape, v0.9.0+): `cerefox document get <id>` (current content) and `cerefox document get <id> --version <version-id>` (specific version). `cerefox document version list <id>` lists version history. ### 5.4 Response Size Management diff --git a/frontend/README.md b/frontend/README.md index 7dbf7eb..40713c3 100644 --- a/frontend/README.md +++ b/frontend/README.md @@ -1,73 +1,44 @@ -# React + TypeScript + Vite +# Cerefox Web UI (frontend) -This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules. 
+The Cerefox web UI — a React + TypeScript single-page app (Mantine UI, TanStack +Query, Vite), served at `/app/` by the TypeScript web server (`cerefox web`). -Currently, two official plugins are available: +## Develop -- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Oxc](https://oxc.rs) -- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/) - -## React Compiler +```bash +bun install +bun run dev # Vite dev server with HMR (point it at a running `cerefox web` backend) +``` -The React Compiler is not enabled on this template because of its impact on dev & build performances. To add it, see [this documentation](https://react.dev/learn/react-compiler/installation). +Use the Vite dev server for iterative UI work — a from-source `cerefox web` reads +`index.html` into memory at startup, so it serves stale hashed assets until +restarted after a rebuild. -## Expanding the ESLint configuration +## Build -If you are developing a production application, we recommend updating the configuration to enable type-aware lint rules: +```bash +bun run build # tsc -b && vite build → production SPA bundle (base: /app/) +``` -```js -export default defineConfig([ - globalIgnores(['dist']), - { - files: ['**/*.{ts,tsx}'], - extends: [ - // Other configs... +Installed users never build this: the bundle ships inside the `@cerefox/memory` +npm package and is served by `cerefox web`. Rebuild only when changing the +frontend from source. - // Remove tseslint.configs.recommended and replace with this - tseslint.configs.recommendedTypeChecked, - // Alternatively, use this for stricter rules - tseslint.configs.strictTypeChecked, - // Optionally, add this for stylistic rules - tseslint.configs.stylisticTypeChecked, +## Lint & test - // Other configs... 
- ], - languageOptions: { - parserOptions: { - project: ['./tsconfig.node.json', './tsconfig.app.json'], - tsconfigRootDir: import.meta.dirname, - }, - // other options... - }, - }, -]) +```bash +bun run lint # eslint +bunx playwright install chromium +bun run test:e2e # Playwright browser tests against a local `cerefox web` ``` -You can also install [eslint-plugin-react-x](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-x) and [eslint-plugin-react-dom](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-dom) for React-specific lint rules: +## Layout -```js -// eslint.config.js -import reactX from 'eslint-plugin-react-x' -import reactDom from 'eslint-plugin-react-dom' +- `src/pages/` — route pages (Search, Document, Ingest, Projects, Metadata + Search, Audit Log, Analytics, Dashboard, Help, Trash). +- `src/components/` — shared UI (charts, banners, layout). +- `src/api/` — typed client for the `cerefox web` JSON API. +- `vite.config.ts` — build config (`base: /app/`). -export default defineConfig([ - globalIgnores(['dist']), - { - files: ['**/*.{ts,tsx}'], - extends: [ - // Other configs... - // Enable lint rules for React - reactX.configs['recommended-typescript'], - // Enable lint rules for React DOM - reactDom.configs.recommended, - ], - languageOptions: { - parserOptions: { - project: ['./tsconfig.node.json', './tsconfig.app.json'], - tsconfigRootDir: import.meta.dirname, - }, - // other options... - }, - }, -]) -``` +Design history: [`docs/specs/ui-redesign-spa-python-api.md`](../docs/specs/ui-redesign-spa-python-api.md). +The serving command is documented in [`docs/guides/cli.md`](../docs/guides/cli.md) (`cerefox web`). 
diff --git a/packages/memory/src/cli/commands/document-set-projects.ts b/packages/memory/src/cli/commands/document-set-projects.ts new file mode 100644 index 0000000..73ea29e --- /dev/null +++ b/packages/memory/src/cli/commands/document-set-projects.ts @@ -0,0 +1,97 @@ +/** + * `cerefox document set-projects <document-id> [project-names...]` — replace a + * document's project memberships with EXACTLY the given set (full-set replace, + * matching the `cerefox_set_document_projects` MCP tool). `--clear` removes all + * memberships. Writes an `update-metadata` audit entry; document content is + * untouched. + * + * Closes the CLI↔MCP parity gap: the MCP tool had no CLI equivalent. The + * membership-replace logic is shared with the MCP tool via + * `_shared/mcp-tools/_projects.ts → replaceDocumentProjects`. + */ + +import type { Command } from "commander"; + +import { + c, + println, + resolveAuthor, + resolveAuthorType, + userError, + warn, +} from "../../../../../_shared/cli-core/index.ts"; +import { replaceDocumentProjects } from "../../../../../_shared/mcp-tools/_projects.ts"; +import type { MCPSupabaseClient } from "../../../../../_shared/mcp-tools/types.ts"; +import { getClient } from "../util/client.ts"; + +interface SetProjectsOptions { + clear?: boolean; + author?: string; + authorType?: string; +} + +async function action( + documentId: string, + projectNames: string[], + options: SetProjectsOptions, +): Promise<void> { + const names = projectNames ?? 
[]; + + if (options.clear && names.length > 0) { + throw userError( + "Pass either project names or --clear, not both.", + "Use --clear on its own to remove the document from all projects.", + ); + } + if (!options.clear && names.length === 0) { + throw userError( + "No project names given.", + "Pass one or more project names, or --clear to remove all memberships.", + ); + } + + const author = resolveAuthor(options.author); + const authorType = resolveAuthorType(options.authorType); + if (author === "unknown") { + warn("No --author / CEREFOX_AUTHOR_NAME set — audit log will record this as 'unknown'."); + } + + const client = getClient(); + const { documentTitle, cleanNames } = await replaceDocumentProjects( + client.raw as unknown as MCPSupabaseClient, + { + documentId, + projectNames: names, + author, + authorType, + accessPath: "cli", + }, + ); + + if (cleanNames.length === 0) { + println(c.green(`✓ Cleared all project memberships for "${documentTitle}" (id: ${documentId}).`)); + return; + } + println( + c.green(`✓ Set ${cleanNames.length} project membership(s) for "${documentTitle}" (id: ${documentId}).`), + ); + println(c.dim(` Projects: ${cleanNames.join(", ")}`)); + println(c.dim(" This REPLACED the previous set — any project not listed is no longer associated.")); +} + +export function registerDocumentSetProjects(parent: Command): void { + parent + .command("set-projects") + .description( + "Replace a document's project memberships with exactly the given set (or --clear to remove all).", + ) + .argument("<document-id>", "UUID of the document.") + .argument( + "[project-names...]", + "Project names to set (created if missing). 
Omit and pass --clear to remove all.", + ) + .option("--clear", "Remove the document from all projects.") + .option("-a, --author <name>", "Caller identity (audit log).") + .option("--author-type <type>", "'user' or 'agent' (default: user).", "user") + .action(action); +} diff --git a/packages/memory/src/cli/program.ts b/packages/memory/src/cli/program.ts index 4d0cb32..c6f4cdf 100644 --- a/packages/memory/src/cli/program.ts +++ b/packages/memory/src/cli/program.ts @@ -33,6 +33,7 @@ import { registerDeleteProject } from "./commands/delete-project.ts"; import { registerDeployServer } from "./commands/deploy-server.ts"; import { registerDocumentEdit } from "./commands/document-edit.ts"; import { registerDocumentRestore } from "./commands/document-restore.ts"; +import { registerDocumentSetProjects } from "./commands/document-set-projects.ts"; import { registerGuides } from "./commands/guides.ts"; import { registerProjectCreate } from "./commands/project-create.ts"; import { registerProjectEdit } from "./commands/project-edit.ts"; @@ -179,6 +180,7 @@ export function buildProgram(): Command { moveInto(document, registerDeleteDoc, "delete"); registerDocumentRestore(document); // v0.9.0: new command (no old flat verb) registerDocumentEdit(document); // v0.9.1: non-destructive title/metadata patch + registerDocumentSetProjects(document); // CLI parity with cerefox_set_document_projects moveInto(document, registerIngest, "ingest"); moveInto(document, registerIngestDir, "ingest-dir"); diff --git a/supabase/functions/cerefox-metadata-search/index.ts b/supabase/functions/cerefox-metadata-search/index.ts index 3fe7d90..7aa6176 100644 --- a/supabase/functions/cerefox-metadata-search/index.ts +++ b/supabase/functions/cerefox-metadata-search/index.ts @@ -15,8 +15,11 @@ import { isVersionRequest, versionResponse } from "../../../_shared/ef-meta/inde * Note: cerefox-mcp calls the RPC directly (not this Edge Function). 
* * Request body (JSON): - * metadata_filter object required Key-value pairs (AND semantics) + * metadata_filter object optional Key-value pairs (AND semantics) * project_id string optional Project UUID filter + * + * At least one of metadata_filter / project_id / updated_since / created_since + * must be supplied (an empty filter + project_id lists that project's docs). * updated_since string optional ISO-8601 lower bound for updated_at * created_since string optional ISO-8601 lower bound for created_at * limit number optional Max results (default: 10) @@ -53,13 +56,12 @@ Deno.serve(async (req: Request): Promise<Response> => { const metadata_filter = body.metadata_filter; if ( - !metadata_filter || - typeof metadata_filter !== "object" || - Array.isArray(metadata_filter) || - Object.keys(metadata_filter).length === 0 + metadata_filter !== undefined && + metadata_filter !== null && + (typeof metadata_filter !== "object" || Array.isArray(metadata_filter)) ) { return new Response( - JSON.stringify({ error: "metadata_filter is required and must be a non-empty JSON object" }), + JSON.stringify({ error: "metadata_filter must be a JSON object when provided" }), { status: 400, headers: { ...CORS_HEADERS, "Content-Type": "application/json" } }, ); } @@ -67,6 +69,22 @@ Deno.serve(async (req: Request): Promise<Response> => { const project_id = body.project_id ?? null; const updated_since = body.updated_since ?? null; const created_since = body.created_since ?? null; + + // metadata_filter is optional, but at least one narrowing criterion is + // required so this never becomes an unbounded whole-KB dump. An empty + // filter + project_id lists a project's documents (the RPC's + // `metadata @> '{}'` matches every row; the project predicate narrows it). 
+ const has_metadata = + metadata_filter && typeof metadata_filter === "object" && + Object.keys(metadata_filter).length > 0; + if (!has_metadata && !project_id && !updated_since && !created_since) { + return new Response( + JSON.stringify({ + error: "Provide at least one of: metadata_filter, project_id, updated_since, or created_since.", + }), + { status: 400, headers: { ...CORS_HEADERS, "Content-Type": "application/json" } }, + ); + } const limit = body.limit ?? 10; const include_content = body.include_content ?? false; const requested_max_bytes = body.max_bytes; @@ -102,7 +120,7 @@ Deno.serve(async (req: Request): Promise<Response> => { } const params: Record<string, unknown> = { - p_metadata_filter: metadata_filter, + p_metadata_filter: has_metadata ? metadata_filter : {}, p_project_id: project_id, p_updated_since: updated_since, p_created_since: created_since,