From cd6b120c931a384c2159a402c15956cf5c9c77ad Mon Sep 17 00:00:00 2001 From: Seth Voltz Date: Thu, 25 Jun 2026 18:56:21 -0700 Subject: [PATCH] refactor(evolve): collapse three scanners behind one runScanner core + collectFn seam MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the three hand-copied scanner pipelines (friction / preferences / dreaming) with one deep `runScanner` core fed by per-scanner `Taxonomy` adapters. The collect → batch → score → bucket scaffolding now lives once; each scanner contributes only its payload projection, default LLM scorer, and (unchanged) bucketing rule. - New `collect.ts`: extracted OrchestratorTurn + collectOrchestratorTurns + dbTurnIdToLine to a neutral module (breaks the run-scanner↔scan-friction cycle). - New `run-scanner.ts`: runScanner + Taxonomy + RunScannerOptions + SCAN_BATCH_SIZE, plus de-duplicated truncate/clamp/severityFor/severityRank. Adds a second injectable seam, collectFn (defaults to collectOrchestratorTurns), so the whole pipeline is testable without a DB. The core only touches the turn_id join key on T (ScoredRow constraint); all domain variation stays in bucket. - scan-*.ts: each is now a Taxonomy + a thin `scanX = (opts) => runScanner(...)` wrapper. Public names/signatures unchanged. Dreaming's richer bucketing (payload-in-first-pointer, separate note cap, recall→severity bump, promote/ reinforce verb) preserved whole; DreamEvidence threaded as the opaque ctx. - New run-scanner.test.ts: end-to-end pipeline coverage that the collectFn seam unlocks (cross-session swap, default+override batching, cross-batch merge, log-and-continue, per-category caps, dreaming reinforce-merge + payload round-trip). Pure internal refactor: no Signal shape change, no propose.ts/rank.ts change. Behavior verified byte-identical across all three buckets, prompts, and collect. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01SeavKpZNAxkrokvKhQF8Ai --- packages/evolve/src/collect.ts | 150 ++++++++++ packages/evolve/src/index.ts | 2 + packages/evolve/src/run-scanner.test.ts | 300 +++++++++++++++++++ packages/evolve/src/run-scanner.ts | 148 +++++++++ packages/evolve/src/scan-dreaming.test.ts | 2 +- packages/evolve/src/scan-dreaming.ts | 109 +++---- packages/evolve/src/scan-friction.test.ts | 2 +- packages/evolve/src/scan-friction.ts | 237 ++------------- packages/evolve/src/scan-preferences.test.ts | 2 +- packages/evolve/src/scan-preferences.ts | 102 ++----- 10 files changed, 700 insertions(+), 354 deletions(-) create mode 100644 packages/evolve/src/collect.ts create mode 100644 packages/evolve/src/run-scanner.test.ts create mode 100644 packages/evolve/src/run-scanner.ts diff --git a/packages/evolve/src/collect.ts b/packages/evolve/src/collect.ts new file mode 100644 index 00000000..af762dab --- /dev/null +++ b/packages/evolve/src/collect.ts @@ -0,0 +1,150 @@ +/** + * Orchestrator-transcript collection — the shared stage-A input for every + * scanner (friction / preferences / dreaming). Extracted from scan-friction.ts + * so the deep `runScanner` core (run-scanner.ts) can depend on it without a + * circular import back through any individual scanner, and so the three + * scanners draw `OrchestratorTurn` / `dbTurnIdToLine` from a neutral home. + * + * The DB coupling lives here and nowhere else: `runScanner` accepts a + * `collectFn` seam that defaults to `collectOrchestratorTurns`, so the full + * pipeline is drivable with canned turns and no Postgres in tests. + */ + +import { eq, inArray } from "drizzle-orm"; +import { getDb, schema } from "@friday/shared"; + +export interface OrchestratorTurn { + sessionId: string; + /** Source JSONL file (recorded on the turns row). */ + filePath: string; + /** Synthetic id for matching back from LLM scoring output. */ + turnId: string; + /** ISO timestamp. */ + ts: string; + userText: string; + prevAssistantText: string; + /** DB row id. Phase 4.11 flipped `blocks.id` to text (UUID), so + * this can be a UUID for newer rows or a bigserial-shaped + * numeric string for legacy rows. We parse it as a number for + * the EvidencePointer's `line` field (falls through to omitted + * when NaN). */ + dbTurnId: string; +} + +export function dbTurnIdToLine(id: string): number | undefined { + const n = Number(id); + return Number.isFinite(n) ? n : undefined; +} + +/** + * Resolve "which sessions belong to the orchestrator" by: + * 1. Selecting all agents of type=orchestrator from the registry. + * 2. Including their currently-attached sessionId, plus + * 3. Every distinct sessionId in the `turns` table that's tagged with one + * of those agent names (catches historical sessions across resumes). + */ +async function collectOrchestratorSessions(): Promise> { + const out = new Set(); + const db = getDb(); + + const orchAgents = await db + .select() + .from(schema.agents) + .where(eq(schema.agents.type, "orchestrator")); + if (orchAgents.length === 0) return out; + + for (const a of orchAgents) { + if (a.sessionId) out.add(a.sessionId); + } + + // Historic session enumeration via the `blocks` table — the legacy + // `turns` table is retired per ADR-016. Distinct session_id values + // for any orchestrator-named agent's blocks. + const orchNames = orchAgents.map((a) => a.name); + const historicSessions = await db + .selectDistinct({ sessionId: schema.blocks.sessionId }) + .from(schema.blocks) + .where(inArray(schema.blocks.agentName, orchNames)); + for (const t of historicSessions) { + if (t.sessionId) out.add(t.sessionId); + } + + return out; +} + +/** + * Walk the `turns` table for orchestrator sessions, return user turns paired + * with the immediately preceding assistant text. Skips pure tool_result + * echoes and strips `` auto-injection blocks. + * + * Capped at `maxTurns`. Older sessions go first if we hit the cap. + */ +export async function collectOrchestratorTurns( + sinceMs: number, + maxTurns: number, +): Promise { + const sessionIds = await collectOrchestratorSessions(); + if (sessionIds.size === 0) return []; + + // Ported to the `blocks` table per ADR-016 + ADR-023. Each block row is + // already a single semantic unit (text / thinking / tool_use / tool_result + // / user / mail); we no longer parse a JSONL-style `content_json` envelope + // with `type=user|assistant`. The friction scorer wants pairs of + // (user-typed text, immediately-preceding assistant text), so we walk + // blocks in ts order, accumulate the latest assistant text per session, + // and emit a turn whenever we see a user-role text/user block. + const db = getDb(); + const rows = await db + .select() + .from(schema.blocks) + .where(inArray(schema.blocks.sessionId, [...sessionIds])); + // Sort ts ascending so older sessions get scored first when capped. + rows.sort((a, b) => a.ts.getTime() - b.ts.getTime()); + + const out: OrchestratorTurn[] = []; + const prevAssistantBySession = new Map(); + + for (const r of rows) { + const rTsMs = r.ts.getTime(); + if (sinceMs && rTsMs < sinceMs) continue; + if (out.length >= maxTurns) break; + + // contentJson is jsonb; Drizzle returns it as the parsed object. Block + // payloads are shaped per-kind; we only need the `text` field for the + // text + user kinds. + const content = r.contentJson as { text?: string }; + + if (r.role === "assistant" && r.kind === "text") { + const txt = typeof content?.text === "string" ? content.text : ""; + if (txt) prevAssistantBySession.set(r.sessionId, txt); + continue; + } + + // User-typed blocks (chat input, scratch seed, agent_spawn, schedule + // task prompt). Skip mail-delivered user blocks — those aren't the + // user's free-text friction signal. + if (r.role !== "user" || r.kind !== "text") continue; + if (r.source === "mail") continue; + + const userText = typeof content?.text === "string" ? content.text : ""; + if (!userText.trim()) continue; + const cleaned = stripMemoryContext(userText).trim(); + if (!cleaned) continue; + + out.push({ + sessionId: r.sessionId, + filePath: "", + turnId: r.turnId, + ts: r.ts.toISOString(), + userText: cleaned, + prevAssistantText: prevAssistantBySession.get(r.sessionId) ?? "", + dbTurnId: r.id, + }); + } + + return out; +} + +function stripMemoryContext(text: string): string { + return text.replace(/[\s\S]*?<\/memory-context>\s*/g, ""); +} diff --git a/packages/evolve/src/index.ts b/packages/evolve/src/index.ts index 2e6b15ec..b4d6b938 100644 --- a/packages/evolve/src/index.ts +++ b/packages/evolve/src/index.ts @@ -18,6 +18,8 @@ export * from "./types.js"; export * from "./store.js"; export * from "./scan.js"; export * from "./scan-agent-depth.js"; +export * from "./collect.js"; +export * from "./run-scanner.js"; export * from "./scan-friction.js"; export * from "./scan-preferences.js"; export * from "./rank.js"; diff --git a/packages/evolve/src/run-scanner.test.ts b/packages/evolve/src/run-scanner.test.ts new file mode 100644 index 00000000..53dec228 --- /dev/null +++ b/packages/evolve/src/run-scanner.test.ts @@ -0,0 +1,300 @@ +/** + * End-to-end pipeline tests for the deep `runScanner` core, driven through the + * three public entry points (scanFriction / scanPreferences / scanDreaming) + * with BOTH seams injected — a fake `collectFn` (canned turns, no DB) and a + * fake `scoreFn` (canned scores, no LLM). This is the coverage the refactor + * unlocked: before the `collectFn` seam existed, the collect → batch → score → + * bucket wiring had ZERO tests (only the isolated bucket functions were + * exercised). The co-located bucket tests still pin the bucketing rules + * directly; these pin the wiring around them. + */ + +import { describe, expect, it, vi } from "vitest"; +import { scanFriction, type ScoredTurn, type TurnForScoring } from "./scan-friction.js"; +import { scanPreferences, type PreferenceScoredTurn } from "./scan-preferences.js"; +import { + scanDreaming, + decodeDreamPayload, + type DreamScoredCandidate, + type DreamEvidence, +} from "./scan-dreaming.js"; +import { SCAN_BATCH_SIZE } from "./run-scanner.js"; +import type { OrchestratorTurn } from "./collect.js"; + +function oturn(i: number, overrides: Partial = {}): OrchestratorTurn { + return { + sessionId: overrides.sessionId ?? "sess-a", + filePath: overrides.filePath ?? "/tmp/sess-a.jsonl", + turnId: `t-${i}`, + ts: overrides.ts ?? `2026-05-01T00:0${i}:00.000Z`, + userText: overrides.userText ?? `user text ${i}`, + prevAssistantText: overrides.prevAssistantText ?? `assistant text ${i}`, + dbTurnId: overrides.dbTurnId ?? `${100 + i}`, + ...overrides, + }; +} + +/** A collectFn seam that returns a fixed set of turns regardless of args. */ +function fixedCollect(turns: OrchestratorTurn[]) { + return async (_since: number, _max: number) => turns; +} + +describe("runScanner — friction pipeline (collect → batch → score → bucket)", () => { + it("returns [] without scoring when collect yields no turns", async () => { + const scoreFn = vi.fn(); + const out = await scanFriction({ collectFn: fixedCollect([]), scoreFn, model: "m" }); + expect(out).toEqual([]); + expect(scoreFn).not.toHaveBeenCalled(); + }); + + it("projects each turn through buildPayload and passes opts.model to scoreFn", async () => { + const turns = [oturn(1, { userText: "x".repeat(900), prevAssistantText: "y".repeat(500) })]; + const scoreFn = vi.fn( + async (batch: TurnForScoring[], _model: string): Promise => + batch.map((b) => ({ + turn_id: b.turn_id, + friction_score: 3, + category: "correction", + reason: "", + })), + ); + + await scanFriction({ collectFn: fixedCollect(turns), scoreFn, model: "haiku-test" }); + + expect(scoreFn).toHaveBeenCalledTimes(1); + const [batch, model] = scoreFn.mock.calls[0]; + expect(model).toBe("haiku-test"); + // buildPayload shape + truncation (user_text cap 800, prev_assistant cap 400). + expect(batch[0]).toMatchObject({ turn_id: "t-1" }); + expect(batch[0].user_text.length).toBe(800); + expect(batch[0].prev_assistant_text.length).toBe(400); + }); + + it("drives the cross-session-diversity swap through the real entry point", async () => { + // 5 sess-a turns fill the 5-cap, then 1 sess-b turn forces the swap so the + // enricher sees evidence from >1 session. Reachable ONLY via the pipeline. + const turns = [ + ...Array.from({ length: 5 }, (_, i) => oturn(i + 1, { sessionId: "sess-a" })), + oturn(6, { sessionId: "sess-b" }), + ]; + const scoreFn = async (batch: { turn_id: string }[]): Promise => + batch.map((b) => ({ + turn_id: b.turn_id, + friction_score: 3, + category: "doubt", + reason: "", + })); + + const out = await scanFriction({ collectFn: fixedCollect(turns), scoreFn, model: "m" }); + + expect(out).toHaveLength(1); + expect(out[0].key).toBe("friction_doubt"); + expect(out[0].count).toBe(6); + expect(out[0].evidencePointers).toHaveLength(5); + const sessions = out[0].evidencePointers.map((p) => p.sessionId); + expect(sessions).toContain("sess-a"); + expect(sessions).toContain("sess-b"); + }); + + it("splits into batches of the default SCAN_BATCH_SIZE (30) when no override is given", async () => { + const turns = Array.from({ length: 65 }, (_, i) => oturn(i + 1)); + const scoreFn = vi.fn( + async (batch: { turn_id: string }[]): Promise => + batch.map((b) => ({ turn_id: b.turn_id, friction_score: 0, category: "none", reason: "" })), + ); + + await scanFriction({ collectFn: fixedCollect(turns), scoreFn, model: "m" }); + + // 65 turns / default batch 30 → 3 calls of sizes 30, 30, 5. + expect(SCAN_BATCH_SIZE).toBe(30); + expect(scoreFn.mock.calls.map((c) => c[0].length)).toEqual([30, 30, 5]); + }); + + it("honors a per-call batchSize override", async () => { + const turns = Array.from({ length: 7 }, (_, i) => oturn(i + 1)); + const scoreFn = vi.fn( + async (batch: { turn_id: string }[]): Promise => + batch.map((b) => ({ turn_id: b.turn_id, friction_score: 0, category: "none", reason: "" })), + ); + + await scanFriction({ collectFn: fixedCollect(turns), scoreFn, model: "m", batchSize: 3 }); + + // 7 turns / batch 3 → 3 calls of sizes 3, 3, 1. + expect(scoreFn.mock.calls.map((c) => c[0].length)).toEqual([3, 3, 1]); + }); + + it("accumulates same-category turns across multiple batches into one merged signal", async () => { + // batchSize:1 forces three separate surviving batches; the core accumulates + // into a single `scored` array and buckets ONCE after the loop, so they must + // merge into one signal with count 3. Catches any per-batch re-init of + // `scored` or per-batch bucketing regression. + const turns = Array.from({ length: 3 }, (_, i) => oturn(i + 1, { sessionId: `s${i}` })); + const scoreFn = async (batch: { turn_id: string }[]): Promise => + batch.map((b) => ({ turn_id: b.turn_id, friction_score: 3, category: "doubt", reason: "" })); + + const out = await scanFriction({ + collectFn: fixedCollect(turns), + scoreFn, + model: "m", + batchSize: 1, + }); + + expect(out).toHaveLength(1); + expect(out[0].key).toBe("friction_doubt"); + expect(out[0].count).toBe(3); + }); + + it("logs and continues when one batch throws, scoring the rest", async () => { + const errSpy = vi.spyOn(console, "error").mockImplementation(() => {}); + const turns = Array.from({ length: 4 }, (_, i) => oturn(i + 1, { sessionId: `s${i}` })); + let call = 0; + const scoreFn = async (batch: { turn_id: string }[]): Promise => { + call++; + if (call === 1) throw new Error("batch boom"); + return batch.map((b) => ({ + turn_id: b.turn_id, + friction_score: 4, + category: "frustration", + reason: "", + })); + }; + + const out = await scanFriction({ + collectFn: fixedCollect(turns), + scoreFn, + model: "m", + batchSize: 2, + }); + + // First batch (turns 1-2) threw → dropped; second batch (turns 3-4) survived. + expect(out).toHaveLength(1); + expect(out[0].count).toBe(2); + expect(errSpy).toHaveBeenCalledTimes(1); + expect(String(errSpy.mock.calls[0][0])).toContain("friction scoring batch"); + errSpy.mockRestore(); + }); +}); + +describe("runScanner — preferences pipeline", () => { + it("emits one signal per category with the 3-cap and severity escalation", async () => { + const turns = [ + oturn(1, { sessionId: "s1" }), + oturn(2, { sessionId: "s2" }), + oturn(3, { sessionId: "s3" }), + oturn(4, { sessionId: "s4" }), + oturn(5, { sessionId: "s5" }), + ]; + // 4 tooling turns (escalating 2→5) + 1 directive turn. + const scores: Record = { + "t-1": { turn_id: "t-1", signal_score: 2, category: "preference_tooling", reason: "" }, + "t-2": { turn_id: "t-2", signal_score: 3, category: "preference_tooling", reason: "" }, + "t-3": { turn_id: "t-3", signal_score: 4, category: "preference_tooling", reason: "" }, + "t-4": { turn_id: "t-4", signal_score: 5, category: "preference_tooling", reason: "" }, + "t-5": { turn_id: "t-5", signal_score: 3, category: "directive", reason: "" }, + }; + const scoreFn = async (batch: { turn_id: string }[]) => batch.map((b) => scores[b.turn_id]); + + const out = await scanPreferences({ collectFn: fixedCollect(turns), scoreFn, model: "m" }); + + const tooling = out.find((s) => s.key === "preference_tooling"); + const directive = out.find((s) => s.key === "directive"); + expect(out).toHaveLength(2); + expect(tooling?.count).toBe(4); + expect(tooling?.evidencePointers).toHaveLength(3); // capped at 3 + expect(tooling?.severity).toBe("high"); // escalated by the score-5 turn + expect(directive?.count).toBe(1); + expect(directive?.severity).toBe("medium"); + }); + + it("drops sub-threshold (< 2) and 'none' turns end-to-end", async () => { + const turns = [oturn(1), oturn(2)]; + const scoreFn = async (_batch: { turn_id: string }[]): Promise => [ + { turn_id: "t-1", signal_score: 1, category: "preference_style", reason: "" }, + { turn_id: "t-2", signal_score: 5, category: "none", reason: "" }, + ]; + const out = await scanPreferences({ collectFn: fixedCollect(turns), scoreFn, model: "m" }); + expect(out).toEqual([]); + }); +}); + +describe("runScanner — dreaming pipeline", () => { + function dcand( + turn_id: string, + title: string, + overrides: Partial = {}, + ): DreamScoredCandidate { + return { + turn_id, + signal_score: overrides.signal_score ?? 3, + category: overrides.category ?? "feedback", + reason: "", + proposed_title: title, + proposed_content: overrides.proposed_content ?? `content for ${title}`, + proposed_tags: overrides.proposed_tags ?? ["alpha"], + already_covered: overrides.already_covered, + }; + } + + it("merges a recurring candidate to one reinforce signal with a decodable payload", async () => { + const title = "Always deploy via friday update"; + const turns = [ + oturn(1, { ts: "2026-05-01T00:01:00.000Z" }), + oturn(2, { ts: "2026-05-01T00:05:00.000Z" }), + ]; + const scoreFn = async (batch: { turn_id: string }[]): Promise => + batch.map((b) => dcand(b.turn_id, title)); + + const out = await scanDreaming({ collectFn: fixedCollect(turns), scoreFn, model: "m" }); + + expect(out).toHaveLength(1); + expect(out[0].count).toBe(2); + expect(out[0].key).toBe("dream:reinforce:always-deploy-via-friday-update"); + expect(out[0].firstSeenAt).toBe("2026-05-01T00:01:00.000Z"); + expect(out[0].lastSeenAt).toBe("2026-05-01T00:05:00.000Z"); + const payload = decodeDreamPayload(out[0]); + expect(payload?.title).toBe(title); + expect(payload?.category).toBe("feedback"); + }); + + it("a single non-recurring candidate is a promote signal", async () => { + const turns = [oturn(1)]; + const scoreFn = async (batch: { turn_id: string }[]): Promise => + batch.map((b) => dcand(b.turn_id, "Seth works in Pacific time", { category: "user" })); + + const out = await scanDreaming({ collectFn: fixedCollect(turns), scoreFn, model: "m" }); + expect(out).toHaveLength(1); + expect(out[0].key).toBe("dream:promote:seth-works-in-pacific-time"); + }); + + it("threads DreamEvidence through buildPayload into the scoreFn batch", async () => { + const turns = [oturn(1)]; + const evidence: DreamEvidence = { + recallStatsBySlug: new Map(), + orchestratorName: "friday", + frictionSignalsInWindow: [ + { + hash: "h1", + source: "daemon", + key: "watchdog.stall.detected", + severity: "high", + count: 1, + firstSeenAt: "2026-05-01T00:00:00.000Z", + lastSeenAt: "2026-05-01T00:01:00.000Z", + agent: "friday", + evidencePointers: [], + }, + ], + }; + const scoreFn = vi.fn( + async (batch: { turn_id: string }[]): Promise => + batch.map((b) => dcand(b.turn_id, "Daemon stalls under load")), + ); + + await scanDreaming({ collectFn: fixedCollect(turns), scoreFn, model: "m", evidence }); + + // Pin the exact rendered evidence string buildPayload → renderEvidenceForTurn + // produced for this matching-agent stall signal (no existing-memories line). + const rendered = (scoreFn.mock.calls[0][0][0] as { evidence?: string }).evidence; + expect(rendered).toBe("co-occurring daemon signal: watchdog.stall.detected (severity=high)"); + }); +}); diff --git a/packages/evolve/src/run-scanner.ts b/packages/evolve/src/run-scanner.ts new file mode 100644 index 00000000..872088b5 --- /dev/null +++ b/packages/evolve/src/run-scanner.ts @@ -0,0 +1,148 @@ +/** + * The deep scanner core. `scan-friction.ts`, `scan-preferences.ts`, and + * `scan-dreaming.ts` were three hand-copied pipelines sharing the same four + * stages — collect orchestrator turns → batch → LLM-score → bucket into + * `Signal[]` — with ~35–40 lines of identical scaffolding per file. This module + * owns those stages once; each scanner contributes only a `Taxonomy` adapter + * (its per-turn payload projection, its default LLM scorer, and its bucketing + * rule). The genuine variation between the scanners — category enums, score + * gates, pointer caps, dreaming's payload encoding and promote/reinforce verb — + * lives entirely inside each `bucket`; the core never inspects the scored type + * beyond the `turn_id` join key it needs to map scores back to their turns. + * + * Two injectable seams ride on the core: + * - `scoreFn` (the pre-existing model seam) — replaces the LLM call in tests. + * - `collectFn` (new) — replaces `collectOrchestratorTurns`, so the WHOLE + * pipeline (collect → batch → score → bucket) is testable without a DB. + */ + +import { loadConfig, resolveModelForEvolveTask, type EvolveTaskName } from "@friday/shared"; +import type { Signal, SignalSeverity } from "./types.js"; +import { collectOrchestratorTurns, type OrchestratorTurn } from "./collect.js"; + +/** Turns per LLM batch. Default 30. Overridable per-call via `opts.batchSize`. */ +export const SCAN_BATCH_SIZE = 30; + +/** + * The one field every scored row carries: the synthetic turn id, echoed back by + * the LLM, used to join scores onto their source turns. This is the sole part + * of the scored type `T` the core is allowed to know — everything else is the + * taxonomy's business. + */ +export interface ScoredRow { + turn_id: string; +} + +/** The turn-collection seam. Defaults to `collectOrchestratorTurns`. */ +export type CollectFn = (sinceMs: number, maxTurns: number) => Promise; + +/** The model seam: score a batch of per-turn payloads `P` into scored rows `T`. */ +export type BatchScoreFn = (batch: P[], model: string) => Promise; + +/** + * A per-scanner adapter. `P` is the per-turn LLM payload shape, `T` the scored + * row merged onto each `OrchestratorTurn`, `Ctx` an opaque per-scanner context + * threaded through untouched by the core (dreaming uses it for its + * `DreamEvidence`; friction/preferences leave it `undefined`). + */ +export interface Taxonomy { + /** Used in the per-batch error log line: `${name} scoring batch …`. */ + name: string; + /** Resolves the default model via `cfg.evolve.models[modelTask]`. */ + modelTask: EvolveTaskName; + /** Project one turn into the LLM payload shape (friction adds prev_assistant; dreaming adds evidence). */ + buildPayload: (turn: OrchestratorTurn, ctx: Ctx) => P; + /** The default LLM scorer (the existing per-scanner `defaultScoreFn`). */ + defaultScoreFn: BatchScoreFn; + /** The scanner-specific bucketing rule — score gate, caps, encoding, verb. Unchanged. */ + bucket: (scored: Array, ctx: Ctx) => Signal[]; +} + +/** Per-scan options shared by every scanner. `P`/`T` specialize per taxonomy. */ +export interface RunScannerOptions { + /** ISO string lower bound — turns earlier than this are skipped. */ + since?: string; + /** Maximum user turns to evaluate per scan. Default 1000. */ + maxTurns?: number; + /** Turns per LLM batch. Default `SCAN_BATCH_SIZE` (30). */ + batchSize?: number; + /** Model id override. Default resolves via `cfg.evolve.models[taxonomy.modelTask]`. */ + model?: string; + /** Inject for tests — replaces the real LLM call. */ + scoreFn?: BatchScoreFn; + /** Inject for tests — replaces the DB-backed turn collection. */ + collectFn?: CollectFn; +} + +/** + * The deep core: collect → batch → score → bucket. Generic over the per-turn + * payload `P`, the scored row `T` (constrained to carry `turn_id`), and the + * opaque per-scanner context `Ctx`. A failing batch is logged and skipped — it + * never aborts the scan (identical to the copied loops it replaces). + */ +export async function runScanner( + taxonomy: Taxonomy, + opts: RunScannerOptions, + ctx: Ctx, +): Promise { + const sinceMs = opts.since ? Date.parse(opts.since) : 0; + const maxTurns = opts.maxTurns ?? 1000; + const batchSize = opts.batchSize ?? SCAN_BATCH_SIZE; + const model = opts.model ?? resolveModelForEvolveTask(loadConfig(), taxonomy.modelTask).name; + const score = opts.scoreFn ?? taxonomy.defaultScoreFn; + const collect = opts.collectFn ?? collectOrchestratorTurns; + + const turns = await collect(sinceMs, maxTurns); + if (turns.length === 0) return []; + + const scored: Array = []; + for (let i = 0; i < turns.length; i += batchSize) { + const batch = turns.slice(i, i + batchSize); + const payload = batch.map((t) => taxonomy.buildPayload(t, ctx)); + let results: T[]; + try { + results = await score(payload, model); + } catch (err) { + // Better to score fewer turns than abort the whole pass; log loudly. + console.error( + `${taxonomy.name} scoring batch ${i}-${i + batch.length - 1} failed: ${ + err instanceof Error ? err.message : String(err) + }`, + ); + continue; + } + const byId = new Map(results.map((r) => [r.turn_id, r])); + for (const turn of batch) { + const r = byId.get(turn.turnId); + if (!r) continue; + scored.push({ ...turn, ...r }); + } + } + + return taxonomy.bucket(scored, ctx); +} + +// ── Shared pure helpers, de-duplicated from the three scanner files ────────── + +/** Truncate to `max` chars with an ellipsis. Used by every `buildPayload`. */ +export function truncate(s: string, max: number): string { + if (s.length <= max) return s; + return s.slice(0, max - 1) + "…"; +} + +/** Clamp `n` into `[lo, hi]`. Used by every `defaultScoreFn` parser. */ +export function clamp(n: number, lo: number, hi: number): number { + return Math.max(lo, Math.min(hi, n)); +} + +/** Map an integer signal/friction score (0–5) to a severity band. */ +export function severityFor(score: number): SignalSeverity { + if (score >= 4) return "high"; + if (score >= 3) return "medium"; + return "low"; +} + +/** Order severities so bucketing can keep the strongest seen. */ +export function severityRank(s: SignalSeverity): number { + return s === "high" ? 3 : s === "medium" ? 2 : 1; +} diff --git a/packages/evolve/src/scan-dreaming.test.ts b/packages/evolve/src/scan-dreaming.test.ts index a647120a..ff76df0d 100644 --- a/packages/evolve/src/scan-dreaming.test.ts +++ b/packages/evolve/src/scan-dreaming.test.ts @@ -10,7 +10,7 @@ import { } from "./scan-dreaming.js"; import { scoreProposal } from "./rank.js"; import { slugify } from "./apply.js"; -import type { OrchestratorTurn } from "./scan-friction.js"; +import type { OrchestratorTurn } from "./collect.js"; function turn( i: number, diff --git a/packages/evolve/src/scan-dreaming.ts b/packages/evolve/src/scan-dreaming.ts index 599039a0..44e5f760 100644 --- a/packages/evolve/src/scan-dreaming.ts +++ b/packages/evolve/src/scan-dreaming.ts @@ -17,15 +17,25 @@ * The promote-vs-reinforce distinction rides in the signal `key` * (`dream:promote:` / `dream:reinforce:`); the category is NOT in * the key — it is recovered from the decoded payload downstream (design D3). + * + * The collect → batch → score → bucket pipeline lives in run-scanner.ts; this + * file contributes the dreaming `Taxonomy` plus its richer bucketing (payload + * encoding, recall→severity bump, separate note-pointer cap, and the post-merge + * promote/reinforce verb decision). The per-scanner `evidence` context is + * threaded through `runScanner` opaquely to both `buildPayload` and `bucket`. */ -import { loadConfig, resolveModelForEvolveTask } from "@friday/shared"; import type { EvidencePointer, Signal, SignalSeverity } from "./types.js"; +import { dbTurnIdToLine, type OrchestratorTurn } from "./collect.js"; import { - collectOrchestratorTurns, - dbTurnIdToLine, - type OrchestratorTurn, -} from "./scan-friction.js"; + runScanner, + truncate, + clamp, + severityFor, + severityRank, + type RunScannerOptions, + type Taxonomy, +} from "./run-scanner.js"; import { signalHash } from "./scan.js"; import { chat, extractJson } from "./llm.js"; import { slugify } from "./apply.js"; @@ -78,17 +88,10 @@ export type DreamScoreFn = ( model: string, ) => Promise; -export interface DreamScanOptions { - /** ISO string lower bound (Date.parse'd to ms; mirrors scanPreferences). */ - since?: string; - /** Maximum user turns to evaluate per scan. Default 1000. */ - maxTurns?: number; - /** Turns per LLM batch. Default 30. */ - batchSize?: number; - /** Model id override. Default resolves via `cfg.evolve.models.scanPreferences`. */ - model?: string; - /** Inject for tests — replaces the real LLM call. */ - scoreFn?: DreamScoreFn; +export interface DreamScanOptions extends RunScannerOptions< + TurnForDreamScoring, + DreamScoredCandidate +> { /** Recall/friction evidence assembled by the endpoint; optional. */ evidence?: DreamEvidence; } @@ -136,43 +139,7 @@ export function decodeDreamPayload(signal: Signal): DreamPayload | null { } export async function scanDreaming(opts: DreamScanOptions = {}): Promise { - const sinceMs = opts.since ? Date.parse(opts.since) : 0; - const maxTurns = opts.maxTurns ?? 1000; - const batchSize = opts.batchSize ?? 30; - const model = opts.model ?? resolveModelForEvolveTask(loadConfig(), "scanPreferences").name; - const score = opts.scoreFn ?? defaultScoreFn; - - const turns = await collectOrchestratorTurns(sinceMs, maxTurns); - if (turns.length === 0) return []; - - const scored: Array = []; - for (let i = 0; i < turns.length; i += batchSize) { - const batch = turns.slice(i, i + batchSize); - const payload: TurnForDreamScoring[] = batch.map((t) => ({ - turn_id: t.turnId, - user_text: truncate(t.userText, 800), - evidence: renderEvidenceForTurn(t, opts.evidence), - })); - let results: DreamScoredCandidate[]; - try { - results = await score(payload, model); - } catch (err) { - console.error( - `dream scoring batch ${i}-${i + batch.length - 1} failed: ${ - err instanceof Error ? err.message : String(err) - }`, - ); - continue; - } - const byId = new Map(results.map((r) => [r.turn_id, r])); - for (const turn of batch) { - const r = byId.get(turn.turnId); - if (!r) continue; - scored.push({ ...turn, ...r }); - } - } - - return bucketByCandidate(scored, opts.evidence); + return runScanner(dreamTaxonomy, opts, opts.evidence); } /** @@ -320,25 +287,10 @@ function renderEvidenceForTurn(_t: OrchestratorTurn, evidence?: DreamEvidence): return lines.length > 0 ? lines.join("\n") : undefined; } -function severityFor(score: number): SignalSeverity { - if (score >= 4) return "high"; - if (score >= 3) return "medium"; - return "low"; -} - function bumpSeverity(s: SignalSeverity): SignalSeverity { return s === "low" ? "medium" : "high"; } -function severityRank(s: SignalSeverity): number { - return s === "high" ? 3 : s === "medium" ? 2 : 1; -} - -function truncate(s: string, max: number): string { - if (s.length <= max) return s; - return s.slice(0, max - 1) + "…"; -} - const SCORING_SYSTEM_PROMPT = [ "You scan user messages from an orchestrator-agent transcript and propose any", "NEW persistent memory worth saving — durable facts about the user, their", @@ -422,6 +374,21 @@ const defaultScoreFn: DreamScoreFn = async (batch, model) => { })); }; -function clamp(n: number, lo: number, hi: number): number { - return Math.max(lo, Math.min(hi, n)); -} +/** The dreaming adapter fed to the deep `runScanner` core. The opaque context + * is `DreamEvidence` (or undefined) — threaded into both `buildPayload` + * (for per-turn evidence rendering) and `bucket` (for recall/friction notes). */ +const dreamTaxonomy: Taxonomy< + TurnForDreamScoring, + DreamScoredCandidate, + DreamEvidence | undefined +> = { + name: "dream", + modelTask: "scanPreferences", + buildPayload: (t, evidence) => ({ + turn_id: t.turnId, + user_text: truncate(t.userText, 800), + evidence: renderEvidenceForTurn(t, evidence), + }), + defaultScoreFn, + bucket: (scored, evidence) => bucketByCandidate(scored, evidence), +}; diff --git a/packages/evolve/src/scan-friction.test.ts b/packages/evolve/src/scan-friction.test.ts index 0dede70a..30ba1c6a 100644 --- a/packages/evolve/src/scan-friction.test.ts +++ b/packages/evolve/src/scan-friction.test.ts @@ -4,7 +4,7 @@ import { type FrictionCategory, type ScoredTurn, } from "./scan-friction.js"; -import type { OrchestratorTurn } from "./scan-friction.js"; +import type { OrchestratorTurn } from "./collect.js"; function turn( i: number, diff --git a/packages/evolve/src/scan-friction.ts b/packages/evolve/src/scan-friction.ts index 98b8da5a..c0af08c8 100644 --- a/packages/evolve/src/scan-friction.ts +++ b/packages/evolve/src/scan-friction.ts @@ -17,13 +17,25 @@ * (correction / confusion / repeat / reset / frustration / doubt / redirect) * is general-purpose user-orchestrator interaction analysis, not specific * to any chat surface. + * + * The collect → batch → score → bucket pipeline lives in run-scanner.ts; this + * file contributes only the friction `Taxonomy` (payload projection, default + * LLM scorer, and the cross-session-diversity bucketing rule). */ -import { eq, inArray } from "drizzle-orm"; -import { getDb, loadConfig, resolveModelForEvolveTask, schema } from "@friday/shared"; -import type { EvidencePointer, Signal, SignalSeverity } from "./types.js"; +import type { EvidencePointer, Signal } from "./types.js"; import { signalHash } from "./scan.js"; import { chat, extractJson } from "./llm.js"; +import { dbTurnIdToLine, type OrchestratorTurn } from "./collect.js"; +import { + runScanner, + truncate, + clamp, + severityFor, + severityRank, + type RunScannerOptions, + type Taxonomy, +} from "./run-scanner.js"; export type FrictionCategory = | "correction" @@ -35,18 +47,7 @@ export type FrictionCategory = | "redirect" | "none"; -export interface FrictionScanOptions { - /** ISO string lower bound — turns earlier than this are skipped. */ - since?: string; - /** Maximum user turns to evaluate per scan. Default 1000. */ - maxTurns?: number; - /** Turns per LLM batch. Default 30. */ - batchSize?: number; - /** Model id override. Default resolves via `cfg.evolve.models.scanFriction`. */ - model?: string; - /** Inject for tests — replaces the real LLM call. */ - scoreFn?: ScoreFn; -} +export type FrictionScanOptions = RunScannerOptions; export interface ScoredTurn { turn_id: string; @@ -63,69 +64,8 @@ export interface TurnForScoring { prev_assistant_text: string; } -export interface OrchestratorTurn { - sessionId: string; - /** Source JSONL file (recorded on the turns row). */ - filePath: string; - /** Synthetic id for matching back from LLM scoring output. */ - turnId: string; - /** ISO timestamp. */ - ts: string; - userText: string; - prevAssistantText: string; - /** DB row id. Phase 4.11 flipped `blocks.id` to text (UUID), so - * this can be a UUID for newer rows or a bigserial-shaped - * numeric string for legacy rows. We parse it as a number for - * the EvidencePointer's `line` field (falls through to omitted - * when NaN). */ - dbTurnId: string; -} - -export function dbTurnIdToLine(id: string): number | undefined { - const n = Number(id); - return Number.isFinite(n) ? n : undefined; -} - export async function scanFriction(opts: FrictionScanOptions = {}): Promise { - const sinceMs = opts.since ? Date.parse(opts.since) : 0; - const maxTurns = opts.maxTurns ?? 1000; - const batchSize = opts.batchSize ?? 30; - const model = opts.model ?? resolveModelForEvolveTask(loadConfig(), "scanFriction").name; - const score = opts.scoreFn ?? defaultScoreFn; - - const turns = await collectOrchestratorTurns(sinceMs, maxTurns); - if (turns.length === 0) return []; - - const scored: Array = []; - for (let i = 0; i < turns.length; i += batchSize) { - const batch = turns.slice(i, i + batchSize); - const payload: TurnForScoring[] = batch.map((t) => ({ - turn_id: t.turnId, - user_text: truncate(t.userText, 800), - prev_assistant_text: truncate(t.prevAssistantText, 400), - })); - let results: ScoredTurn[]; - try { - results = await score(payload, model); - } catch (err) { - // Better to score fewer turns than abort the whole pass; log loudly. - - console.error( - `friction scoring batch ${i}-${i + batch.length - 1} failed: ${ - err instanceof Error ? err.message : String(err) - }`, - ); - continue; - } - const byId = new Map(results.map((r) => [r.turn_id, r])); - for (const turn of batch) { - const r = byId.get(turn.turnId); - if (!r) continue; - scored.push({ ...turn, ...r }); - } - } - - return bucketFrictionByCategory(scored); + return runScanner(frictionTaxonomy, opts, undefined); } export function bucketFrictionByCategory(scored: Array): Signal[] { @@ -184,134 +124,6 @@ export function bucketFrictionByCategory(scored: Array= 4) return "high"; - if (score >= 3) return "medium"; - return "low"; -} - -function severityRank(s: SignalSeverity): number { - return s === "high" ? 3 : s === "medium" ? 2 : 1; -} - -/** - * Resolve "which sessions belong to the orchestrator" by: - * 1. Selecting all agents of type=orchestrator from the registry. - * 2. Including their currently-attached sessionId, plus - * 3. Every distinct sessionId in the `turns` table that's tagged with one - * of those agent names (catches historical sessions across resumes). - */ -async function collectOrchestratorSessions(): Promise> { - const out = new Set(); - const db = getDb(); - - const orchAgents = await db - .select() - .from(schema.agents) - .where(eq(schema.agents.type, "orchestrator")); - if (orchAgents.length === 0) return out; - - for (const a of orchAgents) { - if (a.sessionId) out.add(a.sessionId); - } - - // Historic session enumeration via the `blocks` table — the legacy - // `turns` table is retired per ADR-016. Distinct session_id values - // for any orchestrator-named agent's blocks. - const orchNames = orchAgents.map((a) => a.name); - const historicSessions = await db - .selectDistinct({ sessionId: schema.blocks.sessionId }) - .from(schema.blocks) - .where(inArray(schema.blocks.agentName, orchNames)); - for (const t of historicSessions) { - if (t.sessionId) out.add(t.sessionId); - } - - return out; -} - -/** - * Walk the `turns` table for orchestrator sessions, return user turns paired - * with the immediately preceding assistant text. Skips pure tool_result - * echoes and strips `` auto-injection blocks. - * - * Capped at `maxTurns`. Older sessions go first if we hit the cap. - */ -export async function collectOrchestratorTurns( - sinceMs: number, - maxTurns: number, -): Promise { - const sessionIds = await collectOrchestratorSessions(); - if (sessionIds.size === 0) return []; - - // Ported to the `blocks` table per ADR-016 + ADR-023. Each block row is - // already a single semantic unit (text / thinking / tool_use / tool_result - // / user / mail); we no longer parse a JSONL-style `content_json` envelope - // with `type=user|assistant`. The friction scorer wants pairs of - // (user-typed text, immediately-preceding assistant text), so we walk - // blocks in ts order, accumulate the latest assistant text per session, - // and emit a turn whenever we see a user-role text/user block. - const db = getDb(); - const rows = await db - .select() - .from(schema.blocks) - .where(inArray(schema.blocks.sessionId, [...sessionIds])); - // Sort ts ascending so older sessions get scored first when capped. - rows.sort((a, b) => a.ts.getTime() - b.ts.getTime()); - - const out: OrchestratorTurn[] = []; - const prevAssistantBySession = new Map(); - - for (const r of rows) { - const rTsMs = r.ts.getTime(); - if (sinceMs && rTsMs < sinceMs) continue; - if (out.length >= maxTurns) break; - - // contentJson is jsonb; Drizzle returns it as the parsed object. Block - // payloads are shaped per-kind; we only need the `text` field for the - // text + user kinds. - const content = r.contentJson as { text?: string }; - - if (r.role === "assistant" && r.kind === "text") { - const txt = typeof content?.text === "string" ? content.text : ""; - if (txt) prevAssistantBySession.set(r.sessionId, txt); - continue; - } - - // User-typed blocks (chat input, scratch seed, agent_spawn, schedule - // task prompt). Skip mail-delivered user blocks — those aren't the - // user's free-text friction signal. - if (r.role !== "user" || r.kind !== "text") continue; - if (r.source === "mail") continue; - - const userText = typeof content?.text === "string" ? content.text : ""; - if (!userText.trim()) continue; - const cleaned = stripMemoryContext(userText).trim(); - if (!cleaned) continue; - - out.push({ - sessionId: r.sessionId, - filePath: "", - turnId: r.turnId, - ts: r.ts.toISOString(), - userText: cleaned, - prevAssistantText: prevAssistantBySession.get(r.sessionId) ?? "", - dbTurnId: r.id, - }); - } - - return out; -} - -function stripMemoryContext(text: string): string { - return text.replace(/[\s\S]*?<\/memory-context>\s*/g, ""); -} - -function truncate(s: string, max: number): string { - if (s.length <= max) return s; - return s.slice(0, max - 1) + "…"; -} - const SCORING_SYSTEM_PROMPT = [ "You score user messages from an orchestrator-agent transcript for friction:", "moments where the user is correcting, confused, frustrated, repeating themselves,", @@ -390,6 +202,15 @@ const defaultScoreFn: ScoreFn = async (batch, model) => { })); }; -function clamp(n: number, lo: number, hi: number): number { - return Math.max(lo, Math.min(hi, n)); -} +/** The friction adapter fed to the deep `runScanner` core. */ +const frictionTaxonomy: Taxonomy = { + name: "friction", + modelTask: "scanFriction", + buildPayload: (t) => ({ + turn_id: t.turnId, + user_text: truncate(t.userText, 800), + prev_assistant_text: truncate(t.prevAssistantText, 400), + }), + defaultScoreFn, + bucket: (scored) => bucketFrictionByCategory(scored), +}; diff --git a/packages/evolve/src/scan-preferences.test.ts b/packages/evolve/src/scan-preferences.test.ts index 9b0ea6cc..a584a8b2 100644 --- a/packages/evolve/src/scan-preferences.test.ts +++ b/packages/evolve/src/scan-preferences.test.ts @@ -4,7 +4,7 @@ import { type PreferenceCategory, type PreferenceScoredTurn, } from "./scan-preferences.js"; -import type { OrchestratorTurn } from "./scan-friction.js"; +import type { OrchestratorTurn } from "./collect.js"; function turn( i: number, diff --git a/packages/evolve/src/scan-preferences.ts b/packages/evolve/src/scan-preferences.ts index 77bfc154..f56531a7 100644 --- a/packages/evolve/src/scan-preferences.ts +++ b/packages/evolve/src/scan-preferences.ts @@ -10,15 +10,23 @@ * erosion, and "from now on use pnpm" is a calm score-0 turn that would be * filtered out — but it's exactly the kind of statement that should become a * memory. Different signal, different prompt, different bucketing. + * + * The collect → batch → score → bucket pipeline lives in run-scanner.ts; this + * file contributes only the preference `Taxonomy` (payload projection, default + * LLM scorer, and the per-category bucketing rule). */ -import { loadConfig, resolveModelForEvolveTask } from "@friday/shared"; -import type { EvidencePointer, Signal, SignalSeverity } from "./types.js"; +import type { EvidencePointer, Signal } from "./types.js"; +import { dbTurnIdToLine, type OrchestratorTurn } from "./collect.js"; import { - collectOrchestratorTurns, - dbTurnIdToLine, - type OrchestratorTurn, -} from "./scan-friction.js"; + runScanner, + truncate, + clamp, + severityFor, + severityRank, + type RunScannerOptions, + type Taxonomy, +} from "./run-scanner.js"; import { signalHash } from "./scan.js"; import { chat, extractJson } from "./llm.js"; @@ -31,18 +39,10 @@ export type PreferenceCategory = | "external_pointer" | "none"; -export interface PreferenceScanOptions { - /** ISO string lower bound — turns earlier than this are skipped. */ - since?: string; - /** Maximum user turns to evaluate per scan. Default 1000. */ - maxTurns?: number; - /** Turns per LLM batch. Default 30. */ - batchSize?: number; - /** Model id override. Default resolves via `cfg.evolve.models.scanPreferences`. */ - model?: string; - /** Inject for tests — replaces the real LLM call. */ - scoreFn?: PreferenceScoreFn; -} +export type PreferenceScanOptions = RunScannerOptions< + TurnForPreferenceScoring, + PreferenceScoredTurn +>; export interface PreferenceScoredTurn { turn_id: string; @@ -64,42 +64,7 @@ export type PreferenceScoreFn = ( ) => Promise; export async function scanPreferences(opts: PreferenceScanOptions = {}): Promise { - const sinceMs = opts.since ? Date.parse(opts.since) : 0; - const maxTurns = opts.maxTurns ?? 1000; - const batchSize = opts.batchSize ?? 30; - const model = opts.model ?? resolveModelForEvolveTask(loadConfig(), "scanPreferences").name; - const score = opts.scoreFn ?? defaultScoreFn; - - const turns = await collectOrchestratorTurns(sinceMs, maxTurns); - if (turns.length === 0) return []; - - const scored: Array = []; - for (let i = 0; i < turns.length; i += batchSize) { - const batch = turns.slice(i, i + batchSize); - const payload: TurnForPreferenceScoring[] = batch.map((t) => ({ - turn_id: t.turnId, - user_text: truncate(t.userText, 800), - })); - let results: PreferenceScoredTurn[]; - try { - results = await score(payload, model); - } catch (err) { - console.error( - `preference scoring batch ${i}-${i + batch.length - 1} failed: ${ - err instanceof Error ? err.message : String(err) - }`, - ); - continue; - } - const byId = new Map(results.map((r) => [r.turn_id, r])); - for (const turn of batch) { - const r = byId.get(turn.turnId); - if (!r) continue; - scored.push({ ...turn, ...r }); - } - } - - return bucketByCategory(scored); + return runScanner(preferenceTaxonomy, opts, undefined); } export function bucketByCategory(scored: Array): Signal[] { @@ -149,21 +114,6 @@ export function bucketByCategory(scored: Array= 4) return "high"; - if (score >= 3) return "medium"; - return "low"; -} - -function severityRank(s: SignalSeverity): number { - return s === "high" ? 3 : s === "medium" ? 2 : 1; -} - -function truncate(s: string, max: number): string { - if (s.length <= max) return s; - return s.slice(0, max - 1) + "…"; -} - const SCORING_SYSTEM_PROMPT = [ "You scan user messages from an orchestrator-agent transcript for AFFIRMATIVE", "preference statements — things the user said that should become persistent", @@ -239,6 +189,14 @@ const defaultScoreFn: PreferenceScoreFn = async (batch, model) => { })); }; -function clamp(n: number, lo: number, hi: number): number { - return Math.max(lo, Math.min(hi, n)); -} +/** The preference adapter fed to the deep `runScanner` core. */ +const preferenceTaxonomy: Taxonomy = { + name: "preference", + modelTask: "scanPreferences", + buildPayload: (t) => ({ + turn_id: t.turnId, + user_text: truncate(t.userText, 800), + }), + defaultScoreFn, + bucket: (scored) => bucketByCategory(scored), +};