From cd6b120c931a384c2159a402c15956cf5c9c77ad Mon Sep 17 00:00:00 2001
From: Seth Voltz <seth@designgods.net>
Date: Thu, 25 Jun 2026 18:56:21 -0700
Subject: [PATCH] refactor(evolve): collapse three scanners behind one
 runScanner core + collectFn seam
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the three hand-copied scanner pipelines (friction / preferences /
dreaming) with one deep `runScanner` core fed by per-scanner `Taxonomy`
adapters. The collect → batch → score → bucket scaffolding now lives once;
each scanner contributes only its payload projection, default LLM scorer,
and (unchanged) bucketing rule.

- New `collect.ts`: extracted OrchestratorTurn + collectOrchestratorTurns +
  dbTurnIdToLine to a neutral module (breaks the run-scanner↔scan-friction cycle).
- New `run-scanner.ts`: runScanner<P,T,Ctx> + Taxonomy + RunScannerOptions +
  SCAN_BATCH_SIZE, plus de-duplicated truncate/clamp/severityFor/severityRank.
  Adds a second injectable seam, collectFn (defaults to collectOrchestratorTurns),
  so the whole pipeline is testable without a DB. The core only touches the
  turn_id join key on T (ScoredRow constraint); all domain variation stays in bucket.
- scan-*.ts: each is now a Taxonomy + a thin `scanX = (opts) => runScanner(...)`
  wrapper. Public names/signatures unchanged. Dreaming's richer bucketing
  (payload-in-first-pointer, separate note cap, recall→severity bump, promote/
  reinforce verb) preserved whole; DreamEvidence threaded as the opaque ctx.
- New run-scanner.test.ts: end-to-end pipeline coverage that the collectFn seam
  unlocks (cross-session swap, default+override batching, cross-batch merge,
  log-and-continue, per-category caps, dreaming reinforce-merge + payload round-trip).

Pure internal refactor: no Signal shape change, no propose.ts/rank.ts change.
Behavior verified byte-identical across all three buckets, prompts, and collect.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01SeavKpZNAxkrokvKhQF8Ai
---
 packages/evolve/src/collect.ts               | 150 ++++++++++
 packages/evolve/src/index.ts                 |   2 +
 packages/evolve/src/run-scanner.test.ts      | 300 +++++++++++++++++++
 packages/evolve/src/run-scanner.ts           | 148 +++++++++
 packages/evolve/src/scan-dreaming.test.ts    |   2 +-
 packages/evolve/src/scan-dreaming.ts         | 109 +++----
 packages/evolve/src/scan-friction.test.ts    |   2 +-
 packages/evolve/src/scan-friction.ts         | 237 ++-------------
 packages/evolve/src/scan-preferences.test.ts |   2 +-
 packages/evolve/src/scan-preferences.ts      | 102 ++-----
 10 files changed, 700 insertions(+), 354 deletions(-)
 create mode 100644 packages/evolve/src/collect.ts
 create mode 100644 packages/evolve/src/run-scanner.test.ts
 create mode 100644 packages/evolve/src/run-scanner.ts

diff --git a/packages/evolve/src/collect.ts b/packages/evolve/src/collect.ts
new file mode 100644
index 00000000..af762dab
--- /dev/null
+++ b/packages/evolve/src/collect.ts
@@ -0,0 +1,150 @@
+/**
+ * Orchestrator-transcript collection — the shared stage-A input for every
+ * scanner (friction / preferences / dreaming). Extracted from scan-friction.ts
+ * so the deep `runScanner` core (run-scanner.ts) can depend on it without a
+ * circular import back through any individual scanner, and so the three
+ * scanners draw `OrchestratorTurn` / `dbTurnIdToLine` from a neutral home.
+ *
+ * The DB coupling lives here and nowhere else: `runScanner` accepts a
+ * `collectFn` seam that defaults to `collectOrchestratorTurns`, so the full
+ * pipeline is drivable with canned turns and no Postgres in tests.
+ */
+
+import { eq, inArray } from "drizzle-orm";
+import { getDb, schema } from "@friday/shared";
+
+export interface OrchestratorTurn {
+  sessionId: string;
+  /** Source JSONL file (recorded on the turns row). */
+  filePath: string;
+  /** Synthetic id for matching back from LLM scoring output. */
+  turnId: string;
+  /** ISO timestamp. */
+  ts: string;
+  userText: string;
+  prevAssistantText: string;
+  /** DB row id. Phase 4.11 flipped `blocks.id` to text (UUID), so
+   *  this can be a UUID for newer rows or a bigserial-shaped
+   *  numeric string for legacy rows. We parse it as a number for
+   *  the EvidencePointer's `line` field (falls through to omitted
+   *  when NaN). */
+  dbTurnId: string;
+}
+
+export function dbTurnIdToLine(id: string): number | undefined {
+  const n = Number(id);
+  return Number.isFinite(n) ? n : undefined;
+}
+
+/**
+ * Resolve "which sessions belong to the orchestrator" by:
+ *   1. Selecting all agents of type=orchestrator from the registry.
+ *   2. Including their currently-attached sessionId, plus
+ *   3. Every distinct sessionId in the `turns` table that's tagged with one
+ *      of those agent names (catches historical sessions across resumes).
+ */
+async function collectOrchestratorSessions(): Promise<Set<string>> {
+  const out = new Set<string>();
+  const db = getDb();
+
+  const orchAgents = await db
+    .select()
+    .from(schema.agents)
+    .where(eq(schema.agents.type, "orchestrator"));
+  if (orchAgents.length === 0) return out;
+
+  for (const a of orchAgents) {
+    if (a.sessionId) out.add(a.sessionId);
+  }
+
+  // Historic session enumeration via the `blocks` table — the legacy
+  // `turns` table is retired per ADR-016. Distinct session_id values
+  // for any orchestrator-named agent's blocks.
+  const orchNames = orchAgents.map((a) => a.name);
+  const historicSessions = await db
+    .selectDistinct({ sessionId: schema.blocks.sessionId })
+    .from(schema.blocks)
+    .where(inArray(schema.blocks.agentName, orchNames));
+  for (const t of historicSessions) {
+    if (t.sessionId) out.add(t.sessionId);
+  }
+
+  return out;
+}
+
+/**
+ * Walk the `turns` table for orchestrator sessions, return user turns paired
+ * with the immediately preceding assistant text. Skips pure tool_result
+ * echoes and strips `<memory-context>` auto-injection blocks.
+ *
+ * Capped at `maxTurns`. Older sessions go first if we hit the cap.
+ */
+export async function collectOrchestratorTurns(
+  sinceMs: number,
+  maxTurns: number,
+): Promise<OrchestratorTurn[]> {
+  const sessionIds = await collectOrchestratorSessions();
+  if (sessionIds.size === 0) return [];
+
+  // Ported to the `blocks` table per ADR-016 + ADR-023. Each block row is
+  // already a single semantic unit (text / thinking / tool_use / tool_result
+  // / user / mail); we no longer parse a JSONL-style `content_json` envelope
+  // with `type=user|assistant`. The friction scorer wants pairs of
+  // (user-typed text, immediately-preceding assistant text), so we walk
+  // blocks in ts order, accumulate the latest assistant text per session,
+  // and emit a turn whenever we see a user-role text/user block.
+  const db = getDb();
+  const rows = await db
+    .select()
+    .from(schema.blocks)
+    .where(inArray(schema.blocks.sessionId, [...sessionIds]));
+  // Sort ts ascending so older sessions get scored first when capped.
+  rows.sort((a, b) => a.ts.getTime() - b.ts.getTime());
+
+  const out: OrchestratorTurn[] = [];
+  const prevAssistantBySession = new Map<string, string>();
+
+  for (const r of rows) {
+    const rTsMs = r.ts.getTime();
+    if (sinceMs && rTsMs < sinceMs) continue;
+    if (out.length >= maxTurns) break;
+
+    // contentJson is jsonb; Drizzle returns it as the parsed object. Block
+    // payloads are shaped per-kind; we only need the `text` field for the
+    // text + user kinds.
+    const content = r.contentJson as { text?: string };
+
+    if (r.role === "assistant" && r.kind === "text") {
+      const txt = typeof content?.text === "string" ? content.text : "";
+      if (txt) prevAssistantBySession.set(r.sessionId, txt);
+      continue;
+    }
+
+    // User-typed blocks (chat input, scratch seed, agent_spawn, schedule
+    // task prompt). Skip mail-delivered user blocks — those aren't the
+    // user's free-text friction signal.
+    if (r.role !== "user" || r.kind !== "text") continue;
+    if (r.source === "mail") continue;
+
+    const userText = typeof content?.text === "string" ? content.text : "";
+    if (!userText.trim()) continue;
+    const cleaned = stripMemoryContext(userText).trim();
+    if (!cleaned) continue;
+
+    out.push({
+      sessionId: r.sessionId,
+      filePath: "",
+      turnId: r.turnId,
+      ts: r.ts.toISOString(),
+      userText: cleaned,
+      prevAssistantText: prevAssistantBySession.get(r.sessionId) ?? "",
+      dbTurnId: r.id,
+    });
+  }
+
+  return out;
+}
+
+function stripMemoryContext(text: string): string {
+  return text.replace(/<memory-context>[\s\S]*?<\/memory-context>\s*/g, "");
+}
diff --git a/packages/evolve/src/index.ts b/packages/evolve/src/index.ts
index 2e6b15ec..b4d6b938 100644
--- a/packages/evolve/src/index.ts
+++ b/packages/evolve/src/index.ts
@@ -18,6 +18,8 @@ export * from "./types.js";
 export * from "./store.js";
 export * from "./scan.js";
 export * from "./scan-agent-depth.js";
+export * from "./collect.js";
+export * from "./run-scanner.js";
 export * from "./scan-friction.js";
 export * from "./scan-preferences.js";
 export * from "./rank.js";
diff --git a/packages/evolve/src/run-scanner.test.ts b/packages/evolve/src/run-scanner.test.ts
new file mode 100644
index 00000000..53dec228
--- /dev/null
+++ b/packages/evolve/src/run-scanner.test.ts
@@ -0,0 +1,300 @@
+/**
+ * End-to-end pipeline tests for the deep `runScanner` core, driven through the
+ * three public entry points (scanFriction / scanPreferences / scanDreaming)
+ * with BOTH seams injected — a fake `collectFn` (canned turns, no DB) and a
+ * fake `scoreFn` (canned scores, no LLM). This is the coverage the refactor
+ * unlocked: before the `collectFn` seam existed, the collect → batch → score →
+ * bucket wiring had ZERO tests (only the isolated bucket functions were
+ * exercised). The co-located bucket tests still pin the bucketing rules
+ * directly; these pin the wiring around them.
+ */
+
+import { describe, expect, it, vi } from "vitest";
+import { scanFriction, type ScoredTurn, type TurnForScoring } from "./scan-friction.js";
+import { scanPreferences, type PreferenceScoredTurn } from "./scan-preferences.js";
+import {
+  scanDreaming,
+  decodeDreamPayload,
+  type DreamScoredCandidate,
+  type DreamEvidence,
+} from "./scan-dreaming.js";
+import { SCAN_BATCH_SIZE } from "./run-scanner.js";
+import type { OrchestratorTurn } from "./collect.js";
+
+function oturn(i: number, overrides: Partial<OrchestratorTurn> = {}): OrchestratorTurn {
+  return {
+    sessionId: overrides.sessionId ?? "sess-a",
+    filePath: overrides.filePath ?? "/tmp/sess-a.jsonl",
+    turnId: `t-${i}`,
+    ts: overrides.ts ?? `2026-05-01T00:0${i}:00.000Z`,
+    userText: overrides.userText ?? `user text ${i}`,
+    prevAssistantText: overrides.prevAssistantText ?? `assistant text ${i}`,
+    dbTurnId: overrides.dbTurnId ?? `${100 + i}`,
+    ...overrides,
+  };
+}
+
+/** A collectFn seam that returns a fixed set of turns regardless of args. */
+function fixedCollect(turns: OrchestratorTurn[]) {
+  return async (_since: number, _max: number) => turns;
+}
+
+describe("runScanner — friction pipeline (collect → batch → score → bucket)", () => {
+  it("returns [] without scoring when collect yields no turns", async () => {
+    const scoreFn = vi.fn();
+    const out = await scanFriction({ collectFn: fixedCollect([]), scoreFn, model: "m" });
+    expect(out).toEqual([]);
+    expect(scoreFn).not.toHaveBeenCalled();
+  });
+
+  it("projects each turn through buildPayload and passes opts.model to scoreFn", async () => {
+    const turns = [oturn(1, { userText: "x".repeat(900), prevAssistantText: "y".repeat(500) })];
+    const scoreFn = vi.fn(
+      async (batch: TurnForScoring[], _model: string): Promise<ScoredTurn[]> =>
+        batch.map((b) => ({
+          turn_id: b.turn_id,
+          friction_score: 3,
+          category: "correction",
+          reason: "",
+        })),
+    );
+
+    await scanFriction({ collectFn: fixedCollect(turns), scoreFn, model: "haiku-test" });
+
+    expect(scoreFn).toHaveBeenCalledTimes(1);
+    const [batch, model] = scoreFn.mock.calls[0];
+    expect(model).toBe("haiku-test");
+    // buildPayload shape + truncation (user_text cap 800, prev_assistant cap 400).
+    expect(batch[0]).toMatchObject({ turn_id: "t-1" });
+    expect(batch[0].user_text.length).toBe(800);
+    expect(batch[0].prev_assistant_text.length).toBe(400);
+  });
+
+  it("drives the cross-session-diversity swap through the real entry point", async () => {
+    // 5 sess-a turns fill the 5-cap, then 1 sess-b turn forces the swap so the
+    // enricher sees evidence from >1 session. Reachable ONLY via the pipeline.
+    const turns = [
+      ...Array.from({ length: 5 }, (_, i) => oturn(i + 1, { sessionId: "sess-a" })),
+      oturn(6, { sessionId: "sess-b" }),
+    ];
+    const scoreFn = async (batch: { turn_id: string }[]): Promise<ScoredTurn[]> =>
+      batch.map((b) => ({
+        turn_id: b.turn_id,
+        friction_score: 3,
+        category: "doubt",
+        reason: "",
+      }));
+
+    const out = await scanFriction({ collectFn: fixedCollect(turns), scoreFn, model: "m" });
+
+    expect(out).toHaveLength(1);
+    expect(out[0].key).toBe("friction_doubt");
+    expect(out[0].count).toBe(6);
+    expect(out[0].evidencePointers).toHaveLength(5);
+    const sessions = out[0].evidencePointers.map((p) => p.sessionId);
+    expect(sessions).toContain("sess-a");
+    expect(sessions).toContain("sess-b");
+  });
+
+  it("splits into batches of the default SCAN_BATCH_SIZE (30) when no override is given", async () => {
+    const turns = Array.from({ length: 65 }, (_, i) => oturn(i + 1));
+    const scoreFn = vi.fn(
+      async (batch: { turn_id: string }[]): Promise<ScoredTurn[]> =>
+        batch.map((b) => ({ turn_id: b.turn_id, friction_score: 0, category: "none", reason: "" })),
+    );
+
+    await scanFriction({ collectFn: fixedCollect(turns), scoreFn, model: "m" });
+
+    // 65 turns / default batch 30 → 3 calls of sizes 30, 30, 5.
+    expect(SCAN_BATCH_SIZE).toBe(30);
+    expect(scoreFn.mock.calls.map((c) => c[0].length)).toEqual([30, 30, 5]);
+  });
+
+  it("honors a per-call batchSize override", async () => {
+    const turns = Array.from({ length: 7 }, (_, i) => oturn(i + 1));
+    const scoreFn = vi.fn(
+      async (batch: { turn_id: string }[]): Promise<ScoredTurn[]> =>
+        batch.map((b) => ({ turn_id: b.turn_id, friction_score: 0, category: "none", reason: "" })),
+    );
+
+    await scanFriction({ collectFn: fixedCollect(turns), scoreFn, model: "m", batchSize: 3 });
+
+    // 7 turns / batch 3 → 3 calls of sizes 3, 3, 1.
+    expect(scoreFn.mock.calls.map((c) => c[0].length)).toEqual([3, 3, 1]);
+  });
+
+  it("accumulates same-category turns across multiple batches into one merged signal", async () => {
+    // batchSize:1 forces three separate surviving batches; the core accumulates
+    // into a single `scored` array and buckets ONCE after the loop, so they must
+    // merge into one signal with count 3. Catches any per-batch re-init of
+    // `scored` or per-batch bucketing regression.
+    const turns = Array.from({ length: 3 }, (_, i) => oturn(i + 1, { sessionId: `s${i}` }));
+    const scoreFn = async (batch: { turn_id: string }[]): Promise<ScoredTurn[]> =>
+      batch.map((b) => ({ turn_id: b.turn_id, friction_score: 3, category: "doubt", reason: "" }));
+
+    const out = await scanFriction({
+      collectFn: fixedCollect(turns),
+      scoreFn,
+      model: "m",
+      batchSize: 1,
+    });
+
+    expect(out).toHaveLength(1);
+    expect(out[0].key).toBe("friction_doubt");
+    expect(out[0].count).toBe(3);
+  });
+
+  it("logs and continues when one batch throws, scoring the rest", async () => {
+    const errSpy = vi.spyOn(console, "error").mockImplementation(() => {});
+    const turns = Array.from({ length: 4 }, (_, i) => oturn(i + 1, { sessionId: `s${i}` }));
+    let call = 0;
+    const scoreFn = async (batch: { turn_id: string }[]): Promise<ScoredTurn[]> => {
+      call++;
+      if (call === 1) throw new Error("batch boom");
+      return batch.map((b) => ({
+        turn_id: b.turn_id,
+        friction_score: 4,
+        category: "frustration",
+        reason: "",
+      }));
+    };
+
+    const out = await scanFriction({
+      collectFn: fixedCollect(turns),
+      scoreFn,
+      model: "m",
+      batchSize: 2,
+    });
+
+    // First batch (turns 1-2) threw → dropped; second batch (turns 3-4) survived.
+    expect(out).toHaveLength(1);
+    expect(out[0].count).toBe(2);
+    expect(errSpy).toHaveBeenCalledTimes(1);
+    expect(String(errSpy.mock.calls[0][0])).toContain("friction scoring batch");
+    errSpy.mockRestore();
+  });
+});
+
+describe("runScanner — preferences pipeline", () => {
+  it("emits one signal per category with the 3-cap and severity escalation", async () => {
+    const turns = [
+      oturn(1, { sessionId: "s1" }),
+      oturn(2, { sessionId: "s2" }),
+      oturn(3, { sessionId: "s3" }),
+      oturn(4, { sessionId: "s4" }),
+      oturn(5, { sessionId: "s5" }),
+    ];
+    // 4 tooling turns (escalating 2→5) + 1 directive turn.
+    const scores: Record<string, PreferenceScoredTurn> = {
+      "t-1": { turn_id: "t-1", signal_score: 2, category: "preference_tooling", reason: "" },
+      "t-2": { turn_id: "t-2", signal_score: 3, category: "preference_tooling", reason: "" },
+      "t-3": { turn_id: "t-3", signal_score: 4, category: "preference_tooling", reason: "" },
+      "t-4": { turn_id: "t-4", signal_score: 5, category: "preference_tooling", reason: "" },
+      "t-5": { turn_id: "t-5", signal_score: 3, category: "directive", reason: "" },
+    };
+    const scoreFn = async (batch: { turn_id: string }[]) => batch.map((b) => scores[b.turn_id]);
+
+    const out = await scanPreferences({ collectFn: fixedCollect(turns), scoreFn, model: "m" });
+
+    const tooling = out.find((s) => s.key === "preference_tooling");
+    const directive = out.find((s) => s.key === "directive");
+    expect(out).toHaveLength(2);
+    expect(tooling?.count).toBe(4);
+    expect(tooling?.evidencePointers).toHaveLength(3); // capped at 3
+    expect(tooling?.severity).toBe("high"); // escalated by the score-5 turn
+    expect(directive?.count).toBe(1);
+    expect(directive?.severity).toBe("medium");
+  });
+
+  it("drops sub-threshold (< 2) and 'none' turns end-to-end", async () => {
+    const turns = [oturn(1), oturn(2)];
+    const scoreFn = async (_batch: { turn_id: string }[]): Promise<PreferenceScoredTurn[]> => [
+      { turn_id: "t-1", signal_score: 1, category: "preference_style", reason: "" },
+      { turn_id: "t-2", signal_score: 5, category: "none", reason: "" },
+    ];
+    const out = await scanPreferences({ collectFn: fixedCollect(turns), scoreFn, model: "m" });
+    expect(out).toEqual([]);
+  });
+});
+
+describe("runScanner — dreaming pipeline", () => {
+  function dcand(
+    turn_id: string,
+    title: string,
+    overrides: Partial<DreamScoredCandidate> = {},
+  ): DreamScoredCandidate {
+    return {
+      turn_id,
+      signal_score: overrides.signal_score ?? 3,
+      category: overrides.category ?? "feedback",
+      reason: "",
+      proposed_title: title,
+      proposed_content: overrides.proposed_content ?? `content for ${title}`,
+      proposed_tags: overrides.proposed_tags ?? ["alpha"],
+      already_covered: overrides.already_covered,
+    };
+  }
+
+  it("merges a recurring candidate to one reinforce signal with a decodable payload", async () => {
+    const title = "Always deploy via friday update";
+    const turns = [
+      oturn(1, { ts: "2026-05-01T00:01:00.000Z" }),
+      oturn(2, { ts: "2026-05-01T00:05:00.000Z" }),
+    ];
+    const scoreFn = async (batch: { turn_id: string }[]): Promise<DreamScoredCandidate[]> =>
+      batch.map((b) => dcand(b.turn_id, title));
+
+    const out = await scanDreaming({ collectFn: fixedCollect(turns), scoreFn, model: "m" });
+
+    expect(out).toHaveLength(1);
+    expect(out[0].count).toBe(2);
+    expect(out[0].key).toBe("dream:reinforce:always-deploy-via-friday-update");
+    expect(out[0].firstSeenAt).toBe("2026-05-01T00:01:00.000Z");
+    expect(out[0].lastSeenAt).toBe("2026-05-01T00:05:00.000Z");
+    const payload = decodeDreamPayload(out[0]);
+    expect(payload?.title).toBe(title);
+    expect(payload?.category).toBe("feedback");
+  });
+
+  it("a single non-recurring candidate is a promote signal", async () => {
+    const turns = [oturn(1)];
+    const scoreFn = async (batch: { turn_id: string }[]): Promise<DreamScoredCandidate[]> =>
+      batch.map((b) => dcand(b.turn_id, "Seth works in Pacific time", { category: "user" }));
+
+    const out = await scanDreaming({ collectFn: fixedCollect(turns), scoreFn, model: "m" });
+    expect(out).toHaveLength(1);
+    expect(out[0].key).toBe("dream:promote:seth-works-in-pacific-time");
+  });
+
+  it("threads DreamEvidence through buildPayload into the scoreFn batch", async () => {
+    const turns = [oturn(1)];
+    const evidence: DreamEvidence = {
+      recallStatsBySlug: new Map(),
+      orchestratorName: "friday",
+      frictionSignalsInWindow: [
+        {
+          hash: "h1",
+          source: "daemon",
+          key: "watchdog.stall.detected",
+          severity: "high",
+          count: 1,
+          firstSeenAt: "2026-05-01T00:00:00.000Z",
+          lastSeenAt: "2026-05-01T00:01:00.000Z",
+          agent: "friday",
+          evidencePointers: [],
+        },
+      ],
+    };
+    const scoreFn = vi.fn(
+      async (batch: { turn_id: string }[]): Promise<DreamScoredCandidate[]> =>
+        batch.map((b) => dcand(b.turn_id, "Daemon stalls under load")),
+    );
+
+    await scanDreaming({ collectFn: fixedCollect(turns), scoreFn, model: "m", evidence });
+
+    // Pin the exact rendered evidence string buildPayload → renderEvidenceForTurn
+    // produced for this matching-agent stall signal (no existing-memories line).
+    const rendered = (scoreFn.mock.calls[0][0][0] as { evidence?: string }).evidence;
+    expect(rendered).toBe("co-occurring daemon signal: watchdog.stall.detected (severity=high)");
+  });
+});
diff --git a/packages/evolve/src/run-scanner.ts b/packages/evolve/src/run-scanner.ts
new file mode 100644
index 00000000..872088b5
--- /dev/null
+++ b/packages/evolve/src/run-scanner.ts
@@ -0,0 +1,148 @@
+/**
+ * The deep scanner core. `scan-friction.ts`, `scan-preferences.ts`, and
+ * `scan-dreaming.ts` were three hand-copied pipelines sharing the same four
+ * stages — collect orchestrator turns → batch → LLM-score → bucket into
+ * `Signal[]` — with ~35–40 lines of identical scaffolding per file. This module
+ * owns those stages once; each scanner contributes only a `Taxonomy` adapter
+ * (its per-turn payload projection, its default LLM scorer, and its bucketing
+ * rule). The genuine variation between the scanners — category enums, score
+ * gates, pointer caps, dreaming's payload encoding and promote/reinforce verb —
+ * lives entirely inside each `bucket`; the core never inspects the scored type
+ * beyond the `turn_id` join key it needs to map scores back to their turns.
+ *
+ * Two injectable seams ride on the core:
+ *   - `scoreFn` (the pre-existing model seam) — replaces the LLM call in tests.
+ *   - `collectFn` (new) — replaces `collectOrchestratorTurns`, so the WHOLE
+ *     pipeline (collect → batch → score → bucket) is testable without a DB.
+ */
+
+import { loadConfig, resolveModelForEvolveTask, type EvolveTaskName } from "@friday/shared";
+import type { Signal, SignalSeverity } from "./types.js";
+import { collectOrchestratorTurns, type OrchestratorTurn } from "./collect.js";
+
+/** Turns per LLM batch. Default 30. Overridable per-call via `opts.batchSize`. */
+export const SCAN_BATCH_SIZE = 30;
+
+/**
+ * The one field every scored row carries: the synthetic turn id, echoed back by
+ * the LLM, used to join scores onto their source turns. This is the sole part
+ * of the scored type `T` the core is allowed to know — everything else is the
+ * taxonomy's business.
+ */
+export interface ScoredRow {
+  turn_id: string;
+}
+
+/** The turn-collection seam. Defaults to `collectOrchestratorTurns`. */
+export type CollectFn = (sinceMs: number, maxTurns: number) => Promise<OrchestratorTurn[]>;
+
+/** The model seam: score a batch of per-turn payloads `P` into scored rows `T`. */
+export type BatchScoreFn<P, T extends ScoredRow> = (batch: P[], model: string) => Promise<T[]>;
+
+/**
+ * A per-scanner adapter. `P` is the per-turn LLM payload shape, `T` the scored
+ * row merged onto each `OrchestratorTurn`, `Ctx` an opaque per-scanner context
+ * threaded through untouched by the core (dreaming uses it for its
+ * `DreamEvidence`; friction/preferences leave it `undefined`).
+ */
+export interface Taxonomy<P, T extends ScoredRow, Ctx = undefined> {
+  /** Used in the per-batch error log line: `${name} scoring batch …`. */
+  name: string;
+  /** Resolves the default model via `cfg.evolve.models[modelTask]`. */
+  modelTask: EvolveTaskName;
+  /** Project one turn into the LLM payload shape (friction adds prev_assistant; dreaming adds evidence). */
+  buildPayload: (turn: OrchestratorTurn, ctx: Ctx) => P;
+  /** The default LLM scorer (the existing per-scanner `defaultScoreFn`). */
+  defaultScoreFn: BatchScoreFn<P, T>;
+  /** The scanner-specific bucketing rule — score gate, caps, encoding, verb. Unchanged. */
+  bucket: (scored: Array<OrchestratorTurn & T>, ctx: Ctx) => Signal[];
+}
+
+/** Per-scan options shared by every scanner. `P`/`T` specialize per taxonomy. */
+export interface RunScannerOptions<P, T extends ScoredRow> {
+  /** ISO string lower bound — turns earlier than this are skipped. */
+  since?: string;
+  /** Maximum user turns to evaluate per scan. Default 1000. */
+  maxTurns?: number;
+  /** Turns per LLM batch. Default `SCAN_BATCH_SIZE` (30). */
+  batchSize?: number;
+  /** Model id override. Default resolves via `cfg.evolve.models[taxonomy.modelTask]`. */
+  model?: string;
+  /** Inject for tests — replaces the real LLM call. */
+  scoreFn?: BatchScoreFn<P, T>;
+  /** Inject for tests — replaces the DB-backed turn collection. */
+  collectFn?: CollectFn;
+}
+
+/**
+ * The deep core: collect → batch → score → bucket. Generic over the per-turn
+ * payload `P`, the scored row `T` (constrained to carry `turn_id`), and the
+ * opaque per-scanner context `Ctx`. A failing batch is logged and skipped — it
+ * never aborts the scan (identical to the copied loops it replaces).
+ */
+export async function runScanner<P, T extends ScoredRow, Ctx>(
+  taxonomy: Taxonomy<P, T, Ctx>,
+  opts: RunScannerOptions<P, T>,
+  ctx: Ctx,
+): Promise<Signal[]> {
+  const sinceMs = opts.since ? Date.parse(opts.since) : 0;
+  const maxTurns = opts.maxTurns ?? 1000;
+  const batchSize = opts.batchSize ?? SCAN_BATCH_SIZE;
+  const model = opts.model ?? resolveModelForEvolveTask(loadConfig(), taxonomy.modelTask).name;
+  const score = opts.scoreFn ?? taxonomy.defaultScoreFn;
+  const collect = opts.collectFn ?? collectOrchestratorTurns;
+
+  const turns = await collect(sinceMs, maxTurns);
+  if (turns.length === 0) return [];
+
+  const scored: Array<OrchestratorTurn & T> = [];
+  for (let i = 0; i < turns.length; i += batchSize) {
+    const batch = turns.slice(i, i + batchSize);
+    const payload = batch.map((t) => taxonomy.buildPayload(t, ctx));
+    let results: T[];
+    try {
+      results = await score(payload, model);
+    } catch (err) {
+      // Better to score fewer turns than abort the whole pass; log loudly.
+      console.error(
+        `${taxonomy.name} scoring batch ${i}-${i + batch.length - 1} failed: ${
+          err instanceof Error ? err.message : String(err)
+        }`,
+      );
+      continue;
+    }
+    const byId = new Map(results.map((r) => [r.turn_id, r]));
+    for (const turn of batch) {
+      const r = byId.get(turn.turnId);
+      if (!r) continue;
+      scored.push({ ...turn, ...r });
+    }
+  }
+
+  return taxonomy.bucket(scored, ctx);
+}
+
+// ── Shared pure helpers, de-duplicated from the three scanner files ──────────
+
+/** Truncate to `max` chars with an ellipsis. Used by every `buildPayload`. */
+export function truncate(s: string, max: number): string {
+  if (s.length <= max) return s;
+  return s.slice(0, max - 1) + "…";
+}
+
+/** Clamp `n` into `[lo, hi]`. Used by every `defaultScoreFn` parser. */
+export function clamp(n: number, lo: number, hi: number): number {
+  return Math.max(lo, Math.min(hi, n));
+}
+
+/** Map an integer signal/friction score (0–5) to a severity band. */
+export function severityFor(score: number): SignalSeverity {
+  if (score >= 4) return "high";
+  if (score >= 3) return "medium";
+  return "low";
+}
+
+/** Order severities so bucketing can keep the strongest seen. */
+export function severityRank(s: SignalSeverity): number {
+  return s === "high" ? 3 : s === "medium" ? 2 : 1;
+}
diff --git a/packages/evolve/src/scan-dreaming.test.ts b/packages/evolve/src/scan-dreaming.test.ts
index a647120a..ff76df0d 100644
--- a/packages/evolve/src/scan-dreaming.test.ts
+++ b/packages/evolve/src/scan-dreaming.test.ts
@@ -10,7 +10,7 @@ import {
 } from "./scan-dreaming.js";
 import { scoreProposal } from "./rank.js";
 import { slugify } from "./apply.js";
-import type { OrchestratorTurn } from "./scan-friction.js";
+import type { OrchestratorTurn } from "./collect.js";
 
 function turn(
   i: number,
diff --git a/packages/evolve/src/scan-dreaming.ts b/packages/evolve/src/scan-dreaming.ts
index 599039a0..44e5f760 100644
--- a/packages/evolve/src/scan-dreaming.ts
+++ b/packages/evolve/src/scan-dreaming.ts
@@ -17,15 +17,25 @@
  * The promote-vs-reinforce distinction rides in the signal `key`
  * (`dream:promote:<slug>` / `dream:reinforce:<slug>`); the category is NOT in
  * the key — it is recovered from the decoded payload downstream (design D3).
+ *
+ * The collect → batch → score → bucket pipeline lives in run-scanner.ts; this
+ * file contributes the dreaming `Taxonomy` plus its richer bucketing (payload
+ * encoding, recall→severity bump, separate note-pointer cap, and the post-merge
+ * promote/reinforce verb decision). The per-scanner `evidence` context is
+ * threaded through `runScanner` opaquely to both `buildPayload` and `bucket`.
  */
 
-import { loadConfig, resolveModelForEvolveTask } from "@friday/shared";
 import type { EvidencePointer, Signal, SignalSeverity } from "./types.js";
+import { dbTurnIdToLine, type OrchestratorTurn } from "./collect.js";
 import {
-  collectOrchestratorTurns,
-  dbTurnIdToLine,
-  type OrchestratorTurn,
-} from "./scan-friction.js";
+  runScanner,
+  truncate,
+  clamp,
+  severityFor,
+  severityRank,
+  type RunScannerOptions,
+  type Taxonomy,
+} from "./run-scanner.js";
 import { signalHash } from "./scan.js";
 import { chat, extractJson } from "./llm.js";
 import { slugify } from "./apply.js";
@@ -78,17 +88,10 @@ export type DreamScoreFn = (
   model: string,
 ) => Promise<DreamScoredCandidate[]>;
 
-export interface DreamScanOptions {
-  /** ISO string lower bound (Date.parse'd to ms; mirrors scanPreferences). */
-  since?: string;
-  /** Maximum user turns to evaluate per scan. Default 1000. */
-  maxTurns?: number;
-  /** Turns per LLM batch. Default 30. */
-  batchSize?: number;
-  /** Model id override. Default resolves via `cfg.evolve.models.scanPreferences`. */
-  model?: string;
-  /** Inject for tests — replaces the real LLM call. */
-  scoreFn?: DreamScoreFn;
+export interface DreamScanOptions extends RunScannerOptions<
+  TurnForDreamScoring,
+  DreamScoredCandidate
+> {
   /** Recall/friction evidence assembled by the endpoint; optional. */
   evidence?: DreamEvidence;
 }
@@ -136,43 +139,7 @@ export function decodeDreamPayload(signal: Signal): DreamPayload | null {
 }
 
 export async function scanDreaming(opts: DreamScanOptions = {}): Promise<Signal[]> {
-  const sinceMs = opts.since ? Date.parse(opts.since) : 0;
-  const maxTurns = opts.maxTurns ?? 1000;
-  const batchSize = opts.batchSize ?? 30;
-  const model = opts.model ?? resolveModelForEvolveTask(loadConfig(), "scanPreferences").name;
-  const score = opts.scoreFn ?? defaultScoreFn;
-
-  const turns = await collectOrchestratorTurns(sinceMs, maxTurns);
-  if (turns.length === 0) return [];
-
-  const scored: Array<OrchestratorTurn & DreamScoredCandidate> = [];
-  for (let i = 0; i < turns.length; i += batchSize) {
-    const batch = turns.slice(i, i + batchSize);
-    const payload: TurnForDreamScoring[] = batch.map((t) => ({
-      turn_id: t.turnId,
-      user_text: truncate(t.userText, 800),
-      evidence: renderEvidenceForTurn(t, opts.evidence),
-    }));
-    let results: DreamScoredCandidate[];
-    try {
-      results = await score(payload, model);
-    } catch (err) {
-      console.error(
-        `dream scoring batch ${i}-${i + batch.length - 1} failed: ${
-          err instanceof Error ? err.message : String(err)
-        }`,
-      );
-      continue;
-    }
-    const byId = new Map(results.map((r) => [r.turn_id, r]));
-    for (const turn of batch) {
-      const r = byId.get(turn.turnId);
-      if (!r) continue;
-      scored.push({ ...turn, ...r });
-    }
-  }
-
-  return bucketByCandidate(scored, opts.evidence);
+  return runScanner(dreamTaxonomy, opts, opts.evidence);
 }
 
 /**
@@ -320,25 +287,10 @@ function renderEvidenceForTurn(_t: OrchestratorTurn, evidence?: DreamEvidence):
   return lines.length > 0 ? lines.join("\n") : undefined;
 }
 
-function severityFor(score: number): SignalSeverity {
-  if (score >= 4) return "high";
-  if (score >= 3) return "medium";
-  return "low";
-}
-
 function bumpSeverity(s: SignalSeverity): SignalSeverity {
   return s === "low" ? "medium" : "high";
 }
 
-function severityRank(s: SignalSeverity): number {
-  return s === "high" ? 3 : s === "medium" ? 2 : 1;
-}
-
-function truncate(s: string, max: number): string {
-  if (s.length <= max) return s;
-  return s.slice(0, max - 1) + "…";
-}
-
 const SCORING_SYSTEM_PROMPT = [
   "You scan user messages from an orchestrator-agent transcript and propose any",
   "NEW persistent memory worth saving — durable facts about the user, their",
@@ -422,6 +374,21 @@ const defaultScoreFn: DreamScoreFn = async (batch, model) => {
     }));
 };
 
-function clamp(n: number, lo: number, hi: number): number {
-  return Math.max(lo, Math.min(hi, n));
-}
+/** The dreaming adapter fed to the deep `runScanner` core. The opaque context
+ *  is `DreamEvidence` (or undefined) — threaded into both `buildPayload`
+ *  (for per-turn evidence rendering) and `bucket` (for recall/friction notes). */
+const dreamTaxonomy: Taxonomy<
+  TurnForDreamScoring,
+  DreamScoredCandidate,
+  DreamEvidence | undefined
+> = {
+  name: "dream",
+  modelTask: "scanPreferences",
+  buildPayload: (t, evidence) => ({
+    turn_id: t.turnId,
+    user_text: truncate(t.userText, 800),
+    evidence: renderEvidenceForTurn(t, evidence),
+  }),
+  defaultScoreFn,
+  bucket: (scored, evidence) => bucketByCandidate(scored, evidence),
+};
diff --git a/packages/evolve/src/scan-friction.test.ts b/packages/evolve/src/scan-friction.test.ts
index 0dede70a..30ba1c6a 100644
--- a/packages/evolve/src/scan-friction.test.ts
+++ b/packages/evolve/src/scan-friction.test.ts
@@ -4,7 +4,7 @@ import {
   type FrictionCategory,
   type ScoredTurn,
 } from "./scan-friction.js";
-import type { OrchestratorTurn } from "./scan-friction.js";
+import type { OrchestratorTurn } from "./collect.js";
 
 function turn(
   i: number,
diff --git a/packages/evolve/src/scan-friction.ts b/packages/evolve/src/scan-friction.ts
index 98b8da5a..c0af08c8 100644
--- a/packages/evolve/src/scan-friction.ts
+++ b/packages/evolve/src/scan-friction.ts
@@ -17,13 +17,25 @@
  * (correction / confusion / repeat / reset / frustration / doubt / redirect)
  * is general-purpose user-orchestrator interaction analysis, not specific
  * to any chat surface.
+ *
+ * The collect → batch → score → bucket pipeline lives in run-scanner.ts; this
+ * file contributes only the friction `Taxonomy` (payload projection, default
+ * LLM scorer, and the cross-session-diversity bucketing rule).
  */
 
-import { eq, inArray } from "drizzle-orm";
-import { getDb, loadConfig, resolveModelForEvolveTask, schema } from "@friday/shared";
-import type { EvidencePointer, Signal, SignalSeverity } from "./types.js";
+import type { EvidencePointer, Signal } from "./types.js";
 import { signalHash } from "./scan.js";
 import { chat, extractJson } from "./llm.js";
+import { dbTurnIdToLine, type OrchestratorTurn } from "./collect.js";
+import {
+  runScanner,
+  truncate,
+  clamp,
+  severityFor,
+  severityRank,
+  type RunScannerOptions,
+  type Taxonomy,
+} from "./run-scanner.js";
 
 export type FrictionCategory =
   | "correction"
@@ -35,18 +47,7 @@ export type FrictionCategory =
   | "redirect"
   | "none";
 
-export interface FrictionScanOptions {
-  /** ISO string lower bound — turns earlier than this are skipped. */
-  since?: string;
-  /** Maximum user turns to evaluate per scan. Default 1000. */
-  maxTurns?: number;
-  /** Turns per LLM batch. Default 30. */
-  batchSize?: number;
-  /** Model id override. Default resolves via `cfg.evolve.models.scanFriction`. */
-  model?: string;
-  /** Inject for tests — replaces the real LLM call. */
-  scoreFn?: ScoreFn;
-}
+export type FrictionScanOptions = RunScannerOptions<TurnForScoring, ScoredTurn>;
 
 export interface ScoredTurn {
   turn_id: string;
@@ -63,69 +64,8 @@ export interface TurnForScoring {
   prev_assistant_text: string;
 }
 
-export interface OrchestratorTurn {
-  sessionId: string;
-  /** Source JSONL file (recorded on the turns row). */
-  filePath: string;
-  /** Synthetic id for matching back from LLM scoring output. */
-  turnId: string;
-  /** ISO timestamp. */
-  ts: string;
-  userText: string;
-  prevAssistantText: string;
-  /** DB row id. Phase 4.11 flipped `blocks.id` to text (UUID), so
-   *  this can be a UUID for newer rows or a bigserial-shaped
-   *  numeric string for legacy rows. We parse it as a number for
-   *  the EvidencePointer's `line` field (falls through to omitted
-   *  when NaN). */
-  dbTurnId: string;
-}
-
-export function dbTurnIdToLine(id: string): number | undefined {
-  const n = Number(id);
-  return Number.isFinite(n) ? n : undefined;
-}
-
 export async function scanFriction(opts: FrictionScanOptions = {}): Promise<Signal[]> {
-  const sinceMs = opts.since ? Date.parse(opts.since) : 0;
-  const maxTurns = opts.maxTurns ?? 1000;
-  const batchSize = opts.batchSize ?? 30;
-  const model = opts.model ?? resolveModelForEvolveTask(loadConfig(), "scanFriction").name;
-  const score = opts.scoreFn ?? defaultScoreFn;
-
-  const turns = await collectOrchestratorTurns(sinceMs, maxTurns);
-  if (turns.length === 0) return [];
-
-  const scored: Array<OrchestratorTurn & ScoredTurn> = [];
-  for (let i = 0; i < turns.length; i += batchSize) {
-    const batch = turns.slice(i, i + batchSize);
-    const payload: TurnForScoring[] = batch.map((t) => ({
-      turn_id: t.turnId,
-      user_text: truncate(t.userText, 800),
-      prev_assistant_text: truncate(t.prevAssistantText, 400),
-    }));
-    let results: ScoredTurn[];
-    try {
-      results = await score(payload, model);
-    } catch (err) {
-      // Better to score fewer turns than abort the whole pass; log loudly.
-
-      console.error(
-        `friction scoring batch ${i}-${i + batch.length - 1} failed: ${
-          err instanceof Error ? err.message : String(err)
-        }`,
-      );
-      continue;
-    }
-    const byId = new Map(results.map((r) => [r.turn_id, r]));
-    for (const turn of batch) {
-      const r = byId.get(turn.turnId);
-      if (!r) continue;
-      scored.push({ ...turn, ...r });
-    }
-  }
-
-  return bucketFrictionByCategory(scored);
+  return runScanner(frictionTaxonomy, opts, undefined);
 }
 
 export function bucketFrictionByCategory(scored: Array<OrchestratorTurn & ScoredTurn>): Signal[] {
@@ -184,134 +124,6 @@ export function bucketFrictionByCategory(scored: Array<OrchestratorTurn & Scored
   return [...buckets.values()];
 }
 
-function severityFor(score: number): SignalSeverity {
-  if (score >= 4) return "high";
-  if (score >= 3) return "medium";
-  return "low";
-}
-
-function severityRank(s: SignalSeverity): number {
-  return s === "high" ? 3 : s === "medium" ? 2 : 1;
-}
-
-/**
- * Resolve "which sessions belong to the orchestrator" by:
- *   1. Selecting all agents of type=orchestrator from the registry.
- *   2. Including their currently-attached sessionId, plus
- *   3. Every distinct sessionId in the `turns` table that's tagged with one
- *      of those agent names (catches historical sessions across resumes).
- */
-async function collectOrchestratorSessions(): Promise<Set<string>> {
-  const out = new Set<string>();
-  const db = getDb();
-
-  const orchAgents = await db
-    .select()
-    .from(schema.agents)
-    .where(eq(schema.agents.type, "orchestrator"));
-  if (orchAgents.length === 0) return out;
-
-  for (const a of orchAgents) {
-    if (a.sessionId) out.add(a.sessionId);
-  }
-
-  // Historic session enumeration via the `blocks` table — the legacy
-  // `turns` table is retired per ADR-016. Distinct session_id values
-  // for any orchestrator-named agent's blocks.
-  const orchNames = orchAgents.map((a) => a.name);
-  const historicSessions = await db
-    .selectDistinct({ sessionId: schema.blocks.sessionId })
-    .from(schema.blocks)
-    .where(inArray(schema.blocks.agentName, orchNames));
-  for (const t of historicSessions) {
-    if (t.sessionId) out.add(t.sessionId);
-  }
-
-  return out;
-}
-
-/**
- * Walk the `turns` table for orchestrator sessions, return user turns paired
- * with the immediately preceding assistant text. Skips pure tool_result
- * echoes and strips `<memory-context>` auto-injection blocks.
- *
- * Capped at `maxTurns`. Older sessions go first if we hit the cap.
- */
-export async function collectOrchestratorTurns(
-  sinceMs: number,
-  maxTurns: number,
-): Promise<OrchestratorTurn[]> {
-  const sessionIds = await collectOrchestratorSessions();
-  if (sessionIds.size === 0) return [];
-
-  // Ported to the `blocks` table per ADR-016 + ADR-023. Each block row is
-  // already a single semantic unit (text / thinking / tool_use / tool_result
-  // / user / mail); we no longer parse a JSONL-style `content_json` envelope
-  // with `type=user|assistant`. The friction scorer wants pairs of
-  // (user-typed text, immediately-preceding assistant text), so we walk
-  // blocks in ts order, accumulate the latest assistant text per session,
-  // and emit a turn whenever we see a user-role text/user block.
-  const db = getDb();
-  const rows = await db
-    .select()
-    .from(schema.blocks)
-    .where(inArray(schema.blocks.sessionId, [...sessionIds]));
-  // Sort ts ascending so older sessions get scored first when capped.
-  rows.sort((a, b) => a.ts.getTime() - b.ts.getTime());
-
-  const out: OrchestratorTurn[] = [];
-  const prevAssistantBySession = new Map<string, string>();
-
-  for (const r of rows) {
-    const rTsMs = r.ts.getTime();
-    if (sinceMs && rTsMs < sinceMs) continue;
-    if (out.length >= maxTurns) break;
-
-    // contentJson is jsonb; Drizzle returns it as the parsed object. Block
-    // payloads are shaped per-kind; we only need the `text` field for the
-    // text + user kinds.
-    const content = r.contentJson as { text?: string };
-
-    if (r.role === "assistant" && r.kind === "text") {
-      const txt = typeof content?.text === "string" ? content.text : "";
-      if (txt) prevAssistantBySession.set(r.sessionId, txt);
-      continue;
-    }
-
-    // User-typed blocks (chat input, scratch seed, agent_spawn, schedule
-    // task prompt). Skip mail-delivered user blocks — those aren't the
-    // user's free-text friction signal.
-    if (r.role !== "user" || r.kind !== "text") continue;
-    if (r.source === "mail") continue;
-
-    const userText = typeof content?.text === "string" ? content.text : "";
-    if (!userText.trim()) continue;
-    const cleaned = stripMemoryContext(userText).trim();
-    if (!cleaned) continue;
-
-    out.push({
-      sessionId: r.sessionId,
-      filePath: "",
-      turnId: r.turnId,
-      ts: r.ts.toISOString(),
-      userText: cleaned,
-      prevAssistantText: prevAssistantBySession.get(r.sessionId) ?? "",
-      dbTurnId: r.id,
-    });
-  }
-
-  return out;
-}
-
-function stripMemoryContext(text: string): string {
-  return text.replace(/<memory-context>[\s\S]*?<\/memory-context>\s*/g, "");
-}
-
-function truncate(s: string, max: number): string {
-  if (s.length <= max) return s;
-  return s.slice(0, max - 1) + "…";
-}
-
 const SCORING_SYSTEM_PROMPT = [
   "You score user messages from an orchestrator-agent transcript for friction:",
   "moments where the user is correcting, confused, frustrated, repeating themselves,",
@@ -390,6 +202,15 @@ const defaultScoreFn: ScoreFn = async (batch, model) => {
     }));
 };
 
-function clamp(n: number, lo: number, hi: number): number {
-  return Math.max(lo, Math.min(hi, n));
-}
+/** The friction adapter fed to the deep `runScanner` core. */
+const frictionTaxonomy: Taxonomy<TurnForScoring, ScoredTurn> = {
+  name: "friction",
+  modelTask: "scanFriction",
+  buildPayload: (t) => ({
+    turn_id: t.turnId,
+    user_text: truncate(t.userText, 800),
+    prev_assistant_text: truncate(t.prevAssistantText, 400),
+  }),
+  defaultScoreFn,
+  bucket: (scored) => bucketFrictionByCategory(scored),
+};
diff --git a/packages/evolve/src/scan-preferences.test.ts b/packages/evolve/src/scan-preferences.test.ts
index 9b0ea6cc..a584a8b2 100644
--- a/packages/evolve/src/scan-preferences.test.ts
+++ b/packages/evolve/src/scan-preferences.test.ts
@@ -4,7 +4,7 @@ import {
   type PreferenceCategory,
   type PreferenceScoredTurn,
 } from "./scan-preferences.js";
-import type { OrchestratorTurn } from "./scan-friction.js";
+import type { OrchestratorTurn } from "./collect.js";
 
 function turn(
   i: number,
diff --git a/packages/evolve/src/scan-preferences.ts b/packages/evolve/src/scan-preferences.ts
index 77bfc154..f56531a7 100644
--- a/packages/evolve/src/scan-preferences.ts
+++ b/packages/evolve/src/scan-preferences.ts
@@ -10,15 +10,23 @@
  * erosion, and "from now on use pnpm" is a calm score-0 turn that would be
  * filtered out — but it's exactly the kind of statement that should become a
  * memory. Different signal, different prompt, different bucketing.
+ *
+ * The collect → batch → score → bucket pipeline lives in run-scanner.ts; this
+ * file contributes only the preference `Taxonomy` (payload projection, default
+ * LLM scorer, and the per-category bucketing rule).
  */
 
-import { loadConfig, resolveModelForEvolveTask } from "@friday/shared";
-import type { EvidencePointer, Signal, SignalSeverity } from "./types.js";
+import type { EvidencePointer, Signal } from "./types.js";
+import { dbTurnIdToLine, type OrchestratorTurn } from "./collect.js";
 import {
-  collectOrchestratorTurns,
-  dbTurnIdToLine,
-  type OrchestratorTurn,
-} from "./scan-friction.js";
+  runScanner,
+  truncate,
+  clamp,
+  severityFor,
+  severityRank,
+  type RunScannerOptions,
+  type Taxonomy,
+} from "./run-scanner.js";
 import { signalHash } from "./scan.js";
 import { chat, extractJson } from "./llm.js";
 
@@ -31,18 +39,10 @@ export type PreferenceCategory =
   | "external_pointer"
   | "none";
 
-export interface PreferenceScanOptions {
-  /** ISO string lower bound — turns earlier than this are skipped. */
-  since?: string;
-  /** Maximum user turns to evaluate per scan. Default 1000. */
-  maxTurns?: number;
-  /** Turns per LLM batch. Default 30. */
-  batchSize?: number;
-  /** Model id override. Default resolves via `cfg.evolve.models.scanPreferences`. */
-  model?: string;
-  /** Inject for tests — replaces the real LLM call. */
-  scoreFn?: PreferenceScoreFn;
-}
+export type PreferenceScanOptions = RunScannerOptions<
+  TurnForPreferenceScoring,
+  PreferenceScoredTurn
+>;
 
 export interface PreferenceScoredTurn {
   turn_id: string;
@@ -64,42 +64,7 @@ export type PreferenceScoreFn = (
 ) => Promise<PreferenceScoredTurn[]>;
 
 export async function scanPreferences(opts: PreferenceScanOptions = {}): Promise<Signal[]> {
-  const sinceMs = opts.since ? Date.parse(opts.since) : 0;
-  const maxTurns = opts.maxTurns ?? 1000;
-  const batchSize = opts.batchSize ?? 30;
-  const model = opts.model ?? resolveModelForEvolveTask(loadConfig(), "scanPreferences").name;
-  const score = opts.scoreFn ?? defaultScoreFn;
-
-  const turns = await collectOrchestratorTurns(sinceMs, maxTurns);
-  if (turns.length === 0) return [];
-
-  const scored: Array<OrchestratorTurn & PreferenceScoredTurn> = [];
-  for (let i = 0; i < turns.length; i += batchSize) {
-    const batch = turns.slice(i, i + batchSize);
-    const payload: TurnForPreferenceScoring[] = batch.map((t) => ({
-      turn_id: t.turnId,
-      user_text: truncate(t.userText, 800),
-    }));
-    let results: PreferenceScoredTurn[];
-    try {
-      results = await score(payload, model);
-    } catch (err) {
-      console.error(
-        `preference scoring batch ${i}-${i + batch.length - 1} failed: ${
-          err instanceof Error ? err.message : String(err)
-        }`,
-      );
-      continue;
-    }
-    const byId = new Map(results.map((r) => [r.turn_id, r]));
-    for (const turn of batch) {
-      const r = byId.get(turn.turnId);
-      if (!r) continue;
-      scored.push({ ...turn, ...r });
-    }
-  }
-
-  return bucketByCategory(scored);
+  return runScanner(preferenceTaxonomy, opts, undefined);
 }
 
 export function bucketByCategory(scored: Array<OrchestratorTurn & PreferenceScoredTurn>): Signal[] {
@@ -149,21 +114,6 @@ export function bucketByCategory(scored: Array<OrchestratorTurn & PreferenceScor
   return [...buckets.values()];
 }
 
-function severityFor(score: number): SignalSeverity {
-  if (score >= 4) return "high";
-  if (score >= 3) return "medium";
-  return "low";
-}
-
-function severityRank(s: SignalSeverity): number {
-  return s === "high" ? 3 : s === "medium" ? 2 : 1;
-}
-
-function truncate(s: string, max: number): string {
-  if (s.length <= max) return s;
-  return s.slice(0, max - 1) + "…";
-}
-
 const SCORING_SYSTEM_PROMPT = [
   "You scan user messages from an orchestrator-agent transcript for AFFIRMATIVE",
   "preference statements — things the user said that should become persistent",
@@ -239,6 +189,14 @@ const defaultScoreFn: PreferenceScoreFn = async (batch, model) => {
     }));
 };
 
-function clamp(n: number, lo: number, hi: number): number {
-  return Math.max(lo, Math.min(hi, n));
-}
+/** The preference adapter fed to the deep `runScanner` core. */
+const preferenceTaxonomy: Taxonomy<TurnForPreferenceScoring, PreferenceScoredTurn> = {
+  name: "preference",
+  modelTask: "scanPreferences",
+  buildPayload: (t) => ({
+    turn_id: t.turnId,
+    user_text: truncate(t.userText, 800),
+  }),
+  defaultScoreFn,
+  bucket: (scored) => bucketByCategory(scored),
+};