From 6f0ab12cd43925a3552b8bf5aa8db7d2bf03b5b3 Mon Sep 17 00:00:00 2001
From: R4vager <tvschonleber@gmail.com>
Date: Fri, 24 Apr 2026 05:26:29 -0400
Subject: [PATCH 1/8] build strand cockpit tui

---
 README.md                        |   1 +
 config/policies.yaml             |   4 +-
 docs/RUNBOOK.md                  | 123 ++++++++-
 src/cli/commands/review.ts       | 102 ++++++--
 src/cli/commands/status.ts       |  62 ++++-
 src/cli/commands/tui.ts          |  12 +
 src/cli/tui/components.tsx       | 423 +++++++++++++++++++++----------
 src/cli/tui/dashboard.tsx        | 117 +++++++--
 src/cli/tui/hooks.ts             | 190 ++++++++++++++
 src/cli/tui/layout.ts            |  71 ++++++
 src/cli/tui/welcome.tsx          |   1 +
 src/clients/x.ts                 |  29 +++
 src/db/schema.sql                |  55 ++++
 src/loops/actor.ts               |  12 +-
 src/metrics/index.ts             | 220 ++++++++++++++++
 src/orchestrator.ts              | 153 ++++++++---
 tests/cli/cli.test.ts            |   1 +
 tests/cli/tui.test.ts            |  99 +++++++-
 tests/loops/actor-phase3.test.ts | 276 ++++++++++++++++++++
 19 files changed, 1719 insertions(+), 232 deletions(-)
 create mode 100644 src/cli/tui/layout.ts
 create mode 100644 src/metrics/index.ts
 create mode 100644 tests/loops/actor-phase3.test.ts
diff --git a/README.md b/README.md
index c73c70b..afd0d62 100644
--- a/README.md
+++ b/README.md
@@ -11,6 +11,7 @@ strand init                      # first-run wizard — pick provider, store key
 strand doctor                    # preflight health check
 strand run "summarize the README and commit a rewrite"   # one-shot agentic plan
 strand tui                       # welcome splash · [d] live dashboard
+strand cockpit                   # live operator cockpit for a pinned terminal
 strand status                    # orchestrator + reasoner/consolidator summary
 strand tasks list                # persisted TaskGraphs
 strand tasks show <id>           # graph + steps + reflections
diff --git a/config/policies.yaml b/config/policies.yaml
index eb4ac29..db24413 100644
--- a/config/policies.yaml
+++ b/config/policies.yaml
@@ -1,8 +1,10 @@
 # Must match PoliciesConfigSchema in src/config.ts.
 # Values here are the *ceiling*; `effectiveCap` multiplies by ramp_multiplier.
 
+# Phase 3 configuration: like/bookmark live, all other actions shadow
+# ramp_multiplier 0.5 = half-caps during ramp-up (every caps_per_day ceiling below is halved)
 mode: shadow
-ramp_multiplier: 0.25
+ramp_multiplier: 0.5
 
 caps_per_day:
   posts: 6
diff --git a/docs/RUNBOOK.md b/docs/RUNBOOK.md
index cd8a1f4..ca8b87d 100644
--- a/docs/RUNBOOK.md
+++ b/docs/RUNBOOK.md
@@ -159,11 +159,132 @@ can attribute label quality to a specific prompt version.
 
 Before enabling low-risk live actions (`like` + `bookmark`):
 
+- [ ] `pnpm strand review gate-check` exits 0 (≥100 labeled, ≥80% agreement)
 - [ ] `pnpm strand review agreement --json | jq '.gate.met'` → `true`
-  (≥100 labeled candidates AND ≥80% agreement in `mode=shadow`)
+  (confirms same criteria with full confusion matrix)
 - [ ] Confusion matrix shows no systematic `false_approve` bias
   (i.e. policy approves ≤5 actions the operator would reject)
 - [ ] No `reasoner.candidate_cap_enforced` warnings sustained over ≥48h
   (model consistently emits ≤5 — overrun implies prompt drift)
 - [ ] Actor dry-run verified: `action_log` rows show `status='executed'` in
   shadow mode with the write path short-circuited before the X API call
+
+## Phase 3: Low-risk actions live
+
+In Phase 3 the Actor enables **only** `like` and `bookmark` in live mode. All
+other actions (`reply`, `quote`, `post`, `follow`, `dm`) remain in shadow mode
+even when `STRAND_MODE=live`.
+
+### Phase 3 gate checklist
+
+Before enabling live actions:
+
+```bash
+# 1. Verify gate criteria met (≥100 labeled, ≥80% agreement)
+pnpm strand review gate-check
+
+# 2. Verify half-caps configured (ramp_multiplier should be 0.5)
+grep -E '^ramp_multiplier:' config/policies.yaml  # should print "ramp_multiplier: 0.5"
+
+# 3. Check metrics baseline (run for 24h in shadow with metrics enabled)
+pnpm strand status --metrics
+```
+
+### Enabling live mode
+
+```bash
+# 1. Confirm readiness
+pnpm strand review gate-check --json | jq '.ready'  # should be true
+
+# 2. Set live mode (only like/bookmark will actually go live)
+export STRAND_MODE=live
+export STRAND_HALT=false
+
+# 3. Restart orchestrator
+pkill -SIGTERM -f "strand start"
+pnpm strand start &
+
+# 4. Record transition
+echo "Phase 3 live start: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> ./data/phase3.log
+```
+
+### Phase 3 kill switch (drain mode)
+
+If anything goes wrong, the kill switch implements drain semantics:
+
+```bash
+# STRAND_HALT stops the Reasoner (no new candidates)
+# Perceiver continues (reads are safe)
+# In-flight actions complete; new actions are rejected
+export STRAND_HALT=true
+```
+
+Verify drain state:
+```bash
+# Check halt is active
+pnpm strand status --json | jq '.env.strand_halt'  # should be "true"
+
+# Check no new candidates being emitted (reasoner_runs should stop growing)
+pnpm strand status | grep reasoner_runs
+```
+
+### Monitoring Phase 3 health
+
+Check metrics dashboard every 4 hours:
+
+```bash
+# Full metrics dashboard
+pnpm strand status --metrics
+
+# Key metrics to watch:
+# - X API health: rate limits healthy, monthly cap < 50%
+# - Follower delta: no sudden negative spikes (>10% drop)
+# - Error rates: < 5% failure rate on like/bookmark
+```
+
+### Cap enforcement (half-caps during ramp-up)
+
+Phase 3 uses `ramp_multiplier: 0.5` in `policies.yaml`:
+
+| Action | Daily Cap (full) | Phase 3 Cap (0.5x) |
+|--------|------------------|-------------------|
+| likes | 200 | 100 |
+| bookmarks | 50 | 25 |
+
+Verify caps in effect:
+```bash
+# Check action_log for cap enforcement
+sqlite3 ./data/strand.db "SELECT kind, COUNT(*) FROM action_log WHERE status='executed' AND created_at > datetime('now', '-24 hours') GROUP BY kind"
+```
+
+### Rollback to shadow
+
+If you need to revert:
+
+```bash
+# 1. Halt first (drain in-flight)
+export STRAND_HALT=true
+sleep 30  # wait for drain
+
+# 2. Switch back to shadow
+export STRAND_MODE=shadow
+export STRAND_HALT=false
+
+# 3. Restart
+pkill -SIGTERM -f "strand start"
+pnpm strand start &
+
+# 4. Record rollback
+echo "Phase 3 rollback: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> ./data/phase3.log
+```
+
+### Gate to Phase 4
+
+Before enabling `reply` live:
+
+- [ ] Phase 3 ran clean for ≥ 72 hours
+- [ ] `like` and `bookmark` error rate < 1%
+- [ ] No X rate limit 429s sustained
+- [ ] Follower delta stable (no negative trend)
+- [ ] `pnpm strand review agreement --mode=live` shows ≥90% agreement
+- [ ] Human review queue shows manageable volume
diff --git a/src/cli/commands/review.ts b/src/cli/commands/review.ts
index ebe9290..66afe66 100644
--- a/src/cli/commands/review.ts
+++ b/src/cli/commands/review.ts
@@ -28,11 +28,11 @@ export function registerReviewCmd(program: Command, _ctx: CliContext): void {
           "SELECT id, decision_id, payload_json, reasons_json FROM human_review_queue WHERE decided_at IS NULL ORDER BY created_at ASC LIMIT 50",
         )
         .all() as Array<{
-        id: number;
-        decision_id: string;
-        payload_json: string;
-        reasons_json: string | null;
-      }>;
+          id: number;
+          decision_id: string;
+          payload_json: string;
+          reasons_json: string | null;
+        }>;
 
       if (rows.length === 0) {
         printLine("no pending reviews");
@@ -77,17 +77,17 @@ export function registerReviewCmd(program: Command, _ctx: CliContext): void {
            LIMIT ?`,
         )
         .all(opts.mode, limit) as Array<{
-        id: number;
-        decision_id: string;
-        kind: string;
-        status: string;
-        payload_json: string;
-        rationale: string | null;
-        confidence: number | null;
-        relevance: number | null;
-        reasons_json: string | null;
-        created_at: string;
-      }>;
+          id: number;
+          decision_id: string;
+          kind: string;
+          status: string;
+          payload_json: string;
+          rationale: string | null;
+          confidence: number | null;
+          relevance: number | null;
+          reasons_json: string | null;
+          created_at: string;
+        }>;
 
       if (rows.length === 0) {
         printLine(`no unlabeled candidates in mode=${opts.mode}`);
@@ -156,11 +156,11 @@ export function registerReviewCmd(program: Command, _ctx: CliContext): void {
            WHERE operator_label IS NOT NULL AND mode = ?`,
         )
         .all(opts.mode) as Array<{
-        status: string;
-        operator_label: string;
-        confidence: number | null;
-        relevance: number | null;
-      }>;
+          status: string;
+          operator_label: string;
+          confidence: number | null;
+          relevance: number | null;
+        }>;
 
       const total = rows.length;
       let agree = 0;
@@ -254,4 +254,64 @@ export function registerReviewCmd(program: Command, _ctx: CliContext): void {
         }
       }
     });
+
+  // ─── Phase 2: `review gate-check` ─────────────────────────────
+  review
+    .command("gate-check")
+    .description("programmatic Phase 3 gate check — exits 0 if ready, 1 if not")
+    .option("--min-labeled <n>", "minimum labeled candidates", "100")
+    .option("--min-agreement <pct>", "minimum agreement %", "80")
+    .option("--mode <mode>", "filter by mode", "shadow")
+    .option("--json", "emit JSON result to stdout")
+    .action(async (opts: { minLabeled: string; minAgreement: string; mode: string; json?: boolean }) => {
+      const { db } = await import("@/db");
+
+      // parseFloat, not parseInt: parseInt("1e3") is 1 and parseInt("80.5") is 80, either of
+      // which silently loosens the gate. Unparseable/negative input uses the default; 0 is kept.
+      const threshold = (raw: string, fallback: number): number => {
+        const n = Number.parseFloat(raw);
+        return Number.isFinite(n) && n >= 0 ? n : fallback;
+      };
+      const minLabeled = threshold(opts.minLabeled, 100);
+      const minAgreement = threshold(opts.minAgreement, 80);
+
+      const rows = db()
+        .prepare(
+          `SELECT status, operator_label
+           FROM action_log
+           WHERE operator_label IS NOT NULL AND mode = ?`,
+        )
+        .all(opts.mode) as Array<{ status: string; operator_label: string }>;
+
+      // "unclear" labels count toward the labeled total but not toward agreement.
+      const total = rows.length;
+      let agree = 0;
+      let decisive = 0;
+      for (const r of rows) {
+        if (r.operator_label === "unclear") continue;
+        decisive++;
+        const policyApproved = r.status === "approved" || r.status === "executed";
+        if (policyApproved === (r.operator_label === "good")) agree++;
+      }
+      const agreementPct = decisive > 0 ? (agree / decisive) * 100 : 0;
+      const gateMet = total >= minLabeled && agreementPct >= minAgreement;
+
+      if (opts.json) {
+        const result = {
+          ready: gateMet,
+          mode: opts.mode,
+          total_labeled: total,
+          min_labeled: minLabeled,
+          agreement_pct: Number(agreementPct.toFixed(2)),
+          min_agreement_pct: minAgreement,
+        };
+        printLine(JSON.stringify(result, null, 2));
+      } else {
+        printLine(gateMet ? "READY" : "NOT_READY");
+        printLine(`  labeled: ${total}/${minLabeled}`);
+        printLine(`  agreement: ${agreementPct.toFixed(2)}% (min ${minAgreement}%)`);
+      }
+
+      process.exit(gateMet ? 0 : 1);
+    });
 }
diff --git a/src/cli/commands/status.ts b/src/cli/commands/status.ts
index cb92639..61133bf 100644
--- a/src/cli/commands/status.ts
+++ b/src/cli/commands/status.ts
@@ -7,10 +7,50 @@ export function registerStatusCmd(program: Command, _ctx: CliContext): void {
     .command("status")
     .description("orchestrator status + recent events / actions / reasoner / consolidator rows")
     .option("--json", "emit status as JSON for programmatic checks")
-    .action(async (opts: { json?: boolean }) => {
+    .option("--metrics", "show Phase 3 health metrics dashboard")
+    .action(async (opts: { json?: boolean; metrics?: boolean }) => {
       const { db } = await import("@/db");
       const dbh = db();
 
+      if (opts.metrics) {
+        // Phase 3: Health metrics dashboard
+        const { getHealthSummary } = await import("@/metrics");
+        const metrics = getHealthSummary();
+
+        printLine("=== Phase 3 Health Metrics ===");
+        printLine("");
+
+        printLine("--- X API Health (last hour) ---");
+        if (metrics.xHealth.length === 0) {
+          printLine("  No health snapshots recorded yet");
+        } else {
+          for (const h of metrics.xHealth.slice(0, 5)) {
+            printLine(`  [${h.sampledAt}] ${h.endpoint}: ${h.healthy ? "healthy" : "degraded"}`);
+          }
+        }
+        printLine("");
+
+        printLine("--- Follower Delta ---");
+        if (metrics.followerDelta) {
+          printLine(`  Current: ${metrics.followerDelta.followersCount}`);
+          printLine(`  24h change: ${metrics.followerDelta.delta24h ?? 0}`);
+          printLine(`  Last sampled: ${metrics.followerDelta.sampledAt}`);
+        } else {
+          printLine("  No follower data recorded yet");
+        }
+        printLine("");
+
+        printLine("--- Error Rates (last 24h) ---");
+        if (metrics.errorRates.length === 0) {
+          printLine("  No errors recorded");
+        } else {
+          for (const e of metrics.errorRates.slice(0, 10)) {
+            printLine(`  [${e.hourBucket}] ${e.kind}.${e.errorCode}: ${e.count}`);
+          }
+        }
+        return;
+      }
+
       if (opts.json) {
         // JSON output for 48h sanity checks
         const eventCounts = dbh
@@ -100,11 +140,11 @@ export function registerStatusCmd(program: Command, _ctx: CliContext): void {
           "SELECT tick_at, candidate_count, tool_call_count, cost_in_usd_ticks FROM reasoner_runs ORDER BY tick_at DESC LIMIT 5",
         )
         .all() as Array<{
-        tick_at: string;
-        candidate_count: number;
-        tool_call_count: number;
-        cost_in_usd_ticks: number | null;
-      }>;
+          tick_at: string;
+          candidate_count: number;
+          tool_call_count: number;
+          cost_in_usd_ticks: number | null;
+        }>;
       printLine(`=== last ${reasoner.length} reasoner_runs ===`);
       for (const r of reasoner) {
         printLine(
@@ -118,11 +158,11 @@ export function registerStatusCmd(program: Command, _ctx: CliContext): void {
           "SELECT status, batch_id, completed_at, created_at FROM consolidator_runs ORDER BY created_at DESC LIMIT 5",
         )
         .all() as Array<{
-        status: string;
-        batch_id: string | null;
-        completed_at: string | null;
-        created_at: string;
-      }>;
+          status: string;
+          batch_id: string | null;
+          completed_at: string | null;
+          created_at: string;
+        }>;
       printLine(`=== last ${consolidator.length} consolidator_runs ===`);
       for (const c of consolidator) {
         printLine(
diff --git a/src/cli/commands/tui.ts b/src/cli/commands/tui.ts
index 6a572bf..829ab77 100644
--- a/src/cli/commands/tui.ts
+++ b/src/cli/commands/tui.ts
@@ -15,4 +15,16 @@ export function registerTuiCmd(program: Command, _ctx: CliContext): void {
         pollMs: Number.isFinite(n) && n > 0 ? n : 2000,
       });
     });
+
+  program
+    .command("cockpit")
+    .description("open the live Strand operator cockpit")
+    .option("--poll-ms <n>", "dashboard poll cadence in ms", "2000")
+    .action(async (opts: { pollMs: string }) => {
+      const n = Number(opts.pollMs);
+      await launchTui({
+        dashboard: true,
+        pollMs: Number.isFinite(n) && n > 0 ? n : 2000,
+      });
+    });
 }
diff --git a/src/cli/tui/components.tsx b/src/cli/tui/components.tsx
index a273859..b00f9d2 100644
--- a/src/cli/tui/components.tsx
+++ b/src/cli/tui/components.tsx
@@ -1,33 +1,33 @@
 /**
  * Stateless presentational components for the Strand TUI.
  *
- * Every piece of data is passed in as props — no hook calls here, no side
- * effects. Makes these trivial to render in tests with whatever mock data we
- * want.
+ * Every visible row is sized before Ink sees it. That keeps the cockpit stable
+ * in 80-column terminals and avoids flex-row wrapping between adjacent Text
+ * nodes.
  */
 
 import type { PlanStep, StepStatus, TaskGraph } from "@/agent/types";
 import { Box, Text } from "ink";
-import Spinner from "ink-spinner";
-import type { ReactElement } from "react";
-import type { InvocationRow, RunSummary } from "./hooks";
+import type { ReactElement, ReactNode } from "react";
+import type { InvocationRow, OperatorSnapshot, RunSummary } from "./hooks";
+import { fit, kv, pad, panelInnerWidth, ratioBar, sign, truncate } from "./layout";
 
-// ─── Visual helpers ─────────────────────────────────────────────────────────
+// --- Visual helpers ---------------------------------------------------------
 
 function statusGlyph(s: StepStatus): string {
   switch (s) {
     case "completed":
-      return "\u2713";
+      return "ok";
     case "running":
-      return "\u27F3";
+      return ">>";
     case "failed":
-      return "\u2717";
+      return "!!";
     case "skipped":
-      return "\u2192";
+      return "--";
     case "abandoned":
-      return "\u00D7";
+      return "xx";
     case "pending":
-      return "·";
+      return "..";
   }
 }
 
@@ -49,75 +49,202 @@ function statusColor(s: StepStatus): string {
 }
 
 function shortId(id: string): string {
-  return id.length > 4 ? `${id.slice(0, 4)}\u2026` : id;
+  return id.length > 6 ? `${id.slice(0, 6)}...` : id;
 }
 
 function fmtTime(iso: string): string {
   try {
     const d = new Date(iso);
+    if (!Number.isFinite(d.getTime())) return iso.slice(11, 19) || iso;
     const hh = String(d.getHours()).padStart(2, "0");
     const mm = String(d.getMinutes()).padStart(2, "0");
     const ss = String(d.getSeconds()).padStart(2, "0");
     return `${hh}:${mm}:${ss}`;
   } catch {
-    return iso.slice(11, 19);
+    return iso.slice(11, 19) || iso;
   }
 }
 
 function fmtDuration(ms: number | null): string {
-  if (ms == null) return "\u2014";
+  if (ms == null) return "-";
   if (ms < 1000) return `${ms}ms`;
   return `${(ms / 1000).toFixed(1)}s`;
 }
 
 function fmtUsdFromTicks(ticks: number): string {
-  // 1 tick = 1e-10 USD.
   const usd = ticks / 1e10;
   if (usd === 0) return "$0.00";
   if (usd < 0.01) return `$${usd.toFixed(4)}`;
   return `$${usd.toFixed(2)}`;
 }
 
-// ─── Header ─────────────────────────────────────────────────────────────────
+function fmtMinutes(minutes: number | null): string {
+  if (minutes == null) return "-";
+  if (minutes < 60) return `${minutes}m`;
+  const hours = Math.floor(minutes / 60);
+  const rest = minutes % 60;
+  if (hours < 48) return rest === 0 ? `${hours}h` : `${hours}h ${rest}m`;
+  return `${Math.floor(hours / 24)}d`;
+}
+
+function fmtMaybeCount(n: number | null): string {
+  return n == null ? "-" : String(n);
+}
+
+function fmtNumber(n: number): string {
+  return String(n).replace(/\B(?=(\d{3})+(?!\d))/g, ",");
+}
+
+function panelColor(issueCount: number): string {
+  if (issueCount > 0) return "red";
+  return "green";
+}
+
+function Panel({
+  title,
+  width,
+  color = "gray",
+  children,
+}: {
+  title: string;
+  width: number;
+  color?: string;
+  children: ReactNode;
+}): ReactElement {
+  const inner = panelInnerWidth(width);
+  return (
+    <Box width={width} flexDirection="column" borderStyle="single" borderColor={color} paddingX={1}>
+      <Text bold color={color}>
+        {fit(title, inner)}
+      </Text>
+      {children}
+    </Box>
+  );
+}
+
+function PanelLine({
+  width,
+  color,
+  children,
+}: {
+  width: number;
+  color?: string;
+  children: string;
+}): ReactElement {
+  const line = fit(children, panelInnerWidth(width));
+  if (color) return <Text color={color}>{line}</Text>;
+  return <Text>{line}</Text>;
+}
+
+// --- Header ----------------------------------------------------------------
 
 export interface HeaderProps {
   provider: string;
   model: string;
   mode: string;
+  halt: string;
+  tier: string;
   credentialStore: string;
   tenant: string | null;
+  width?: number;
 }
 
 export function Header(props: HeaderProps): ReactElement {
+  const width = props.width ?? 80;
+  const inner = Math.max(20, width - 2);
+  const modelBudget = Math.max(14, inner - 42);
+  const model = truncate(`${props.provider}/${props.model}`, modelBudget);
+  const halt = props.halt === "true" ? "HALTED" : "armed";
   return (
-    <Box flexDirection="column" paddingX={1} paddingY={0}>
-      <Box>
-        <Text bold color="magenta">
-          Strand TUI
-        </Text>
-        <Text color="gray"> — live agent harness</Text>
-      </Box>
-      <Box>
-        <Text color="gray">provider: </Text>
-        <Text color="white">
-          {props.provider}/{props.model}
-        </Text>
-        <Text color="gray"> mode: </Text>
-        <Text color={props.mode === "live" ? "red" : props.mode === "gated" ? "yellow" : "green"}>
-          {props.mode}
-        </Text>
-      </Box>
-      <Box>
-        <Text color="gray">credential store: </Text>
-        <Text>{props.credentialStore}</Text>
-        <Text color="gray"> tenant: </Text>
-        <Text>{props.tenant ?? "\u2014"}</Text>
-      </Box>
+    <Box flexDirection="column" paddingX={1}>
+      <Text bold color="magenta">
+        {fit("STRAND COCKPIT - live agent harness", inner)}
+      </Text>
+      <Text color="gray">
+        {fit(`model ${model} | mode ${props.mode} | halt ${halt} | tier ${props.tier}`, inner)}
+      </Text>
+      <Text color="gray">
+        {fit(`credential store ${props.credentialStore} | tenant ${props.tenant ?? "-"}`, inner)}
+      </Text>
     </Box>
   );
 }
 
-// ─── TaskGraphsPane ─────────────────────────────────────────────────────────
+// --- Operator cockpit -------------------------------------------------------
+
+export interface OperatorPaneProps {
+  snapshot: OperatorSnapshot;
+  loading: boolean;
+  width: number;
+}
+
+function healthColor(row: OperatorSnapshot["x"]["latestHealth"][number]): string {
+  if (row.healthy === 0) return "red";
+  if (row.remaining != null && row.limit != null && row.limit > 0) {
+    const ratio = row.remaining / row.limit;
+    if (ratio < 0.1) return "red";
+    if (ratio < 0.25) return "yellow";
+  }
+  return "green";
+}
+
+function healthText(row: OperatorSnapshot["x"]["latestHealth"][number]): string {
+  const state = healthColor(row) === "green" ? "ok" : healthColor(row);
+  return `${row.endpoint} ${fmtMaybeCount(row.remaining)}/${fmtMaybeCount(row.limit)} ${state}`;
+}
+
+function actionsByKindText(rows: OperatorSnapshot["actions24h"]["byKind"]): string {
+  if (rows.length === 0) return "none";
+  return rows.map((r) => `${r.kind}:${r.count}`).join(" ");
+}
+
+/** Operator summary panel: review backlog, 24h actions, guardrails, reach and X API health. */
+export function OperatorPane({ snapshot, loading, width }: OperatorPaneProps): ReactElement {
+  const { actions24h, guardrails, review, x } = snapshot;
+  const barWidth = Math.max(8, Math.min(18, Math.floor(panelInnerWidth(width) / 5)));
+  const executedBar = ratioBar(actions24h.executed, Math.max(1, actions24h.total), barWidth);
+  const guardIssueCount = guardrails.dlqOpen + actions24h.failed + actions24h.rejected;
+  const usageBar =
+    x.monthlyUsed == null || x.monthlyCap == null
+      ? "[-]"
+      : ratioBar(x.monthlyUsed, x.monthlyCap, barWidth);
+  const monthly =
+    x.monthlyUsed == null || x.monthlyCap == null ? "-" : `${x.monthlyUsed}/${x.monthlyCap}`;
+  const followers = snapshot.followers
+    ? `${fmtNumber(snapshot.followers.count)} (${sign(snapshot.followers.delta24h)} 24h)`
+    : "-";
+  const oldest = fmtMinutes(review.oldestMinutes);
+  const kinds = actionsByKindText(actions24h.byKind);
+  const shownHealth = x.latestHealth.slice(0, 3);
+  const latestHealth = shownHealth.map(healthText).join(" | ");
+  // Colour the HEALTH row by the worst endpoint shown, so a degraded or nearly
+  // rate-limited endpoint is not rendered green.
+  const tones = shownHealth.map(healthColor);
+  const healthTone = tones.includes("red") ? "red" : tones.includes("yellow") ? "yellow" : "green";
+  const title = loading ? "operator cockpit / syncing" : "operator cockpit";
+
+  return (
+    <Panel title={title} width={width} color={panelColor(guardIssueCount)}>
+      <PanelLine width={width} color={review.open > 0 ? "yellow" : "green"}>
+        {`MISSION ${kv("review", review.open)} open | oldest ${oldest} | actions ${actions24h.total}`}
+      </PanelLine>
+      <PanelLine width={width} color={actions24h.failed > 0 ? "red" : "cyan"}>
+        {`PULSE   exec ${executedBar} ${actions24h.executed}/${actions24h.total} | approved ${actions24h.approved} | kinds ${kinds}`}
+      </PanelLine>
+      <PanelLine width={width} color={panelColor(guardIssueCount)}>
+        {`SHIELD  cooldowns ${guardrails.activeCooldowns} | dlq ${guardrails.dlqOpen} | dedup ${guardrails.recentDuplicateHashes} | rejected ${actions24h.rejected} | failed ${actions24h.failed}`}
+      </PanelLine>
+      <PanelLine width={width} color="magenta">
+        {`REACH   x usage ${usageBar} ${monthly} | followers ${followers}`}
+      </PanelLine>
+      <PanelLine width={width} color={latestHealth.length === 0 ? "gray" : healthTone}>
+        {`HEALTH  ${latestHealth.length === 0 ? "no snapshots" : latestHealth}`}
+      </PanelLine>
+    </Panel>
+  );
+}
+
+// --- Task graphs ------------------------------------------------------------
 
 export interface TaskGraphsPaneProps {
   graphs: TaskGraph[];
@@ -125,118 +252,96 @@ export interface TaskGraphsPaneProps {
   selectedIdx: number;
   expanded: boolean;
   focused: boolean;
+  width: number;
 }
 
-function StepLine({ step }: { step: PlanStep }): ReactElement {
-  const glyph = statusGlyph(step.status);
-  const color = statusColor(step.status);
+function stepLine(step: PlanStep, width: number): string {
   const duration =
     step.startedAt && step.completedAt
       ? fmtDuration(new Date(step.completedAt).getTime() - new Date(step.startedAt).getTime())
       : step.startedAt
         ? fmtDuration(Date.now() - new Date(step.startedAt).getTime())
-        : null;
-  return (
-    <Box>
-      <Text color={color}> {glyph} </Text>
-      <Text>{step.status.padEnd(9, " ")}</Text>
-      <Text color="white"> {step.goal.slice(0, 48)}</Text>
-      {duration ? <Text color="gray"> ({duration})</Text> : null}
-      {step.error ? <Text color="red"> error: {step.error.slice(0, 32)}</Text> : null}
-    </Box>
-  );
+        : "-";
+  const prefix = `${statusGlyph(step.status)} ${pad(step.status, 9)} ${duration.padStart(8)} `;
+  const suffix = step.error ? ` | error ${step.error}` : "";
+  return `${prefix}${truncate(step.goal, Math.max(12, width - prefix.length - suffix.length))}${suffix}`;
 }
 
-function GraphLine({ g, selected }: { g: TaskGraph; selected: boolean }): ReactElement {
+function graphLine(g: TaskGraph, selected: boolean, width: number): string {
   const total = g.steps.length;
   const done = g.steps.filter((s) => s.status === "completed").length;
   const running = g.steps.some((s) => s.status === "running");
   const cursor = selected ? ">" : " ";
-  return (
-    <Box>
-      <Text color={selected ? "cyan" : "white"}>{cursor} </Text>
-      <Text color="gray">{shortId(g.id)} </Text>
-      <Text color={statusColor(g.status)}>{g.status.padEnd(10, " ")}</Text>
-      <Text>"{g.rootGoal.slice(0, 42)}"</Text>
-      <Text color="gray">
-        {" "}
-        {done} / {total} steps
-      </Text>
-      {running ? (
-        <Text color="cyan">
-          {" "}
-          <Spinner type="dots" />
-        </Text>
-      ) : null}
-    </Box>
-  );
+  const prefix = `${cursor} ${shortId(g.id)} ${pad(g.status, 10)} `;
+  const suffix = ` ${done}/${total} steps${running ? " running" : ""}`;
+  return `${prefix}${truncate(g.rootGoal, Math.max(10, width - prefix.length - suffix.length))}${suffix}`;
 }
 
 export function TaskGraphsPane(props: TaskGraphsPaneProps): ReactElement {
+  const inner = panelInnerWidth(props.width);
+  const title = `active task graphs${props.focused ? " / focused" : ""}`;
   return (
-    <Box flexDirection="column" paddingX={1}>
-      <Text color={props.focused ? "cyan" : "gray"}>
-        {"─── active task graphs "}
-        {props.focused ? "[focused]" : ""}
-      </Text>
+    <Panel title={title} width={props.width} color={props.focused ? "cyan" : "gray"}>
       {props.loading && props.graphs.length === 0 ? (
-        <Box>
-          <Text color="gray">
-            <Spinner type="dots" /> loading…
-          </Text>
-        </Box>
+        <PanelLine width={props.width} color="gray">
+          {"loading active graphs"}
+        </PanelLine>
       ) : props.graphs.length === 0 ? (
-        <Text color="gray"> (no active graphs)</Text>
+        <PanelLine width={props.width} color="gray">
+          {"(no active graphs)"}
+        </PanelLine>
       ) : (
         props.graphs.map((g, i) => (
           <Box key={g.id} flexDirection="column">
-            <GraphLine g={g} selected={i === props.selectedIdx} />
+            <PanelLine
+              width={props.width}
+              color={i === props.selectedIdx ? "cyan" : statusColor(g.status)}
+            >
+              {graphLine(g, i === props.selectedIdx, inner)}
+            </PanelLine>
             {props.expanded && i === props.selectedIdx
-              ? g.steps.map((s) => <StepLine key={s.id} step={s} />)
+              ? g.steps.map((s) => (
+                  <PanelLine key={s.id} width={props.width} color={statusColor(s.status)}>
+                    {stepLine(s, inner)}
+                  </PanelLine>
+                ))
               : null}
           </Box>
         ))
       )}
-    </Box>
+    </Panel>
   );
 }
 
-// ─── RunSummaryPane ─────────────────────────────────────────────────────────
+// --- Run summary ------------------------------------------------------------
 
 export interface RunSummaryPaneProps {
   summary: RunSummary;
   loading: boolean;
+  width: number;
 }
 
 export function RunSummaryPane(props: RunSummaryPaneProps): ReactElement {
   const r = props.summary.reasoner;
   const c = props.summary.consolidator;
+  const inner = panelInnerWidth(props.width);
+  const barWidth = Math.max(8, Math.min(18, Math.floor(inner / 5)));
+  const title = props.loading ? "run pulse 24h / syncing" : "run pulse 24h";
   return (
-    <Box flexDirection="column" paddingX={1}>
-      <Text color="gray">{"─── recent runs (24h)"}</Text>
-      <Box>
-        <Text color="gray">reasoner: </Text>
-        <Text>{r.ticks} ticks · </Text>
-        <Text>{r.candidates} candidates · </Text>
-        <Text>{r.toolCalls} tool calls · </Text>
-        <Text color="yellow">{fmtUsdFromTicks(r.costUsdTicks)}</Text>
-      </Box>
-      <Box>
-        <Text color="gray">consolidator: </Text>
-        <Text>{c.total} runs · </Text>
-        <Text color="green">{c.completed} completed</Text>
-        <Text> · </Text>
-        <Text color="red">{c.failed} failed</Text>
-        <Text> · </Text>
-        <Text color="cyan">{c.inProgress} in-progress</Text>
-        <Text> · </Text>
-        <Text color="gray">{c.queued} queued</Text>
-      </Box>
-    </Box>
+    <Panel title={title} width={props.width} color={c.failed > 0 ? "yellow" : "green"}>
+      <PanelLine width={props.width} color="cyan">
+        {`reasoner ${r.ticks} ticks | ${r.candidates} candidates | ${r.toolCalls} tool calls | ${fmtUsdFromTicks(
+          r.costUsdTicks,
+        )}`}
+      </PanelLine>
+      <PanelLine width={props.width} color={c.failed > 0 ? "yellow" : "green"}>
+        {`consolidator ${ratioBar(c.completed, Math.max(1, c.total), barWidth)} ${c.total} runs | ok ${c.completed} | fail ${c.failed} | wip ${c.inProgress} | queue ${c.queued}`}
+      </PanelLine>
+    </Panel>
   );
 }
 
-// ─── InvocationsPane ────────────────────────────────────────────────────────
+// --- Invocations ------------------------------------------------------------
 
 export interface InvocationsPaneProps {
   rows: InvocationRow[];
@@ -244,6 +349,7 @@ export interface InvocationsPaneProps {
   focused: boolean;
   scrollOffset: number;
   maxRows?: number;
+  width: number;
 }
 
 export function InvocationsPane(props: InvocationsPaneProps): ReactElement {
@@ -251,46 +357,99 @@ export function InvocationsPane(props: InvocationsPaneProps): ReactElement {
   const total = props.rows.length;
   const start = Math.min(Math.max(0, props.scrollOffset), Math.max(0, total - 1));
   const visible = props.rows.slice(start, start + maxRows);
+  const inner = panelInnerWidth(props.width);
+  const title = `tool invocations${props.focused ? " / focused" : ""} (${visible.length}/${total})`;
 
   return (
-    <Box flexDirection="column" paddingX={1}>
-      <Text color={props.focused ? "cyan" : "gray"}>
-        {"─── tool invocations "}
-        {props.focused ? "[focused] " : ""}
-        (showing {visible.length}/{total})
-      </Text>
+    <Panel title={title} width={props.width} color={props.focused ? "cyan" : "gray"}>
       {total === 0 ? (
-        <Text color="gray"> (no invocations yet)</Text>
+        <PanelLine width={props.width} color="gray">
+          {"(no invocations yet)"}
+        </PanelLine>
       ) : (
-        visible.map((r) => (
-          <Box key={r.id}>
-            <Text color="gray">{fmtTime(r.at)} </Text>
-            <Text color={r.error ? "red" : "cyan"}>{r.toolName.padEnd(16, " ")}</Text>
-            <Text color="gray"> {fmtDuration(r.durationMs).padStart(8, " ")}</Text>
-            {r.error ? <Text color="red"> {r.error.slice(0, 40)}</Text> : null}
-          </Box>
-        ))
+        visible.map((r) => {
+          const prefix = `${fmtTime(r.at)} ${pad(truncate(r.toolName, 18), 18)} ${pad(
+            fmtDuration(r.durationMs),
+            8,
+          )}`;
+          const error = r.error ? ` error ${r.error}` : "";
+          return (
+            <PanelLine key={r.id} width={props.width} color={r.error ? "red" : "cyan"}>
+              {fit(`${prefix}${truncate(error, Math.max(0, inner - prefix.length))}`, inner)}
+            </PanelLine>
+          );
+        })
       )}
-    </Box>
+    </Panel>
   );
 }
 
-// ─── Footer ─────────────────────────────────────────────────────────────────
+// --- Help + footer ----------------------------------------------------------
+
+export interface HelpEntry {
+  key: string;
+  description: string;
+}
+
+export const HELP_ENTRIES: HelpEntry[] = [
+  { key: "?", description: "toggle this help menu" },
+  { key: "tab", description: "switch focus between graphs and tools" },
+  { key: "up/down", description: "move graph selection or invocation scroll" },
+  { key: "enter", description: "expand or collapse the selected graph" },
+  { key: "r", description: "refresh every data panel once" },
+  { key: "p", description: "pause or resume polling" },
+  { key: "w", description: "return to the welcome screen" },
+  { key: "q / ctrl-c", description: "quit Strand cockpit" },
+  { key: "esc", description: "close help" },
+];
+
+export interface HelpPanelProps {
+  width: number;
+  focusedPane: "graphs" | "invocations";
+  paused: boolean;
+}
+
+export function HelpPanel(props: HelpPanelProps): ReactElement {
+  return (
+    <Panel title="help / cockpit controls" width={props.width} color="yellow">
+      <PanelLine width={props.width} color="gray">
+        {`state focus ${props.focusedPane} | polling ${props.paused ? "paused" : "live"}`}
+      </PanelLine>
+      {HELP_ENTRIES.map((entry) => (
+        <PanelLine key={entry.key} width={props.width}>
+          {`${pad(`[${entry.key}]`, 12)} ${entry.description}`}
+        </PanelLine>
+      ))}
+    </Panel>
+  );
+}
 
 export interface FooterProps {
   focusedPane: "graphs" | "invocations";
   lastRefreshAt: number;
+  paused: boolean;
+  width: number;
 }
 
 export function Footer(props: FooterProps): ReactElement {
+  const inner = Math.max(20, props.width - 2);
+  const verb = props.paused ? "resume" : "pause";
+  const focusHint =
+    props.focusedPane === "graphs" ? "[up/down] select  [enter] expand" : "[up/down] scroll tools";
   return (
     <Box paddingX={1} flexDirection="column">
       <Text color="gray">
-        {"[↑↓] select · [enter] expand · [tab] switch pane ("}
-        {props.focusedPane}
-        {") · [r] refresh · [p] pause · [q] quit"}
+        {fit(
+          `[?] help  [tab] focus ${props.focusedPane}  [r] refresh  [p] ${verb}  [q] quit`,
+          inner,
+        )}
+      </Text>
+      <Text color="gray">
+        {fit(
+          `${focusHint}  [w] welcome  refreshed ${fmtTime(new Date(props.lastRefreshAt).toISOString())}`,
+          inner,
+        )}
       </Text>
-      <Text color="gray">last refresh: {fmtTime(new Date(props.lastRefreshAt).toISOString())}</Text>
     </Box>
   );
 }
diff --git a/src/cli/tui/dashboard.tsx b/src/cli/tui/dashboard.tsx
index 43a255c..f4927f0 100644
--- a/src/cli/tui/dashboard.tsx
+++ b/src/cli/tui/dashboard.tsx
@@ -11,37 +11,51 @@
  */
 
 import { env } from "@/config";
-import { Box, Text, useApp, useInput, useStdin } from "ink";
+import { Box, Text, useApp, useInput, useStdin, useStdout } from "ink";
 import type { ReactElement } from "react";
 import { useCallback, useMemo, useState } from "react";
-import { Footer, Header, InvocationsPane, RunSummaryPane, TaskGraphsPane } from "./components";
-import { useRecentInvocations, useRunSummary, useTaskGraphs } from "./hooks";
+import {
+  Footer,
+  Header,
+  HelpPanel,
+  InvocationsPane,
+  OperatorPane,
+  RunSummaryPane,
+  TaskGraphsPane,
+} from "./components";
+import { useOperatorSnapshot, useRecentInvocations, useRunSummary, useTaskGraphs } from "./hooks";
+import { splitWidths, terminalWidth } from "./layout";
 
 export interface DashboardProps {
   pollMs?: number;
   onWelcome?: () => void;
+  width?: number;
 }
 
-export function Dashboard({ pollMs = 2000, onWelcome }: DashboardProps): ReactElement {
+export function Dashboard({ pollMs = 2000, onWelcome, width }: DashboardProps): ReactElement {
   const app = useApp();
   const { isRawModeSupported } = useStdin();
+  const { stdout } = useStdout();
   const [selectedIdx, setSelectedIdx] = useState(0);
   const [expanded, setExpanded] = useState(true);
   const [focusedPane, setFocusedPane] = useState<"graphs" | "invocations">("graphs");
   const [scrollOffset, setScrollOffset] = useState(0);
   const [lastRefreshAt, setLastRefreshAt] = useState<number>(Date.now());
   const [paused, setPaused] = useState(false);
+  const [showHelp, setShowHelp] = useState(false);
 
   const graphs = useTaskGraphs(paused ? 10 * 60_000 : pollMs);
+  const operator = useOperatorSnapshot(paused ? 10 * 60_000 : Math.max(pollMs, 3000));
   const summary = useRunSummary(paused ? 10 * 60_000 : Math.max(pollMs * 2, 5000));
   const invocations = useRecentInvocations(50, paused ? 10 * 60_000 : Math.max(pollMs / 2, 1000));
 
   const refreshAll = useCallback((): void => {
     graphs.refresh();
+    operator.refresh();
     summary.refresh();
     invocations.refresh();
     setLastRefreshAt(Date.now());
-  }, [graphs, summary, invocations]);
+  }, [graphs, operator, summary, invocations]);
 
   useInput(
     (input, key) => {
@@ -49,6 +63,14 @@ export function Dashboard({ pollMs = 2000, onWelcome }: DashboardProps): ReactEl
         app.exit();
         return;
       }
+      if (input === "?") {
+        setShowHelp((v) => !v);
+        return;
+      }
+      if (key.escape) {
+        if (showHelp) setShowHelp(false);
+        return;
+      }
       if (input === "w" && onWelcome) {
         onWelcome();
         return;
@@ -57,14 +79,17 @@ export function Dashboard({ pollMs = 2000, onWelcome }: DashboardProps): ReactEl
         refreshAll();
         return;
       }
-      if (key.tab) {
-        setFocusedPane((p) => (p === "graphs" ? "invocations" : "graphs"));
-        return;
-      }
       if (input === "p") {
         setPaused((p) => !p);
         return;
       }
+      if (showHelp) {
+        return;
+      }
+      if (key.tab) {
+        setFocusedPane((p) => (p === "graphs" ? "invocations" : "graphs"));
+        return;
+      }
       if (focusedPane === "graphs") {
         if (key.upArrow) {
           setSelectedIdx((i) => Math.max(0, i - 1));
@@ -97,15 +122,20 @@ export function Dashboard({ pollMs = 2000, onWelcome }: DashboardProps): ReactEl
       provider: env.LLM_PROVIDER,
       model: env.LLM_MODEL_REASONER,
       mode: env.STRAND_MODE,
+      halt: env.STRAND_HALT,
+      tier: env.TIER,
       credentialStore: process.env["STRAND_CREDENTIAL_STORE"] ?? "env",
       tenant: process.env["STRAND_TENANT"] ?? null,
     }),
     [],
   );
 
+  const viewportWidth = terminalWidth(width ?? stdout.columns);
+  const layout = splitWidths(viewportWidth);
+
   return (
     <Box flexDirection="column">
-      <Header {...header} />
+      <Header {...header} width={viewportWidth} />
       {!isRawModeSupported ? (
         <Box paddingX={1}>
           <Text color="yellow">
@@ -118,21 +148,60 @@ export function Dashboard({ pollMs = 2000, onWelcome }: DashboardProps): ReactEl
           <Text color="yellow">{"[paused] — press p to resume, r to refresh once"}</Text>
         </Box>
       ) : null}
-      <TaskGraphsPane
-        graphs={graphs.data}
-        loading={graphs.loading}
-        selectedIdx={Math.min(selectedIdx, Math.max(0, graphs.data.length - 1))}
-        expanded={expanded}
-        focused={focusedPane === "graphs"}
-      />
-      <RunSummaryPane summary={summary.data} loading={summary.loading} />
-      <InvocationsPane
-        rows={invocations.data}
-        loading={invocations.loading}
-        focused={focusedPane === "invocations"}
-        scrollOffset={scrollOffset}
+      {showHelp ? (
+        <HelpPanel width={viewportWidth} focusedPane={focusedPane} paused={paused} />
+      ) : (
+        <>
+          <OperatorPane snapshot={operator.data} loading={operator.loading} width={viewportWidth} />
+          {layout.stacked ? (
+            <>
+              <TaskGraphsPane
+                graphs={graphs.data}
+                loading={graphs.loading}
+                selectedIdx={Math.min(selectedIdx, Math.max(0, graphs.data.length - 1))}
+                expanded={expanded}
+                focused={focusedPane === "graphs"}
+                width={layout.full}
+              />
+              <RunSummaryPane
+                summary={summary.data}
+                loading={summary.loading}
+                width={layout.full}
+              />
+            </>
+          ) : (
+            <Box flexDirection="row">
+              <TaskGraphsPane
+                graphs={graphs.data}
+                loading={graphs.loading}
+                selectedIdx={Math.min(selectedIdx, Math.max(0, graphs.data.length - 1))}
+                expanded={expanded}
+                focused={focusedPane === "graphs"}
+                width={layout.left}
+              />
+              <Box width={layout.gap} />
+              <RunSummaryPane
+                summary={summary.data}
+                loading={summary.loading}
+                width={layout.right}
+              />
+            </Box>
+          )}
+          <InvocationsPane
+            rows={invocations.data}
+            loading={invocations.loading}
+            focused={focusedPane === "invocations"}
+            scrollOffset={scrollOffset}
+            width={viewportWidth}
+          />
+        </>
+      )}
+      <Footer
+        focusedPane={focusedPane}
+        lastRefreshAt={lastRefreshAt}
+        paused={paused}
+        width={viewportWidth}
       />
-      <Footer focusedPane={focusedPane} lastRefreshAt={lastRefreshAt} />
     </Box>
   );
 }
diff --git a/src/cli/tui/hooks.ts b/src/cli/tui/hooks.ts
index d87d201..6615954 100644
--- a/src/cli/tui/hooks.ts
+++ b/src/cli/tui/hooks.ts
@@ -45,12 +45,49 @@ export interface RunSummary {
   };
 }
 
+export interface OperatorSnapshot {
+  review: {
+    open: number;
+    oldestMinutes: number | null;
+  };
+  actions24h: {
+    total: number;
+    approved: number;
+    rejected: number;
+    executed: number;
+    failed: number;
+    byKind: Array<{ kind: string; count: number }>;
+  };
+  guardrails: {
+    activeCooldowns: number;
+    dlqOpen: number;
+    recentDuplicateHashes: number;
+  };
+  x: {
+    latestHealth: Array<{
+      endpoint: string;
+      healthy: number;
+      remaining: number | null;
+      limit: number | null;
+      sampledAt: string;
+    }>;
+    monthlyUsed: number | null;
+    monthlyCap: number | null;
+  };
+  followers: {
+    count: number;
+    delta24h: number | null;
+    sampledAt: string;
+  } | null;
+}
+
 // ─── DataSource interface ───────────────────────────────────────────────────
 
 export interface TuiDataSource {
   listActiveTaskGraphs(): TaskGraph[];
   recentInvocations(limit: number): InvocationRow[];
   runSummary24h(): RunSummary;
+  operatorSnapshot(): OperatorSnapshot;
 }
 
 // ─── SQLite-backed data source ──────────────────────────────────────────────
@@ -105,6 +142,40 @@ interface ConsolidatorRow {
   n: number;
 }
 
+interface CountRow {
+  n: number;
+}
+
+interface OldestRow {
+  created_at: string | null;
+}
+
+interface StatusCountRow {
+  status: string;
+  n: number;
+}
+
+interface KindCountRow {
+  kind: string;
+  n: number;
+}
+
+interface HealthRow {
+  endpoint: string;
+  healthy: number;
+  rate_limit_remaining: number | null;
+  rate_limit_limit: number | null;
+  sampled_at: string;
+  monthly_used: number | null;
+  monthly_cap: number | null;
+}
+
+interface FollowerRow {
+  followers_count: number;
+  delta_24h: number | null;
+  sampled_at: string;
+}
+
 function stepFromRow(r: StepRow): PlanStep {
   const step: PlanStep = {
     id: r.id,
@@ -121,8 +192,16 @@ function stepFromRow(r: StepRow): PlanStep {
   return step;
 }
 
+function ageMinutes(iso: string | null): number | null { // whole minutes elapsed since `iso`, or null if unknown
+  if (iso == null) return null;
+  const ms = new Date(iso).getTime();
+  if (!Number.isFinite(ms)) return null; // unparseable timestamp: getTime() is NaN
+  return Math.max(0, Math.floor((Date.now() - ms) / 60_000)); // timestamps in the future clamp to 0
+}
+
 export function makeSqliteDataSource(database?: Database.Database): TuiDataSource {
   const dbi = database ?? defaultDb();
+  const since24h = "strftime('%Y-%m-%dT%H:%M:%fZ','now','-24 hours')";
   const qGraphs = dbi.prepare(
     "SELECT * FROM agent_task_graphs WHERE status IN ('pending','running') ORDER BY updated_at DESC LIMIT 20",
   );
@@ -145,6 +224,43 @@ export function makeSqliteDataSource(database?: Database.Database): TuiDataSourc
      WHERE created_at >= datetime('now','-24 hours')
      GROUP BY status`,
   );
+  const qReviewOpen = dbi.prepare(
+    "SELECT COUNT(*) AS n FROM human_review_queue WHERE decided_at IS NULL",
+  );
+  const qReviewOldest = dbi.prepare(
+    "SELECT MIN(created_at) AS created_at FROM human_review_queue WHERE decided_at IS NULL",
+  );
+  const qActionStatus24h = dbi.prepare(
+    `SELECT status, COUNT(*) AS n
+     FROM action_log
+     WHERE created_at >= ${since24h}
+     GROUP BY status`,
+  );
+  const qActionKind24h = dbi.prepare(
+    `SELECT kind, COUNT(*) AS n
+     FROM action_log
+     WHERE created_at >= ${since24h}
+     GROUP BY kind
+     ORDER BY n DESC, kind ASC
+     LIMIT 6`,
+  );
+  const qActiveCooldowns = dbi.prepare("SELECT COUNT(*) AS n FROM cooldowns WHERE until_at > ?");
+  const qDlqOpen = dbi.prepare("SELECT COUNT(*) AS n FROM dlq");
+  const qDuplicateHashes = dbi.prepare(
+    "SELECT COUNT(*) AS n FROM tweet_dedup WHERE expires_at > strftime('%Y-%m-%dT%H:%M:%fZ','now')",
+  );
+  const qHealth = dbi.prepare(
+    `SELECT endpoint, healthy, rate_limit_remaining, rate_limit_limit, sampled_at, monthly_used, monthly_cap
+     FROM x_health
+     ORDER BY sampled_at DESC
+     LIMIT 8`,
+  );
+  const qFollowers = dbi.prepare(
+    `SELECT followers_count, delta_24h, sampled_at
+     FROM follower_delta
+     ORDER BY sampled_at DESC
+     LIMIT 1`,
+  );
 
   return {
     listActiveTaskGraphs(): TaskGraph[] {
@@ -204,6 +320,61 @@ export function makeSqliteDataSource(database?: Database.Database): TuiDataSourc
         },
       };
     },
+    operatorSnapshot(): OperatorSnapshot {
+      const reviewOpen = (qReviewOpen.get() as CountRow | undefined)?.n ?? 0;
+      const oldest = (qReviewOldest.get() as OldestRow | undefined)?.created_at ?? null;
+      const oldestMinutes = ageMinutes(oldest);
+
+      const statusRows = qActionStatus24h.all() as StatusCountRow[];
+      const byStatus = new Map(statusRows.map((r) => [r.status, r.n]));
+      const total = statusRows.reduce((acc, r) => acc + r.n, 0);
+      const byKind = (qActionKind24h.all() as KindCountRow[]).map((r) => ({
+        kind: r.kind,
+        count: r.n,
+      }));
+
+      const healthRows = qHealth.all() as HealthRow[];
+      const latestByEndpoint = new Map<string, HealthRow>();
+      for (const row of healthRows) {
+        if (!latestByEndpoint.has(row.endpoint)) latestByEndpoint.set(row.endpoint, row);
+      }
+      const latestHealth = Array.from(latestByEndpoint.values()).map((r) => ({
+        endpoint: r.endpoint,
+        healthy: r.healthy,
+        remaining: r.rate_limit_remaining,
+        limit: r.rate_limit_limit,
+        sampledAt: r.sampled_at,
+      }));
+      const monthlyUsed = healthRows.find((r) => r.monthly_used != null)?.monthly_used ?? null;
+      const monthlyCap = healthRows.find((r) => r.monthly_cap != null)?.monthly_cap ?? null;
+
+      const followers = qFollowers.get() as FollowerRow | undefined;
+
+      return {
+        review: { open: reviewOpen, oldestMinutes },
+        actions24h: {
+          total,
+          approved: byStatus.get("approved") ?? 0,
+          rejected: byStatus.get("rejected") ?? 0,
+          executed: byStatus.get("executed") ?? 0,
+          failed: byStatus.get("failed") ?? 0,
+          byKind,
+        },
+        guardrails: {
+          activeCooldowns: (qActiveCooldowns.get(Date.now()) as CountRow | undefined)?.n ?? 0,
+          dlqOpen: (qDlqOpen.get() as CountRow | undefined)?.n ?? 0,
+          recentDuplicateHashes: (qDuplicateHashes.get() as CountRow | undefined)?.n ?? 0,
+        },
+        x: { latestHealth, monthlyUsed, monthlyCap },
+        followers: followers
+          ? {
+              count: followers.followers_count,
+              delta24h: followers.delta_24h,
+              sampledAt: followers.sampled_at,
+            }
+          : null,
+      };
+    },
   };
 }
 
@@ -276,6 +447,25 @@ export function useRunSummary(pollMs = 5000): PollState<RunSummary> {
   return usePolled<RunSummary>(() => src.runSummary24h(), initial, pollMs);
 }
 
+export function useOperatorSnapshot(pollMs = 5000): PollState<OperatorSnapshot> {
+  const src = useDataSource();
+  const initial: OperatorSnapshot = {
+    review: { open: 0, oldestMinutes: null },
+    actions24h: {
+      total: 0,
+      approved: 0,
+      rejected: 0,
+      executed: 0,
+      failed: 0,
+      byKind: [],
+    },
+    guardrails: { activeCooldowns: 0, dlqOpen: 0, recentDuplicateHashes: 0 },
+    x: { latestHealth: [], monthlyUsed: null, monthlyCap: null },
+    followers: null,
+  };
+  return usePolled<OperatorSnapshot>(() => src.operatorSnapshot(), initial, pollMs);
+}
+
 export function useRecentInvocations(limit = 50, pollMs = 1000): PollState<InvocationRow[]> {
   const src = useDataSource();
   return usePolled<InvocationRow[]>(() => src.recentInvocations(limit), [], pollMs);
diff --git a/src/cli/tui/layout.ts b/src/cli/tui/layout.ts
new file mode 100644
index 0000000..faa7176
--- /dev/null
+++ b/src/cli/tui/layout.ts
@@ -0,0 +1,71 @@
+/**
+ * Width-safe text helpers for Ink layouts.
+ *
+ * Ink wraps long text aggressively when adjacent Text nodes sit in flex rows.
+ * The cockpit uses pre-sized strings so every visible line fits the reported
+ * terminal width.
+ */
+
+export const DEFAULT_TERMINAL_WIDTH = 80;
+export const MIN_TERMINAL_WIDTH = 60;
+export const MAX_TERMINAL_WIDTH = 160;
+export const PANEL_PADDING_X = 1;
+
+export function terminalWidth(columns: number | undefined | null): number { // usable width for a reported column count
+  if (columns == null || !Number.isFinite(columns)) return DEFAULT_TERMINAL_WIDTH; // missing, NaN or infinite: use the default
+  return Math.max(MIN_TERMINAL_WIDTH, Math.min(MAX_TERMINAL_WIDTH, Math.floor(columns))); // floor, then clamp to [MIN, MAX]
+}
+
+export function panelInnerWidth(width: number, paddingX = PANEL_PADDING_X): number { // text width available inside a panel
+  return Math.max(12, width - 2 - paddingX * 2); // never below 12; NOTE(review): the "2" is presumably the two border columns — confirm against Panel
+}
+
+export function truncate(value: string, width: number): string { // cap `value` at `width` characters, marking cuts with "..."
+  if (width <= 0) return "";
+  if (value.length <= width) return value; // already fits: returned unchanged
+  if (width <= 3) return ".".repeat(width); // no room for text plus the ellipsis: dots only
+  return `${value.slice(0, width - 3)}...`; // result is exactly `width` UTF-16 code units long
+}
+
+export function pad(value: string, width: number): string { // truncate to `width`, then right-pad with spaces
+  const short = truncate(value, width);
+  if (short.length >= width) return short; // nothing left to pad (also covers width <= 0, where short is "")
+  return short + " ".repeat(width - short.length);
+}
+
+export function fit(value: string, width: number): string { // alias of pad(): truncate or right-pad `value` to `width`
+  return pad(value, width);
+}
+
+export function kv(label: string, value: string | number | null | undefined): string {
+  return `${label} ${value ?? "-"}`;
+}
+
+export function sign(value: number | null | undefined): string { // format a delta with an explicit "+" for positive values
+  if (value == null) return "-"; // null/undefined render as a dash placeholder
+  if (value > 0) return `+${value}`;
+  return String(value); // zero and negatives use the default number formatting
+}
+
+export function ratioBar(value: number, max: number, width: number): string { // "[###---]" style bar for value / max
+  const safeWidth = Math.max(3, width); // at least 3 cells between the brackets
+  const safeMax = Math.max(1, max); // a zero or negative max is treated as 1, so no division by zero
+  const ratio = Math.max(0, Math.min(1, value / safeMax)); // clamp to [0, 1]
+  const filled = Math.round(ratio * safeWidth); // number of "#" cells; the rest are "-"
+  return `[${"#".repeat(filled)}${"-".repeat(safeWidth - filled)}]`;
+}
+
+export function splitWidths(totalWidth: number): // choose a stacked or two-column layout for `totalWidth`
+  | { stacked: true; full: number }
+  | {
+      stacked: false;
+      left: number;
+      right: number;
+      gap: number;
+    } {
+  if (totalWidth < 112) return { stacked: true, full: totalWidth }; // below 112 columns: one full-width column
+  const gap = 1;
+  const left = Math.floor((totalWidth - gap) * 0.58); // left column gets about 58% of the width after the gap
+  const right = totalWidth - gap - left; // right column takes the remainder, so left + gap + right === totalWidth
+  return { stacked: false, left, right, gap };
+}
diff --git a/src/cli/tui/welcome.tsx b/src/cli/tui/welcome.tsx
index 3a3c74e..570e28d 100644
--- a/src/cli/tui/welcome.tsx
+++ b/src/cli/tui/welcome.tsx
@@ -124,6 +124,7 @@ export function Welcome({ commands, tools, onDashboard }: WelcomeProps): ReactEl
 
 export const DEFAULT_COMMANDS: WelcomeEntry[] = [
   { name: "run <goal>", description: "one-shot agentic plan" },
+  { name: "cockpit", description: "live operator cockpit" },
   { name: "tui", description: "this welcome (+ --dashboard)" },
   { name: "status", description: "orchestrator + runs summary" },
   { name: "tasks", description: "inspect persisted task graphs" },
diff --git a/src/clients/x.ts b/src/clients/x.ts
index 21298ef..79f4879 100644
--- a/src/clients/x.ts
+++ b/src/clients/x.ts
@@ -178,6 +178,35 @@ export async function fetchDmEvents(opts: { sinceId?: string; max?: number } = {
   }
 }
 
+export async function fetchUser(): Promise<{ // profile and public metric counts for the id returned by userId()
+  id: string;
+  name: string;
+  username: string;
+  followersCount: number;
+  followingCount: number;
+  listedCount: number;
+}> {
+  const client = await userClient();
+  const id = await userId();
+  const res = await client.v2.user(id, {
+    "user.fields": ["public_metrics"], // request public_metrics explicitly; the counts below are read from it
+  });
+  const u = res.data;
+  const metrics = u.public_metrics as { // cast only; the optional chaining below still guards a missing object
+    followers_count?: number;
+    following_count?: number;
+    listed_count?: number;
+  };
+  return {
+    id: u.id,
+    name: u.name,
+    username: u.username,
+    followersCount: metrics?.followers_count ?? 0, // an absent metric is reported as 0
+    followingCount: metrics?.following_count ?? 0,
+    listedCount: metrics?.listed_count ?? 0,
+  };
+}
+
 // ─── WRITE ───────────────────────────────────────────────────
 
 export interface WriteResult {
diff --git a/src/db/schema.sql b/src/db/schema.sql
index 0fa01b5..cdf43b3 100644
--- a/src/db/schema.sql
+++ b/src/db/schema.sql
@@ -179,3 +179,58 @@ CREATE TABLE IF NOT EXISTS tweet_dedup (
 );
 
 CREATE INDEX IF NOT EXISTS idx_tweet_dedup_expires ON tweet_dedup(expires_at);
+
+-- Phase 3: Metrics tracking tables
+
+-- X API health snapshot (poll every 15 min)
+CREATE TABLE IF NOT EXISTS x_health (
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  sampled_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')), -- UTC ISO-8601 text with milliseconds
+  endpoint TEXT NOT NULL,           -- 'mentions', 'timeline', 'dm', 'like', 'bookmark', etc.
+  rate_limit_remaining INTEGER,     -- x-rate-limit-remaining header
+  rate_limit_limit INTEGER,         -- x-rate-limit-limit header
+  rate_limit_reset INTEGER,         -- x-rate-limit-reset header (unix seconds)
+  monthly_cap INTEGER,              -- tier cap (10k basic, 1m pro, 50m enterprise)
+  monthly_used INTEGER,             -- current usage estimate
+  healthy INTEGER NOT NULL DEFAULT 1  -- 1 = healthy, 0 = degraded
+);
+CREATE INDEX IF NOT EXISTS idx_x_health_sampled ON x_health(sampled_at);
+CREATE INDEX IF NOT EXISTS idx_x_health_endpoint ON x_health(endpoint, sampled_at);
+
+-- Mention sentiment baseline (computed from perceived_events text)
+CREATE TABLE IF NOT EXISTS mention_sentiment (
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  event_id TEXT NOT NULL REFERENCES perceived_events(id) ON DELETE CASCADE,
+  sampled_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')),
+  sentiment_score REAL,             -- -1 (negative) to +1 (positive), NULL if not computed
+  magnitude REAL,                   -- confidence in sentiment score
+  model TEXT,                       -- model used for sentiment analysis
+  error TEXT                        -- error message if sentiment computation failed
+);
+CREATE INDEX IF NOT EXISTS idx_mention_sentiment_sampled ON mention_sentiment(sampled_at);
+CREATE INDEX IF NOT EXISTS idx_mention_sentiment_event ON mention_sentiment(event_id);
+
+-- Follower delta tracking (poll every 1 hour)
+CREATE TABLE IF NOT EXISTS follower_delta (
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  sampled_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')),
+  followers_count INTEGER NOT NULL,
+  following_count INTEGER,
+  listed_count INTEGER,
+  delta_1h INTEGER,                 -- change from 1 hour ago
+  delta_24h INTEGER,              -- change from 24 hours ago
+  delta_7d INTEGER                -- change from 7 days ago
+);
+CREATE INDEX IF NOT EXISTS idx_follower_delta_sampled ON follower_delta(sampled_at);
+
+-- Error rates by action kind and error code (aggregated hourly)
+CREATE TABLE IF NOT EXISTS error_rates (
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  hour_bucket TEXT NOT NULL,        -- '2025-01-15T14:00:00Z' format
+  kind TEXT NOT NULL,               -- action kind or 'all'
+  error_code TEXT NOT NULL,         -- specific error code or 'TOTAL'
+  count INTEGER NOT NULL DEFAULT 0,
+  UNIQUE(hour_bucket, kind, error_code)
+);
+CREATE INDEX IF NOT EXISTS idx_error_rates_hour ON error_rates(hour_bucket);
+CREATE INDEX IF NOT EXISTS idx_error_rates_kind ON error_rates(kind, hour_bucket);
diff --git a/src/loops/actor.ts b/src/loops/actor.ts
index 1051f95..2e4f86c 100644
--- a/src/loops/actor.ts
+++ b/src/loops/actor.ts
@@ -3,6 +3,7 @@ import * as x from "@/clients/x";
 import { checkMonthlyCapHalt, incrementMonthlyUsage, isActorHalted } from "@/clients/x";
 import { env } from "@/config";
 import { db } from "@/db";
+import { recordActionError } from "@/metrics";
 import { recordActionCooldowns } from "@/policy/cooldowns";
 import { recordPostText } from "@/policy/duplicates";
 import type { Candidate } from "@/types/actions";
@@ -67,8 +68,12 @@ export async function executeApproved(
       env.STRAND_MODE,
     );
 
-  if (env.STRAND_MODE === "shadow") {
-    log.info({ key, kind: c.action.kind }, "actor.shadow");
+  // Phase 3: only like and bookmark are live; everything else stays shadow
+  const isLowRisk = c.action.kind === "like" || c.action.kind === "bookmark";
+  const isShadow = env.STRAND_MODE === "shadow" || (!isLowRisk && env.STRAND_MODE === "live");
+
+  if (isShadow) {
+    log.info({ key, kind: c.action.kind, reason: env.STRAND_MODE === "shadow" ? "mode_shadow" : "phase3_non_lowrisk" }, "actor.shadow");
     db().prepare("UPDATE action_log SET status = 'executed' WHERE idempotency_key = ?").run(key);
     return;
   }
@@ -132,6 +137,9 @@ export async function executeApproved(
     const msg = err instanceof Error ? err.message : String(err);
     const code = (err as { code?: string } | null)?.code ?? "UNKNOWN";
 
+    // Record error rate for metrics
+    recordActionError(c.action.kind, code);
+
     db()
       .prepare(
         `UPDATE action_log
diff --git a/src/metrics/index.ts b/src/metrics/index.ts
new file mode 100644
index 0000000..1211764
--- /dev/null
+++ b/src/metrics/index.ts
@@ -0,0 +1,220 @@
+/**
+ * Phase 3 metrics collection module.
+ *
+ * Tracks:
+ * - X API health (rate limits, monthly cap)
+ * - Mention sentiment baseline
+ * - Follower delta
+ * - Error rates by action kind
+ */
+
+import { env } from "@/config";
+import { db } from "@/db";
+import { getRateLimit, getMonthlyUsage } from "@/clients/x";
+import { log } from "@/util/log";
+import type { Action } from "@/types/actions";
+
+const TIER_MONTHLY_CAP: Record<"basic" | "pro" | "enterprise", number> = {
+  basic: 10_000,
+  pro: 1_000_000,
+  enterprise: 50_000_000,
+};
+
+/**
+ * Insert one x_health row for `endpoint`: getRateLimit(endpoint) state, the tier's monthly cap, getMonthlyUsage().
+ * `opts.healthy` counts as healthy unless it is explicitly false. Best-effort: failures are logged, never thrown.
+ */
+export function recordXHealth(
+  endpoint: string,
+  opts: {
+    healthy?: boolean;
+  } = {},
+): void {
+  try {
+    const rateLimit = getRateLimit(endpoint);
+    const tier = env.TIER;
+    const monthlyCap = TIER_MONTHLY_CAP[tier];
+    const monthlyUsed = getMonthlyUsage();
+
+    db()
+      .prepare(
+        `INSERT INTO x_health
+         (endpoint, rate_limit_remaining, rate_limit_limit, rate_limit_reset, monthly_cap, monthly_used, healthy)
+         VALUES (?, ?, ?, ?, ?, ?, ?)`,
+      )
+      .run(
+        endpoint,
+        rateLimit?.remaining ?? null, // missing rate-limit fields are stored as NULL
+        rateLimit?.limit ?? null,
+        rateLimit?.resetAt ?? null, // NOTE(review): the column is documented as unix seconds — confirm resetAt's unit
+        monthlyCap,
+        monthlyUsed,
+        opts.healthy !== false ? 1 : 0,
+      );
+  } catch (err) {
+    log.warn({ err, endpoint }, "metrics.x_health_failed");
+  }
+}
+
+/**
+ * Insert a mention_sentiment row for `eventId`; a null `sentiment` stores NULL score, magnitude and model.
+ * `error` is stored as given (NULL when omitted). Best-effort: DB failures are logged as warnings, never thrown.
+ */
+export function recordMentionSentiment(
+  eventId: string,
+  sentiment: {
+    score: number;
+    magnitude: number;
+    model: string;
+  } | null,
+  error?: string,
+): void {
+  try {
+    db()
+      .prepare(
+        `INSERT INTO mention_sentiment
+         (event_id, sentiment_score, magnitude, model, error)
+         VALUES (?, ?, ?, ?, ?)`,
+      )
+      .run(
+        eventId,
+        sentiment?.score ?? null,
+        sentiment?.magnitude ?? null,
+        sentiment?.model ?? null,
+        error ?? null,
+      );
+  } catch (err) {
+    log.warn({ err, eventId }, "metrics.mention_sentiment_failed");
+  }
+}
+
+/**
+ * Insert a follower snapshot plus deltas against the oldest sample inside the last 1 hour / 24 hours / 7 days.
+ * A delta is NULL when its window holds no earlier sample. Best-effort: DB failures are logged, never thrown.
+ */
+export function recordFollowerDelta(followers: {
+  followersCount: number;
+  followingCount?: number;
+  listedCount?: number;
+}): void {
+  try {
+    const d = db();
+
+    // Cutoffs use sampled_at's stored ISO 'T...Z' text format; datetime() text ('YYYY-MM-DD HH:MM:SS') mis-compares.
+    const hourAgo = d
+      .prepare(
+        `SELECT followers_count FROM follower_delta
+         WHERE sampled_at > strftime('%Y-%m-%dT%H:%M:%fZ','now','-1 hour')
+         ORDER BY sampled_at ASC LIMIT 1`,
+      )
+      .get() as { followers_count: number } | undefined;
+
+    const dayAgo = d
+      .prepare(
+        `SELECT followers_count FROM follower_delta
+         WHERE sampled_at > strftime('%Y-%m-%dT%H:%M:%fZ','now','-24 hours')
+         ORDER BY sampled_at ASC LIMIT 1`,
+      )
+      .get() as { followers_count: number } | undefined;
+
+    const weekAgo = d
+      .prepare(
+        `SELECT followers_count FROM follower_delta
+         WHERE sampled_at > strftime('%Y-%m-%dT%H:%M:%fZ','now','-7 days')
+         ORDER BY sampled_at ASC LIMIT 1`,
+      )
+      .get() as { followers_count: number } | undefined;
+
+    const delta1h = hourAgo ? followers.followersCount - hourAgo.followers_count : null;
+    const delta24h = dayAgo ? followers.followersCount - dayAgo.followers_count : null;
+    const delta7d = weekAgo ? followers.followersCount - weekAgo.followers_count : null;
+
+    d.prepare(
+      `INSERT INTO follower_delta
+       (followers_count, following_count, listed_count, delta_1h, delta_24h, delta_7d)
+       VALUES (?, ?, ?, ?, ?, ?)`,
+    ).run(
+      followers.followersCount,
+      followers.followingCount ?? null,
+      followers.listedCount ?? null,
+      delta1h,
+      delta24h,
+      delta7d,
+    );
+  } catch (err) {
+    log.warn({ err }, "metrics.follower_delta_failed");
+  }
+}
+
+/**
+ * Bump the hourly error_rates counters for a failed action: the per-kind "TOTAL" row and the `errorCode` row.
+ * Called by Actor when an action fails. Best-effort: DB failures are logged as warnings, never thrown.
+ */
+export function recordActionError(
+  kind: Action["kind"],
+  errorCode: string,
+): void {
+  try {
+    const hourBucket = new Date().toISOString().slice(0, 13) + ":00:00Z"; // UTC hour bucket, e.g. "2025-01-15T14:00:00Z"
+
+    const d = db();
+
+    // Upsert the per-kind total: insert with count 1, or add 1 to the existing (hour, kind, "TOTAL") row
+    d.prepare(
+      `INSERT INTO error_rates (hour_bucket, kind, error_code, count)
+       VALUES (?, ?, ?, 1)
+       ON CONFLICT(hour_bucket, kind, error_code) DO UPDATE SET count = count + 1`,
+    ).run(hourBucket, kind, "TOTAL");
+
+    // Same upsert for the specific error code
+    d.prepare(
+      `INSERT INTO error_rates (hour_bucket, kind, error_code, count)
+       VALUES (?, ?, ?, 1)
+       ON CONFLICT(hour_bucket, kind, error_code) DO UPDATE SET count = count + 1`,
+    ).run(hourBucket, kind, errorCode);
+  } catch (err) {
+    log.warn({ err, kind, errorCode }, "metrics.error_rate_failed");
+  }
+}
+
+/**
+ * Health summary for the status CLI: x_health samples (last 1h), newest follower sample or null, error_rates (last 24h).
+ */
+export function getHealthSummary(): {
+  xHealth: { endpoint: string; healthy: number; sampledAt: string }[];
+  followerDelta: { followersCount: number; delta24h: number | null; sampledAt: string } | null;
+  errorRates: { hourBucket: string; kind: string; errorCode: string; count: number }[];
+} {
+  const d = db();
+  // Cutoffs are built with strftime in the stored ISO 'T...Z' text format; datetime() text would mis-compare.
+  const xHealth = d
+    .prepare(
+      `SELECT endpoint, healthy, sampled_at as sampledAt
+       FROM x_health
+       WHERE sampled_at > strftime('%Y-%m-%dT%H:%M:%fZ','now','-1 hour')
+       ORDER BY sampled_at DESC
+       LIMIT 10`,
+    )
+    .all() as Array<{ endpoint: string; healthy: number; sampledAt: string }>;
+
+  const latestFollowers = d
+    .prepare(
+      `SELECT followers_count as followersCount, delta_24h as delta24h, sampled_at as sampledAt
+       FROM follower_delta
+       ORDER BY sampled_at DESC
+       LIMIT 1`,
+    )
+    .get() as { followersCount: number; delta24h: number | null; sampledAt: string } | undefined;
+
+  const errorRates = d
+    .prepare(
+      `SELECT hour_bucket as hourBucket, kind, error_code as errorCode, count
+       FROM error_rates
+       WHERE hour_bucket > strftime('%Y-%m-%dT%H:00:00Z','now','-24 hours')
+       ORDER BY hour_bucket DESC, count DESC
+       LIMIT 20`,
+    )
+    .all() as Array<{ hourBucket: string; kind: string; errorCode: string; count: number }>;
+
+  return { xHealth, followerDelta: latestFollowers ?? null, errorRates }; // .get() yields undefined when the table is empty
+}
diff --git a/src/orchestrator.ts b/src/orchestrator.ts
index b99b7db..7054d01 100644
--- a/src/orchestrator.ts
+++ b/src/orchestrator.ts
@@ -1,10 +1,12 @@
 import { disconnect as brainDisconnect } from "@/clients/brain";
+import { fetchUser, pollUsage } from "@/clients/x";
 import { env } from "@/config";
 import { closeDb, db } from "@/db";
 import { executeApproved } from "@/loops/actor";
 import { consolidatorPoll, consolidatorRun } from "@/loops/consolidator";
 import { dmTick, perceiverTick } from "@/loops/perceiver";
 import { reasonerTick } from "@/loops/reasoner";
+import { recordFollowerDelta, recordXHealth } from "@/metrics";
 import { evaluate, makeGate } from "@/policy";
 import { proposed } from "@/types/actions";
 import { log } from "@/util/log";
@@ -18,14 +20,32 @@ interface LoopHandle {
 const handles: LoopHandle[] = [];
 let stopping = false;
 
-function every(ms: number, name: string, fn: () => Promise<void>): LoopHandle {
+// Track which loops should drain vs halt completely when STRAND_HALT is set
+type LoopMode = "run" | "drain" | "halt";
+
+function every(
+  ms: number,
+  name: string,
+  fn: () => Promise<void>,
+  opts: { mode?: LoopMode } = {},
+): LoopHandle {
   const run = async () => {
     if (stopping) return;
-    // Kill switch: env flag halts loop in <5s
+
+    // Phase 3 kill switch: drain semantics
     if (env.STRAND_HALT === "true") {
-      log.warn({ loop: name }, "orchestrator.halt_skipping_loop");
-      return;
+      const mode = opts.mode ?? "halt";
+      if (mode === "halt") {
+        log.warn({ loop: name }, "orchestrator.halt_skipping_loop");
+        return;
+      }
+      if (mode === "drain") {
+        log.info({ loop: name }, "orchestrator.drain_mode");
+        // Continue to fn() - drain in-flight work
+      }
+      // mode === "run" continues normally (perceiver reads are safe)
     }
+
     try {
       await fn();
     } catch (err) {
@@ -41,51 +61,108 @@ export function start(): void {
   db(); // open + migrate
   const gate = makeGate();
 
-  handles.push(every(120_000, "perceiver", perceiverTick));
+  // Perceiver (reads): always run — reads are safe
+  handles.push(every(120_000, "perceiver", perceiverTick, { mode: "run" }));
 
   // DM poll every 5 min (separate from mention/timeline poll)
-  handles.push(every(300_000, "perceiver-dm", dmTick));
+  handles.push(every(300_000, "perceiver-dm", dmTick, { mode: "run" }));
 
   // Phase 2: Reasoner ticks every 10 min (shadow-mode). Emits ≤5 candidates.
+  // Phase 3 kill switch: halt — stop emitting new candidates immediately
   handles.push(
-    every(600_000, "reasoner", async () => {
-      const candidates = await reasonerTick();
-      for (const c of candidates) {
-        const verdict = evaluate(gate, proposed(c));
-        if (verdict.approved) {
-          await executeApproved({ rl: gate.rl }, verdict.candidate, verdict.cacheableDecisionId);
-        } else {
-          db()
-            .prepare(
-              `INSERT INTO action_log (idempotency_key, decision_id, kind, payload_json, rationale, confidence, relevance, target_entity_id, mode, status, reasons_json)
-               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 'rejected', ?)`,
-            )
-            .run(
-              `rej_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
-              verdict.candidate.modelResponseId ?? "",
-              c.action.kind,
-              JSON.stringify(c.action),
-              c.rationale,
-              c.confidence,
-              c.relevanceScore,
-              c.targetEntityId ?? null,
-              process.env["STRAND_MODE"] ?? "shadow",
-              JSON.stringify({ reasons: verdict.reasons, ruleIds: verdict.ruleIds }),
-            );
+    every(
+      600_000,
+      "reasoner",
+      async () => {
+        const candidates = await reasonerTick();
+        for (const c of candidates) {
+          const verdict = evaluate(gate, proposed(c));
+          if (verdict.approved) {
+            await executeApproved({ rl: gate.rl }, verdict.candidate, verdict.cacheableDecisionId);
+          } else {
+            db()
+              .prepare(
+                `INSERT INTO action_log (idempotency_key, decision_id, kind, payload_json, rationale, confidence, relevance, target_entity_id, mode, status, reasons_json)
+                 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 'rejected', ?)`,
+              )
+              .run(
+                `rej_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
+                verdict.candidate.modelResponseId ?? "",
+                c.action.kind,
+                JSON.stringify(c.action),
+                c.rationale,
+                c.confidence,
+                c.relevanceScore,
+                c.targetEntityId ?? null,
+                process.env["STRAND_MODE"] ?? "shadow",
+                JSON.stringify({ reasons: verdict.reasons, ruleIds: verdict.ruleIds }),
+              );
+          }
         }
-      }
-    }),
+      },
+      { mode: "halt" },
+    ),
   );
 
   // Consolidator: submit every 24 h, poll open batches every 30 min.
-  handles.push(every(24 * 60 * 60 * 1000, "consolidator", consolidatorRun));
-  handles.push(every(30 * 60 * 1000, "consolidator-poll", consolidatorPoll));
+  handles.push(every(24 * 60 * 60 * 1000, "consolidator", consolidatorRun, { mode: "halt" }));
+  handles.push(every(30 * 60 * 1000, "consolidator-poll", consolidatorPoll, { mode: "halt" }));
+
+  // Sweeper: clean up expired TTL rows every hour — always run
+  handles.push(
+    every(
+      60 * 60 * 1000,
+      "sweeper",
+      async () => {
+        sweepExpired(db());
+      },
+      { mode: "run" },
+    ),
+  );
 
-  // Sweeper: clean up expired TTL rows every hour
+  // Phase 3: X health snapshot every 15 minutes — always run for monitoring
   handles.push(
-    every(60 * 60 * 1000, "sweeper", async () => {
-      sweepExpired(db());
-    }),
+    every(
+      15 * 60 * 1000,
+      "metrics-x-health",
+      async () => {
+        try {
+          // Poll usage endpoint for accurate monthly cap tracking
+          await pollUsage();
+          // Record health for key endpoints (rate limits tracked in x client)
+          recordXHealth("mentions", { healthy: true });
+          recordXHealth("dm_events", { healthy: true });
+        } catch (err) {
+          log.warn({ err }, "orchestrator.metrics_x_health_failed");
+        }
+      },
+      { mode: "run" },
+    ),
+  );
+
+  // Phase 3: Follower delta tracking every 1 hour — always run for monitoring
+  handles.push(
+    every(
+      60 * 60 * 1000,
+      "metrics-followers",
+      async () => {
+        try {
+          const user = await fetchUser();
+          recordFollowerDelta({
+            followersCount: user.followersCount,
+            followingCount: user.followingCount,
+            listedCount: user.listedCount,
+          });
+          log.info(
+            { followers: user.followersCount, following: user.followingCount },
+            "orchestrator.followers_recorded",
+          );
+        } catch (err) {
+          log.warn({ err }, "orchestrator.metrics_followers_failed");
+        }
+      },
+      { mode: "run" },
+    ),
   );
 
   log.info({ loops: handles.map((h) => h.name) }, "orchestrator.started");
diff --git a/tests/cli/cli.test.ts b/tests/cli/cli.test.ts
index fd7fca1..3750974 100644
--- a/tests/cli/cli.test.ts
+++ b/tests/cli/cli.test.ts
@@ -59,6 +59,7 @@ describe("strand CLI", () => {
     for (const sub of [
       "run",
       "tui",
+      "cockpit",
       "status",
       "review",
       "tasks",
diff --git a/tests/cli/tui.test.ts b/tests/cli/tui.test.ts
index f13ca94..5e06b4d 100644
--- a/tests/cli/tui.test.ts
+++ b/tests/cli/tui.test.ts
@@ -13,6 +13,7 @@ import type { TaskGraph } from "@/agent/types";
 import {
   DataSourceContext,
   type InvocationRow,
+  type OperatorSnapshot,
   type RunSummary,
   type TuiDataSource,
 } from "@/cli/tui/hooks";
@@ -21,6 +22,10 @@ import { render } from "ink-testing-library";
 import { createElement } from "react";
 import { describe, expect, it } from "vitest";
 
+function stripAnsi(s: string): string {
+  return s.split(new RegExp(`${String.fromCharCode(27)}\\[[0-?]*[ -/]*[@-~]`, "g")).join("");
+}
+
 function makeStubSource(): TuiDataSource {
   const graph: TaskGraph = {
     id: "7e3c1234-abcd-4000-8000-000000000000",
@@ -87,10 +92,45 @@ function makeStubSource(): TuiDataSource {
     consolidator: { total: 7, completed: 2, failed: 1, queued: 4, inProgress: 0 },
   };
 
+  const operator: OperatorSnapshot = {
+    review: { open: 3, oldestMinutes: 91 },
+    actions24h: {
+      total: 12,
+      approved: 2,
+      rejected: 4,
+      executed: 5,
+      failed: 1,
+      byKind: [
+        { kind: "reply", count: 5 },
+        { kind: "like", count: 4 },
+      ],
+    },
+    guardrails: { activeCooldowns: 8, dlqOpen: 0, recentDuplicateHashes: 2 },
+    x: {
+      monthlyUsed: 120,
+      monthlyCap: 10_000,
+      latestHealth: [
+        {
+          endpoint: "mentions",
+          healthy: 1,
+          remaining: 177,
+          limit: 180,
+          sampledAt: "2026-04-20T15:04:00.000Z",
+        },
+      ],
+    },
+    followers: {
+      count: 1234,
+      delta24h: 6,
+      sampledAt: "2026-04-20T15:04:00.000Z",
+    },
+  };
+
   return {
     listActiveTaskGraphs: () => [graph],
     recentInvocations: () => invocations,
     runSummary24h: () => summary,
+    operatorSnapshot: () => operator,
   };
 }
 
@@ -107,8 +147,14 @@ describe("strand tui dashboard", () => {
     const frame = lastFrame() ?? "";
     expect(frame.length).toBeGreaterThan(0);
     // Header renders provider + mode
-    expect(frame).toContain("Strand TUI");
+    expect(frame).toContain("STRAND COCKPIT");
     expect(frame).toContain("shadow");
+    // Operator cockpit shows guardrails + queue shape
+    expect(frame).toContain("operator cockpit");
+    expect(frame).toContain("review 3 open");
+    expect(frame).toContain("reply:5");
+    expect(frame).toContain("120/10000");
+    expect(frame).toContain("followers");
     // A fake graph appears
     expect(frame).toContain("crawl site X and summarize");
     expect(frame).toContain("fetch home page");
@@ -140,6 +186,20 @@ describe("strand tui dashboard", () => {
         },
         consolidator: { total: 0, completed: 0, failed: 0, queued: 0, inProgress: 0 },
       }),
+      operatorSnapshot: () => ({
+        review: { open: 0, oldestMinutes: null },
+        actions24h: {
+          total: 0,
+          approved: 0,
+          rejected: 0,
+          executed: 0,
+          failed: 0,
+          byKind: [],
+        },
+        guardrails: { activeCooldowns: 0, dlqOpen: 0, recentDuplicateHashes: 0 },
+        x: { latestHealth: [], monthlyUsed: null, monthlyCap: null },
+        followers: null,
+      }),
     };
     const tree = createElement(
       DataSourceContext.Provider,
@@ -148,9 +208,44 @@ describe("strand tui dashboard", () => {
     );
     const { lastFrame, unmount } = render(tree);
     const frame = lastFrame() ?? "";
-    expect(frame).toContain("Strand TUI");
+    expect(frame).toContain("STRAND COCKPIT");
     expect(frame).toContain("(no active graphs)");
     expect(frame).toContain("(no invocations yet)");
     unmount();
   });
+
+  it("keeps every rendered line inside 80 columns", () => {
+    const source = makeStubSource();
+    const tree = createElement(
+      DataSourceContext.Provider,
+      { value: source },
+      createElement(Dashboard, { pollMs: 10_000, width: 80 }),
+    );
+    const { lastFrame, unmount } = render(tree);
+    const frame = stripAnsi(lastFrame() ?? "");
+    const tooWide = frame.split("\n").filter((line) => line.length > 80);
+
+    expect(tooWide).toEqual([]);
+    unmount();
+  });
+
+  it("opens a width-safe help panel from ?", async () => {
+    const source = makeStubSource();
+    const tree = createElement(
+      DataSourceContext.Provider,
+      { value: source },
+      createElement(Dashboard, { pollMs: 10_000, width: 80 }),
+    );
+    const { lastFrame, stdin, unmount } = render(tree);
+
+    stdin.write("?");
+    await new Promise((resolve) => setTimeout(resolve, 0));
+
+    const frame = stripAnsi(lastFrame() ?? "");
+    expect(frame).toContain("help / cockpit controls");
+    expect(frame).toContain("toggle this help menu");
+    expect(frame).toContain("switch focus between graphs and tools");
+    expect(frame.split("\n").filter((line) => line.length > 80)).toEqual([]);
+    unmount();
+  });
 });
diff --git a/tests/loops/actor-phase3.test.ts b/tests/loops/actor-phase3.test.ts
new file mode 100644
index 0000000..ae89a36
--- /dev/null
+++ b/tests/loops/actor-phase3.test.ts
@@ -0,0 +1,276 @@
+/**
+ * Phase 3 Actor tests: like/bookmark live, everything else shadow.
+ */
+
+import { executeApproved } from "@/loops/actor";
+import { type CandidateEnvelope, __unsafeMarkApproved, proposed } from "@/types/actions";
+import type { RateLimiter } from "@/util/ratelimit";
+import { beforeEach, describe, expect, it, vi } from "vitest";
+
+// Mock dependencies
+vi.mock("@/clients/brain", () => ({
+  brain: {
+    outcome_annotate: vi.fn().mockResolvedValue(undefined),
+  },
+}));
+
+vi.mock("@/clients/x", async () => {
+  const actual = await vi.importActual<typeof import("@/clients/x")>("@/clients/x");
+  return {
+    ...actual,
+    execute: vi.fn().mockResolvedValue({ xObjectId: "tweet_123", reversible: true }),
+    checkMonthlyCapHalt: vi.fn().mockReturnValue(false),
+    incrementMonthlyUsage: vi.fn(),
+    isActorHalted: vi.fn().mockReturnValue(false),
+  };
+});
+
+vi.mock("@/metrics", () => ({
+  recordActionError: vi.fn(),
+}));
+
+vi.mock("@/config", () => ({
+  env: {
+    LOG_LEVEL: "fatal",
+    STRAND_MODE: "live",
+    TIER: "basic",
+  },
+  policies: {
+    caps_per_day: {
+      likes: 200,
+      bookmarks: 50,
+    },
+  },
+  effectiveCap: vi.fn((k: string) => {
+    const caps: Record<string, number> = { likes: 100, bookmarks: 25 };
+    return caps[k] ?? 0;
+  }),
+}));
+
+vi.mock("@/db", () => {
+  const mockDb = {
+    prepare: vi.fn().mockReturnValue({
+      get: vi.fn().mockReturnValue(undefined),
+      run: vi.fn().mockReturnValue({ lastInsertRowid: 1 }),
+    }),
+  };
+  return {
+    db: vi.fn().mockReturnValue(mockDb),
+  };
+});
+
+vi.mock("@/policy/cooldowns", () => ({
+  recordActionCooldowns: vi.fn(),
+}));
+
+vi.mock("@/policy/duplicates", () => ({
+  recordPostText: vi.fn(),
+}));
+
+vi.mock("@/util/idempotency", () => ({
+  idempotencyKey: vi.fn().mockReturnValue("test_key_123"),
+  tweetDedupHash: vi.fn().mockReturnValue("hash_123"),
+}));
+
+vi.mock("@/util/sweeper", () => ({
+  isDuplicateTweet: vi.fn().mockReturnValue(false),
+  recordTweetHash: vi.fn(),
+}));
+
+describe("Phase 3 Actor", () => {
+  let mockRl: RateLimiter;
+
+  beforeEach(async () => {
+    vi.clearAllMocks();
+    // Reset shared env mock: the shadow-mode suite mutates STRAND_MODE in place.
+    (await import("@/config")).env.STRAND_MODE = "live";
+    const rl = { check: vi.fn().mockReturnValue({ allowed: true }), increment: vi.fn() };
+    mockRl = rl as unknown as RateLimiter;
+  });
+
+  const deps = () => ({ rl: mockRl });
+
+  describe("live mode - low-risk actions", () => {
+    it("should execute like action in live mode (not shadow)", async () => {
+      const { execute } = await import("@/clients/x");
+
+      const envelope: CandidateEnvelope = {
+        action: { kind: "like", tweetId: "12345" },
+        rationale: "Test like",
+        confidence: 0.9,
+        relevanceScore: 0.8,
+        sourceEventIds: ["event_1"],
+        requiresHumanReview: false,
+      };
+      const candidate = __unsafeMarkApproved(proposed(envelope));
+
+      await executeApproved(deps(), candidate, "decision_123");
+
+      // Should have called X execute (not shadow)
+      expect(execute).toHaveBeenCalledWith(candidate.action);
+    });
+
+    it("should execute bookmark action in live mode (not shadow)", async () => {
+      const { execute } = await import("@/clients/x");
+
+      const envelope: CandidateEnvelope = {
+        action: { kind: "bookmark", tweetId: "67890" },
+        rationale: "Test bookmark",
+        confidence: 0.85,
+        relevanceScore: 0.75,
+        sourceEventIds: ["event_2"],
+        requiresHumanReview: false,
+      };
+      const candidate = __unsafeMarkApproved(proposed(envelope));
+
+      await executeApproved(deps(), candidate, "decision_456");
+
+      // Should have called X execute (not shadow)
+      expect(execute).toHaveBeenCalledWith(candidate.action);
+    });
+  });
+
+  describe("live mode - non-low-risk actions (shadow)", () => {
+    it("should NOT execute reply in live mode (should be shadow)", async () => {
+      const { execute } = await import("@/clients/x");
+      const { db } = await import("@/db");
+
+      const envelope: CandidateEnvelope = {
+        action: { kind: "reply", tweetId: "12345", text: "Test reply" },
+        rationale: "Test reply",
+        confidence: 0.9,
+        relevanceScore: 0.8,
+        sourceEventIds: ["event_1"],
+        requiresHumanReview: false,
+      };
+      const candidate = __unsafeMarkApproved(proposed(envelope));
+
+      await executeApproved(deps(), candidate, "decision_789");
+
+      // Should NOT have called X execute (shadow)
+      expect(execute).not.toHaveBeenCalled();
+
+      // Should have recorded shadow execution
+      const dbPrepare = db().prepare as ReturnType<typeof vi.fn>;
+      expect(dbPrepare).toHaveBeenCalledWith(
+        expect.stringContaining("UPDATE action_log SET status = 'executed'"),
+      );
+    });
+
+    it("should NOT execute quote in live mode (should be shadow)", async () => {
+      const { execute } = await import("@/clients/x");
+
+      const envelope: CandidateEnvelope = {
+        action: { kind: "quote", tweetId: "12345", text: "Test quote" },
+        rationale: "Test quote",
+        confidence: 0.9,
+        relevanceScore: 0.8,
+        sourceEventIds: ["event_1"],
+        requiresHumanReview: false,
+      };
+      const candidate = __unsafeMarkApproved(proposed(envelope));
+
+      await executeApproved(deps(), candidate, "decision_abc");
+
+      expect(execute).not.toHaveBeenCalled();
+    });
+
+    it("should NOT execute post in live mode (should be shadow)", async () => {
+      const { execute } = await import("@/clients/x");
+
+      const envelope: CandidateEnvelope = {
+        action: { kind: "post", text: "Test post" },
+        rationale: "Test post",
+        confidence: 0.9,
+        relevanceScore: 0.8,
+        sourceEventIds: ["event_1"],
+        requiresHumanReview: false,
+      };
+      const candidate = __unsafeMarkApproved(proposed(envelope));
+
+      await executeApproved(deps(), candidate, "decision_def");
+
+      expect(execute).not.toHaveBeenCalled();
+    });
+
+    it("should NOT execute follow in live mode (should be shadow)", async () => {
+      const { execute } = await import("@/clients/x");
+
+      const envelope: CandidateEnvelope = {
+        action: { kind: "follow", userId: "user_123" },
+        rationale: "Test follow",
+        confidence: 0.9,
+        relevanceScore: 0.8,
+        sourceEventIds: ["event_1"],
+        requiresHumanReview: false,
+      };
+      const candidate = __unsafeMarkApproved(proposed(envelope));
+
+      await executeApproved(deps(), candidate, "decision_ghi");
+
+      expect(execute).not.toHaveBeenCalled();
+    });
+
+    it("should NOT execute DM in live mode (should be shadow)", async () => {
+      const { execute } = await import("@/clients/x");
+
+      const envelope: CandidateEnvelope = {
+        action: { kind: "dm", userId: "user_123", text: "Test DM" },
+        rationale: "Test DM",
+        confidence: 0.9,
+        relevanceScore: 0.8,
+        sourceEventIds: ["event_1"],
+        requiresHumanReview: false,
+      };
+      const candidate = __unsafeMarkApproved(proposed(envelope));
+
+      await executeApproved(deps(), candidate, "decision_jkl");
+
+      expect(execute).not.toHaveBeenCalled();
+    });
+  });
+
+  describe("shadow mode - all actions", () => {
+    beforeEach(async () => {
+      // Override env for these tests
+      const { env } = await import("@/config");
+      env.STRAND_MODE = "shadow";
+    });
+
+    it("should NOT execute like in shadow mode", async () => {
+      const { execute } = await import("@/clients/x");
+
+      const envelope: CandidateEnvelope = {
+        action: { kind: "like", tweetId: "12345" },
+        rationale: "Test like",
+        confidence: 0.9,
+        relevanceScore: 0.8,
+        sourceEventIds: ["event_1"],
+        requiresHumanReview: false,
+      };
+      const candidate = __unsafeMarkApproved(proposed(envelope));
+
+      await executeApproved(deps(), candidate, "decision_mno");
+
+      expect(execute).not.toHaveBeenCalled();
+    });
+
+    it("should NOT execute bookmark in shadow mode", async () => {
+      const { execute } = await import("@/clients/x");
+
+      const envelope: CandidateEnvelope = {
+        action: { kind: "bookmark", tweetId: "67890" },
+        rationale: "Test bookmark",
+        confidence: 0.85,
+        relevanceScore: 0.75,
+        sourceEventIds: ["event_2"],
+        requiresHumanReview: false,
+      };
+      const candidate = __unsafeMarkApproved(proposed(envelope));
+
+      await executeApproved(deps(), candidate, "decision_pqr");
+
+      expect(execute).not.toHaveBeenCalled();
+    });
+  });
+});

From a51df73e696d0cfbeaf8371d0f3aeac12ddae7b2 Mon Sep 17 00:00:00 2001
From: R4vager <tvschonleber@gmail.com>
Date: Fri, 24 Apr 2026 06:38:41 -0400
Subject: [PATCH 2/8] =?UTF-8?q?docs(spec):=20strand=20cockpit=20redesign?=
 =?UTF-8?q?=20=E2=80=94=20provider-agnostic=20chat=20harness?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Design spec for the cockpit rework. Pivots strand from twitter-engine-
monitor to chat-first operator cockpit. Covers:

- provider/subagent/skill architectural split (hermes-agent pattern)
- auth model: BYOK + PKCE device-code + oauth_external credential reuse
  with honest billing warnings (anthropic extra_usage routing, etc.)
- pinned cockpit event schema for parallel ink + web renderers
- subagent spawn with cli-process backend (claude, codex) + budget
  inheritance + depth/concurrency caps
- skill lifecycle (markdown + sqlite) with queued reflexion-driven
  retirement, integrated as a brainctl memory category
- workstream decomposition for codex + 4 devin + 1 claude-code sprint

Hard constraints call out the non-negotiables: policy gate preservation,
pinned renderer protocol, local-only oauth_external, no implicit env-var
activation, queued skill retirement.

References hermes-agent (NousResearch) as the primary reference impl.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../specs/2026-04-24-strand-cockpit-design.md | 412 ++++++++++++++++++
 1 file changed, 412 insertions(+)
 create mode 100644 docs/superpowers/specs/2026-04-24-strand-cockpit-design.md

diff --git a/docs/superpowers/specs/2026-04-24-strand-cockpit-design.md b/docs/superpowers/specs/2026-04-24-strand-cockpit-design.md
new file mode 100644
index 0000000..5260bd4
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-24-strand-cockpit-design.md
@@ -0,0 +1,412 @@
+# Strand Cockpit Redesign — Design Spec
+
+**Date:** 2026-04-24
+**Author:** Terrence (via Claude Opus 4.7 brainstorming)
+**Status:** Approved for implementation plan
+**Audience:** Codex (team lead), 4 Devin agents, 1 Claude Code agent
+
+---
+
+## Executive summary
+
+Replace the current gamified Strand cockpit with a chat-first, provider-agnostic agent harness. The existing X/Twitter engine (Perceiver / Reasoner / Actor / Consolidator) keeps running as a registered background loop; the cockpit stops being a Twitter monitor and becomes a generic operator chat interface with pluggable LLM providers, multi-backend subagent spawning, and a self-curating skill lifecycle.
+
+Architecture reference: [NousResearch/hermes-agent](https://github.com/NousResearch/hermes-agent). Where hermes has solved the same problem cleanly, Strand copies the pattern verbatim and cites it.
+
+---
+
+## Hard constraints (non-negotiable)
+
+These are contracts every stream owner tests against, not prose to read once.
+
+1. **Policy-gate preservation.** Any chat-driven action that maps to an X/Twitter action kind MUST still flow through the existing `Candidate<Approved>` typestate gate in `src/policy/index.ts`. Subagents propose `Candidate<Unchecked>`; only the gate mints `Approved`. Enforced at compile time — TS should refuse a bypass path. Property tests in S1.
+
+2. **Renderer protocol is pinned in §4 of this spec.** Breaking changes bump the `X-Cockpit-Protocol` header major version. Ink renderer (Devin-path) and Web renderer (Devin-path) consume the identical schema. Schema drift = P0 bug.
+
+3. **`oauth_external` credential reuse is local-only; `oauth_device_code` works anywhere.** BYOK works anywhere. The auth picker tells the user which modes are available based on whether the cockpit is running on the same machine as their logged-in `claude` / `codex` / `gemini` CLI.
+
+4. **Anthropic "OAuth-external" mode carries a billing warning.** Per open hermes-agent issue #12905, Anthropic routes third-party OAuth clients to the `extra_usage` billing pool, which is empty for most users. The cockpit surfaces this inline before the first call. No silent fallback fiction.
+
+5. **No implicit activation from environment variables.** The presence of `CLAUDE_CODE_OAUTH_TOKEN` in the environment does NOT auto-activate the Anthropic provider. The user must explicitly pick a provider in the first-run flow or via `/auth`. Prevents silent token spend.
+
+6. **Skill retirement is queued, not silent.** v1 ships with auto-retire proposals landing in a review feed; user approves with one click. Flip to silent after usage data validates the signal.
+
+7. **Claude Code handling contract (`--bare` gotcha).** Bare mode skips OAuth and requires `ANTHROPIC_API_KEY`. The `cli-process` backend's Claude Code parser never passes `--bare` when the user's auth mode is `oauth_external`. If the cockpit is in BYOK-Anthropic mode AND the user wants `--bare`, wire it through — otherwise don't.
+
+---
+
+## §1 — Architecture at a glance
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│  Cockpit Core (headless, no UI imports)                     │
+│  ┌───────────────┐  ┌──────────────┐  ┌──────────────────┐  │
+│  │ Transcript    │  │ Subagent     │  │ Skill Lifecycle  │  │
+│  │ event bus     │  │ registry     │  │ (c + iii)        │  │
+│  └───────────────┘  └──────────────┘  └──────────────────┘  │
+│  ┌───────────────┐  ┌──────────────┐  ┌──────────────────┐  │
+│  │ Provider      │  │ Policy gate  │  │ Loop registry    │  │
+│  │ router        │  │ (untouched)  │  │ (X engine =      │  │
+│  │               │  │              │  │  one entry)      │  │
+│  └───────────────┘  └──────────────┘  └──────────────────┘  │
+└──────────┬──────────────────────┬───────────────────────────┘
+           │ Renderer Protocol    │ Renderer Protocol
+           │ (in-process bus)     │ (SSE, same schema)
+           ▼                      ▼
+    ┌──────────────┐        ┌──────────────┐
+    │ Ink renderer │        │ Web renderer │
+    │ (terminal)   │        │ (Vite+Hono)  │
+    └──────────────┘        └──────────────┘
+```
+
+**Two invariants:**
+- Core never imports from either renderer.
+- Both renderers consume the same event schema.
+
+---
+
+## §2 — Provider / subagent / skill split
+
+This is the structural refactor that everything else depends on. The hermes codebase demonstrates the split; Strand adopts it.
+
+| Layer | What it is | Examples |
+|---|---|---|
+| **Provider** | Chat completions source — where tokens come from | `anthropic-api`, `openai-api`, `xai-api`, `gemini-api`, `openai-compat` (Ollama / LM Studio / OpenRouter / Together), `nous-portal` |
+| **Subagent** | Delegatable worker the main agent spawns | `internal`, `cli-process` (generic), `ssh` |
+| **Skill** | Markdown instruction telling the agent *when* to use a provider / tool / subagent | `claude-code.md`, `codex.md`, `pr-review.md`, arbitrary new skills |
+
+**Consequences:**
+- `claude` and `codex` CLIs are NOT LLM providers. They are skills that invoke the `cli-process` subagent backend. One generic backend, unlimited CLI skills.
+- Adding a new CLI (Aider, Cline, gpt-engineer, whatever ships next) = write a skill, not a backend.
+- The existing `src/clients/llm/` stays the home for provider adapters. Subagents live in `src/agent/`.
+
+---
+
+## §3 — Auth & provider model
+
+Reference implementation: `hermes_cli/auth.py`.
+
+### Auth types
+
+| Auth type | Mechanism | Host constraint |
+|---|---|---|
+| `api_key` | User-supplied key, read from env or Strand's encrypted store | Any |
+| `oauth_device_code` | Real PKCE device-code flow. POST to issuer's `/deviceauth/usercode`, show user a URL + code, poll, exchange at `/oauth/token`. Strand manages refresh. | Any |
+| `oauth_external` | Read credentials another tool wrote to disk (`~/.claude/.credentials.json`, `~/.qwen/oauth_creds.json`, etc.) | Local only |
+
+### Per-provider plan
+
+| Provider | Primary | Secondary | Notes |
+|---|---|---|---|
+| Anthropic | `api_key` (`ANTHROPIC_API_KEY`) | `oauth_external` from Claude Code creds | Secondary shows billing warning (hard constraint #4) |
+| OpenAI | `api_key` (`OPENAI_API_KEY`) | `oauth_device_code` against `auth.openai.com` (genuine PKCE — see hermes `_codex_device_code_login`) | Device-code works on any host |
+| xAI | `api_key` (`XAI_API_KEY`) | — | Removed as the default — user picks |
+| Gemini | `api_key` (`GEMINI_API_KEY`) | `oauth_external` from gemini-cli creds | |
+| openai-compat | `api_key` + `baseURL` | — | Covers Ollama, LM Studio, OpenRouter, Together |
+| Nous Portal | `oauth_device_code` | `api_key` fallback | |
+
+### Device-code flow reference (OpenAI)
+
+```
+POST https://auth.openai.com/api/accounts/deviceauth/usercode
+  body: { client_id }
+  → { user_code, device_auth_id, interval }
+
+# Show user: open https://auth.openai.com/codex/device, enter code
+# Poll:
+POST https://auth.openai.com/api/accounts/deviceauth/token
+  body: { device_auth_id, user_code }
+  → 200 { authorization_code, code_verifier }  OR  403/404 (not yet)
+
+POST https://auth.openai.com/oauth/token
+  body (form): { grant_type: authorization_code, code, redirect_uri,
+                 client_id, code_verifier }
+  → { access_token, refresh_token, id_token, expires_in }
+```
+
+Max wait 15 minutes. Poll interval ≥ 3s. Port hermes's implementation directly.
+
+### Auth store shape
+
+```jsonc
+// ~/.strand/auth.json
+{
+  "active_provider": "openai",
+  "providers": {
+    "openai":    { "auth_type": "oauth_device_code", "tokens": {...}, "expires_at": "..." },
+    "anthropic": { "auth_type": "api_key", "source": "env:ANTHROPIC_API_KEY" }
+  },
+  "suppressed_sources": { "anthropic": ["cli_credentials"] }
+}
+```
+
+**Rules** (verbatim from hermes):
+1. No implicit use of external credentials — see hard constraint #5.
+2. `suppressed_sources` lets users blacklist a specific discovery path per provider.
+3. Single-writer file lock on the auth store during refresh.
+
+### First-run UX
+
+No default. Picker shows the provider table with inline "how this will be billed" copy. Choice persists to `~/.strand/auth.json` + `~/.strand/profile.json`. Switching is a slash command: `/model anthropic claude-sonnet-4-6`.
+
+**`strand.config.yaml`:** the `llm.provider: xai` default is removed. Explicit selection required or cockpit errors with a clear picker prompt.
+
+### Language in the UI
+
+Label the `oauth_external` entries honestly: *"Use my Claude Pro/Max subscription (local only) — may bill as metered API usage, see notice"*. Avoid the word "OAuth" alone, since the semantics vary per provider.
+
+---
+
+## §4 — Cockpit substrate
+
+### Packages
+
+```
+src/cockpit/core/      ← no UI imports; pure TypeScript
+src/cockpit/ink/       ← depends on core only
+src/cockpit/web/       ← depends on core only; Vite + Hono, served by `strand dev`
+```
+
+### Core exports
+
+- `Transcript` — append-only event log (SQLite-backed, keyed by session UUID). Survives restarts.
+- `ChatController` — takes user input, routes to provider, emits events.
+- `SubagentRegistry` — tracks spawned workers (see §5).
+- `SkillRegistry` — see §6.
+- `ProviderRouter` — picks the right provider per the auth/profile from §3.
+- `EventBus` — in-process `EventEmitter<CockpitEvent>`; renderers subscribe.
+
+### Renderer protocol (PINNED)
+
+```ts
+type CockpitEvent =
+  | { t: 'transcript.append', sessionId: string, message: Message }
+  | { t: 'transcript.delta',  sessionId: string, messageId: string, chunk: string }
+  | { t: 'tool.start',        sessionId: string, callId: string, name: string, args: unknown }
+  | { t: 'tool.progress',     sessionId: string, callId: string, chunk: string }
+  | { t: 'tool.end',          sessionId: string, callId: string, ok: boolean, result?: unknown }
+  | { t: 'subagent.spawn',    subagentId: string, backend: SubagentBackend, parentSessionId: string }
+  | { t: 'subagent.event',    subagentId: string, kind: 'stdout'|'stderr'|'status', chunk: string }
+  | { t: 'subagent.end',      subagentId: string, ok: boolean, exit?: number }
+  | { t: 'skill.proposal',    proposalId: string, kind: 'draft'|'retire', payload: SkillProposal }
+  | { t: 'skill.decision',    proposalId: string, decision: 'accepted'|'rejected', by: 'user'|'auto' }
+  | { t: 'provider.switch',   from: ProviderId, to: ProviderId }
+  | { t: 'policy.gate',       candidateId: string, result: 'approved'|'rejected', reason?: string }
+  | { t: 'budget.warn',       sessionId: string, dimension: 'tokens'|'usd'|'wallclock'|'toolCalls', used: number, cap: number }
+  | { t: 'error',             sessionId?: string, code: string, message: string };
+```
+
+- **Ink** subscribes to the in-process `EventBus` directly.
+- **Web** connects via SSE at `GET /events` (same schema, serialized).
+- Both render **from the event stream**, never query mutable state.
+- **Version header:** `X-Cockpit-Protocol: 1` on the SSE stream. Bumping it is a major change; renderers warn on mismatch.
+
+### Transport details
+
+- Web renderer served by `strand dev` (Vite + Hono); production build via `strand web-build` → `dist/web/`.
+- HTTP endpoints: `GET /events` (SSE event stream), `POST /input` (user input), `POST /commands/:slash` (slash commands).
+- Auth to the local web server: loopback-only, random token written to `~/.strand/cockpit.token`, passed via header. Prevents other local processes from snooping.
+
+---
+
+## §5 — Subagent spawn model
+
+### Unified interface
+
+```ts
+interface Subagent {
+  id: string;
+  backend: 'internal' | 'cli-process' | 'ssh';
+  spawn(spec: SpawnSpec): Promise<SubagentHandle>;
+}
+
+interface SubagentHandle {
+  send(input: string): Promise<void>;     // for interactive (tmux / stdin)
+  events: AsyncIterable<CockpitEvent>;    // normalized into core's event schema
+  status(): Promise<SubagentStatus>;
+  cancel(): Promise<void>;
+  budget: BudgetTracker;                  // inherited cap, child ≤ parent
+}
+```
+
+### Backends
+
+| Backend | Implementation | Use case |
+|---|---|---|
+| `internal` | Wrap existing `src/agent/spawn.ts`. Shares memory (brainctl), policy gate, provider router. | Cheap in-process delegation; capability-limited sub-agents |
+| `cli-process` | Generic. Takes `{ cmd, args, mode: 'oneshot'\|'interactive', parser: StreamParser }`. Oneshot pipes stdin/stdout; interactive wraps in `tmux` (hidden from user). Ships with parsers for `claude -p --output-format stream-json`, `codex exec --json`, and raw-text passthrough. | `claude`, `codex`, any future CLI agent |
+| `ssh` | Wrap existing `src/agent/executor-ssh.ts`. | Remote shell, future remote worker fleet |
+
+### Budget inheritance
+
+Every subagent inherits ≤ parent budget on all four dimensions: `tokens`, `usdTicks`, `wallClockMs`, `toolCalls`. Child can't exceed parent's remaining. Enforced at spawn, not trust-the-child.
+
+### Concurrency + depth caps (from hermes `tools/delegate_tool.py`)
+
+- `maxDepth: 3` — cockpit (0) → agent (1) → subagent (2) → grand-subagent (3); any spawn deeper than 3 is rejected.
+- `maxConcurrentChildren: 3` per parent (configurable via `strand.config.yaml`).
+- Heartbeat every 30s during long delegations.
+- Stale subagent auto-cancelled at 10 minutes of no progress (override-able per-spawn).
+
+### Chat-driven spawning
+
+Slash commands in cockpit chat:
+- `/spawn claude <task>` — delegates to Claude Code skill
+- `/spawn codex <task>` — delegates to Codex skill
+- `/spawn internal <task>` — internal Strand subagent
+- `/spawn ssh <host> <task>` — remote worker
+
+Each spawned worker gets its own tab (web) / pane (Ink). Worker events stream into the parent transcript AND the worker's own sub-transcript.
+
+### Claude Code parser (implementation note for S4)
+
+Ship oneshot-mode default. Example invocation:
+
+```bash
+claude -p "<task>" \
+  --output-format stream-json \
+  --verbose \
+  --include-partial-messages \
+  --max-turns 10 \
+  --allowedTools "Read,Edit,Bash" \
+  --max-budget-usd 2.00
+```
+
+Parse newline-delimited JSON. Map `stream_event` → `subagent.event` kind `stdout`. Map `system/api_retry` → `subagent.event` kind `status`. Terminal `result` event carries `session_id`, `num_turns`, `total_cost_usd` — emit these as a final `subagent.event` kind `status` chunk, then emit `subagent.end` (the pinned §4 `subagent.end` shape carries only `ok` and `exit?`, so it has no payload field). For interactive mode, wrap in tmux per hermes Claude Code skill (handle trust dialog + bypass-permissions dialog as specified in that skill).
+
+### Policy gate preservation
+
+Subagents proposing X/Twitter actions emit `Candidate<Unchecked>` — the gate in `src/policy/index.ts` is the only code path that can mint `Candidate<Approved>`. Subagents cannot import the gate; TS refuses the bypass at compile time. Hard constraint #1.
+
+---
+
+## §6 — Skill lifecycle
+
+### Storage (option "c": markdown + SQLite)
+
+```
+src/agent/skills/*.md        ← human-readable, git-tracked, frontmatter spec
+data/skills.sqlite           ← executable record: usage_count, success_count,
+                              token_cost_p50/p95, last_used_at, trust_score,
+                              triggers[], supersedes[], status
+                              (active | retired | draft | queued_draft | queued_retire)
+```
+
+### Frontmatter shape
+
+```yaml
+---
+name: claude-code
+description: Delegate coding tasks to Claude Code CLI
+version: 1.0.0
+triggers: ["coding", "refactor", "review", "PR"]
+backend: cli-process
+spawn_spec:
+  cmd: claude
+  args: ["-p", "--output-format", "stream-json", "--verbose"]
+  parser: claude-code-stream
+tools_allowed: [Read, Edit, Bash, Write]
+budget: { tokens: 50000, usdTicks: 2000000, wallClockMs: 300000 }
+---
+```
+
+### Evolution loop (option "iii": reflexion + usage)
+
+1. **Post-task reflexion** — after every completed task, a lightweight judge model reads the transcript and emits 0-N proposals: `{ kind: 'draft'|'retire', rationale, proposed_frontmatter? }`.
+2. **Usage metrics** tick on every skill invocation (success, latency, token cost, user-aborted).
+3. **Nightly scorer** (on the consolidator schedule):
+   - `retire` candidate = hit-rate < 0.15 OR (success-rate < 0.5 AND n ≥ 10) OR (superseded by a higher-scoring skill on same triggers).
+   - `draft` candidate = reflexion flagged AND ≥3 sessions exhibited the same pattern AND no active skill matches triggers.
+4. **All proposals queue.** Cockpit surfaces a "Skill Review" feed. One-click accept/reject. Rejection remembered — same proposal won't re-queue for 30 days.
+5. **Audit trail.** Every accept/reject logged to brainctl as a `decision` event with rationale.
+
+### brainctl integration
+
+Skills are a memory category. `skill` joins the existing categories (`convention | decision | environment | identity | integration | lesson | preference | project | user`). This reuses:
+- W(m) trust gate
+- Retirement analysis (the nightly scorer IS `retirement_analysis` filtered to `category=skill`)
+- Labile-window rescue
+- Trust decay
+
+### Token-bloat reduction (the actual ask)
+
+- Skills are NOT dumped into the system prompt.
+- Skills retrieved JIT via trigger-match against user's current turn. Top-K (default 3) included.
+- A skill's markdown body is the full instruction; never pasted inline unless match score clears a threshold.
+- Retired skills are removed from the retrieval index in the same minute their retirement is approved.
+- `/skills` slash command lists active + queued + retired-with-un-retire.
+
+---
+
+## §7 — Gamified panel disposition
+
+- **Default `strand` entry point** → drops user into the chat cockpit (web or Ink, user's pick on first run, persists).
+- **Legacy panels** accessible via `strand tui --classic` or `/classic`. Zero loss, just not the default.
+- **Twitter engine** keeps running when credentials + policy are configured. In the cockpit, it's a registered background loop (one entry in the §1 loop registry) with its own tab emitting status events. The operator can chat without reading per-tweet telemetry.
+- **Systems telemetry** that used to live in gamified panels now flows into a collapsible right-rail "Systems" drawer — off by default. Keeps chat context clean.
+
+---
+
+## §8 — Workstream decomposition
+
+**Ownership principle:** Claude Code takes the TS-hardest seats (typestate, policy gate, core event schema). Devins take web / adapters / storage / parallelizable surface work.
+
+| Stream | Owner | Scope | Depends on |
+|---|---|---|---|
+| **S0 — Spec + scaffolding** | Codex (team lead) | Read this spec. Scaffold `src/cockpit/core/` with the §4 event schema. Land empty package skeletons. Stub `SubagentHandle` + `Subagent` interfaces. Set up CI matrix. | — |
+| **S1 — Cockpit core + policy-gate preservation** | **Claude Code** | Implement `Transcript`, `EventBus`, `ChatController`, `ProviderRouter`. Prove any chat-driven X-engine action still compiles through `Candidate<Approved>`. Property tests enforcing hard constraint #1. | S0 |
+| **S2 — Auth adapters + provider registry** | Devin-1 | BYOK for anthropic/openai/xai/gemini. PKCE device-code for OpenAI + Nous Portal. `oauth_external` reader for `~/.claude/.credentials.json`. `~/.strand/auth.json` store with single-writer lock. Picker UI wiring. | S0 |
+| **S3 — Web cockpit renderer** | Devin-2 | Vite + Hono app served by `strand dev`. SSE consumer rendering §4 schema. Chat UI + subagent tabs + slash commands + `/skills` review feed. Tailwind + shadcn/ui. | S1 partial (schema + stub events) |
+| **S4 — Subagent backends + seed skills** | Devin-3 | `cli-process` backend with `claude -p` + `codex exec` parsers. tmux wrapping for interactive. Seed skills: `claude-code`, `codex`, `pr-review` (port from hermes). Budget inheritance + caps. | S1 |
+| **S5 — Skill lifecycle + brainctl integration + Ink renderer** | Devin-4 | `data/skills.sqlite` schema. Usage metric hooks. Reflexion judge. Nightly scorer. Queue + review UI wiring. brainctl `skill` category registration. Ink renderer for the "classic"-preserving path. | S1, S4 seed skills |
+
+### Integration checkpoints (Codex enforces)
+
+- **Day 1** — S0 landed, event schema frozen. All agents read this spec top-to-bottom, initials at the bottom of `docs/superpowers/specs/2026-04-24-strand-cockpit-design.md`.
+- **Day 3** — S1 + S2 render a streaming BYOK chat in Ink. If the Ink renderer can't render a streaming response from any provider by end of day 3, the event schema has a bug — fix before anything else ships.
+- **Day 5** — S3 web cockpit renders the same schema. Parity test: same events produce same transcript in both renderers.
+- **Day 7** — S4 first successful `/spawn claude` in web cockpit.
+- **Day 10** — S5 first queued skill proposal flows end-to-end; review UI works.
+- **Day 12** — Integration + cutover. Old TUI moves to `--classic`.
+
+### Kill switches
+
+- **S2 Anthropic-OAuth-external** → `extra_usage` bug: ship with warning banner, fall back to BYOK. Don't block sprint on Anthropic's billing behavior.
+- **S4 `claude -p` parser** → version-mismatch-unreliable: fall back to raw-text parser, log, proceed.
+- **S5 reflexion judge** → costs > $1/session: disable by default, keep queue + manual `/skill propose` only.
+
+### Test matrix (minimum)
+
+- **Core** — property test: no X-engine action reaches actor without `Candidate<Approved>`.
+- **Auth** — device-code flow against openai (mocked token endpoint in CI).
+- **Renderer parity** — record event stream from a scripted chat, replay through Ink and Web, assert identical transcript state.
+- **Subagent budget** — spawn child with 50% of parent budget, have the child try to burn 60% of the parent's budget (i.e. exceed its own cap), assert child aborted, parent proceeds.
+- **Skill lifecycle** — seed low-hit skill, tick usage below threshold, run scorer, assert `queued_retire` proposal emitted.
+
+---
+
+## References
+
+- [NousResearch/hermes-agent](https://github.com/NousResearch/hermes-agent) — primary reference implementation.
+  - `hermes_cli/auth.py` — provider registry, device-code flow, auth store.
+  - `tools/delegate_tool.py` — subagent spawn conventions.
+  - `skills/autonomous-ai-agents/claude-code/SKILL.md` — Claude Code wrapping pattern.
+  - `skills/autonomous-ai-agents/codex/SKILL.md` — Codex wrapping pattern.
+  - Issue [#12905](https://github.com/NousResearch/hermes-agent/issues/12905) — Anthropic OAuth `extra_usage` routing.
+- [Claude Code CLI reference](https://code.claude.com/docs/en/cli-reference) — flags, output formats, session management.
+- [OpenAI device authorization](https://auth.openai.com/codex/device) — the endpoint users enter their device code at.
+- Strand `CLAUDE.md` — existing project non-negotiables (policy gate, X API tier reality, Twitter engine architecture).
+- Strand `docs/ARCHITECTURE.md` — existing architecture to preserve.
+
+---
+
+## Sign-off
+
+Every stream owner acknowledges they've read this spec by appending their name + date below before writing code.
+
+- [ ] Codex (team lead)
+- [ ] Claude Code
+- [ ] Devin-1
+- [ ] Devin-2
+- [ ] Devin-3
+- [ ] Devin-4

From f194cb99f8d1157fd0f427f91e1b4a324e9cbe7b Mon Sep 17 00:00:00 2001
From: R4vager <tvschonleber@gmail.com>
Date: Fri, 24 Apr 2026 06:47:01 -0400
Subject: [PATCH 3/8] =?UTF-8?q?docs(spec):=20tighten=20strand=20cockpit=20?=
 =?UTF-8?q?spec=20=E2=80=94=20lean-by-default,=20prune=20providers,=20flow?=
 =?UTF-8?q?-gate=20timeline?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- elevate "lean by default" as a cross-cutting principle: JIT skill
  retrieval, --bare default for subagents, summarizing context engine
  default, cheapest-capable reflexion judge, lean default budgets
- add hard constraint #8 pinning lean session budget defaults
- prune provider list to anthropic / openai / xai / gemini / openai-compat
  (the long tail is reachable via openai-compat + baseURL, no new adapters)
- remove nous-portal from the v1 registry
- replace day-by-day sprint timeline with flow-gated checkpoints — agents
  move faster than calendar time

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../specs/2026-04-24-strand-cockpit-design.md | 48 ++++++++++++++-----
 1 file changed, 35 insertions(+), 13 deletions(-)

diff --git a/docs/superpowers/specs/2026-04-24-strand-cockpit-design.md b/docs/superpowers/specs/2026-04-24-strand-cockpit-design.md
index 5260bd4..31006ab 100644
--- a/docs/superpowers/specs/2026-04-24-strand-cockpit-design.md
+++ b/docs/superpowers/specs/2026-04-24-strand-cockpit-design.md
@@ -15,13 +15,30 @@ Architecture reference: [NousResearch/hermes-agent](https://github.com/NousResea
 
 ---
 
+## Cross-cutting principle: LEAN BY DEFAULT
+
+Every token in Strand's own runtime costs the operator money. The cockpit is the operator's *interface to agents*, not an agent itself — it should add the smallest possible context footprint on top of the user's prompt.
+
+**Enforced everywhere:**
+- No skill bodies in the system prompt. Skills are retrieved JIT (top-K=3 by default, configurable down to 0).
+- No background-loop telemetry in chat context. Systems drawer is off by default.
+- Subagents default to `--bare` + minimal `--allowedTools` when the auth mode permits (see hard constraint #7).
+- Reflexion judge defaults to the cheapest capable model (Haiku / GPT-4o-mini / Grok-4-fast). Never the reasoner.
+- Context compaction default flips from `noop` → `summarizing` (`thresholdRatio: 0.75`, `keepTailTurns: 8`, `summarizerMaxOutputTokens: 800`).
+- Event schema is lean — chunks are byte-sized, no giant base64 payloads through the renderer protocol.
+- Every provider call logs `usage.{input,cached,output}_tokens` + prompt_cache_key. Unused cache = bug.
+
+If a feature adds context weight to the main chat without a direct operator-visible benefit, it's wrong by default.
+
+---
+
 ## Hard constraints (non-negotiable)
 
 These are contracts every stream owner tests against, not prose to read once.
 
 1. **Policy-gate preservation.** Any chat-driven action that maps to an X/Twitter action kind MUST still flow through the existing `Candidate<Approved>` typestate gate in `src/policy/index.ts`. Subagents propose `Candidate<Unchecked>`; only the gate mints `Approved`. Enforced at compile time — TS should refuse a bypass path. Property tests in S1.
 
-2. **Renderer protocol is pinned in §4 of this spec.** Breaking changes bump the `X-Cockpit-Protocol` header major version. Ink renderer (Devin-path) and Web renderer (Devin-path) consume the identical schema. Schema drift = P0 bug.
+2. **Renderer protocol is pinned in §4 of this spec.** Breaking changes bump the `X-Cockpit-Protocol` header major version. Ink and Web renderers consume the identical schema. Schema drift = P0 bug.
 
 3. **`oauth_external` credential reuse is local-only; `oauth_device_code` works anywhere.** BYOK works anywhere. The auth picker tells the user which modes are available based on whether the cockpit is running on the same machine as their logged-in `claude` / `codex` / `gemini` CLI.
 
@@ -31,7 +48,9 @@ These are contracts every stream owner tests against, not prose to read once.
 
 6. **Skill retirement is queued, not silent.** v1 ships with auto-retire proposals landing in a review feed; user approves with one click. Flip to silent after usage data validates the signal.
 
-7. **Claude Code handling contract (`--bare` gotcha).** Bare mode skips OAuth and requires `ANTHROPIC_API_KEY`. The `cli-process` backend's Claude Code parser never passes `--bare` when the user's auth mode is `oauth_external`. If the cockpit is in BYOK-Anthropic mode AND the user wants `--bare`, wire it through — otherwise don't.
+7. **Claude Code handling contract (`--bare` gotcha).** Bare mode skips OAuth and requires `ANTHROPIC_API_KEY`. The `cli-process` backend's Claude Code parser never passes `--bare` when the user's auth mode is `oauth_external`. In BYOK-Anthropic mode, `--bare` is the **default** for subagent spawns (fastest startup, lowest token overhead) — operator can opt in to full-context mode per-spawn.
+
+8. **Lean budget defaults.** Default budgets per cockpit session: `tokens: 50_000`, `usdTicks: 2_000_000` ($0.002), `wallClockMs: 300_000`, `toolCalls: 40`. Subagent spawns get half their parent's remaining budget by default. Operator can raise per-session; the default is set to yell early on bloat.
 
 ---
 
@@ -71,7 +90,7 @@ This is the structural refactor that everything else depends on. The hermes code
 
 | Layer | What it is | Examples |
 |---|---|---|
-| **Provider** | Chat completions source — where tokens come from | `anthropic-api`, `openai-api`, `xai-api`, `gemini-api`, `openai-compat` (Ollama / LM Studio / OpenRouter / Together), `nous-portal` |
+| **Provider** | Chat completions source — where tokens come from | `anthropic-api`, `openai-api`, `xai-api`, `gemini-api`, `openai-compat` (Ollama / LM Studio / OpenRouter / Together) |
 | **Subagent** | Delegatable worker the main agent spawns | `internal`, `cli-process` (generic), `ssh` |
 | **Skill** | Markdown instruction telling the agent *when* to use a provider / tool / subagent | `claude-code.md`, `codex.md`, `pr-review.md`, arbitrary new skills |
 
@@ -96,14 +115,15 @@ Reference implementation: `hermes_cli/auth.py`.
 
 ### Per-provider plan
 
+**Lean list.** v1 ships with exactly these providers. No Kimi / z.ai / MiniMax / DeepSeek / etc. in the first cut — the `openai-compat` entry already covers any OpenAI-API-compatible endpoint via `baseURL`, which handles 90% of future additions without new adapter code.
+
 | Provider | Primary | Secondary | Notes |
 |---|---|---|---|
 | Anthropic | `api_key` (`ANTHROPIC_API_KEY`) | `oauth_external` from Claude Code creds | Secondary shows billing warning (hard constraint #4) |
 | OpenAI | `api_key` (`OPENAI_API_KEY`) | `oauth_device_code` against `auth.openai.com` (genuine PKCE — see hermes `_codex_device_code_login`) | Device-code works on any host |
 | xAI | `api_key` (`XAI_API_KEY`) | — | Removed as the default — user picks |
 | Gemini | `api_key` (`GEMINI_API_KEY`) | `oauth_external` from gemini-cli creds | |
-| openai-compat | `api_key` + `baseURL` | — | Covers Ollama, LM Studio, OpenRouter, Together |
-| Nous Portal | `oauth_device_code` | `api_key` fallback | |
+| openai-compat | `api_key` + `baseURL` | — | Catches Ollama, LM Studio, OpenRouter, Together, and the long tail — no per-vendor adapter |
 
 ### Device-code flow reference (OpenAI)
 
@@ -355,19 +375,21 @@ Skills are a memory category. `skill` joins the existing categories (`convention
 |---|---|---|---|
 | **S0 — Spec + scaffolding** | Codex (team lead) | Read this spec. Scaffold `src/cockpit/core/` with the §4 event schema. Land empty package skeletons. Stub `SubagentHandle` + `Subagent` interfaces. Set up CI matrix. | — |
 | **S1 — Cockpit core + policy-gate preservation** | **Claude Code** | Implement `Transcript`, `EventBus`, `ChatController`, `ProviderRouter`. Prove any chat-driven X-engine action still compiles through `Candidate<Approved>`. Property tests enforcing hard constraint #1. | S0 |
-| **S2 — Auth adapters + provider registry** | Devin-1 | BYOK for anthropic/openai/xai/gemini. PKCE device-code for OpenAI + Nous Portal. `oauth_external` reader for `~/.claude/.credentials.json`. `~/.strand/auth.json` store with single-writer lock. Picker UI wiring. | S0 |
+| **S2 — Auth adapters + provider registry** | Devin-1 | BYOK for anthropic/openai/xai/gemini + `openai-compat`. PKCE device-code for OpenAI. `oauth_external` reader for `~/.claude/.credentials.json` + gemini-cli creds. `~/.strand/auth.json` store with single-writer lock. Picker UI wiring. | S0 |
 | **S3 — Web cockpit renderer** | Devin-2 | Vite + Hono app served by `strand dev`. SSE consumer rendering §4 schema. Chat UI + subagent tabs + slash commands + `/skills` review feed. Tailwind + shadcn/ui. | S1 partial (schema + stub events) |
 | **S4 — Subagent backends + seed skills** | Devin-3 | `cli-process` backend with `claude -p` + `codex exec` parsers. tmux wrapping for interactive. Seed skills: `claude-code`, `codex`, `pr-review` (port from hermes). Budget inheritance + caps. | S1 |
 | **S5 — Skill lifecycle + brainctl integration + Ink renderer** | Devin-4 | `data/skills.sqlite` schema. Usage metric hooks. Reflexion judge. Nightly scorer. Queue + review UI wiring. brainctl `skill` category registration. Ink renderer for the "classic"-preserving path. | S1, S4 seed skills |
 
-### Integration checkpoints (Codex enforces)
+### Integration checkpoints (Codex enforces, not time-boxed — flow-boxed)
+
+Agents work fast. These are gate conditions, not days. Codex holds the green flag between each one.
 
-- **Day 1** — S0 landed, event schema frozen. All agents read this spec top-to-bottom, initials at the bottom of `docs/superpowers/specs/2026-04-24-strand-cockpit-design.md`.
-- **Day 3** — S1 + S2 render a streaming BYOK chat in Ink. If the Ink renderer can't render a streaming response from any provider by end of day 3, the event schema has a bug — fix before anything else ships.
-- **Day 5** — S3 web cockpit renders the same schema. Parity test: same events produce same transcript in both renderers.
-- **Day 7** — S4 first successful `/spawn claude` in web cockpit.
-- **Day 10** — S5 first queued skill proposal flows end-to-end; review UI works.
-- **Day 12** — Integration + cutover. Old TUI moves to `--classic`.
+1. **Spec-read gate.** S0 landed, event schema frozen in code. All agents initial the sign-off at the bottom of this spec. Nobody writes feature code until this gate passes.
+2. **Alive gate.** S1 + S2 land a streaming BYOK chat in Ink. If any provider's streaming response doesn't render into the Ink transcript, the event schema has a bug — fix before anything else ships. This is the single highest-value checkpoint.
+3. **Parity gate.** S3 web cockpit renders the same schema. Parity test: identical event stream → identical transcript in Ink and Web. Divergence = P0.
+4. **Spawn gate.** S4 `/spawn claude` and `/spawn codex` both complete a oneshot task end-to-end with streamed output into the cockpit transcript. `--bare` default path verified. Budget inheritance verified (child can't exceed parent remaining).
+5. **Skill gate.** S5 scores a low-hit skill below threshold, emits a `queued_retire` proposal, operator approves in the review UI, skill drops out of the retrieval index. Same path for `queued_draft`.
+6. **Cutover gate.** Old TUI moves to `strand tui --classic`. Default `strand` enters chat cockpit. `strand.config.yaml` example updated (no default provider). Release branch opened.
 
 ### Kill switches
 

From 4948f3f129e130f61ef3e669d91dd242e89262e1 Mon Sep 17 00:00:00 2001
From: R4vager <tvschonleber@gmail.com>
Date: Fri, 24 Apr 2026 06:55:49 -0400
Subject: [PATCH 4/8] scaffold cockpit core protocol

---
 .github/workflows/ci.yml                      |   6 +-
 .../specs/2026-04-24-strand-cockpit-design.md |   2 +-
 src/auth/env-store.ts                         |   1 -
 src/cli/commands/review.ts                    | 126 +++++++-------
 src/cli/commands/status.ts                    |  20 +--
 src/cockpit/core/budget.ts                    |  19 +++
 src/cockpit/core/controller.ts                |  26 +++
 src/cockpit/core/events.ts                    | 156 ++++++++++++++++++
 src/cockpit/core/index.ts                     |   6 +
 src/cockpit/core/providers.ts                 |  17 ++
 src/cockpit/core/subagents.ts                 |  45 +++++
 src/cockpit/core/transcript.ts                |   9 +
 src/cockpit/ink/index.ts                      |   6 +
 src/cockpit/web/index.ts                      |  13 ++
 src/loops/actor.ts                            |   9 +-
 src/metrics/index.ts                          |  11 +-
 tests/cockpit/protocol.test.ts                |  70 ++++++++
 17 files changed, 459 insertions(+), 83 deletions(-)
 create mode 100644 src/cockpit/core/budget.ts
 create mode 100644 src/cockpit/core/controller.ts
 create mode 100644 src/cockpit/core/events.ts
 create mode 100644 src/cockpit/core/index.ts
 create mode 100644 src/cockpit/core/providers.ts
 create mode 100644 src/cockpit/core/subagents.ts
 create mode 100644 src/cockpit/core/transcript.ts
 create mode 100644 src/cockpit/ink/index.ts
 create mode 100644 src/cockpit/web/index.ts
 create mode 100644 tests/cockpit/protocol.test.ts

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 267f686..0361052 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -12,6 +12,10 @@ concurrency:
 jobs:
   verify:
     runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        node-version: [22, 24]
     steps:
       - uses: actions/checkout@v4
 
@@ -21,7 +25,7 @@ jobs:
 
       - uses: actions/setup-node@v4
         with:
-          node-version: 22
+          node-version: ${{ matrix.node-version }}
           cache: pnpm
 
       - name: install deps
diff --git a/docs/superpowers/specs/2026-04-24-strand-cockpit-design.md b/docs/superpowers/specs/2026-04-24-strand-cockpit-design.md
index 31006ab..170e180 100644
--- a/docs/superpowers/specs/2026-04-24-strand-cockpit-design.md
+++ b/docs/superpowers/specs/2026-04-24-strand-cockpit-design.md
@@ -426,7 +426,7 @@ Agents work fast. These are gate conditions, not days. Codex holds the green fla
 
 Every stream owner acknowledges they've read this spec by appending their name + date below before writing code.
 
-- [ ] Codex (team lead)
+- [x] Codex (team lead) — 2026-04-24
 - [ ] Claude Code
 - [ ] Devin-1
 - [ ] Devin-2
diff --git a/src/auth/env-store.ts b/src/auth/env-store.ts
index 0a6e1e2..828ecd6 100644
--- a/src/auth/env-store.ts
+++ b/src/auth/env-store.ts
@@ -9,7 +9,6 @@ export class EnvCredentialStore implements CredentialStore {
   readonly name = "env";
 
   async get(key: string): Promise<string | undefined> {
-    // biome-ignore lint/complexity/useLiteralKeys: process.env has an index signature
     const v = process.env[key];
     return v && v.length > 0 ? v : undefined;
   }
diff --git a/src/cli/commands/review.ts b/src/cli/commands/review.ts
index 66afe66..4258463 100644
--- a/src/cli/commands/review.ts
+++ b/src/cli/commands/review.ts
@@ -28,11 +28,11 @@ export function registerReviewCmd(program: Command, _ctx: CliContext): void {
           "SELECT id, decision_id, payload_json, reasons_json FROM human_review_queue WHERE decided_at IS NULL ORDER BY created_at ASC LIMIT 50",
         )
         .all() as Array<{
-          id: number;
-          decision_id: string;
-          payload_json: string;
-          reasons_json: string | null;
-        }>;
+        id: number;
+        decision_id: string;
+        payload_json: string;
+        reasons_json: string | null;
+      }>;
 
       if (rows.length === 0) {
         printLine("no pending reviews");
@@ -77,17 +77,17 @@ export function registerReviewCmd(program: Command, _ctx: CliContext): void {
            LIMIT ?`,
         )
         .all(opts.mode, limit) as Array<{
-          id: number;
-          decision_id: string;
-          kind: string;
-          status: string;
-          payload_json: string;
-          rationale: string | null;
-          confidence: number | null;
-          relevance: number | null;
-          reasons_json: string | null;
-          created_at: string;
-        }>;
+        id: number;
+        decision_id: string;
+        kind: string;
+        status: string;
+        payload_json: string;
+        rationale: string | null;
+        confidence: number | null;
+        relevance: number | null;
+        reasons_json: string | null;
+        created_at: string;
+      }>;
 
       if (rows.length === 0) {
         printLine(`no unlabeled candidates in mode=${opts.mode}`);
@@ -156,11 +156,11 @@ export function registerReviewCmd(program: Command, _ctx: CliContext): void {
            WHERE operator_label IS NOT NULL AND mode = ?`,
         )
         .all(opts.mode) as Array<{
-          status: string;
-          operator_label: string;
-          confidence: number | null;
-          relevance: number | null;
-        }>;
+        status: string;
+        operator_label: string;
+        confidence: number | null;
+        relevance: number | null;
+      }>;
 
       const total = rows.length;
       let agree = 0;
@@ -263,55 +263,57 @@ export function registerReviewCmd(program: Command, _ctx: CliContext): void {
     .option("--min-agreement <pct>", "minimum agreement %", "80")
     .option("--mode <mode>", "filter by mode", "shadow")
     .option("--json", "emit JSON result to stdout")
-    .action(async (opts: { minLabeled: string; minAgreement: string; mode: string; json?: boolean }) => {
-      const { db } = await import("@/db");
+    .action(
+      async (opts: { minLabeled: string; minAgreement: string; mode: string; json?: boolean }) => {
+        const { db } = await import("@/db");
 
-      const minLabeled = Number.parseInt(opts.minLabeled, 10) || 100;
-      const minAgreement = Number.parseInt(opts.minAgreement, 10) || 80;
+        const minLabeled = Number.parseInt(opts.minLabeled, 10) || 100;
+        const minAgreement = Number.parseInt(opts.minAgreement, 10) || 80;
 
-      const rows = db()
-        .prepare(
-          `SELECT status, operator_label
+        const rows = db()
+          .prepare(
+            `SELECT status, operator_label
            FROM action_log
            WHERE operator_label IS NOT NULL AND mode = ?`,
-        )
-        .all(opts.mode) as Array<{ status: string; operator_label: string }>;
+          )
+          .all(opts.mode) as Array<{ status: string; operator_label: string }>;
 
-      const total = rows.length;
-      let agree = 0;
-      let disagree = 0;
+        const total = rows.length;
+        let agree = 0;
+        let disagree = 0;
 
-      for (const r of rows) {
-        const policyApproved = r.status === "approved" || r.status === "executed";
-        if (r.operator_label === "unclear") continue;
-        const operatorGood = r.operator_label === "good";
-        if (policyApproved === operatorGood) {
-          agree++;
-        } else {
-          disagree++;
+        for (const r of rows) {
+          const policyApproved = r.status === "approved" || r.status === "executed";
+          if (r.operator_label === "unclear") continue;
+          const operatorGood = r.operator_label === "good";
+          if (policyApproved === operatorGood) {
+            agree++;
+          } else {
+            disagree++;
+          }
         }
-      }
 
-      const decisive = agree + disagree;
-      const agreementPct = decisive > 0 ? (agree / decisive) * 100 : 0;
-      const gateMet = total >= minLabeled && agreementPct >= minAgreement;
+        const decisive = agree + disagree;
+        const agreementPct = decisive > 0 ? (agree / decisive) * 100 : 0;
+        const gateMet = total >= minLabeled && agreementPct >= minAgreement;
 
-      if (opts.json) {
-        const result = {
-          ready: gateMet,
-          mode: opts.mode,
-          total_labeled: total,
-          min_labeled: minLabeled,
-          agreement_pct: Number(agreementPct.toFixed(2)),
-          min_agreement_pct: minAgreement,
-        };
-        printLine(JSON.stringify(result, null, 2));
-      } else {
-        printLine(gateMet ? "READY" : "NOT_READY");
-        printLine(`  labeled: ${total}/${minLabeled}`);
-        printLine(`  agreement: ${agreementPct.toFixed(2)}% (min ${minAgreement}%)`);
-      }
+        if (opts.json) {
+          const result = {
+            ready: gateMet,
+            mode: opts.mode,
+            total_labeled: total,
+            min_labeled: minLabeled,
+            agreement_pct: Number(agreementPct.toFixed(2)),
+            min_agreement_pct: minAgreement,
+          };
+          printLine(JSON.stringify(result, null, 2));
+        } else {
+          printLine(gateMet ? "READY" : "NOT_READY");
+          printLine(`  labeled: ${total}/${minLabeled}`);
+          printLine(`  agreement: ${agreementPct.toFixed(2)}% (min ${minAgreement}%)`);
+        }
 
-      process.exit(gateMet ? 0 : 1);
-    });
+        process.exit(gateMet ? 0 : 1);
+      },
+    );
 }
diff --git a/src/cli/commands/status.ts b/src/cli/commands/status.ts
index 61133bf..9f44a71 100644
--- a/src/cli/commands/status.ts
+++ b/src/cli/commands/status.ts
@@ -140,11 +140,11 @@ export function registerStatusCmd(program: Command, _ctx: CliContext): void {
           "SELECT tick_at, candidate_count, tool_call_count, cost_in_usd_ticks FROM reasoner_runs ORDER BY tick_at DESC LIMIT 5",
         )
         .all() as Array<{
-          tick_at: string;
-          candidate_count: number;
-          tool_call_count: number;
-          cost_in_usd_ticks: number | null;
-        }>;
+        tick_at: string;
+        candidate_count: number;
+        tool_call_count: number;
+        cost_in_usd_ticks: number | null;
+      }>;
       printLine(`=== last ${reasoner.length} reasoner_runs ===`);
       for (const r of reasoner) {
         printLine(
@@ -158,11 +158,11 @@ export function registerStatusCmd(program: Command, _ctx: CliContext): void {
           "SELECT status, batch_id, completed_at, created_at FROM consolidator_runs ORDER BY created_at DESC LIMIT 5",
         )
         .all() as Array<{
-          status: string;
-          batch_id: string | null;
-          completed_at: string | null;
-          created_at: string;
-        }>;
+        status: string;
+        batch_id: string | null;
+        completed_at: string | null;
+        created_at: string;
+      }>;
       printLine(`=== last ${consolidator.length} consolidator_runs ===`);
       for (const c of consolidator) {
         printLine(
diff --git a/src/cockpit/core/budget.ts b/src/cockpit/core/budget.ts
new file mode 100644
index 0000000..4007019
--- /dev/null
+++ b/src/cockpit/core/budget.ts
@@ -0,0 +1,19 @@
+import { remaining } from "@/agent/budget";
+import type { Budget, BudgetLimits } from "@/agent/types";
+
+export const DEFAULT_COCKPIT_BUDGET_LIMITS: BudgetLimits = {
+  tokens: 50_000,
+  usdTicks: 2_000_000, // NOTE(review): the tick-to-USD ratio is not defined in this module — confirm
+  wallClockMs: 300_000, // 5 minutes
+  toolCalls: 40,
+}; // Default cockpit session caps; the exact values are pinned by tests/cockpit/protocol.test.ts.
+
+export function defaultChildBudgetLimits(parent: Budget): BudgetLimits {
+  const available = remaining(parent);
+  const limits: BudgetLimits = {};
+  for (const key of ["tokens", "usdTicks", "wallClockMs", "toolCalls"] as const) {
+    const left = available[key];
+    if (left !== undefined) limits[key] = Math.floor(left / 2); // half of the headroom, rounded down
+  }
+  return limits;
+}
diff --git a/src/cockpit/core/controller.ts b/src/cockpit/core/controller.ts
new file mode 100644
index 0000000..f3e9fc1
--- /dev/null
+++ b/src/cockpit/core/controller.ts
@@ -0,0 +1,26 @@
+import type { Candidate } from "@/types/actions";
+import type { CockpitEvent } from "./events";
+
+export interface ChatInput {
+  sessionId: string;
+  text: string;
+  metadata?: Record<string, unknown>;
+}
+
+export interface SlashCommandInput {
+  sessionId: string;
+  command: string;
+  args: readonly string[];
+}
+
+export interface ChatController {
+  submit(input: ChatInput): Promise<void>; // accepts a chat turn (session id + text)
+  slash(input: SlashCommandInput): Promise<void>; // accepts a slash command already split into name + args
+  events(): AsyncIterable<CockpitEvent>; // NOTE(review): submit/slash resolve to void, so output presumably surfaces here — confirm
+}
+
+export interface XActionProposal {
+  candidate: Candidate<"proposed">; // pinned to "proposed"; presumably must still pass the policy gate before execution — confirm
+  sourceSessionId: string; // chat session the proposal came from
+  sourceMessageId?: string; // optional: the proposal need not be tied to a single message
+}
diff --git a/src/cockpit/core/events.ts b/src/cockpit/core/events.ts
new file mode 100644
index 0000000..d2ae31e
--- /dev/null
+++ b/src/cockpit/core/events.ts
@@ -0,0 +1,156 @@
+import { EventEmitter } from "node:events";
+import { z } from "zod";
+
+export const COCKPIT_PROTOCOL_VERSION = 1;
+export const COCKPIT_PROTOCOL_HEADER = "X-Cockpit-Protocol";
+
+export const MessageSchema = z.object({
+  id: z.string().min(1),
+  role: z.enum(["system", "user", "assistant", "tool"]),
+  content: z.string(), // may be empty: unlike id, no minimum length is enforced
+  name: z.string().optional(),
+  toolCallId: z.string().optional(), // NOTE(review): presumably only set on role "tool" messages — not enforced here
+  createdAt: z.string().datetime().optional(), // ISO 8601 datetime string when present
+  metadata: z.record(z.unknown()).optional(), // free-form bag; values are left unvalidated
+});
+
+export type Message = z.infer<typeof MessageSchema>;
+
+export const ProviderIdSchema = z.string().min(1);
+export type ProviderId = z.infer<typeof ProviderIdSchema>;
+
+export const SubagentBackendSchema = z.enum(["internal", "cli-process", "ssh"]);
+export type SubagentBackend = z.infer<typeof SubagentBackendSchema>;
+
+export const SkillProposalSchema = z
+  .object({
+    name: z.string().min(1).optional(),
+    rationale: z.string().min(1), // the only required field
+    proposedFrontmatter: z.record(z.unknown()).optional(),
+    metadata: z.record(z.unknown()).optional(),
+  })
+  .passthrough(); // unknown extra keys are kept on the parsed value instead of being stripped
+
+export type SkillProposal = z.infer<typeof SkillProposalSchema>;
+
+export const CockpitEventSchema = z.discriminatedUnion("t", [
+  z.object({
+    t: z.literal("transcript.append"),
+    sessionId: z.string().min(1),
+    message: MessageSchema,
+  }),
+  z.object({
+    t: z.literal("transcript.delta"),
+    sessionId: z.string().min(1),
+    messageId: z.string().min(1), // NOTE(review): presumably the id of a message sent earlier via transcript.append — confirm
+    chunk: z.string(),
+  }),
+  z.object({
+    t: z.literal("tool.start"),
+    sessionId: z.string().min(1),
+    callId: z.string().min(1), // same field name is used by tool.progress and tool.end
+    name: z.string().min(1),
+    args: z.unknown(), // not validated at this layer
+  }),
+  z.object({
+    t: z.literal("tool.progress"),
+    sessionId: z.string().min(1),
+    callId: z.string().min(1),
+    chunk: z.string(),
+  }),
+  z.object({
+    t: z.literal("tool.end"),
+    sessionId: z.string().min(1),
+    callId: z.string().min(1),
+    ok: z.boolean(),
+    result: z.unknown().optional(),
+  }),
+  z.object({
+    t: z.literal("subagent.spawn"),
+    subagentId: z.string().min(1),
+    backend: SubagentBackendSchema,
+    parentSessionId: z.string().min(1), // the only field in the subagent.* events that names a session
+  }),
+  z.object({
+    t: z.literal("subagent.event"),
+    subagentId: z.string().min(1), // no sessionId on this variant
+    kind: z.enum(["stdout", "stderr", "status"]),
+    chunk: z.string(),
+  }),
+  z.object({
+    t: z.literal("subagent.end"),
+    subagentId: z.string().min(1),
+    ok: z.boolean(),
+    exit: z.number().int().optional(), // integer when present
+  }),
+  z.object({
+    t: z.literal("skill.proposal"),
+    proposalId: z.string().min(1),
+    kind: z.enum(["draft", "retire"]),
+    payload: SkillProposalSchema,
+  }),
+  z.object({
+    t: z.literal("skill.decision"),
+    proposalId: z.string().min(1), // same field name as on skill.proposal
+    decision: z.enum(["accepted", "rejected"]),
+    by: z.enum(["user", "auto"]),
+  }),
+  z.object({
+    t: z.literal("provider.switch"),
+    from: ProviderIdSchema,
+    to: ProviderIdSchema,
+  }),
+  z.object({
+    t: z.literal("policy.gate"),
+    candidateId: z.string().min(1),
+    result: z.enum(["approved", "rejected"]),
+    reason: z.string().optional(),
+  }),
+  z.object({
+    t: z.literal("budget.warn"),
+    sessionId: z.string().min(1),
+    dimension: z.enum(["tokens", "usd", "wallclock", "toolCalls"]), // NOTE(review): differs from the usdTicks / wallClockMs keys used in DEFAULT_COCKPIT_BUDGET_LIMITS
+    used: z.number().nonnegative(),
+    cap: z.number().nonnegative(),
+  }),
+  z.object({
+    t: z.literal("error"),
+    sessionId: z.string().min(1).optional(), // optional: an error need not belong to a session
+    code: z.string().min(1),
+    message: z.string(),
+  }),
+]);
+
+export type CockpitEvent = z.infer<typeof CockpitEventSchema>;
+export type CockpitEventType = CockpitEvent["t"];
+export type CockpitEventListener = (event: CockpitEvent) => void;
+
+export function parseCockpitEvent(value: unknown): CockpitEvent {
+  return CockpitEventSchema.parse(value); // throws ZodError when value matches no event variant
+}
+
+/** In-process pub/sub for cockpit events; every event is schema-validated before delivery. */
+export class EventBus {
+  private readonly emitter = new EventEmitter();
+
+  publish(event: CockpitEvent): void {
+    const parsed = parseCockpitEvent(event); // throws on a malformed event; nothing is delivered
+    this.emitter.emit("event", parsed);
+    // Per-type channels are prefixed: emitting on a bare "error" channel with no listener
+    // makes EventEmitter throw, which would turn an unobserved `t: "error"` event into a crash.
+    this.emitter.emit(`t:${parsed.t}`, parsed);
+  }
+
+  subscribe(listener: CockpitEventListener): () => void {
+    this.emitter.on("event", listener); // catch-all channel: receives every published event
+    return () => this.emitter.off("event", listener); // call to unsubscribe
+  }
+
+  subscribeTo<T extends CockpitEventType>(
+    type: T,
+    listener: (event: Extract<CockpitEvent, { t: T }>) => void,
+  ): () => void {
+    this.emitter.on(`t:${type}`, listener); // only events whose `t` equals `type` reach this channel
+    return () => this.emitter.off(`t:${type}`, listener); // call to unsubscribe
+  }
+}
diff --git a/src/cockpit/core/index.ts b/src/cockpit/core/index.ts
new file mode 100644
index 0000000..0b6e121
--- /dev/null
+++ b/src/cockpit/core/index.ts
@@ -0,0 +1,6 @@
+export * from "./budget";
+export * from "./controller";
+export * from "./events";
+export * from "./providers";
+export * from "./subagents";
+export * from "./transcript";
diff --git a/src/cockpit/core/providers.ts b/src/cockpit/core/providers.ts
new file mode 100644
index 0000000..7192eca
--- /dev/null
+++ b/src/cockpit/core/providers.ts
@@ -0,0 +1,17 @@
+import type { LlmProvider } from "@/clients/llm";
+import type { ProviderId } from "./events";
+
+export type AuthType = "api_key" | "oauth_device_code" | "oauth_external";
+
+export interface ProviderSelection {
+  id: ProviderId; // any non-empty string (see ProviderIdSchema); not restricted to a fixed provider list
+  model: string;
+  authType: AuthType; // "api_key" | "oauth_device_code" | "oauth_external"
+  source: "env" | "strand_store" | "external_cli" | "openai_compat"; // NOTE(review): presumably where the credential was resolved from — confirm
+}
+
+export interface ProviderRouter {
+  active(): Promise<ProviderSelection | null>; // NOTE(review): null presumably means no provider has been picked yet — confirm
+  switchProvider(next: ProviderSelection): Promise<void>;
+  providerFor(selection: ProviderSelection): Promise<LlmProvider>; // resolves a selection to an LlmProvider client
+}
diff --git a/src/cockpit/core/subagents.ts b/src/cockpit/core/subagents.ts
new file mode 100644
index 0000000..554bdad
--- /dev/null
+++ b/src/cockpit/core/subagents.ts
@@ -0,0 +1,45 @@
+import type { Budget, BudgetLimits } from "@/agent/types";
+import type { CockpitEvent, SubagentBackend } from "./events";
+
+export const MAX_SUBAGENT_DEPTH = 3; // NOTE(review): not enforced in this module; presumably checked against SpawnSpec.depth — confirm
+export const DEFAULT_MAX_CONCURRENT_CHILDREN = 3;
+export const DEFAULT_SUBAGENT_HEARTBEAT_MS = 30_000; // 30 seconds
+export const DEFAULT_SUBAGENT_STALE_MS = 10 * 60_000; // 10 minutes
+
+export type SpawnMode = "oneshot" | "interactive";
+
+export interface SpawnSpec {
+  task: string;
+  backend: SubagentBackend; // "internal" | "cli-process" | "ssh"
+  parentSessionId: string; // same field name as on the subagent.spawn event
+  mode?: SpawnMode; // "oneshot" | "interactive"; NOTE(review): the default when omitted is not defined here
+  cmd?: string; // NOTE(review): cmd/args/parser presumably only apply to the cli-process backend — confirm
+  args?: readonly string[];
+  parser?: string;
+  allowedTools?: readonly string[];
+  budget?: Partial<BudgetLimits>; // any subset of the budget dimensions may be overridden
+  depth?: number; // NOTE(review): presumably compared with MAX_SUBAGENT_DEPTH by the backend — not enforced by this type
+  metadata?: Record<string, unknown>;
+}
+
+export interface SubagentStatus {
+  state: "queued" | "running" | "completed" | "failed" | "cancelled";
+  startedAt?: string; // NOTE(review): timestamp format is not specified here — presumably ISO 8601; confirm
+  endedAt?: string;
+  exit?: number; // optional, like `exit` on the subagent.end event
+  message?: string;
+}
+
+export interface SubagentHandle {
+  send(input: string): Promise<void>;
+  readonly events: AsyncIterable<CockpitEvent>; // NOTE(review): a property here, but a method (`events()`) on ChatController and Transcript
+  status(): Promise<SubagentStatus>;
+  cancel(): Promise<void>;
+  readonly budget: Budget; // a Budget object, not just BudgetLimits
+}
+
+export interface Subagent {
+  readonly id: string;
+  readonly backend: SubagentBackend;
+  spawn(spec: SpawnSpec): Promise<SubagentHandle>;
+}
diff --git a/src/cockpit/core/transcript.ts b/src/cockpit/core/transcript.ts
new file mode 100644
index 0000000..93a22c4
--- /dev/null
+++ b/src/cockpit/core/transcript.ts
@@ -0,0 +1,9 @@
+import type { CockpitEvent, Message } from "./events";
+
+export interface Transcript {
+  readonly sessionId: string; // one transcript per session id
+  append(message: Message): Promise<void>; // takes a Message, the payload type of the transcript.append event
+  appendDelta(messageId: string, chunk: string): Promise<void>; // same (messageId, chunk) pair as the transcript.delta event
+  list(): Promise<readonly Message[]>; // callers receive a read-only array type
+  events(): AsyncIterable<CockpitEvent>;
+}
diff --git a/src/cockpit/ink/index.ts b/src/cockpit/ink/index.ts
new file mode 100644
index 0000000..32c21ec
--- /dev/null
+++ b/src/cockpit/ink/index.ts
@@ -0,0 +1,6 @@
+import { COCKPIT_PROTOCOL_VERSION } from "../core";
+
+export const INK_COCKPIT_RENDERER = {
+  name: "ink",
+  protocolVersion: COCKPIT_PROTOCOL_VERSION,
+} as const;
diff --git a/src/cockpit/web/index.ts b/src/cockpit/web/index.ts
new file mode 100644
index 0000000..19479f7
--- /dev/null
+++ b/src/cockpit/web/index.ts
@@ -0,0 +1,13 @@
+import { COCKPIT_PROTOCOL_HEADER, COCKPIT_PROTOCOL_VERSION } from "../core";
+
+export const WEB_COCKPIT_RENDERER = {
+  name: "web",
+  protocolVersion: COCKPIT_PROTOCOL_VERSION,
+} as const;
+
+export const COCKPIT_SSE_PATH = "/events";
+export const COCKPIT_INPUT_PATH = "/input";
+export const COCKPIT_COMMAND_PATH_PREFIX = "/commands"; // a prefix, not a full route
+export const COCKPIT_SSE_HEADERS = {
+  [COCKPIT_PROTOCOL_HEADER]: String(COCKPIT_PROTOCOL_VERSION), // header values are strings, so the numeric version is stringified
+} as const;
diff --git a/src/loops/actor.ts b/src/loops/actor.ts
index 2e4f86c..ac24206 100644
--- a/src/loops/actor.ts
+++ b/src/loops/actor.ts
@@ -73,7 +73,14 @@ export async function executeApproved(
   const isShadow = env.STRAND_MODE === "shadow" || (!isLowRisk && env.STRAND_MODE === "live");
 
   if (isShadow) {
-    log.info({ key, kind: c.action.kind, reason: env.STRAND_MODE === "shadow" ? "mode_shadow" : "phase3_non_lowrisk" }, "actor.shadow");
+    log.info(
+      {
+        key,
+        kind: c.action.kind,
+        reason: env.STRAND_MODE === "shadow" ? "mode_shadow" : "phase3_non_lowrisk",
+      },
+      "actor.shadow",
+    );
     db().prepare("UPDATE action_log SET status = 'executed' WHERE idempotency_key = ?").run(key);
     return;
   }
diff --git a/src/metrics/index.ts b/src/metrics/index.ts
index 1211764..e01d956 100644
--- a/src/metrics/index.ts
+++ b/src/metrics/index.ts
@@ -8,11 +8,11 @@
  * - Error rates by action kind
  */
 
+import { getMonthlyUsage, getRateLimit } from "@/clients/x";
 import { env } from "@/config";
 import { db } from "@/db";
-import { getRateLimit, getMonthlyUsage } from "@/clients/x";
-import { log } from "@/util/log";
 import type { Action } from "@/types/actions";
+import { log } from "@/util/log";
 
 const TIER_MONTHLY_CAP: Record<"basic" | "pro" | "enterprise", number> = {
   basic: 10_000,
@@ -150,12 +150,9 @@ export function recordFollowerDelta(followers: {
  * Record action error rate.
  * Called by Actor when an action fails.
  */
-export function recordActionError(
-  kind: Action["kind"],
-  errorCode: string,
-): void {
+export function recordActionError(kind: Action["kind"], errorCode: string): void {
   try {
-    const hourBucket = new Date().toISOString().slice(0, 13) + ":00:00Z"; // Round to hour
+    const hourBucket = `${new Date().toISOString().slice(0, 13)}:00:00Z`; // Round to hour
 
     const d = db();
 
diff --git a/tests/cockpit/protocol.test.ts b/tests/cockpit/protocol.test.ts
new file mode 100644
index 0000000..a38a2bf
--- /dev/null
+++ b/tests/cockpit/protocol.test.ts
@@ -0,0 +1,70 @@
+import { createBudget } from "@/agent/budget";
+import {
+  COCKPIT_PROTOCOL_HEADER,
+  COCKPIT_PROTOCOL_VERSION,
+  CockpitEventSchema,
+  DEFAULT_COCKPIT_BUDGET_LIMITS,
+  EventBus,
+  defaultChildBudgetLimits,
+} from "@/cockpit/core";
+import { COCKPIT_SSE_HEADERS, WEB_COCKPIT_RENDERER } from "@/cockpit/web";
+import { describe, expect, it } from "vitest";
+
+describe("cockpit protocol scaffold", () => {
+  it("pins protocol version 1 for renderers", () => {
+    expect(COCKPIT_PROTOCOL_VERSION).toBe(1);
+    expect(COCKPIT_SSE_HEADERS[COCKPIT_PROTOCOL_HEADER]).toBe("1"); // the header carries the version as a string
+    expect(WEB_COCKPIT_RENDERER.protocolVersion).toBe(1);
+  });
+
+  it("parses the pinned transcript event schema", () => {
+    const event = CockpitEventSchema.parse({
+      t: "transcript.append",
+      sessionId: "session-1",
+      message: {
+        id: "message-1",
+        role: "user",
+        content: "ship the cockpit scaffold",
+        createdAt: "2026-04-24T12:00:00.000Z",
+      },
+    });
+
+    expect(event.t).toBe("transcript.append");
+    if (event.t !== "transcript.append") throw new Error("expected transcript append event"); // narrows the union so `event.message` type-checks
+    expect(event.message.content).toBe("ship the cockpit scaffold");
+  });
+
+  it("emits typed events through the in-process bus", () => {
+    const bus = new EventBus();
+    const seen: string[] = [];
+    const unsubscribe = bus.subscribe((event) => seen.push(event.t)); // catch-all subscription only; subscribeTo is not exercised here
+
+    bus.publish({
+      t: "budget.warn",
+      sessionId: "session-1",
+      dimension: "tokens",
+      used: 45_000,
+      cap: 50_000,
+    });
+    unsubscribe();
+
+    expect(seen).toEqual(["budget.warn"]);
+  });
+
+  it("defaults to lean cockpit session budgets and half-budget children", () => {
+    expect(DEFAULT_COCKPIT_BUDGET_LIMITS).toEqual({
+      tokens: 50_000,
+      usdTicks: 2_000_000,
+      wallClockMs: 300_000,
+      toolCalls: 40,
+    });
+
+    const parent = createBudget(DEFAULT_COCKPIT_BUDGET_LIMITS);
+    const child = defaultChildBudgetLimits(parent);
+    expect(child.tokens).toBe(25_000);
+    expect(child.usdTicks).toBe(1_000_000);
+    expect(child.toolCalls).toBe(20);
+    expect(child.wallClockMs).toBeLessThanOrEqual(150_000); // range, not equality: remaining() presumably subtracts elapsed wall-clock time — confirm
+    expect(child.wallClockMs).toBeGreaterThan(149_000);
+  });
+});

From fef93eeeeed7f3fff1108e34f44cacaedf54bda6 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Fri, 24 Apr 2026 19:11:06 +0000
Subject: [PATCH 5/8] =?UTF-8?q?feat(skills):=20S5=20=E2=80=94=20skill=20li?=
 =?UTF-8?q?fecycle,=20brainctl=20integration,=20Ink=20bridge?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- SQLite skill_records + skill_decisions tables (schema.sql)
- SkillRecordStore: upsert, usage metrics, status transitions
- SkillDecisionStore: accept/reject with 30-day rejection suppression
- Nightly scorer: queued_retire on low hit-rate (<0.15), low success-rate
  (<0.5 with n>=10), or superseded by higher-scoring skill
- brainctl adapter: toBrainctlDecisionEvent for decision event logging
- Ink bridge: createInkBridge + createSkillEventBridge for chat-first
  renderer consuming CockpitEvent streams (preserves classic TUI)
- 20 tests: lifecycle store, scorer, accept/reject, suppression, bridge

Co-Authored-By: Terrence Schonleber <TVschonleber@gmail.com>
---
 src/agent/skills/index.ts           |  21 ++
 src/agent/skills/lifecycle.ts       | 450 ++++++++++++++++++++++++++++
 src/cockpit/ink/bridge.ts           |  89 ++++++
 src/cockpit/ink/index.ts            |   3 +
 src/db/schema.sql                   |  32 ++
 tests/agent/skill-lifecycle.test.ts | 262 ++++++++++++++++
 tests/cockpit/ink-bridge.test.ts    | 119 ++++++++
 7 files changed, 976 insertions(+)
 create mode 100644 src/agent/skills/lifecycle.ts
 create mode 100644 src/cockpit/ink/bridge.ts
 create mode 100644 tests/agent/skill-lifecycle.test.ts
 create mode 100644 tests/cockpit/ink-bridge.test.ts

diff --git a/src/agent/skills/index.ts b/src/agent/skills/index.ts
index 2d4ccd3..b7a8f2f 100644
--- a/src/agent/skills/index.ts
+++ b/src/agent/skills/index.ts
@@ -28,3 +28,24 @@ export type {
   SkillProposalStore,
 } from "./auto-create";
 export { SqliteSkillProposalStore, makeSqliteSkillProposalStore } from "./proposal-store";
+
+// Skill lifecycle (§6):
+export {
+  SkillRecordStore,
+  SkillDecisionStore,
+  acceptProposal,
+  rejectProposal,
+  runNightlyScorer,
+  toBrainctlDecisionEvent,
+  tokenCostP50,
+  tokenCostP95,
+} from "./lifecycle";
+export type {
+  BrainctlDecisionEvent,
+  ScorerOpts,
+  ScorerResult,
+  SkillDecision,
+  SkillRecord,
+  SkillStatus,
+  UsageEvent,
+} from "./lifecycle";
diff --git a/src/agent/skills/lifecycle.ts b/src/agent/skills/lifecycle.ts
new file mode 100644
index 0000000..1818954
--- /dev/null
+++ b/src/agent/skills/lifecycle.ts
@@ -0,0 +1,450 @@
+/**
+ * Skill lifecycle — executable skill records, usage metrics, nightly scorer,
+ * and brainctl decision integration.
+ *
+ * Markdown skill files remain the human-readable source of truth;
+ * this module tracks runtime metrics in SQLite and manages state transitions.
+ *
+ * Statuses: active | retired | draft | queued_draft | queued_retire
+ * No silent retirement — everything queues for operator approval.
+ */
+
+import { db as defaultDb } from "@/db";
+import type { Database as BetterSqliteDatabase } from "better-sqlite3";
+
+// ─── Types ─────────────────────────────────────────────────────────────────
+
+export type SkillStatus = "active" | "retired" | "draft" | "queued_draft" | "queued_retire";
+
+export interface SkillRecord {
+  name: string; // primary key of the skill_records row
+  status: SkillStatus;
+  usageCount: number; // total recorded usage events
+  successCount: number; // subset of usageCount whose event reported success
+  tokenCostSamples: number[]; // most recent token costs, oldest first, capped at MAX_COST_SAMPLES
+  lastUsedAt: string | null; // ISO timestamp of the latest recorded usage; null if never used
+  trustScore: number; // stored as REAL; the schema documents a 0.0–1.0 range
+  triggers: string[];
+  supersedes: string[]; // names of skills this one supersedes
+  createdAt: string;
+  updatedAt: string;
+}
+
+export interface SkillDecision {
+  id: string; // accept/reject helpers generate `sd_<epoch ms>_<random base36>`
+  skillName: string;
+  proposalKind: "draft" | "retire";
+  decision: "accepted" | "rejected";
+  decidedBy: "user" | "auto";
+  rationale: string | null;
+  suppressedUntil: string | null; // ISO timestamp; the same proposal kind is suppressed until then
+  createdAt: string;
+}
+
+export interface UsageEvent {
+  skillName: string;
+  success: boolean;
+  tokenCost: number;
+}
+
+export interface ScorerResult {
+  queuedRetire: string[]; // names moved to queued_retire by this run
+  queuedDraft: string[]; // never filled by runNightlyScorer in this module
+  skipped: string[]; // names skipped because a rejected retire proposal is still suppressing them
+}
+
+// ─── Percentile helpers ────────────────────────────────────────────────────
+
+const MAX_COST_SAMPLES = 100;
+
+/** Nearest-rank percentile of an ascending-sorted array; `p` is clamped to [0, 100]. */
+function percentile(sorted: number[], p: number): number {
+  const rank = Math.ceil((Math.min(100, Math.max(0, p)) / 100) * sorted.length);
+  return sorted[Math.max(0, rank - 1)] ?? 0; // empty input → undefined → 0
+}
+
+/** Median (p50) of the token-cost samples; the input array is not mutated. */
+export function tokenCostP50(samples: number[]): number {
+  return percentile([...samples].sort((a, b) => a - b), 50);
+}
+
+/** 95th-percentile token cost of the samples; the input array is not mutated. */
+export function tokenCostP95(samples: number[]): number {
+  return percentile([...samples].sort((a, b) => a - b), 95);
+}
+
+// ─── SQLite row mapping ────────────────────────────────────────────────────
+
+interface SkillRecordRow {
+  name: string;
+  status: string;
+  usage_count: number;
+  success_count: number;
+  token_cost_samples_json: string | null;
+  last_used_at: string | null;
+  trust_score: number;
+  triggers_json: string | null;
+  supersedes_json: string | null;
+  created_at: string;
+  updated_at: string;
+}
+
+interface SkillDecisionRow {
+  id: string;
+  skill_name: string;
+  proposal_kind: string;
+  decision: string;
+  decided_by: string;
+  rationale: string | null;
+  suppressed_until: string | null;
+  created_at: string;
+}
+
+function parseJsonArray(raw: string | null): string[] {
+  let decoded: unknown = null; // stays null for NULL/empty columns and malformed JSON
+  try {
+    if (raw) decoded = JSON.parse(raw);
+  } catch {
+    // malformed JSON: leave `decoded` null so the result is []
+  }
+  return Array.isArray(decoded) ? decoded.filter((s): s is string => typeof s === "string") : [];
+}
+
+function parseNumberArray(raw: string | null): number[] {
+  let decoded: unknown = null; // stays null for NULL/empty columns and malformed JSON
+  try {
+    if (raw) decoded = JSON.parse(raw);
+  } catch {
+    // malformed JSON: leave `decoded` null so the result is []
+  }
+  return Array.isArray(decoded) ? decoded.filter((n): n is number => typeof n === "number") : [];
+}
+
+function rowToRecord(r: SkillRecordRow): SkillRecord {
+  return {
+    name: r.name,
+    status: r.status as SkillStatus, // unchecked cast: the stored string is not validated here
+    usageCount: r.usage_count,
+    successCount: r.success_count,
+    tokenCostSamples: parseNumberArray(r.token_cost_samples_json), // NULL or bad JSON → []
+    lastUsedAt: r.last_used_at,
+    trustScore: r.trust_score,
+    triggers: parseJsonArray(r.triggers_json), // NULL or bad JSON → []
+    supersedes: parseJsonArray(r.supersedes_json), // NULL or bad JSON → []
+    createdAt: r.created_at,
+    updatedAt: r.updated_at,
+  };
+}
+
+function rowToDecision(r: SkillDecisionRow): SkillDecision {
+  return {
+    id: r.id,
+    skillName: r.skill_name,
+    proposalKind: r.proposal_kind as "draft" | "retire", // unchecked cast of the stored TEXT value
+    decision: r.decision as "accepted" | "rejected", // unchecked cast of the stored TEXT value
+    decidedBy: r.decided_by as "user" | "auto", // unchecked cast of the stored TEXT value
+    rationale: r.rationale,
+    suppressedUntil: r.suppressed_until,
+    createdAt: r.created_at,
+  };
+}
+
+// ─── SkillRecordStore ──────────────────────────────────────────────────────
+
+export class SkillRecordStore {
+  private readonly db: BetterSqliteDatabase;
+
+  constructor(database?: BetterSqliteDatabase) {
+    this.db = database ?? defaultDb();
+  }
+  /** Insert a skill row, or on a name conflict overwrite only the fields that were supplied. */
+  upsert(
+    name: string,
+    fields: Partial<Pick<SkillRecord, "status" | "triggers" | "supersedes" | "trustScore">>,
+  ): void {
+    const now = new Date().toISOString();
+    this.db
+      .prepare(
+        `INSERT INTO skill_records (name, status, triggers_json, supersedes_json, trust_score, created_at, updated_at)
+         VALUES (?, ?, ?, ?, ?, ?, ?)
+         ON CONFLICT(name) DO UPDATE SET
+           status = COALESCE(?, status),
+           triggers_json = COALESCE(?, triggers_json),
+           supersedes_json = COALESCE(?, supersedes_json),
+           trust_score = COALESCE(?, trust_score),
+           updated_at = ?`,
+      )
+      .run(
+        name,
+        fields.status ?? "active", // INSERT path: new rows default to active
+        fields.triggers ? JSON.stringify(fields.triggers) : null,
+        fields.supersedes ? JSON.stringify(fields.supersedes) : null,
+        fields.trustScore ?? 1.0, // INSERT path: new rows default to 1.0
+        now,
+        now,
+        fields.status ?? null, // UPDATE path from here on: null keeps the stored value (COALESCE)
+        fields.triggers ? JSON.stringify(fields.triggers) : null,
+        fields.supersedes ? JSON.stringify(fields.supersedes) : null,
+        fields.trustScore ?? null,
+        now,
+      );
+  }
+  /** Fetch one skill by name, or null when no row exists. */
+  get(name: string): SkillRecord | null {
+    const row = this.db.prepare("SELECT * FROM skill_records WHERE name = ?").get(name) as
+      | SkillRecordRow
+      | undefined;
+    return row ? rowToRecord(row) : null;
+  }
+  /** Skills in the given status, most recently updated first, capped at `limit` rows. */
+  listByStatus(status: SkillStatus, limit = 100): SkillRecord[] {
+    const rows = this.db
+      .prepare("SELECT * FROM skill_records WHERE status = ? ORDER BY updated_at DESC LIMIT ?")
+      .all(status, limit) as SkillRecordRow[];
+    return rows.map(rowToRecord);
+  }
+  /** Active skills; inherits listByStatus's default cap of 100 rows. */
+  listActive(): SkillRecord[] {
+    return this.listByStatus("active");
+  }
+  /** Skills of every status, ordered by name, capped at `limit` rows. */
+  listAll(limit = 500): SkillRecord[] {
+    const rows = this.db
+      .prepare("SELECT * FROM skill_records ORDER BY name LIMIT ?")
+      .all(limit) as SkillRecordRow[];
+    return rows.map(rowToRecord);
+  }
+  /** Count one usage (creating the row if needed) and append its token cost to the samples. */
+  recordUsage(event: UsageEvent): void {
+    const now = new Date().toISOString();
+    const existing = this.get(event.skillName);
+    if (!existing) {
+      this.upsert(event.skillName, {}); // unknown skill: create a default (active) row first
+    }
+    const samples = existing?.tokenCostSamples ?? [];
+    samples.push(event.tokenCost);
+    if (samples.length > MAX_COST_SAMPLES) {
+      samples.splice(0, samples.length - MAX_COST_SAMPLES); // drop the oldest samples
+    }
+    // Counters are incremented in SQL; the sample window is rewritten as a whole JSON array.
+    this.db
+      .prepare(
+        `UPDATE skill_records SET
+           usage_count = usage_count + 1,
+           success_count = success_count + CASE WHEN ? THEN 1 ELSE 0 END,
+           token_cost_samples_json = ?,
+           last_used_at = ?,
+           updated_at = ?
+         WHERE name = ?`,
+      )
+      .run(event.success ? 1 : 0, JSON.stringify(samples), now, now, event.skillName);
+  }
+  /** Set a skill's status and bump updated_at; the UPDATE matches no row for an unknown name. */
+  updateStatus(name: string, status: SkillStatus): void {
+    const now = new Date().toISOString();
+    this.db
+      .prepare("UPDATE skill_records SET status = ?, updated_at = ? WHERE name = ?")
+      .run(status, now, name);
+  }
+  /** Retire a skill; in this store that only means setting its status to "retired". */
+  removeFromIndex(name: string): void {
+    this.updateStatus(name, "retired");
+  }
+}
+
+// ─── Nightly scorer ────────────────────────────────────────────────────────
+
+const HIT_RATE_THRESHOLD = 0.15;
+const SUCCESS_RATE_THRESHOLD = 0.5;
+const MIN_USAGE_FOR_SUCCESS_RATE = 10;
+const SUPPRESSION_DAYS = 30;
+
+export interface ScorerOpts {
+  store: SkillRecordStore;
+  decisionStore: SkillDecisionStore;
+  totalInvocations: number;
+}
+
+/**
+ * Queue active skills for operator review (status → queued_retire) when their share of
+ * `totalInvocations` is under HIT_RATE_THRESHOLD, their success rate over at least
+ * MIN_USAGE_FOR_SUCCESS_RATE uses is under SUCCESS_RATE_THRESHOLD, or another active skill
+ * supersedes them. Suppressed skills land in `skipped`; `queuedDraft` is never filled.
+ */
+export function runNightlyScorer(opts: ScorerOpts): ScorerResult {
+  const { store, decisionStore, totalInvocations } = opts;
+  const result: ScorerResult = { queuedRetire: [], queuedDraft: [], skipped: [] };
+  for (const skill of store.listActive()) {
+    if (decisionStore.isSuppressed(skill.name, "retire")) {
+      result.skipped.push(skill.name);
+      continue;
+    }
+    // With no recorded invocations a hit rate is undefined; treating it as 0 would
+    // queue every skill, so the hit-rate rule only applies once there is traffic.
+    const lowHitRate =
+      totalInvocations > 0 && skill.usageCount / totalInvocations < HIT_RATE_THRESHOLD;
+    const lowSuccessRate =
+      skill.usageCount >= MIN_USAGE_FOR_SUCCESS_RATE &&
+      skill.successCount / skill.usageCount < SUCCESS_RATE_THRESHOLD;
+    const isSuperseded = skill.supersedes.length === 0 && isSupersededByOther(skill.name, store);
+    if (lowHitRate || lowSuccessRate || isSuperseded) {
+      store.updateStatus(skill.name, "queued_retire");
+      result.queuedRetire.push(skill.name);
+    }
+  }
+  return result;
+}
+
+function isSupersededByOther(name: string, store: SkillRecordStore): boolean {
+  const rivals = store.listActive().filter((other) => other.name !== name); // never itself
+  return rivals.some((other) => other.supersedes.includes(name)); // an active rival replaces it
+}
+
+// ─── SkillDecisionStore ────────────────────────────────────────────────────
+
+export class SkillDecisionStore {
+  private readonly db: BetterSqliteDatabase;
+
+  constructor(database?: BetterSqliteDatabase) {
+    this.db = database ?? defaultDb();
+  }
+  /** Persist a decision. `decision.suppressedUntil` is ignored; the value is computed here. */
+  record(decision: Omit<SkillDecision, "createdAt">): void {
+    const suppressedUntil =
+      decision.decision === "rejected"
+        ? new Date(Date.now() + SUPPRESSION_DAYS * 24 * 60 * 60 * 1000).toISOString()
+        : null;
+    // created_at is not in the column list, so the table's default timestamp applies.
+    this.db
+      .prepare(
+        `INSERT INTO skill_decisions (id, skill_name, proposal_kind, decision, decided_by, rationale, suppressed_until)
+         VALUES (?, ?, ?, ?, ?, ?, ?)`,
+      )
+      .run(
+        decision.id,
+        decision.skillName,
+        decision.proposalKind,
+        decision.decision,
+        decision.decidedBy,
+        decision.rationale ?? null,
+        suppressedUntil,
+      );
+  }
+  /** True while a rejected decision for this skill and proposal kind has not yet expired. */
+  isSuppressed(skillName: string, proposalKind: string): boolean {
+    const now = new Date().toISOString(); // compared as text against the stored ISO timestamp
+    const row = this.db
+      .prepare(
+        `SELECT 1 FROM skill_decisions
+         WHERE skill_name = ? AND proposal_kind = ? AND decision = 'rejected'
+           AND suppressed_until > ?
+         LIMIT 1`,
+      )
+      .get(skillName, proposalKind, now);
+    return row !== undefined;
+  }
+  /** Decisions recorded for a skill, newest first, capped at `limit` rows. */
+  listForSkill(skillName: string, limit = 50): SkillDecision[] {
+    const rows = this.db
+      .prepare(
+        "SELECT * FROM skill_decisions WHERE skill_name = ? ORDER BY created_at DESC LIMIT ?",
+      )
+      .all(skillName, limit) as SkillDecisionRow[];
+    return rows.map(rowToDecision);
+  }
+}
+
+// ─── Brainctl adapter ──────────────────────────────────────────────────────
+
+export interface BrainctlDecisionEvent {
+  type: "skill_decision";
+  skillName: string;
+  proposalKind: "draft" | "retire";
+  decision: "accepted" | "rejected";
+  decidedBy: "user" | "auto";
+  rationale: string | null;
+  timestamp: string;
+}
+
+export function toBrainctlDecisionEvent(d: SkillDecision): BrainctlDecisionEvent {
+  return {
+    type: "skill_decision", // fixed tag of the event shape
+    skillName: d.skillName,
+    proposalKind: d.proposalKind,
+    decision: d.decision,
+    decidedBy: d.decidedBy,
+    rationale: d.rationale,
+    timestamp: d.createdAt, // id and suppressedUntil are not carried over into the event
+  };
+}
+
+/**
+ * Accept a skill lifecycle proposal and persist the decision.
+ * - retire proposals: status becomes "retired" (via store.removeFromIndex)
+ * - draft proposals: status becomes "active" (promote from draft)
+ */
+export function acceptProposal(opts: {
+  store: SkillRecordStore;
+  decisionStore: SkillDecisionStore;
+  skillName: string;
+  proposalKind: "draft" | "retire";
+  decidedBy: "user" | "auto";
+  rationale?: string;
+}): SkillDecision {
+  const { store, decisionStore, skillName, proposalKind, decidedBy, rationale } = opts;
+  const id = `sd_${Date.now()}_${Math.random().toString(36).slice(2, 10)}`;
+
+  if (proposalKind === "retire") {
+    store.removeFromIndex(skillName);
+  } else {
+    store.updateStatus(skillName, "active");
+  }
+
+  const decision: Omit<SkillDecision, "createdAt"> = {
+    id,
+    skillName,
+    proposalKind,
+    decision: "accepted",
+    decidedBy,
+    rationale: rationale ?? null,
+    suppressedUntil: null, // accepted decisions are never suppressed
+  };
+  decisionStore.record(decision); // runs after the status change above
+
+  return { ...decision, createdAt: new Date().toISOString() }; // local time, not the stored row's
+}
+
+/**
+ * Reject a skill lifecycle proposal: a queued skill reverts to its pre-queue status
+ * (queued_retire → active, queued_draft → draft) and the rejection is persisted, which
+ * suppresses the same proposal kind for 30 days. Returns the decision as stored.
+ */
+export function rejectProposal(opts: {
+  store: SkillRecordStore;
+  decisionStore: SkillDecisionStore;
+  skillName: string;
+  proposalKind: "draft" | "retire";
+  decidedBy: "user" | "auto";
+  rationale?: string;
+}): SkillDecision {
+  const { store, decisionStore, skillName, proposalKind, decidedBy, rationale } = opts;
+  const id = `sd_${Date.now()}_${Math.random().toString(36).slice(2, 10)}`;
+  const record = store.get(skillName);
+  if (record && (record.status === "queued_retire" || record.status === "queued_draft")) {
+    store.updateStatus(skillName, record.status === "queued_retire" ? "active" : "draft");
+  }
+
+  const decision: Omit<SkillDecision, "createdAt"> = {
+    id,
+    skillName,
+    proposalKind,
+    decision: "rejected",
+    decidedBy,
+    rationale: rationale ?? null,
+    suppressedUntil: null,
+  };
+  decisionStore.record(decision);
+  // Read the row back: record() sets suppressedUntil and the DB stamps created_at.
+  const stored = decisionStore.listForSkill(skillName).find((d) => d.id === id);
+  return stored ?? { ...decision, createdAt: new Date().toISOString() };
+}
diff --git a/src/cockpit/ink/bridge.ts b/src/cockpit/ink/bridge.ts
new file mode 100644
index 0000000..071aeef
--- /dev/null
+++ b/src/cockpit/ink/bridge.ts
@@ -0,0 +1,89 @@
+/**
+ * Ink renderer bridge — minimal adapter for a future chat-first Ink renderer
+ * to consume CockpitEvent streams from the core EventBus.
+ *
+ * This does NOT replace the classic gamified TUI (`strand tui --classic`).
+ * It provides the plumbing so a chat-oriented Ink UI can subscribe to the
+ * same event stream that the web renderer uses.
+ *
+ * Design constraints (from §7):
+ * - Preserve current gamified TUI as "classic"
+ * - Do not make telemetry default chat context
+ * - Both renderers consume the identical CockpitEvent schema
+ */
+
+import type { SkillRecord } from "../../agent/skills/lifecycle";
+import type { CockpitEvent, CockpitEventType, EventBus } from "../core/events";
+
+export interface InkBridgeOpts {
+  bus: EventBus;
+  filter?: CockpitEventType[];
+}
+
+export type InkEventHandler = (event: CockpitEvent) => void;
+
+export interface SkillReviewItem {
+  skillName: string;
+  proposalKind: "draft" | "retire";
+  proposalId: string;
+  rationale: string;
+  record: SkillRecord | null;
+}
+
+/**
+ * Lightweight bridge that connects the core EventBus to an Ink renderer.
+ *
+ * Usage:
+ *   const bridge = createInkBridge({ bus });
+ *   bridge.onEvent((event) => { ... render in Ink ... });
+ *   // later:
+ *   bridge.destroy();
+ */
+export interface InkBridge {
+  onEvent(handler: InkEventHandler): void;
+  destroy(): void;
+  readonly active: boolean;
+}
+
+/** Subscribe once to the bus and fan accepted events out to the registered handlers. */
+export function createInkBridge(opts: InkBridgeOpts): InkBridge {
+  const { bus, filter } = opts;
+  const handlers: InkEventHandler[] = [];
+  let destroyed = false;
+
+  // Single bus subscription shared by all handlers; released again in destroy().
+  const unsubscribe = bus.subscribe((event: CockpitEvent): void => {
+    const filteredOut = filter ? !filter.includes(event.t) : false;
+    if (destroyed || filteredOut) return;
+    for (const handler of handlers) {
+      handler(event);
+    }
+  });
+
+  const bridge: InkBridge = {
+    onEvent(handler: InkEventHandler): void {
+      if (!destroyed) handlers.push(handler); // registrations after destroy() are ignored
+    },
+    destroy(): void {
+      if (destroyed) return; // idempotent
+      destroyed = true;
+      handlers.length = 0;
+      unsubscribe();
+    },
+    get active(): boolean {
+      return !destroyed;
+    },
+  };
+  return bridge;
+}
+
+/**
+ * Create a bridge filtered to skill-lifecycle events only.
+ * Useful for the skill review feed in the chat-first UI.
+ */
+export function createSkillEventBridge(bus: EventBus): InkBridge {
+  // The review feed only needs proposals and the decisions taken on them,
+  // so every other event type is filtered out at the bridge.
+  const filter: CockpitEventType[] = ["skill.proposal", "skill.decision"];
+  return createInkBridge({ bus, filter });
+}
diff --git a/src/cockpit/ink/index.ts b/src/cockpit/ink/index.ts
index 32c21ec..3827819 100644
--- a/src/cockpit/ink/index.ts
+++ b/src/cockpit/ink/index.ts
@@ -4,3 +4,6 @@ export const INK_COCKPIT_RENDERER = {
   name: "ink",
   protocolVersion: COCKPIT_PROTOCOL_VERSION,
 } as const;
+
+export { createInkBridge, createSkillEventBridge } from "./bridge";
+export type { InkBridge, InkBridgeOpts, InkEventHandler, SkillReviewItem } from "./bridge";
diff --git a/src/db/schema.sql b/src/db/schema.sql
index cdf43b3..fcc8c33 100644
--- a/src/db/schema.sql
+++ b/src/db/schema.sql
@@ -234,3 +234,35 @@ CREATE TABLE IF NOT EXISTS error_rates (
 );
 CREATE INDEX IF NOT EXISTS idx_error_rates_hour ON error_rates(hour_bucket);
 CREATE INDEX IF NOT EXISTS idx_error_rates_kind ON error_rates(kind, hour_bucket);
+
+-- Skill lifecycle: executable skill records (§6 of cockpit design spec)
+-- Markdown files in src/agent/skills/*.md are the human-readable source;
+-- this table tracks runtime metrics, trust scores, and lifecycle status.
+CREATE TABLE IF NOT EXISTS skill_records (
+  name TEXT PRIMARY KEY,                 -- matches frontmatter `name`
+  status TEXT NOT NULL DEFAULT 'active', -- active | retired | draft | queued_draft | queued_retire
+  usage_count INTEGER NOT NULL DEFAULT 0, -- incremented once per recorded usage event
+  success_count INTEGER NOT NULL DEFAULT 0, -- incremented only for successful usage events
+  token_cost_samples_json TEXT,          -- JSON array of recent token costs for p50/p95
+  last_used_at TEXT, -- ISO-8601 timestamp of the latest usage event; NULL until first use
+  trust_score REAL NOT NULL DEFAULT 1.0, -- 0.0–1.0
+  triggers_json TEXT,                    -- JSON array of trigger strings
+  supersedes_json TEXT,                  -- JSON array of skill names this supersedes
+  created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now')),
+  updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now'))
+);
+CREATE INDEX IF NOT EXISTS idx_skill_records_status ON skill_records(status);
+
+-- Skill lifecycle decisions (accept/reject proposals, brainctl decision events)
+CREATE TABLE IF NOT EXISTS skill_decisions (
+  id TEXT PRIMARY KEY, -- caller-generated, e.g. sd_<epoch ms>_<random base36>
+  skill_name TEXT NOT NULL, -- holds a skill_records.name; no foreign-key constraint is declared
+  proposal_kind TEXT NOT NULL,           -- draft | retire
+  decision TEXT NOT NULL,                -- accepted | rejected
+  decided_by TEXT NOT NULL,              -- user | auto
+  rationale TEXT, -- optional free-text reason
+  suppressed_until TEXT,                 -- rejection suppresses re-proposal for 30 days
+  created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ','now'))
+);
+CREATE INDEX IF NOT EXISTS idx_skill_decisions_skill ON skill_decisions(skill_name);
+CREATE INDEX IF NOT EXISTS idx_skill_decisions_suppressed ON skill_decisions(suppressed_until);
diff --git a/tests/agent/skill-lifecycle.test.ts b/tests/agent/skill-lifecycle.test.ts
new file mode 100644
index 0000000..6d0e55c
--- /dev/null
+++ b/tests/agent/skill-lifecycle.test.ts
@@ -0,0 +1,262 @@
+import "../../tests/helpers/env";
+import {
+  type SkillDecision,
+  SkillDecisionStore,
+  SkillRecordStore,
+  acceptProposal,
+  rejectProposal,
+  runNightlyScorer,
+  toBrainctlDecisionEvent,
+  tokenCostP50,
+  tokenCostP95,
+} from "@/agent/skills/lifecycle";
+import type { Database as BetterSqliteDatabase } from "better-sqlite3";
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
+import { freshDb } from "../helpers/db";
+
+let d: BetterSqliteDatabase;
+let store: SkillRecordStore;
+let decisionStore: SkillDecisionStore;
+
+beforeEach(() => {
+  d = freshDb();
+  store = new SkillRecordStore(d);
+  decisionStore = new SkillDecisionStore(d);
+});
+
+afterEach(() => {
+  d.close();
+});
+
+describe("SkillRecordStore", () => {
+  it("upserts and retrieves a skill record", () => {
+    store.upsert("test-skill", {
+      status: "active",
+      triggers: ["coding", "refactor"],
+      supersedes: [],
+    });
+    const rec = store.get("test-skill");
+    expect(rec).not.toBeNull();
+    expect(rec?.name).toBe("test-skill");
+    expect(rec?.status).toBe("active");
+    expect(rec?.triggers).toEqual(["coding", "refactor"]);
+    expect(rec?.usageCount).toBe(0);
+    expect(rec?.trustScore).toBe(1.0);
+  });
+
+  it("records usage metrics and tracks success count", () => {
+    store.upsert("test-skill", { status: "active" });
+    store.recordUsage({ skillName: "test-skill", success: true, tokenCost: 500 });
+    store.recordUsage({ skillName: "test-skill", success: true, tokenCost: 800 });
+    store.recordUsage({ skillName: "test-skill", success: false, tokenCost: 200 });
+
+    const rec = store.get("test-skill");
+    expect(rec?.usageCount).toBe(3);
+    expect(rec?.successCount).toBe(2);
+    expect(rec?.tokenCostSamples).toEqual([500, 800, 200]);
+    expect(rec?.lastUsedAt).toBeTruthy();
+  });
+
+  it("lists by status", () => {
+    store.upsert("active-1", { status: "active" });
+    store.upsert("active-2", { status: "active" });
+    store.upsert("retired-1", { status: "retired" });
+    expect(store.listByStatus("active")).toHaveLength(2);
+    expect(store.listByStatus("retired")).toHaveLength(1);
+  });
+
+  it("creates record on first usage if it doesn't exist", () => {
+    store.recordUsage({ skillName: "new-skill", success: true, tokenCost: 100 });
+    const rec = store.get("new-skill");
+    expect(rec).not.toBeNull();
+    expect(rec?.usageCount).toBe(1);
+  });
+});
+
+describe("tokenCost percentiles", () => {
+  it("computes p50 and p95 from samples", () => {
+    const samples = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000];
+    expect(tokenCostP50(samples)).toBe(500);
+    expect(tokenCostP95(samples)).toBe(1000);
+  });
+
+  it("handles empty samples", () => {
+    expect(tokenCostP50([])).toBe(0);
+    expect(tokenCostP95([])).toBe(0);
+  });
+
+  it("handles single sample", () => {
+    expect(tokenCostP50([42])).toBe(42);
+    expect(tokenCostP95([42])).toBe(42);
+  });
+});
+
+describe("nightly scorer", () => {
+  it("queues retire for low-hit skill", () => {
+    store.upsert("low-hit", { status: "active", triggers: ["test"] });
+    store.recordUsage({ skillName: "low-hit", success: true, tokenCost: 100 });
+
+    store.upsert("popular", { status: "active", triggers: ["coding"] });
+    for (let i = 0; i < 50; i++) {
+      store.recordUsage({ skillName: "popular", success: true, tokenCost: 100 });
+    }
+
+    const result = runNightlyScorer({
+      store,
+      decisionStore,
+      totalInvocations: 100,
+    });
+
+    expect(result.queuedRetire).toContain("low-hit");
+    expect(result.queuedRetire).not.toContain("popular");
+
+    const rec = store.get("low-hit");
+    expect(rec?.status).toBe("queued_retire");
+  });
+
+  it("queues retire for low success rate skill (n >= 10)", () => {
+    store.upsert("bad-skill", { status: "active", triggers: ["test"] });
+    for (let i = 0; i < 10; i++) {
+      store.recordUsage({
+        skillName: "bad-skill",
+        success: i < 3,
+        tokenCost: 100,
+      });
+    }
+
+    const result = runNightlyScorer({
+      store,
+      decisionStore,
+      totalInvocations: 10,
+    });
+
+    expect(result.queuedRetire).toContain("bad-skill");
+  });
+
+  it("queues retire for superseded skill", () => {
+    store.upsert("old-skill", { status: "active", triggers: ["coding"] });
+    store.upsert("new-skill", {
+      status: "active",
+      triggers: ["coding"],
+      supersedes: ["old-skill"],
+    });
+    for (let i = 0; i < 20; i++) {
+      store.recordUsage({ skillName: "old-skill", success: true, tokenCost: 100 });
+      store.recordUsage({ skillName: "new-skill", success: true, tokenCost: 100 });
+    }
+
+    const result = runNightlyScorer({
+      store,
+      decisionStore,
+      totalInvocations: 40,
+    });
+
+    expect(result.queuedRetire).toContain("old-skill");
+    expect(result.queuedRetire).not.toContain("new-skill");
+  });
+
+  it("skips skills with suppressed retire proposals", () => {
+    store.upsert("suppressed-skill", { status: "active", triggers: ["test"] });
+    store.recordUsage({ skillName: "suppressed-skill", success: true, tokenCost: 100 });
+
+    rejectProposal({
+      store,
+      decisionStore,
+      skillName: "suppressed-skill",
+      proposalKind: "retire",
+      decidedBy: "user",
+      rationale: "keep it",
+    });
+    // Manually reset back to active since rejectProposal doesn't change active→active
+    store.updateStatus("suppressed-skill", "active");
+
+    const result = runNightlyScorer({
+      store,
+      decisionStore,
+      totalInvocations: 100,
+    });
+
+    expect(result.skipped).toContain("suppressed-skill");
+    expect(result.queuedRetire).not.toContain("suppressed-skill");
+  });
+});
+
+describe("accept / reject proposals", () => {
+  it("accept retire removes skill from retrieval index", () => {
+    store.upsert("to-retire", { status: "queued_retire", triggers: ["test"] });
+
+    const decision = acceptProposal({
+      store,
+      decisionStore,
+      skillName: "to-retire",
+      proposalKind: "retire",
+      decidedBy: "user",
+      rationale: "low usage",
+    });
+
+    expect(decision.decision).toBe("accepted");
+
+    const rec = store.get("to-retire");
+    expect(rec?.status).toBe("retired");
+
+    const active = store.listActive();
+    expect(active.find((s) => s.name === "to-retire")).toBeUndefined();
+  });
+
+  it("accept draft promotes to active", () => {
+    store.upsert("new-draft", { status: "queued_draft", triggers: ["review"] });
+
+    acceptProposal({
+      store,
+      decisionStore,
+      skillName: "new-draft",
+      proposalKind: "draft",
+      decidedBy: "user",
+    });
+
+    const rec = store.get("new-draft");
+    expect(rec?.status).toBe("active");
+  });
+
+  it("reject suppresses same proposal for 30 days", () => {
+    store.upsert("keep-me", { status: "queued_retire", triggers: ["test"] });
+
+    rejectProposal({
+      store,
+      decisionStore,
+      skillName: "keep-me",
+      proposalKind: "retire",
+      decidedBy: "user",
+      rationale: "still useful",
+    });
+
+    expect(decisionStore.isSuppressed("keep-me", "retire")).toBe(true);
+    expect(decisionStore.isSuppressed("keep-me", "draft")).toBe(false);
+
+    const rec = store.get("keep-me");
+    expect(rec?.status).toBe("active");
+  });
+});
+
+describe("brainctl decision events", () => {
+  it("converts a SkillDecision to a brainctl event shape", () => {
+    const decision: SkillDecision = {
+      id: "sd_123",
+      skillName: "test-skill",
+      proposalKind: "retire",
+      decision: "accepted",
+      decidedBy: "user",
+      rationale: "low hit rate",
+      suppressedUntil: null,
+      createdAt: "2026-04-24T12:00:00.000Z",
+    };
+
+    const event = toBrainctlDecisionEvent(decision);
+    expect(event.type).toBe("skill_decision");
+    expect(event.skillName).toBe("test-skill");
+    expect(event.proposalKind).toBe("retire");
+    expect(event.decision).toBe("accepted");
+    expect(event.decidedBy).toBe("user");
+    expect(event.timestamp).toBe("2026-04-24T12:00:00.000Z");
+  });
+});
diff --git a/tests/cockpit/ink-bridge.test.ts b/tests/cockpit/ink-bridge.test.ts
new file mode 100644
index 0000000..1bf43e0
--- /dev/null
+++ b/tests/cockpit/ink-bridge.test.ts
@@ -0,0 +1,119 @@
+import { type CockpitEvent, EventBus } from "@/cockpit/core/events";
+import { createInkBridge, createSkillEventBridge } from "@/cockpit/ink/bridge";
+import { describe, expect, it } from "vitest";
+
+describe("InkBridge", () => {
+  it("forwards events from the EventBus to handlers", () => {
+    const bus = new EventBus();
+    const bridge = createInkBridge({ bus });
+    const seen: CockpitEvent[] = [];
+
+    bridge.onEvent((e) => seen.push(e));
+
+    bus.publish({
+      t: "transcript.append",
+      sessionId: "s1",
+      message: { id: "m1", role: "user", content: "hello" },
+    });
+
+    expect(seen).toHaveLength(1);
+    expect(seen[0]?.t).toBe("transcript.append");
+  });
+
+  it("filters events by type when filter is provided", () => {
+    const bus = new EventBus();
+    const bridge = createInkBridge({ bus, filter: ["budget.warn"] });
+    const seen: CockpitEvent[] = [];
+
+    bridge.onEvent((e) => seen.push(e));
+
+    bus.publish({
+      t: "transcript.append",
+      sessionId: "s1",
+      message: { id: "m1", role: "user", content: "hello" },
+    });
+    bus.publish({
+      t: "budget.warn",
+      sessionId: "s1",
+      dimension: "tokens",
+      used: 45_000,
+      cap: 50_000,
+    });
+
+    expect(seen).toHaveLength(1);
+    expect(seen[0]?.t).toBe("budget.warn");
+  });
+
+  it("stops forwarding after destroy()", () => {
+    const bus = new EventBus();
+    const bridge = createInkBridge({ bus });
+    const seen: CockpitEvent[] = [];
+
+    bridge.onEvent((e) => seen.push(e));
+    expect(bridge.active).toBe(true);
+
+    bridge.destroy();
+    expect(bridge.active).toBe(false);
+
+    bus.publish({
+      t: "transcript.append",
+      sessionId: "s1",
+      message: { id: "m1", role: "user", content: "hello" },
+    });
+
+    expect(seen).toHaveLength(0);
+  });
+
+  it("does not register handlers after destroy", () => {
+    const bus = new EventBus();
+    const bridge = createInkBridge({ bus });
+    bridge.destroy();
+
+    const seen: CockpitEvent[] = [];
+    bridge.onEvent((e) => seen.push(e));
+
+    bus.publish({
+      t: "budget.warn",
+      sessionId: "s1",
+      dimension: "tokens",
+      used: 100,
+      cap: 200,
+    });
+
+    expect(seen).toHaveLength(0);
+  });
+});
+
+describe("createSkillEventBridge", () => {
+  it("only passes skill.proposal and skill.decision events", () => {
+    const bus = new EventBus();
+    const bridge = createSkillEventBridge(bus);
+    const seen: CockpitEvent[] = [];
+
+    bridge.onEvent((e) => seen.push(e));
+
+    bus.publish({
+      t: "transcript.append",
+      sessionId: "s1",
+      message: { id: "m1", role: "user", content: "hello" },
+    });
+    bus.publish({
+      t: "skill.proposal",
+      proposalId: "sp1",
+      kind: "retire",
+      payload: { rationale: "low hit rate" },
+    });
+    bus.publish({
+      t: "skill.decision",
+      proposalId: "sp1",
+      decision: "accepted",
+      by: "user",
+    });
+
+    expect(seen).toHaveLength(2);
+    expect(seen[0]?.t).toBe("skill.proposal");
+    expect(seen[1]?.t).toBe("skill.decision");
+
+    bridge.destroy();
+  });
+});

From 23cc91536eeda07533a74662267b8112e4969542 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Fri, 24 Apr 2026 19:11:06 +0000
Subject: [PATCH 6/8] feat(S4): cli-process subagent backend, parsers, seed
 skills, budget/depth enforcement
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- cli-process backend with oneshot mode, CockpitEvent normalization
- Parsers: claude-code-stream, codex-exec, raw-text fallback
- --bare flag contract: never with oauth_external, default with api_key (HC#7)
- maxDepth 3, maxConcurrentChildren 3, heartbeat 30s, stale 10min
- Seed skills: claude-code.md, codex.md, pr-review.md (frontmatter per §6)
- 45 tests covering parsers, --bare logic, budget, depth, concurrency

Co-Authored-By: Terrence Schonleber <TVschonleber@gmail.com>
---
 src/agent/skills/claude-code.md             |  54 ++++
 src/agent/skills/codex.md                   |  50 ++++
 src/agent/skills/pr-review.md               |  52 ++++
 src/cockpit/subagents/cli-process.ts        | 316 ++++++++++++++++++++
 src/cockpit/subagents/index.ts              |  15 +
 src/cockpit/subagents/parsers.ts            | 189 ++++++++++++
 tests/cockpit/subagents/budget.test.ts      |  74 +++++
 tests/cockpit/subagents/cli-process.test.ts | 280 +++++++++++++++++
 tests/cockpit/subagents/parsers.test.ts     | 165 ++++++++++
 9 files changed, 1195 insertions(+)
 create mode 100644 src/agent/skills/claude-code.md
 create mode 100644 src/agent/skills/codex.md
 create mode 100644 src/agent/skills/pr-review.md
 create mode 100644 src/cockpit/subagents/cli-process.ts
 create mode 100644 src/cockpit/subagents/index.ts
 create mode 100644 src/cockpit/subagents/parsers.ts
 create mode 100644 tests/cockpit/subagents/budget.test.ts
 create mode 100644 tests/cockpit/subagents/cli-process.test.ts
 create mode 100644 tests/cockpit/subagents/parsers.test.ts

diff --git a/src/agent/skills/claude-code.md b/src/agent/skills/claude-code.md
new file mode 100644
index 0000000..67f807a
--- /dev/null
+++ b/src/agent/skills/claude-code.md
@@ -0,0 +1,54 @@
+---
+name: claude-code
+description: Delegate coding tasks to Claude Code CLI
+version: 1.0.0
+triggers: ["coding", "refactor", "review", "PR", "debug", "implement"]
+backend: cli-process
+spawn_spec:
+  cmd: claude
+  args: ["-p", "--output-format", "stream-json", "--verbose"]
+  parser: claude-code-stream
+tools_allowed: [Read, Edit, Bash, Write]
+budget: { tokens: 50000, usdTicks: 2000000, wallClockMs: 300000 }
+---
+
+# Claude Code
+
+Delegate a coding task to Claude Code running as a CLI subprocess.
+
+## When to use
+
+- The task requires reading, editing, or creating files in a repository.
+- The task involves debugging, refactoring, or implementing features.
+- You need a capable coding agent with file system and shell access.
+
+## Invocation
+
+The cockpit spawns `claude -p` with `--output-format stream-json` and
+writes the task to the process's stdin. In BYOK (api_key) mode, `--bare`
+is added automatically for fastest startup and lowest overhead. In
+`oauth_external` mode, `--bare` is never passed (hard constraint #7).
+
+## Allowed tools
+
+The subagent is restricted to: `Read`, `Edit`, `Bash`, `Write`.
+Additional tools can be granted per-spawn via `--allowedTools`.
+
+## Budget
+
+Default: 50k tokens, $0.002 USD, 5 min wall clock. Child budget is
+capped at half the parent's remaining budget on all dimensions.
+
+## Output
+
+Stdout is parsed as newline-delimited JSON (`stream-json` format).
+Events are normalized into CockpitEvent schema:
+- Content deltas -> `subagent.event` kind `stdout`
+- System/retry notices -> `subagent.event` kind `status`
+- Terminal result -> `subagent.end` (`ok`/`exit` only; cost metadata is not forwarded yet)
+
+## Notes
+
+- `--max-turns 10` is a sensible default for bounded tasks.
+- `--max-budget-usd 2.00` prevents runaway spend on a single delegation.
+- Interactive mode (tmux wrapping) is planned but not yet implemented.
diff --git a/src/agent/skills/codex.md b/src/agent/skills/codex.md
new file mode 100644
index 0000000..f3cb2d6
--- /dev/null
+++ b/src/agent/skills/codex.md
@@ -0,0 +1,50 @@
+---
+name: codex
+description: Delegate coding tasks to OpenAI Codex CLI
+version: 1.0.0
+triggers: ["coding", "implement", "fix", "generate", "scaffold"]
+backend: cli-process
+spawn_spec:
+  cmd: codex
+  args: ["exec", "--json"]
+  parser: codex-exec
+tools_allowed: [Read, Edit, Bash]
+budget: { tokens: 50000, usdTicks: 2000000, wallClockMs: 300000 }
+---
+
+# Codex CLI
+
+Delegate a coding task to OpenAI's Codex CLI agent.
+
+## When to use
+
+- The task requires generating, editing, or scaffolding code.
+- You want to use OpenAI models for the subtask.
+- The task is well-scoped and can run as a oneshot execution.
+
+## Invocation
+
+The cockpit spawns `codex exec --json` for structured output. The task
+description is written to the process's stdin, not passed as an argument.
+
+## Allowed tools
+
+The subagent is restricted to: `Read`, `Edit`, `Bash`.
+
+## Budget
+
+Default: 50k tokens, $0.002 USD, 5 min wall clock. Child budget is
+capped at half the parent's remaining budget on all dimensions.
+
+## Output
+
+Stdout is parsed as newline-delimited JSON when available. The parser
+falls back to raw-text passthrough if the JSON format is unstable.
+Events are normalized into CockpitEvent schema.
+
+## Known risks
+
+- `codex exec --json` output format is not fully stable. The parser
+  includes a raw-text fallback for resilience.
+- Version mismatches may change the JSON schema without notice. Monitor
+  parser errors and fall back to raw-text if needed (kill switch S4).
diff --git a/src/agent/skills/pr-review.md b/src/agent/skills/pr-review.md
new file mode 100644
index 0000000..80f2f84
--- /dev/null
+++ b/src/agent/skills/pr-review.md
@@ -0,0 +1,52 @@
+---
+name: pr-review
+description: Review a pull request for correctness, security, and style
+version: 1.0.0
+triggers: ["review", "PR", "pull request", "code review"]
+backend: cli-process
+spawn_spec:
+  cmd: claude
+  args: ["-p", "--output-format", "stream-json", "--verbose", "--max-turns", "5"]
+  parser: claude-code-stream
+tools_allowed: [Read, Bash]
+budget: { tokens: 30000, usdTicks: 1500000, wallClockMs: 180000 }
+---
+
+# PR Review
+
+Review a pull request for correctness, security, and performance issues.
+
+## When to use
+
+- A pull request needs review before merge.
+- You want an automated first pass on code quality.
+- The operator asks you to review changes in a branch or PR.
+
+## Invocation
+
+The cockpit spawns Claude Code with a review-focused prompt. The task
+should include the PR URL, branch name, or diff context. The subagent
+reads the relevant files and provides structured feedback.
+
+## Allowed tools
+
+Read-only: `Read` for file inspection, `Bash` for git commands
+(`git diff`, `git log`, `git show`). No write tools.
+
+## Budget
+
+Tighter than general coding: 30k tokens, $0.0015 USD, 3 min wall clock.
+Reviews should be fast and focused.
+
+## Review checklist
+
+The subagent should evaluate:
+1. **Correctness** — does the code do what it claims?
+2. **Security** — no exposed secrets, injection vectors, or auth bypasses.
+3. **Performance** — no N+1 queries, unbounded loops, or memory leaks.
+4. **Style** — follows existing conventions (skip nitpicks).
+
+## Output
+
+Structured review comments normalized through the Claude Code stream
+parser into CockpitEvent schema.
diff --git a/src/cockpit/subagents/cli-process.ts b/src/cockpit/subagents/cli-process.ts
new file mode 100644
index 0000000..71a88ab
--- /dev/null
+++ b/src/cockpit/subagents/cli-process.ts
@@ -0,0 +1,316 @@
+/**
+ * cli-process subagent backend.
+ *
+ * Spawns CLI tools (claude, codex, etc.) as child processes, pipes their
+ * output through a StreamParser, and normalizes everything into CockpitEvents.
+ *
+ * Oneshot mode: stdin closed after task, process runs to completion.
+ * Interactive mode: TODO — tmux wrapping per hermes pattern.
+ */
+
+import { type ChildProcess, spawn } from "node:child_process";
+import { randomUUID } from "node:crypto";
+import { createInterface } from "node:readline";
+
+import { type DefaultBudget, createBudget } from "@/agent/budget";
+import type { Budget, BudgetLimits } from "@/agent/types";
+import { defaultChildBudgetLimits } from "../core/budget";
+import type { CockpitEvent } from "../core/events";
+import {
+  DEFAULT_MAX_CONCURRENT_CHILDREN,
+  DEFAULT_SUBAGENT_HEARTBEAT_MS,
+  DEFAULT_SUBAGENT_STALE_MS,
+  MAX_SUBAGENT_DEPTH,
+  type SpawnSpec,
+  type Subagent,
+  type SubagentHandle,
+  type SubagentStatus,
+} from "../core/subagents";
+import { type StreamParser, createParser } from "./parsers";
+
+export type AuthMode = "api_key" | "oauth_external" | "oauth_device_code";
+
+export interface CliProcessBackendOptions {
+  authMode?: AuthMode;
+  maxConcurrentChildren?: number;
+  parentBudget?: Budget;
+}
+
+export class CliProcessBackend implements Subagent {
+  readonly id: string;
+  readonly backend = "cli-process" as const;
+
+  private readonly authMode: AuthMode;
+  private readonly maxConcurrentChildren: number;
+  private readonly parentBudget: Budget;
+  private readonly activeChildren = new Map<string, CliProcessHandle>();
+
+  constructor(options: CliProcessBackendOptions = {}) {
+    this.id = `cli-process-${randomUUID().slice(0, 8)}`;
+    this.authMode = options.authMode ?? "api_key";
+    this.maxConcurrentChildren = options.maxConcurrentChildren ?? DEFAULT_MAX_CONCURRENT_CHILDREN;
+    this.parentBudget = options.parentBudget ?? createBudget();
+  }
+
+  async spawn(spec: SpawnSpec): Promise<SubagentHandle> {
+    const depth = spec.depth ?? 0;
+    if (depth > MAX_SUBAGENT_DEPTH) {
+      throw new Error(`Subagent depth ${depth} exceeds maximum ${MAX_SUBAGENT_DEPTH}`);
+    }
+
+    if (this.activeChildren.size >= this.maxConcurrentChildren) {
+      throw new Error(`Concurrent children limit reached (${this.maxConcurrentChildren})`);
+    }
+
+    const mode = spec.mode ?? "oneshot";
+    if (mode === "interactive") {
+      // TODO: tmux wrapping for interactive mode per hermes pattern
+      throw new Error("Interactive mode not yet implemented — use oneshot");
+    }
+
+    const cmd = spec.cmd;
+    if (!cmd) {
+      throw new Error("SpawnSpec.cmd is required for cli-process backend");
+    }
+
+    const args = resolveArgs(spec, this.authMode);
+    const parser = createParser(spec.parser ?? "raw-text");
+
+    const childLimits: BudgetLimits = spec.budget
+      ? { ...defaultChildBudgetLimits(this.parentBudget), ...spec.budget }
+      : defaultChildBudgetLimits(this.parentBudget);
+    const childBudget = (this.parentBudget as DefaultBudget).fork(childLimits);
+
+    const subagentId = `subagent-${randomUUID().slice(0, 8)}`;
+    const handle = new CliProcessHandle(subagentId, cmd, args, spec.task, parser, childBudget);
+
+    this.activeChildren.set(subagentId, handle);
+    handle.onDone(() => this.activeChildren.delete(subagentId));
+    handle.start();
+
+    return handle;
+  }
+
+  activeCount(): number {
+    return this.activeChildren.size;
+  }
+}
+
+// ─── Handle ─────────────────────────────────────────────────────────────────
+
+class CliProcessHandle implements SubagentHandle {
+  private process: ChildProcess | null = null;
+  private state: SubagentStatus["state"] = "queued";
+  private exitCode: number | undefined;
+  private readonly eventQueue: CockpitEvent[] = [];
+  private resolveWaiter: (() => void) | null = null;
+  private done = false;
+  private doneCallbacks: Array<() => void> = [];
+  private heartbeatTimer: ReturnType<typeof setInterval> | null = null;
+  private staleTimer: ReturnType<typeof setTimeout> | null = null;
+  private lastActivityAt: number = Date.now();
+  private startedAt: string | undefined;
+  private endedAt: string | undefined;
+
+  constructor(
+    private readonly subagentId: string,
+    private readonly cmd: string,
+    private readonly args: readonly string[],
+    private readonly task: string,
+    private readonly parser: StreamParser,
+    readonly budget: Budget,
+  ) {}
+
+  onDone(cb: () => void): void {
+    if (this.done) {
+      cb();
+    } else {
+      this.doneCallbacks.push(cb);
+    }
+  }
+
+  /**
+   * Spawn the child process, stream its output into the event queue, and arm
+   * the heartbeat and stale-activity timers. The backend calls this once,
+   * right after registering the handle.
+   */
+  start(): void {
+    this.state = "running";
+    this.startedAt = new Date().toISOString();
+    const emit = (kind: "stderr" | "status", chunk: string): void =>
+      this.pushEvent({ t: "subagent.event", subagentId: this.subagentId, kind, chunk });
+
+    // Spawn event
+    this.pushEvent({
+      t: "subagent.spawn",
+      subagentId: this.subagentId,
+      backend: "cli-process",
+      parentSessionId: "",
+    });
+
+    const proc = spawn(this.cmd, [...this.args], {
+      stdio: ["pipe", "pipe", "pipe"],
+      env: { ...process.env },
+    });
+    this.process = proc;
+
+    // Oneshot: write the task (if any), then always close stdin so a child that
+    // reads stdin sees EOF even when the task is empty. A child that exits
+    // without reading can raise EPIPE here; report it instead of crashing.
+    proc.stdin?.on("error", (err) => emit("stderr", `stdin: ${err.message}`));
+    if (this.task) proc.stdin?.write(this.task);
+    proc.stdin?.end();
+
+    // Parse stdout
+    if (proc.stdout) {
+      const rl = createInterface({ input: proc.stdout });
+      rl.on("line", (line) => {
+        this.lastActivityAt = Date.now();
+        const { events } = this.parser.parseLine(this.subagentId, line);
+        for (const e of events) this.pushEvent(e);
+      });
+    }
+
+    // Stderr as raw events
+    if (proc.stderr) {
+      const rl = createInterface({ input: proc.stderr });
+      rl.on("line", (line) => {
+        this.lastActivityAt = Date.now();
+        emit("stderr", line);
+      });
+    }
+
+    proc.on("close", (code) => {
+      // `code` is null when the child was terminated by a signal; report exit 1.
+      this.exitCode = code ?? 1;
+      const { events } = this.parser.finalize(this.subagentId, this.exitCode);
+      for (const e of events) this.pushEvent(e);
+      this.finish(code === 0 ? "completed" : "failed");
+    });
+
+    // Process-level failure (e.g. the command could not be spawned).
+    proc.on("error", (err) => {
+      emit("stderr", err.message);
+      this.finish("failed");
+    });
+
+    // Heartbeat
+    this.heartbeatTimer = setInterval(() => {
+      emit("status", "heartbeat");
+    }, DEFAULT_SUBAGENT_HEARTBEAT_MS);
+
+    // Stale detection re-arms itself: a one-shot timeout would only catch a
+    // child that was silent for the whole first window and never check again.
+    const armStaleCheck = (delayMs: number): void => {
+      this.staleTimer = setTimeout(() => {
+        const idleMs = Date.now() - this.lastActivityAt;
+        if (idleMs >= DEFAULT_SUBAGENT_STALE_MS) void this.cancel();
+        else armStaleCheck(DEFAULT_SUBAGENT_STALE_MS - idleMs);
+      }, delayMs);
+    };
+    armStaleCheck(DEFAULT_SUBAGENT_STALE_MS);
+  }
+
+  async send(input: string): Promise<void> {
+    if (!this.process?.stdin?.writable) {
+      throw new Error("Cannot send input — process stdin is not writable");
+    }
+    this.process.stdin.write(input);
+  }
+
+  async status(): Promise<SubagentStatus> {
+    const s: SubagentStatus = { state: this.state };
+    if (this.startedAt !== undefined) s.startedAt = this.startedAt;
+    if (this.endedAt !== undefined) s.endedAt = this.endedAt;
+    if (this.exitCode !== undefined) s.exit = this.exitCode;
+    return s;
+  }
+
+  async cancel(): Promise<void> {
+    if (this.process && !this.done) {
+      this.process.kill("SIGTERM");
+      this.finish("cancelled");
+    }
+  }
+
+  get events(): AsyncIterable<CockpitEvent> {
+    const self = this;
+    return {
+      [Symbol.asyncIterator]() {
+        return {
+          async next(): Promise<IteratorResult<CockpitEvent>> {
+            while (true) {
+              if (self.eventQueue.length > 0) {
+                const event = self.eventQueue.shift();
+                if (event !== undefined) return { value: event, done: false };
+              }
+              if (self.done) {
+                return { value: undefined as unknown as CockpitEvent, done: true };
+              }
+              await new Promise<void>((resolve) => {
+                self.resolveWaiter = resolve;
+              });
+            }
+          },
+        };
+      },
+    };
+  }
+
+  private pushEvent(event: CockpitEvent): void {
+    this.eventQueue.push(event);
+    if (this.resolveWaiter) {
+      const resolve = this.resolveWaiter;
+      this.resolveWaiter = null;
+      resolve();
+    }
+  }
+
+  private finish(state: SubagentStatus["state"]): void {
+    if (this.done) return;
+    this.done = true;
+    this.state = state;
+    this.endedAt = new Date().toISOString();
+
+    if (this.heartbeatTimer) clearInterval(this.heartbeatTimer);
+    if (this.staleTimer) clearTimeout(this.staleTimer);
+
+    // Wake async iterator
+    if (this.resolveWaiter) {
+      const resolve = this.resolveWaiter;
+      this.resolveWaiter = null;
+      resolve();
+    }
+
+    for (const cb of this.doneCallbacks) cb();
+    this.doneCallbacks = [];
+  }
+}
+
+// ─── Arg resolution ─────────────────────────────────────────────────────────
+
+/**
+ * Resolve CLI args for a SpawnSpec, applying hard constraint #7:
+ * - Never pass --bare when auth mode is oauth_external.
+ * - In BYOK (api_key) Anthropic mode, --bare is default for subagent spawns.
+ */
+export function resolveArgs(spec: SpawnSpec, authMode: AuthMode): readonly string[] {
+  const args = [...(spec.args ?? [])];
+
+  const isClaude = spec.cmd === "claude";
+  if (!isClaude) return args;
+
+  const hasBare = args.includes("--bare");
+
+  if (authMode === "oauth_external") {
+    // Hard constraint #7: never pass --bare with oauth_external
+    return args.filter((a) => a !== "--bare");
+  }
+
+  if (authMode === "api_key" && !hasBare) {
+    // BYOK mode: --bare is default for subagent spawns
+    return ["--bare", ...args];
+  }
+
+  return args;
+}
diff --git a/src/cockpit/subagents/index.ts b/src/cockpit/subagents/index.ts
new file mode 100644
index 0000000..a5fdb1b
--- /dev/null
+++ b/src/cockpit/subagents/index.ts
@@ -0,0 +1,15 @@
+export {
+  CliProcessBackend,
+  resolveArgs,
+  type AuthMode,
+  type CliProcessBackendOptions,
+} from "./cli-process";
+export {
+  type StreamParser,
+  type ParsedChunk,
+  RawTextParser,
+  ClaudeCodeStreamParser,
+  CodexExecParser,
+  createParser,
+  availableParsers,
+} from "./parsers";
diff --git a/src/cockpit/subagents/parsers.ts b/src/cockpit/subagents/parsers.ts
new file mode 100644
index 0000000..5ca45f9
--- /dev/null
+++ b/src/cockpit/subagents/parsers.ts
@@ -0,0 +1,189 @@
+/**
+ * Stream parsers for cli-process subagent backends.
+ *
+ * Each parser transforms child process output (stdout/stderr lines) into
+ * normalized CockpitEvents (`subagent.event` / `subagent.end`).
+ */
+
+import type { CockpitEvent } from "../core/events";
+
+export interface ParsedChunk {
+  events: CockpitEvent[];
+}
+
+export interface StreamParser {
+  readonly name: string;
+  /** Feed a single line of stdout. Returns zero or more CockpitEvents. */
+  parseLine(subagentId: string, line: string): ParsedChunk;
+  /** Called when the process exits. Returns final events if any. */
+  finalize(subagentId: string, exitCode: number): ParsedChunk;
+}
+
+// ─── Raw-text fallback ──────────────────────────────────────────────────────
+
+export class RawTextParser implements StreamParser {
+  readonly name = "raw-text";
+
+  parseLine(subagentId: string, line: string): ParsedChunk {
+    return {
+      events: [{ t: "subagent.event", subagentId, kind: "stdout", chunk: line }],
+    };
+  }
+
+  finalize(subagentId: string, exitCode: number): ParsedChunk {
+    return {
+      events: [{ t: "subagent.end", subagentId, ok: exitCode === 0, exit: exitCode }],
+    };
+  }
+}
+
+// ─── Claude Code stream-json parser ─────────────────────────────────────────
+//
+// `claude -p --output-format stream-json` emits newline-delimited JSON.
+// Each object has a `type` field. We map:
+//   - type containing "content_block_delta" or text content -> subagent.event stdout
+//   - type "system" or containing "api_retry"               -> subagent.event status
+//   - type "result" (terminal)                              -> subagent.end
+//   - everything else                                       -> subagent.event stdout
+//
+// The result event also carries session_id, num_turns, total_cost_usd; these
+// are currently dropped — subagent.end only reports ok/exit.
+
+export class ClaudeCodeStreamParser implements StreamParser {
+  readonly name = "claude-code-stream";
+
+  /**
+   * Map one stream-json line to events. Blank lines yield nothing; invalid
+   * JSON and a bare `null` (which cannot be indexed) pass through as stdout.
+   */
+  parseLine(subagentId: string, line: string): ParsedChunk {
+    const trimmed = line.trim();
+    if (trimmed.length === 0) return { events: [] };
+
+    const raw: ParsedChunk = {
+      events: [{ t: "subagent.event", subagentId, kind: "stdout", chunk: line }],
+    };
+    let parsed: Record<string, unknown>;
+    try {
+      const value: unknown = JSON.parse(trimmed);
+      if (value === null) return raw;
+      parsed = value as Record<string, unknown>;
+    } catch {
+      return raw;
+    }
+
+    const type = typeof parsed["type"] === "string" ? parsed["type"] : "";
+
+    if (type === "result") {
+      const ok = typeof parsed["is_error"] === "boolean" ? !parsed["is_error"] : true;
+      return { events: [{ t: "subagent.end", subagentId, ok, exit: ok ? 0 : 1 }] };
+    }
+
+    if (type === "system" || type.includes("api_retry")) {
+      const msg =
+        typeof parsed["message"] === "string" ? parsed["message"] : JSON.stringify(parsed);
+      return { events: [{ t: "subagent.event", subagentId, kind: "status", chunk: msg }] };
+    }
+
+    // Content deltas, assistant messages, and everything else -> stdout
+    const chunk = extractTextContent(parsed) ?? JSON.stringify(parsed);
+    return { events: [{ t: "subagent.event", subagentId, kind: "stdout", chunk }] };
+  }
+
+  /** Emit the terminal `subagent.end` from the process exit code. */
+  finalize(subagentId: string, exitCode: number): ParsedChunk {
+    return { events: [{ t: "subagent.end", subagentId, ok: exitCode === 0, exit: exitCode }] };
+  }
+}
+
+// ─── Codex exec parser ──────────────────────────────────────────────────────
+//
+// `codex exec --json` stability is uncertain. This parser attempts JSON
+// parsing per line. If the output isn't valid JSON, it falls back to raw text.
+// TODO: Revisit when codex CLI stabilizes its JSON output format.
+
+export class CodexExecParser implements StreamParser {
+  readonly name = "codex-exec";
+
+  /**
+   * Map one `codex exec --json` line to events. Blank lines yield nothing;
+   * invalid JSON and a bare `null` (which cannot be indexed) pass through as stdout.
+   */
+  parseLine(subagentId: string, line: string): ParsedChunk {
+    const trimmed = line.trim();
+    if (trimmed.length === 0) return { events: [] };
+
+    // Raw-text fallback when JSON isn't stable
+    const raw: ParsedChunk = {
+      events: [{ t: "subagent.event", subagentId, kind: "stdout", chunk: line }],
+    };
+    let parsed: Record<string, unknown>;
+    try {
+      const value: unknown = JSON.parse(trimmed);
+      if (value === null) return raw;
+      parsed = value as Record<string, unknown>;
+    } catch {
+      return raw;
+    }
+
+    const type = typeof parsed["type"] === "string" ? parsed["type"] : "";
+
+    if (type === "completed" || type === "done") {
+      // A missing or non-numeric exit_code is treated as success (exit 0).
+      const exit = typeof parsed["exit_code"] === "number" ? parsed["exit_code"] : 0;
+      return { events: [{ t: "subagent.end", subagentId, ok: exit === 0, exit }] };
+    }
+
+    if (type === "error") {
+      const msg =
+        typeof parsed["message"] === "string" ? parsed["message"] : JSON.stringify(parsed);
+      return { events: [{ t: "subagent.event", subagentId, kind: "stderr", chunk: msg }] };
+    }
+
+    // Status updates, progress, etc.
+    const chunk = extractTextContent(parsed) ?? JSON.stringify(parsed);
+    return { events: [{ t: "subagent.event", subagentId, kind: "stdout", chunk }] };
+  }
+
+  /** Emit the terminal `subagent.end` from the process exit code. */
+  finalize(subagentId: string, exitCode: number): ParsedChunk {
+    return { events: [{ t: "subagent.end", subagentId, ok: exitCode === 0, exit: exitCode }] };
+  }
+}
+
+// ─── Parser registry ────────────────────────────────────────────────────────
+
+const PARSERS: Record<string, () => StreamParser> = {
+  "raw-text": () => new RawTextParser(),
+  "claude-code-stream": () => new ClaudeCodeStreamParser(),
+  "codex-exec": () => new CodexExecParser(),
+};
+
+export function createParser(name: string): StreamParser {
+  const factory = PARSERS[name];
+  if (!factory) {
+    throw new Error(`Unknown parser: ${name}. Available: ${Object.keys(PARSERS).join(", ")}`);
+  }
+  return factory();
+}
+
+export function availableParsers(): readonly string[] {
+  return Object.keys(PARSERS);
+}
+
+// ─── Helpers ────────────────────────────────────────────────────────────────
+
+function extractTextContent(obj: Record<string, unknown>): string | undefined {
+  // Try common content fields across LLM CLI outputs
+  if (typeof obj["content"] === "string") return obj["content"];
+  if (typeof obj["text"] === "string") return obj["text"];
+  if (typeof obj["message"] === "string") return obj["message"];
+  if (
+    typeof obj["delta"] === "object" &&
+    obj["delta"] !== null &&
+    typeof (obj["delta"] as Record<string, unknown>)["text"] === "string"
+  ) {
+    return (obj["delta"] as Record<string, unknown>)["text"] as string;
+  }
+  return undefined;
+}
diff --git a/tests/cockpit/subagents/budget.test.ts b/tests/cockpit/subagents/budget.test.ts
new file mode 100644
index 0000000..0bbcf02
--- /dev/null
+++ b/tests/cockpit/subagents/budget.test.ts
@@ -0,0 +1,74 @@
+import { createBudget } from "@/agent/budget";
+import { DEFAULT_COCKPIT_BUDGET_LIMITS, defaultChildBudgetLimits } from "@/cockpit/core/budget";
+import {
+  DEFAULT_MAX_CONCURRENT_CHILDREN,
+  DEFAULT_SUBAGENT_HEARTBEAT_MS,
+  DEFAULT_SUBAGENT_STALE_MS,
+  MAX_SUBAGENT_DEPTH,
+} from "@/cockpit/core/subagents";
+import { describe, expect, it } from "vitest";
+
+describe("cockpit budget defaults", () => {
+  it("has correct default limits from spec", () => {
+    expect(DEFAULT_COCKPIT_BUDGET_LIMITS.tokens).toBe(50_000);
+    expect(DEFAULT_COCKPIT_BUDGET_LIMITS.usdTicks).toBe(2_000_000);
+    expect(DEFAULT_COCKPIT_BUDGET_LIMITS.wallClockMs).toBe(300_000);
+    expect(DEFAULT_COCKPIT_BUDGET_LIMITS.toolCalls).toBe(40);
+  });
+});
+
+describe("defaultChildBudgetLimits", () => {
+  it("returns half of parent remaining on all dimensions", () => {
+    const parent = createBudget({
+      tokens: 10_000,
+      usdTicks: 2_000_000,
+      wallClockMs: 300_000,
+      toolCalls: 40,
+    });
+    const child = defaultChildBudgetLimits(parent);
+    expect(child.tokens).toBe(5_000);
+    expect(child.usdTicks).toBe(1_000_000);
+    expect(child.wallClockMs).toBe(150_000);
+    expect(child.toolCalls).toBe(20);
+  });
+
+  it("halves remaining after partial consumption", () => {
+    const parent = createBudget({
+      tokens: 10_000,
+      usdTicks: 2_000_000,
+      wallClockMs: 300_000,
+      toolCalls: 40,
+    });
+    // Simulate consuming some tokens/cost via LlmUsage
+    parent.consumeUsage({
+      inputTokens: 2_000,
+      cachedInputTokens: 0,
+      outputTokens: 2_000,
+      reasoningTokens: 0,
+      costInUsdTicks: 500_000,
+    });
+    for (let i = 0; i < 10; i++) parent.consumeToolCall();
+    const child = defaultChildBudgetLimits(parent);
+    expect(child.tokens).toBe(3_000); // (10000 - 4000) / 2
+    expect(child.usdTicks).toBe(750_000); // (2000000 - 500000) / 2
+    expect(child.toolCalls).toBe(15); // (40 - 10) / 2
+  });
+});
+
+describe("subagent constants", () => {
+  it("maxDepth is 3", () => {
+    expect(MAX_SUBAGENT_DEPTH).toBe(3);
+  });
+
+  it("maxConcurrentChildren defaults to 3", () => {
+    expect(DEFAULT_MAX_CONCURRENT_CHILDREN).toBe(3);
+  });
+
+  it("heartbeat interval is 30s", () => {
+    expect(DEFAULT_SUBAGENT_HEARTBEAT_MS).toBe(30_000);
+  });
+
+  it("stale timeout is 10 minutes", () => {
+    expect(DEFAULT_SUBAGENT_STALE_MS).toBe(600_000);
+  });
+});
diff --git a/tests/cockpit/subagents/cli-process.test.ts b/tests/cockpit/subagents/cli-process.test.ts
new file mode 100644
index 0000000..401e896
--- /dev/null
+++ b/tests/cockpit/subagents/cli-process.test.ts
@@ -0,0 +1,280 @@
+import { createBudget } from "@/agent/budget";
+import type { SpawnSpec } from "@/cockpit/core/subagents";
+import { CliProcessBackend, resolveArgs } from "@/cockpit/subagents/cli-process";
+import { describe, expect, it } from "vitest";
+
+// ─── resolveArgs (hard constraint #7) ───────────────────────────────────────
+
+describe("resolveArgs — Claude Code --bare flag", () => {
+  const baseSpec: SpawnSpec = {
+    task: "test task",
+    backend: "cli-process",
+    parentSessionId: "sess-1",
+    cmd: "claude",
+    args: ["-p", "--output-format", "stream-json"],
+  };
+
+  it("adds --bare in api_key mode when not already present", () => {
+    const args = resolveArgs(baseSpec, "api_key");
+    expect(args).toContain("--bare");
+    expect(args[0]).toBe("--bare");
+  });
+
+  it("does not duplicate --bare in api_key mode when already present", () => {
+    const spec = { ...baseSpec, args: ["--bare", "-p"] };
+    const args = resolveArgs(spec, "api_key");
+    const bareCount = args.filter((a) => a === "--bare").length;
+    expect(bareCount).toBe(1);
+  });
+
+  it("never passes --bare in oauth_external mode", () => {
+    const args = resolveArgs(baseSpec, "oauth_external");
+    expect(args).not.toContain("--bare");
+  });
+
+  it("strips --bare from args in oauth_external mode even if explicitly provided", () => {
+    const spec = { ...baseSpec, args: ["--bare", "-p", "--output-format", "stream-json"] };
+    const args = resolveArgs(spec, "oauth_external");
+    expect(args).not.toContain("--bare");
+  });
+
+  it("does not add --bare in oauth_device_code mode", () => {
+    const args = resolveArgs(baseSpec, "oauth_device_code");
+    expect(args).not.toContain("--bare");
+  });
+
+  it("does not modify args for non-claude commands", () => {
+    const spec = { ...baseSpec, cmd: "codex", args: ["exec", "--json"] };
+    const args = resolveArgs(spec, "api_key");
+    expect(args).toEqual(["exec", "--json"]);
+    expect(args).not.toContain("--bare");
+  });
+
+  it("handles missing args gracefully", () => {
+    const { args: _discard, ...rest } = baseSpec;
+    const spec: SpawnSpec = { ...rest };
+    const args = resolveArgs(spec, "api_key");
+    expect(args).toEqual(["--bare"]);
+  });
+});
+
+// ─── CliProcessBackend depth + concurrency ──────────────────────────────────
+
+describe("CliProcessBackend", () => {
+  it("rejects spawn when depth exceeds MAX_SUBAGENT_DEPTH", async () => {
+    const backend = new CliProcessBackend({
+      parentBudget: createBudget({
+        tokens: 10000,
+        usdTicks: 1000000,
+        wallClockMs: 60000,
+        toolCalls: 10,
+      }),
+    });
+    const spec: SpawnSpec = {
+      task: "deep task",
+      backend: "cli-process",
+      parentSessionId: "sess-1",
+      cmd: "echo",
+      args: ["hello"],
+      depth: 4,
+    };
+    await expect(backend.spawn(spec)).rejects.toThrow("depth 4 exceeds maximum 3");
+  });
+
+  it("rejects spawn at max depth boundary", async () => {
+    const backend = new CliProcessBackend({
+      parentBudget: createBudget({
+        tokens: 10000,
+        usdTicks: 1000000,
+        wallClockMs: 60000,
+        toolCalls: 10,
+      }),
+    });
+    const spec: SpawnSpec = {
+      task: "deep task",
+      backend: "cli-process",
+      parentSessionId: "sess-1",
+      cmd: "echo",
+      args: ["hello"],
+      depth: 4,
+    };
+    await expect(backend.spawn(spec)).rejects.toThrow("depth");
+  });
+
+  it("allows spawn at exactly max depth", async () => {
+    const backend = new CliProcessBackend({
+      parentBudget: createBudget({
+        tokens: 10000,
+        usdTicks: 1000000,
+        wallClockMs: 60000,
+        toolCalls: 10,
+      }),
+    });
+    const spec: SpawnSpec = {
+      task: "hello",
+      backend: "cli-process",
+      parentSessionId: "sess-1",
+      cmd: "echo",
+      args: ["hello"],
+      depth: 3,
+    };
+    const handle = await backend.spawn(spec);
+    // Clean up
+    await handle.cancel();
+  });
+
+  it("rejects spawn when concurrency limit reached", async () => {
+    const backend = new CliProcessBackend({
+      maxConcurrentChildren: 1,
+      parentBudget: createBudget({
+        tokens: 100000,
+        usdTicks: 10000000,
+        wallClockMs: 600000,
+        toolCalls: 100,
+      }),
+    });
+    const spec: SpawnSpec = {
+      task: "hello",
+      backend: "cli-process",
+      parentSessionId: "sess-1",
+      cmd: "sleep",
+      args: ["10"],
+      depth: 0,
+    };
+    const handle = await backend.spawn(spec);
+    await expect(backend.spawn(spec)).rejects.toThrow("Concurrent children limit");
+    await handle.cancel();
+  });
+
+  it("rejects interactive mode (not yet implemented)", async () => {
+    const backend = new CliProcessBackend({
+      parentBudget: createBudget({
+        tokens: 10000,
+        usdTicks: 1000000,
+        wallClockMs: 60000,
+        toolCalls: 10,
+      }),
+    });
+    const spec: SpawnSpec = {
+      task: "interactive task",
+      backend: "cli-process",
+      parentSessionId: "sess-1",
+      cmd: "bash",
+      mode: "interactive",
+    };
+    await expect(backend.spawn(spec)).rejects.toThrow("Interactive mode not yet implemented");
+  });
+
+  it("rejects spawn without cmd", async () => {
+    const backend = new CliProcessBackend({
+      parentBudget: createBudget({
+        tokens: 10000,
+        usdTicks: 1000000,
+        wallClockMs: 60000,
+        toolCalls: 10,
+      }),
+    });
+    const spec: SpawnSpec = {
+      task: "no cmd",
+      backend: "cli-process",
+      parentSessionId: "sess-1",
+    };
+    await expect(backend.spawn(spec)).rejects.toThrow("SpawnSpec.cmd is required");
+  });
+
+  it("spawns a process and receives events", async () => {
+    const backend = new CliProcessBackend({
+      parentBudget: createBudget({
+        tokens: 10000,
+        usdTicks: 1000000,
+        wallClockMs: 60000,
+        toolCalls: 10,
+      }),
+    });
+    const spec: SpawnSpec = {
+      task: "",
+      backend: "cli-process",
+      parentSessionId: "sess-1",
+      cmd: "echo",
+      args: ["test output"],
+      depth: 0,
+      parser: "raw-text",
+    };
+    const handle = await backend.spawn(spec);
+    const events = [];
+    for await (const event of handle.events) {
+      events.push(event);
+    }
+    // Should have spawn + stdout + end events
+    expect(events.some((e) => e.t === "subagent.spawn")).toBe(true);
+    expect(events.some((e) => e.t === "subagent.event")).toBe(true);
+    expect(events.some((e) => e.t === "subagent.end")).toBe(true);
+    const endEvent = events.find((e) => e.t === "subagent.end");
+    expect(endEvent).toBeDefined();
+    if (endEvent && endEvent.t === "subagent.end") {
+      expect(endEvent.ok).toBe(true);
+      expect(endEvent.exit).toBe(0);
+    }
+  });
+
+  it("reports status after completion", async () => {
+    const backend = new CliProcessBackend({
+      parentBudget: createBudget({
+        tokens: 10000,
+        usdTicks: 1000000,
+        wallClockMs: 60000,
+        toolCalls: 10,
+      }),
+    });
+    const spec: SpawnSpec = {
+      task: "",
+      backend: "cli-process",
+      parentSessionId: "sess-1",
+      cmd: "echo",
+      args: ["done"],
+      depth: 0,
+      parser: "raw-text",
+    };
+    const handle = await backend.spawn(spec);
+    // Drain events
+    for await (const _ of handle.events) {
+      /* consume */
+    }
+    const status = await handle.status();
+    expect(status.state).toBe("completed");
+    expect(status.exit).toBe(0);
+    expect(status.startedAt).toBeDefined();
+    expect(status.endedAt).toBeDefined();
+  });
+
+  it("frees concurrency slot after child completes", async () => {
+    const backend = new CliProcessBackend({
+      maxConcurrentChildren: 1,
+      parentBudget: createBudget({
+        tokens: 100000,
+        usdTicks: 10000000,
+        wallClockMs: 600000,
+        toolCalls: 100,
+      }),
+    });
+    const spec: SpawnSpec = {
+      task: "",
+      backend: "cli-process",
+      parentSessionId: "sess-1",
+      cmd: "echo",
+      args: ["fast"],
+      depth: 0,
+      parser: "raw-text",
+    };
+    const handle1 = await backend.spawn(spec);
+    for await (const _ of handle1.events) {
+      /* consume */
+    }
+    // Slot is freed, second spawn should succeed
+    const handle2 = await backend.spawn(spec);
+    for await (const _ of handle2.events) {
+      /* consume */
+    }
+    expect(backend.activeCount()).toBe(0);
+  });
+});
diff --git a/tests/cockpit/subagents/parsers.test.ts b/tests/cockpit/subagents/parsers.test.ts
new file mode 100644
index 0000000..31c197e
--- /dev/null
+++ b/tests/cockpit/subagents/parsers.test.ts
@@ -0,0 +1,165 @@
+import {
+  ClaudeCodeStreamParser,
+  CodexExecParser,
+  RawTextParser,
+  availableParsers,
+  createParser,
+} from "@/cockpit/subagents/parsers";
+import { describe, expect, it } from "vitest";
+
+const SID = "subagent-test";
+
+describe("RawTextParser", () => {
+  const parser = new RawTextParser(); // one instance shared by every case in this suite
+
+  it("emits stdout event for each line", () => {
+    const { events } = parser.parseLine(SID, "hello world");
+    expect(events).toEqual([
+      { t: "subagent.event", subagentId: SID, kind: "stdout", chunk: "hello world" },
+    ]);
+  });
+
+  it("finalize emits subagent.end with ok=true on exit 0", () => {
+    const { events } = parser.finalize(SID, 0);
+    expect(events).toEqual([{ t: "subagent.end", subagentId: SID, ok: true, exit: 0 }]);
+  });
+
+  it("finalize emits subagent.end with ok=false on non-zero exit", () => {
+    const { events } = parser.finalize(SID, 1); // the exit code passed here is expected back as `exit`
+    expect(events).toEqual([{ t: "subagent.end", subagentId: SID, ok: false, exit: 1 }]);
+  });
+});
+
+describe("ClaudeCodeStreamParser", () => {
+  const parser = new ClaudeCodeStreamParser(); // one instance shared by every case in this suite
+
+  it("parses result event as subagent.end", () => {
+    const line = JSON.stringify({
+      type: "result",
+      is_error: false,
+      session_id: "abc",
+      total_cost_usd: 0.01, // extra payload fields do not appear in the expected event below
+    });
+    const { events } = parser.parseLine(SID, line);
+    expect(events).toEqual([{ t: "subagent.end", subagentId: SID, ok: true, exit: 0 }]);
+  });
+
+  it("parses error result as subagent.end with ok=false", () => {
+    const line = JSON.stringify({ type: "result", is_error: true }); // payload has no exit code
+    const { events } = parser.parseLine(SID, line);
+    expect(events).toEqual([{ t: "subagent.end", subagentId: SID, ok: false, exit: 1 }]);
+  });
+
+  it("parses system event as status", () => {
+    const line = JSON.stringify({ type: "system", message: "retrying..." });
+    const { events } = parser.parseLine(SID, line);
+    expect(events).toEqual([
+      { t: "subagent.event", subagentId: SID, kind: "status", chunk: "retrying..." },
+    ]);
+  });
+
+  it("parses api_retry event as status", () => {
+    const line = JSON.stringify({ type: "api_retry", message: "rate limited" });
+    const { events } = parser.parseLine(SID, line);
+    expect(events).toEqual([
+      { t: "subagent.event", subagentId: SID, kind: "status", chunk: "rate limited" },
+    ]);
+  });
+
+  it("parses content delta as stdout", () => {
+    const line = JSON.stringify({ type: "content_block_delta", delta: { text: "hello" } });
+    const { events } = parser.parseLine(SID, line);
+    expect(events).toEqual([
+      { t: "subagent.event", subagentId: SID, kind: "stdout", chunk: "hello" },
+    ]);
+  });
+
+  it("falls back to raw text on invalid JSON", () => {
+    const { events } = parser.parseLine(SID, "not json at all");
+    expect(events).toEqual([
+      { t: "subagent.event", subagentId: SID, kind: "stdout", chunk: "not json at all" },
+    ]);
+  });
+
+  it("skips empty lines", () => {
+    const { events } = parser.parseLine(SID, "   "); // whitespace-only input
+    expect(events).toEqual([]);
+  });
+
+  it("finalize emits subagent.end", () => {
+    const { events } = parser.finalize(SID, 0);
+    expect(events).toEqual([{ t: "subagent.end", subagentId: SID, ok: true, exit: 0 }]);
+  });
+});
+
+describe("CodexExecParser", () => {
+  const parser = new CodexExecParser(); // one instance shared by every case in this suite
+
+  it("parses completed event as subagent.end", () => {
+    const line = JSON.stringify({ type: "completed", exit_code: 0 });
+    const { events } = parser.parseLine(SID, line);
+    expect(events).toEqual([{ t: "subagent.end", subagentId: SID, ok: true, exit: 0 }]);
+  });
+
+  it("parses completed with non-zero exit as failed", () => {
+    const line = JSON.stringify({ type: "completed", exit_code: 1 });
+    const { events } = parser.parseLine(SID, line);
+    expect(events).toEqual([{ t: "subagent.end", subagentId: SID, ok: false, exit: 1 }]);
+  });
+
+  it("parses done event as subagent.end", () => {
+    const line = JSON.stringify({ type: "done" }); // no exit_code in the payload; exit 0 is expected
+    const { events } = parser.parseLine(SID, line);
+    expect(events).toEqual([{ t: "subagent.end", subagentId: SID, ok: true, exit: 0 }]);
+  });
+
+  it("parses error event as stderr", () => {
+    const line = JSON.stringify({ type: "error", message: "something broke" });
+    const { events } = parser.parseLine(SID, line);
+    expect(events).toEqual([
+      { t: "subagent.event", subagentId: SID, kind: "stderr", chunk: "something broke" },
+    ]);
+  });
+
+  it("falls back to raw text on non-JSON", () => {
+    const { events } = parser.parseLine(SID, "plain text output");
+    expect(events).toEqual([
+      { t: "subagent.event", subagentId: SID, kind: "stdout", chunk: "plain text output" },
+    ]);
+  });
+
+  it("skips empty lines", () => {
+    const { events } = parser.parseLine(SID, ""); // truly empty input (no whitespace)
+    expect(events).toEqual([]);
+  });
+});
+
+describe("createParser", () => {
+  it("creates raw-text parser", () => {
+    const p = createParser("raw-text");
+    expect(p.name).toBe("raw-text"); // the parser's `name` is expected to equal the requested key
+  });
+
+  it("creates claude-code-stream parser", () => {
+    const p = createParser("claude-code-stream");
+    expect(p.name).toBe("claude-code-stream");
+  });
+
+  it("creates codex-exec parser", () => {
+    const p = createParser("codex-exec");
+    expect(p.name).toBe("codex-exec");
+  });
+
+  it("throws on unknown parser", () => {
+    expect(() => createParser("nonexistent")).toThrow("Unknown parser: nonexistent"); // substring match
+  });
+});
+
+describe("availableParsers", () => {
+  it("returns all registered parsers", () => {
+    const parsers = availableParsers(); // only membership is asserted; order and extras are not
+    expect(parsers).toContain("raw-text");
+    expect(parsers).toContain("claude-code-stream");
+    expect(parsers).toContain("codex-exec");
+  });
+});

From 6383ebd38b85a68676a551aa4a175adee5b5e7ea Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Fri, 24 Apr 2026 19:12:05 +0000
Subject: [PATCH 7/8] fix: swallow EPIPE on child stdin to prevent unhandled
 error in tests

Co-Authored-By: Terrence Schonleber <TVschonleber@gmail.com>
---
 src/cockpit/subagents/cli-process.ts | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/cockpit/subagents/cli-process.ts b/src/cockpit/subagents/cli-process.ts
index 71a88ab..186b1c0 100644
--- a/src/cockpit/subagents/cli-process.ts
+++ b/src/cockpit/subagents/cli-process.ts
@@ -147,6 +147,9 @@ class CliProcessHandle implements SubagentHandle {
     });
     this.process = proc;
 
+    // Ignore every stdin error (typically EPIPE) — the process may exit before we finish writing
+    proc.stdin?.on("error", () => {});
+
     // Send task as stdin for oneshot mode, then close
     if (this.task) {
       proc.stdin?.write(this.task);

From 13f673135a7a6d7582ed92bd46a93d6b9a6f7bcb Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Fri, 24 Apr 2026 19:12:37 +0000
Subject: [PATCH 8/8] feat(auth): add provider registry, auth store, external
 discovery, and device-code scaffolding

- Provider registry for anthropic, openai, xai, gemini, openai-compat
- CockpitAuthStore (~/.strand/auth.json) with single-writer lock
- External credential discovery for Claude Code and gemini-cli
- OpenAI oauth_device_code flow with mockable HTTP client
- Enforce hard constraints #3 (oauth_external local-only), #4 (billing warning), #5 (no implicit activation)
- 39 tests across 4 test files

Co-Authored-By: Terrence Schonleber <TVschonleber@gmail.com>
---
 src/auth/auth-store.ts                | 183 +++++++++++++++++++++++++
 src/auth/device-code.ts               | 186 ++++++++++++++++++++++++++
 src/auth/external-discovery.ts        | 140 +++++++++++++++++++
 src/auth/index.ts                     |  37 +++++
 src/auth/provider-registry.ts         | 139 +++++++++++++++++++
 tests/auth/auth-store.test.ts         | 130 ++++++++++++++++++
 tests/auth/device-code.test.ts        | 127 ++++++++++++++++++
 tests/auth/external-discovery.test.ts | 169 +++++++++++++++++++++++
 tests/auth/provider-registry.test.ts  |  98 ++++++++++++++
 9 files changed, 1209 insertions(+)
 create mode 100644 src/auth/auth-store.ts
 create mode 100644 src/auth/device-code.ts
 create mode 100644 src/auth/external-discovery.ts
 create mode 100644 src/auth/provider-registry.ts
 create mode 100644 tests/auth/auth-store.test.ts
 create mode 100644 tests/auth/device-code.test.ts
 create mode 100644 tests/auth/external-discovery.test.ts
 create mode 100644 tests/auth/provider-registry.test.ts

diff --git a/src/auth/auth-store.ts b/src/auth/auth-store.ts
new file mode 100644
index 0000000..563cd62
--- /dev/null
+++ b/src/auth/auth-store.ts
@@ -0,0 +1,183 @@
+/**
+ * Cockpit auth store — persists the user's active provider choice and
+ * per-provider auth state to `~/.strand/auth.json`.
+ *
+ * Shape mirrors the spec (S3 Auth store shape):
+ *   { active_provider, providers, suppressed_sources }
+ *
+ * Rules (verbatim from hermes):
+ *   1. No implicit use of external credentials (hard constraint #5).
+ *   2. `suppressed_sources` blacklists a discovery path per provider.
+ *   3. Single-writer file lock during refresh.
+ */
+
+import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
+import { homedir } from "node:os";
+import { dirname, join } from "node:path";
+import { z } from "zod";
+import type { AuthType, ProviderId } from "./provider-registry";
+import { ProviderIdSchema } from "./provider-registry";
+
+export const ApiKeyEntrySchema = z.object({
+  auth_type: z.literal("api_key"),
+  source: z.string().min(1), // non-empty locator; the tests use the form "env:ANTHROPIC_API_KEY"
+});
+
+export const OAuthDeviceCodeEntrySchema = z.object({
+  auth_type: z.literal("oauth_device_code"),
+  tokens: z.record(z.string()), // string-to-string map; key names are not constrained here
+  expires_at: z.string().datetime(), // must parse as a datetime string
+});
+
+export const OAuthExternalEntrySchema = z.object({
+  auth_type: z.literal("oauth_external"),
+  credential_path: z.string().min(1), // presumably the path reported by external discovery — confirm
+});
+
+export const AuthEntrySchema = z.discriminatedUnion("auth_type", [
+  ApiKeyEntrySchema,
+  OAuthDeviceCodeEntrySchema,
+  OAuthExternalEntrySchema,
+]); // `auth_type` selects which of the three entry shapes applies
+export type AuthEntry = z.infer<typeof AuthEntrySchema>;
+
+export const AuthStoreDataSchema = z.object({
+  active_provider: ProviderIdSchema.nullable(), // null means no provider is active
+  providers: z.record(ProviderIdSchema, AuthEntrySchema), // one auth entry per provider ID
+  suppressed_sources: z.record(ProviderIdSchema, z.array(z.string())), // per-provider blacklist
+});
+export type AuthStoreData = z.infer<typeof AuthStoreDataSchema>;
+
+/** Initial store contents: no active provider, no entries, nothing suppressed. */
+function emptyStore(): AuthStoreData {
+  // Built fresh on each call so no two callers ever share the nested objects.
+  const providers: AuthStoreData["providers"] = {};
+  const suppressed: AuthStoreData["suppressed_sources"] = {};
+  return { active_provider: null, providers, suppressed_sources: suppressed };
+}
+
+export interface CockpitAuthStoreOpts {
+  /** Store file location; when omitted the store uses `~/.strand/auth.json`. Tests pass a temp path. */
+  path?: string;
+}
+
+/**
+ * Manages `~/.strand/auth.json` with single-writer locking.
+ *
+ * Every mutation goes through `update()`, which holds `<path>.lock` for the
+ * whole read-modify-write and throws if that lock file already exists. Reads
+ * take no lock, and the write is a plain overwrite rather than an atomic rename.
+ */
+export class CockpitAuthStore {
+  readonly path: string;
+  /** True only while this instance owns the lock file. */
+  private lockHeld = false;
+
+  /** @param opts - `path` overrides the default `~/.strand/auth.json` location. */
+  constructor(opts?: CockpitAuthStoreOpts) {
+    this.path = opts?.path ?? join(homedir(), ".strand", "auth.json");
+  }
+
+  /** Load and validate the store; a missing file reads as empty. Throws on bad JSON or shape. */
+  read(): AuthStoreData {
+    if (!existsSync(this.path)) return emptyStore();
+    const parsed: unknown = JSON.parse(readFileSync(this.path, "utf-8"));
+    return AuthStoreDataSchema.parse(parsed);
+  }
+
+  /**
+   * Apply `fn` to the current state, validate the result, and overwrite the file.
+   * The lock is held for the whole read-modify-write and is released even when
+   * `fn` or validation throws. `acquireLock()` has already created the directory.
+   *
+   * @throws if the lock cannot be taken or the result fails schema validation.
+   */
+  update(fn: (current: AuthStoreData) => AuthStoreData): AuthStoreData {
+    this.acquireLock();
+    try {
+      const next = AuthStoreDataSchema.parse(fn(this.read()));
+      writeFileSync(this.path, `${JSON.stringify(next, null, 2)}\n`, {
+        mode: 0o600,
+      });
+      return next;
+    } finally {
+      this.releaseLock();
+    }
+  }
+
+  /** The active provider ID, or null when none is active. */
+  activeProvider(): ProviderId | null {
+    return this.read().active_provider;
+  }
+
+  /** Store `entry` under `id` and make `id` the active provider in a single locked write. */
+  setActiveProvider(id: ProviderId, entry: AuthEntry): AuthStoreData {
+    return this.update((s) => ({
+      ...s,
+      active_provider: id,
+      providers: { ...s.providers, [id]: entry },
+    }));
+  }
+
+  /** Drop `id`'s entry; if `id` was the active provider, no provider is active afterwards. */
+  clearProvider(id: ProviderId): AuthStoreData {
+    return this.update((s) => {
+      const providers = { ...s.providers };
+      delete providers[id]; // mutates the copy, never the state passed in
+      const active = s.active_provider === id ? null : s.active_provider;
+      return { ...s, active_provider: active, providers };
+    });
+  }
+
+  /** True when `source` is on the suppression list for `providerId`. */
+  isSuppressed(providerId: ProviderId, source: string): boolean {
+    return this.read().suppressed_sources[providerId]?.includes(source) ?? false;
+  }
+
+  /** Add `source` to the suppression list for `providerId`; an existing entry is not repeated. */
+  suppressSource(providerId: ProviderId, source: string): AuthStoreData {
+    return this.update((s) => {
+      const existing = s.suppressed_sources[providerId] ?? [];
+      if (existing.includes(source)) return s;
+      const suppressed = { ...s.suppressed_sources, [providerId]: [...existing, source] };
+      return { ...s, suppressed_sources: suppressed };
+    });
+  }
+
+  /** Auth type recorded for `id`, or null when the provider has no entry. */
+  providerAuthType(id: ProviderId): AuthType | null {
+    return this.read().providers[id]?.auth_type ?? null;
+  }
+
+  /**
+   * Create `<path>.lock` exclusively (`wx`), creating the parent directory if needed.
+   * @throws if this instance already holds the lock or the lock file already exists.
+   */
+  private acquireLock(): void {
+    if (this.lockHeld) throw new Error("CockpitAuthStore: lock already held (re-entrant write)");
+    const lockPath = `${this.path}.lock`;
+    const dir = dirname(lockPath);
+    if (!existsSync(dir)) mkdirSync(dir, { recursive: true, mode: 0o700 });
+    try {
+      writeFileSync(lockPath, `${process.pid}\n`, { flag: "wx", mode: 0o600 });
+      this.lockHeld = true;
+    } catch (err: unknown) {
+      if ((err as NodeJS.ErrnoException).code === "EEXIST") {
+        throw new Error(
+          `CockpitAuthStore: lock file exists at ${lockPath}. Another process may be writing. Remove manually if stale.`,
+        );
+      }
+      throw err;
+    }
+  }
+
+  /** Remove the lock file and clear the held flag; a failed unlink is ignored. */
+  private releaseLock(): void {
+    try {
+      unlinkSync(`${this.path}.lock`);
+    } catch {
+      // lock file already gone — acceptable
+    }
+    this.lockHeld = false;
+  }
+}
diff --git a/src/auth/device-code.ts b/src/auth/device-code.ts
new file mode 100644
index 0000000..6ecfab3
--- /dev/null
+++ b/src/auth/device-code.ts
@@ -0,0 +1,186 @@
+/**
+ * OpenAI OAuth device-code flow scaffolding.
+ *
+ * Reference: spec S3 "Device-code flow reference (OpenAI)" and hermes
+ * `_codex_device_code_login`.
+ *
+ * Flow:
+ *   1. POST /api/accounts/deviceauth/usercode → { user_code, device_auth_id, interval }
+ *   2. Show user: open https://auth.openai.com/codex/device, enter code
+ *   3. Poll: POST /api/accounts/deviceauth/token → 200 { authorization_code, code_verifier } | 403/404
+ *   4. Exchange: POST /oauth/token → { access_token, refresh_token, id_token, expires_in }
+ *
+ * Max wait 15 minutes. Poll interval >= 3s.
+ *
+ * The `DeviceCodeHttpClient` interface is fully mockable for CI tests.
+ */
+
+import { z } from "zod";
+
+export const OPENAI_AUTH_BASE = "https://auth.openai.com"; // default base URL for the HTTP client
+export const OPENAI_DEVICE_URL = "https://auth.openai.com/codex/device"; // page where the user types the code
+export const DEVICE_CODE_MAX_WAIT_MS = 15 * 60 * 1000; // 15 minutes
+export const DEVICE_CODE_MIN_POLL_INTERVAL_MS = 3000; // floor applied to the server-provided interval
+
+export const UserCodeResponseSchema = z.object({
+  user_code: z.string().min(1),
+  device_auth_id: z.string().min(1),
+  interval: z.number().int().nonnegative(), // treated as seconds: multiplied by 1000 before use
+});
+export type UserCodeResponse = z.infer<typeof UserCodeResponseSchema>;
+
+export const TokenPollSuccessSchema = z.object({
+  authorization_code: z.string().min(1),
+  code_verifier: z.string().min(1),
+});
+export type TokenPollSuccess = z.infer<typeof TokenPollSuccessSchema>;
+
+export const TokenSetSchema = z.object({
+  access_token: z.string().min(1),
+  refresh_token: z.string().min(1),
+  id_token: z.string().optional(),
+  expires_in: z.number().int().positive(), // presumably seconds, per OAuth convention — confirm
+});
+export type TokenSet = z.infer<typeof TokenSetSchema>;
+
+export type TokenPollResult = { status: "pending" } | { status: "success"; data: TokenPollSuccess };
+
+/**
+ * HTTP-level interface for the device-code flow. Fully mockable —
+ * CI tests inject a stub that returns canned responses.
+ */
+export interface DeviceCodeHttpClient {
+  requestUserCode(clientId: string): Promise<UserCodeResponse>; // start: obtain the code to display
+  pollToken(deviceAuthId: string, userCode: string): Promise<TokenPollResult>; // one poll attempt
+  exchangeToken(params: {
+    authorizationCode: string;
+    codeVerifier: string;
+    clientId: string;
+    redirectUri: string;
+  }): Promise<TokenSet>; // finish: trade the poll result for the token set
+}
+
+/**
+ * `fetch`-backed client for the device-code endpoints under `baseUrl`, which
+ * defaults to `OPENAI_AUTH_BASE`. Relies on the global `fetch` (Node 22+ built-in).
+ */
+export class OpenAIDeviceCodeClient implements DeviceCodeHttpClient {
+  private readonly baseUrl: string;
+
+  constructor(baseUrl: string = OPENAI_AUTH_BASE) {
+    this.baseUrl = baseUrl;
+  }
+
+  /** Ask for a fresh user code. Throws `DeviceCodeError` on any non-2xx response. */
+  async requestUserCode(clientId: string): Promise<UserCodeResponse> {
+    const endpoint = `${this.baseUrl}/api/accounts/deviceauth/usercode`;
+    const resp = await fetch(endpoint, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ client_id: clientId }),
+    });
+    // Happy path first; anything else is reported with the HTTP status line.
+    if (resp.ok) return UserCodeResponseSchema.parse(await resp.json());
+    throw new DeviceCodeError(`usercode request failed: ${resp.status} ${resp.statusText}`);
+  }
+
+  /** One poll attempt: 403 and 404 are reported as pending; other non-2xx statuses throw. */
+  async pollToken(deviceAuthId: string, userCode: string): Promise<TokenPollResult> {
+    const endpoint = `${this.baseUrl}/api/accounts/deviceauth/token`;
+    const resp = await fetch(endpoint, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ device_auth_id: deviceAuthId, user_code: userCode }),
+    });
+    const stillWaiting = resp.status === 403 || resp.status === 404;
+    if (stillWaiting) return { status: "pending" };
+    if (!resp.ok) {
+      throw new DeviceCodeError(`token poll failed: ${resp.status} ${resp.statusText}`);
+    }
+    return { status: "success", data: TokenPollSuccessSchema.parse(await resp.json()) };
+  }
+
+  /** Exchange the authorization code and code verifier for a token set. Throws on non-2xx. */
+  async exchangeToken(params: {
+    authorizationCode: string;
+    codeVerifier: string;
+    clientId: string;
+    redirectUri: string;
+  }): Promise<TokenSet> {
+    const { authorizationCode, codeVerifier, clientId, redirectUri } = params;
+    // Sent as application/x-www-form-urlencoded, unlike the JSON bodies of the other two calls.
+    const form = new URLSearchParams({
+      grant_type: "authorization_code",
+      code: authorizationCode,
+      redirect_uri: redirectUri,
+      client_id: clientId,
+      code_verifier: codeVerifier,
+    });
+    const resp = await fetch(`${this.baseUrl}/oauth/token`, {
+      method: "POST",
+      headers: { "Content-Type": "application/x-www-form-urlencoded" },
+      body: form.toString(),
+    });
+    if (resp.ok) return TokenSetSchema.parse(await resp.json());
+    throw new DeviceCodeError(`token exchange failed: ${resp.status} ${resp.statusText}`);
+  }
+}
+
+export class DeviceCodeError extends Error {
+  constructor(message: string) {
+    super(message);
+    this.name = "DeviceCodeError"; // sets `name` so device-code failures are identifiable by name
+  }
+}
+
+/**
+ * Orchestrates the full device-code flow: requests a user code, passes it to
+ * `onUserCode`, polls at a fixed interval until approval or the deadline, then
+ * exchanges the authorization code for the final token set.
+ *
+ * @param opts - `client` (injectable HTTP client), `clientId`, optional `redirectUri`,
+ *   `onUserCode` (shows the code + URL to the user), and the test-only overrides.
+ */
+export async function runDeviceCodeFlow(opts: {
+  client: DeviceCodeHttpClient;
+  clientId: string;
+  redirectUri?: string;
+  onUserCode: (info: { userCode: string; verificationUrl: string }) => void;
+  /** Override for tests — minimum poll interval in ms. Production default: 3000. */
+  _minPollIntervalMs?: number;
+  /** Override for tests — max wait in ms. Production default: 15 min. */
+  _maxWaitMs?: number;
+}): Promise<TokenSet> {
+  const { client, clientId, onUserCode } = opts;
+  const redirectUri = opts.redirectUri ?? "https://auth.openai.com/codex/device/callback";
+  const minPoll = opts._minPollIntervalMs ?? DEVICE_CODE_MIN_POLL_INTERVAL_MS;
+  const maxWait = opts._maxWaitMs ?? DEVICE_CODE_MAX_WAIT_MS;
+
+  const codeResponse = await client.requestUserCode(clientId);
+  onUserCode({
+    userCode: codeResponse.user_code,
+    verificationUrl: OPENAI_DEVICE_URL,
+  });
+
+  const intervalMs = Math.max(codeResponse.interval * 1000, minPoll); // never poll faster than minPoll
+  const deadline = Date.now() + maxWait;
+
+  while (Date.now() < deadline) {
+    await sleep(intervalMs); // wait first: the earliest poll happens one interval after the code is shown
+    const result = await client.pollToken(codeResponse.device_auth_id, codeResponse.user_code);
+    if (result.status === "success") {
+      return client.exchangeToken({
+        authorizationCode: result.data.authorization_code,
+        codeVerifier: result.data.code_verifier,
+        clientId,
+        redirectUri,
+      });
+    }
+  }
+  // NOTE(review): the message below says "15 minutes" even when `_maxWaitMs` shortens the deadline.
+  throw new DeviceCodeError("device-code flow timed out after 15 minutes");
+}
+
+function sleep(ms: number): Promise<void> {
+  return new Promise<void>((wake) => setTimeout(wake, ms)); // settles once the `ms` timer fires
+}
diff --git a/src/auth/external-discovery.ts b/src/auth/external-discovery.ts
new file mode 100644
index 0000000..8102842
--- /dev/null
+++ b/src/auth/external-discovery.ts
@@ -0,0 +1,140 @@
+/**
+ * External credential discovery — reads credentials written by other CLI
+ * tools installed on the same machine.
+ *
+ * Hard constraints:
+ *   #3 — `oauth_external` is local-only.
+ *   #4 — Anthropic routes third-party OAuth to `extra_usage` billing pool;
+ *         must surface warning before first call.
+ *   #5 — Discovery NEVER auto-activates a provider. Results are offered as
+ *         selectable sources in the picker.
+ */
+
+import { existsSync, readFileSync } from "node:fs";
+import { homedir } from "node:os";
+import { join } from "node:path";
+import { z } from "zod";
+
+export interface ExternalCredentialResult {
+  readonly found: boolean; // true only when a usable token was read
+  readonly path: string; // file that was probed; set even when nothing was found
+  readonly localOnly: true; // always true: the type admits no other value
+  readonly billingWarning?: string; // present only on Claude Code hits
+  readonly token?: string; // the discovered access token, when found
+}
+
+const ANTHROPIC_BILLING_WARNING =
+  "Anthropic routes third-party OAuth clients to the extra_usage billing pool, " +
+  "which is empty for most users. You may incur metered API charges on your " +
+  "Claude Pro/Max subscription. See hermes-agent issue #12905."; // same text as provider-registry.ts
+
+/**
+ * Claude Code credentials file schema — only the fields we need; both are optional top-level keys.
+ * Path: `~/.claude/.credentials.json`. NOTE(review): confirm this matches the layout Claude Code writes.
+ */
+const ClaudeCodeCredentialsSchema = z.object({
+  accessToken: z.string().min(1).optional(),
+  oauthAccessToken: z.string().min(1).optional(), // preferred over `accessToken` when both exist
+});
+
+/**
+ * Look for a Claude Code OAuth token in `<home>/.claude/.credentials.json`.
+ *
+ * `oauthAccessToken` is preferred over `accessToken`. A missing, unreadable,
+ * malformed, or token-less file all yield found=false. Every found=true result
+ * carries the billing warning (hard constraint #4) for the caller to surface.
+ */
+export function discoverClaudeCodeCredentials(homeOverride?: string): ExternalCredentialResult {
+  const path = join(homeOverride ?? homedir(), ".claude", ".credentials.json");
+  // One shared "not found" answer for every failure path below.
+  const notFound: ExternalCredentialResult = { found: false, path, localOnly: true };
+  if (!existsSync(path)) return notFound;
+
+  // Keep the try block limited to the steps that can throw.
+  let token: string | undefined;
+  try {
+    const contents: unknown = JSON.parse(readFileSync(path, "utf-8"));
+    const { oauthAccessToken, accessToken } = ClaudeCodeCredentialsSchema.parse(contents);
+    token = oauthAccessToken ?? accessToken;
+  } catch {
+    // Read, JSON, and schema failures are all treated as "nothing usable here".
+    return notFound;
+  }
+  if (!token) return notFound;
+
+  // The warning is attached unconditionally whenever a token is found.
+  return {
+    found: true,
+    path,
+    localOnly: true,
+    billingWarning: ANTHROPIC_BILLING_WARNING,
+    token,
+  };
+}
+
+/**
+ * Gemini CLI credentials file schema — only `access_token` is read by discovery; the rest is parsed only.
+ * Path: varies by platform; typically `~/.config/gemini-cli/oauth_creds.json`
+ * or `~/.qwen/oauth_creds.json`. NOTE(review): verify against where gemini-cli really writes (maybe `~/.gemini/`).
+ */
+const GeminiCliCredentialsSchema = z.object({
+  access_token: z.string().min(1).optional(),
+  client_id: z.string().optional(),
+  client_secret: z.string().optional(),
+  refresh_token: z.string().optional(),
+});
+
+const GEMINI_CLI_PATHS = [
+  join(".config", "gemini-cli", "oauth_creds.json"), // probed first; also reported when nothing is found
+  join(".qwen", "oauth_creds.json"),
+] as const; // paths are relative to the home directory
+
+/**
+ * Look for gemini-cli OAuth credentials under each known path, in list order.
+ *
+ * The first file holding a non-empty `access_token` wins. Files that cannot be
+ * read or parsed are skipped. No billing warning is ever attached here. When
+ * nothing is found, `path` reports the first known location.
+ */
+export function discoverGeminiCliCredentials(homeOverride?: string): ExternalCredentialResult {
+  const home = homeOverride ?? homedir();
+
+  for (const relative of GEMINI_CLI_PATHS) {
+    const path = join(home, relative);
+    // Existence is checked first so a missing file costs no read attempt.
+    if (!existsSync(path)) continue;
+
+    // Stays undefined unless the file parses and actually carries a token.
+    let token: string | undefined;
+    try {
+      const contents: unknown = JSON.parse(readFileSync(path, "utf-8"));
+      token = GeminiCliCredentialsSchema.parse(contents).access_token;
+    } catch {
+      // Unreadable or malformed file: leave `token` unset and try the next path.
+    }
+
+    if (token) {
+      return { found: true, path, localOnly: true, token };
+    }
+  }
+
+  // Nothing usable anywhere: fall back to reporting the first known location.
+  const fallback = join(home, GEMINI_CLI_PATHS[0] ?? "");
+  return { found: false, path: fallback, localOnly: true };
+}
+
+/**
+ * Run every external-credential probe and key the results by provider ID.
+ * The caller decides what, if anything, to offer in the picker.
+ *
+ * IMPORTANT: nothing here activates a provider (hard constraint #5) — results are data only.
+ */
+export function discoverAllExternalCredentials(homeOverride?: string): {
+  anthropic: ExternalCredentialResult;
+  gemini: ExternalCredentialResult;
+} {
+  // Probes run in a fixed order: Claude Code first, then gemini-cli.
+  const anthropic = discoverClaudeCodeCredentials(homeOverride);
+  const gemini = discoverGeminiCliCredentials(homeOverride);
+  return { anthropic, gemini };
+}
diff --git a/src/auth/index.ts b/src/auth/index.ts
index 1cc1ca1..cfc6e2e 100644
--- a/src/auth/index.ts
+++ b/src/auth/index.ts
@@ -19,6 +19,43 @@ export { OAuthCredentialStore, type OAuthProviderStrategy } from "./oauth-store"
 export { makeXOAuthStrategy } from "./oauth-x";
 export { TenantScopedCredentialStore } from "./tenant-store";
 
+// Cockpit auth — provider registry, auth store, external discovery, device-code
+export {
+  type AuthMode,
+  type AuthSource,
+  type AuthType,
+  type HostConstraint,
+  type ProviderId,
+  type ProviderDef,
+  ProviderIdSchema,
+  AuthTypeSchema,
+  availableAuthModes,
+  getProvider,
+  listProviders,
+  requiresBaseUrl,
+} from "./provider-registry";
+export {
+  type AuthEntry,
+  type AuthStoreData,
+  AuthStoreDataSchema,
+  CockpitAuthStore,
+} from "./auth-store";
+export {
+  type ExternalCredentialResult,
+  discoverAllExternalCredentials,
+  discoverClaudeCodeCredentials,
+  discoverGeminiCliCredentials,
+} from "./external-discovery";
+export {
+  type DeviceCodeHttpClient,
+  type TokenPollResult,
+  type TokenSet,
+  type UserCodeResponse,
+  DeviceCodeError,
+  OpenAIDeviceCodeClient,
+  runDeviceCodeFlow,
+} from "./device-code";
+
 let _default: CredentialStore | null = null;
 
 /**
diff --git a/src/auth/provider-registry.ts b/src/auth/provider-registry.ts
new file mode 100644
index 0000000..5d5d571
--- /dev/null
+++ b/src/auth/provider-registry.ts
@@ -0,0 +1,139 @@
+/**
+ * Provider registry — canonical list of supported LLM providers and their
+ * auth modes.
+ *
+ * v1 ships with exactly five providers (per spec S3). The `openai-compat`
+ * entry covers any OpenAI-API-compatible endpoint via `baseURL`.
+ *
+ * Hard constraints enforced here:
+ *   #3 — `oauth_external` is local-only; `oauth_device_code` + `api_key` work anywhere.
+ *   #4 — Anthropic `oauth_external` carries a billing warning.
+ *   #5 — No implicit activation from env vars.
+ */
+
+import { z } from "zod";
+
+export const ProviderIdSchema = z.enum(["anthropic", "openai", "xai", "gemini", "openai-compat"]);
+export type ProviderId = z.infer<typeof ProviderIdSchema>; // union of the five provider IDs above
+
+export const AuthTypeSchema = z.enum(["api_key", "oauth_device_code", "oauth_external"]);
+export type AuthType = z.infer<typeof AuthTypeSchema>; // how a provider authenticates
+
+export const AuthSourceSchema = z.enum(["env", "strand_store", "external_cli"]);
+export type AuthSource = z.infer<typeof AuthSourceSchema>; // where the credential comes from
+
+export const HostConstraintSchema = z.enum(["any", "local_only"]);
+export type HostConstraint = z.infer<typeof HostConstraintSchema>; // where the auth mode may be used
+
+export interface AuthMode {
+  readonly type: AuthType;
+  readonly source: AuthSource;
+  readonly envKey?: string; // environment variable name; set on every `source: "env"` mode in the registry
+  readonly hostConstraint: HostConstraint;
+  readonly billingWarning?: string; // only Anthropic's `oauth_external` mode sets this in the registry
+}
+
+export interface ProviderDef {
+  readonly id: ProviderId;
+  readonly displayName: string;
+  readonly primaryAuth: AuthMode; // always present; listed first by availableAuthModes()
+  readonly secondaryAuth?: AuthMode; // optional alternative mode
+  readonly baseUrlEnv?: string; // environment variable naming the base URL; only `openai-compat` sets it
+}
+
+const ANTHROPIC_BILLING_WARNING =
+  "Anthropic routes third-party OAuth clients to the extra_usage billing pool, " +
+  "which is empty for most users. You may incur metered API charges on your " +
+  "Claude Pro/Max subscription. See hermes-agent issue #12905."; // NOTE(review): duplicated in external-discovery.ts
+
+const PROVIDERS: readonly ProviderDef[] = [
+  {
+    id: "anthropic",
+    displayName: "Anthropic",
+    primaryAuth: {
+      type: "api_key",
+      source: "env",
+      envKey: "ANTHROPIC_API_KEY",
+      hostConstraint: "any",
+    },
+    secondaryAuth: {
+      type: "oauth_external",
+      source: "external_cli",
+      hostConstraint: "local_only",
+      billingWarning: ANTHROPIC_BILLING_WARNING, // hard constraint #4
+    },
+  },
+  {
+    id: "openai",
+    displayName: "OpenAI",
+    primaryAuth: {
+      type: "api_key",
+      source: "env",
+      envKey: "OPENAI_API_KEY",
+      hostConstraint: "any",
+    },
+    secondaryAuth: {
+      type: "oauth_device_code",
+      source: "strand_store",
+      hostConstraint: "any",
+    },
+  },
+  {
+    id: "xai",
+    displayName: "xAI",
+    primaryAuth: {
+      type: "api_key",
+      source: "env",
+      envKey: "XAI_API_KEY",
+      hostConstraint: "any",
+    },
+  }, // API key only: no secondary mode
+  {
+    id: "gemini",
+    displayName: "Gemini",
+    primaryAuth: {
+      type: "api_key",
+      source: "env",
+      envKey: "GEMINI_API_KEY",
+      hostConstraint: "any",
+    },
+    secondaryAuth: {
+      type: "oauth_external",
+      source: "external_cli",
+      hostConstraint: "local_only",
+    }, // no billing warning on this mode
+  },
+  {
+    id: "openai-compat",
+    displayName: "OpenAI-compatible",
+    primaryAuth: {
+      type: "api_key",
+      source: "env",
+      envKey: "OPENAI_API_KEY", // same variable as the `openai` entry
+      hostConstraint: "any",
+    },
+    baseUrlEnv: "OPENAI_BASE_URL",
+  },
+] as const;
+
+const REGISTRY = new Map<ProviderId, ProviderDef>(PROVIDERS.map((p) => [p.id, p])); // lookup by ID
+
+export function listProviders(): readonly ProviderDef[] {
+  return PROVIDERS; // the registry array itself, in declaration order — not a copy
+}
+
+export function getProvider(id: ProviderId): ProviderDef | undefined {
+  return REGISTRY.get(id); // undefined when `id` has no registry entry
+}
+
+export function availableAuthModes(id: ProviderId): readonly AuthMode[] {
+  const provider = REGISTRY.get(id);
+  if (provider === undefined) return [];
+  // Primary mode always comes first; the optional secondary mode follows it.
+  const { primaryAuth, secondaryAuth } = provider;
+  return secondaryAuth ? [primaryAuth, secondaryAuth] : [primaryAuth];
+}
+
+export function requiresBaseUrl(id: ProviderId): boolean {
+  return id === "openai-compat"; // hard-coded: the only registry entry that declares `baseUrlEnv`
+}
diff --git a/tests/auth/auth-store.test.ts b/tests/auth/auth-store.test.ts
new file mode 100644
index 0000000..81279ed
--- /dev/null
+++ b/tests/auth/auth-store.test.ts
@@ -0,0 +1,130 @@
+import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { type AuthEntry, AuthStoreDataSchema, CockpitAuthStore } from "@/auth/auth-store";
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
+
+/** Creates a fresh scratch directory under the OS temp dir; returns an auth.json path inside it. */
+function tmpStorePath(): string {
+  // Timestamp plus a random base-36 suffix keeps each call's directory name distinct.
+  const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
+  const scratchDir = join(tmpdir(), `strand-auth-test-${uniqueSuffix}`);
+  mkdirSync(scratchDir, { recursive: true });
+  return join(scratchDir, "auth.json");
+}
+// Suite for CockpitAuthStore; every test gets a store backed by a file in its own temp directory.
+describe("CockpitAuthStore", () => {
+  let storePath: string;
+  let store: CockpitAuthStore;
+
+  beforeEach(() => {
+    storePath = tmpStorePath();
+    store = new CockpitAuthStore({ path: storePath });
+  });
+
+  afterEach(() => {
+    const dir = join(storePath, ".."); // parent directory of the store file
+    if (existsSync(dir)) rmSync(dir, { recursive: true, force: true });
+  });
+
+  it("read returns empty store when file does not exist", () => {
+    const data = store.read();
+    expect(data.active_provider).toBeNull();
+    expect(data.providers).toEqual({});
+    expect(data.suppressed_sources).toEqual({});
+  });
+
+  it("setActiveProvider persists and reads back", () => {
+    const entry: AuthEntry = { auth_type: "api_key", source: "env:ANTHROPIC_API_KEY" };
+    store.setActiveProvider("anthropic", entry);
+
+    const data = store.read();
+    expect(data.active_provider).toBe("anthropic");
+    expect(data.providers["anthropic"]).toEqual(entry);
+  });
+
+  it("clearProvider removes provider and resets active if matching", () => {
+    const entry: AuthEntry = { auth_type: "api_key", source: "env:XAI_API_KEY" };
+    store.setActiveProvider("xai", entry);
+    expect(store.activeProvider()).toBe("xai");
+
+    store.clearProvider("xai");
+    expect(store.activeProvider()).toBeNull();
+    expect(store.read().providers["xai"]).toBeUndefined();
+  });
+
+  it("clearProvider does not reset active if different provider", () => {
+    store.setActiveProvider("openai", { auth_type: "api_key", source: "env:OPENAI_API_KEY" });
+    store.update((s) => ({
+      ...s,
+      providers: {
+        ...s.providers,
+        xai: { auth_type: "api_key" as const, source: "env:XAI_API_KEY" },
+      },
+    }));
+    store.clearProvider("xai");
+    expect(store.activeProvider()).toBe("openai");
+  });
+
+  it("suppressed sources work", () => {
+    expect(store.isSuppressed("anthropic", "cli_credentials")).toBe(false);
+    store.suppressSource("anthropic", "cli_credentials");
+    expect(store.isSuppressed("anthropic", "cli_credentials")).toBe(true);
+  });
+
+  it("suppressSource is idempotent", () => {
+    store.suppressSource("anthropic", "cli_credentials");
+    store.suppressSource("anthropic", "cli_credentials");
+    const data = store.read();
+    expect(data.suppressed_sources["anthropic"]).toEqual(["cli_credentials"]);
+  });
+
+  it("providerAuthType returns the auth type", () => {
+    store.setActiveProvider("openai", {
+      auth_type: "oauth_device_code",
+      tokens: { access_token: "tok" },
+      expires_at: "2099-01-01T00:00:00.000Z",
+    });
+    expect(store.providerAuthType("openai")).toBe("oauth_device_code");
+    expect(store.providerAuthType("xai")).toBeNull();
+  });
+
+  it("validates data on read (rejects corrupt file)", () => {
+    mkdirSync(join(storePath, ".."), { recursive: true });
+    writeFileSync(storePath, '{"active_provider": 42}'); // numeric active_provider should fail read()
+    expect(() => store.read()).toThrow();
+  });
+
+  it("update acquires and releases lock", () => {
+    store.update((s) => s);
+    const lockPath = `${storePath}.lock`;
+    expect(existsSync(lockPath)).toBe(false); // only the release is observable here
+  });
+
+  it("schema validates full store shape", () => {
+    const valid = {
+      active_provider: "openai",
+      providers: {
+        openai: {
+          auth_type: "oauth_device_code",
+          tokens: { access_token: "at", refresh_token: "rt" },
+          expires_at: "2099-01-01T00:00:00.000Z",
+        },
+        anthropic: { auth_type: "api_key", source: "env:ANTHROPIC_API_KEY" },
+      },
+      suppressed_sources: { anthropic: ["cli_credentials"] },
+    };
+    expect(() => AuthStoreDataSchema.parse(valid)).not.toThrow();
+  });
+
+  it("schema rejects invalid auth_type", () => {
+    const invalid = {
+      active_provider: "openai",
+      providers: {
+        openai: { auth_type: "magic", source: "nowhere" },
+      },
+      suppressed_sources: {},
+    };
+    expect(() => AuthStoreDataSchema.parse(invalid)).toThrow();
+  });
+});
diff --git a/tests/auth/device-code.test.ts b/tests/auth/device-code.test.ts
new file mode 100644
index 0000000..03c703b
--- /dev/null
+++ b/tests/auth/device-code.test.ts
@@ -0,0 +1,127 @@
+import {
+  DeviceCodeError,
+  type DeviceCodeHttpClient,
+  type TokenPollResult,
+  type TokenSet,
+  type UserCodeResponse,
+  runDeviceCodeFlow,
+} from "@/auth/device-code";
+import { describe, expect, it, vi } from "vitest";
+
+/**
+ * Builds an in-memory DeviceCodeHttpClient for the flow tests; every reply is canned.
+ * - `pendingPolls` (default 1): number of polls answered with "pending" before "success".
+ * - `failExchange`: when truthy, exchangeToken throws DeviceCodeError("exchange failed").
+ * The poll counter lives in the closure, so each client tracks its own polls.
+ */
+function makeMockClient(opts?: {
+  pendingPolls?: number;
+  failExchange?: boolean;
+}): DeviceCodeHttpClient {
+  const pendingBudget = opts?.pendingPolls ?? 1;
+  let pollsSeen = 0;
+
+  const requestUserCode = async (_clientId: string): Promise<UserCodeResponse> => ({
+    user_code: "ABCD-1234",
+    device_auth_id: "dev-auth-42",
+    interval: 0,
+  });
+
+  const pollToken = async (
+    _deviceAuthId: string,
+    _userCode: string,
+  ): Promise<TokenPollResult> => {
+    pollsSeen += 1;
+    if (pollsSeen <= pendingBudget) return { status: "pending" };
+    return {
+      status: "success",
+      data: { authorization_code: "auth-code-xyz", code_verifier: "verifier-abc" },
+    };
+  };
+
+  const exchangeToken = async (_params: {
+    authorizationCode: string;
+    codeVerifier: string;
+    clientId: string;
+    redirectUri: string;
+  }): Promise<TokenSet> => {
+    if (opts?.failExchange) throw new DeviceCodeError("exchange failed");
+    return {
+      access_token: "at-final",
+      refresh_token: "rt-final",
+      id_token: "id-final",
+      expires_in: 3600,
+    };
+  };
+
+  return { requestUserCode, pollToken, exchangeToken };
+}
+// Runs runDeviceCodeFlow end to end against the in-memory client built by makeMockClient.
+describe("device-code flow", () => {
+  it("completes with mocked HTTP endpoints", async () => {
+    const client = makeMockClient({ pendingPolls: 2 });
+    const onUserCode = vi.fn();
+
+    const tokens = await runDeviceCodeFlow({
+      client,
+      clientId: "test-client-id",
+      onUserCode,
+      _minPollIntervalMs: 10, // presumably shortens the poll wait for tests — confirm
+    });
+
+    expect(onUserCode).toHaveBeenCalledOnce();
+    expect(onUserCode).toHaveBeenCalledWith({
+      userCode: "ABCD-1234",
+      verificationUrl: "https://auth.openai.com/codex/device",
+    });
+
+    expect(tokens.access_token).toBe("at-final");
+    expect(tokens.refresh_token).toBe("rt-final");
+    expect(tokens.expires_in).toBe(3600);
+  });
+
+  it("resolves immediately when first poll succeeds", async () => {
+    const client = makeMockClient({ pendingPolls: 0 });
+    const onUserCode = vi.fn();
+
+    const tokens = await runDeviceCodeFlow({
+      client,
+      clientId: "test-client-id",
+      onUserCode,
+      _minPollIntervalMs: 10,
+    });
+
+    expect(tokens.access_token).toBe("at-final");
+  });
+
+  it("propagates exchange errors", async () => {
+    const client = makeMockClient({ pendingPolls: 0, failExchange: true });
+    const onUserCode = vi.fn();
+
+    await expect(
+      runDeviceCodeFlow({
+        client,
+        clientId: "test-client-id",
+        onUserCode,
+        _minPollIntervalMs: 10,
+      }),
+    ).rejects.toThrow(DeviceCodeError);
+  });
+
+  it("calls onUserCode with the correct user code", async () => {
+    const client = makeMockClient(); // default options: one pending poll, exchange succeeds
+    const onUserCode = vi.fn();
+
+    await runDeviceCodeFlow({
+      client,
+      clientId: "my-client",
+      onUserCode,
+      _minPollIntervalMs: 10,
+    });
+
+    expect(onUserCode.mock.calls[0]?.[0]).toEqual({
+      userCode: "ABCD-1234",
+      verificationUrl: "https://auth.openai.com/codex/device",
+    });
+  });
+});
diff --git a/tests/auth/external-discovery.test.ts b/tests/auth/external-discovery.test.ts
new file mode 100644
index 0000000..fd2113f
--- /dev/null
+++ b/tests/auth/external-discovery.test.ts
@@ -0,0 +1,169 @@
+import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import {
+  discoverAllExternalCredentials,
+  discoverClaudeCodeCredentials,
+  discoverGeminiCliCredentials,
+} from "@/auth/external-discovery";
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
+
+/** Creates a fresh directory under the OS temp dir and returns its path for use as a fake home. */
+function tmpHome(): string {
+  // Timestamp plus a random base-36 suffix keeps each call's directory name distinct.
+  const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
+  const fakeHome = join(tmpdir(), `strand-discovery-test-${uniqueSuffix}`);
+  mkdirSync(fakeHome, { recursive: true });
+  return fakeHome;
+}
+// Exercises the credential discovery probes against a throwaway home directory made per test.
+describe("external credential discovery", () => {
+  let home: string;
+
+  beforeEach(() => {
+    home = tmpHome();
+  });
+
+  afterEach(() => {
+    if (existsSync(home)) rmSync(home, { recursive: true, force: true });
+  });
+
+  describe("Claude Code credentials", () => {
+    it("returns found=false when file does not exist", () => {
+      const result = discoverClaudeCodeCredentials(home); // nothing has been written under home yet
+      expect(result.found).toBe(false);
+      expect(result.localOnly).toBe(true);
+    });
+
+    it("discovers oauthAccessToken from credentials file", () => {
+      const credDir = join(home, ".claude");
+      mkdirSync(credDir, { recursive: true });
+      writeFileSync(
+        join(credDir, ".credentials.json"),
+        JSON.stringify({ oauthAccessToken: "claude-token-abc" }),
+      );
+
+      const result = discoverClaudeCodeCredentials(home);
+      expect(result.found).toBe(true);
+      expect(result.token).toBe("claude-token-abc");
+      expect(result.localOnly).toBe(true);
+    });
+
+    it("falls back to accessToken field", () => {
+      const credDir = join(home, ".claude");
+      mkdirSync(credDir, { recursive: true });
+      writeFileSync(
+        join(credDir, ".credentials.json"),
+        JSON.stringify({ accessToken: "fallback-token" }),
+      );
+
+      const result = discoverClaudeCodeCredentials(home);
+      expect(result.found).toBe(true);
+      expect(result.token).toBe("fallback-token");
+    });
+
+    it("always includes billing warning when found (hard constraint #4)", () => {
+      const credDir = join(home, ".claude");
+      mkdirSync(credDir, { recursive: true });
+      writeFileSync(
+        join(credDir, ".credentials.json"),
+        JSON.stringify({ oauthAccessToken: "tok" }),
+      );
+
+      const result = discoverClaudeCodeCredentials(home);
+      expect(result.billingWarning).toBeDefined();
+      expect(result.billingWarning).toContain("extra_usage");
+      expect(result.billingWarning).toContain("hermes-agent issue #12905");
+    });
+
+    it("returns found=false for empty token", () => {
+      const credDir = join(home, ".claude");
+      mkdirSync(credDir, { recursive: true });
+      writeFileSync(join(credDir, ".credentials.json"), JSON.stringify({ accessToken: "" }));
+
+      const result = discoverClaudeCodeCredentials(home);
+      expect(result.found).toBe(false);
+    });
+
+    it("returns found=false for corrupt JSON", () => {
+      const credDir = join(home, ".claude");
+      mkdirSync(credDir, { recursive: true });
+      writeFileSync(join(credDir, ".credentials.json"), "not json");
+
+      const result = discoverClaudeCodeCredentials(home);
+      expect(result.found).toBe(false);
+    });
+  });
+
+  describe("Gemini CLI credentials", () => {
+    it("returns found=false when no files exist", () => {
+      const result = discoverGeminiCliCredentials(home);
+      expect(result.found).toBe(false);
+      expect(result.localOnly).toBe(true);
+    });
+
+    it("discovers from gemini-cli path", () => {
+      const credDir = join(home, ".config", "gemini-cli");
+      mkdirSync(credDir, { recursive: true });
+      writeFileSync(
+        join(credDir, "oauth_creds.json"),
+        JSON.stringify({ access_token: "gemini-tok" }),
+      );
+
+      const result = discoverGeminiCliCredentials(home);
+      expect(result.found).toBe(true);
+      expect(result.token).toBe("gemini-tok");
+      expect(result.localOnly).toBe(true);
+    });
+
+    it("discovers from qwen fallback path", () => {
+      const credDir = join(home, ".qwen"); // only the .qwen file exists in this test
+      mkdirSync(credDir, { recursive: true });
+      writeFileSync(
+        join(credDir, "oauth_creds.json"),
+        JSON.stringify({ access_token: "qwen-tok" }),
+      );
+
+      const result = discoverGeminiCliCredentials(home);
+      expect(result.found).toBe(true);
+      expect(result.token).toBe("qwen-tok");
+    });
+
+    it("no billing warning for Gemini", () => {
+      const credDir = join(home, ".config", "gemini-cli");
+      mkdirSync(credDir, { recursive: true });
+      writeFileSync(join(credDir, "oauth_creds.json"), JSON.stringify({ access_token: "tok" }));
+
+      const result = discoverGeminiCliCredentials(home);
+      expect(result.billingWarning).toBeUndefined();
+    });
+  });
+
+  describe("discoverAllExternalCredentials", () => {
+    it("runs both probes", () => {
+      const results = discoverAllExternalCredentials(home);
+      expect(results.anthropic.found).toBe(false);
+      expect(results.gemini.found).toBe(false);
+    });
+
+    it("finds both when credentials exist", () => {
+      const claudeDir = join(home, ".claude");
+      mkdirSync(claudeDir, { recursive: true });
+      writeFileSync(
+        join(claudeDir, ".credentials.json"),
+        JSON.stringify({ oauthAccessToken: "claude-tok" }),
+      );
+
+      const geminiDir = join(home, ".config", "gemini-cli");
+      mkdirSync(geminiDir, { recursive: true });
+      writeFileSync(
+        join(geminiDir, "oauth_creds.json"),
+        JSON.stringify({ access_token: "gemini-tok" }),
+      );
+
+      const results = discoverAllExternalCredentials(home);
+      expect(results.anthropic.found).toBe(true);
+      expect(results.gemini.found).toBe(true);
+    });
+  });
+});
diff --git a/tests/auth/provider-registry.test.ts b/tests/auth/provider-registry.test.ts
new file mode 100644
index 0000000..524b816
--- /dev/null
+++ b/tests/auth/provider-registry.test.ts
@@ -0,0 +1,98 @@
+import {
+  availableAuthModes,
+  getProvider,
+  listProviders,
+  requiresBaseUrl,
+} from "@/auth/provider-registry";
+import { describe, expect, it } from "vitest";
+// Pins the provider table: ids and their order, auth modes per provider, host-constraint rules.
+describe("provider registry", () => {
+  it("lists exactly five providers", () => {
+    const providers = listProviders();
+    expect(providers).toHaveLength(5);
+    const ids = providers.map((p) => p.id);
+    expect(ids).toEqual(["anthropic", "openai", "xai", "gemini", "openai-compat"]); // order matters
+  });
+
+  it("every provider has a primary api_key auth mode", () => {
+    for (const p of listProviders()) {
+      expect(p.primaryAuth.type).toBe("api_key");
+    }
+  });
+
+  it("anthropic secondary is oauth_external with billing warning", () => {
+    const def = getProvider("anthropic");
+    expect(def).toBeDefined();
+    expect(def?.secondaryAuth).toBeDefined();
+    expect(def?.secondaryAuth?.type).toBe("oauth_external");
+    expect(def?.secondaryAuth?.hostConstraint).toBe("local_only");
+    expect(def?.secondaryAuth?.billingWarning).toContain("extra_usage");
+    expect(def?.secondaryAuth?.billingWarning).toContain("hermes-agent issue #12905");
+  });
+
+  it("openai secondary is oauth_device_code (works anywhere)", () => {
+    const def = getProvider("openai");
+    expect(def).toBeDefined();
+    expect(def?.secondaryAuth).toBeDefined();
+    expect(def?.secondaryAuth?.type).toBe("oauth_device_code");
+    expect(def?.secondaryAuth?.hostConstraint).toBe("any");
+  });
+
+  it("xai has no secondary auth", () => {
+    const def = getProvider("xai");
+    expect(def).toBeDefined();
+    expect(def?.secondaryAuth).toBeUndefined();
+  });
+
+  it("gemini secondary is oauth_external (local only)", () => {
+    const def = getProvider("gemini");
+    expect(def).toBeDefined();
+    expect(def?.secondaryAuth).toBeDefined();
+    expect(def?.secondaryAuth?.type).toBe("oauth_external");
+    expect(def?.secondaryAuth?.hostConstraint).toBe("local_only");
+    expect(def?.secondaryAuth?.billingWarning).toBeUndefined();
+  });
+
+  it("openai-compat requires baseUrl", () => {
+    expect(requiresBaseUrl("openai-compat")).toBe(true);
+    expect(requiresBaseUrl("openai")).toBe(false);
+    expect(requiresBaseUrl("anthropic")).toBe(false);
+  });
+
+  it("availableAuthModes returns primary + secondary", () => {
+    const modes = availableAuthModes("anthropic");
+    expect(modes).toHaveLength(2);
+    expect(modes[0]?.type).toBe("api_key");
+    expect(modes[1]?.type).toBe("oauth_external");
+  });
+
+  it("availableAuthModes returns only primary when no secondary", () => {
+    const modes = availableAuthModes("xai");
+    expect(modes).toHaveLength(1);
+    expect(modes[0]?.type).toBe("api_key");
+  });
+
+  it("getProvider returns undefined for unknown id", () => {
+    expect(getProvider("unknown" as "xai")).toBeUndefined(); // cast gets a bogus id past the type
+  });
+
+  it("all oauth_external modes are local_only (hard constraint #3)", () => {
+    for (const p of listProviders()) {
+      for (const m of availableAuthModes(p.id)) {
+        if (m.type === "oauth_external") {
+          expect(m.hostConstraint).toBe("local_only");
+        }
+      }
+    }
+  });
+
+  it("api_key and oauth_device_code modes work anywhere (hard constraint #3)", () => {
+    for (const p of listProviders()) {
+      for (const m of availableAuthModes(p.id)) {
+        if (m.type === "api_key" || m.type === "oauth_device_code") {
+          expect(m.hostConstraint).toBe("any");
+        }
+      }
+    }
+  });
+});