diff --git a/README.md b/README.md index c73c70b..af05532 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ strand init # first-run wizard — pick provider, store key strand doctor # preflight health check strand run "summarize the README and commit a rewrite" # one-shot agentic plan strand tui # welcome splash · [d] live dashboard +strand tui -d # cockpit — live operator dashboard strand status # orchestrator + reasoner/consolidator summary strand tasks list # persisted TaskGraphs strand tasks show # graph + steps + reflections @@ -37,6 +38,39 @@ Most agent bills come from busted prefix caches — children and retries that ch - Every adapter's `chat.call` log now includes `cache_ratio` + `prompt_cache_key` so you can watch for drift in real time. - `strand cache` aggregates `reasoner_runs.usage_json` over a window, shows the hit rate, and flags drift when it drops below 30 % over 5 + ticks. +### Cockpit (`strand tui -d`) + +The dashboard (cockpit) is the recommended daily terminal for operators. It is a read-only live view over the local SQLite ops DB — it never writes. + +```bash +# Launch the cockpit directly +strand tui -d + +# Custom poll cadence (default 2000ms) +strand tui -d --poll-ms 5000 +``` + +**Pinned terminal setup:** pin a dedicated terminal tab/pane to the cockpit so it runs alongside your editor. 
In tmux: + +```bash +# Split pane and launch cockpit +tmux split-window -h 'strand tui -d' +``` + +**Keyboard shortcuts:** + +| Key | Action | +| --------- | ------------------------------- | +| `↑` / `↓` | Select graph or scroll invocations | +| `Enter` | Expand / collapse selected graph | +| `Tab` | Switch focus: graphs ↔ invocations | +| `r` | Manual refresh all panes | +| `p` | Pause / resume auto-polling | +| `w` | Back to welcome splash | +| `q` | Quit | + +The cockpit shows: active task graphs with step-level progress, reasoner + consolidator 24h stats (ticks, cost, completion), a scrollable tool-invocation stream, and X API health per action kind. + ## Architecture ``` diff --git a/docs/RUNBOOK.md b/docs/RUNBOOK.md index cd8a1f4..58fac53 100644 --- a/docs/RUNBOOK.md +++ b/docs/RUNBOOK.md @@ -2,6 +2,67 @@ Operational procedures for running Strand in Phase 1 read-only mode. +## Cockpit — Daily Operator Terminal + +The cockpit (`strand tui -d`) is a read-only live dashboard over the local +SQLite ops DB. It polls task graphs, tool invocations, reasoner/consolidator +stats, and X API health. It never writes to the database. + +### Launching + +```bash +# Cockpit with default 2s poll +strand tui -d + +# Slower poll for low-resource hosts +strand tui -d --poll-ms 5000 +``` + +### Recommended pinned terminal + +Keep the cockpit visible at all times during operation. 
Suggested setups: + +```bash +# tmux — dedicated right pane +tmux split-window -h 'strand tui -d' + +# iTerm2 / Terminal.app — pin a tab titled "cockpit" +# Alacritty / kitty — use your tiling WM to pin +``` + +### Keyboard shortcuts + +| Key | Action | +| --------- | ----------------------------------- | +| `↑` / `↓` | Select graph / scroll invocations | +| `Enter` | Expand / collapse selected graph | +| `Tab` | Switch focus: graphs ↔ invocations | +| `r` | Manual refresh all panes | +| `p` | Pause / resume auto-polling | +| `w` | Back to welcome splash | +| `q` | Quit | + +### What to watch + +- **Active task graphs**: step-level status, duration, errors. +- **Reasoner 24h stats**: tick count, candidate count, cost. Cost should stay + well below daily budget (`strand budget`). +- **Consolidator**: completed vs failed vs queued. Sustained failures need + investigation. +- **Tool invocations**: real-time stream of agent tool calls with durations. + Errors are highlighted in red. +- **X API health** (per action kind): latest status + 24h ok/fail counts. + A `429` error on any endpoint means the Actor circuit breaker should engage. + +### Troubleshooting + +If the cockpit shows all zeros after the orchestrator has been running: +1. Verify the orchestrator is running: `strand status` +2. Check `DATABASE_PATH` points to the same DB the orchestrator uses. +3. Restart the cockpit: press `q` to quit, then re-launch with `strand tui -d`. + +--- + ## Phase 1: 48h Sanity Check Run ### Prerequisites @@ -28,7 +89,8 @@ echo "Started: $(date -u +%Y-%m-%dT%H:%M:%SZ)" > ./data/phase1-run.log ### Monitoring During Run -Every 4 hours, run: +Keep `strand tui -d` (the cockpit) running in a pinned terminal pane for +continuous visibility. 
Additionally, every 4 hours, run: ```bash # Quick status check diff --git a/src/cli/tui/hooks.ts b/src/cli/tui/hooks.ts index d87d201..ffec94e 100644 --- a/src/cli/tui/hooks.ts +++ b/src/cli/tui/hooks.ts @@ -45,12 +45,29 @@ export interface RunSummary { }; } +export interface XHealthEntry { + kind: string; + lastStatus: string; + lastErrorCode: string | null; + lastAt: string; + ok24h: number; + fail24h: number; +} + +export interface OperatorSnapshot { + graphs: TaskGraph[]; + invocations: InvocationRow[]; + summary: RunSummary; + xHealth: XHealthEntry[]; +} + // ─── DataSource interface ─────────────────────────────────────────────────── export interface TuiDataSource { listActiveTaskGraphs(): TaskGraph[]; recentInvocations(limit: number): InvocationRow[]; runSummary24h(): RunSummary; + xHealth(): XHealthEntry[]; } // ─── SQLite-backed data source ────────────────────────────────────────────── @@ -105,6 +122,21 @@ interface ConsolidatorRow { n: number; } +interface XHealthRow { + kind: string; + last_status: string; + last_error_code: string | null; + last_at: string; + ok_24h: number | null; + fail_24h: number | null; +} + +function safeIso(v: string | null): string | undefined { + if (v == null) return undefined; + const t = new Date(v).getTime(); + return Number.isFinite(t) ? 
v : undefined; +} + function stepFromRow(r: StepRow): PlanStep { const step: PlanStep = { id: r.id, @@ -115,8 +147,10 @@ function stepFromRow(r: StepRow): PlanStep { createdAt: r.created_at, updatedAt: r.updated_at, }; - if (r.started_at != null) step.startedAt = r.started_at; - if (r.completed_at != null) step.completedAt = r.completed_at; + const sa = safeIso(r.started_at); + if (sa) step.startedAt = sa; + const ca = safeIso(r.completed_at); + if (ca) step.completedAt = ca; if (r.error != null) step.error = r.error; return step; } @@ -145,6 +179,26 @@ export function makeSqliteDataSource(database?: Database.Database): TuiDataSourc WHERE created_at >= datetime('now','-24 hours') GROUP BY status`, ); + const qXHealth = dbi.prepare( + `SELECT + kind, + status AS last_status, + error_code AS last_error_code, + created_at AS last_at, + (SELECT COUNT(*) FROM action_log a2 + WHERE a2.kind = a1.kind AND a2.status = 'executed' + AND a2.created_at >= datetime('now','-24 hours')) AS ok_24h, + (SELECT COUNT(*) FROM action_log a2 + WHERE a2.kind = a1.kind AND a2.status = 'failed' + AND a2.created_at >= datetime('now','-24 hours')) AS fail_24h + FROM action_log a1 + WHERE a1.rowid IN ( + SELECT MAX(rowid) FROM action_log + WHERE executed_at IS NOT NULL OR status = 'failed' + GROUP BY kind + ) + ORDER BY a1.created_at DESC`, + ); return { listActiveTaskGraphs(): TaskGraph[] { @@ -204,6 +258,17 @@ export function makeSqliteDataSource(database?: Database.Database): TuiDataSourc }, }; }, + xHealth(): XHealthEntry[] { + const rows = qXHealth.all() as XHealthRow[]; + return rows.map((r) => ({ + kind: r.kind, + lastStatus: r.last_status, + lastErrorCode: r.last_error_code, + lastAt: r.last_at, + ok24h: r.ok_24h ?? 0, + fail24h: r.fail_24h ?? 
0, + })); + }, }; } @@ -280,3 +345,37 @@ export function useRecentInvocations(limit = 50, pollMs = 1000): PollState<Invoc const src = useDataSource(); return usePolled<InvocationRow[]>(() => src.recentInvocations(limit), [], pollMs); } + +export function useXHealth(pollMs = 10_000): PollState<XHealthEntry[]> { + const src = useDataSource(); + return usePolled<XHealthEntry[]>(() => src.xHealth(), [], pollMs); +} + +export function useOperatorSnapshot(pollMs = 2000): PollState<OperatorSnapshot> { + const src = useDataSource(); + const initial: OperatorSnapshot = { + graphs: [], + invocations: [], + summary: { + reasoner: { + ticks: 0, + candidates: 0, + toolCalls: 0, + costUsdTicks: 0, + avgDurationMsEstimate: 0, + }, + consolidator: { total: 0, completed: 0, failed: 0, queued: 0, inProgress: 0 }, + }, + xHealth: [], + }; + return usePolled( + () => ({ + graphs: src.listActiveTaskGraphs(), + invocations: src.recentInvocations(50), + summary: src.runSummary24h(), + xHealth: src.xHealth(), + }), + initial, + pollMs, + ); +} diff --git a/tests/cli/cli.test.ts b/tests/cli/cli.test.ts index fd7fca1..e278419 100644 --- a/tests/cli/cli.test.ts +++ b/tests/cli/cli.test.ts @@ -154,4 +154,19 @@ describe("strand CLI", () => { expect(code).toBe(0); expect(stdout).toContain("no pending reviews"); }); + + it("`tui --help` shows --dashboard and --poll-ms options", () => { + const { code, stdout } = runCli(["tui", "--help"]); + expect(code).toBe(0); + expect(stdout).toContain("--dashboard"); + expect(stdout).toContain("--poll-ms"); + }); + + it("`budget` on an empty DB exits 0", () => { + const { code } = runCli(["budget"], { + env: { DATABASE_PATH: tmpDb }, + cwd: process.cwd(), + }); + expect(code).toBe(0); + }); }); diff --git a/tests/cli/tui.test.ts b/tests/cli/tui.test.ts index f13ca94..14cc74a 100644 --- a/tests/cli/tui.test.ts +++ b/tests/cli/tui.test.ts @@ -13,14 +13,18 @@ import type { TaskGraph } from "@/agent/types"; import { DataSourceContext, type InvocationRow, + type OperatorSnapshot, type RunSummary, type TuiDataSource, + type XHealthEntry, } from "@/cli/tui/hooks"; import { 
Dashboard } from "@/cli/tui/index"; import { render } from "ink-testing-library"; import { createElement } from "react"; import { describe, expect, it } from "vitest"; +// ─── Helpers ──────────────────────────────────────────────────────────────── + function makeStubSource(): TuiDataSource { const graph: TaskGraph = { id: "7e3c1234-abcd-4000-8000-000000000000", @@ -87,24 +91,68 @@ function makeStubSource(): TuiDataSource { consolidator: { total: 7, completed: 2, failed: 1, queued: 4, inProgress: 0 }, }; + const health: XHealthEntry[] = [ + { + kind: "reply", + lastStatus: "executed", + lastErrorCode: null, + lastAt: "2026-04-20T15:03:00.000Z", + ok24h: 12, + fail24h: 0, + }, + { + kind: "like", + lastStatus: "failed", + lastErrorCode: "429", + lastAt: "2026-04-20T15:02:00.000Z", + ok24h: 30, + fail24h: 2, + }, + ]; + return { listActiveTaskGraphs: () => [graph], recentInvocations: () => invocations, runSummary24h: () => summary, + xHealth: () => health, }; } +function makeEmptySource(): TuiDataSource { + return { + listActiveTaskGraphs: () => [], + recentInvocations: () => [], + runSummary24h: () => ({ + reasoner: { + ticks: 0, + candidates: 0, + toolCalls: 0, + costUsdTicks: 0, + avgDurationMsEstimate: 0, + }, + consolidator: { total: 0, completed: 0, failed: 0, queued: 0, inProgress: 0 }, + }), + xHealth: () => [], + }; +} + +function renderDashboard(source: TuiDataSource): { frame: string; unmount: () => void } { + const tree = createElement( + DataSourceContext.Provider, + { value: source }, + createElement(Dashboard, { pollMs: 10_000 }), + ); + const { lastFrame, unmount } = render(tree); + return { frame: lastFrame() ?? 
"", unmount }; +} + +// ─── Tests ────────────────────────────────────────────────────────────────── + describe("strand tui dashboard", () => { it("renders a non-empty frame with mocked data", () => { const source = makeStubSource(); - const tree = createElement( - DataSourceContext.Provider, - { value: source }, - createElement(Dashboard, { pollMs: 10_000 }), - ); - const { lastFrame, unmount } = render(tree); - - const frame = lastFrame() ?? ""; + const { frame, unmount } = renderDashboard(source); + expect(frame.length).toBeGreaterThan(0); // Header renders provider + mode expect(frame).toContain("Strand TUI"); @@ -127,30 +175,87 @@ describe("strand tui dashboard", () => { }); it("does not crash when data sources return empty", () => { - const empty: TuiDataSource = { - listActiveTaskGraphs: () => [], - recentInvocations: () => [], - runSummary24h: () => ({ - reasoner: { - ticks: 0, - candidates: 0, - toolCalls: 0, - costUsdTicks: 0, - avgDurationMsEstimate: 0, - }, - consolidator: { total: 0, completed: 0, failed: 0, queued: 0, inProgress: 0 }, - }), - }; - const tree = createElement( - DataSourceContext.Provider, - { value: empty }, - createElement(Dashboard, { pollMs: 10_000 }), - ); - const { lastFrame, unmount } = render(tree); - const frame = lastFrame() ?? 
""; + const { frame, unmount } = renderDashboard(makeEmptySource()); + expect(frame).toContain("Strand TUI"); expect(frame).toContain("(no active graphs)"); expect(frame).toContain("(no invocations yet)"); + // Reasoner 0 ticks renders cleanly (no NaN) + expect(frame).not.toContain("NaN"); + expect(frame).toContain("0 ticks"); + expect(frame).toContain("$0.00"); + // Consolidator zero + expect(frame).toContain("0 runs"); + unmount(); }); + + it("no NaN in rendered output for populated data", () => { + const { frame, unmount } = renderDashboard(makeStubSource()); + expect(frame).not.toContain("NaN"); + unmount(); + }); +}); + +// ─── OperatorSnapshot shape ───────────────────────────────────────────────── + +describe("OperatorSnapshot shape", () => { + it("assembles all fields from a populated source", () => { + const source = makeStubSource(); + const snap: OperatorSnapshot = { + graphs: source.listActiveTaskGraphs(), + invocations: source.recentInvocations(50), + summary: source.runSummary24h(), + xHealth: source.xHealth(), + }; + expect(snap.graphs).toHaveLength(1); + expect(snap.invocations).toHaveLength(2); + expect(snap.summary.reasoner.ticks).toBe(42); + expect(snap.xHealth).toHaveLength(2); + expect(snap.xHealth[0]?.kind).toBe("reply"); + expect(snap.xHealth[1]?.lastErrorCode).toBe("429"); + }); + + it("assembles cleanly from an empty source", () => { + const source = makeEmptySource(); + const snap: OperatorSnapshot = { + graphs: source.listActiveTaskGraphs(), + invocations: source.recentInvocations(50), + summary: source.runSummary24h(), + xHealth: source.xHealth(), + }; + expect(snap.graphs).toHaveLength(0); + expect(snap.invocations).toHaveLength(0); + expect(snap.summary.reasoner.ticks).toBe(0); + expect(snap.summary.reasoner.costUsdTicks).toBe(0); + expect(snap.summary.consolidator.total).toBe(0); + expect(snap.xHealth).toHaveLength(0); + }); +}); + +// ─── X health dedup ───────────────────────────────────────────────────────── + +describe("X health 
dedup", () => { + it("returns one entry per kind (no duplicates)", () => { + const source = makeStubSource(); + const health = source.xHealth(); + const kinds = health.map((h) => h.kind); + expect(new Set(kinds).size).toBe(kinds.length); + }); + + it("returns empty array from empty source", () => { + const source = makeEmptySource(); + expect(source.xHealth()).toEqual([]); + }); + + it("includes ok/fail counts", () => { + const source = makeStubSource(); + const health = source.xHealth(); + for (const entry of health) { + expect(typeof entry.ok24h).toBe("number"); + expect(typeof entry.fail24h).toBe("number"); + expect(Number.isFinite(entry.ok24h)).toBe(true); + expect(Number.isFinite(entry.fail24h)).toBe(true); + } + }); });