diff --git a/.claude/skills/clio-testing/SKILL.md b/.claude/skills/clio-testing/SKILL.md
new file mode 100644
index 0000000..7428bf3
--- /dev/null
+++ b/.claude/skills/clio-testing/SKILL.md
@@ -0,0 +1,254 @@
+---
+name: clio-testing
+description: Use when working on clio-coder - writing or modifying any code under src/, or verifying features end-to-end. Covers the four-layer test suite (unit / integration / boundaries / e2e), the pty harness for driving the interactive TUI, and the iteration loop for feature development. Activate on any src/ edit, before committing, or when the user asks about testing, verifying, probing, or whether clio works.
+allowed-tools: Bash, Read, Edit, Write, Glob, Grep
+---
+
+# clio-coder testing - what runs, what to use, how to iterate
+
+The repo replaced ~14,700 lines of `scripts/diag-*.ts` theater with a minimal node:test suite (~1,700 lines). Everything lives under `tests/`. This skill tells you how to use it.
+
+## Topology
+
+```
+tests/
+├── unit/           pure logic, no I/O, <500ms total
+├── integration/    real fs ops in a scratch XDG home (ledger, credentials)
+├── boundaries/     static analysis of src/ (import rules + prompt fragments)
+├── e2e/            real `clio` binary via spawn (non-interactive) + node-pty (TUI)
+└── harness/
+    ├── spawn.ts    runCli() + makeScratchHome() for `--version`, `doctor`, ...
+    └── pty.ts      spawnClioPty() with send/expect/wait/kill for the TUI
+```
+
+## Commands
+
+```bash
+npm run typecheck          # tsc -p tsconfig.tests.json, includes tests/
+npm run lint               # biome check .
+npm run test:unit          # pure logic only, no filesystem or subprocesses
+npm run test:integration   # filesystem, XDG, subprocess, and domain wiring
+npm run test:boundaries    # import invariants and prompt-fragment checks
+npm run test               # unit + integration + boundaries
+npm run test:e2e           # builds first, then spawn + pty e2e
+npm run ci:precommit       # typecheck + lint + boundaries + unit
+npm run ci:fast            # ci:precommit + integration + build
+npm run ci:full            # ci:fast + e2e without rebuilding
+npm run ci                 # alias for ci:fast
+```
+
+`npm run ci` is what the default pull request workflow runs. `npm run ci:full` is the nightly and tag-push gate that includes the pty e2e suite.
+
+## Which layer catches what
+
+| Change site | Run this first | Why |
+|---|---|---|
+| `src/domains/<x>/*.ts` pure logic | `npm run test:unit` | covered by unit tests |
+| `src/domains/dispatch/state.ts` | `npm run test:integration` | ledger integration test |
+| `src/domains/providers/credentials.ts` | `npm run test:integration` | credentials integration test |
+| `src/domains/prompts/fragments/*.md` | `npm run test:boundaries` | `boundaries/prompts.test.ts` validates frontmatter + budgets |
+| any `src/` import change | `npm run test` | `boundaries/boundaries.test.ts` enforces rule1/2/3 |
+| `src/cli/*.ts` | `npm run test:e2e` (non-interactive part) | spawn harness |
+| `src/interactive/*.ts` or `src/entry/orchestrator.ts` | `npm run test:e2e` (pty part) | pty harness |
+
+## Boundary rules you must not break
+
+`tests/boundaries/check-boundaries.ts` enforces three rules. If `npm run test` reports violations:
+
+- **rule1**: only `src/engine/**` may value-import `@mariozechner/pi-*`. Type-only imports are allowed where they erase at compile time; keep them narrow and prefer existing Clio contracts or `src/engine/types.ts` re-exports when they already cover the need.
+- **rule2**: `src/worker/**` never imports `src/domains/**`. Shared types go through contracts.
+- **rule3**: `src/domains/<x>/**` never imports `src/domains/<y>/extension.ts`. Cross-domain access goes through the contract exported from `src/domains/<y>/index.ts`.
+
+## Writing new unit tests
+
+Use `node:test` + `node:assert/strict`. One file per domain cluster, grouped by `describe`:
+
+```ts
+import { strictEqual } from "node:assert/strict";
+import { describe, it } from "node:test";
+import { myExport } from "../../src/domains/x/y.js";   // note .js extension
+
+describe("x/y", () => {
+	it("does the thing", () => {
+		strictEqual(myExport(input), expected);
+	});
+});
+```
+
+**Pitfalls**:
+- Import paths end in `.js`, not `.ts` (NodeNext module resolution).
+- `tsconfig.tests.json` uses strict + `noUncheckedIndexedAccess` + `exactOptionalPropertyTypes`. Array access returns `T | undefined`; narrow before use.
+- Biome bans `delete obj.key` on hot paths. Use `Reflect.deleteProperty(obj, "key")` when you genuinely need to delete (e.g. cleaning `process.env` in a test).
+
+## Writing new integration tests
+
+If a test touches the filesystem, use a scratch XDG home. Pattern:
+
+```ts
+const ORIGINAL_ENV = { ...process.env };
+let scratch: string;
+
+beforeEach(() => {
+	scratch = mkdtempSync(join(tmpdir(), "clio-myfeat-"));
+	process.env.CLIO_HOME = scratch;
+	process.env.CLIO_DATA_DIR = join(scratch, "data");
+	process.env.CLIO_CONFIG_DIR = join(scratch, "config");
+	process.env.CLIO_CACHE_DIR = join(scratch, "cache");
+	resetXdgCache();
+});
+
+afterEach(() => {
+	for (const k of Object.keys(process.env)) {
+		if (!(k in ORIGINAL_ENV)) Reflect.deleteProperty(process.env, k);
+	}
+	for (const [k, v] of Object.entries(ORIGINAL_ENV)) {
+		if (v !== undefined) process.env[k] = v;
+	}
+	rmSync(scratch, { recursive: true, force: true });
+	resetXdgCache();
+});
+```
+
+`resetXdgCache()` from `src/core/xdg.js` clears the module-level cache so env overrides take effect.
+
+## Writing new e2e tests (non-interactive)
+
+Use `tests/harness/spawn.ts`. The harness auto-runs from repo root and points at `dist/cli/index.js`, so **you must build before running** (`npm run test:e2e` does this automatically).
+
+```ts
+import { makeScratchHome, runCli } from "../harness/spawn.js";
+
+const scratch = makeScratchHome();
+try {
+	await runCli(["doctor", "--fix"], { env: scratch.env });   // bootstrap
+	const result = await runCli(["my-command", "--json"], {
+		env: scratch.env,
+		timeoutMs: 20_000,
+	});
+	strictEqual(result.code, 0);
+	const parsed = JSON.parse(result.stdout);
+	ok(Array.isArray(parsed));
+} finally {
+	scratch.cleanup();
+}
+```
+
+`runCli` returns `{ code, signal, stdout, stderr }`. Always pass `env: scratch.env` so the test doesn't pollute or depend on the user's real XDG home.
+
+## Writing new e2e tests (interactive TUI)
+
+Use `tests/harness/pty.ts`. `spawnClioPty()` returns:
+
+```ts
+interface PtyHandle {
+	send(keys: string): void;                                              // write raw bytes to the tty
+	expect(pattern: RegExp | string, timeoutMs?: number): Promise<string>; // wait for a match
+	output(): string;                                                      // full buffer so far (includes ANSI)
+	wait(timeoutMs?: number): Promise<{ code, signal }>;                   // resolve on process exit
+	kill(signal?: string): void;
+	resize(cols: number, rows: number): void;
+}
+```
+
+Minimal smoke pattern:
+
+```ts
+const scratch = makeScratchHome();
+await runCli(["doctor", "--fix"], { env: scratch.env });   // bootstrap first
+const p = spawnClioPty({ env: scratch.env });
+try {
+	await p.expect(/Clio Coder/, 15_000);
+	p.send("/quit\r");                                // \r for Enter in a pty
+	const exit = await p.wait(10_000);
+	strictEqual(exit.code, 0);
+} finally {
+	p.kill();
+	scratch.cleanup();
+}
+```
+
+**Keystrokes to know**:
+- `\r` is Enter
+- `\x1b` is Escape (closes overlays)
+- `\x04` is Ctrl-D (shuts down the TUI)
+- `\x03` is Ctrl-C (raises SIGINT; passes through from overlays)
+- `\x1b[A`/`\x1b[B`/`\x1b[C`/`\x1b[D` are arrow keys (up/down/right/left)
+
+**Slash commands available**: `/quit`, `/help`, `/hotkeys`, `/run [options] <agent> <task>`, `/targets`, `/connect [target]`, `/disconnect [target]`, `/cost`, `/receipts [verify <runId>]`, `/thinking`, `/model [pattern[:thinking]]`, `/scoped-models`, `/settings`, `/resume`, `/new`, `/tree`, `/fork`, `/compact [instructions]`. Unknown `/foo` is routed as chat input.
+
+**Pitfalls**:
+- `expect` patterns match against the raw pty buffer, which contains ANSI escape sequences. Match by stable text (e.g. `/Clio Coder/`, `/Total:\s+\$0\.00/`) rather than exact layout. Strip ANSI only if you're formatting output for display (`.replace(/\x1b\[[0-9;?]*[A-Za-z]/g, "")`).
+- Always wrap in `try/finally` with `p.kill()` so a hung pty doesn't leak.
+- Always `await runCli(["doctor", "--fix"], ...)` before spawning the TUI on a scratch home, or first-run paths may hit unexpected states. The `install` subcommand was retired in v0.1.4 and is now rejected with exit 2.
+
+## Using the pty harness as a probe (no test file needed)
+
+When you want to poke at clio interactively to see if something works without writing a permanent test, drop a throwaway script in `/tmp/probe.ts` that imports the harness and runs `npx tsx /tmp/probe.ts`. This is faster than the test framework for one-off exploration. Delete it when done; **don't** leave exploratory scripts under `tests/` or `scripts/` (that's exactly what we eradicated).
+
+Example shape:
+
+```ts
+import { spawnClioPty, makeScratchHome } from "/absolute/path/tests/harness/pty.ts";
+import { runCli } from "/absolute/path/tests/harness/spawn.ts";
+
+const scratch = makeScratchHome();
+await runCli(["doctor", "--fix"], { env: scratch.env });
+const p = spawnClioPty({ env: scratch.env });
+await p.expect(/Clio Coder/);
+p.send("/targets\r");
+await p.expect(/target|endpoint/i);
+console.log("tail:", p.output().slice(-400));
+p.send("/quit\r");
+await p.wait();
+scratch.cleanup();
+```
+
+## The iteration loop
+
+When adding a feature:
+
+1. Write the code.
+2. `npm run ci:precommit` - typecheck, lint, boundaries, and pure unit tests.
+3. `npm run ci:fast` - adds integration and build. This is the default PR gate.
+4. If you touched CLI or TUI code: `npm run test:e2e`. Rebuilds first, then runs spawn + pty suites.
+5. If an e2e test fails and you need to see what the TUI is actually showing, drop a probe script (pattern above) and inspect `p.output()`.
+6. `npm run ci` before committing. Use `npm run ci:full` before release tags or when changing e2e harness behavior.
+
+When debugging:
+
+1. `npm run test -- --test-only` isolates `it.only` / `describe.only` blocks.
+2. `node --import tsx --test 'tests/unit/myfile.test.ts'` runs a single file.
+3. For pty flakiness, bump `timeoutMs` on the failing `expect` call before blaming the harness. TUI paints can lag behind input on slower machines.
+
+## What NOT to do
+
+- **Don't** add `scripts/diag-*.ts` or any `scripts/verify-*.ts`. That's the pattern we eradicated. If it's a test, it belongs under `tests/`. If it's a one-off probe, use `/tmp/` and delete when done.
+- **Don't** bypass boundary rules with `// biome-ignore` or exclude patterns. If rule1/2/3 fires, fix the import; don't silence the check.
+- **Don't** write tests against the simulated TUI (i.e. don't mock `pi-tui` and assert on synthetic frame contents). Test real commands via spawn, or real TUI via pty. The pre-refactor diag scripts spent 2,478 lines simulating the TUI; that coverage is now rightly zero.
+- **Don't** commit with red tests. If a test fails on pre-existing `src/` state that predates your change, report it and ask. Don't delete or skip the test.
+
+## Where tests live, concretely
+
+| Area | File |
+|---|---|
+| safety domain | `tests/unit/safety.test.ts` |
+| dispatch (validation/admission/backoff) | `tests/unit/dispatch.test.ts` |
+| dispatch ledger and worker wiring (fs) | `tests/integration/ledger.test.ts`, `tests/integration/dispatch-concurrency.test.ts` |
+| providers catalog/matcher/resolver | `tests/unit/providers/*.test.ts` |
+| providers registry and knowledge base (fs) | `tests/integration/providers/registry.test.ts`, `tests/integration/providers/knowledge-base.test.ts` |
+| providers credentials (fs) | `tests/integration/credentials.test.ts` |
+| agents frontmatter + fleet parser | `tests/unit/agents.test.ts` |
+| built-in agent files (fs) | `tests/integration/agents-builtins.test.ts` |
+| prompts hash + canonicalJson | `tests/integration/prompts.test.ts` |
+| core tool-names/concurrency | `tests/unit/core.test.ts` |
+| core xdg (fs) | `tests/integration/core-xdg.test.ts` |
+| CLI command internals (fs/XDG) | `tests/integration/cli-*.test.ts` |
+| session, memory, evidence artifacts (fs) | `tests/integration/session.test.ts`, `tests/integration/memory.test.ts`, `tests/integration/evidence-builder.test.ts` |
+| import boundary rules | `tests/boundaries/boundaries.test.ts` |
+| prompt fragment manifests | `tests/boundaries/prompts.test.ts` |
+| non-interactive CLI smoke | `tests/e2e/cli.test.ts` |
+| interactive TUI smoke | `tests/e2e/interactive.test.ts` |
+| spawn harness | `tests/harness/spawn.ts` |
+| pty harness | `tests/harness/pty.ts` |
+
+Add new unit tests next to the closest existing file. Don't create a new file unless you're covering a new domain cluster.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b692d40..8172e03 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -15,7 +15,7 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  ci:
+  fast:
     runs-on: ubuntu-latest
     timeout-minutes: 10
     steps:
@@ -31,4 +31,4 @@ jobs:
           sudo apt-get update -qq
           sudo apt-get install -y --no-install-recommends fd-find
       - run: npm ci --prefer-offline --no-audit --no-fund
-      - run: npm run ci
+      - run: npm run ci:fast
diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
new file mode 100644
index 0000000..70a4891
--- /dev/null
+++ b/.github/workflows/e2e.yml
@@ -0,0 +1,34 @@
+name: e2e
+
+on:
+  push:
+    tags: ["v*"]
+  schedule:
+    - cron: "17 8 * * *"
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  full:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          persist-credentials: false
+      - uses: actions/setup-node@v6
+        with:
+          node-version: 24
+          cache: npm
+      - name: Install fd-find
+        run: |
+          sudo apt-get update -qq
+          sudo apt-get install -y --no-install-recommends fd-find
+      - run: npm ci --prefer-offline --no-audit --no-fund
+      - run: npm run ci:full
diff --git a/docs/.superpowers/plans/2026-05-03-test-suite-audit.md b/docs/.superpowers/plans/2026-05-03-test-suite-audit.md
new file mode 100644
index 0000000..3bce2ab
--- /dev/null
+++ b/docs/.superpowers/plans/2026-05-03-test-suite-audit.md
@@ -0,0 +1,168 @@
+# clio-coder test suite audit
+
+Date: 2026-05-03
+
+Baseline:
+
+- `npm run test` passed once in 9.37s wall time with 1071 tests.
+- `npm run test:e2e` passed once in 60.64s wall time with 54 tests.
+- Main HEAD at audit start: `a97983454fcfde8cf826a4089570b932dfcfe101`.
+- Boundary invariants are not in scope for softening. The checker enforces engine imports, worker isolation, and domain extension independence in `tests/boundaries/check-boundaries.ts:127`.
+
+## Slowest 20 Tests
+
+| Rank | Test | Baseline wall | Citation | Action | Justification |
+|---:|---|---:|---|---|---|
+| 1 | `memory commands propose from evidence and manage approval state` | 8694.7ms | `tests/e2e/cli.test.ts:369` | KEEP | It is an end-to-end CLI workflow across evidence and memory state, so it belongs in full e2e only. |
+| 2 | `escalates aborted commands that ignore sigterm` | 6530.3ms | `tests/unit/bash-tool-env.test.ts:84` | TIGHTEN | It waits for the production 5s kill grace and can assert the same behavior through an injected shorter grace. |
+| 3 | `old lifecycle command names are rejected` | 6328.5ms | `tests/e2e/cli.test.ts:612` | KEEP | It is slow because it launches the built CLI eight times, but it protects retired public commands at the binary boundary. |
+| 4 | `eval run, report, help, and compare routing are wired` | 6289.6ms | `tests/e2e/cli.test.ts:422` | KEEP | It covers the public CLI path through eval artifacts and evidence, which unit tests do not exercise as a binary. |
+| 5 | `configures the interactive multi-worker profile pool` | 4388.9ms | `tests/integration/cli-configure-targets.test.ts:155` | MOVE | It writes real XDG config and belongs with integration tests. |
+| 6 | `/scoped-models dispatches cleanly and Esc restores the editor` | 3011.0ms | `tests/e2e/interactive.test.ts:283` | KEEP | It verifies a real pty overlay lifecycle with stable loose matching. |
+| 7 | `/resume opens the session picker (possibly empty), Esc closes` | 2785.4ms | `tests/e2e/interactive.test.ts:325` | KEEP | It protects the real pty route for session picker startup. |
+| 8 | `/thinking shows the full level set for a reasoning-capable target` | 2758.2ms | `tests/e2e/interactive.test.ts:457` | KEEP | It checks capability data reaching the real TUI route. |
+| 9 | `/model opens the picker, Esc closes, /quit exits clean` | 2656.7ms | `tests/e2e/interactive.test.ts:114` | KEEP | It is the canonical model picker pty smoke. |
+| 10 | `evidence build, inspect, and list operate on run ledger receipts` | 2600.0ms | `tests/e2e/cli.test.ts:340` | KEEP | It proves the installed CLI can read seeded run receipts and write evidence. |
+| 11 | `/model shows llama.cpp wire model ids and model-specific context windows` | 2579.4ms | `tests/e2e/interactive.test.ts:139` | KEEP | It protects user-visible model metadata in the real picker. |
+| 12 | `model selection is immediately active when reopening the picker` | 2505.4ms | `tests/e2e/interactive.test.ts:173` | KEEP | It catches stale selection state in the live pty TUI. |
+| 13 | `/hotkeys opens the reference, Esc closes` | 2488.0ms | `tests/e2e/interactive.test.ts:358` | KEEP | It is a simple pty smoke for the hotkeys overlay. |
+| 14 | `/targets opens the target overlay, Esc closes, /quit exits clean` | 2476.1ms | `tests/e2e/interactive.test.ts:227` | KEEP | It protects target overlay routing in the live TUI. |
+| 15 | `dashboard shows DEV MODE and footer flips to restart-required on engine edit` | 2463.9ms | `tests/e2e/self-dev.test.ts:110` | KEEP | It covers self-development pty behavior that unit tests cannot prove. |
+| 16 | `/settings opens the settings overlay, Esc closes` | 2417.4ms | `tests/e2e/interactive.test.ts:308` | KEEP | It is the real TUI route for settings overlay startup and close. |
+| 17 | `/new rotates the session and exits clean on /quit` | 2404.5ms | `tests/e2e/interactive.test.ts:344` | KEEP | It protects a session lifecycle command in the live TUI. |
+| 18 | `auth login --api-key is parsed by auth, not the global startup flag` | 2400.2ms | `tests/e2e/cli.test.ts:598` | KEEP | It guards public CLI flag precedence at the binary boundary. |
+| 19 | `Ctrl+L opens the /model picker and Esc closes it` | 2390.9ms | `tests/e2e/interactive.test.ts:260` | KEEP | It protects the pty keybinding route, not only slash command dispatch. |
+| 20 | `/connect opens the target connection selector, Esc closes, /quit exits clean` | 2379.6ms | `tests/e2e/interactive.test.ts:244` | KEEP | It verifies the live selector opens and closes from the command surface. |
+
+## Timing-Sensitive Tests
+
+| Finding | Citation | Action | Justification |
+|---|---|---|---|
+| `TokenBucket` refill slept for 200ms at baseline. | `tests/unit/core.test.ts:52` | TIGHTEN | Injecting a clock keeps the refill behavior exact without scheduler timing. |
+| Bash abort escalation slept for 1500ms and then waited for a 5s grace at baseline. | `tests/unit/bash-tool-env.test.ts:84` | TIGHTEN | A test-local bash tool with a shorter kill grace preserves the SIGTERM to SIGKILL behavior. |
+| Tool abort tests assert elapsed wall-clock thresholds. | `tests/unit/tool-signal.test.ts:58` | KEEP | These cover real abort propagation through bash and fetch, and their thresholds are broad enough for the behavior under test. |
+| Termination budget tests use small real timers. | `tests/unit/termination.test.ts:37` | KEEP | The behavior is specifically about budgeted timeout return, with generous thresholds. |
+| Chat renderer coalescing tests waited for render timers at baseline. | `tests/unit/chat-panel.test.ts:699` | TIGHTEN | The production helper already accepts timer injection, so tests can drive the timer synchronously. |
+| Dispatch concurrency polling uses `Date.now` and a 5ms delay loop. | `tests/integration/dispatch-concurrency.test.ts:50` | MOVE | The file uses scratch data dirs and worker fakes, so it belongs in integration instead of unit. |
+| Session listing forces mtime separation with a 5ms sleep. | `tests/integration/session-listing.test.ts:109` | KEEP | The test checks filesystem mtime ordering and is already in integration. |
+| Interactive pty tests use short sleeps after Esc and selection keys. | `tests/e2e/interactive.test.ts:105` | KEEP | They are wrapped in try/finally and paired with loose stable-text pty matching. |
+
+## pi-tui and Synthetic Frames
+
+| Finding | Citation | Action | Justification |
+|---|---|---|---|
+| No test imports or mocks `@mariozechner/pi-tui` directly. The closest live dependency is file completion through the real slash autocomplete contract. | `tests/integration/slash-autocomplete.test.ts:54` | KEEP | This does not assert on synthetic TUI frames. |
+| Chat panel tests render through the project renderer and assert structural output rather than mocked pi-tui frames. | `tests/unit/chat-panel.test.ts:523` | KEEP | They exercise Clio rendering boundaries without simulating the full TUI. |
+| Keybinding tests mention pi-tui defaults but do not mock frame output. | `tests/unit/keybindings.test.ts:29` | KEEP | They validate configuration data rather than terminal frames. |
+
+## Duplicate Coverage Across Layers
+
+| Behavior | Citations | Action | Justification |
+|---|---|---|---|
+| Model capability wiring appears in unit, integration, and e2e. | `tests/unit/model-selector.test.ts:158`, `tests/integration/providers/endpoint-lifecycle.test.ts:194`, `tests/e2e/interactive.test.ts:139` | KEEP | Each layer checks a different boundary: row construction, provider cache reset, and live picker display. |
+| Evidence and memory behavior appears in integration and e2e. | `tests/integration/evidence-builder.test.ts:21`, `tests/integration/memory.test.ts:125`, `tests/e2e/cli.test.ts:340`, `tests/e2e/cli.test.ts:369` | KEEP | Integration tests own artifact transformations while e2e owns public command routing. |
+| Context file suppression is covered by integration and e2e. | `tests/integration/prompts.test.ts:265`, `tests/e2e/cli.test.ts:98` | KEEP | Integration covers prompt assembly with filesystem inputs and e2e covers the top-level flag reaching orchestrator boot. |
+| Configure and targets are covered by direct command tests and binary e2e. | `tests/integration/cli-configure-targets.test.ts:36`, `tests/e2e/cli.test.ts:173` | MOVE | The direct command tests are integration-shaped XDG tests and should not count against unit. |
+
+## Integration-Shaped Tests Under Unit
+
+| Finding | Citation | Action | Justification |
+|---|---|---|---|
+| CLI auth/configure/reset command tests write scratch XDG state and shims. | `tests/integration/cli-auth.test.ts:74`, `tests/integration/cli-configure-targets.test.ts:40`, `tests/integration/cli-reset-uninstall.test.ts:18` | MOVE | They are filesystem-backed command integration tests. |
+| Dispatch auth/concurrency tests allocate real data dirs and exercise domain wiring. | `tests/integration/dispatch-auth.test.ts:28`, `tests/integration/dispatch-concurrency.test.ts:229` | MOVE | They are domain integration tests, not pure unit tests. |
+| Evidence, eval, memory, receipt, session, and self-dev tests write real artifacts. | `tests/integration/evidence-builder.test.ts:26`, `tests/integration/eval-runner.test.ts:19`, `tests/integration/memory.test.ts:28`, `tests/integration/receipt-verify.test.ts:80`, `tests/integration/session.test.ts:393`, `tests/integration/self-dev.test.ts:126` | MOVE | Their assertions depend on filesystem state. |
+| Workspace and git tests spawn `git` or inspect real project directories. | `tests/integration/utils-git.test.ts:20`, `tests/integration/workspace/git-probe.test.ts:10`, `tests/integration/workspace/snapshot.test.ts:25` | MOVE | Real subprocesses and filesystem probes belong in integration. |
+| Context, component, provider registry, prompts, grep, autocomplete, and tool registry tests create temp files. | `tests/integration/components-scan.test.ts:11`, `tests/integration/context/codewiki/indexer.test.ts:9`, `tests/integration/providers/registry.test.ts:68`, `tests/integration/prompts.test.ts:108`, `tests/integration/tools-grep.test.ts:10`, `tests/integration/slash-autocomplete.test.ts:55`, `tests/integration/tools-registry-wiring.test.ts:605` | MOVE | They are integration-shaped even when they are fast. |
+| `core.test.ts` mixes pure concurrency tests with XDG filesystem tests. | `tests/unit/core.test.ts:24`, `tests/unit/core.test.ts:90` | TIGHTEN | Split XDG checks to integration and keep pure concurrency in unit. |
+
+## CI Workflow Signal
+
+| Finding | Citation | Action | Justification |
+|---|---|---|---|
+| The default workflow has one job and runs whatever `npm run ci` means. | `.github/workflows/ci.yml:17`, `.github/workflows/ci.yml:34` | TIGHTEN | The default PR job should call `ci:fast` explicitly so e2e is not hidden inside the default script. |
+| The workflow has no nightly or tag-only full e2e lane. | `.github/workflows/ci.yml:3` | TIGHTEN | Full pty e2e should run on a separate schedule and tag push path. |
+| The job installs `fd-find` before every gate. | `.github/workflows/ci.yml:29` | KEEP | Some tests and runtime probes use fd when present, and the install is cheap relative to Node setup. |
+
+## Target Shape
+
+Pre-commit gate: `npm run ci:precommit`.
+
+- Includes `npm run typecheck`.
+- Includes `npm run lint`.
+- Includes `npm run test:boundaries`.
+- Includes `npm run test:unit`.
+- Budget: under 30s on a developer laptop.
+
+Pre-push gate and CI default job: `npm run ci:fast`.
+
+- Includes `ci:precommit`.
+- Includes `npm run test:integration`.
+- Includes `npm run build`.
+- Budget: under 3min.
+
+Nightly or tag-push gate: `npm run ci:full`.
+
+- Includes `ci:fast`.
+- Includes `npm run test:e2e:run` without rebuilding.
+- Budget: under 10min.
+
+Scripts:
+
+- `test:unit`: `node --import tsx --test 'tests/unit/**/*.test.ts'`
+- `test:integration`: `node --import tsx --test 'tests/integration/**/*.test.ts'`
+- `test:boundaries`: `node --import tsx --test 'tests/boundaries/**/*.test.ts'`
+- `test:e2e`: `npm run build && npm run test:e2e:run`
+- `ci:precommit`: `npm run typecheck && npm run lint && npm run test:boundaries && npm run test:unit`
+- `ci:fast`: `npm run ci:precommit && npm run test:integration && npm run build`
+- `ci:full`: `npm run ci:fast && npm run test:e2e:run`
+- `ci`: alias for `ci:fast`
+
+Gate file mapping after execution:
+
+- Pre-commit boundaries: every file under `tests/boundaries/**/*.test.ts`.
+- Pre-commit unit: every remaining file under `tests/unit/**/*.test.ts`, limited to pure logic with no filesystem or subprocess setup.
+- Pre-push integration: every file under `tests/integration/**/*.test.ts`, including the moved filesystem, XDG, git, and command tests from `tests/unit`.
+- Full e2e: every file under `tests/e2e/**/*.test.ts`.
+
+Moved from unit to integration:
+
+- `tests/unit/agents-builtins.test.ts` -> `tests/integration/agents-builtins.test.ts`
+- `tests/unit/cli-auth.test.ts` -> `tests/integration/cli-auth.test.ts`
+- `tests/unit/cli-configure-targets.test.ts` -> `tests/integration/cli-configure-targets.test.ts`
+- `tests/unit/cli-reset-uninstall.test.ts` -> `tests/integration/cli-reset-uninstall.test.ts`
+- `tests/unit/components-scan.test.ts` -> `tests/integration/components-scan.test.ts`
+- `tests/unit/context/codewiki/indexer.test.ts` -> `tests/integration/context/codewiki/indexer.test.ts`
+- `tests/unit/context/codewiki/tools.test.ts` -> `tests/integration/context/codewiki/tools.test.ts`
+- `tests/unit/context/fingerprint.test.ts` -> `tests/integration/context/fingerprint.test.ts`
+- `tests/unit/context/sibling-files.test.ts` -> `tests/integration/context/sibling-files.test.ts`
+- `tests/unit/context/state.test.ts` -> `tests/integration/context/state.test.ts`
+- `tests/unit/cwd-fallback.test.ts` -> `tests/integration/cwd-fallback.test.ts`
+- `tests/unit/dispatch-auth.test.ts` -> `tests/integration/dispatch-auth.test.ts`
+- `tests/unit/dispatch-concurrency.test.ts` -> `tests/integration/dispatch-concurrency.test.ts`
+- `tests/unit/eval-evidence.test.ts` -> `tests/integration/eval-evidence.test.ts`
+- `tests/unit/eval-runner.test.ts` -> `tests/integration/eval-runner.test.ts`
+- `tests/unit/evidence-builder.test.ts` -> `tests/integration/evidence-builder.test.ts`
+- `tests/unit/memory.test.ts` -> `tests/integration/memory.test.ts`
+- `tests/unit/prompts.test.ts` -> `tests/integration/prompts.test.ts`
+- `tests/unit/providers/knowledge-base.test.ts` -> `tests/integration/providers/knowledge-base.test.ts`
+- `tests/unit/providers/registry.test.ts` -> `tests/integration/providers/registry.test.ts`
+- `tests/unit/receipt-verify.test.ts` -> `tests/integration/receipt-verify.test.ts`
+- `tests/unit/safety-rule-packs.test.ts` -> `tests/integration/safety-rule-packs.test.ts`
+- `tests/unit/self-dev.test.ts` -> `tests/integration/self-dev.test.ts`
+- `tests/unit/session-compaction-entry.test.ts` -> `tests/integration/session-compaction-entry.test.ts`
+- `tests/unit/session.test.ts` -> `tests/integration/session.test.ts`
+- `tests/unit/slash-autocomplete.test.ts` -> `tests/integration/slash-autocomplete.test.ts`
+- `tests/unit/tools-grep.test.ts` -> `tests/integration/tools-grep.test.ts`
+- `tests/unit/tools-registry-wiring.test.ts` -> `tests/integration/tools-registry-wiring.test.ts`
+- `tests/unit/utils-git.test.ts` -> `tests/integration/utils-git.test.ts`
+- `tests/unit/workspace/git-probe.test.ts` -> `tests/integration/workspace/git-probe.test.ts`
+- `tests/unit/workspace/project-type.test.ts` -> `tests/integration/workspace/project-type.test.ts`
+- `tests/unit/workspace/snapshot.test.ts` -> `tests/integration/workspace/snapshot.test.ts`
+
+## Validation Measurements
+
+| Gate | Before | After |
+|---|---:|---:|
+| `npm run ci:precommit` | not split; old `npm run test` was 9.37s without typecheck or lint | 16.87s |
+| `npm run ci:fast` | not split; old default `npm run ci` included e2e | 23.74s |
+| `npm run ci:full` | old `npm run test` 9.37s plus `npm run test:e2e` 60.64s, not measured as one script | 82.06s |
diff --git a/package.json b/package.json
index 9e9ab36..dc17a82 100644
--- a/package.json
+++ b/package.json
@@ -67,10 +67,17 @@
 		"typecheck": "tsc -p tsconfig.tests.json",
 		"format": "biome format --write .",
 		"lint": "biome check .",
-		"check:boundaries": "node --import tsx --test 'tests/boundaries/**/*.test.ts'",
-		"test": "node --import tsx --test 'tests/unit/**/*.test.ts' 'tests/integration/**/*.test.ts' 'tests/boundaries/**/*.test.ts'",
-		"test:e2e": "npm run build && node --import tsx --test 'tests/e2e/**/*.test.ts'",
-		"ci": "npm run typecheck && npm run lint && npm run test && npm run build && npm run test:e2e",
+		"test:unit": "node --import tsx --test 'tests/unit/**/*.test.ts'",
+		"test:integration": "node --import tsx --test 'tests/integration/**/*.test.ts'",
+		"test:boundaries": "node --import tsx --test 'tests/boundaries/**/*.test.ts'",
+		"check:boundaries": "npm run test:boundaries",
+		"test": "npm run test:unit && npm run test:integration && npm run test:boundaries",
+		"test:e2e:run": "node --import tsx --test 'tests/e2e/**/*.test.ts'",
+		"test:e2e": "npm run build && npm run test:e2e:run",
+		"ci:precommit": "npm run typecheck && npm run lint && npm run test:boundaries && npm run test:unit",
+		"ci:fast": "npm run ci:precommit && npm run test:integration && npm run build",
+		"ci:full": "npm run ci:fast && npm run test:e2e:run",
+		"ci": "npm run ci:fast",
 		"hooks:install": "bash scripts/install-hooks.sh",
 		"prepublishOnly": "npm run typecheck && npm run lint && npm run build && node scripts/check-dist.mjs"
 	},
diff --git a/src/core/concurrency.ts b/src/core/concurrency.ts
index 09cf70f..9096d6a 100644
--- a/src/core/concurrency.ts
+++ b/src/core/concurrency.ts
@@ -43,10 +43,11 @@ export class TokenBucket {
 	constructor(
 		private readonly capacity: number,
 		private readonly refillPerSec: number,
+		private readonly now: () => number = Date.now,
 	) {
 		if (capacity < 1) throw new Error("TokenBucket capacity must be >= 1");
 		this.tokens = capacity;
-		this.lastRefillMs = Date.now();
+		this.lastRefillMs = this.now();
 	}
 
 	tryTake(n = 1): boolean {
@@ -57,7 +58,7 @@ export class TokenBucket {
 	}
 
 	private refill(): void {
-		const now = Date.now();
+		const now = this.now();
 		const elapsedSec = (now - this.lastRefillMs) / 1000;
 		if (elapsedSec <= 0) return;
 		this.tokens = Math.min(this.capacity, this.tokens + elapsedSec * this.refillPerSec);
diff --git a/src/tools/bash.ts b/src/tools/bash.ts
index 03667fb..aeb4c63 100644
--- a/src/tools/bash.ts
+++ b/src/tools/bash.ts
@@ -8,6 +8,10 @@ const MAX_OUTPUT_BYTES = 1_000_000;
 const TRUNCATION_MARKER = "\n[output truncated]\n";
 const CLIO_CONTROL_ENV_KEYS = ["CLIO_DEV", "CLIO_SELF_DEV", "CLIO_INTERACTIVE", "CLIO_RESUME_SESSION_ID"] as const;
 
+export interface BashToolOptions {
+	killGraceMs?: number;
+}
+
 function truncate(text: string): string {
 	return truncateUtf8(text, MAX_OUTPUT_BYTES, TRUNCATION_MARKER);
 }
@@ -34,6 +38,7 @@ function execBash(
 	cwd: string | undefined,
 	timeout: number,
 	signal?: AbortSignal,
+	options: BashToolOptions = {},
 ): Promise<ExecOutcome> {
 	return new Promise((resolve) => {
 		let aborted = false;
@@ -79,7 +84,7 @@ function execBash(
 			sendSignal("SIGTERM");
 			killGraceTimer = setTimeout(() => {
 				sendSignal("SIGKILL");
-			}, 5000);
+			}, options.killGraceMs ?? 5000);
 		};
 
 		function onAbort(): void {
@@ -142,61 +147,66 @@ function execBash(
 	});
 }
 
-export const bashTool: ToolSpec = {
-	name: ToolNames.Bash,
-	description:
-		"Execute a bash command in the current working directory via /bin/bash -lc. Returns stdout and stderr. Optionally provide a timeout in seconds (or timeout_ms in milliseconds) and a cwd. Default timeout is 5 minutes (300 seconds), enough for npm install and full ci runs.",
-	parameters: Type.Object({
-		command: Type.String({ description: "Bash command to execute." }),
-		cwd: Type.Optional(
-			Type.String({ description: "Working directory for the command. Defaults to the orchestrator cwd." }),
-		),
-		timeout: Type.Optional(Type.Number({ description: "Timeout in seconds. Alias of timeout_ms. Must be > 0." })),
-		timeout_ms: Type.Optional(
-			Type.Number({ description: "Timeout in milliseconds. Defaults to 300000 (5 min). Must be > 0." }),
-		),
-	}),
-	baseActionClass: "execute",
-	executionMode: "sequential",
-	async run(args, options): Promise<ToolResult> {
-		if (typeof args.command !== "string" || args.command.length === 0) {
-			return { kind: "error", message: "bash: missing command argument" };
-		}
-		const cwd = typeof args.cwd === "string" ? args.cwd : undefined;
-		const timeoutMsArg = typeof args.timeout_ms === "number" && args.timeout_ms > 0 ? args.timeout_ms : null;
-		const timeoutSecArg =
-			timeoutMsArg === null && typeof args.timeout === "number" && args.timeout > 0 ? args.timeout * 1000 : null;
-		const timeout = timeoutMsArg ?? timeoutSecArg ?? 300_000;
-		try {
-			const { error, stdout, stderr, aborted, timedOut, outputCapped } = await execBash(
-				args.command,
-				cwd,
-				timeout,
-				options?.signal,
-			);
-			if (aborted) {
-				return { kind: "error", message: "bash: command aborted" };
+export function createBashTool(toolOptions: BashToolOptions = {}): ToolSpec {
+	return {
+		name: ToolNames.Bash,
+		description:
+			"Execute a bash command in the current working directory via /bin/bash -lc. Returns stdout and stderr. Optionally provide a timeout in seconds (or timeout_ms in milliseconds) and a cwd. Default timeout is 5 minutes (300 seconds), enough for npm install and full ci runs.",
+		parameters: Type.Object({
+			command: Type.String({ description: "Bash command to execute." }),
+			cwd: Type.Optional(
+				Type.String({ description: "Working directory for the command. Defaults to the orchestrator cwd." }),
+			),
+			timeout: Type.Optional(Type.Number({ description: "Timeout in seconds. Alias of timeout_ms. Must be > 0." })),
+			timeout_ms: Type.Optional(
+				Type.Number({ description: "Timeout in milliseconds. Defaults to 300000 (5 min). Must be > 0." }),
+			),
+		}),
+		baseActionClass: "execute",
+		executionMode: "sequential",
+		async run(args, options): Promise<ToolResult> {
+			if (typeof args.command !== "string" || args.command.length === 0) {
+				return { kind: "error", message: "bash: missing command argument" };
 			}
-			if (timedOut) {
-				return { kind: "error", message: `bash: command timed out after ${timeout}ms` };
-			}
-			if (outputCapped) {
-				return { kind: "error", message: `bash: command output exceeded ${MAX_OUTPUT_BYTES * 2} bytes` };
-			}
-			if (error) {
-				const code = typeof error.code === "number" ? error.code : (error as { code?: string }).code;
-				const tail = stderr.length > 0 ? stderr : stdout;
-				const message = `bash: command failed (exit ${code ?? "?"}): ${truncate(tail).trim() || error.message}`;
-				return { kind: "error", message };
+			const cwd = typeof args.cwd === "string" ? args.cwd : undefined;
+			const timeoutMsArg = typeof args.timeout_ms === "number" && args.timeout_ms > 0 ? args.timeout_ms : null;
+			const timeoutSecArg =
+				timeoutMsArg === null && typeof args.timeout === "number" && args.timeout > 0 ? args.timeout * 1000 : null;
+			const timeout = timeoutMsArg ?? timeoutSecArg ?? 300_000;
+			try {
+				const { error, stdout, stderr, aborted, timedOut, outputCapped } = await execBash(
+					args.command,
+					cwd,
+					timeout,
+					options?.signal,
+					toolOptions,
+				);
+				if (aborted) {
+					return { kind: "error", message: "bash: command aborted" };
+				}
+				if (timedOut) {
+					return { kind: "error", message: `bash: command timed out after ${timeout}ms` };
+				}
+				if (outputCapped) {
+					return { kind: "error", message: `bash: command output exceeded ${MAX_OUTPUT_BYTES * 2} bytes` };
+				}
+				if (error) {
+					const code = typeof error.code === "number" ? error.code : (error as { code?: string }).code;
+					const tail = stderr.length > 0 ? stderr : stdout;
+					const message = `bash: command failed (exit ${code ?? "?"}): ${truncate(tail).trim() || error.message}`;
+					return { kind: "error", message };
+				}
+				const combined =
+					stderr.length > 0 ? `${stdout}${stdout.endsWith("\n") || stdout.length === 0 ? "" : "\n"}${stderr}` : stdout;
+				return { kind: "ok", output: truncate(combined) };
+			} catch (err) {
+				const msg = err instanceof Error ? err.message : String(err);
+				return { kind: "error", message: `bash: ${msg}` };
 			}
-			const combined =
-				stderr.length > 0 ? `${stdout}${stdout.endsWith("\n") || stdout.length === 0 ? "" : "\n"}${stderr}` : stdout;
-			return { kind: "ok", output: truncate(combined) };
-		} catch (err) {
-			const msg = err instanceof Error ? err.message : String(err);
-			return { kind: "error", message: `bash: ${msg}` };
-		}
-	},
-};
+		},
+	};
+}
+
+export const bashTool: ToolSpec = createBashTool();
 
 export { buildToolEnv, truncateUtf8 };
diff --git a/tests/unit/agents-builtins.test.ts b/tests/integration/agents-builtins.test.ts
similarity index 100%
rename from tests/unit/agents-builtins.test.ts
rename to tests/integration/agents-builtins.test.ts
diff --git a/tests/unit/cli-auth.test.ts b/tests/integration/cli-auth.test.ts
similarity index 100%
rename from tests/unit/cli-auth.test.ts
rename to tests/integration/cli-auth.test.ts
diff --git a/tests/unit/cli-configure-targets.test.ts b/tests/integration/cli-configure-targets.test.ts
similarity index 100%
rename from tests/unit/cli-configure-targets.test.ts
rename to tests/integration/cli-configure-targets.test.ts
diff --git a/tests/unit/cli-reset-uninstall.test.ts b/tests/integration/cli-reset-uninstall.test.ts
similarity index 100%
rename from tests/unit/cli-reset-uninstall.test.ts
rename to tests/integration/cli-reset-uninstall.test.ts
diff --git a/tests/unit/components-scan.test.ts b/tests/integration/components-scan.test.ts
similarity index 100%
rename from tests/unit/components-scan.test.ts
rename to tests/integration/components-scan.test.ts
diff --git a/tests/unit/context/codewiki/indexer.test.ts b/tests/integration/context/codewiki/indexer.test.ts
similarity index 100%
rename from tests/unit/context/codewiki/indexer.test.ts
rename to tests/integration/context/codewiki/indexer.test.ts
diff --git a/tests/unit/context/codewiki/tools.test.ts b/tests/integration/context/codewiki/tools.test.ts
similarity index 100%
rename from tests/unit/context/codewiki/tools.test.ts
rename to tests/integration/context/codewiki/tools.test.ts
diff --git a/tests/unit/context/fingerprint.test.ts b/tests/integration/context/fingerprint.test.ts
similarity index 100%
rename from tests/unit/context/fingerprint.test.ts
rename to tests/integration/context/fingerprint.test.ts
diff --git a/tests/unit/context/sibling-files.test.ts b/tests/integration/context/sibling-files.test.ts
similarity index 100%
rename from tests/unit/context/sibling-files.test.ts
rename to tests/integration/context/sibling-files.test.ts
diff --git a/tests/unit/context/state.test.ts b/tests/integration/context/state.test.ts
similarity index 100%
rename from tests/unit/context/state.test.ts
rename to tests/integration/context/state.test.ts
diff --git a/tests/integration/core-xdg.test.ts b/tests/integration/core-xdg.test.ts
new file mode 100644
index 0000000..49045c9
--- /dev/null
+++ b/tests/integration/core-xdg.test.ts
@@ -0,0 +1,54 @@
+import { ok, strictEqual } from "node:assert/strict";
+import { mkdtempSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, it } from "node:test";
+import { clioCacheDir, clioConfigDir, clioDataDir, resetXdgCache } from "../../src/core/xdg.js";
+
+describe("core/xdg", () => {
+	const original = {
+		CLIO_HOME: process.env.CLIO_HOME,
+		CLIO_DATA_DIR: process.env.CLIO_DATA_DIR,
+		CLIO_CONFIG_DIR: process.env.CLIO_CONFIG_DIR,
+		CLIO_CACHE_DIR: process.env.CLIO_CACHE_DIR,
+	};
+	let scratch: string;
+
+	beforeEach(() => {
+		scratch = mkdtempSync(join(tmpdir(), "clio-xdg-"));
+		process.env.CLIO_HOME = scratch;
+		process.env.CLIO_DATA_DIR = join(scratch, "data");
+		process.env.CLIO_CONFIG_DIR = join(scratch, "config");
+		process.env.CLIO_CACHE_DIR = join(scratch, "cache");
+		resetXdgCache();
+	});
+
+	afterEach(() => {
+		for (const key of Object.keys(process.env)) {
+			if (!(key in original)) Reflect.deleteProperty(process.env, key);
+		}
+		for (const [key, value] of Object.entries(original)) {
+			if (value !== undefined) process.env[key] = value;
+		}
+		rmSync(scratch, { recursive: true, force: true });
+		resetXdgCache();
+	});
+
+	it("CLIO_HOME routes all three dirs under it", () => {
+		Reflect.deleteProperty(process.env, "CLIO_DATA_DIR");
+		Reflect.deleteProperty(process.env, "CLIO_CONFIG_DIR");
+		Reflect.deleteProperty(process.env, "CLIO_CACHE_DIR");
+		resetXdgCache();
+
+		ok(clioDataDir().startsWith(scratch));
+		ok(clioConfigDir().startsWith(scratch));
+		ok(clioCacheDir().startsWith(scratch));
+	});
+
+	it("individual overrides take precedence over CLIO_HOME", () => {
+		const customData = join(scratch, "custom-data");
+		process.env.CLIO_DATA_DIR = customData;
+		resetXdgCache();
+		strictEqual(clioDataDir(), customData);
+	});
+});
diff --git a/tests/unit/cwd-fallback.test.ts b/tests/integration/cwd-fallback.test.ts
similarity index 100%
rename from tests/unit/cwd-fallback.test.ts
rename to tests/integration/cwd-fallback.test.ts
diff --git a/tests/unit/dispatch-auth.test.ts b/tests/integration/dispatch-auth.test.ts
similarity index 100%
rename from tests/unit/dispatch-auth.test.ts
rename to tests/integration/dispatch-auth.test.ts
diff --git a/tests/unit/dispatch-concurrency.test.ts b/tests/integration/dispatch-concurrency.test.ts
similarity index 100%
rename from tests/unit/dispatch-concurrency.test.ts
rename to tests/integration/dispatch-concurrency.test.ts
diff --git a/tests/unit/eval-evidence.test.ts b/tests/integration/eval-evidence.test.ts
similarity index 100%
rename from tests/unit/eval-evidence.test.ts
rename to tests/integration/eval-evidence.test.ts
diff --git a/tests/unit/eval-runner.test.ts b/tests/integration/eval-runner.test.ts
similarity index 100%
rename from tests/unit/eval-runner.test.ts
rename to tests/integration/eval-runner.test.ts
diff --git a/tests/unit/evidence-builder.test.ts b/tests/integration/evidence-builder.test.ts
similarity index 100%
rename from tests/unit/evidence-builder.test.ts
rename to tests/integration/evidence-builder.test.ts
diff --git a/tests/unit/memory.test.ts b/tests/integration/memory.test.ts
similarity index 100%
rename from tests/unit/memory.test.ts
rename to tests/integration/memory.test.ts
diff --git a/tests/unit/prompts.test.ts b/tests/integration/prompts.test.ts
similarity index 100%
rename from tests/unit/prompts.test.ts
rename to tests/integration/prompts.test.ts
diff --git a/tests/unit/providers/knowledge-base.test.ts b/tests/integration/providers/knowledge-base.test.ts
similarity index 100%
rename from tests/unit/providers/knowledge-base.test.ts
rename to tests/integration/providers/knowledge-base.test.ts
diff --git a/tests/unit/providers/registry.test.ts b/tests/integration/providers/registry.test.ts
similarity index 100%
rename from tests/unit/providers/registry.test.ts
rename to tests/integration/providers/registry.test.ts
diff --git a/tests/unit/receipt-verify.test.ts b/tests/integration/receipt-verify.test.ts
similarity index 100%
rename from tests/unit/receipt-verify.test.ts
rename to tests/integration/receipt-verify.test.ts
diff --git a/tests/unit/safety-rule-packs.test.ts b/tests/integration/safety-rule-packs.test.ts
similarity index 100%
rename from tests/unit/safety-rule-packs.test.ts
rename to tests/integration/safety-rule-packs.test.ts
diff --git a/tests/unit/self-dev.test.ts b/tests/integration/self-dev.test.ts
similarity index 100%
rename from tests/unit/self-dev.test.ts
rename to tests/integration/self-dev.test.ts
diff --git a/tests/unit/session-compaction-entry.test.ts b/tests/integration/session-compaction-entry.test.ts
similarity index 100%
rename from tests/unit/session-compaction-entry.test.ts
rename to tests/integration/session-compaction-entry.test.ts
diff --git a/tests/unit/session.test.ts b/tests/integration/session.test.ts
similarity index 100%
rename from tests/unit/session.test.ts
rename to tests/integration/session.test.ts
diff --git a/tests/unit/slash-autocomplete.test.ts b/tests/integration/slash-autocomplete.test.ts
similarity index 100%
rename from tests/unit/slash-autocomplete.test.ts
rename to tests/integration/slash-autocomplete.test.ts
diff --git a/tests/unit/tools-grep.test.ts b/tests/integration/tools-grep.test.ts
similarity index 100%
rename from tests/unit/tools-grep.test.ts
rename to tests/integration/tools-grep.test.ts
diff --git a/tests/unit/tools-registry-wiring.test.ts b/tests/integration/tools-registry-wiring.test.ts
similarity index 100%
rename from tests/unit/tools-registry-wiring.test.ts
rename to tests/integration/tools-registry-wiring.test.ts
diff --git a/tests/unit/utils-git.test.ts b/tests/integration/utils-git.test.ts
similarity index 100%
rename from tests/unit/utils-git.test.ts
rename to tests/integration/utils-git.test.ts
diff --git a/tests/unit/workspace/git-probe.test.ts b/tests/integration/workspace/git-probe.test.ts
similarity index 100%
rename from tests/unit/workspace/git-probe.test.ts
rename to tests/integration/workspace/git-probe.test.ts
diff --git a/tests/unit/workspace/project-type.test.ts b/tests/integration/workspace/project-type.test.ts
similarity index 100%
rename from tests/unit/workspace/project-type.test.ts
rename to tests/integration/workspace/project-type.test.ts
diff --git a/tests/unit/workspace/snapshot.test.ts b/tests/integration/workspace/snapshot.test.ts
similarity index 100%
rename from tests/unit/workspace/snapshot.test.ts
rename to tests/integration/workspace/snapshot.test.ts
diff --git a/tests/unit/bash-tool-env.test.ts b/tests/unit/bash-tool-env.test.ts
index 1377baf..3b08cc8 100644
--- a/tests/unit/bash-tool-env.test.ts
+++ b/tests/unit/bash-tool-env.test.ts
@@ -1,6 +1,6 @@
 import { ok, strictEqual } from "node:assert/strict";
 import { afterEach, describe, it } from "node:test";
-import { bashTool, buildToolEnv } from "../../src/tools/bash.js";
+import { bashTool, buildToolEnv, createBashTool } from "../../src/tools/bash.js";
 
 const ORIGINAL_ENV = { ...process.env };
 
@@ -83,6 +83,8 @@ describe("bash tool environment", () => {
 
 	it("escalates aborted commands that ignore sigterm", async () => {
 		const controller = new AbortController();
+		const killGraceMs = 50;
+		const testBashTool = createBashTool({ killGraceMs });
 		// The shell runs with `-l`, which on cold CI runners can spend a few
 		// hundred ms reading /etc/profile before reaching the user command. If
 		// SIGTERM arrives before the trap is installed, bash dies at the
@@ -95,7 +97,7 @@ describe("bash tool environment", () => {
 		// monotonic schedule. libuv-backed `setTimeout` already uses the
 		// monotonic clock; the test just needs to read the same source.
 		const startedAt = performance.now();
-		const started = bashTool.run(
+		const started = testBashTool.run(
 			{
 				command: 'trap "" TERM; end=$((SECONDS + 12)); while [ "$SECONDS" -lt "$end" ]; do sleep 1; done',
 				timeout_ms: 16_000,
@@ -108,8 +110,8 @@ describe("bash tool environment", () => {
 
 		strictEqual(result.kind, "error");
 		if (result.kind === "error") strictEqual(result.message, "bash: command aborted");
-		ok(elapsedMs >= 1500 + 4_500, `expected abort escalation after the grace period, got ${elapsedMs}ms`);
-		ok(elapsedMs < 1500 + 9_000, `expected abort escalation within window, got ${elapsedMs}ms`);
+		ok(elapsedMs >= 1500 + killGraceMs - 20, `expected abort escalation after the grace period, got ${elapsedMs}ms`);
+		ok(elapsedMs < 1500 + 2_000, `expected abort escalation within window, got ${elapsedMs}ms`);
 	});
 
 	it("reports output cap exits explicitly", async () => {
diff --git a/tests/unit/chat-panel.test.ts b/tests/unit/chat-panel.test.ts
index a4877aa..e6144a6 100644
--- a/tests/unit/chat-panel.test.ts
+++ b/tests/unit/chat-panel.test.ts
@@ -665,8 +665,10 @@ describe("createCoalescingChatRenderer", () => {
 		renderer: ReturnType<typeof createCoalescingChatRenderer>;
 		applied: ChatLoopEvent[];
 		count: () => number;
+		fireTimer: () => void;
 	} {
 		let renderCount = 0;
+		let pendingTimer: (() => void) | null = null;
 		const applied: ChatLoopEvent[] = [];
 		const fakePanel = {
 			appendUser: () => {},
@@ -688,16 +690,28 @@ describe("createCoalescingChatRenderer", () => {
 				renderCount += 1;
 			},
 			coalesceMs,
+			setTimer: (cb) => {
+				pendingTimer = cb;
+				return cb;
+			},
+			clearTimer: (id) => {
+				if (pendingTimer === id) pendingTimer = null;
+			},
 		});
 		return {
 			renderer,
 			applied,
 			count: () => renderCount,
+			fireTimer: () => {
+				const cb = pendingTimer;
+				pendingTimer = null;
+				cb?.();
+			},
 		};
 	}
 
-	it("coalesces 100 text_delta events into at most two requestRender calls", async () => {
-		const { renderer, applied, count } = setup(5);
+	it("coalesces 100 text_delta events into one requestRender call", () => {
+		const { renderer, applied, count, fireTimer } = setup(5);
 		for (let i = 0; i < 100; i += 1) {
 			renderer.applyEvent({ type: "text_delta", contentIndex: 0, delta: "x", partialText: "x".repeat(i + 1) });
 		}
@@ -706,14 +720,13 @@ describe("createCoalescingChatRenderer", () => {
 		strictEqual(applied.length, 100);
 		// Inside the same microtask the coalesce timer has not fired yet.
 		strictEqual(count(), 0, "no synchronous render under coalescing");
-		// Wait past the coalesce window. One render covers all 100 deltas.
-		await new Promise((r) => setTimeout(r, 25));
-		ok(count() <= 2, `expected at most 2 renders, got ${count()}`);
-		ok(count() >= 1, `expected at least 1 render, got ${count()}`);
+		// One timer render covers all 100 deltas.
+		fireTimer();
+		strictEqual(count(), 1);
 	});
 
-	it("message_end after deltas renders synchronously and drops the pending timer", async () => {
-		const { renderer, count } = setup(5);
+	it("message_end after deltas renders synchronously and drops the pending timer", () => {
+		const { renderer, count, fireTimer } = setup(5);
 		renderer.applyEvent({ type: "text_delta", contentIndex: 0, delta: "hello", partialText: "hello" });
 		// Pending timer scheduled, no synchronous render yet.
 		strictEqual(count(), 0);
@@ -723,26 +736,23 @@ describe("createCoalescingChatRenderer", () => {
 		});
 		// message_end forces a synchronous render and cancels the pending timer.
 		strictEqual(count(), 1);
-		// Coalesce window passes; no extra render fires since the timer was cancelled.
-		await new Promise((r) => setTimeout(r, 25));
+		fireTimer();
 		strictEqual(count(), 1, "pending timer must have been cancelled by message_end");
 	});
 
-	it("agent_end renders synchronously even with no pending deltas", async () => {
+	it("agent_end renders synchronously even with no pending deltas", () => {
 		const { renderer, count } = setup(5);
 		renderer.applyEvent({ type: "agent_end", messages: [] });
 		strictEqual(count(), 1);
-		await new Promise((r) => setTimeout(r, 25));
-		strictEqual(count(), 1);
 	});
 
-	it("flush() drains a pending coalesce timer and renders once", async () => {
-		const { renderer, count } = setup(50);
+	it("flush() drains a pending coalesce timer and renders once", () => {
+		const { renderer, count, fireTimer } = setup(50);
 		renderer.applyEvent({ type: "text_delta", contentIndex: 0, delta: "x", partialText: "x" });
 		strictEqual(count(), 0);
 		renderer.flush();
 		strictEqual(count(), 1);
-		await new Promise((r) => setTimeout(r, 75));
+		fireTimer();
 		strictEqual(count(), 1, "no double-render after explicit flush");
 	});
 
diff --git a/tests/unit/core.test.ts b/tests/unit/core.test.ts
index 79c5f7f..adc85a9 100644
--- a/tests/unit/core.test.ts
+++ b/tests/unit/core.test.ts
@@ -1,11 +1,7 @@
-import { ok, strictEqual, throws } from "node:assert/strict";
-import { mkdtempSync, rmSync } from "node:fs";
-import { tmpdir } from "node:os";
-import { join } from "node:path";
-import { afterEach, beforeEach, describe, it } from "node:test";
+import { strictEqual, throws } from "node:assert/strict";
+import { describe, it } from "node:test";
 import { Semaphore, TokenBucket } from "../../src/core/concurrency.js";
 import { ALL_TOOL_NAMES, ToolNames } from "../../src/core/tool-names.js";
-import { clioCacheDir, clioConfigDir, clioDataDir, resetXdgCache } from "../../src/core/xdg.js";
 
 describe("core/tool-names", () => {
 	it("ALL_TOOL_NAMES matches enum values", () => {
@@ -21,47 +17,6 @@ describe("core/tool-names", () => {
 	});
 });
 
-describe("core/xdg", () => {
-	const original = {
-		CLIO_HOME: process.env.CLIO_HOME,
-		CLIO_DATA_DIR: process.env.CLIO_DATA_DIR,
-		CLIO_CONFIG_DIR: process.env.CLIO_CONFIG_DIR,
-		CLIO_CACHE_DIR: process.env.CLIO_CACHE_DIR,
-	};
-	let scratch: string;
-
-	beforeEach(() => {
-		scratch = mkdtempSync(join(tmpdir(), "clio-xdg-"));
-		process.env.CLIO_HOME = scratch;
-		Reflect.deleteProperty(process.env, "CLIO_DATA_DIR");
-		Reflect.deleteProperty(process.env, "CLIO_CONFIG_DIR");
-		Reflect.deleteProperty(process.env, "CLIO_CACHE_DIR");
-		resetXdgCache();
-	});
-
-	afterEach(() => {
-		for (const [k, v] of Object.entries(original)) {
-			if (v === undefined) Reflect.deleteProperty(process.env, k);
-			else process.env[k] = v;
-		}
-		rmSync(scratch, { recursive: true, force: true });
-		resetXdgCache();
-	});
-
-	it("CLIO_HOME routes all three dirs under it", () => {
-		ok(clioDataDir().startsWith(scratch));
-		ok(clioConfigDir().startsWith(scratch));
-		ok(clioCacheDir().startsWith(scratch));
-	});
-
-	it("individual overrides take precedence over CLIO_HOME", () => {
-		const customData = join(scratch, "custom-data");
-		process.env.CLIO_DATA_DIR = customData;
-		resetXdgCache();
-		strictEqual(clioDataDir(), customData);
-	});
-});
-
 describe("core/concurrency/Semaphore", () => {
 	it("rejects permits < 1", () => {
 		throws(() => new Semaphore(0));
@@ -96,11 +51,12 @@ describe("core/concurrency/TokenBucket", () => {
 		strictEqual(b.tryTake(1), false);
 	});
 
-	it("refills at given rate", async () => {
-		const b = new TokenBucket(2, 10);
+	it("refills at given rate", () => {
+		let now = 1_000;
+		const b = new TokenBucket(2, 10, () => now);
 		b.tryTake(2);
 		strictEqual(b.tryTake(1), false);
-		await new Promise((resolve) => setTimeout(resolve, 200));
+		now += 200;
 		strictEqual(b.tryTake(1), true);
 	});
 });