diff --git a/.claude/skills/clio-testing/SKILL.md b/.claude/skills/clio-testing/SKILL.md new file mode 100644 index 0000000..7428bf3 --- /dev/null +++ b/.claude/skills/clio-testing/SKILL.md @@ -0,0 +1,254 @@ +--- +name: clio-testing +description: Use when working on clio-coder - writing or modifying any code under src/, or verifying features end-to-end. Covers the four-layer test suite (unit / integration / boundaries / e2e), the pty harness for driving the interactive TUI, and the iteration loop for feature development. Activate on any src/ edit, before committing, or when the user asks about testing, verifying, probing, or whether clio works. +allowed-tools: Bash, Read, Edit, Write, Glob, Grep +--- + +# clio-coder testing - what runs, what to use, how to iterate + +The repo replaced ~14,700 lines of `scripts/diag-*.ts` theater with a minimal node:test suite (~1,700 lines). Everything lives under `tests/`. This skill tells you how to use it. + +## Topology + +``` +tests/ +├── unit/ pure logic, no I/O, <500ms total +├── integration/ real fs ops in a scratch XDG home (ledger, credentials) +├── boundaries/ static analysis of src/ (import rules + prompt fragments) +├── e2e/ real `clio` binary via spawn (non-interactive) + node-pty (TUI) +└── harness/ + ├── spawn.ts runCli() + makeScratchHome() for `--version`, `doctor`, ... + └── pty.ts spawnClioPty() with send/expect/wait/kill for the TUI +``` + +## Commands + +```bash +npm run typecheck # tsc -p tsconfig.tests.json, includes tests/ +npm run lint # biome check . +npm run test:unit # pure logic only, no filesystem or subprocesses +npm run test:integration # filesystem, XDG, subprocess, and domain wiring +npm run test:boundaries # import invariants and prompt-fragment checks +npm run test # unit + integration + boundaries +npm run test:e2e # builds first, then spawn + pty e2e +npm run ci:precommit # typecheck + lint + boundaries + unit +npm run ci:fast # ci:precommit + integration + build +npm run ci:full # ci:fast + e2e without rebuilding +npm run ci # alias for ci:fast +``` + +`npm run ci` is what the default pull request workflow runs. `npm run ci:full` is the nightly and tag-push gate that includes the pty e2e suite. + +## Which layer catches what + +| Change site | Run this first | Why | +|---|---|---| +| `src/domains//*.ts` pure logic | `npm run test:unit` | covered by unit tests | +| `src/domains/dispatch/state.ts` | `npm run test:integration` | ledger integration test | +| `src/domains/providers/credentials.ts` | `npm run test:integration` | credentials integration test | +| `src/domains/prompts/fragments/*.md` | `npm run test:boundaries` | `boundaries/prompts.test.ts` validates frontmatter + budgets | +| any `src/` import change | `npm run test` | `boundaries/boundaries.test.ts` enforces rule1/2/3 | +| `src/cli/*.ts` | `npm run test:e2e` (non-interactive part) | spawn harness | +| `src/interactive/*.ts` or `src/entry/orchestrator.ts` | `npm run test:e2e` (pty part) | pty harness | + +## Boundary rules you must not break + +`tests/boundaries/check-boundaries.ts` enforces three rules. If `npm run test` reports violations: + +- **rule1**: only `src/engine/**` may value-import `@mariozechner/pi-*`. Type-only imports are allowed where they erase at compile time; keep them narrow and prefer existing Clio contracts or `src/engine/types.ts` re-exports when they already cover the need. +- **rule2**: `src/worker/**` never imports `src/domains/**`. Shared types go through contracts. +- **rule3**: `src/domains//**` never imports `src/domains//extension.ts`. Cross-domain access goes through the contract exported from `src/domains//index.ts`. + +## Writing new unit tests + +Use `node:test` + `node:assert/strict`. One file per domain cluster, grouped by `describe`: + +```ts +import { strictEqual } from "node:assert/strict"; +import { describe, it } from "node:test"; +import { myExport } from "../../src/domains/x/y.js"; // note .js extension + +describe("x/y", () => { + it("does the thing", () => { + strictEqual(myExport(input), expected); + }); +}); +``` + +**Pitfalls**: +- Import paths end in `.js`, not `.ts` (NodeNext module resolution). +- `tsconfig.tests.json` uses strict + `noUncheckedIndexedAccess` + `exactOptionalPropertyTypes`. Array access returns `T | undefined`; narrow before use. +- Biome bans `delete obj.key` on hot paths. Use `Reflect.deleteProperty(obj, "key")` when you genuinely need to delete (e.g. cleaning `process.env` in a test). + +## Writing new integration tests + +If a test touches the filesystem, use a scratch XDG home. Pattern: + +```ts +const ORIGINAL_ENV = { ...process.env }; +let scratch: string; + +beforeEach(() => { + scratch = mkdtempSync(join(tmpdir(), "clio-myfeat-")); + process.env.CLIO_HOME = scratch; + process.env.CLIO_DATA_DIR = join(scratch, "data"); + process.env.CLIO_CONFIG_DIR = join(scratch, "config"); + process.env.CLIO_CACHE_DIR = join(scratch, "cache"); + resetXdgCache(); +}); + +afterEach(() => { + for (const k of Object.keys(process.env)) { + if (!(k in ORIGINAL_ENV)) Reflect.deleteProperty(process.env, k); + } + for (const [k, v] of Object.entries(ORIGINAL_ENV)) { + if (v !== undefined) process.env[k] = v; + } + rmSync(scratch, { recursive: true, force: true }); + resetXdgCache(); +}); +``` + +`resetXdgCache()` from `src/core/xdg.js` clears the module-level cache so env overrides take effect. + +## Writing new e2e tests (non-interactive) + +Use `tests/harness/spawn.ts`. The harness auto-runs from repo root and points at `dist/cli/index.js`, so **you must build before running** (`npm run test:e2e` does this automatically). + +```ts +import { makeScratchHome, runCli } from "../harness/spawn.js"; + +const scratch = makeScratchHome(); +try { + await runCli(["doctor", "--fix"], { env: scratch.env }); // bootstrap + const result = await runCli(["my-command", "--json"], { + env: scratch.env, + timeoutMs: 20_000, + }); + strictEqual(result.code, 0); + const parsed = JSON.parse(result.stdout); + ok(Array.isArray(parsed)); +} finally { + scratch.cleanup(); +} +``` + +`runCli` returns `{ code, signal, stdout, stderr }`. Always pass `env: scratch.env` so the test doesn't pollute or depend on the user's real XDG home. + +## Writing new e2e tests (interactive TUI) + +Use `tests/harness/pty.ts`. `spawnClioPty()` returns: + +```ts +interface PtyHandle { + send(keys: string): void; // write raw bytes to the tty + expect(pattern: RegExp | string, timeoutMs?: number): Promise; // wait for a match + output(): string; // full buffer so far (includes ANSI) + wait(timeoutMs?: number): Promise<{ code, signal }>; // resolve on process exit + kill(signal?: string): void; + resize(cols: number, rows: number): void; +} +``` + +Minimal smoke pattern: + +```ts +const scratch = makeScratchHome(); +await runCli(["doctor", "--fix"], { env: scratch.env }); // bootstrap first +const p = spawnClioPty({ env: scratch.env }); +try { + await p.expect(/Clio Coder/, 15_000); + p.send("/quit\r"); // \r for Enter in a pty + const exit = await p.wait(10_000); + strictEqual(exit.code, 0); +} finally { + p.kill(); + scratch.cleanup(); +} +``` + +**Keystrokes to know**: +- `\r` is Enter +- `\x1b` is Escape (closes overlays) +- `\x04` is Ctrl-D (shuts down the TUI) +- `\x03` is Ctrl-C (raises SIGINT; passes through from overlays) +- `\x1b[A`/`\x1b[B`/`\x1b[C`/`\x1b[D` are arrow keys (up/down/right/left) + +**Slash commands available**: `/quit`, `/help`, `/hotkeys`, `/run [options] `, `/targets`, `/connect [target]`, `/disconnect [target]`, `/cost`, `/receipts [verify ]`, `/thinking`, `/model [pattern[:thinking]]`, `/scoped-models`, `/settings`, `/resume`, `/new`, `/tree`, `/fork`, `/compact [instructions]`. Unknown `/foo` is routed as chat input. + +**Pitfalls**: +- `expect` patterns match against the raw pty buffer, which contains ANSI escape sequences. Match by stable text (e.g. `/Clio Coder/`, `/Total:\s+\$0\.00/`) rather than exact layout. Strip ANSI only if you're formatting output for display (`.replace(/\x1b\[[0-9;?]*[A-Za-z]/g, "")`). +- Always wrap in `try/finally` with `p.kill()` so a hung pty doesn't leak. +- Always `await runCli(["doctor", "--fix"], ...)` before spawning the TUI on a scratch home, or first-run paths may hit unexpected states. The `install` subcommand was retired in v0.1.4 and is now rejected with exit 2. + +## Using the pty harness as a probe (no test file needed) + +When you want to poke at clio interactively to see if something works without writing a permanent test, drop a throwaway script in `/tmp/probe.ts` that imports the harness and runs `npx tsx /tmp/probe.ts`. This is faster than the test framework for one-off exploration. Delete it when done; **don't** leave exploratory scripts under `tests/` or `scripts/` (that's exactly what we eradicated). + +Example shape: + +```ts +import { spawnClioPty, makeScratchHome } from "/absolute/path/tests/harness/pty.ts"; +import { runCli } from "/absolute/path/tests/harness/spawn.ts"; + +const scratch = makeScratchHome(); +await runCli(["doctor", "--fix"], { env: scratch.env }); +const p = spawnClioPty({ env: scratch.env }); +await p.expect(/Clio Coder/); +p.send("/targets\r"); +await p.expect(/target|endpoint/i); +console.log("tail:", p.output().slice(-400)); +p.send("/quit\r"); +await p.wait(); +scratch.cleanup(); +``` + +## The iteration loop + +When adding a feature: + +1. Write the code. +2. `npm run ci:precommit` - typecheck, lint, boundaries, and pure unit tests. +3. `npm run ci:fast` - adds integration and build. This is the default PR gate. +4. If you touched CLI or TUI code: `npm run test:e2e`. Rebuilds first, then runs spawn + pty suites. +5. If an e2e test fails and you need to see what the TUI is actually showing, drop a probe script (pattern above) and inspect `p.output()`. +6. `npm run ci` before committing. Use `npm run ci:full` before release tags or when changing e2e harness behavior. + +When debugging: + +1. `npm run test -- --test-only` isolates `it.only` / `describe.only` blocks. +2. `node --import tsx --test 'tests/unit/myfile.test.ts'` runs a single file. +3. For pty flakiness, bump `timeoutMs` on the failing `expect` call before blaming the harness. TUI paints can lag behind input on slower machines. + +## What NOT to do + +- **Don't** add `scripts/diag-*.ts` or any `scripts/verify-*.ts`. That's the pattern we eradicated. If it's a test, it belongs under `tests/`. If it's a one-off probe, use `/tmp/` and delete when done. +- **Don't** bypass boundary rules with `// biome-ignore` or exclude patterns. If rule1/2/3 fires, fix the import; don't silence the check. +- **Don't** write tests against the simulated TUI (i.e. don't mock `pi-tui` and assert on synthetic frame contents). Test real commands via spawn, or real TUI via pty. The pre-refactor diag scripts spent 2,478 lines simulating the TUI; that coverage is now rightly zero. +- **Don't** commit with red tests. If a test fails on pre-existing `src/` state that predates your change, report it and ask. Don't delete or skip the test. + +## Where tests live, concretely + +| Area | File | +|---|---| +| safety domain | `tests/unit/safety.test.ts` | +| dispatch (validation/admission/backoff) | `tests/unit/dispatch.test.ts` | +| dispatch ledger and worker wiring (fs) | `tests/integration/ledger.test.ts`, `tests/integration/dispatch-concurrency.test.ts` | +| providers catalog/matcher/resolver | `tests/unit/providers/*.test.ts` | +| providers registry and knowledge base (fs) | `tests/integration/providers/registry.test.ts`, `tests/integration/providers/knowledge-base.test.ts` | +| providers credentials (fs) | `tests/integration/credentials.test.ts` | +| agents frontmatter + fleet parser | `tests/unit/agents.test.ts` | +| built-in agent files (fs) | `tests/integration/agents-builtins.test.ts` | +| prompts hash + canonicalJson | `tests/integration/prompts.test.ts` | +| core tool-names/concurrency | `tests/unit/core.test.ts` | +| core xdg (fs) | `tests/integration/core-xdg.test.ts` | +| CLI command internals (fs/XDG) | `tests/integration/cli-*.test.ts` | +| session, memory, evidence artifacts (fs) | `tests/integration/session.test.ts`, `tests/integration/memory.test.ts`, `tests/integration/evidence-builder.test.ts` | +| import boundary rules | `tests/boundaries/boundaries.test.ts` | +| prompt fragment manifests | `tests/boundaries/prompts.test.ts` | +| non-interactive CLI smoke | `tests/e2e/cli.test.ts` | +| interactive TUI smoke | `tests/e2e/interactive.test.ts` | +| spawn harness | `tests/harness/spawn.ts` | +| pty harness | `tests/harness/pty.ts` | + +Add new unit tests next to the closest existing file. Don't create a new file unless you're covering a new domain cluster. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b692d40..8172e03 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,7 @@ concurrency: cancel-in-progress: true jobs: - ci: + fast: runs-on: ubuntu-latest timeout-minutes: 10 steps: @@ -31,4 +31,4 @@ jobs: sudo apt-get update -qq sudo apt-get install -y --no-install-recommends fd-find - run: npm ci --prefer-offline --no-audit --no-fund - - run: npm run ci + - run: npm run ci:fast diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml new file mode 100644 index 0000000..70a4891 --- /dev/null +++ b/.github/workflows/e2e.yml @@ -0,0 +1,34 @@ +name: e2e + +on: + push: + tags: ["v*"] + schedule: + - cron: "17 8 * * *" + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + full: + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - uses: actions/checkout@v6 + with: + persist-credentials: false + - uses: actions/setup-node@v6 + with: + node-version: 24 + cache: npm + - name: Install fd-find + run: | + sudo apt-get update -qq + sudo apt-get install -y --no-install-recommends fd-find + - run: npm ci --prefer-offline --no-audit --no-fund + - run: npm run ci:full diff --git a/docs/.superpowers/plans/2026-05-03-test-suite-audit.md b/docs/.superpowers/plans/2026-05-03-test-suite-audit.md new file mode 100644 index 0000000..3bce2ab --- /dev/null +++ b/docs/.superpowers/plans/2026-05-03-test-suite-audit.md @@ -0,0 +1,168 @@ +# clio-coder test suite audit + +Date: 2026-05-03 + +Baseline: + +- `npm run test` passed once in 9.37s wall time with 1071 tests. +- `npm run test:e2e` passed once in 60.64s wall time with 54 tests. +- Main HEAD at audit start: `a97983454fcfde8cf826a4089570b932dfcfe101`. +- Boundary invariants are not in scope for softening. The checker enforces engine imports, worker isolation, and domain extension independence in `tests/boundaries/check-boundaries.ts:127`. + +## Slowest 20 Tests + +| Rank | Test | Baseline wall | Citation | Action | Justification | +|---:|---|---:|---|---|---| +| 1 | `memory commands propose from evidence and manage approval state` | 8694.7ms | `tests/e2e/cli.test.ts:369` | KEEP | It is an end-to-end CLI workflow across evidence and memory state, so it belongs in full e2e only. | +| 2 | `escalates aborted commands that ignore sigterm` | 6530.3ms | `tests/unit/bash-tool-env.test.ts:84` | TIGHTEN | It waits for the production 5s kill grace and can assert the same behavior through an injected shorter grace. | +| 3 | `old lifecycle command names are rejected` | 6328.5ms | `tests/e2e/cli.test.ts:612` | KEEP | It is slow because it launches the built CLI eight times, but it protects retired public commands at the binary boundary. | +| 4 | `eval run, report, help, and compare routing are wired` | 6289.6ms | `tests/e2e/cli.test.ts:422` | KEEP | It covers the public CLI path through eval artifacts and evidence, which unit tests do not exercise as a binary. | +| 5 | `configures the interactive multi-worker profile pool` | 4388.9ms | `tests/integration/cli-configure-targets.test.ts:155` | MOVE | It writes real XDG config and belongs with integration tests. | +| 6 | `/scoped-models dispatches cleanly and Esc restores the editor` | 3011.0ms | `tests/e2e/interactive.test.ts:283` | KEEP | It verifies a real pty overlay lifecycle with stable loose matching. | +| 7 | `/resume opens the session picker (possibly empty), Esc closes` | 2785.4ms | `tests/e2e/interactive.test.ts:325` | KEEP | It protects the real pty route for session picker startup. | +| 8 | `/thinking shows the full level set for a reasoning-capable target` | 2758.2ms | `tests/e2e/interactive.test.ts:457` | KEEP | It checks capability data reaching the real TUI route. | +| 9 | `/model opens the picker, Esc closes, /quit exits clean` | 2656.7ms | `tests/e2e/interactive.test.ts:114` | KEEP | It is the canonical model picker pty smoke. | +| 10 | `evidence build, inspect, and list operate on run ledger receipts` | 2600.0ms | `tests/e2e/cli.test.ts:340` | KEEP | It proves the installed CLI can read seeded run receipts and write evidence. | +| 11 | `/model shows llama.cpp wire model ids and model-specific context windows` | 2579.4ms | `tests/e2e/interactive.test.ts:139` | KEEP | It protects user-visible model metadata in the real picker. | +| 12 | `model selection is immediately active when reopening the picker` | 2505.4ms | `tests/e2e/interactive.test.ts:173` | KEEP | It catches stale selection state in the live pty TUI. | +| 13 | `/hotkeys opens the reference, Esc closes` | 2488.0ms | `tests/e2e/interactive.test.ts:358` | KEEP | It is a simple pty smoke for the hotkeys overlay. | +| 14 | `/targets opens the target overlay, Esc closes, /quit exits clean` | 2476.1ms | `tests/e2e/interactive.test.ts:227` | KEEP | It protects target overlay routing in the live TUI. | +| 15 | `dashboard shows DEV MODE and footer flips to restart-required on engine edit` | 2463.9ms | `tests/e2e/self-dev.test.ts:110` | KEEP | It covers self-development pty behavior that unit tests cannot prove. | +| 16 | `/settings opens the settings overlay, Esc closes` | 2417.4ms | `tests/e2e/interactive.test.ts:308` | KEEP | It is the real TUI route for settings overlay startup and close. | +| 17 | `/new rotates the session and exits clean on /quit` | 2404.5ms | `tests/e2e/interactive.test.ts:344` | KEEP | It protects a session lifecycle command in the live TUI. | +| 18 | `auth login --api-key is parsed by auth, not the global startup flag` | 2400.2ms | `tests/e2e/cli.test.ts:598` | KEEP | It guards public CLI flag precedence at the binary boundary. | +| 19 | `Ctrl+L opens the /model picker and Esc closes it` | 2390.9ms | `tests/e2e/interactive.test.ts:260` | KEEP | It protects the pty keybinding route, not only slash command dispatch. | +| 20 | `/connect opens the target connection selector, Esc closes, /quit exits clean` | 2379.6ms | `tests/e2e/interactive.test.ts:244` | KEEP | It verifies the live selector opens and closes from the command surface. | + +## Timing-Sensitive Tests + +| Finding | Citation | Action | Justification | +|---|---|---|---| +| `TokenBucket` refill slept for 200ms at baseline. | `tests/unit/core.test.ts:52` | TIGHTEN | Injecting a clock keeps the refill behavior exact without scheduler timing. | +| Bash abort escalation slept for 1500ms and then waited for a 5s grace at baseline. | `tests/unit/bash-tool-env.test.ts:84` | TIGHTEN | A test-local bash tool with a shorter kill grace preserves the SIGTERM to SIGKILL behavior. | +| Tool abort tests assert elapsed wall-clock thresholds. | `tests/unit/tool-signal.test.ts:58` | KEEP | These cover real abort propagation through bash and fetch, and their thresholds are broad enough for the behavior under test. | +| Termination budget tests use small real timers. | `tests/unit/termination.test.ts:37` | KEEP | The behavior is specifically about budgeted timeout return, with generous thresholds. | +| Chat renderer coalescing tests waited for render timers at baseline. | `tests/unit/chat-panel.test.ts:699` | TIGHTEN | The production helper already accepts timer injection, so tests can drive the timer synchronously. | +| Dispatch concurrency polling uses `Date.now` and a 5ms delay loop. | `tests/integration/dispatch-concurrency.test.ts:50` | MOVE | The file uses scratch data dirs and worker fakes, so it belongs in integration instead of unit. | +| Session listing forces mtime separation with a 5ms sleep. | `tests/integration/session-listing.test.ts:109` | KEEP | The test checks filesystem mtime ordering and is already in integration. | +| Interactive pty tests use short sleeps after Esc and selection keys. | `tests/e2e/interactive.test.ts:105` | KEEP | They are wrapped in try/finally and paired with loose stable-text pty matching. | + +## pi-tui and Synthetic Frames + +| Finding | Citation | Action | Justification | +|---|---|---|---| +| No test imports or mocks `@mariozechner/pi-tui` directly. The closest live dependency is file completion through the real slash autocomplete contract. | `tests/integration/slash-autocomplete.test.ts:54` | KEEP | This does not assert on synthetic TUI frames. | +| Chat panel tests render through the project renderer and assert structural output rather than mocked pi-tui frames. | `tests/unit/chat-panel.test.ts:523` | KEEP | They exercise Clio rendering boundaries without simulating the full TUI. | +| Keybinding tests mention pi-tui defaults but do not mock frame output. | `tests/unit/keybindings.test.ts:29` | KEEP | They validate configuration data rather than terminal frames. | + +## Duplicate Coverage Across Layers + +| Behavior | Citations | Action | Justification | +|---|---|---|---| +| Model capability wiring appears in unit, integration, and e2e. | `tests/unit/model-selector.test.ts:158`, `tests/integration/providers/endpoint-lifecycle.test.ts:194`, `tests/e2e/interactive.test.ts:139` | KEEP | Each layer checks a different boundary: row construction, provider cache reset, and live picker display. | +| Evidence and memory behavior appears in integration and e2e. | `tests/integration/evidence-builder.test.ts:21`, `tests/integration/memory.test.ts:125`, `tests/e2e/cli.test.ts:340`, `tests/e2e/cli.test.ts:369` | KEEP | Integration tests own artifact transformations while e2e owns public command routing. | +| Context file suppression is covered by integration and e2e. | `tests/integration/prompts.test.ts:265`, `tests/e2e/cli.test.ts:98` | KEEP | Integration covers prompt assembly with filesystem inputs and e2e covers the top-level flag reaching orchestrator boot. | +| Configure and targets are covered by direct command tests and binary e2e. | `tests/integration/cli-configure-targets.test.ts:36`, `tests/e2e/cli.test.ts:173` | MOVE | The direct command tests are integration-shaped XDG tests and should not count against unit. | + +## Integration-Shaped Tests Under Unit + +| Finding | Citation | Action | Justification | +|---|---|---|---| +| CLI auth/configure/reset command tests write scratch XDG state and shims. | `tests/integration/cli-auth.test.ts:74`, `tests/integration/cli-configure-targets.test.ts:40`, `tests/integration/cli-reset-uninstall.test.ts:18` | MOVE | They are filesystem-backed command integration tests. | +| Dispatch auth/concurrency tests allocate real data dirs and exercise domain wiring. | `tests/integration/dispatch-auth.test.ts:28`, `tests/integration/dispatch-concurrency.test.ts:229` | MOVE | They are domain integration tests, not pure unit tests. | +| Evidence, eval, memory, receipt, session, and self-dev tests write real artifacts. | `tests/integration/evidence-builder.test.ts:26`, `tests/integration/eval-runner.test.ts:19`, `tests/integration/memory.test.ts:28`, `tests/integration/receipt-verify.test.ts:80`, `tests/integration/session.test.ts:393`, `tests/integration/self-dev.test.ts:126` | MOVE | Their assertions depend on filesystem state. | +| Workspace and git tests spawn `git` or inspect real project directories. | `tests/integration/utils-git.test.ts:20`, `tests/integration/workspace/git-probe.test.ts:10`, `tests/integration/workspace/snapshot.test.ts:25` | MOVE | Real subprocesses and filesystem probes belong in integration. | +| Context, component, provider registry, prompts, grep, autocomplete, and tool registry tests create temp files. | `tests/integration/components-scan.test.ts:11`, `tests/integration/context/codewiki/indexer.test.ts:9`, `tests/integration/providers/registry.test.ts:68`, `tests/integration/prompts.test.ts:108`, `tests/integration/tools-grep.test.ts:10`, `tests/integration/slash-autocomplete.test.ts:55`, `tests/integration/tools-registry-wiring.test.ts:605` | MOVE | They are integration-shaped even when they are fast. | +| `core.test.ts` mixes pure concurrency tests with XDG filesystem tests. | `tests/unit/core.test.ts:24`, `tests/unit/core.test.ts:90` | TIGHTEN | Split XDG checks to integration and keep pure concurrency in unit. | + +## CI Workflow Signal + +| Finding | Citation | Action | Justification | +|---|---|---|---| +| The default workflow has one job and runs whatever `npm run ci` means. | `.github/workflows/ci.yml:17`, `.github/workflows/ci.yml:34` | TIGHTEN | The default PR job should call `ci:fast` explicitly so e2e is not hidden inside the default script. | +| The workflow has no nightly or tag-only full e2e lane. | `.github/workflows/ci.yml:3` | TIGHTEN | Full pty e2e should run on a separate schedule and tag push path. | +| The job installs `fd-find` before every gate. | `.github/workflows/ci.yml:29` | KEEP | Some tests and runtime probes use fd when present, and the install is cheap relative to Node setup. | + +## Target Shape + +Pre-commit gate: `npm run ci:precommit`. + +- Includes `npm run typecheck`. +- Includes `npm run lint`. +- Includes `npm run test:boundaries`. +- Includes `npm run test:unit`. +- Budget: under 30s on a developer laptop. + +Pre-push gate and CI default job: `npm run ci:fast`. + +- Includes `ci:precommit`. +- Includes `npm run test:integration`. +- Includes `npm run build`. +- Budget: under 3min. + +Nightly or tag-push gate: `npm run ci:full`. + +- Includes `ci:fast`. +- Includes `npm run test:e2e:run` without rebuilding. +- Budget: under 10min. + +Scripts: + +- `test:unit`: `node --import tsx --test 'tests/unit/**/*.test.ts'` +- `test:integration`: `node --import tsx --test 'tests/integration/**/*.test.ts'` +- `test:boundaries`: `node --import tsx --test 'tests/boundaries/**/*.test.ts'` +- `test:e2e`: `npm run build && npm run test:e2e:run` +- `ci:precommit`: `npm run typecheck && npm run lint && npm run test:boundaries && npm run test:unit` +- `ci:fast`: `npm run ci:precommit && npm run test:integration && npm run build` +- `ci:full`: `npm run ci:fast && npm run test:e2e:run` +- `ci`: alias for `ci:fast` + +Gate file mapping after execution: + +- Pre-commit boundaries: every file under `tests/boundaries/**/*.test.ts`. +- Pre-commit unit: every remaining file under `tests/unit/**/*.test.ts`, limited to pure logic with no filesystem or subprocess setup. +- Pre-push integration: every file under `tests/integration/**/*.test.ts`, including the moved filesystem, XDG, git, and command tests from `tests/unit`. +- Full e2e: every file under `tests/e2e/**/*.test.ts`. + +Moved from unit to integration: + +- `tests/unit/agents-builtins.test.ts` -> `tests/integration/agents-builtins.test.ts` +- `tests/unit/cli-auth.test.ts` -> `tests/integration/cli-auth.test.ts` +- `tests/unit/cli-configure-targets.test.ts` -> `tests/integration/cli-configure-targets.test.ts` +- `tests/unit/cli-reset-uninstall.test.ts` -> `tests/integration/cli-reset-uninstall.test.ts` +- `tests/unit/components-scan.test.ts` -> `tests/integration/components-scan.test.ts` +- `tests/unit/context/codewiki/indexer.test.ts` -> `tests/integration/context/codewiki/indexer.test.ts` +- `tests/unit/context/codewiki/tools.test.ts` -> `tests/integration/context/codewiki/tools.test.ts` +- `tests/unit/context/fingerprint.test.ts` -> `tests/integration/context/fingerprint.test.ts` +- `tests/unit/context/sibling-files.test.ts` -> `tests/integration/context/sibling-files.test.ts` +- `tests/unit/context/state.test.ts` -> `tests/integration/context/state.test.ts` +- `tests/unit/cwd-fallback.test.ts` -> `tests/integration/cwd-fallback.test.ts` +- `tests/unit/dispatch-auth.test.ts` -> `tests/integration/dispatch-auth.test.ts` +- `tests/unit/dispatch-concurrency.test.ts` -> `tests/integration/dispatch-concurrency.test.ts` +- `tests/unit/eval-evidence.test.ts` -> `tests/integration/eval-evidence.test.ts` +- `tests/unit/eval-runner.test.ts` -> `tests/integration/eval-runner.test.ts` +- `tests/unit/evidence-builder.test.ts` -> `tests/integration/evidence-builder.test.ts` +- `tests/unit/memory.test.ts` -> `tests/integration/memory.test.ts` +- `tests/unit/prompts.test.ts` -> `tests/integration/prompts.test.ts` +- `tests/unit/providers/knowledge-base.test.ts` -> `tests/integration/providers/knowledge-base.test.ts` +- `tests/unit/providers/registry.test.ts` -> `tests/integration/providers/registry.test.ts` +- `tests/unit/receipt-verify.test.ts` -> `tests/integration/receipt-verify.test.ts` +- `tests/unit/safety-rule-packs.test.ts` -> `tests/integration/safety-rule-packs.test.ts` +- `tests/unit/self-dev.test.ts` -> `tests/integration/self-dev.test.ts` +- `tests/unit/session-compaction-entry.test.ts` -> `tests/integration/session-compaction-entry.test.ts` +- `tests/unit/session.test.ts` -> `tests/integration/session.test.ts` +- `tests/unit/slash-autocomplete.test.ts` -> `tests/integration/slash-autocomplete.test.ts` +- `tests/unit/tools-grep.test.ts` -> `tests/integration/tools-grep.test.ts` +- `tests/unit/tools-registry-wiring.test.ts` -> `tests/integration/tools-registry-wiring.test.ts` +- `tests/unit/utils-git.test.ts` -> `tests/integration/utils-git.test.ts` +- `tests/unit/workspace/git-probe.test.ts` -> `tests/integration/workspace/git-probe.test.ts` +- `tests/unit/workspace/project-type.test.ts` -> `tests/integration/workspace/project-type.test.ts` +- `tests/unit/workspace/snapshot.test.ts` -> `tests/integration/workspace/snapshot.test.ts` + +## Validation Measurements + +| Gate | Before | After | +|---|---:|---:| +| `npm run ci:precommit` | not split; old `npm run test` was 9.37s without typecheck or lint | 16.87s | +| `npm run ci:fast` | not split; old default `npm run ci` included e2e | 23.74s | +| `npm run ci:full` | old `npm run test` 9.37s plus `npm run test:e2e` 60.64s, not measured as one script | 82.06s | diff --git a/package.json b/package.json index 9e9ab36..dc17a82 100644 --- a/package.json +++ b/package.json @@ -67,10 +67,17 @@ "typecheck": "tsc -p tsconfig.tests.json", "format": "biome format --write .", "lint": "biome check .", - "check:boundaries": "node --import tsx --test 'tests/boundaries/**/*.test.ts'", - "test": "node --import tsx --test 'tests/unit/**/*.test.ts' 'tests/integration/**/*.test.ts' 'tests/boundaries/**/*.test.ts'", - "test:e2e": "npm run build && node --import tsx --test 'tests/e2e/**/*.test.ts'", - "ci": "npm run typecheck && npm run lint && npm run test && npm run build && npm run test:e2e", + "test:unit": "node --import tsx --test 'tests/unit/**/*.test.ts'", + "test:integration": "node --import tsx --test 'tests/integration/**/*.test.ts'", + "test:boundaries": "node --import tsx --test 'tests/boundaries/**/*.test.ts'", + "check:boundaries": "npm run test:boundaries", + "test": "npm run test:unit && npm run test:integration && npm run test:boundaries", + "test:e2e:run": "node --import tsx --test 'tests/e2e/**/*.test.ts'", + "test:e2e": "npm run build && npm run test:e2e:run", + "ci:precommit": "npm run typecheck && npm run lint && npm run test:boundaries && npm run test:unit", + "ci:fast": "npm run ci:precommit && npm run test:integration && npm run build", + "ci:full": "npm run ci:fast && npm run test:e2e:run", + "ci": "npm run ci:fast", "hooks:install": "bash scripts/install-hooks.sh", "prepublishOnly": "npm run typecheck && npm run lint && npm run build && node scripts/check-dist.mjs" }, diff --git a/src/core/concurrency.ts b/src/core/concurrency.ts index 09cf70f..9096d6a 100644 --- a/src/core/concurrency.ts +++ b/src/core/concurrency.ts @@ -43,10 +43,11 @@ export class TokenBucket { constructor( private readonly capacity: number, private readonly refillPerSec: number, + private readonly now: () => number = Date.now, ) { if (capacity < 1) throw new Error("TokenBucket capacity must be >= 1"); this.tokens = capacity; - this.lastRefillMs = Date.now(); + this.lastRefillMs = this.now(); } tryTake(n = 1): boolean { @@ -57,7 +58,7 @@ export class TokenBucket { } private refill(): void { - const now = Date.now(); + const now = this.now(); const elapsedSec = (now - this.lastRefillMs) / 1000; if (elapsedSec <= 0) return; this.tokens = Math.min(this.capacity, this.tokens + elapsedSec * this.refillPerSec); diff --git a/src/tools/bash.ts b/src/tools/bash.ts index 03667fb..aeb4c63 100644 --- a/src/tools/bash.ts +++ b/src/tools/bash.ts @@ -8,6 +8,10 @@ const MAX_OUTPUT_BYTES = 1_000_000; const TRUNCATION_MARKER = "\n[output truncated]\n"; const CLIO_CONTROL_ENV_KEYS = ["CLIO_DEV", "CLIO_SELF_DEV", "CLIO_INTERACTIVE", "CLIO_RESUME_SESSION_ID"] as const; +export interface BashToolOptions { + killGraceMs?: number; +} + function truncate(text: string): string { return truncateUtf8(text, MAX_OUTPUT_BYTES, TRUNCATION_MARKER); } @@ -34,6 +38,7 @@ function execBash( cwd: string | undefined, timeout: number, signal?: AbortSignal, + options: BashToolOptions = {}, ): Promise { return new Promise((resolve) => { let aborted = false; @@ -79,7 +84,7 @@ function execBash( sendSignal("SIGTERM"); killGraceTimer = setTimeout(() => { sendSignal("SIGKILL"); - }, 5000); + }, options.killGraceMs ?? 5000); }; function onAbort(): void { @@ -142,61 +147,66 @@ function execBash( }); } -export const bashTool: ToolSpec = { - name: ToolNames.Bash, - description: - "Execute a bash command in the current working directory via /bin/bash -lc. Returns stdout and stderr. Optionally provide a timeout in seconds (or timeout_ms in milliseconds) and a cwd. Default timeout is 5 minutes (300 seconds), enough for npm install and full ci runs.", - parameters: Type.Object({ - command: Type.String({ description: "Bash command to execute." }), - cwd: Type.Optional( - Type.String({ description: "Working directory for the command. Defaults to the orchestrator cwd." }), - ), - timeout: Type.Optional(Type.Number({ description: "Timeout in seconds. Alias of timeout_ms. Must be > 0." })), - timeout_ms: Type.Optional( - Type.Number({ description: "Timeout in milliseconds. Defaults to 300000 (5 min). Must be > 0." }), - ), - }), - baseActionClass: "execute", - executionMode: "sequential", - async run(args, options): Promise { - if (typeof args.command !== "string" || args.command.length === 0) { - return { kind: "error", message: "bash: missing command argument" }; - } - const cwd = typeof args.cwd === "string" ? args.cwd : undefined; - const timeoutMsArg = typeof args.timeout_ms === "number" && args.timeout_ms > 0 ? args.timeout_ms : null; - const timeoutSecArg = - timeoutMsArg === null && typeof args.timeout === "number" && args.timeout > 0 ? args.timeout * 1000 : null; - const timeout = timeoutMsArg ?? timeoutSecArg ?? 300_000; - try { - const { error, stdout, stderr, aborted, timedOut, outputCapped } = await execBash( - args.command, - cwd, - timeout, - options?.signal, - ); - if (aborted) { - return { kind: "error", message: "bash: command aborted" }; +export function createBashTool(toolOptions: BashToolOptions = {}): ToolSpec { + return { + name: ToolNames.Bash, + description: + "Execute a bash command in the current working directory via /bin/bash -lc. Returns stdout and stderr. Optionally provide a timeout in seconds (or timeout_ms in milliseconds) and a cwd. Default timeout is 5 minutes (300 seconds), enough for npm install and full ci runs.", + parameters: Type.Object({ + command: Type.String({ description: "Bash command to execute." }), + cwd: Type.Optional( + Type.String({ description: "Working directory for the command. Defaults to the orchestrator cwd." }), + ), + timeout: Type.Optional(Type.Number({ description: "Timeout in seconds. Alias of timeout_ms. Must be > 0." })), + timeout_ms: Type.Optional( + Type.Number({ description: "Timeout in milliseconds. Defaults to 300000 (5 min). Must be > 0." }), + ), + }), + baseActionClass: "execute", + executionMode: "sequential", + async run(args, options): Promise { + if (typeof args.command !== "string" || args.command.length === 0) { + return { kind: "error", message: "bash: missing command argument" }; } - if (timedOut) { - return { kind: "error", message: `bash: command timed out after ${timeout}ms` }; - } - if (outputCapped) { - return { kind: "error", message: `bash: command output exceeded ${MAX_OUTPUT_BYTES * 2} bytes` }; - } - if (error) { - const code = typeof error.code === "number" ? error.code : (error as { code?: string }).code; - const tail = stderr.length > 0 ? stderr : stdout; - const message = `bash: command failed (exit ${code ?? "?"}): ${truncate(tail).trim() || error.message}`; - return { kind: "error", message }; + const cwd = typeof args.cwd === "string" ? args.cwd : undefined; + const timeoutMsArg = typeof args.timeout_ms === "number" && args.timeout_ms > 0 ? args.timeout_ms : null; + const timeoutSecArg = + timeoutMsArg === null && typeof args.timeout === "number" && args.timeout > 0 ? args.timeout * 1000 : null; + const timeout = timeoutMsArg ?? timeoutSecArg ?? 300_000; + try { + const { error, stdout, stderr, aborted, timedOut, outputCapped } = await execBash( + args.command, + cwd, + timeout, + options?.signal, + toolOptions, + ); + if (aborted) { + return { kind: "error", message: "bash: command aborted" }; + } + if (timedOut) { + return { kind: "error", message: `bash: command timed out after ${timeout}ms` }; + } + if (outputCapped) { + return { kind: "error", message: `bash: command output exceeded ${MAX_OUTPUT_BYTES * 2} bytes` }; + } + if (error) { + const code = typeof error.code === "number" ? error.code : (error as { code?: string }).code; + const tail = stderr.length > 0 ? stderr : stdout; + const message = `bash: command failed (exit ${code ?? "?"}): ${truncate(tail).trim() || error.message}`; + return { kind: "error", message }; + } + const combined = + stderr.length > 0 ? `${stdout}${stdout.endsWith("\n") || stdout.length === 0 ? "" : "\n"}${stderr}` : stdout; + return { kind: "ok", output: truncate(combined) }; + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + return { kind: "error", message: `bash: ${msg}` }; } - const combined = - stderr.length > 0 ? `${stdout}${stdout.endsWith("\n") || stdout.length === 0 ? "" : "\n"}${stderr}` : stdout; - return { kind: "ok", output: truncate(combined) }; - } catch (err) { - const msg = err instanceof Error ? err.message : String(err); - return { kind: "error", message: `bash: ${msg}` }; - } - }, -}; + }, + }; +} + +export const bashTool: ToolSpec = createBashTool(); export { buildToolEnv, truncateUtf8 }; diff --git a/tests/unit/agents-builtins.test.ts b/tests/integration/agents-builtins.test.ts similarity index 100% rename from tests/unit/agents-builtins.test.ts rename to tests/integration/agents-builtins.test.ts diff --git a/tests/unit/cli-auth.test.ts b/tests/integration/cli-auth.test.ts similarity index 100% rename from tests/unit/cli-auth.test.ts rename to tests/integration/cli-auth.test.ts diff --git a/tests/unit/cli-configure-targets.test.ts b/tests/integration/cli-configure-targets.test.ts similarity index 100% rename from tests/unit/cli-configure-targets.test.ts rename to tests/integration/cli-configure-targets.test.ts diff --git a/tests/unit/cli-reset-uninstall.test.ts b/tests/integration/cli-reset-uninstall.test.ts similarity index 100% rename from tests/unit/cli-reset-uninstall.test.ts rename to tests/integration/cli-reset-uninstall.test.ts diff --git a/tests/unit/components-scan.test.ts b/tests/integration/components-scan.test.ts similarity index 100% rename from tests/unit/components-scan.test.ts rename to tests/integration/components-scan.test.ts diff --git a/tests/unit/context/codewiki/indexer.test.ts b/tests/integration/context/codewiki/indexer.test.ts similarity index 100% rename from tests/unit/context/codewiki/indexer.test.ts rename to tests/integration/context/codewiki/indexer.test.ts diff --git a/tests/unit/context/codewiki/tools.test.ts b/tests/integration/context/codewiki/tools.test.ts similarity index 100% rename from tests/unit/context/codewiki/tools.test.ts rename to tests/integration/context/codewiki/tools.test.ts diff --git a/tests/unit/context/fingerprint.test.ts b/tests/integration/context/fingerprint.test.ts similarity index 100% rename from tests/unit/context/fingerprint.test.ts rename to tests/integration/context/fingerprint.test.ts diff --git a/tests/unit/context/sibling-files.test.ts b/tests/integration/context/sibling-files.test.ts similarity index 100% rename from tests/unit/context/sibling-files.test.ts rename to tests/integration/context/sibling-files.test.ts diff --git a/tests/unit/context/state.test.ts b/tests/integration/context/state.test.ts similarity index 100% rename from tests/unit/context/state.test.ts rename to tests/integration/context/state.test.ts diff --git a/tests/integration/core-xdg.test.ts b/tests/integration/core-xdg.test.ts new file mode 100644 index 0000000..49045c9 --- /dev/null +++ b/tests/integration/core-xdg.test.ts @@ -0,0 +1,54 @@ +import { ok, strictEqual } from "node:assert/strict"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, beforeEach, describe, it } from "node:test"; +import { clioCacheDir, clioConfigDir, clioDataDir, resetXdgCache } from "../../src/core/xdg.js"; + +describe("core/xdg", () => { + const original = { + CLIO_HOME: process.env.CLIO_HOME, + CLIO_DATA_DIR: process.env.CLIO_DATA_DIR, + CLIO_CONFIG_DIR: process.env.CLIO_CONFIG_DIR, + CLIO_CACHE_DIR: process.env.CLIO_CACHE_DIR, + }; + let scratch: string; + + beforeEach(() => { + scratch = mkdtempSync(join(tmpdir(), "clio-xdg-")); + process.env.CLIO_HOME = scratch; + process.env.CLIO_DATA_DIR = join(scratch, "data"); + process.env.CLIO_CONFIG_DIR = join(scratch, "config"); + process.env.CLIO_CACHE_DIR = join(scratch, "cache"); + resetXdgCache(); + }); + + afterEach(() => { + for (const key of Object.keys(process.env)) { + if (!(key in original)) Reflect.deleteProperty(process.env, key); + } + for (const [key, value] of Object.entries(original)) { + if (value !== undefined) process.env[key] = value; + } + rmSync(scratch, { recursive: true, force: true }); + resetXdgCache(); + }); + + it("CLIO_HOME routes all three dirs under it", () => { + Reflect.deleteProperty(process.env, "CLIO_DATA_DIR"); + Reflect.deleteProperty(process.env, "CLIO_CONFIG_DIR"); + Reflect.deleteProperty(process.env, "CLIO_CACHE_DIR"); + resetXdgCache(); + + ok(clioDataDir().startsWith(scratch)); + ok(clioConfigDir().startsWith(scratch)); + ok(clioCacheDir().startsWith(scratch)); + }); + + it("individual overrides take precedence over CLIO_HOME", () => { + const customData = join(scratch, "custom-data"); + process.env.CLIO_DATA_DIR = customData; + resetXdgCache(); + strictEqual(clioDataDir(), customData); + }); +}); diff --git a/tests/unit/cwd-fallback.test.ts b/tests/integration/cwd-fallback.test.ts similarity index 100% rename from tests/unit/cwd-fallback.test.ts rename to tests/integration/cwd-fallback.test.ts diff --git a/tests/unit/dispatch-auth.test.ts b/tests/integration/dispatch-auth.test.ts similarity index 100% rename from tests/unit/dispatch-auth.test.ts rename to tests/integration/dispatch-auth.test.ts diff --git a/tests/unit/dispatch-concurrency.test.ts b/tests/integration/dispatch-concurrency.test.ts similarity index 100% rename from tests/unit/dispatch-concurrency.test.ts rename to tests/integration/dispatch-concurrency.test.ts diff --git a/tests/unit/eval-evidence.test.ts b/tests/integration/eval-evidence.test.ts similarity index 100% rename from tests/unit/eval-evidence.test.ts rename to tests/integration/eval-evidence.test.ts diff --git a/tests/unit/eval-runner.test.ts b/tests/integration/eval-runner.test.ts similarity index 100% rename from tests/unit/eval-runner.test.ts rename to tests/integration/eval-runner.test.ts diff --git a/tests/unit/evidence-builder.test.ts b/tests/integration/evidence-builder.test.ts similarity index 100% rename from tests/unit/evidence-builder.test.ts rename to tests/integration/evidence-builder.test.ts diff --git a/tests/unit/memory.test.ts b/tests/integration/memory.test.ts similarity index 100% rename from tests/unit/memory.test.ts rename to tests/integration/memory.test.ts diff --git a/tests/unit/prompts.test.ts b/tests/integration/prompts.test.ts similarity index 100% rename from tests/unit/prompts.test.ts rename to tests/integration/prompts.test.ts diff --git a/tests/unit/providers/knowledge-base.test.ts b/tests/integration/providers/knowledge-base.test.ts similarity index 100% rename from tests/unit/providers/knowledge-base.test.ts rename to tests/integration/providers/knowledge-base.test.ts diff --git a/tests/unit/providers/registry.test.ts b/tests/integration/providers/registry.test.ts similarity index 100% rename from tests/unit/providers/registry.test.ts rename to tests/integration/providers/registry.test.ts diff --git a/tests/unit/receipt-verify.test.ts b/tests/integration/receipt-verify.test.ts similarity index 100% rename from tests/unit/receipt-verify.test.ts rename to tests/integration/receipt-verify.test.ts diff --git a/tests/unit/safety-rule-packs.test.ts b/tests/integration/safety-rule-packs.test.ts similarity index 100% rename from tests/unit/safety-rule-packs.test.ts rename to tests/integration/safety-rule-packs.test.ts diff --git a/tests/unit/self-dev.test.ts b/tests/integration/self-dev.test.ts similarity index 100% rename from tests/unit/self-dev.test.ts rename to tests/integration/self-dev.test.ts diff --git a/tests/unit/session-compaction-entry.test.ts b/tests/integration/session-compaction-entry.test.ts similarity index 100% rename from tests/unit/session-compaction-entry.test.ts rename to tests/integration/session-compaction-entry.test.ts diff --git a/tests/unit/session.test.ts b/tests/integration/session.test.ts similarity index 100% rename from tests/unit/session.test.ts rename to tests/integration/session.test.ts diff --git a/tests/unit/slash-autocomplete.test.ts b/tests/integration/slash-autocomplete.test.ts similarity index 100% rename from tests/unit/slash-autocomplete.test.ts rename to tests/integration/slash-autocomplete.test.ts diff --git a/tests/unit/tools-grep.test.ts b/tests/integration/tools-grep.test.ts similarity index 100% rename from tests/unit/tools-grep.test.ts rename to tests/integration/tools-grep.test.ts diff --git a/tests/unit/tools-registry-wiring.test.ts b/tests/integration/tools-registry-wiring.test.ts similarity index 100% rename from tests/unit/tools-registry-wiring.test.ts rename to tests/integration/tools-registry-wiring.test.ts diff --git a/tests/unit/utils-git.test.ts b/tests/integration/utils-git.test.ts similarity index 100% rename from tests/unit/utils-git.test.ts rename to tests/integration/utils-git.test.ts diff --git a/tests/unit/workspace/git-probe.test.ts b/tests/integration/workspace/git-probe.test.ts similarity index 100% rename from tests/unit/workspace/git-probe.test.ts rename to tests/integration/workspace/git-probe.test.ts diff --git a/tests/unit/workspace/project-type.test.ts b/tests/integration/workspace/project-type.test.ts similarity index 100% rename from tests/unit/workspace/project-type.test.ts rename to tests/integration/workspace/project-type.test.ts diff --git a/tests/unit/workspace/snapshot.test.ts b/tests/integration/workspace/snapshot.test.ts similarity index 100% rename from tests/unit/workspace/snapshot.test.ts rename to tests/integration/workspace/snapshot.test.ts diff --git a/tests/unit/bash-tool-env.test.ts b/tests/unit/bash-tool-env.test.ts index 1377baf..3b08cc8 100644 --- a/tests/unit/bash-tool-env.test.ts +++ b/tests/unit/bash-tool-env.test.ts @@ -1,6 +1,6 @@ import { ok, strictEqual } from "node:assert/strict"; import { afterEach, describe, it } from "node:test"; -import { bashTool, buildToolEnv } from "../../src/tools/bash.js"; +import { bashTool, buildToolEnv, createBashTool } from "../../src/tools/bash.js"; const ORIGINAL_ENV = { ...process.env }; @@ -83,6 +83,8 @@ describe("bash tool environment", () => { it("escalates aborted commands that ignore sigterm", async () => { const controller = new AbortController(); + const killGraceMs = 50; + const testBashTool = createBashTool({ killGraceMs }); // The shell runs with `-l`, which on cold CI runners can spend a few // hundred ms reading /etc/profile before reaching the user command. If // SIGTERM arrives before the trap is installed, bash dies at the @@ -95,7 +97,7 @@ describe("bash tool environment", () => { // monotonic schedule. libuv-backed `setTimeout` already uses the // monotonic clock; the test just needs to read the same source. const startedAt = performance.now(); - const started = bashTool.run( + const started = testBashTool.run( { command: 'trap "" TERM; end=$((SECONDS + 12)); while [ "$SECONDS" -lt "$end" ]; do sleep 1; done', timeout_ms: 16_000, @@ -108,8 +110,8 @@ describe("bash tool environment", () => { strictEqual(result.kind, "error"); if (result.kind === "error") strictEqual(result.message, "bash: command aborted"); - ok(elapsedMs >= 1500 + 4_500, `expected abort escalation after the grace period, got ${elapsedMs}ms`); - ok(elapsedMs < 1500 + 9_000, `expected abort escalation within window, got ${elapsedMs}ms`); + ok(elapsedMs >= 1500 + killGraceMs - 20, `expected abort escalation after the grace period, got ${elapsedMs}ms`); + ok(elapsedMs < 1500 + 2_000, `expected abort escalation within window, got ${elapsedMs}ms`); }); it("reports output cap exits explicitly", async () => { diff --git a/tests/unit/chat-panel.test.ts b/tests/unit/chat-panel.test.ts index a4877aa..e6144a6 100644 --- a/tests/unit/chat-panel.test.ts +++ b/tests/unit/chat-panel.test.ts @@ -665,8 +665,10 @@ describe("createCoalescingChatRenderer", () => { renderer: ReturnType; applied: ChatLoopEvent[]; count: () => number; + fireTimer: () => void; } { let renderCount = 0; + let pendingTimer: (() => void) | null = null; const applied: ChatLoopEvent[] = []; const fakePanel = { appendUser: () => {}, @@ -688,16 +690,28 @@ describe("createCoalescingChatRenderer", () => { renderCount += 1; }, coalesceMs, + setTimer: (cb) => { + pendingTimer = cb; + return cb; + }, + clearTimer: (id) => { + if (pendingTimer === id) pendingTimer = null; + }, }); return { renderer, applied, count: () => renderCount, + fireTimer: () => { + const cb = pendingTimer; + pendingTimer = null; + cb?.(); + }, }; } - it("coalesces 100 text_delta events into at most two requestRender calls", async () => { - const { renderer, applied, count } = setup(5); + it("coalesces 100 text_delta events into one requestRender call", () => { + const { renderer, applied, count, fireTimer } = setup(5); for (let i = 0; i < 100; i += 1) { renderer.applyEvent({ type: "text_delta", contentIndex: 0, delta: "x", partialText: "x".repeat(i + 1) }); } @@ -706,14 +720,13 @@ describe("createCoalescingChatRenderer", () => { strictEqual(applied.length, 100); // Inside the same microtask the coalesce timer has not fired yet. strictEqual(count(), 0, "no synchronous render under coalescing"); - // Wait past the coalesce window. One render covers all 100 deltas. - await new Promise((r) => setTimeout(r, 25)); - ok(count() <= 2, `expected at most 2 renders, got ${count()}`); - ok(count() >= 1, `expected at least 1 render, got ${count()}`); + // One timer render covers all 100 deltas. + fireTimer(); + strictEqual(count(), 1); }); - it("message_end after deltas renders synchronously and drops the pending timer", async () => { - const { renderer, count } = setup(5); + it("message_end after deltas renders synchronously and drops the pending timer", () => { + const { renderer, count, fireTimer } = setup(5); renderer.applyEvent({ type: "text_delta", contentIndex: 0, delta: "hello", partialText: "hello" }); // Pending timer scheduled, no synchronous render yet. strictEqual(count(), 0); @@ -723,26 +736,23 @@ describe("createCoalescingChatRenderer", () => { }); // message_end forces a synchronous render and cancels the pending timer. strictEqual(count(), 1); - // Coalesce window passes; no extra render fires since the timer was cancelled. - await new Promise((r) => setTimeout(r, 25)); + fireTimer(); strictEqual(count(), 1, "pending timer must have been cancelled by message_end"); }); - it("agent_end renders synchronously even with no pending deltas", async () => { + it("agent_end renders synchronously even with no pending deltas", () => { const { renderer, count } = setup(5); renderer.applyEvent({ type: "agent_end", messages: [] }); strictEqual(count(), 1); - await new Promise((r) => setTimeout(r, 25)); - strictEqual(count(), 1); }); - it("flush() drains a pending coalesce timer and renders once", async () => { - const { renderer, count } = setup(50); + it("flush() drains a pending coalesce timer and renders once", () => { + const { renderer, count, fireTimer } = setup(50); renderer.applyEvent({ type: "text_delta", contentIndex: 0, delta: "x", partialText: "x" }); strictEqual(count(), 0); renderer.flush(); strictEqual(count(), 1); - await new Promise((r) => setTimeout(r, 75)); + fireTimer(); strictEqual(count(), 1, "no double-render after explicit flush"); }); diff --git a/tests/unit/core.test.ts b/tests/unit/core.test.ts index 79c5f7f..adc85a9 100644 --- a/tests/unit/core.test.ts +++ b/tests/unit/core.test.ts @@ -1,11 +1,7 @@ -import { ok, strictEqual, throws } from "node:assert/strict"; -import { mkdtempSync, rmSync } from "node:fs"; -import { tmpdir } from "node:os"; -import { join } from "node:path"; -import { afterEach, beforeEach, describe, it } from "node:test"; +import { strictEqual, throws } from "node:assert/strict"; +import { describe, it } from "node:test"; import { Semaphore, TokenBucket } from "../../src/core/concurrency.js"; import { ALL_TOOL_NAMES, ToolNames } from "../../src/core/tool-names.js"; -import { clioCacheDir, clioConfigDir, clioDataDir, resetXdgCache } from "../../src/core/xdg.js"; describe("core/tool-names", () => { it("ALL_TOOL_NAMES matches enum values", () => { @@ -21,47 +17,6 @@ describe("core/tool-names", () => { }); }); -describe("core/xdg", () => { - const original = { - CLIO_HOME: process.env.CLIO_HOME, - CLIO_DATA_DIR: process.env.CLIO_DATA_DIR, - CLIO_CONFIG_DIR: process.env.CLIO_CONFIG_DIR, - CLIO_CACHE_DIR: process.env.CLIO_CACHE_DIR, - }; - let scratch: string; - - beforeEach(() => { - scratch = mkdtempSync(join(tmpdir(), "clio-xdg-")); - process.env.CLIO_HOME = scratch; - Reflect.deleteProperty(process.env, "CLIO_DATA_DIR"); - Reflect.deleteProperty(process.env, "CLIO_CONFIG_DIR"); - Reflect.deleteProperty(process.env, "CLIO_CACHE_DIR"); - resetXdgCache(); - }); - - afterEach(() => { - for (const [k, v] of Object.entries(original)) { - if (v === undefined) Reflect.deleteProperty(process.env, k); - else process.env[k] = v; - } - rmSync(scratch, { recursive: true, force: true }); - resetXdgCache(); - }); - - it("CLIO_HOME routes all three dirs under it", () => { - ok(clioDataDir().startsWith(scratch)); - ok(clioConfigDir().startsWith(scratch)); - ok(clioCacheDir().startsWith(scratch)); - }); - - it("individual overrides take precedence over CLIO_HOME", () => { - const customData = join(scratch, "custom-data"); - process.env.CLIO_DATA_DIR = customData; - resetXdgCache(); - strictEqual(clioDataDir(), customData); - }); -}); - describe("core/concurrency/Semaphore", () => { it("rejects permits < 1", () => { throws(() => new Semaphore(0)); @@ -96,11 +51,12 @@ describe("core/concurrency/TokenBucket", () => { strictEqual(b.tryTake(1), false); }); - it("refills at given rate", async () => { - const b = new TokenBucket(2, 10); + it("refills at given rate", () => { + let now = 1_000; + const b = new TokenBucket(2, 10, () => now); b.tryTake(2); strictEqual(b.tryTake(1), false); - await new Promise((resolve) => setTimeout(resolve, 200)); + now += 200; strictEqual(b.tryTake(1), true); }); });