diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index b1bcf80..e4f0967 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -21,7 +21,9 @@ {"id":"beadle-40k","title":"feat(ethos): propose inputs.trigger field for mission contracts","description":"## Problem\n\nWhen beadle creates a mission triggered by an email, there is no standard field in the ethos mission contract to record the provenance (which email, from whom, what subject).\n\n## Proposal\n\nAdd inputs.trigger to the mission contract schema. Ethos stores it as metadata without validating contents. Structure:\n\n inputs:\n trigger:\n type: email | cron | manual | pipeline\n message_id: \"1205\"\n from: jim@punt-labs.com\n subject: \"Schedule a team meeting\"\n\nAudit value: trace a mission back to its trigger without reading the daemon's logs.\n\nConfirmed with ethos agent (biff conversation 2026-04-12): ethos is receptive to this as a metadata field.\n\n## Depends on\n\n- Ethos DES-031 (mission contracts)\n\n## See also\n\n- docs/orchestrator-design.md","status":"open","priority":2,"issue_type":"task","owner":"claude@punt-labs.com","created_at":"2026-04-12T11:44:01.690608388-07:00","created_by":"J F","updated_at":"2026-04-12T11:44:01.690608388-07:00"} {"id":"beadle-4cd","title":"Pin VERSION in install.sh and auto-run doctor at end","status":"closed","priority":2,"issue_type":"task","owner":"jmf@pobox.com","created_at":"2026-03-18T06:03:37.94796-07:00","created_by":"\"jmf-pobox\"","updated_at":"2026-03-18T07:10:13.939488-07:00","closed_at":"2026-03-18T07:10:13.939488-07:00","close_reason":"Merged in PR #35. VERSION pinned to 0.3.1, doctor auto-runs at end of install."} {"id":"beadle-4qk","title":"fix(install): add uninstall before install to enable upgrades","description":"install.sh:144 calls 'claude plugin install beadle@punt-labs --scope user' but never calls uninstall first. 
Re-running install.sh therefore does not upgrade an existing install — 'claude plugin install' is a no-op when the plugin is already in the cache.\n\nVerified 2026-04-07 on jfreeman's machine: cache stuck at ~/.claude/plugins/cache/punt-labs/beadle/0.8.0/ with gitCommitSha 0226725 (v0.8.0), even though marketplace clone has beadle.ref=v0.9.0 and the binary path was upgraded to v0.9.0 by re-running install.sh. The hook deploys commands from the locked v0.8.0 cache, so /beadle:inbox runs the rejected CronCreate logic instead of v0.9.0's set_poll_interval flow.\n\nSibling pattern: biff/install.sh:138, quarry/install.sh:175, vox/install.sh:169 all do 'claude plugin uninstall' immediately before 'claude plugin install'. beadle diverges.\n\nFix: add 'claude plugin uninstall \"beadle@$MARKETPLACE_NAME\" \u003c /dev/null 2\u003e/dev/null || true' immediately before the install call at install.sh:145. Match biff/quarry/vox phrasing.\n\nCompanion work: punt-kit standards do not document this pattern (distribution.md only says 'Idempotent — safe to re-run. Upgrades if already installed' without specifying how). 
Separate PR to punt-kit will add the explicit bullet so other projects don't reinvent the same gap.","status":"closed","priority":1,"issue_type":"task","owner":"claude@punt-labs.com","created_at":"2026-04-07T22:21:44.641420189-07:00","created_by":"J F","updated_at":"2026-04-07T22:58:49.619893035-07:00","closed_at":"2026-04-07T22:58:49.619893035-07:00","close_reason":"Merged in PR #91 (squash 8ab3568)","labels":["fix","install","upgrade-path"]} +{"id":"beadle-5ck","title":"feat(daemon): per-command MCP config wiring from command YAML","status":"closed","priority":1,"issue_type":"task","owner":"claude@punt-labs.com","created_at":"2026-04-14T19:44:23.320125431-07:00","created_by":"J F","updated_at":"2026-04-14T19:57:11.981573306-07:00","closed_at":"2026-04-14T19:57:11.981577306-07:00"} {"id":"beadle-5f7","title":"SessionStart hook: auto-allow permissions + command deployment","status":"closed","priority":1,"issue_type":"task","owner":"jmf@pobox.com","created_at":"2026-03-15T12:40:13.416903-07:00","created_by":"\"jmf-pobox\"","updated_at":"2026-03-15T12:57:40.992793-07:00","closed_at":"2026-03-15T12:57:40.992793-07:00","close_reason":"SessionStart hook already exists and is wired in hooks.json. 
Permissions are auto-allowed."} +{"id":"beadle-5tk","title":"feat(daemon): command YAML loader with GPG signature verification","status":"closed","priority":1,"issue_type":"task","owner":"claude@punt-labs.com","created_at":"2026-04-14T19:43:52.994660168-07:00","created_by":"J F","updated_at":"2026-04-14T19:51:42.15468048-07:00","closed_at":"2026-04-14T19:51:42.15469096-07:00"} {"id":"beadle-5xx","title":"Fix permission model bugs: remove owner override and removeContact gate","status":"closed","priority":1,"issue_type":"bug","owner":"jmf@pobox.com","created_at":"2026-03-19T21:45:39.127152-07:00","created_by":"\"jmf-pobox\"","updated_at":"2026-03-19T21:50:19.545956-07:00","closed_at":"2026-03-19T21:50:19.545956-07:00","close_reason":"Closed"} {"id":"beadle-607","title":"Set license to MIT: add LICENSE file, update README","status":"closed","priority":2,"issue_type":"task","owner":"jmf@pobox.com","created_at":"2026-03-18T06:03:36.687775-07:00","created_by":"\"jmf-pobox\"","updated_at":"2026-03-18T07:08:41.554646-07:00","closed_at":"2026-03-18T07:08:41.554646-07:00","close_reason":"Merged in PR #34. 
MIT LICENSE file added, README updated."} {"id":"beadle-6ob","title":"feat(daemon): poller reuse — extract Poller from MCP context to standalone daemon","status":"closed","priority":1,"issue_type":"task","owner":"claude@punt-labs.com","created_at":"2026-04-14T06:24:38.852672803-07:00","created_by":"J F","updated_at":"2026-04-14T07:18:46.973246488-07:00","closed_at":"2026-04-14T07:18:46.973248578-07:00"} @@ -32,7 +34,8 @@ {"id":"beadle-7j8","title":"read_message: add max_body_length parameter for lightweight preview","status":"closed","priority":3,"issue_type":"feature","owner":"claude@punt-labs.com","created_at":"2026-04-01T22:00:19.92239-07:00","created_by":"\"jmf-pobox\"","updated_at":"2026-04-01T22:30:07.452779-07:00","closed_at":"2026-04-01T22:30:07.452779-07:00","close_reason":"Closed"} {"id":"beadle-7td","title":"make dist must generate checksums.txt","description":"make dist cross-compiles 4 binaries but does not generate checksums.txt. install.sh requires checksums.txt for SHA256 verification (step 4). v0.5.0 and v0.6.0 were both released without checksums — v0.6.0 was manually patched by uploading checksums after the fact. Fix: add shasum -a 256 to make dist. Also add checksums.txt generation to the release workflow in CLAUDE.md so it's not missed again.","status":"closed","priority":1,"issue_type":"bug","owner":"jmf@pobox.com","created_at":"2026-03-20T14:53:35.525762-07:00","created_by":"\"jmf-pobox\"","updated_at":"2026-03-20T15:37:05.755711-07:00","closed_at":"2026-03-20T15:37:05.755711-07:00","close_reason":"PR #58 merged: make dist generates checksums.txt, backfilled v0.5.0 and v0.6.0"} {"id":"beadle-7yb","title":"session-start hook: add standalone MCP tool glob to auto-allow permissions","description":"The hook only adds mcp__plugin_beadle_email__* to permissions.allow but not mcp__beadle-email__* (standalone MCP server pattern). Users with standalone registration get prompted for every tool call. 
Fix: add both globs to PLUGIN_RULES in hooks/session-start.sh.","status":"closed","priority":2,"issue_type":"bug","owner":"claude@punt-labs.com","created_at":"2026-04-02T07:25:42.217506-07:00","created_by":"\"jmf-pobox\"","updated_at":"2026-04-07T23:26:16.999089229-07:00","closed_at":"2026-04-07T23:26:16.999089229-07:00","close_reason":"Already done in PR #90 (commit 0ffaa50, 2026-04-02). hooks/session-start.sh:79 includes 'mcp__beadle-email__*' alongside the plugin glob \"\". Bead was just stale, not stuck."} -{"id":"beadle-88g","title":"feat(daemon): pipeline orchestrator — serial mission chaining with failure handling","status":"in_progress","priority":1,"issue_type":"task","owner":"claude@punt-labs.com","created_at":"2026-04-14T06:24:54.85506948-07:00","created_by":"J F","updated_at":"2026-04-14T15:41:56.908064178-07:00"} +{"id":"beadle-85e","title":"feat(daemon): pipeline executor — sequential command chaining with try/else","status":"closed","priority":1,"issue_type":"task","owner":"claude@punt-labs.com","created_at":"2026-04-14T19:44:04.421419922-07:00","created_by":"J F","updated_at":"2026-04-14T20:03:32.762263945-07:00","closed_at":"2026-04-14T20:03:32.762269115-07:00"} +{"id":"beadle-88g","title":"feat(daemon): pipeline orchestrator — serial mission chaining with failure handling","status":"closed","priority":1,"issue_type":"task","owner":"claude@punt-labs.com","created_at":"2026-04-14T06:24:54.85506948-07:00","created_by":"J F","updated_at":"2026-04-14T18:46:31.138443382-07:00","closed_at":"2026-04-14T18:46:31.138447612-07:00"} {"id":"beadle-8tb","title":"refactor(cli): migrate to cobra framework","description":"Hand-rolled arg parser caused --flag=value bug. 
Migrate all 14 subcommands to cobra per punt-kit/standards/cli.md Go guidance.","status":"closed","priority":2,"issue_type":"feature","owner":"jmf@pobox.com","created_at":"2026-03-21T08:48:34.232529-07:00","created_by":"\"jmf-pobox\"","updated_at":"2026-03-21T09:17:15.963332-07:00","closed_at":"2026-03-21T09:17:15.963332-07:00","close_reason":"PR #65 merged: cobra migration complete"} {"id":"beadle-8u0","title":"Auto-allow skill permissions in SessionStart hook","status":"closed","priority":1,"issue_type":"task","owner":"jmf@pobox.com","created_at":"2026-03-17T09:20:46.171496-07:00","created_by":"\"jmf-pobox\"","updated_at":"2026-03-17T10:09:41.67772-07:00","closed_at":"2026-03-17T10:09:41.67772-07:00","close_reason":"Skill permissions auto-allowed in SessionStart hook, PR #29 merged"} {"id":"beadle-94n","title":"Docs and descriptions out of sync","description":"PR #1 review feedback. Four doc issues:\n\n1. `send_email` tool description says \"via Resend API\" — should say \"via Proton Bridge SMTP with Resend fallback\"\n2. README features list mentions \"search\" tool that doesn't exist — remove\n3. `email-channel-plan.md` config snippet has `imap_password_file` and `resend_api_key_file` fields that don't exist in implementation\n4. 
`verify.go` `Verify()` uses `os.MkdirTemp(\"\")` which can exceed gpg-agent socket path length on macOS — use `/tmp` like tests do\n\nFiles: `internal/mcp/tools.go`, `README.md`, `docs/email-channel-plan.md`, `internal/pgp/verify.go`","status":"closed","priority":2,"issue_type":"task","owner":"jmf@pobox.com","created_at":"2026-03-13T11:15:55.647764-07:00","created_by":"jmf-pobox","updated_at":"2026-03-13T11:21:08.863403-07:00","closed_at":"2026-03-13T11:21:08.863403-07:00","close_reason":"Closed"} @@ -49,11 +52,13 @@ {"id":"beadle-bof","title":"Fix installer showing 'ready' when doctor fails","status":"closed","priority":2,"issue_type":"bug","owner":"claude@punt-labs.com","created_at":"2026-03-29T18:52:58.919331-07:00","created_by":"\"jmf-pobox\"","updated_at":"2026-03-29T19:15:05.944491-07:00","closed_at":"2026-03-29T19:15:05.944491-07:00","close_reason":"Closed"} {"id":"beadle-bta","title":"fix(docs): update README install SHA to v0.6.1","status":"closed","priority":1,"issue_type":"bug","owner":"jmf@pobox.com","created_at":"2026-03-21T07:38:18.401297-07:00","created_by":"\"jmf-pobox\"","updated_at":"2026-03-21T07:43:39.292321-07:00","closed_at":"2026-03-21T07:43:39.292321-07:00","close_reason":"PR with README SHA fix"} {"id":"beadle-btr","title":"feat(daemon): worker spawner — exec.Command claude -p --bare with timeout and JSON capture","status":"closed","priority":1,"issue_type":"task","owner":"claude@punt-labs.com","created_at":"2026-04-14T06:24:42.782442386-07:00","created_by":"J F","updated_at":"2026-04-14T08:10:37.905932915-07:00","closed_at":"2026-04-14T08:10:37.905936235-07:00"} +{"id":"beadle-buw","title":"test(daemon): pipeline end-to-end integration test — multi-stage command pipeline","status":"open","priority":1,"issue_type":"task","owner":"claude@punt-labs.com","created_at":"2026-04-14T19:44:29.344167621-07:00","created_by":"J F","updated_at":"2026-04-14T19:44:29.344167621-07:00"} {"id":"beadle-by4","title":"fix(inbox): set_poll_interval must also 
create CronCreate loop for autonomous processing","description":"## Problem\n\nset_poll_interval persists the interval and starts the MCP poller, but the poller only sends tools/list_changed notifications. Nothing autonomously invokes /inbox — the feature only works when the user is interactive.\n\n## Required Changes\n\n1. commands/inbox.md — when a valid interval is received (5m, 10m, 15m, 30m, 1h, 2h): after calling set_poll_interval, also create a CronCreate job at the same interval to run /inbox. When 'n' is received: also delete the existing cron job.\n\n2. Session-start behavior — at session start, call get_poll_status; if Active=true and interval is set, recreate the CronCreate job (session-scoped jobs don't survive Claude Code restarts).\n\n## Acceptance Criteria\n\n- Setting a poll interval creates both the MCP poller and a CronCreate loop\n- Disabling polling (n) removes both\n- After Claude Code restart, session-start check recreates the cron job if polling is configured\n- /inbox is invoked autonomously at the configured interval regardless of user presence","status":"closed","priority":2,"issue_type":"task","owner":"claude@punt-labs.com","created_at":"2026-04-10T07:24:28.917878602-07:00","created_by":"J F","updated_at":"2026-04-10T07:59:01.084090453-07:00","closed_at":"2026-04-10T07:59:01.084090453-07:00","close_reason":"Closed"} {"id":"beadle-cbo","title":"beadle-email doctor should detect unprotected GPG keys","description":"Today doctor unconditionally reports '[!] gpg_passphrase credential not found' whenever the gpg-passphrase secret is absent. 
This is wrong for keys that have no passphrase set — doctor treats a non-issue as a failure, causing doctor to exit 1 and masking real problems.\n\nProposed behavior: at doctor run, probe the configured signing key (cfg.GPGSigner) with gpg --batch --pinentry-mode=error --dry-run --sign and classify:\n- Key needs passphrase and secret is set → [+] gpg_passphrase\n- Key needs passphrase and secret missing → [!] gpg_passphrase credential not found\n- Key has no passphrase → [+] gpg_passphrase not required (unprotected key) OR a separate warning '[!] signing key unprotected'\n\nSecond option is arguably better because it surfaces the unprotected state rather than hiding it. That aligns with the 'gpg is a real part of the system' principle — unprotected signing keys are a posture concern worth reporting, not silencing.\n\nRelated: beadle-72e (Claude key has no expiration AND no passphrase).\n\nScope:\n- cmd/beadle-email/admin_cmd.go (doctor handler): add the probe and three-way classification\n- Possibly new helper in internal/pgp/ (e.g. pgp.KeyHasPassphrase(signer string) (bool, error))\n- Tests with a sandboxed GNUPGHOME containing a passphraseless test key\n- Update docs/setup-guide.md doctor output table","status":"closed","priority":3,"issue_type":"task","owner":"claude@punt-labs.com","created_at":"2026-04-07T21:58:17.130789922-07:00","created_by":"J F","updated_at":"2026-04-08T13:13:30.124636406-07:00","closed_at":"2026-04-08T13:13:30.124636406-07:00","close_reason":"Merged in PR #101 (squash 050d5f0)","labels":["doctor","feature","security"]} {"id":"beadle-d0h","title":"Security hardening: timeouts, TLS, path traversal","description":"PR #1 review feedback. Five related hardening issues:\n\n1. `secret.Get()` should reject path separators in name and enforce file permissions before reading\n2. IMAP `Dial()` uses `net.Dial` with no timeout — use `net.DialTimeout`\n3. IMAP/SMTP `InsecureSkipVerify` should only be allowed for loopback addresses\n4. 
`Send()` uses `http.DefaultClient` with no timeout — create client with 30s timeout\n5. SMTP `net.Dial` needs timeout, magic number `2*1e9` should be `2*time.Second`\n\nFiles: `internal/secret/secret.go`, `internal/email/imap.go`, `internal/email/smtp.go`, `internal/email/send.go`","status":"closed","priority":1,"issue_type":"bug","owner":"jmf@pobox.com","created_at":"2026-03-13T11:15:55.416819-07:00","created_by":"jmf-pobox","updated_at":"2026-03-13T11:18:56.946398-07:00","closed_at":"2026-03-13T11:18:56.946398-07:00","close_reason":"Closed"} {"id":"beadle-dfp","title":"list_messages unread_only filter returns null for unread messages","status":"closed","priority":2,"issue_type":"bug","owner":"jmf@pobox.com","created_at":"2026-03-15T22:25:15.803605-07:00","created_by":"\"jmf-pobox\"","updated_at":"2026-03-15T22:39:54.288585-07:00","closed_at":"2026-03-15T22:39:54.288585-07:00","close_reason":"Fixed in PR #22 — UIDSearch + empty slice return"} {"id":"beadle-dsv","title":"feat(daemon): mission manager — create contracts from email, collect results, close","status":"closed","priority":1,"issue_type":"task","owner":"claude@punt-labs.com","created_at":"2026-04-14T06:24:46.683591942-07:00","created_by":"J F","updated_at":"2026-04-14T07:43:16.168824023-07:00","closed_at":"2026-04-14T07:43:16.168826493-07:00"} +{"id":"beadle-e3w","title":"feat(daemon): pipeline state persistence with crash recovery","status":"closed","priority":1,"issue_type":"task","owner":"claude@punt-labs.com","created_at":"2026-04-14T19:44:10.547409744-07:00","created_by":"J F","updated_at":"2026-04-14T20:08:54.63550117-07:00","closed_at":"2026-04-14T20:08:54.63550422-07:00"} {"id":"beadle-ffx","title":"CLI parity: add list, read, send, move 
subcommands","status":"closed","priority":1,"issue_type":"task","owner":"jmf@pobox.com","created_at":"2026-03-18T06:03:35.2238-07:00","created_by":"\"jmf-pobox\"","updated_at":"2026-03-18T09:03:41.542034-07:00","closed_at":"2026-03-18T09:03:41.542034-07:00","close_reason":"Merged in PR #38. CLI parity (list/read/send/move/folders), global flags (--json/--verbose/--quiet), install/uninstall subcommands."} {"id":"beadle-fpn","title":"Code quality: typed responses, UTF-8 truncation, boundary parsing","description":"PR #1 review feedback. Three code quality issues:\n\n1. `tools.go` uses `map[string]any` for tool responses — define typed structs per CLAUDE.md Go standards\n2. `truncate()` in `mime.go` uses byte slicing — splits multi-byte UTF-8 runes. Use rune-aware slicing.\n3. `extractSignedParts` in `verify.go` uses `bytes.Split` on boundary — can mis-split if boundary appears in body. Should anchor on CRLF + boundary at line start.\n\nFiles: `internal/mcp/tools.go`, `internal/email/mime.go`, `internal/pgp/verify.go`","status":"closed","priority":2,"issue_type":"task","owner":"jmf@pobox.com","created_at":"2026-03-13T11:15:55.759178-07:00","created_by":"jmf-pobox","updated_at":"2026-03-13T11:23:35.309301-07:00","closed_at":"2026-03-13T11:23:35.309301-07:00","close_reason":"Closed"} {"id":"beadle-fsj","title":"Plugin scaffold: .claude-plugin/plugin.json, hooks/, commands/","description":"Beadle has no Claude Code plugin infrastructure. Create the scaffold: .claude-plugin/plugin.json (name, version, mcpServers for beadle-email), hooks/ directory with hooks.json, commands/ directory. Follow punt-kit plugin standards. The mcpServers entry should reference the beadle-email binary with stdio transport. 
Dev name: beadle-dev, prod name: beadle.","status":"closed","priority":1,"issue_type":"task","owner":"jmf@pobox.com","created_at":"2026-03-13T14:54:39.676186-07:00","created_by":"\"jmf-pobox\"","updated_at":"2026-03-13T15:12:49.270052-07:00","closed_at":"2026-03-13T15:12:49.270052-07:00","close_reason":"Plugin scaffold created: plugin.json, hooks.json, suppress-output.sh, session-start.sh, commands/. DES-011 documents dual install path (plugin vs MCP-only)."} @@ -84,6 +89,7 @@ {"id":"beadle-th6","title":"format: cap total FROM cell width to bound row growth after z34 annotation","description":"After beadle-z34 merged, the FROM column can render up to 39 runes wide in the worst case: 20-rune display name (capped by maxDisplayNameRunes) + ' (via ' (6) + 12-rune domain label (maxRelayLabelRunes) + ')' (1) = 39. Combined with the EMAIL column added in beadle-0he, a long-name relay row can blow past the 80-col soft budget.\n\nProposed fix: cap the total FROM cell width, not just the display-name portion. Split budget: e.g. 28 runes total for the FROM cell. When the annotation would push past that, truncate the name further to fit. Precedence: the '(via \u003cdomain\u003e)' suffix is more important than the name's last few characters, because the disambiguation is the entire point of the annotation.\n\nSuggested implementation:\n- New const maxFromCellRunes = 28 (or similar, tunable).\n- formatFromCell computes the annotation first, subtracts its width from maxFromCellRunes, uses the remainder as the display-name budget (not maxDisplayNameRunes).\n- When the annotation is absent, the existing maxDisplayNameRunes cap still applies.\n\nTest additions:\n- A 25-rune name + relay annotation — name must be truncated further than maxDisplayNameRunes to fit the annotation.\n- A 10-rune name + relay annotation — no additional truncation needed.\n\nFiled as follow-up from pre-PR review of beadle-z34 by feature-dev:code-reviewer. 
The reviewer's exact wording: 'Consider capping the total FROM cell (name + annotation) to a fixed budget (e.g., 32 runes) to bound the column width regression relative to 0he.'\n\nNon-blocking for z34 ship — the current behavior is cosmetically wide but functionally correct. The variable SUBJECT column absorbs the excess via formatTable's existing clamp logic.\n\nRelated:\n- beadle-0he: introduced the EMAIL column (this PR adds to the total row width)\n- beadle-z34: introduced the relay annotation (the new width contribution)","notes":"Obsolete. The z34 (via \u003cdomain\u003e) annotation this bead was capping was deleted in DES-018 (PR #106). The new FROM column already enforces a total width via formatFromCell's name-cap logic (37 − len(email) − 3), so there is no unbounded growth to bound.","status":"closed","priority":3,"issue_type":"task","owner":"claude@punt-labs.com","created_at":"2026-04-08T17:01:42.168203501-07:00","created_by":"J F","updated_at":"2026-04-09T02:42:01.958851736-07:00","closed_at":"2026-04-09T02:42:01.958855056-07:00","labels":["format","tech-debt"]} {"id":"beadle-twc","title":"Add --json, --verbose, --quiet global flags to CLI","status":"closed","priority":1,"issue_type":"task","owner":"jmf@pobox.com","created_at":"2026-03-18T06:03:37.253054-07:00","created_by":"\"jmf-pobox\"","updated_at":"2026-03-18T09:03:41.54664-07:00","closed_at":"2026-03-18T09:03:41.54664-07:00","close_reason":"Merged in PR #38. CLI parity (list/read/send/move/folders), global flags (--json/--verbose/--quiet), install/uninstall subcommands."} {"id":"beadle-tx7","title":"download_attachment: extract and return attachment content by part index","description":"read_message lists attachment metadata (filename, content type, size) but cannot extract the actual content. Add a download_attachment tool that takes message_id and part index, extracts the MIME part bytes, and returns them (base64 for binary, text for text parts). Add suppress-output handler. 
Enables reading attached documents, saving files locally, forwarding attachments.","status":"closed","priority":1,"issue_type":"feature","owner":"jmf@pobox.com","created_at":"2026-03-13T23:53:55.404699-07:00","created_by":"\"jmf-pobox\"","updated_at":"2026-03-14T10:55:45.209781-07:00","closed_at":"2026-03-14T10:55:45.209781-07:00","close_reason":"Implemented in feat/download-attachment branch"} +{"id":"beadle-uce","title":"feat(daemon): Planner interface with LLM and Rule implementations","status":"closed","priority":1,"issue_type":"task","owner":"claude@punt-labs.com","created_at":"2026-04-14T19:43:58.103063784-07:00","created_by":"J F","updated_at":"2026-04-14T19:57:06.741025767-07:00","closed_at":"2026-04-14T19:57:06.741032727-07:00"} {"id":"beadle-vkh","title":"Fix ethos repo config contract: active → agent","status":"closed","priority":1,"issue_type":"bug","owner":"jmf@pobox.com","created_at":"2026-03-21T22:37:14.757186-07:00","created_by":"\"jmf-pobox\"","updated_at":"2026-03-21T23:03:19.439572-07:00","closed_at":"2026-03-21T23:03:19.439572-07:00","close_reason":"Closed"} {"id":"beadle-vwm","title":"PGP verification consistency: read_message and trust model fixes","description":"PR #1 review feedback. Three related issues:\n\n1. `read_message` handler doesn't run PGP verification like `list_messages` — user sees `verified` in listing but `unverified` when reading the same message\n2. `sign.go` declares `Content-Transfer-Encoding: quoted-printable` but writes body verbatim — change to `7bit`\n3. 
Dead re-parse in `verify.go` L159-163 — `mail.ReadMessage` result immediately discarded\n\nFiles: `internal/mcp/tools.go`, `internal/pgp/sign.go`, `internal/pgp/verify.go`","status":"closed","priority":1,"issue_type":"bug","owner":"jmf@pobox.com","created_at":"2026-03-13T11:15:55.307398-07:00","created_by":"jmf-pobox","updated_at":"2026-03-13T11:17:01.718592-07:00","closed_at":"2026-03-13T11:17:01.718592-07:00","close_reason":"Closed"} {"id":"beadle-vyv","title":"design: beadle as orchestrator — MCP server spawns Claude Code for inbox processing","description":"## Problem\n\nThe current two-layer polling design (DES-015) has a sync gap: the MCP server detects new mail (background goroutine) but relies on a durable CronCreate job in Claude Code for processing. These can drift — CronCreate expires after 7 days, manual intervention breaks sync, session crashes leave one layer orphaned.\n\nIn the Docker sandbox model, this gets worse: who starts Claude Code? Today Claude Code starts beadle. In a sandbox, beadle is the long-running daemon.\n\n## Solution\n\nInvert the launch. beadle spawns Claude Code with a prompt file:\n\n claude --prompt-file .beadle/startup.md\n\nWhere startup.md is:\n\n /loop 5m /inbox 5m\n\nThis single command sets both layers atomically:\n1. /loop 5m creates the durable CronCreate job that fires /inbox every 5m (processing layer)\n2. /loop immediately executes /inbox 5m on the first fire\n3. /inbox 5m calls set_poll_interval → MCP server background goroutine starts (detection layer)\n\nBoth layers in sync from the first turn. No SessionStart hooks, no CLAUDE.md instructions, no hoping the model acts. 
The prompt IS a user message — the model always processes it.\n\n## Sandbox startup sequence\n\n sbx run claude --prompt-file .beadle/startup.md\n\nOr beadle spawns it internally:\n\n exec.Command(\"claude\", \"--prompt-file\", promptPath)\n\nThe sync problem disappears because there is no gap between \"server starts\" and \"processing starts.\" One atomic launch.\n\n## Open Questions\n\n- Does claude --prompt-file exist today? What flags control headless/batch execution?\n- Can beadle inside a sandbox spawn claude inside the same sandbox?\n- What happens when the Claude Code session ends (context limit, crash)? Does beadle detect and re-launch?\n- Does this violate \"zero agent authority\"? The owner's signed config (poll_interval in email.json) is the authorization. The prompt file is a static, auditable artifact.\n- How does this interact with N:1 multi-session via mcp-proxy? The prompt-file session is the \"processing\" session; interactive sessions connect separately.\n\n## Relationship to Current Work\n\n- Not blocking beadle-o1w (Docker image). Ship the current two-layer design first.\n- This is a future architecture for when beadle runs as a long-lived sandbox daemon.\n- The PreToolUse sync-check hook is the interim enforcement for the current design.","status":"closed","priority":2,"issue_type":"feature","owner":"claude@punt-labs.com","created_at":"2026-04-12T08:32:53.391598859-07:00","created_by":"J F","updated_at":"2026-04-14T00:05:40.329372659-07:00","closed_at":"2026-04-14T00:05:40.329375869-07:00"} @@ -91,6 +97,7 @@ {"id":"beadle-xoo","title":"format: relayDomainLabel picks wrong label for PSL-style TLDs (.co.uk etc.)","description":"relayDomainLabel in internal/mcp/format.go uses a naive 'second-to-last dot-separated label' rule to pick the registrable label from an email domain. 
This gives the right answer for 2- and 3-label domains (github.com → github, ci.vercel.app → vercel) but the wrong answer for Public Suffix List (PSL) multi-label TLDs:\n\n x@example.co.uk → labels ['example', 'co', 'uk'] → picks 'co' (wrong, should be 'example')\n x@a.b.example.co.uk → labels ['a', 'b', 'example', 'co', 'uk'] → picks 'co' (wrong)\n\nThe relay annotation for such senders would render as '(via co)' instead of '(via example)', which is confusing but not security-relevant.\n\nDocumented as a known limitation in the patch code comments and the z34 bwk design notes. Filed here as a standalone bead so it's tracked.\n\nProposed fix: depend on a PSL library or table. Go has several options:\n- golang.org/x/net/publicsuffix — stdlib-adjacent, maintained, small dependency\n- A vendored PSL data file\n\nFor beadle's use case (displaying relay annotations), even a small hardcoded list of common PSL exceptions (co.uk, com.au, co.jp, etc.) would cover 99% of real traffic without the dependency weight.\n\nTest cases that would verify the fix:\n- x@example.co.uk → 'example'\n- x@example.com.au → 'example'\n- x@a.example.co.jp → 'example'\n\nNon-blocking. The current heuristic is correct for the primary targets (github.com, vercel.app, resend.com, etc.).","notes":"Obsolete. relayDomainLabel was deleted in DES-018 (PR #106) along with the rest of the z34 relay-annotation helpers. The PSL-bug this bead tracked no longer exists in the code.","status":"closed","priority":3,"issue_type":"task","owner":"claude@punt-labs.com","created_at":"2026-04-08T17:01:42.261597727-07:00","created_by":"J F","updated_at":"2026-04-09T02:41:56.367996584-07:00","closed_at":"2026-04-09T02:41:56.367999994-07:00","labels":["format","tech-debt"]} {"id":"beadle-ycx","title":"Contact permissions: per-identity rwx trust model","description":"Add permissions map to Contact struct: map[identity_email] → 'rwx'|'rw-'|'r--'|'---'. 
When processing inbound mail, gate behavior based on (current identity, sender) permissions combined with transport trust. Default r-- for unknown contacts. Owner always rwx. No inheritance between identities. See DES-012.","status":"closed","priority":1,"issue_type":"feature","owner":"jmf@pobox.com","created_at":"2026-03-18T08:02:34.692759-07:00","created_by":"\"jmf-pobox\"","updated_at":"2026-03-19T19:47:13.599052-07:00","closed_at":"2026-03-19T19:47:13.599052-07:00","close_reason":"Closed","dependencies":[{"issue_id":"beadle-ycx","depends_on_id":"beadle-3um","type":"blocks","created_at":"2026-03-18T08:02:50.00522-07:00","created_by":"\"jmf-pobox\""}]} {"id":"beadle-yqc","title":"send_email: support file attachments via multipart/mixed MIME","description":"send_email only sends text/plain today. Add an attachments parameter (list of file paths) and build multipart/mixed MIME in ComposeRaw. Must work through both Proton Bridge SMTP and Resend API paths. Enables /mail me a PDF, /send me the test results as a file, etc.","status":"closed","priority":1,"issue_type":"feature","owner":"jmf@pobox.com","created_at":"2026-03-13T23:53:54.653387-07:00","created_by":"\"jmf-pobox\"","updated_at":"2026-03-14T09:51:28.24362-07:00","closed_at":"2026-03-14T09:51:28.24362-07:00","close_reason":"Merged in PR #15"} +{"id":"beadle-yv6","title":"fix(daemon): migrate contract from inputs.ticket to inputs.trigger","status":"closed","priority":1,"issue_type":"task","owner":"claude@punt-labs.com","created_at":"2026-04-14T19:44:16.377444653-07:00","created_by":"J F","updated_at":"2026-04-14T19:49:10.950908445-07:00","closed_at":"2026-04-14T19:49:10.950912975-07:00"} {"id":"beadle-z16","title":"feat(rwx): enforce x-bit — mission authorization for email-triggered actions","description":"## Problem\n\nThe x bit in DES-012's rwx permission model has been declared but never enforced. 
Contacts with x permission can theoretically trigger autonomous actions, but no code checks x before taking action.\n\n## Solution\n\nWhen beadle's inbox processing detects an email containing an instruction from a contact with x permission, it creates an ethos mission contract. The x bit is the gate: without it, beadle reads and replies but never creates missions.\n\nEnforcement lives in beadle (not ethos). Beadle checks x on the contact, then calls ethos mission create. Ethos validates the contract but doesn't know or care about email permissions.\n\n## Depends on\n\n- beadle-vyv (orchestrator — daemon spawns Claude Code as mission worker)\n- ethos Phase 3.4+ (round enforcement, result artifacts)\n\n## See also\n\n- docs/orchestrator-design.md\n- DES-012 (rwx model)\n- Ethos DES-031 (mission contracts)","status":"closed","priority":1,"issue_type":"feature","owner":"claude@punt-labs.com","created_at":"2026-04-12T11:43:51.614339494-07:00","created_by":"J F","updated_at":"2026-04-14T07:43:11.225229111-07:00","closed_at":"2026-04-14T07:43:11.225232211-07:00"} {"id":"beadle-z34","title":"list_messages FROM column is misleading for notification services","description":"list_messages shows the display name from the From header without any indication that the actual sender is a notification relay. For GitHub in particular, every notification uses From: 'J Freeman \u003cnotifications@github.com\u003e' or 'Copilot \u003cnotifications@github.com\u003e' where the display name varies by who triggered the event but the email is always notifications@github.com.\n\nVerified 2026-04-08 during beadle-1aj troubleshooting: 285 unread messages in the claude@punt-labs.com inbox, listed under 7 distinct display names (J Freeman, Copilot, cursor[bot], vercel[bot], Claude Agento, github-actions[bot], claude[bot]). Every single message actually came from notifications@github.com. 
The display names were misleading: 'J Freeman' appears as a sender because Jim (jim@punt-labs.com) triggered a GitHub event, not because Jim sent beadle an email. 'Claude Agento' similarly shows up for events I triggered via my claude-puntlabs GitHub account.\n\nImpact:\n- Readers (human or agent) naturally trust the display name and act on it as the sender identity. In beadle's permission model this is wrong: the permission lookup uses the EMAIL not the display name, so the Jim Freeman contact with rwx does not grant permission on 'J Freeman' messages because those are actually notifications@github.com.\n- Contact bootstrap appears to need 7 contacts but actually needs 1 (notifications@github.com with r--).\n- Agents reading list_messages output can misattribute messages to the wrong identity.\n\nProposed fix (pair with beadle-\u003cother-one-filed-alongside-this\u003e):\n\n1. list_messages FROM column shows 'J Freeman via github.com' or 'J Freeman (notifications@github.com)' for messages where the From email domain does not match the claimed display-name identity.\n2. For known notification-relay senders (notifications@github.com, vercel, resend, sendgrid, etc.), list_messages could add a 'NOTIFY' badge or similar marker.\n3. Simpler: always show the email next to the display name. Addresses both this bead and the discoverability bead.\n\nRelated: display-name spoofing is a real security concern. A phishing email with From: 'Jim Freeman \u003cattacker@evil.com\u003e' would show up as 'Jim Freeman' in the FROM column of list_messages today. The agent would correctly NOT grant it Jim's permissions (since email lookup is by address), but a human skimming the list might trust it. 
Surfacing the email alongside the display name is a defense in depth against display-name spoofing.\n\nFiled during beadle-1aj troubleshooting 2026-04-08 on the same 285-message backlog that exposed it.","status":"closed","priority":3,"issue_type":"bug","owner":"claude@punt-labs.com","created_at":"2026-04-08T10:48:04.301264425-07:00","created_by":"J F","updated_at":"2026-04-08T17:27:43.324554542-07:00","closed_at":"2026-04-08T17:27:43.324557712-07:00","labels":["bug","contacts","security","ux"]} {"id":"beadle-z58","title":".claude/scheduled_tasks.json should be gitignored","description":"CronCreate with durable:true writes to .claude/scheduled_tasks.json. The file is machine-local cron persistence state, not repo config. .gitignore includes .claude/scheduled_tasks.lock but not the .json. After /inbox 10m ships the durable cron in PR #128, any session that re-runs /inbox 10m leaves an untracked file in the worktree that could be accidentally committed. Add '.claude/scheduled_tasks.json' to .gitignore next to .claude/scheduled_tasks.lock. One-line change.","status":"closed","priority":3,"issue_type":"task","owner":"claude@punt-labs.com","created_at":"2026-04-11T08:55:31.691210887-07:00","created_by":"J F","updated_at":"2026-04-12T06:45:10.990521763-07:00","closed_at":"2026-04-12T06:45:10.990521763-07:00","close_reason":"Closed"} diff --git a/CHANGELOG.md b/CHANGELOG.md index f9bbe09..32e539d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ### Added +- Pipeline orchestrator implementation: command YAML loader with typed arg + validation, Planner interface (Rule + LLM stub), sequential pipeline + executor with try/else error handling, per-command MCP config wiring, + crash-safe pipeline state persistence with startup recovery. 
+ (beadle-5tk, beadle-uce, beadle-5ck, beadle-85e, beadle-e3w) - DES-027: Orchestrator design — daemon spawns Claude Code workers via `claude -p --bare` print mode with per-mission MCP config, system prompt, safety bounds, and ethos missions as control plane. (beadle-vyv) diff --git a/cmd/beadle-daemon/main.go b/cmd/beadle-daemon/main.go index 81482a1..2e3fe40 100644 --- a/cmd/beadle-daemon/main.go +++ b/cmd/beadle-daemon/main.go @@ -55,6 +55,25 @@ var runCmd = &cobra.Command{ if err != nil { return fmt.Errorf("resolve data dir: %w", err) } + + store := &daemon.PipelineStore{ + Dir: filepath.Join(dataDir, "pipelines"), + Logger: logger, + } + stale, err := store.LoadRunning() + if err != nil { + logger.Warn("load stale pipelines", "error", err) + } + for _, p := range stale { + logger.Warn("pipeline was running when daemon stopped", + "pipeline", p.ID, "from", p.Email.From) + p.Status = "failed" + p.Error = "daemon stopped while pipeline was running" + if saveErr := store.Save(p); saveErr != nil { + logger.Error("mark stale pipeline failed", "pipeline", p.ID, "error", saveErr) + } + } + missionsTmpDir := filepath.Join(dataDir, "tmp", "missions") if err := os.MkdirAll(missionsTmpDir, 0o750); err != nil { return fmt.Errorf("create missions tmp dir: %w", err) @@ -94,7 +113,7 @@ var runCmd = &cobra.Command{ logger.Warn("worker spawning disabled: no API key found (checked: secret backends, ANTHROPIC_API_KEY env)") } - handler := daemon.NewMailHandler(cmd.Context(), resolver, email.DefaultDialer{}, missions, spawner, templates, logger, 0) + handler := daemon.NewMailHandler(cmd.Context(), resolver, email.DefaultDialer{}, missions, spawner, templates, logger, 0, nil, nil) defer handler.Stop() poller := email.NewPoller(handler.OnNewMail, resolver, logger, email.DefaultDialer{}) diff --git a/internal/daemon/command.go b/internal/daemon/command.go new file mode 100644 index 0000000..1aeca45 --- /dev/null +++ b/internal/daemon/command.go @@ -0,0 +1,228 @@ +package daemon + 
// CommandArg describes one typed argument accepted by a command.
type CommandArg struct {
	// Name is the argument's identifier as referenced by callers.
	Name string `yaml:"name"`
	// Type is one of: string, enum, int, bool.
	Type string `yaml:"type"`
	// Values lists the permitted strings for enum-typed arguments.
	Values []string `yaml:"values"`
	// MaxLength bounds string-typed values in bytes; 0 means unlimited.
	MaxLength int `yaml:"max_length"`
	// Required marks the argument as mandatory.
	Required bool `yaml:"required"`
	// Default is the value used when an optional argument is omitted.
	Default string `yaml:"default"`
}

// Command is a GPG-signed YAML command definition for the pipeline
// orchestrator. See DES-028 in DESIGN.md.
type Command struct {
	// Identity and provenance.
	Name        string `yaml:"name"`
	Description string `yaml:"description"`
	Signature   string `yaml:"signature"`

	// Declared argument schema; validated by ValidateArgs.
	Args []CommandArg `yaml:"args"`

	// Input is one of none, optional, required.
	Input string `yaml:"input"`
	// Output is one of prose, json, files.
	Output   string   `yaml:"output"`
	WriteSet []string `yaml:"write_set"`

	// Budget bounds worker effort per invocation.
	Budget struct {
		Rounds              int  `yaml:"rounds"`
		ReflectionAfterEach bool `yaml:"reflection_after_each"`
	} `yaml:"budget"`

	// Timeout is a Go duration string (2m, 30m, ...); empty means none.
	Timeout string `yaml:"timeout"`
	Prompt  string `yaml:"prompt"`

	// Capability wiring for the spawned worker.
	Tools      []string `yaml:"tools"`
	MCPServers []string `yaml:"mcp_servers"`
	EnvVars    []string `yaml:"env_vars"`
}

// validArgTypes enumerates the recognized CommandArg.Type values.
var validArgTypes = map[string]bool{"string": true, "enum": true, "int": true, "bool": true}

// validInputModes enumerates the recognized Command.Input values.
var validInputModes = map[string]bool{"none": true, "optional": true, "required": true}

// validOutputModes enumerates the recognized Command.Output values.
var validOutputModes = map[string]bool{"prose": true, "json": true, "files": true}
+func LoadCommands(dir string) (map[string]*Command, error) { + entries, err := os.ReadDir(dir) + if err != nil { + return nil, fmt.Errorf("read command dir %s: %w", dir, err) + } + + cmds := make(map[string]*Command) + for _, e := range entries { + if e.IsDir() || !strings.HasSuffix(e.Name(), ".yaml") { + continue + } + path := filepath.Join(dir, e.Name()) + cmd, err := loadCommand(path) + if err != nil { + slog.Warn("skip invalid command file", "path", path, "error", err) + continue + } + if _, dup := cmds[cmd.Name]; dup { + slog.Warn("skip duplicate command name", "name", cmd.Name, "path", path) + continue + } + cmds[cmd.Name] = cmd + } + return cmds, nil +} + +func loadCommand(path string) (*Command, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("read %s: %w", path, err) + } + + var cmd Command + dec := yaml.NewDecoder(strings.NewReader(string(data))) + dec.KnownFields(true) + if err := dec.Decode(&cmd); err != nil { + return nil, fmt.Errorf("parse %s: %w", path, err) + } + + if err := validateCommand(&cmd); err != nil { + return nil, fmt.Errorf("validate %s: %w", path, err) + } + return &cmd, nil +} + +func validateCommand(cmd *Command) error { + if cmd.Name == "" { + return fmt.Errorf("missing required field: name") + } + if cmd.Prompt == "" { + return fmt.Errorf("missing required field: prompt") + } + if cmd.Budget.Rounds <= 0 { + return fmt.Errorf("budget.rounds must be > 0, got %d", cmd.Budget.Rounds) + } + + // Default empty input/output to their zero-values. 
+ if cmd.Input == "" { + cmd.Input = "none" + } + if cmd.Output == "" { + cmd.Output = "prose" + } + if !validInputModes[cmd.Input] { + return fmt.Errorf("invalid input mode %q (want none, optional, required)", cmd.Input) + } + if !validOutputModes[cmd.Output] { + return fmt.Errorf("invalid output mode %q (want prose, json, files)", cmd.Output) + } + + if cmd.Timeout != "" { + if _, err := time.ParseDuration(cmd.Timeout); err != nil { + return fmt.Errorf("invalid timeout %q: %w", cmd.Timeout, err) + } + } + + for i, a := range cmd.Args { + if a.Name == "" { + return fmt.Errorf("arg[%d]: missing name", i) + } + if !validArgTypes[a.Type] { + return fmt.Errorf("arg %q: unrecognized type %q", a.Name, a.Type) + } + if a.Type == "enum" && len(a.Values) == 0 { + return fmt.Errorf("arg %q: enum type requires non-empty values list", a.Name) + } + } + return nil +} + +// ValidateArgs checks that args satisfies cmd's declared argument schema. +// Returns a descriptive error on the first violation. +func ValidateArgs(cmd *Command, args map[string]any) error { + // Build lookup of declared arg names. + declared := make(map[string]*CommandArg, len(cmd.Args)) + for i := range cmd.Args { + declared[cmd.Args[i].Name] = &cmd.Args[i] + } + + // Reject unknown arg names. + for name := range args { + if _, ok := declared[name]; !ok { + return fmt.Errorf("unknown arg %q for command %q", name, cmd.Name) + } + } + + // Check each declared arg. 
+ for _, a := range cmd.Args { + v, present := args[a.Name] + if !present { + if a.Required { + return fmt.Errorf("missing required arg %q for command %q", a.Name, cmd.Name) + } + continue + } + + switch a.Type { + case "string": + s, ok := v.(string) + if !ok { + return fmt.Errorf("arg %q: expected string, got %T", a.Name, v) + } + if a.MaxLength > 0 && len(s) > a.MaxLength { + return fmt.Errorf("arg %q: length %d exceeds max_length %d", a.Name, len(s), a.MaxLength) + } + case "int": + switch v.(type) { + case int, int64, float64: + // accept numeric types (JSON/YAML decode as float64 or int) + default: + return fmt.Errorf("arg %q: expected int, got %T", a.Name, v) + } + case "bool": + if _, ok := v.(bool); !ok { + return fmt.Errorf("arg %q: expected bool, got %T", a.Name, v) + } + case "enum": + s, ok := v.(string) + if !ok { + return fmt.Errorf("arg %q: expected string for enum, got %T", a.Name, v) + } + found := false + for _, allowed := range a.Values { + if s == allowed { + found = true + break + } + } + if !found { + return fmt.Errorf("arg %q: value %q not in allowed values %v", a.Name, s, a.Values) + } + } + } + return nil +} + +// VerifySignature is a stub for GPG signature verification of command files. +// The signing workflow is not yet defined; the Signature field exists so +// YAML can carry the signature for future verification. 
+func VerifySignature(cmd *Command, gpgBinary string) error { + return nil +} diff --git a/internal/daemon/command_test.go b/internal/daemon/command_test.go new file mode 100644 index 0000000..8919b61 --- /dev/null +++ b/internal/daemon/command_test.go @@ -0,0 +1,426 @@ +package daemon + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const validCommandYAML = `name: wall +description: Broadcast a message to all active agents via biff +signature: deadbeef +args: + - name: message + type: string + max_length: 500 + required: true + - name: channel + type: enum + values: [general, alerts] + required: false + default: general +input: none +output: prose +write_set: [] +budget: + rounds: 1 + reflection_after_each: false +timeout: 2m +prompt: | + Read the message arg from the mission contract and call biff wall. +tools: + - Bash +mcp_servers: + - ethos + - biff +env_vars: + - BIFF_TOKEN +` + +func writeYAML(t *testing.T, dir, name, content string) { + t.Helper() + err := os.WriteFile(filepath.Join(dir, name), []byte(content), 0o644) + require.NoError(t, err) +} + +func TestLoadCommands(t *testing.T) { + tests := []struct { + name string + files map[string]string + wantNames []string + wantErr bool + }{ + { + name: "valid single command", + files: map[string]string{"wall.yaml": validCommandYAML}, + wantNames: []string{"wall"}, + }, + { + name: "multiple valid commands", + files: map[string]string{ + "wall.yaml": validCommandYAML, + "deploy.yaml": `name: deploy +prompt: deploy the thing +budget: + rounds: 2 +`, + }, + wantNames: []string{"wall", "deploy"}, + }, + { + name: "skip missing name", + files: map[string]string{ + "bad.yaml": `prompt: do something +budget: + rounds: 1 +`, + }, + wantNames: []string{}, + }, + { + name: "skip missing prompt", + files: map[string]string{ + "bad.yaml": `name: noprompt +budget: + rounds: 1 +`, + }, + wantNames: []string{}, + }, + { + name: "skip zero 
budget rounds", + files: map[string]string{ + "bad.yaml": `name: nobudget +prompt: hello +budget: + rounds: 0 +`, + }, + wantNames: []string{}, + }, + { + name: "skip unknown fields", + files: map[string]string{ + "bad.yaml": `name: unknown +prompt: hello +budget: + rounds: 1 +extra_field: should_fail +`, + }, + wantNames: []string{}, + }, + { + name: "skip invalid arg type", + files: map[string]string{ + "bad.yaml": `name: badarg +prompt: hello +budget: + rounds: 1 +args: + - name: x + type: float + required: true +`, + }, + wantNames: []string{}, + }, + { + name: "skip enum with no values", + files: map[string]string{ + "bad.yaml": `name: badenum +prompt: hello +budget: + rounds: 1 +args: + - name: x + type: enum + required: true +`, + }, + wantNames: []string{}, + }, + { + name: "skip invalid input mode", + files: map[string]string{ + "bad.yaml": `name: badinput +prompt: hello +budget: + rounds: 1 +input: stream +`, + }, + wantNames: []string{}, + }, + { + name: "skip invalid output mode", + files: map[string]string{ + "bad.yaml": `name: badoutput +prompt: hello +budget: + rounds: 1 +output: binary +`, + }, + wantNames: []string{}, + }, + { + name: "empty directory", + files: map[string]string{}, + wantNames: []string{}, + }, + { + name: "ignore non-yaml files", + files: map[string]string{ + "readme.txt": "not yaml", + "config.json": `{"key": "value"}`, + }, + wantNames: []string{}, + }, + { + name: "valid with defaults applied", + files: map[string]string{ + "minimal.yaml": `name: minimal +prompt: do the thing +budget: + rounds: 1 +`, + }, + wantNames: []string{"minimal"}, + }, + { + name: "skip invalid timeout", + files: map[string]string{ + "bad.yaml": `name: badtimeout +prompt: hello +budget: + rounds: 1 +timeout: not-a-duration +`, + }, + wantNames: []string{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + dir := t.TempDir() + for name, content := range tt.files { + writeYAML(t, dir, name, content) + } + + cmds, err := 
LoadCommands(dir) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + + var got []string + for name := range cmds { + got = append(got, name) + } + assert.ElementsMatch(t, tt.wantNames, got) + }) + } +} + +func TestLoadCommands_DuplicateNames(t *testing.T) { + dir := t.TempDir() + writeYAML(t, dir, "wall1.yaml", `name: wall +prompt: first +budget: + rounds: 1 +`) + writeYAML(t, dir, "wall2.yaml", `name: wall +prompt: second +budget: + rounds: 1 +`) + + cmds, err := LoadCommands(dir) + require.NoError(t, err) + // One wins, one is skipped. Only one entry for "wall". + assert.Len(t, cmds, 1) + assert.Contains(t, cmds, "wall") +} + +func TestLoadCommands_NonexistentDir(t *testing.T) { + _, err := LoadCommands(filepath.Join(t.TempDir(), "does-not-exist")) + require.Error(t, err) + assert.Contains(t, err.Error(), "read command dir") +} + +func TestLoadCommands_FieldValues(t *testing.T) { + dir := t.TempDir() + writeYAML(t, dir, "wall.yaml", validCommandYAML) + + cmds, err := LoadCommands(dir) + require.NoError(t, err) + require.Contains(t, cmds, "wall") + + cmd := cmds["wall"] + assert.Equal(t, "wall", cmd.Name) + assert.Equal(t, "Broadcast a message to all active agents via biff", cmd.Description) + assert.Equal(t, "deadbeef", cmd.Signature) + assert.Equal(t, "none", cmd.Input) + assert.Equal(t, "prose", cmd.Output) + assert.Equal(t, "2m", cmd.Timeout) + assert.Equal(t, 1, cmd.Budget.Rounds) + assert.False(t, cmd.Budget.ReflectionAfterEach) + assert.Equal(t, []string{"Bash"}, cmd.Tools) + assert.Equal(t, []string{"ethos", "biff"}, cmd.MCPServers) + assert.Equal(t, []string{"BIFF_TOKEN"}, cmd.EnvVars) + + require.Len(t, cmd.Args, 2) + assert.Equal(t, "message", cmd.Args[0].Name) + assert.Equal(t, "string", cmd.Args[0].Type) + assert.Equal(t, 500, cmd.Args[0].MaxLength) + assert.True(t, cmd.Args[0].Required) + assert.Equal(t, "channel", cmd.Args[1].Name) + assert.Equal(t, "enum", cmd.Args[1].Type) + assert.Equal(t, []string{"general", 
"alerts"}, cmd.Args[1].Values) + assert.False(t, cmd.Args[1].Required) + assert.Equal(t, "general", cmd.Args[1].Default) +} + +func TestLoadCommands_DefaultInputOutput(t *testing.T) { + dir := t.TempDir() + writeYAML(t, dir, "min.yaml", `name: min +prompt: hello +budget: + rounds: 1 +`) + cmds, err := LoadCommands(dir) + require.NoError(t, err) + require.Contains(t, cmds, "min") + assert.Equal(t, "none", cmds["min"].Input) + assert.Equal(t, "prose", cmds["min"].Output) +} + +func TestValidateArgs(t *testing.T) { + cmd := &Command{ + Name: "test", + Args: []CommandArg{ + {Name: "message", Type: "string", MaxLength: 10, Required: true}, + {Name: "count", Type: "int", Required: false}, + {Name: "verbose", Type: "bool", Required: false}, + {Name: "env", Type: "enum", Values: []string{"dev", "prod"}, Required: true}, + }, + } + + tests := []struct { + name string + args map[string]any + wantErr string + }{ + { + name: "all valid", + args: map[string]any{"message": "hello", "count": 5, "verbose": true, "env": "dev"}, + wantErr: "", + }, + { + name: "required only", + args: map[string]any{"message": "hi", "env": "prod"}, + wantErr: "", + }, + { + name: "missing required message", + args: map[string]any{"env": "dev"}, + wantErr: "missing required arg \"message\"", + }, + { + name: "missing required env", + args: map[string]any{"message": "hi"}, + wantErr: "missing required arg \"env\"", + }, + { + name: "wrong type for string", + args: map[string]any{"message": 42, "env": "dev"}, + wantErr: "expected string", + }, + { + name: "wrong type for int", + args: map[string]any{"message": "hi", "env": "dev", "count": "five"}, + wantErr: "expected int", + }, + { + name: "wrong type for bool", + args: map[string]any{"message": "hi", "env": "dev", "verbose": "yes"}, + wantErr: "expected bool", + }, + { + name: "max_length exceeded", + args: map[string]any{"message": "this string is too long", "env": "dev"}, + wantErr: "exceeds max_length", + }, + { + name: "max_length exact 
boundary", + args: map[string]any{"message": "0123456789", "env": "dev"}, + wantErr: "", + }, + { + name: "enum value not allowed", + args: map[string]any{"message": "hi", "env": "staging"}, + wantErr: "not in allowed values", + }, + { + name: "enum wrong type", + args: map[string]any{"message": "hi", "env": 42}, + wantErr: "expected string for enum", + }, + { + name: "unknown arg", + args: map[string]any{"message": "hi", "env": "dev", "bogus": "val"}, + wantErr: "unknown arg \"bogus\"", + }, + { + name: "int as float64", + args: map[string]any{"message": "hi", "env": "dev", "count": float64(3)}, + wantErr: "", + }, + { + name: "int as int64", + args: map[string]any{"message": "hi", "env": "dev", "count": int64(7)}, + wantErr: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := ValidateArgs(cmd, tt.args) + if tt.wantErr == "" { + assert.NoError(t, err) + } else { + require.Error(t, err) + assert.Contains(t, err.Error(), tt.wantErr) + } + }) + } +} + +func TestValidateArgs_NoArgs(t *testing.T) { + cmd := &Command{Name: "simple", Args: nil} + err := ValidateArgs(cmd, map[string]any{}) + assert.NoError(t, err) +} + +func TestValidateArgs_EmptyArgsMap(t *testing.T) { + cmd := &Command{ + Name: "test", + Args: []CommandArg{ + {Name: "opt", Type: "string", Required: false}, + }, + } + err := ValidateArgs(cmd, map[string]any{}) + assert.NoError(t, err) +} + +func TestVerifySignature_Stub(t *testing.T) { + cmd := &Command{Name: "test", Signature: "deadbeef"} + err := VerifySignature(cmd, "gpg") + assert.NoError(t, err) +} diff --git a/internal/daemon/handler.go b/internal/daemon/handler.go index f073a40..839823c 100644 --- a/internal/daemon/handler.go +++ b/internal/daemon/handler.go @@ -5,7 +5,6 @@ import ( "context" "fmt" "log/slog" - "os" "strconv" "strings" "sync" @@ -25,13 +24,15 @@ type MissionCreator interface { // MailHandler processes new mail notifications from the poller. 
// For each unread message, it checks the sender's x-bit permission, -// creates an ethos mission, and spawns a Claude Code worker to execute it. +// creates an Executor pipeline, and runs it. type MailHandler struct { resolver *identity.Resolver dialer email.Dialer missions MissionCreator - spawner *WorkerSpawner + spawner Spawner templates *MissionTemplate + planner Planner + commands map[string]*Command logger *slog.Logger ctx context.Context @@ -43,12 +44,20 @@ type MailHandler struct { // NewMailHandler creates a MailHandler. If spawner or templates is nil, // mission creation still works but no worker is spawned. // maxWorkers sets the concurrency limit for worker goroutines (default 2). +// planner and commands configure the pipeline executor; if planner is nil, +// a StubPlanner is used that returns a single generic CommandCall. // The returned context governs worker subprocess lifetimes — call Stop // to cancel running workers and wait for them to exit. -func NewMailHandler(ctx context.Context, resolver *identity.Resolver, dialer email.Dialer, missions MissionCreator, spawner *WorkerSpawner, templates *MissionTemplate, logger *slog.Logger, maxWorkers int) *MailHandler { +func NewMailHandler(ctx context.Context, resolver *identity.Resolver, dialer email.Dialer, missions MissionCreator, spawner Spawner, templates *MissionTemplate, logger *slog.Logger, maxWorkers int, planner Planner, commands map[string]*Command) *MailHandler { if maxWorkers <= 0 { maxWorkers = 2 } + if planner == nil { + planner = &StubPlanner{Err: fmt.Errorf("no planner configured")} + } + if commands == nil { + commands = make(map[string]*Command) + } ctx, cancel := context.WithCancel(ctx) return &MailHandler{ resolver: resolver, @@ -56,6 +65,8 @@ func NewMailHandler(ctx context.Context, resolver *identity.Resolver, dialer ema missions: missions, spawner: spawner, templates: templates, + planner: planner, + commands: commands, logger: logger, ctx: ctx, cancel: cancel, @@ -153,18 +164,30 
@@ func (h *MailHandler) OnNewMail(newCount uint32) { if h.spawner != nil && h.templates != nil { select { case h.workerSem <- struct{}{}: - missionID, err := h.missions.Create(meta) - if err != nil { - <-h.workerSem - h.logger.Error("create mission", "from", addr, "id", msg.ID, "error", err) - continue - } - h.logger.Info("mission created", "mission", missionID, "from", addr, "subject", msg.Subject) h.wg.Add(1) go func() { defer h.wg.Done() defer func() { <-h.workerSem }() - h.spawnWorker(h.ctx, missionID) + executor := &Executor{ + Planner: h.planner, + Commands: h.commands, + Missions: h.missions, + Spawner: h.spawner, + Templates: h.templates, + Registry: DefaultMCPRegistry(), + Logger: h.logger, + } + p, err := executor.Run(h.ctx, meta, "") + if err != nil { + h.logger.Error("pipeline failed", + "pipeline", p.ID, "from", addr, + "id", msg.ID, "error", err) + return + } + h.logger.Info("pipeline completed", + "pipeline", p.ID, "from", addr, + "subject", truncateLog(msg.Subject, 200), + "stages", len(p.Results)) }() default: h.logger.Warn("worker capacity full, skipping", "from", addr, "id", msg.ID) @@ -180,36 +203,6 @@ func (h *MailHandler) OnNewMail(newCount uint32) { } } -func (h *MailHandler) spawnWorker(ctx context.Context, missionID string) { - mcpPath, err := h.templates.BuildMCPConfig() - if err != nil { - h.logger.Error("build mcp config", "mission", missionID, "error", err) - return - } - - promptPath, err := h.templates.BuildSystemPrompt(missionID) - if err != nil { - os.Remove(mcpPath) - h.logger.Error("build system prompt", "mission", missionID, "error", err) - return - } - - result, err := h.spawner.Run(ctx, missionID, mcpPath, promptPath) - // Clean up temp files after subprocess exits. 
- os.Remove(mcpPath) - os.Remove(promptPath) - - if err != nil { - h.logger.Error("spawn worker", "mission", missionID, "error", err) - return - } - h.logger.Info("worker completed", - "mission", missionID, - "session", result.SessionID, - "isError", result.IsError, - "exitCode", result.ExitCode) -} - // verifyTrust determines the transport trust level for a message. // Proton headers are SMTP-injectable; only PGP verification provides // cryptographic proof of sender identity for x-bit execution. diff --git a/internal/daemon/handler_test.go b/internal/daemon/handler_test.go index d962bfc..c541fd2 100644 --- a/internal/daemon/handler_test.go +++ b/internal/daemon/handler_test.go @@ -151,7 +151,7 @@ func TestOnNewMail(t *testing.T) { } mock := &mockMissionCreator{} - handler := NewMailHandler(t.Context(), env.Resolver, dialer, mock, nil, nil, discardLogger(), 0) + handler := NewMailHandler(t.Context(), env.Resolver, dialer, mock, nil, nil, discardLogger(), 0, nil, nil) handler.OnNewMail(uint32(len(tt.messages))) @@ -189,7 +189,7 @@ func TestOnNewMail_PGPKeyMismatch(t *testing.T) { fix.AddRawMessage("INBOX", buildPGPSignedRFC822(t, gpgBin, "jim@punt-labs.com", "Test", "body")) mock := &mockMissionCreator{} - handler := NewMailHandler(t.Context(), env.Resolver, dialer, mock, nil, nil, discardLogger(), 0) + handler := NewMailHandler(t.Context(), env.Resolver, dialer, mock, nil, nil, discardLogger(), 0, nil, nil) handler.OnNewMail(1) assert.Equal(t, 0, len(mock.calls), "mission should not be created: key mismatch") @@ -217,7 +217,7 @@ func TestOnNewMail_PGPKeyMatch(t *testing.T) { fix.AddRawMessage("INBOX", raw) mock := &mockMissionCreator{} - handler := NewMailHandler(t.Context(), env.Resolver, dialer, mock, nil, nil, discardLogger(), 0) + handler := NewMailHandler(t.Context(), env.Resolver, dialer, mock, nil, nil, discardLogger(), 0, nil, nil) handler.OnNewMail(1) assert.Equal(t, 1, len(mock.calls), "mission should be created: key matches") diff --git 
package daemon

import (
	"encoding/json"
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"strings"
)

// PipelineStore persists pipeline state to JSON files for crash recovery.
// Each pipeline is stored as one document at Dir/<id>.json.
type PipelineStore struct {
	Dir    string       // directory holding one JSON file per pipeline
	Logger *slog.Logger // reports skipped/corrupt files during load
}

// Save writes a pipeline to Dir/<id>.json via write-to-temp-then-rename,
// so a reader never observes a partially written file.
//
// p.ID is interpolated into a filename, so an empty ID or one containing
// path separators or ".." is rejected before any filesystem access.
func (s *PipelineStore) Save(p *Pipeline) error {
	if strings.ContainsAny(p.ID, "/\\") || strings.Contains(p.ID, "..") || p.ID == "" {
		return fmt.Errorf("invalid pipeline ID %q", p.ID)
	}

	// 0o700: pipeline state can carry email metadata; keep it owner-only.
	if err := os.MkdirAll(s.Dir, 0o700); err != nil {
		return fmt.Errorf("create pipeline dir %s: %w", s.Dir, err)
	}

	data, err := json.MarshalIndent(p, "", " ")
	if err != nil {
		return fmt.Errorf("marshal pipeline %s: %w", p.ID, err)
	}

	// Dot-prefixed temp name: LoadRunning skips ".tmp-*" files, so a
	// crash between write and rename never surfaces a partial document.
	tmp := filepath.Join(s.Dir, ".tmp-"+p.ID+".json")
	final := filepath.Join(s.Dir, p.ID+".json")

	if err := os.WriteFile(tmp, data, 0o600); err != nil {
		return fmt.Errorf("write temp file %s: %w", tmp, err)
	}
	if err := os.Rename(tmp, final); err != nil {
		// Best-effort cleanup; the rename error is the one worth reporting.
		os.Remove(tmp)
		return fmt.Errorf("rename %s to %s: %w", tmp, final, err)
	}
	return nil
}
+func (s *PipelineStore) LoadRunning() ([]*Pipeline, error) { + entries, err := os.ReadDir(s.Dir) + if os.IsNotExist(err) { + return nil, nil + } + if err != nil { + return nil, fmt.Errorf("read pipeline dir %s: %w", s.Dir, err) + } + + var running []*Pipeline + for _, e := range entries { + if e.IsDir() || !strings.HasSuffix(e.Name(), ".json") { + continue + } + // Skip temp files from incomplete writes. + if strings.HasPrefix(e.Name(), ".tmp-") { + continue + } + + path := filepath.Join(s.Dir, e.Name()) + data, err := os.ReadFile(path) + if err != nil { + s.Logger.Warn("skip unreadable pipeline file", "path", path, "error", err) + continue + } + + var p Pipeline + if err := json.Unmarshal(data, &p); err != nil { + s.Logger.Warn("skip corrupt pipeline file", "path", path, "error", err) + continue + } + + if p.Status == "running" { + running = append(running, &p) + } + } + return running, nil +} diff --git a/internal/daemon/persist_test.go b/internal/daemon/persist_test.go new file mode 100644 index 0000000..9dffb01 --- /dev/null +++ b/internal/daemon/persist_test.go @@ -0,0 +1,140 @@ +package daemon + +import ( + "os" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestPipelineStore_SaveAndLoadRunning(t *testing.T) { + dir := t.TempDir() + s := &PipelineStore{Dir: dir, Logger: testLogger()} + + p := &Pipeline{ + Version: 1, + ID: "abc-123", + CreatedAt: time.Date(2026, 4, 15, 10, 0, 0, 0, time.UTC), + Email: EmailMeta{MessageID: "1", From: "jim@test.com", Subject: "Test"}, + Status: "running", + } + + require.NoError(t, s.Save(p)) + + got, err := s.LoadRunning() + require.NoError(t, err) + require.Len(t, got, 1) + assert.Equal(t, "abc-123", got[0].ID) + assert.Equal(t, "running", got[0].Status) + assert.Equal(t, "jim@test.com", got[0].Email.From) +} + +func TestPipelineStore_CompletedNotReturned(t *testing.T) { + dir := t.TempDir() + s := &PipelineStore{Dir: dir, Logger: 
testLogger()} + + p := &Pipeline{ + Version: 1, + ID: "done-1", + Email: EmailMeta{From: "x@test.com"}, + Status: "completed", + } + require.NoError(t, s.Save(p)) + + got, err := s.LoadRunning() + require.NoError(t, err) + assert.Empty(t, got) +} + +func TestPipelineStore_AtomicRename(t *testing.T) { + dir := t.TempDir() + s := &PipelineStore{Dir: dir, Logger: testLogger()} + + p := &Pipeline{ + Version: 1, + ID: "atomic-1", + Email: EmailMeta{From: "x@test.com"}, + Status: "running", + } + require.NoError(t, s.Save(p)) + + // Final file must exist. + _, err := os.Stat(filepath.Join(dir, "atomic-1.json")) + require.NoError(t, err) + + // Temp file must not exist. + _, err = os.Stat(filepath.Join(dir, ".tmp-atomic-1.json")) + assert.True(t, os.IsNotExist(err)) +} + +func TestPipelineStore_CorruptJSONSkipped(t *testing.T) { + dir := t.TempDir() + s := &PipelineStore{Dir: dir, Logger: testLogger()} + + // Write a valid pipeline. + p := &Pipeline{ + Version: 1, + ID: "good-1", + Email: EmailMeta{From: "x@test.com"}, + Status: "running", + } + require.NoError(t, s.Save(p)) + + // Write corrupt JSON alongside it. 
+ require.NoError(t, os.WriteFile(filepath.Join(dir, "bad.json"), []byte("{corrupt"), 0o600)) + + got, err := s.LoadRunning() + require.NoError(t, err) + require.Len(t, got, 1) + assert.Equal(t, "good-1", got[0].ID) +} + +func TestPipelineStore_EmptyDir(t *testing.T) { + dir := t.TempDir() + s := &PipelineStore{Dir: dir, Logger: testLogger()} + + got, err := s.LoadRunning() + require.NoError(t, err) + assert.Empty(t, got) +} + +func TestPipelineStore_NonexistentDir(t *testing.T) { + s := &PipelineStore{Dir: filepath.Join(t.TempDir(), "missing"), Logger: testLogger()} + + got, err := s.LoadRunning() + require.NoError(t, err) + assert.Empty(t, got) +} + +func TestPipelineStore_MixedStatuses(t *testing.T) { + dir := t.TempDir() + s := &PipelineStore{Dir: dir, Logger: testLogger()} + + cases := []struct { + id string + status string + }{ + {"r1", "running"}, + {"c1", "completed"}, + {"f1", "failed"}, + {"r2", "running"}, + } + for _, tc := range cases { + p := &Pipeline{Version: 1, ID: tc.id, Status: tc.status, Email: EmailMeta{From: "x@test.com"}} + require.NoError(t, s.Save(p)) + } + + got, err := s.LoadRunning() + require.NoError(t, err) + require.Len(t, got, 2) + + ids := map[string]bool{} + for _, p := range got { + ids[p.ID] = true + } + assert.True(t, ids["r1"]) + assert.True(t, ids["r2"]) +} diff --git a/internal/daemon/pipeline.go b/internal/daemon/pipeline.go new file mode 100644 index 0000000..e43f240 --- /dev/null +++ b/internal/daemon/pipeline.go @@ -0,0 +1,294 @@ +package daemon + +import ( + "context" + "fmt" + "log/slog" + "os" + "time" + + "github.com/google/uuid" +) + +// Pipeline tracks the execution state of a planned command sequence. 
+type Pipeline struct {
+	Version   int           `json:"version"` // persisted-schema version; Run sets 1
+	ID        string        `json:"id"` // UUID assigned by Run; also the on-disk filename stem in PipelineStore
+	CreatedAt time.Time     `json:"created_at"`
+	Email     EmailMeta     `json:"email"` // metadata of the triggering email
+	Commands  []CommandCall `json:"commands"` // the validated plan; set only after every stage passes validation
+	ElseCmd   *CommandCall  `json:"else_cmd"` // NOTE(review): never read in this file — fireElse only logs; confirm intended use
+	Current   int           `json:"current"` // index of the stage currently executing
+	Results   []string      `json:"results"` // one output string per completed stage, in order
+	Status    string        `json:"status"` // "running", then "completed" or "failed"
+	Error     string        `json:"error"` // human-readable failure description; empty on success
+	WriteSet  []string      `json:"write_set"` // union of all planned commands' write sets
+}
+
+// Spawner runs a Claude Code worker session for a mission.
+// envOverrides are extra environment variables for the worker subprocess;
+// the spawner force-sets protected variables (API key, HOME, PATH, TMPDIR)
+// after applying overrides, so those cannot be overridden.
+type Spawner interface {
+	Run(ctx context.Context, missionID, mcpConfigPath, systemPromptPath string, envOverrides map[string]string) (WorkerResult, error)
+}
+
+// Executor plans, validates, and runs a command pipeline for an incoming email.
+type Executor struct {
+	Planner   Planner                    // decomposes an email into CommandCalls
+	Commands  map[string]*Command        // command registry keyed by command name
+	Missions  MissionCreator             // creates one mission per pipeline stage
+	Spawner   Spawner                    // runs the worker session for each mission
+	Templates *MissionTemplate           // builds per-stage MCP config and system prompt files
+	Registry  map[string]MCPServerConfig // known MCP servers, selected per command
+	Store     *PipelineStore             // optional; nil disables state persistence
+	Logger    *slog.Logger
+}
+
+// Run plans and executes a pipeline for the given email.
+// State is persisted after every transition via save, which logs persistence
+// failures rather than aborting the run. On any failure the returned Pipeline
+// has Status "failed" and a non-empty Error, and fireElse logs the failure.
+func (e *Executor) Run(ctx context.Context, meta EmailMeta, body string) (*Pipeline, error) {
+	p := &Pipeline{
+		Version:   1,
+		ID:        uuid.New().String(),
+		CreatedAt: time.Now(),
+		Email:     meta,
+		Status:    "running",
+	}
+	// Persist the initial "running" record before planning so a crash here
+	// is still visible to PipelineStore.LoadRunning on restart.
+	e.save(p)
+
+	calls, err := e.Planner.Plan(ctx, meta, body)
+	if err != nil {
+		p.Status = "failed"
+		p.Error = fmt.Sprintf("plan: %v", err)
+		e.save(p)
+		e.fireElse(p)
+		return p, fmt.Errorf("plan pipeline %s: %w", p.ID, err)
+	}
+	if len(calls) == 0 {
+		p.Status = "failed"
+		p.Error = "plan returned empty command list"
+		e.save(p)
+		e.fireElse(p)
+		return p, fmt.Errorf("pipeline %s: empty plan", p.ID)
+	}
+
+	// Validate all commands before execution so a bad plan fails up front,
+	// before any stage has side effects.
+	for i, call := range calls {
+		cmd, ok := e.Commands[call.Command]
+		if !ok {
+			p.Status = "failed"
+			p.Error = fmt.Sprintf("stage %d: unknown command %q", i, call.Command)
+			e.save(p)
+			e.fireElse(p)
+			return p, fmt.Errorf("pipeline %s stage %d: unknown command %q", p.ID, i, call.Command)
+		}
+		if err := ValidateArgs(cmd, call.Args); err != nil {
+			p.Status = "failed"
+			p.Error = fmt.Sprintf("stage %d: %v", i, err)
+			e.save(p)
+			e.fireElse(p)
+			return p, fmt.Errorf("pipeline %s stage %d: %w", p.ID, i, err)
+		}
+	}
+
+	p.Commands = calls
+	p.WriteSet = e.unionWriteSets(calls)
+	p.Results = make([]string, 0, len(calls))
+
+	// Execute sequentially; the first stage failure aborts the pipeline.
+	for i, call := range calls {
+		p.Current = i
+		e.save(p)
+
+		cmd := e.Commands[call.Command]
+		result, err := e.executeStage(ctx, p, i, cmd, call)
+		if err != nil {
+			p.Status = "failed"
+			p.Error = fmt.Sprintf("stage %d (%s): %v", i, call.Command, err)
+			e.save(p)
+			e.fireElse(p)
+			return p, fmt.Errorf("pipeline %s stage %d (%s): %w", p.ID, i, call.Command, err)
+		}
+		p.Results = append(p.Results, result)
+		e.save(p)
+	}
+
+	p.Status = "completed"
+	e.save(p)
+	return p, nil
+}
+
+// executeStage creates a mission and spawns a worker for one pipeline stage.
+// The previous stage's output (if any) is fed into the stage contract, and the
+// worker's textual output becomes this stage's result on success.
+func (e *Executor) executeStage(ctx context.Context, p *Pipeline, idx int, cmd *Command, call CommandCall) (string, error) {
+	var prevOutput string
+	if idx > 0 {
+		prevOutput = p.Results[idx-1]
+	}
+
+	contract := buildStageContract(p.Email, cmd, call, prevOutput)
+
+	// The mission subject encodes pipeline ID, stage index, and command name
+	// so each stage's mission is traceable back to its pipeline.
+	missionID, err := e.Missions.Create(EmailMeta{
+		MessageID: p.Email.MessageID,
+		From:      p.Email.From,
+		Subject:   fmt.Sprintf("[pipeline %s stage %d] %s", p.ID, idx, call.Command),
+	})
+	if err != nil {
+		return "", fmt.Errorf("create mission: %w", err)
+	}
+
+	e.Logger.Info("stage mission created",
+		"pipeline", p.ID, "stage", idx,
+		"command", call.Command, "mission", missionID)
+
+	// Build MCP config from the command's mcp_servers, falling back to the
+	// default pair when the command declares none.
+	servers := cmd.MCPServers
+	if len(servers) == 0 {
+		servers = []string{"ethos", "beadle-email"}
+	}
+	mcpPath, err := e.Templates.BuildMCPConfig(servers, e.Registry)
+	if err != nil {
+		return "", fmt.Errorf("build mcp config: %w", err)
+	}
+	defer os.Remove(mcpPath)
+
+	promptPath, err := e.Templates.BuildSystemPrompt(missionID)
+	if err != nil {
+		return "", fmt.Errorf("build system prompt: %w", err)
+	}
+	defer os.Remove(promptPath)
+
+	// Resolve env overrides from command's env_vars; missing vars are skipped.
+	envOverrides := resolveEnvVars(cmd.EnvVars)
+
+	wr, err := e.Spawner.Run(ctx, missionID, mcpPath, promptPath, envOverrides)
+	if err != nil {
+		return "", fmt.Errorf("spawn worker: %w", err)
+	}
+	if wr.IsError {
+		return "", fmt.Errorf("worker error (exit %d): %s", wr.ExitCode, wr.Output)
+	}
+
+	e.Logger.Info("stage completed",
+		"pipeline", p.ID, "stage", idx,
+		"command", call.Command, "mission", missionID)
+
+	_ = contract // NOTE(review): contract is built but not delivered anywhere — the MissionCreator meta above does not include it; kept for future use, confirm intent
+	return wr.Output, nil
+}
+
+// buildStageContract generates a mission contract string for one pipeline stage.
+func buildStageContract(meta EmailMeta, cmd *Command, call CommandCall, prevOutput string) string { + prev := "none" + if prevOutput != "" { + prev = escapeYAMLValue(prevOutput) + } + + argsYAML := "" + for k, v := range call.Args { + argsYAML += fmt.Sprintf(" %s: %s\n", k, escapeYAMLValue(fmt.Sprint(v))) + } + + return fmt.Sprintf(`leader: claude +worker: bwk +evaluator: + handle: mdm +inputs: + trigger: + type: email + message_id: %s + from: %s + subject: %s + args: +%s previous_output: %s +write_set: + - %s +success_criteria: + - %s +budget: + rounds: %d + reflection_after_each: %v +`, + escapeYAMLValue(meta.MessageID), + escapeYAMLValue(meta.From), + escapeYAMLValue(meta.Subject), + argsYAML, + prev, + writeSetYAML(cmd.WriteSet), + escapeYAMLValue(cmd.Prompt), + cmd.Budget.Rounds, + cmd.Budget.ReflectionAfterEach, + ) +} + +// writeSetYAML formats a write_set slice as YAML list items. +func writeSetYAML(ws []string) string { + if len(ws) == 0 { + return "daemon output" + } + s := ws[0] + for _, w := range ws[1:] { + s += "\n - " + w + } + return s +} + +// resolveEnvVars converts a list of "KEY" names to a map of KEY=value +// from the current process environment. Missing vars are silently skipped. +func resolveEnvVars(names []string) map[string]string { + if len(names) == 0 { + return nil + } + m := make(map[string]string, len(names)) + for _, name := range names { + if v, ok := os.LookupEnv(name); ok { + m[name] = v + } + } + if len(m) == 0 { + return nil + } + return m +} + +// unionWriteSets collects unique write_set entries across all commands. +func (e *Executor) unionWriteSets(calls []CommandCall) []string { + seen := make(map[string]bool) + var ws []string + for _, call := range calls { + cmd, ok := e.Commands[call.Command] + if !ok { + continue + } + for _, w := range cmd.WriteSet { + if !seen[w] { + seen[w] = true + ws = append(ws, w) + } + } + } + return ws +} + +// save persists the pipeline state if a Store is configured. 
+func (e *Executor) save(p *Pipeline) { + if e.Store == nil { + return + } + if err := e.Store.Save(p); err != nil { + e.Logger.Error("persist pipeline state", "pipeline", p.ID, "error", err) + } +} + +// truncateLog returns s truncated to max bytes, appending "..." if truncated. +func truncateLog(s string, max int) string { + if len(s) <= max { + return s + } + return s[:max] + "..." +} + +// fireElse logs the pipeline error. Sending a reply email is future work. +func (e *Executor) fireElse(p *Pipeline) { + e.Logger.Error("pipeline failed, else handler", + "pipeline", p.ID, + "error", p.Error, + "email_from", p.Email.From, + "email_subject", truncateLog(p.Email.Subject, 200)) +} diff --git a/internal/daemon/pipeline_test.go b/internal/daemon/pipeline_test.go new file mode 100644 index 0000000..6fcc48f --- /dev/null +++ b/internal/daemon/pipeline_test.go @@ -0,0 +1,346 @@ +package daemon + +import ( + "context" + "fmt" + "io" + "log/slog" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// mockSpawner returns preconfigured results per mission, keyed by call index. 
+type mockSpawner struct { + calls []mockSpawnerCall + results []WorkerResult + errs []error + idx int +} + +type mockSpawnerCall struct { + MissionID string + MCPConfigPath string + SystemPromptPath string + EnvOverrides map[string]string +} + +func (m *mockSpawner) Run(_ context.Context, missionID, mcpConfigPath, systemPromptPath string, envOverrides map[string]string) (WorkerResult, error) { + m.calls = append(m.calls, mockSpawnerCall{ + MissionID: missionID, + MCPConfigPath: mcpConfigPath, + SystemPromptPath: systemPromptPath, + EnvOverrides: envOverrides, + }) + i := m.idx + m.idx++ + if i < len(m.errs) && m.errs[i] != nil { + return WorkerResult{MissionID: missionID, IsError: true}, m.errs[i] + } + if i < len(m.results) { + return m.results[i], nil + } + return WorkerResult{MissionID: missionID, Output: "ok"}, nil +} + +func testLogger() *slog.Logger { + return slog.New(slog.NewTextHandler(io.Discard, nil)) +} + +func testCommands() map[string]*Command { + return map[string]*Command{ + "greet": { + Name: "greet", + Prompt: "Greet the user", + Input: "none", + Output: "prose", + Budget: struct { + Rounds int `yaml:"rounds"` + ReflectionAfterEach bool `yaml:"reflection_after_each"` + }{Rounds: 1}, + WriteSet: []string{"output/greet.txt"}, + MCPServers: []string{"ethos"}, + }, + "summarize": { + Name: "summarize", + Prompt: "Summarize the input", + Input: "required", + Output: "prose", + Budget: struct { + Rounds int `yaml:"rounds"` + ReflectionAfterEach bool `yaml:"reflection_after_each"` + }{Rounds: 1}, + WriteSet: []string{"output/summary.txt"}, + MCPServers: []string{"ethos", "beadle-email"}, + }, + "deploy": { + Name: "deploy", + Prompt: "Deploy to production", + Input: "optional", + Output: "prose", + Args: []CommandArg{ + {Name: "env", Type: "enum", Values: []string{"prod", "staging"}, Required: true}, + }, + Budget: struct { + Rounds int `yaml:"rounds"` + ReflectionAfterEach bool `yaml:"reflection_after_each"` + }{Rounds: 1}, + WriteSet: 
[]string{"deploy/manifest.yaml"}, + MCPServers: []string{"ethos"}, + }, + } +} + +func TestExecutor_TwoStagePipeline(t *testing.T) { + sp := &mockSpawner{ + results: []WorkerResult{ + {Output: "Hello, Jim!"}, + {Output: "Summary: greeting sent"}, + }, + } + mock := &mockMissionCreator{} + tmpl := &MissionTemplate{TmpDir: t.TempDir()} + + exec := &Executor{ + Planner: &StubPlanner{ + Result: []CommandCall{ + {Command: "greet", Args: map[string]any{}}, + {Command: "summarize", Args: map[string]any{}}, + }, + }, + Commands: testCommands(), + Missions: mock, + Spawner: sp, + Templates: tmpl, + Registry: DefaultMCPRegistry(), + Logger: testLogger(), + } + + meta := EmailMeta{MessageID: "42", From: "jim@test.com", Subject: "Test"} + p, err := exec.Run(context.Background(), meta, "body") + require.NoError(t, err) + + assert.Equal(t, "completed", p.Status) + assert.Equal(t, "", p.Error) + assert.Len(t, p.Results, 2) + assert.Equal(t, "Hello, Jim!", p.Results[0]) + assert.Equal(t, "Summary: greeting sent", p.Results[1]) + assert.Len(t, sp.calls, 2) + assert.Len(t, mock.calls, 2) + + // WriteSet is the union of both commands. 
+ assert.Contains(t, p.WriteSet, "output/greet.txt") + assert.Contains(t, p.WriteSet, "output/summary.txt") +} + +func TestExecutor_StageFailure(t *testing.T) { + sp := &mockSpawner{ + results: []WorkerResult{ + {Output: "stage 0 ok"}, + }, + errs: []error{ + nil, + fmt.Errorf("deploy exploded"), + }, + } + mock := &mockMissionCreator{} + tmpl := &MissionTemplate{TmpDir: t.TempDir()} + + exec := &Executor{ + Planner: &StubPlanner{ + Result: []CommandCall{ + {Command: "greet", Args: map[string]any{}}, + {Command: "summarize", Args: map[string]any{}}, + }, + }, + Commands: testCommands(), + Missions: mock, + Spawner: sp, + Templates: tmpl, + Registry: DefaultMCPRegistry(), + Logger: testLogger(), + } + + meta := EmailMeta{MessageID: "99", From: "jim@test.com", Subject: "Fail"} + p, err := exec.Run(context.Background(), meta, "body") + require.Error(t, err) + + assert.Equal(t, "failed", p.Status) + assert.Contains(t, p.Error, "stage 1") + assert.Len(t, p.Results, 1) // first stage succeeded +} + +func TestExecutor_PlannerFailure(t *testing.T) { + sp := &mockSpawner{} + mock := &mockMissionCreator{} + tmpl := &MissionTemplate{TmpDir: t.TempDir()} + + exec := &Executor{ + Planner: &StubPlanner{Err: fmt.Errorf("no rules matched")}, + Commands: testCommands(), + Missions: mock, + Spawner: sp, + Templates: tmpl, + Registry: DefaultMCPRegistry(), + Logger: testLogger(), + } + + meta := EmailMeta{MessageID: "1", From: "x@test.com", Subject: "Nope"} + p, err := exec.Run(context.Background(), meta, "body") + require.Error(t, err) + + assert.Equal(t, "failed", p.Status) + assert.Contains(t, p.Error, "plan") + assert.Contains(t, err.Error(), "plan pipeline") + assert.Len(t, sp.calls, 0) +} + +func TestExecutor_EmptyPlan(t *testing.T) { + sp := &mockSpawner{} + mock := &mockMissionCreator{} + tmpl := &MissionTemplate{TmpDir: t.TempDir()} + + exec := &Executor{ + Planner: &StubPlanner{Result: []CommandCall{}}, + Commands: testCommands(), + Missions: mock, + Spawner: sp, + 
Templates: tmpl, + Registry: DefaultMCPRegistry(), + Logger: testLogger(), + } + + meta := EmailMeta{MessageID: "2", From: "x@test.com", Subject: "Empty"} + p, err := exec.Run(context.Background(), meta, "body") + require.Error(t, err) + + assert.Equal(t, "failed", p.Status) + assert.Contains(t, p.Error, "empty") + assert.Len(t, sp.calls, 0) +} + +func TestExecutor_UnknownCommand(t *testing.T) { + sp := &mockSpawner{} + mock := &mockMissionCreator{} + tmpl := &MissionTemplate{TmpDir: t.TempDir()} + + exec := &Executor{ + Planner: &StubPlanner{ + Result: []CommandCall{ + {Command: "nonexistent", Args: map[string]any{}}, + }, + }, + Commands: testCommands(), + Missions: mock, + Spawner: sp, + Templates: tmpl, + Registry: DefaultMCPRegistry(), + Logger: testLogger(), + } + + meta := EmailMeta{MessageID: "3", From: "x@test.com", Subject: "Bad cmd"} + p, err := exec.Run(context.Background(), meta, "body") + require.Error(t, err) + + assert.Equal(t, "failed", p.Status) + assert.Contains(t, p.Error, "unknown command") + assert.Len(t, sp.calls, 0) +} + +func TestExecutor_InvalidArgs(t *testing.T) { + sp := &mockSpawner{} + mock := &mockMissionCreator{} + tmpl := &MissionTemplate{TmpDir: t.TempDir()} + + exec := &Executor{ + Planner: &StubPlanner{ + Result: []CommandCall{ + {Command: "deploy", Args: map[string]any{"env": "invalid-env"}}, + }, + }, + Commands: testCommands(), + Missions: mock, + Spawner: sp, + Templates: tmpl, + Registry: DefaultMCPRegistry(), + Logger: testLogger(), + } + + meta := EmailMeta{MessageID: "4", From: "x@test.com", Subject: "Bad args"} + p, err := exec.Run(context.Background(), meta, "body") + require.Error(t, err) + + assert.Equal(t, "failed", p.Status) + assert.Contains(t, p.Error, "stage 0") + assert.Len(t, sp.calls, 0) +} + +func TestExecutor_WorkerError(t *testing.T) { + sp := &mockSpawner{ + results: []WorkerResult{ + {Output: "something went wrong", IsError: true, ExitCode: 1}, + }, + } + mock := &mockMissionCreator{} + tmpl := 
&MissionTemplate{TmpDir: t.TempDir()} + + exec := &Executor{ + Planner: &StubPlanner{ + Result: []CommandCall{ + {Command: "greet", Args: map[string]any{}}, + }, + }, + Commands: testCommands(), + Missions: mock, + Spawner: sp, + Templates: tmpl, + Registry: DefaultMCPRegistry(), + Logger: testLogger(), + } + + meta := EmailMeta{MessageID: "5", From: "x@test.com", Subject: "Worker fail"} + p, err := exec.Run(context.Background(), meta, "body") + require.Error(t, err) + + assert.Equal(t, "failed", p.Status) + assert.Contains(t, p.Error, "worker error") +} + +func TestExecutor_ResultFlowing(t *testing.T) { + // Verify that the second stage's spawner call happens after the first + // completes, and the mock records both calls in order. + sp := &mockSpawner{ + results: []WorkerResult{ + {Output: "stage-0-output"}, + {Output: "stage-1-output"}, + }, + } + mock := &mockMissionCreator{} + tmpl := &MissionTemplate{TmpDir: t.TempDir()} + + exec := &Executor{ + Planner: &StubPlanner{ + Result: []CommandCall{ + {Command: "greet", Args: map[string]any{}}, + {Command: "summarize", Args: map[string]any{}}, + }, + }, + Commands: testCommands(), + Missions: mock, + Spawner: sp, + Templates: tmpl, + Registry: DefaultMCPRegistry(), + Logger: testLogger(), + } + + meta := EmailMeta{MessageID: "6", From: "x@test.com", Subject: "Flow"} + p, err := exec.Run(context.Background(), meta, "body") + require.NoError(t, err) + + assert.Equal(t, "completed", p.Status) + assert.Equal(t, []string{"stage-0-output", "stage-1-output"}, p.Results) + + // Both stages spawned in order. + require.Len(t, sp.calls, 2) +} diff --git a/internal/daemon/planner.go b/internal/daemon/planner.go new file mode 100644 index 0000000..ac4ec66 --- /dev/null +++ b/internal/daemon/planner.go @@ -0,0 +1,80 @@ +package daemon + +import ( + "context" + "fmt" + "regexp" +) + +// CommandCall is one step in a planned pipeline. 
+type CommandCall struct { + Command string `json:"command"` + Args map[string]any `json:"args"` +} + +// Planner decomposes an email instruction into a sequence of commands. +type Planner interface { + Plan(ctx context.Context, meta EmailMeta, body string) ([]CommandCall, error) +} + +// RuleEntry pairs a regex pattern with the commands to execute on match. +type RuleEntry struct { + Pattern string // regex matched against subject + "\n" + body + Commands []CommandCall // commands returned when Pattern matches +} + +type compiledRule struct { + re *regexp.Regexp + commands []CommandCall +} + +// RulePlanner matches email content against an ordered list of regex rules. +// First match wins. +type RulePlanner struct { + rules []compiledRule +} + +// NewRulePlanner compiles each rule's pattern and returns a RulePlanner. +// Returns an error if any pattern fails to compile. +func NewRulePlanner(rules []RuleEntry) (*RulePlanner, error) { + compiled := make([]compiledRule, len(rules)) + for i, r := range rules { + re, err := regexp.Compile(r.Pattern) + if err != nil { + return nil, fmt.Errorf("compile rule %d pattern %q: %w", i, r.Pattern, err) + } + compiled[i] = compiledRule{re: re, commands: r.Commands} + } + return &RulePlanner{rules: compiled}, nil +} + +// Plan iterates rules in order and returns the commands for the first match. +// The pattern is matched against meta.Subject + "\n" + body. +func (p *RulePlanner) Plan(_ context.Context, meta EmailMeta, body string) ([]CommandCall, error) { + text := meta.Subject + "\n" + body + for _, r := range p.rules { + if r.re.MatchString(text) { + return r.commands, nil + } + } + return nil, fmt.Errorf("no rule matches this instruction") +} + +// LLMPlanner is a stub for future LLM-based planning. +type LLMPlanner struct{} + +// Plan is not yet implemented. 
+func (p *LLMPlanner) Plan(_ context.Context, _ EmailMeta, _ string) ([]CommandCall, error) { + return nil, fmt.Errorf("LLMPlanner not yet implemented") +} + +// StubPlanner returns preconfigured results, for use in tests. +type StubPlanner struct { + Result []CommandCall + Err error +} + +// Plan returns the preconfigured Result and Err. +func (p *StubPlanner) Plan(_ context.Context, _ EmailMeta, _ string) ([]CommandCall, error) { + return p.Result, p.Err +} diff --git a/internal/daemon/planner_test.go b/internal/daemon/planner_test.go new file mode 100644 index 0000000..71d40ca --- /dev/null +++ b/internal/daemon/planner_test.go @@ -0,0 +1,124 @@ +package daemon + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestRulePlanner(t *testing.T) { + deployCmd := CommandCall{Command: "deploy", Args: map[string]any{"env": "prod"}} + statusCmd := CommandCall{Command: "status", Args: nil} + + tests := []struct { + name string + rules []RuleEntry + meta EmailMeta + body string + want []CommandCall + wantErr string + }{ + { + name: "match in subject", + rules: []RuleEntry{ + {Pattern: `(?i)deploy`, Commands: []CommandCall{deployCmd}}, + }, + meta: EmailMeta{Subject: "Please deploy now"}, + body: "no keywords here", + want: []CommandCall{deployCmd}, + }, + { + name: "match in body", + rules: []RuleEntry{ + {Pattern: `(?i)deploy`, Commands: []CommandCall{deployCmd}}, + }, + meta: EmailMeta{Subject: "Action needed"}, + body: "please deploy to production", + want: []CommandCall{deployCmd}, + }, + { + name: "first match wins", + rules: []RuleEntry{ + {Pattern: `deploy`, Commands: []CommandCall{deployCmd}}, + {Pattern: `deploy|status`, Commands: []CommandCall{statusCmd}}, + }, + meta: EmailMeta{Subject: "deploy"}, + body: "", + want: []CommandCall{deployCmd}, + }, + { + name: "no match returns error", + rules: []RuleEntry{ + {Pattern: `deploy`, Commands: []CommandCall{deployCmd}}, + }, + meta: 
EmailMeta{Subject: "hello"}, + body: "world", + wantErr: "no rule matches", + }, + { + name: "empty rules returns error", + rules: []RuleEntry{}, + meta: EmailMeta{Subject: "anything"}, + body: "", + wantErr: "no rule matches", + }, + { + name: "multiple commands in one rule", + rules: []RuleEntry{ + {Pattern: `pipeline`, Commands: []CommandCall{statusCmd, deployCmd}}, + }, + meta: EmailMeta{Subject: "run pipeline"}, + body: "", + want: []CommandCall{statusCmd, deployCmd}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p, err := NewRulePlanner(tt.rules) + require.NoError(t, err) + + got, err := p.Plan(context.Background(), tt.meta, tt.body) + if tt.wantErr != "" { + require.Error(t, err) + assert.Contains(t, err.Error(), tt.wantErr) + return + } + require.NoError(t, err) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestNewRulePlanner_InvalidRegex(t *testing.T) { + _, err := NewRulePlanner([]RuleEntry{ + {Pattern: `[invalid`, Commands: nil}, + }) + require.Error(t, err) + assert.Contains(t, err.Error(), "compile rule 0") +} + +func TestStubPlanner(t *testing.T) { + cmds := []CommandCall{{Command: "test", Args: map[string]any{"k": "v"}}} + p := &StubPlanner{Result: cmds, Err: nil} + + got, err := p.Plan(context.Background(), EmailMeta{}, "") + require.NoError(t, err) + assert.Equal(t, cmds, got) +} + +func TestStubPlanner_Error(t *testing.T) { + p := &StubPlanner{Err: assert.AnError} + + _, err := p.Plan(context.Background(), EmailMeta{}, "") + require.ErrorIs(t, err, assert.AnError) +} + +func TestLLMPlanner_NotImplemented(t *testing.T) { + p := &LLMPlanner{} + _, err := p.Plan(context.Background(), EmailMeta{}, "") + require.Error(t, err) + assert.Contains(t, err.Error(), "not yet implemented") +} diff --git a/internal/daemon/spawner.go b/internal/daemon/spawner.go index 579e53a..0e6135c 100644 --- a/internal/daemon/spawner.go +++ b/internal/daemon/spawner.go @@ -56,8 +56,10 @@ func (s *WorkerSpawner) logger() *slog.Logger { 
// Run executes a Claude Code worker for the given mission. // The context governs subprocess lifetime — cancel it for graceful shutdown. // mcpConfigPath and systemPromptPath must be paths to existing files; -// the caller is responsible for cleanup. -func (s *WorkerSpawner) Run(ctx context.Context, missionID, mcpConfigPath, systemPromptPath string) (WorkerResult, error) { +// the caller is responsible for cleanup. envOverrides are added to the +// subprocess environment (e.g. secrets resolved by the daemon for a command). +// Pass nil when no overrides are needed. +func (s *WorkerSpawner) Run(ctx context.Context, missionID, mcpConfigPath, systemPromptPath string, envOverrides map[string]string) (WorkerResult, error) { if !ValidMissionID(missionID) { return WorkerResult{MissionID: missionID}, fmt.Errorf("invalid mission ID %q", missionID) } @@ -114,12 +116,30 @@ func (s *WorkerSpawner) Run(ctx context.Context, missionID, mcpConfigPath, syste // Minimal env: only what claude needs. Do not leak daemon credentials // (BEADLE_IMAP_PASSWORD, BEADLE_RESEND_API_KEY, etc.) to the subprocess. // HOME is an isolated temp dir — no access to user's SSH keys, GPG, config. - cmd.Env = []string{ - "ANTHROPIC_API_KEY=" + s.APIKey, - "HOME=" + workerHome, - "PATH=" + workerPATH, - "USER=" + os.Getenv("USER"), - "TMPDIR=" + workerHome, + // + // Build env as a map, apply overrides, then force-set protected vars + // AFTER overrides so they always win. Without this, a malicious command + // config could override ANTHROPIC_API_KEY, HOME, PATH, or TMPDIR via + // envOverrides to escape the sandbox. + envMap := map[string]string{ + "ANTHROPIC_API_KEY": s.APIKey, + "HOME": workerHome, + "PATH": workerPATH, + "USER": os.Getenv("USER"), + "TMPDIR": workerHome, + } + for k, v := range envOverrides { + envMap[k] = v + } + // Force-set protected vars AFTER overrides — never allow override. 
+ envMap["ANTHROPIC_API_KEY"] = s.APIKey + envMap["HOME"] = workerHome + envMap["PATH"] = workerPATH + envMap["TMPDIR"] = workerHome + + cmd.Env = make([]string, 0, len(envMap)) + for k, v := range envMap { + cmd.Env = append(cmd.Env, k+"="+v) } s.logger().Info("spawning worker", "mission", missionID, "timeout", timeout) diff --git a/internal/daemon/templates.go b/internal/daemon/templates.go index 448d640..cfee181 100644 --- a/internal/daemon/templates.go +++ b/internal/daemon/templates.go @@ -1,36 +1,64 @@ package daemon import ( + "encoding/json" "fmt" "os" ) +// MCPServerConfig defines how to invoke an MCP server. +type MCPServerConfig struct { + Command string `json:"command"` + Args []string `json:"args"` +} + +// DefaultMCPRegistry returns the built-in server registry. +func DefaultMCPRegistry() map[string]MCPServerConfig { + return map[string]MCPServerConfig{ + "ethos": {Command: "ethos", Args: []string{"mcp"}}, + "beadle-email": {Command: "beadle-email", Args: []string{"serve"}}, + "biff": {Command: "biff", Args: []string{"mcp"}}, + } +} + // MissionTemplate generates temporary config and prompt files for worker sessions. type MissionTemplate struct { TmpDir string } -// BuildMCPConfig writes a temporary MCP server configuration file and returns -// its path. The caller must os.Remove the file after use. -func (t *MissionTemplate) BuildMCPConfig() (string, error) { +// BuildMCPConfig writes a temporary MCP server configuration file containing +// only the named servers and returns its path. Each name must exist in registry. +// The caller must os.Remove the file after use. 
+func (t *MissionTemplate) BuildMCPConfig(servers []string, registry map[string]MCPServerConfig) (string, error) { if err := os.MkdirAll(t.TmpDir, 0o700); err != nil { return "", fmt.Errorf("create tmp dir %s: %w", t.TmpDir, err) } - config := `{ - "mcpServers": { - "ethos": {"command": "ethos", "args": ["mcp"]}, - "beadle-email": {"command": "beadle-email", "args": ["serve"]} - } -} -` + selected := make(map[string]MCPServerConfig, len(servers)) + for _, name := range servers { + cfg, ok := registry[name] + if !ok { + return "", fmt.Errorf("unknown MCP server %q", name) + } + selected[name] = cfg + } + + doc := struct { + MCPServers map[string]MCPServerConfig `json:"mcpServers"` + }{MCPServers: selected} + + data, err := json.MarshalIndent(doc, "", " ") + if err != nil { + return "", fmt.Errorf("marshal mcp config: %w", err) + } + f, err := os.CreateTemp(t.TmpDir, "mcp-config-*.json") if err != nil { return "", fmt.Errorf("create mcp config temp file: %w", err) } path := f.Name() - if _, err := f.WriteString(config); err != nil { + if _, err := f.Write(data); err != nil { f.Close() os.Remove(path) return "", fmt.Errorf("write mcp config to %s: %w", path, err) @@ -45,6 +73,10 @@ func (t *MissionTemplate) BuildMCPConfig() (string, error) { // BuildSystemPrompt writes a temporary system prompt file for the given mission // and returns its path. The caller must os.Remove the file after use. 
func (t *MissionTemplate) BuildSystemPrompt(missionID string) (string, error) { + if !ValidMissionID(missionID) { + return "", fmt.Errorf("invalid mission ID %q", missionID) + } + if err := os.MkdirAll(t.TmpDir, 0o700); err != nil { return "", fmt.Errorf("create tmp dir %s: %w", t.TmpDir, err) } diff --git a/internal/daemon/templates_test.go b/internal/daemon/templates_test.go index 5678ffd..906bee9 100644 --- a/internal/daemon/templates_test.go +++ b/internal/daemon/templates_test.go @@ -11,33 +11,112 @@ import ( ) func TestBuildMCPConfig(t *testing.T) { + registry := DefaultMCPRegistry() + + tests := []struct { + name string + servers []string + want []string // expected keys in mcpServers + wantErr string + }{ + { + name: "single server", + servers: []string{"ethos"}, + want: []string{"ethos"}, + }, + { + name: "two servers", + servers: []string{"ethos", "biff"}, + want: []string{"ethos", "biff"}, + }, + { + name: "all defaults", + servers: []string{"ethos", "beadle-email", "biff"}, + want: []string{"ethos", "beadle-email", "biff"}, + }, + { + name: "empty list", + servers: []string{}, + want: []string{}, + }, + { + name: "nil list", + servers: nil, + want: []string{}, + }, + { + name: "unknown server", + servers: []string{"ethos", "nosuchserver"}, + wantErr: `unknown MCP server "nosuchserver"`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tmpDir := t.TempDir() + tmpl := &MissionTemplate{TmpDir: tmpDir} + + path, err := tmpl.BuildMCPConfig(tt.servers, registry) + if tt.wantErr != "" { + require.Error(t, err) + assert.Contains(t, err.Error(), tt.wantErr) + return + } + require.NoError(t, err) + defer os.Remove(path) + + assert.True(t, strings.HasPrefix(path, tmpDir)) + + data, err := os.ReadFile(path) + require.NoError(t, err) + + var doc struct { + MCPServers map[string]MCPServerConfig `json:"mcpServers"` + } + require.NoError(t, json.Unmarshal(data, &doc)) + + assert.Equal(t, len(tt.want), len(doc.MCPServers), + "server count 
mismatch: got %v", doc.MCPServers) + for _, name := range tt.want { + _, ok := doc.MCPServers[name] + assert.True(t, ok, "missing server %q", name) + } + }) + } +} + +func TestBuildMCPConfigContent(t *testing.T) { + registry := DefaultMCPRegistry() tmpDir := t.TempDir() tmpl := &MissionTemplate{TmpDir: tmpDir} - path, err := tmpl.BuildMCPConfig() + path, err := tmpl.BuildMCPConfig([]string{"ethos", "beadle-email"}, registry) require.NoError(t, err) defer os.Remove(path) - assert.True(t, strings.HasPrefix(path, tmpDir), "file should be in TmpDir") - data, err := os.ReadFile(path) require.NoError(t, err) - // Must be valid JSON. - var doc map[string]any + var doc struct { + MCPServers map[string]MCPServerConfig `json:"mcpServers"` + } require.NoError(t, json.Unmarshal(data, &doc)) - // Must contain mcpServers with ethos and beadle-email. - servers, ok := doc["mcpServers"].(map[string]any) - require.True(t, ok, "mcpServers must be an object") + ethos := doc.MCPServers["ethos"] + assert.Equal(t, "ethos", ethos.Command) + assert.Equal(t, []string{"mcp"}, ethos.Args) - ethos, ok := servers["ethos"].(map[string]any) - require.True(t, ok, "ethos server entry must exist") - assert.Equal(t, "ethos", ethos["command"]) + beadle := doc.MCPServers["beadle-email"] + assert.Equal(t, "beadle-email", beadle.Command) + assert.Equal(t, []string{"serve"}, beadle.Args) +} - beadle, ok := servers["beadle-email"].(map[string]any) - require.True(t, ok, "beadle-email server entry must exist") - assert.Equal(t, "beadle-email", beadle["command"]) +func TestDefaultMCPRegistry(t *testing.T) { + reg := DefaultMCPRegistry() + assert.Len(t, reg, 3) + assert.Contains(t, reg, "ethos") + assert.Contains(t, reg, "beadle-email") + assert.Contains(t, reg, "biff") } func TestBuildSystemPrompt(t *testing.T) {