diff --git a/hands/devops/HAND.toml b/hands/devops/HAND.toml index 4d23172..dd76d96 100644 --- a/hands/devops/HAND.toml +++ b/hands/devops/HAND.toml @@ -123,6 +123,11 @@ aliases = [ "infrastructure monitoring", "deployment automation", "incident response", + "auto evolve", + "review github prs", + "triage issues", + "implement issue", + "fix bug from issue", ] weak_aliases = [ "deploy", @@ -131,6 +136,10 @@ weak_aliases = [ "container", "terraform", "helm", + "bug fix", + "feature implementation", + "bmad", + "draft pr", ] # ─── Configurable settings ─────────────────────────────────────────────────── @@ -286,6 +295,91 @@ description = "Queue deployment and infrastructure actions for your review inste setting_type = "toggle" default = "true" +# ─── Auto-Evolution settings ───────────────────────────────────────────────── +# These drive the Phase 7 evolution loop: periodic scan of configured +# GitHub repos, automated PR review via the reviewer sub-agent, and +# BMAD-style bug fix / feature implementation via the implementer +# sub-agent. All produce draft PRs and respect approval_mode. + +[[settings]] +key = "auto_evolve" +label = "Auto Evolution" +description = "Periodically scan configured GitHub repos and run PR review / issue triage / BMAD implementation" +setting_type = "toggle" +default = "false" + +[[settings]] +key = "evolution_repos" +label = "Evolution Target Repos" +description = "Comma-separated owner/repo pairs to watch (e.g. 
librefang/librefang,librefang/librefang-registry)" +setting_type = "text" +default = "" + +[[settings]] +key = "evolution_check_interval" +label = "Evolution Check Interval" +description = "How often to scan target repos for new PRs and issues" +setting_type = "select" +default = "15min" + +[[settings.options]] +value = "5min" +label = "Every 5 minutes" + +[[settings.options]] +value = "15min" +label = "Every 15 minutes" + +[[settings.options]] +value = "1hour" +label = "Every hour" + +[[settings.options]] +value = "6hour" +label = "Every 6 hours" + +[[settings.options]] +value = "1day" +label = "Daily" + +[[settings]] +key = "bmad_strictness" +label = "BMAD Strictness" +description = "How thoroughly to run the Brainstorm-Architect-PRD-Implement pipeline before producing a draft PR" +setting_type = "select" +default = "standard" + +[[settings.options]] +value = "light" +label = "Light (skip brainstorm, go straight to architect → implement)" + +[[settings.options]] +value = "standard" +label = "Standard (full 4-phase pipeline, draft PR at end)" + +[[settings.options]] +value = "strict" +label = "Strict (full pipeline + require human approval at each phase via queue)" + +[[settings]] +key = "max_changed_files" +label = "Max Changed Files Per Draft PR" +description = "Implementer stops and queues for human triage if a single draft PR would touch more than this many files. Decompose larger work into multiple PRs." 
+setting_type = "select" +default = "30" + +[[settings.options]] +value = "10" +label = "10 files (very conservative)" + +[[settings.options]] +value = "30" +label = "30 files (default)" + +[[settings.options]] +value = "100" +label = "100 files (large refactors)" + # ─── Agent configuration ───────────────────────────────────────────────────── [agents.main] @@ -543,6 +637,80 @@ Stop the current monitoring/incident session when ANY of these conditions is met --- +## Phase 7 — Evolution Loop (auto_evolve) + +Gate: skip entirely unless `auto_evolve` is ENABLED **and** `evolution_repos` is non-empty. + +The Hand is already `frequency = "continuous"`, so this Phase fires once per turn while gates pass. On entry, read `memory_recall devops_evolution_cursor__`. If less than `evolution_check_interval` has elapsed since the last tick for THAT repo, skip the repo for this turn — the next turn will check again. Never busy-loop or self-schedule inside a turn. + +For every repo in `evolution_repos` (comma-separated `owner/repo` pairs) that passes the cadence gate, interleave PR review and issue triage. + +### 7.1 PR Review Pass + +1. List open PRs (filter out drafts unless explicitly enabled): + ``` + curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \\ + "https://api.github.com/repos/OWNER/REPO/pulls?state=open&per_page=50" \\ + -o open_prs.json + ``` +2. For each PR, look up `devops_pr_review___` in memory. Skip if `head_sha` matches the last reviewed sha — already reviewed at this revision. +3. Fetch the diff + file list: + ``` + curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \\ + -H "Accept: application/vnd.github.v3.diff" \\ + "https://api.github.com/repos/OWNER/REPO/pulls/NUM" -o pr.diff + curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \\ + "https://api.github.com/repos/OWNER/REPO/pulls/NUM/files" -o pr_files.json + ``` +4. Delegate to the `code-reviewer` sub-agent with: PR title, body, diff, file list, target branch's `AGENTS.md`/`CLAUDE.md` if present. 
Capture the reviewer's structured output (approve / request changes / block + issues + positives). +5. Post the review back to GitHub: + ``` + curl -s -X POST -H "Authorization: Bearer $GITHUB_TOKEN" \\ + -H "Content-Type: application/json" \\ + -d "$REVIEW_BODY_JSON" \\ + "https://api.github.com/repos/OWNER/REPO/pulls/NUM/reviews" + ``` + Event `"COMMENT"` for advisory passes. Reserve `"REQUEST_CHANGES"` for blocking findings flagged by the reviewer; never auto-`"APPROVE"`. +6. Record the result in memory: `memory_store devops_pr_review_<owner>_<repo>_<pr_number>` with `{ head_sha, verdict, timestamp }`. Bump dashboard counter `devops_hand_prs_reviewed`. + +### 7.2 Issue Triage + Implementation Pass + +1. List open issues that match the configured triage filter (default: issues with no `wontfix` / `duplicate` / `invalid` labels and no existing linked PR). The issues endpoint also returns pull requests, so drop any entry that carries a `pull_request` key: + ``` + curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \\ + "https://api.github.com/repos/OWNER/REPO/issues?state=open&per_page=50" \\ + -o open_issues.json + ``` +2. For each issue, classify via the **Issue Triage Playbook** (see `SKILL.md`): + - Labels first (`bug` / `enhancement` / `feature` / `question`) — cheap, deterministic + - LLM fallback only if labels are absent — single short prompt, never multi-turn + - Result is one of: `bug-fix`, `feature`, `needs-info`, `skip` +3. For `bug-fix` and `feature`, dispatch to the `implementer` sub-agent with the BMAD pipeline whose depth is set by `bmad_strictness`. +4. The implementer produces a **draft PR**. Always draft, never ready-for-review, regardless of `approval_mode` — this is the safety floor on auto-generated code. The user (or another reviewer) marks it ready. +5. Comment on the originating issue with a link to the draft PR and a one-line summary. +6. Record `memory_store devops_issue_state_<owner>_<repo>_<issue_number>` with `{ classification, pr_url, timestamp }`. Bump dashboard counter `devops_hand_issues_processed`. 
+ +### 7.3 Safety Floor (NEVER bypass) + +- Always create a fresh git worktree per implementation task — never write to the user's working tree. +- Never commit to `main` / `master` / `trunk` directly. +- Never use `--no-verify`, `--force`, or `git push -f` against any remote branch. +- Honor whatever pre-commit / pre-push / commit-msg hooks the upstream repo configures (run via `git config core.hooksPath` discovery + executing each non-skipped hook). Abort the task on hook failure rather than retrying. +- Stop and queue (`devops_queue.json`) if the implementer wants to touch: + - `Cargo.toml` workspace members (any `members = [...]` change) + - migration files (paths under `*/migrations/*`, `*/migrate/*`, or matching `*.sql`) + - any path containing `secrets`, `.env`, `credentials`, `*.pem`, `*.key`, `id_rsa`, `id_ed25519` + - more files in one PR than the configured `max_changed_files` setting allows (default 30) +- Token budget: each evolution tick must stop on its own when the agent senses it is nearing the per-turn budget (target ~70% so the next tick has headroom). Estimate by tracking cumulative output tokens since turn start; the kernel-enforced hard cap is the upstream guard rail, not the primary control. + +### 7.4 Failure Handling + +- Network / API errors → exponential backoff, max 3 retries, then surface a `devops_evolution_blocked` event and skip this PR/issue for the current tick. +- Reviewer or implementer sub-agent times out → record a `timed_out` verdict in memory so we don't retry on the same head_sha next tick. +- `git push` rejected (protected branch, stale, etc.) → do not retry the push; treat the item as `wontfix` for this tick and surface it to the user via event. + +--- + ## Guidelines - NEVER execute destructive commands without explicit user confirmation @@ -647,6 +815,95 @@ OUTPUT FORMAT: Be thorough but constructive. 
Focus on bugs and risks, not style preferences.""" +[agents.implementer] +invoke_hint = "BMAD pipeline executor — turns an issue into a draft PR via brainstorm, architect, PRD, implement phases" +name = "implementer" +description = "BMAD implementer. Takes a triaged issue (bug or feature) and produces a draft PR following the Brainstorm → Architect → PRD → Implement methodology, scaled by `bmad_strictness`." +module = "builtin:chat" +provider = "default" +model = "default" +max_tokens = 16384 +temperature = 0.2 +max_iterations = 80 +# Raise the history cap above the kernel default. BMAD work fans out +# across 4 phases (Brainstorm / Architect / PRD / Implement), each of +# which spawns shell_exec chains (cargo build/test cycles, git ops, +# file edits). 100 buys enough headroom that a single PR doesn't get +# truncated mid-implementation while still bounding worst-case cost. +max_history_messages = 100 +system_prompt = """You are Implementer, the BMAD execution sub-agent inside the DevOps Hand. You convert a single triaged GitHub issue into a draft pull request. You DO NOT push to protected branches, you DO NOT mark PRs ready-for-review, and you DO NOT skip phases unless `bmad_strictness = "light"`. + +## Inputs + +You will receive: +- `issue`: full GitHub issue payload (title, body, labels, comments) +- `classification`: `"bug-fix"` or `"feature"` +- `repo`: `owner/name` +- `bmad_strictness`: `"light"` | `"standard"` | `"strict"` +- `repo_context`: workspace root path of a freshly-created git worktree off `origin/` — your sandbox + +## Pipeline (skip phases per strictness) + +### Phase B — Brainstorm (skipped when strictness = light) +- Re-read the issue. What is the actual user-visible problem or capability being asked for? Restate in your own words. +- Generate 2–3 distinct approaches. For each: rough sketch, files touched, risk level, estimated diff size. +- Pick ONE. Record the trade-off justification in a `BMAD.md` you'll commit alongside the change. 
Length: ≤ 200 words. + +### Phase A — Architect +- For the chosen approach, identify exact crates / modules / files to change. +- Decide types, function signatures, and module boundaries before writing code. +- Call out any interface changes that ripple to other crates and confirm the ripple is bounded (or escalate via queue if it isn't). +- Append to `BMAD.md` under `## Architecture`. + +### Phase P — PRD (skipped when strictness = light) +- Acceptance criteria as a bulleted checklist (what must pass for the PR to be ready). +- Test plan: enumerate the unit / integration tests you will add or update. +- Rollback plan: how a reviewer can revert if this lands and breaks something. +- Append to `BMAD.md` under `## PRD`. + +### Phase I — Implement +- For bug fixes: write a failing test first (TDD), make it pass, then refactor. The failing test must commit before the fix, in the same PR. +- For features: write tests alongside code; do not commit untested branches. +- Use only `shell_exec` + `file_*` tools to edit. Never edit outside `repo_context`. +- Run the project's own lint/test gate (e.g., `cargo clippy --workspace --all-targets -- -D warnings`, `cargo test -p `). If the project has a `justfile` or `xtask`, prefer those. Fail-fast: if the gate doesn't pass, fix; if it can't be made to pass within `max_iterations`, stop and surface. + +## Output + +A **draft PR** (`draft: true`) on `repo` whose body contains: + +``` +## Summary + + +## BMAD Pipeline Output + + +## Acceptance Checklist +- [ ] + +## Risk + + +## Generated By +DevOps Hand → implementer sub-agent (issue: #, strictness: ) +``` + +## Hard Rules (NEVER violate, regardless of strictness) + +- ALWAYS work in a fresh git worktree provided as `repo_context`. Never `cd` out of it. +- ALWAYS create a feature branch named `auto/--`. +- NEVER push to `main`, `master`, `trunk`, or any branch protected by ruleset. +- NEVER use `git push --force`, `--no-verify`, `--no-gpg-sign`, or `--amend` against a remote branch. 
+- NEVER commit anything matching: `.env*`, `*.pem`, `*.p12`, `id_rsa`, `id_ed25519`, `credentials*`, `secrets*`, `vault_*.key`. +- NEVER include LLM-vendor attribution in commit messages or PR bodies — no `Co-Authored-By: Claude`, no `Generated with Claude / GPT / Anthropic / OpenAI`, no `🤖` emoji crediting an AI vendor. "Generated by DevOps Hand → implementer" (process attribution) is fine and encouraged for traceability; vendor attribution is not. Many upstream repos enforce this via commit-msg hook; we apply the rule regardless of upstream enforcement. +- Touch limit: stop at `max_changed_files` (default 30 files). If the implementation legitimately needs more, decompose into multiple draft PRs and stop after the first. +- If any acceptance-test command in PRD fails after your last fix attempt, DO NOT push. Write the partial state + failure details to `devops_queue.json` and surface for human triage. +- Token budget: stop on your own at 70% of the per-turn budget so the next tick has headroom. + +## On `bmad_strictness = "strict"` + +Between every phase, write the produced artifact to `devops_queue.json` with `phase: "bmad--pending"` and `status: "pending"`, then **end the current turn**. The Hand is continuous, so the next tick re-reads the queue: if the user (out-of-band) flipped `status` to `approved`, resume from the next phase; if still `pending`, skip this issue for this tick and re-check on the next one. Never poll or `sleep` for approval within a single turn — the agent loop has no in-turn pause primitive, and busy-waiting would block other Hand work and burn tokens. 
This is how a human keeps a leash on autonomous code changes without forcing the daemon to stall.""" + [dashboard] [[dashboard.metrics]] label = "Health Checks Run" @@ -668,6 +925,21 @@ label = "Deployments Managed" memory_key = "devops_hand_deployments_managed" format = "number" +[[dashboard.metrics]] +label = "PRs Reviewed" +memory_key = "devops_hand_prs_reviewed" +format = "number" + +[[dashboard.metrics]] +label = "Issues Processed" +memory_key = "devops_hand_issues_processed" +format = "number" + +[[dashboard.metrics]] +label = "Draft PRs Opened" +memory_key = "devops_hand_draft_prs_opened" +format = "number" + # ─── Token & Performance Metadata ───────────────────────────────────────────── [metadata] diff --git a/hands/devops/README.md b/hands/devops/README.md index 686fe8f..43c8871 100644 --- a/hands/devops/README.md +++ b/hands/devops/README.md @@ -8,7 +8,7 @@ Autonomous DevOps engineer -- CI/CD management, infrastructure monitoring, deplo |-------|-------| | Category | `development` | | Agent | `devops-hand` | -| Routing | `ci/cd`, `pipeline`, `github actions`, `infrastructure monitoring`, `deployment automation`, `incident response` | +| Routing | `ci/cd`, `pipeline`, `github actions`, `infrastructure monitoring`, `deployment automation`, `incident response`, `auto evolve`, `review github prs`, `triage issues`, `implement issue`, `fix bug from issue` | ## Integrations @@ -24,9 +24,44 @@ None required. 
- **Service URLs** -- Comma-separated URLs to monitor - **Alert on Failure** -- Publish events on health check failures (default: on) - **Rollback Strategy** -- `manual`, `auto_previous`, `blue_green` +- **Auto Evolution** -- Periodically scan GitHub repos and run PR review / issue triage / BMAD implementation (default: off) +- **Evolution Target Repos** -- Comma-separated `owner/repo` pairs to watch +- **Evolution Check Interval** -- `5min`, `15min`, `1hour`, `6hour`, `1day` +- **BMAD Strictness** -- `light`, `standard`, `strict` -- depth of the Brainstorm-Architect-PRD-Implement pipeline before producing a draft PR ## Usage ```bash librefang hand run devops ``` + +## Auto-Evolution Mode + +When `auto_evolve = true` and `evolution_repos` is set, the Hand's Phase 7 loop fires on `evolution_check_interval` and, for each watched repo: + +1. **Reviews open PRs** -- pulls each PR's diff, asks the `code-reviewer` sub-agent for an assessment, posts a single review back on GitHub (`COMMENT`, or `REQUEST_CHANGES` for blocking findings; never `APPROVE`). Already-reviewed `head_sha` values are skipped. +2. **Triages open issues** -- labels first, single-prompt LLM fallback if labels are absent. Result is one of `bug-fix | feature | needs-info | skip`. +3. **Implements actionable issues** -- dispatches `bug-fix` and `feature` issues to the `implementer` sub-agent which runs the BMAD pipeline scaled by `bmad_strictness` and produces a **draft PR**. + +### Safety floor (always on) + +- Draft PRs only. The Hand never marks PRs ready-for-review and never merges. +- Never pushes to `main` / `master` / protected branches. +- Never `--force` / `--no-verify` / `--amend` against a remote branch. +- Stops and queues to `devops_queue.json` if the change touches `Cargo.toml` workspace members, migration files, or anything under a `secrets` / credential glob. +- Hard cap on changed files per PR (`max_changed_files`, default 30); larger changes get split. +- Per-tick token budget capped at 70% so subsequent ticks have headroom. 
+ +### Required GitHub token scopes + +For public-repo evolution, a fine-grained token with: +- **Pull requests**: read & write (review posting, draft PR creation) +- **Issues**: read & write (triage comments, issue cross-links) +- **Contents**: read & write (branch push) +- **Metadata**: read + +For private repos, grant the fine-grained token access to each private repository (or use a classic token with the `repo` scope) and ensure the repo is listed in `evolution_repos`. + +### What it does NOT do + +It will never merge a PR, mark a draft as ready, or auto-approve. Human review is always required. See `SKILL.md` -> `What this Hand does NOT do` for the full list. diff --git a/hands/devops/SKILL.md b/hands/devops/SKILL.md index 47fd3ad..637070b 100644 --- a/hands/devops/SKILL.md +++ b/hands/devops/SKILL.md @@ -868,3 +868,435 @@ rm /tmp/restore.dump **Communication template**: Subject `[INCIDENT] Service -- Status`. Body: what happened, impact, current status, ETA, next update time. **Post-recovery checklist**: health checks passing, data integrity verified, monitoring restored, backups resumed, incident report filed, post-mortem scheduled within 48h. + +--- + +## Auto-Evolution Workflow + +The Phase 7 evolution loop (gated by `auto_evolve = true`) periodically scans the repos listed in `evolution_repos` and takes one of three actions per item: + +- **Open PR** → review via the `code-reviewer` sub-agent, post a `COMMENT` review back to GitHub. +- **Open Issue** → triage, then dispatch to the `implementer` sub-agent if actionable. +- **Anything we've already processed at the same head_sha / issue revision** → skip. + +The pipeline never marks PRs ready-for-review and never pushes to protected branches. All produced PRs are drafts. + +### When the loop fires + +The Hand runs with `frequency = "continuous"`, so Phase 7 is evaluated on every turn; no `schedule_create` trigger is registered. A repo is scanned only when at least `evolution_check_interval` has elapsed since its last recorded tick. Each tick runs at most one full repo pass; if there's more work than fits in the token budget, the remainder waits for the next tick. The state cursor lives in `memory` so progress survives daemon restarts. 
+ +### Memory keys this workflow owns + +| Key pattern | Stored value | +|---|---| +| `devops_pr_review_<owner>_<repo>_<pr_number>` | `{ head_sha, verdict, timestamp }` — last review per PR | +| `devops_issue_state_<owner>_<repo>_<issue_number>` | `{ classification, pr_url, timestamp }` — last triage per issue | +| `devops_evolution_cursor_<owner>_<repo>` | `{ last_tick_at, last_seen_pr, last_seen_issue }` | +| `devops_hand_prs_reviewed` | counter — dashboard metric | +| `devops_hand_issues_processed` | counter — dashboard metric | +| `devops_hand_draft_prs_opened` | counter — dashboard metric | + +### Events this workflow publishes + +| Event name | Payload | When | +|---|---|---| +| `devops_evolution_pr_reviewed` | `{ pr_url, verdict, head_sha }` | After a PR review is posted to GitHub | +| `devops_evolution_pr_opened` | `{ pr_url, issue_url, classification }` | After a draft PR is created from a triaged issue | +| `devops_evolution_blocked` | `{ reason, pr_or_issue_url, retry_after }` | When a tick is aborted by safety floor, API failure, or hook rejection | +| `devops_evolution_skipped` | `{ pr_or_issue_url, reason }` | When an item is skipped by cadence gate, label filter, or already-processed check | + +These are advisory; subscribers (dashboard, audit log, downstream Hands) are optional. + +--- + +### Issue Triage Playbook + +The goal is to spend zero LLM tokens when labels are enough. LLM fallback is one prompt, never a multi-turn chain. + +**Step 1 -- Label-driven (deterministic)** + +```text +Has any of {"bug", "defect", "regression", "broken"} -> bug-fix +Has any of {"feature", "enhancement", "rfc", "proposal"} -> feature +Has any of {"question", "discussion", "support"} -> needs-info +Has any of {"wontfix", "duplicate", "invalid", "stale"} -> skip +``` + +**Step 2 -- LLM fallback (only when labels are absent)** + +Single classification prompt. Allowed outputs: exactly one of `bug-fix | feature | needs-info | skip`. Reject any longer answer and re-prompt once before defaulting to `needs-info`. 
+ +```text +You are classifying a GitHub issue for a DevOps Hand evolution pipeline. + +Output exactly ONE token from this set: bug-fix | feature | needs-info | skip + +Heuristics: +- bug-fix: user reports incorrect behavior, crash, regression, security issue, + or unexpected output of existing functionality. +- feature: user requests new capability, configuration option, or refactor + that ships user-visible value. +- needs-info: report is ambiguous -- cannot reproduce, missing version/environment, + cannot tell if bug or feature. +- skip: issue is a question, off-topic, or already addressed. + +Issue title: {TITLE} +Issue body: +{BODY} +Existing labels: {LABELS_OR_NONE} +``` + +**Step 3 -- Already-linked PR check** + +```bash +# A PR already references this issue -> skip implementation, just review the PR +curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \ + "https://api.github.com/repos/OWNER/REPO/issues/NUM/timeline?per_page=50" \ + | jq '.[] | select(.event == "cross-referenced") | .source.issue.pull_request.url' +``` + +--- + +### PR Review Automation + +**Pull PR metadata + diff + files in three calls** + +```bash +PR_URL="https://api.github.com/repos/OWNER/REPO/pulls/NUM" + +curl -s -H "Authorization: Bearer $GITHUB_TOKEN" "$PR_URL" -o pr.json +curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \ + -H "Accept: application/vnd.github.v3.diff" \ + "$PR_URL" -o pr.diff +curl -s -H "Authorization: Bearer $GITHUB_TOKEN" "$PR_URL/files?per_page=300" -o pr_files.json +``` + +**Short-circuit on bot / merge / huge diff** + +Extract the cheap signals from already-fetched PR metadata: + +```bash +HEAD_SHA=$(jq -r .head.sha pr.json) +USER_TYPE=$(jq -r .user.type pr.json) +CHANGED=$(jq '. | length' pr_files.json) +``` + +Decision rules (the **agent** applies these in its loop, not the shell — `exit 0` would only end one `shell_exec`, not abort the Phase 7 pass): + +- **`USER_TYPE == "Bot"`** (dependabot, renovate, etc.): skip deep review for this PR. The agent then: + 1. 
calls `memory_store devops_pr_review___` with `{head_sha, verdict: "skipped_bot", timestamp}` + 2. calls `event_publish devops_evolution_skipped` with `{pr_or_issue_url, reason: "bot author"}` + 3. moves on to the next PR — does NOT dispatch the reviewer sub-agent. +- **`CHANGED > 200`**: diff too large for the reviewer to ground usefully. The agent then: + 1. calls `event_publish devops_evolution_skipped` with `{pr_or_issue_url, reason: "diff>200 files"}` + 2. moves on to the next PR. + +**Dispatch to reviewer sub-agent** + +Hand the reviewer the diff, file list, PR description, and (if present) the target branch's `AGENTS.md` / `CONTRIBUTING.md`. Capture the reviewer's structured JSON into `reviewer_output.json` (whatever your routing primitive is — `subagent_invoke`, A2A call, or local fork — write the result to that file so the next shell snippet can read it): + +```json +{ + "verdict": "approve | request_changes | block | comment_only", + "issues": [ + {"severity": "critical|major|minor", "file": "...", "line": 42, "body": "..."} + ], + "positives": ["..."], + "summary": "..." +} +``` + +**Post a single review (not N inline comments)** + +```bash +# Pull verdict + summary out of the reviewer's structured output. +VERDICT=$(jq -r .verdict reviewer_output.json) +SUMMARY_BODY=$(jq -r .summary reviewer_output.json) + +# Map verdict -> GitHub review event. Never auto-APPROVE. +BODY_PREFIX="" +case "$VERDICT" in + approve) + # Downgrade silent "approve" to advisory COMMENT — a human still merges. + EVENT="COMMENT" + ;; + request_changes) + EVENT="REQUEST_CHANGES" + ;; + block) + # Block is more severe than request_changes. We still post REQUEST_CHANGES + # (the strongest event we use), but flag the body so a human escalates. 
+ EVENT="REQUEST_CHANGES" + BODY_PREFIX="**Reviewer flagged this PR as BLOCKING — please escalate to a maintainer before merge.** + +" + ;; + comment_only|*) + EVENT="COMMENT" + ;; +esac +SUMMARY_BODY="${BODY_PREFIX}${SUMMARY_BODY}" + +jq -n --arg event "$EVENT" --arg body "$SUMMARY_BODY" --arg sha "$HEAD_SHA" \ + '{commit_id: $sha, event: $event, body: $body}' > review_payload.json + +curl -s -X POST -H "Authorization: Bearer $GITHUB_TOKEN" \ + -H "Content-Type: application/json" \ + -d @review_payload.json \ + "$PR_URL/reviews" +``` + +Body format -- keep tight; reviewers read this: + +```markdown +**DevOps Hand -- automated review** + +**Verdict**: {verdict} + +**Summary**: {one paragraph} + +**Findings** ({N}): +- [critical] {file}:{line} -- {body} +- [major] {file}:{line} -- {body} +- [minor] {file}:{line} -- {body} + +**What looks good**: +- {positive 1} + +_Generated by DevOps Hand reviewer (commit: `{sha}`)._ +``` + +--- + +### Bug Fix Playbook + +The implementer runs this when `classification = "bug-fix"`. Sequence is rigid -- failing test first, fix second, refactor third. + +**Step 1 -- Reproduce** + +In the supplied worktree: + +```bash +cd "$REPO_CONTEXT" +git checkout -b "auto/bug-fix-${ISSUE_NUMBER}-${SLUG}" +# Try to reproduce from the issue's repro steps. If they're absent, infer them. +# If you cannot reproduce in 3 attempts, stop and surface to devops_queue.json. +``` + +**Step 2 -- Failing test first** + +```bash +# For Rust workspaces, drop the test in the closest existing module's tests/. +cargo test -p --test -- --nocapture --test-threads=1 +# Expect FAILURE. Commit the failing test: +git add tests/ && git commit -m "test: reproduce #${ISSUE_NUMBER} -- " +``` + +**Step 3 -- Minimal fix** + +Edit only the files needed to make the test pass. 
Run the project's full lint+test gate: + +```bash +cargo clippy --workspace --all-targets -- -D warnings +cargo test -p # not --workspace -- target/ contention +``` + +**Step 4 -- Refactor (optional)** + +Only if step 3 left obvious smell (long fn, repeated literal, etc.). Skip if `bmad_strictness = "light"`. + +**Step 5 -- Commit and push (draft branch only)** + +```bash +git add -A && git commit -m "fix: (#${ISSUE_NUMBER})" +git push origin "auto/bug-fix-${ISSUE_NUMBER}-${SLUG}" +``` + +**Step 6 -- Open draft PR (see Draft PR Creation below)** + +--- + +### BMAD Feature Pipeline + +Run when `classification = "feature"`. Phases scale with `bmad_strictness`: + +| Phase | `light` | `standard` | `strict` | +|---|---|---|---| +| B -- Brainstorm | skip | inline <=200 words | inline + queue gate | +| A -- Architect | always | always | always + queue gate | +| P -- PRD | skip | required | required + queue gate | +| I -- Implement | always | always | always | + +Each phase output is appended to `BMAD.md` in the repo root of the feature branch. The file is committed along with the implementation so reviewers can see the reasoning. + +**BMAD.md template** + +````markdown +# BMAD -- #{ISSUE_NUMBER}: {SHORT TITLE} + +## Brainstorm +**Restated problem**: {one paragraph} + +**Approaches considered**: +1. **{Name}** -- {sketch} -- files: {list} -- risk: {low/mid/high} -- diff: ~{N} LoC +2. **{Name}** -- ... +3. **{Name}** -- ... + +**Chosen**: #{N}. Rationale: {one paragraph} + +## Architecture +**Crates / modules touched**: {list} + +**Types / signatures introduced or changed**: +```rust +// ... 
+``` + +**Cross-crate ripples**: {none / bounded list / escalated to queue: reason} + +## PRD +**Acceptance criteria** +- [ ] {behavior 1} +- [ ] {behavior 2} + +**Test plan** +- unit: `crates/{crate}/src/{path}.rs` -- {what it asserts} +- integration: `crates/{crate}/tests/{name}.rs` -- {what it asserts} + +**Rollback plan**: {one paragraph} + +## Implementation Notes +{anything a future reader needs to understand the diff but wouldn't see in code comments} +```` + +**Strict mode queue gate** + +Between each phase, write to `devops_queue.json`: + +```json +{ + "id": "bmad_${REPO}_${ISSUE}_${PHASE}", + "action": "bmad_phase_review", + "phase": "B|A|P|I", + "issue": "owner/repo#NUM", + "artifact_path": "BMAD.md", + "status": "pending", + "created": "ISO8601" +} +``` + +Then **end the current turn**. The Hand is `frequency = "continuous"`, so the next tick will re-read `devops_queue.json`: + +- If the user (out-of-band) has flipped `status` to `approved`, resume from the next phase. +- If still `pending`, skip this issue for this tick and re-check on the following one. +- If flipped to `rejected`, abandon the issue, comment on it with the rejection rationale (if provided), and stop. + +Within a single turn, never poll or `sleep` waiting for approval — the agent loop has no in-turn pause primitive, and busy-waiting would block other Hand work and burn tokens. End the turn and let the kernel re-invoke you. + +--- + +### Draft PR Creation + +The final action of both bug-fix and feature paths. 
**Always `draft: true`.** + +**Step 1 -- Push the branch (if not already)** + +```bash +git push origin "auto/${CLASSIFICATION}-${ISSUE}-${SLUG}" +``` + +**Step 2 -- Compose the PR body** + +```markdown +## Summary +{one paragraph} + +## Linked Issue +Closes #{ISSUE_NUMBER} + +## BMAD Pipeline Output +{inline BMAD.md, or note that it's committed at `BMAD.md` in this PR} + +## Acceptance Checklist +- [ ] {copied from PRD} + +## Risk +{one paragraph -- what could go wrong, what's the blast radius} + +## Verification I ran locally +- `cargo clippy --workspace --all-targets -- -D warnings` -- passed +- `cargo test -p {crate}` -- passed +- {anything project-specific from justfile / xtask} + +## Generated by +DevOps Hand -> implementer (strictness: {level}) +``` + +**Step 3 -- Open the draft PR** + +First resolve the target branch. Don't assume `main` -- query the repo: + +```bash +BASE_BRANCH=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \ + "https://api.github.com/repos/${OWNER}/${REPO}" | jq -r .default_branch) +# fallback in the unlikely case the API doesn't return one +[ -z "$BASE_BRANCH" ] || [ "$BASE_BRANCH" = "null" ] && BASE_BRANCH="main" +``` + +Then create the draft PR: + +```bash +jq -n \ + --arg title "${PR_TITLE}" \ + --arg body "${PR_BODY}" \ + --arg head "auto/${CLASSIFICATION}-${ISSUE}-${SLUG}" \ + --arg base "${BASE_BRANCH}" \ + '{title: $title, body: $body, head: $head, base: $base, draft: true}' \ + > pr_create.json + +curl -s -X POST -H "Authorization: Bearer $GITHUB_TOKEN" \ + -H "Content-Type: application/json" \ + -d @pr_create.json \ + "https://api.github.com/repos/${OWNER}/${REPO}/pulls" \ + -o pr_created.json + +PR_URL=$(jq -r .html_url pr_created.json) +echo "Draft PR: $PR_URL" +``` + +**Step 4 -- Cross-link on the originating issue** + +Build the body in shell first so newlines survive (`jq --arg` is a literal-string +parameter, it does NOT interpret backslash escapes): + +```bash +ISSUE_COMMENT=$(printf 'Auto-implementation drafted: 
%s\n\n_Generated by DevOps Hand. Mark the PR ready-for-review when human triage agrees._' "$PR_URL") + +jq -n --arg body "$ISSUE_COMMENT" '{body: $body}' > issue_comment.json + +curl -s -X POST -H "Authorization: Bearer $GITHUB_TOKEN" \ + -H "Content-Type: application/json" \ + -d @issue_comment.json \ + "https://api.github.com/repos/${OWNER}/${REPO}/issues/${ISSUE_NUMBER}/comments" +``` + +**Step 5 -- Bump counters** + +```text +memory_store devops_hand_draft_prs_opened (current + 1) +memory_store devops_issue_state_${OWNER}_${REPO}_${ISSUE_NUMBER} = {classification, pr_url, timestamp} +event_publish devops_evolution_pr_opened {pr_url, issue_url, classification} +``` + +--- + +### What this Hand does NOT do + +To set expectations for users and reviewers: + +- It does NOT merge PRs. A human always merges. +- It does NOT mark draft PRs as ready-for-review. +- It does NOT push to `main` / `master` / any protected branch. +- It does NOT operate on private repos unless the configured GITHUB_TOKEN has been explicitly granted access to them (fine-grained repository access or the classic `repo` scope) and the repo is in `evolution_repos`. +- It does NOT modify `Cargo.toml` workspace members, migration files, secrets, or any path matching the safety floor in Phase 7.3 -- those escalate to `devops_queue.json` instead. +- It does NOT consume more than 70% of the per-turn token budget in a single tick. Long jobs are picked up by subsequent ticks.