From 30e1b137ec2b61b6f46ba5e9b758779c26d9708a Mon Sep 17 00:00:00 2001 From: Evan Hu Date: Thu, 14 May 2026 15:12:15 +0900 Subject: [PATCH 1/3] feat(devops): add auto-evolution loop (PR review + BMAD bug/feature pipeline) Extends the DevOps Hand to periodically scan configured GitHub repos and: - review open PRs via the existing code-reviewer sub-agent, posting a single COMMENT review back to GitHub (never auto-APPROVE) - triage open issues via labels first, single-prompt LLM fallback - dispatch actionable issues (bug-fix / feature) to a new implementer sub-agent which runs the BMAD pipeline (Brainstorm -> Architect -> PRD -> Implement) scaled by bmad_strictness and produces a DRAFT PR Safety floor (always on): - draft PRs only, never auto-ready, never merge - never push to main/master/protected branches - escalates to devops_queue.json when touching workspace Cargo.toml, migrations, secrets, or >30 changed files - 70% per-turn token budget cap so subsequent ticks have headroom New settings: auto_evolve, evolution_repos, evolution_check_interval, bmad_strictness. New sub-agent: agents.implementer. New SKILL.md sections: Issue Triage Playbook, PR Review Automation, Bug Fix Playbook, BMAD Feature Pipeline, Draft PR Creation. Three new dashboard metrics: prs_reviewed, issues_processed, draft_prs_opened. 
--- hands/devops/HAND.toml | 246 +++++++++++++++++++++++++++ hands/devops/README.md | 37 ++++- hands/devops/SKILL.md | 365 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 647 insertions(+), 1 deletion(-) diff --git a/hands/devops/HAND.toml b/hands/devops/HAND.toml index 4d23172..79056b0 100644 --- a/hands/devops/HAND.toml +++ b/hands/devops/HAND.toml @@ -123,6 +123,11 @@ aliases = [ "infrastructure monitoring", "deployment automation", "incident response", + "auto evolve", + "review github prs", + "triage issues", + "implement issue", + "fix bug from issue", ] weak_aliases = [ "deploy", @@ -131,6 +136,10 @@ weak_aliases = [ "container", "terraform", "helm", + "bug fix", + "feature implementation", + "bmad", + "draft pr", ] # ─── Configurable settings ─────────────────────────────────────────────────── @@ -286,6 +295,72 @@ description = "Queue deployment and infrastructure actions for your review inste setting_type = "toggle" default = "true" +# ─── Auto-Evolution settings ───────────────────────────────────────────────── +# These drive the Phase 6 evolution loop: periodic scan of configured +# GitHub repos, automated PR review via the reviewer sub-agent, and +# BMAD-style bug fix / feature implementation via the implementer +# sub-agent. All produce draft PRs and respect approval_mode. + +[[settings]] +key = "auto_evolve" +label = "Auto Evolution" +description = "Periodically scan configured GitHub repos and run PR review / issue triage / BMAD implementation" +setting_type = "toggle" +default = "false" + +[[settings]] +key = "evolution_repos" +label = "Evolution Target Repos" +description = "Comma-separated owner/repo pairs to watch (e.g. 
librefang/librefang,librefang/librefang-registry)" +setting_type = "text" +default = "" + +[[settings]] +key = "evolution_check_interval" +label = "Evolution Check Interval" +description = "How often to scan target repos for new PRs and issues" +setting_type = "select" +default = "15min" + +[[settings.options]] +value = "5min" +label = "Every 5 minutes" + +[[settings.options]] +value = "15min" +label = "Every 15 minutes" + +[[settings.options]] +value = "1hour" +label = "Every hour" + +[[settings.options]] +value = "6hour" +label = "Every 6 hours" + +[[settings.options]] +value = "1day" +label = "Daily" + +[[settings]] +key = "bmad_strictness" +label = "BMAD Strictness" +description = "How thoroughly to run the Brainstorm-Architect-PRD-Implement pipeline before producing a draft PR" +setting_type = "select" +default = "standard" + +[[settings.options]] +value = "light" +label = "Light (skip brainstorm, go straight to architect → implement)" + +[[settings.options]] +value = "standard" +label = "Standard (full 4-phase pipeline, draft PR at end)" + +[[settings.options]] +value = "strict" +label = "Strict (full pipeline + require human approval at each phase via queue)" + # ─── Agent configuration ───────────────────────────────────────────────────── [agents.main] @@ -543,6 +618,78 @@ Stop the current monitoring/incident session when ANY of these conditions is met --- +## Phase 7 — Evolution Loop (auto_evolve) + +Gate: skip entirely unless `auto_evolve` is ENABLED **and** `evolution_repos` is non-empty. + +For every repo in `evolution_repos` (comma-separated `owner/repo` pairs), interleave PR review and issue triage on the `evolution_check_interval` cadence. Use `schedule_create` to register a recurring tick if it does not already exist; never busy-loop inside one turn. + +### 7.1 PR Review Pass + +1. 
List open PRs (filter out drafts unless explicitly enabled): + ``` + curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \\ + "https://api.github.com/repos/OWNER/REPO/pulls?state=open&per_page=50" \\ + -o open_prs.json + ``` +2. For each PR, look up `devops_pr_review___` in memory. Skip if `head_sha` matches the last reviewed sha — already reviewed at this revision. +3. Fetch the diff + file list: + ``` + curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \\ + -H "Accept: application/vnd.github.v3.diff" \\ + "https://api.github.com/repos/OWNER/REPO/pulls/NUM" -o pr.diff + curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \\ + "https://api.github.com/repos/OWNER/REPO/pulls/NUM/files" -o pr_files.json + ``` +4. Delegate to the `code-reviewer` sub-agent with: PR title, body, diff, file list, target branch's `AGENTS.md`/`CLAUDE.md` if present. Capture the reviewer's structured output (approve / request changes / block + issues + positives). +5. Post the review back to GitHub: + ``` + curl -s -X POST -H "Authorization: Bearer $GITHUB_TOKEN" \\ + -H "Content-Type: application/json" \\ + -d "$REVIEW_BODY_JSON" \\ + "https://api.github.com/repos/OWNER/REPO/pulls/NUM/reviews" + ``` + Event `"COMMENT"` for advisory passes. Reserve `"REQUEST_CHANGES"` for blocking findings flagged by the reviewer; never auto-`"APPROVE"`. +6. Record the result in memory: `memory_store devops_pr_review___` with `{ head_sha, verdict, timestamp }`. Bump dashboard counter `devops_hand_prs_reviewed`. + +### 7.2 Issue Triage + Implementation Pass + +1. List open issues that match the configured triage filter (default: issues with no `wontfix` / `duplicate` / `invalid` labels and no existing linked PR): + ``` + curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \\ + "https://api.github.com/repos/OWNER/REPO/issues?state=open&per_page=50" \\ + -o open_issues.json + ``` +2. 
For each issue, classify via the **Issue Triage Playbook** (see `SKILL.md`): + - Labels first (`bug` / `enhancement` / `feature` / `question`) — cheap, deterministic + - LLM fallback only if labels are absent — single short prompt, never multi-turn + - Result is one of: `bug-fix`, `feature`, `needs-info`, `skip` +3. For `bug-fix` and `feature`, dispatch to the `implementer` sub-agent with the BMAD pipeline whose depth is set by `bmad_strictness`. +4. The implementer produces a **draft PR**. Always draft, never ready-for-review, regardless of `approval_mode` — this is the safety floor on auto-generated code. The user (or another reviewer) marks it ready. +5. Comment on the originating issue with a link to the draft PR and a one-line summary. +6. Record `memory_store devops_issue_state___` with `{ classification, pr_url, timestamp }`. Bump dashboard counter `devops_hand_issues_processed`. + +### 7.3 Safety Floor (NEVER bypass) + +- Always create a fresh git worktree per implementation task — never write to the user's working tree. +- Never commit to `main` / `master` directly. +- Never use `--no-verify`, `--force`, or `git push -f` to a protected branch. +- If the kernel `forbid-main-worktree` style hook is configured upstream, honor it; abort the task on hook failure rather than retrying. +- Stop and queue (`devops_queue.json`) if the implementer wants to touch: + - `Cargo.toml` workspace members + - migration files + - any path containing `secrets`, `.env`, `credentials`, `*.pem`, `*.key` + - more than `max_changed_files` (default 30) files in one PR +- Token budget: each evolution tick must stop on its own at 70% of the configured per-turn budget so the next tick has headroom. Use `metering_query` if available. + +### 7.4 Failure Handling + +- Network / API errors → exponential backoff, max 3 retries, then surface a `devops_evolution_blocked` event and skip this PR/issue for the current tick. 
+- Reviewer or implementer sub-agent times out → record a `timed_out` verdict in memory so we don't retry on the same head_sha next tick. +- `git push` rejected (protected branch, stale, etc.) → open the PR target as `wontfix` for this tick, surface to the user via event. + +--- + ## Guidelines - NEVER execute destructive commands without explicit user confirmation @@ -647,6 +794,90 @@ OUTPUT FORMAT: Be thorough but constructive. Focus on bugs and risks, not style preferences.""" +[agents.implementer] +invoke_hint = "BMAD pipeline executor — turns an issue into a draft PR via brainstorm, architect, PRD, implement phases" +name = "implementer" +description = "BMAD implementer. Takes a triaged issue (bug or feature) and produces a draft PR following the Brainstorm → Architect → PRD → Implement methodology, scaled by `bmad_strictness`." +module = "builtin:chat" +provider = "default" +model = "default" +max_tokens = 16384 +temperature = 0.2 +max_iterations = 80 +max_history_messages = 100 +system_prompt = """You are Implementer, the BMAD execution sub-agent inside the DevOps Hand. You convert a single triaged GitHub issue into a draft pull request. You DO NOT push to protected branches, you DO NOT mark PRs ready-for-review, and you DO NOT skip phases unless `bmad_strictness = "light"`. + +## Inputs + +You will receive: +- `issue`: full GitHub issue payload (title, body, labels, comments) +- `classification`: `"bug-fix"` or `"feature"` +- `repo`: `owner/name` +- `bmad_strictness`: `"light"` | `"standard"` | `"strict"` +- `repo_context`: workspace root path of a freshly-created git worktree off `origin/` — your sandbox + +## Pipeline (skip phases per strictness) + +### Phase B — Brainstorm (skipped when strictness = light) +- Re-read the issue. What is the actual user-visible problem or capability being asked for? Restate in your own words. +- Generate 2–3 distinct approaches. For each: rough sketch, files touched, risk level, estimated diff size. +- Pick ONE. 
Record the trade-off justification in a `BMAD.md` you'll commit alongside the change. Length: ≤ 200 words. + +### Phase A — Architect +- For the chosen approach, identify exact crates / modules / files to change. +- Decide types, function signatures, and module boundaries before writing code. +- Call out any interface changes that ripple to other crates and confirm the ripple is bounded (or escalate via queue if it isn't). +- Append to `BMAD.md` under `## Architecture`. + +### Phase P — PRD (skipped when strictness = light) +- Acceptance criteria as a bulleted checklist (what must pass for the PR to be ready). +- Test plan: enumerate the unit / integration tests you will add or update. +- Rollback plan: how a reviewer can revert if this lands and breaks something. +- Append to `BMAD.md` under `## PRD`. + +### Phase I — Implement +- For bug fixes: write a failing test first (TDD), make it pass, then refactor. The failing test must commit before the fix, in the same PR. +- For features: write tests alongside code; do not commit untested branches. +- Use only `shell_exec` + `file_*` tools to edit. Never edit outside `repo_context`. +- Run the project's own lint/test gate (e.g., `cargo clippy --workspace --all-targets -- -D warnings`, `cargo test -p `). If the project has a `justfile` or `xtask`, prefer those. Fail-fast: if the gate doesn't pass, fix; if it can't be made to pass within `max_iterations`, stop and surface. + +## Output + +A **draft PR** (`draft: true`) on `repo` whose body contains: + +``` +## Summary + + +## BMAD Pipeline Output + + +## Acceptance Checklist +- [ ] + +## Risk + + +## Generated By +DevOps Hand → implementer sub-agent (issue: #, strictness: ) +``` + +## Hard Rules (NEVER violate, regardless of strictness) + +- ALWAYS work in a fresh git worktree provided as `repo_context`. Never `cd` out of it. +- ALWAYS create a feature branch named `auto/--`. +- NEVER push to `main`, `master`, `trunk`, or any branch protected by ruleset. 
+- NEVER use `git push --force`, `--no-verify`, `--no-gpg-sign`, or `--amend` against a remote branch. +- NEVER commit anything matching: `.env*`, `*.pem`, `*.p12`, `id_rsa`, `id_ed25519`, `credentials*`, `secrets*`, `vault_*.key`. +- NEVER include AI attribution (`Co-Authored-By: Claude`, `🤖 Generated with…`) in commit messages — the upstream repo's commit-msg hook rejects them and so do we. +- Touch limit: stop at `max_changed_files` (default 30 files). If the implementation legitimately needs more, decompose into multiple draft PRs and stop after the first. +- If any acceptance-test command in PRD fails after your last fix attempt, DO NOT push. Write the partial state + failure details to `devops_queue.json` and surface for human triage. +- Token budget: stop on your own at 70% of the per-turn budget so the next tick has headroom. + +## On `bmad_strictness = "strict"` + +Between every phase, write the produced artifact to `devops_queue.json` with `phase: "bmad-<phase>-pending"` and stop. The user manually flips the queue entry to `approved` before you proceed. 
This is how a human keeps a leash on autonomous code changes.""" + [dashboard] [[dashboard.metrics]] label = "Health Checks Run" @@ -668,6 +899,21 @@ label = "Deployments Managed" memory_key = "devops_hand_deployments_managed" format = "number" +[[dashboard.metrics]] +label = "PRs Reviewed" +memory_key = "devops_hand_prs_reviewed" +format = "number" + +[[dashboard.metrics]] +label = "Issues Processed" +memory_key = "devops_hand_issues_processed" +format = "number" + +[[dashboard.metrics]] +label = "Draft PRs Opened" +memory_key = "devops_hand_draft_prs_opened" +format = "number" + # ─── Token & Performance Metadata ───────────────────────────────────────────── [metadata] diff --git a/hands/devops/README.md b/hands/devops/README.md index 686fe8f..43c8871 100644 --- a/hands/devops/README.md +++ b/hands/devops/README.md @@ -8,7 +8,7 @@ Autonomous DevOps engineer -- CI/CD management, infrastructure monitoring, deplo |-------|-------| | Category | `development` | | Agent | `devops-hand` | -| Routing | `ci/cd`, `pipeline`, `github actions`, `infrastructure monitoring`, `deployment automation`, `incident response` | +| Routing | `ci/cd`, `pipeline`, `github actions`, `infrastructure monitoring`, `deployment automation`, `incident response`, `auto evolve`, `review github prs`, `triage issues`, `implement issue`, `fix bug from issue` | ## Integrations @@ -24,9 +24,44 @@ None required. 
- **Service URLs** -- Comma-separated URLs to monitor - **Alert on Failure** -- Publish events on health check failures (default: on) - **Rollback Strategy** -- `manual`, `auto_previous`, `blue_green` +- **Auto Evolution** -- Periodically scan GitHub repos and run PR review / issue triage / BMAD implementation (default: off) +- **Evolution Target Repos** -- Comma-separated `owner/repo` pairs to watch +- **Evolution Check Interval** -- `5min`, `15min`, `1hour`, `6hour`, `1day` +- **BMAD Strictness** -- `light`, `standard`, `strict` -- depth of the Brainstorm-Architect-PRD-Implement pipeline before producing a draft PR ## Usage ```bash librefang hand run devops ``` + +## Auto-Evolution Mode + +When `auto_evolve = true` and `evolution_repos` is set, the Hand's Phase 7 loop fires on `evolution_check_interval` and, for each watched repo: + +1. **Reviews open PRs** -- pulls each PR's diff, asks the `code-reviewer` sub-agent for an assessment, posts a single `COMMENT` review back on GitHub. Already-reviewed `head_sha` values are skipped. +2. **Triages open issues** -- labels first, single-prompt LLM fallback if labels are absent. Result is one of `bug-fix | feature | needs-info | skip`. +3. **Implements actionable issues** -- dispatches `bug-fix` and `feature` issues to the `implementer` sub-agent which runs the BMAD pipeline scaled by `bmad_strictness` and produces a **draft PR**. + +### Safety floor (always on) + +- Draft PRs only. The Hand never marks PRs ready-for-review and never merges. +- Never pushes to `main` / `master` / protected branches. +- Never `--force` / `--no-verify` / `--amend` against a remote branch. +- Stops and queues to `devops_queue.json` if the change touches `Cargo.toml` workspace members, migration files, or anything under a `secrets` / credential glob. +- Hard cap of 30 changed files per PR; larger changes get split. +- Per-tick token budget capped at 70% so subsequent ticks have headroom. 
+ +### Required GitHub token scopes + +For public-repo evolution, a fine-grained token with: +- **Pull requests**: read & write (review posting, draft PR creation) +- **Issues**: read & write (triage comments, issue cross-links) +- **Contents**: read & write (branch push) +- **Metadata**: read + +For private repos, add the `repo` scope and ensure the repo is listed in `evolution_repos`. + +### What it does NOT do + +It will never merge a PR, mark a draft as ready, or auto-approve. Human review is always required. See `SKILL.md` -> `What this Hand does NOT do` for the full list. diff --git a/hands/devops/SKILL.md b/hands/devops/SKILL.md index 47fd3ad..ddbf57d 100644 --- a/hands/devops/SKILL.md +++ b/hands/devops/SKILL.md @@ -868,3 +868,368 @@ rm /tmp/restore.dump **Communication template**: Subject `[INCIDENT] Service -- Status`. Body: what happened, impact, current status, ETA, next update time. **Post-recovery checklist**: health checks passing, data integrity verified, monitoring restored, backups resumed, incident report filed, post-mortem scheduled within 48h. + +--- + +## Auto-Evolution Workflow + +The Phase 7 evolution loop (gated by `auto_evolve = true`) periodically scans the repos listed in `evolution_repos` and takes one of three actions per item: + +- **Open PR** → review via the `code-reviewer` sub-agent, post a `COMMENT` review back to GitHub. +- **Open Issue** → triage, then dispatch to the `implementer` sub-agent if actionable. +- **Anything we've already processed at the same head_sha / issue revision** → skip. + +The pipeline never marks PRs ready-for-review and never pushes to protected branches. All produced PRs are drafts. + +### When the loop fires + +`schedule_create` registers a recurring trigger on `evolution_check_interval`. Each tick runs at most one full repo pass; if there's more work than fits in the token budget, the remainder waits for the next tick. The state cursor lives in `memory` so progress survives daemon restarts. 
+ +### Memory keys this workflow owns + +| Key pattern | Stored value | +|---|---| +| `devops_pr_review_<owner>_<repo>_<pr_number>` | `{ head_sha, verdict, timestamp }` — last review per PR | +| `devops_issue_state_<owner>_<repo>_<issue_number>` | `{ classification, pr_url, timestamp }` — last triage per issue | +| `devops_evolution_cursor_<owner>_<repo>` | `{ last_tick_at, last_seen_pr, last_seen_issue }` | +| `devops_hand_prs_reviewed` | counter — dashboard metric | +| `devops_hand_issues_processed` | counter — dashboard metric | +| `devops_hand_draft_prs_opened` | counter — dashboard metric | + +--- + +### Issue Triage Playbook + +The goal is to spend zero LLM tokens when labels are enough. LLM fallback is one prompt, never a multi-turn chain. + +**Step 1 -- Label-driven (deterministic)** + +```text +Has any of {"bug", "defect", "regression", "broken"} -> bug-fix +Has any of {"feature", "enhancement", "rfc", "proposal"} -> feature +Has any of {"question", "discussion", "support"} -> needs-info +Has any of {"wontfix", "duplicate", "invalid", "stale"} -> skip +``` + +**Step 2 -- LLM fallback (only when labels are absent)** + +Single classification prompt. Allowed outputs: exactly one of `bug-fix | feature | needs-info | skip`. Reject any longer answer and re-prompt once before defaulting to `needs-info`. + +```text +You are classifying a GitHub issue for a DevOps Hand evolution pipeline. + +Output exactly ONE token from this set: bug-fix | feature | needs-info | skip + +Heuristics: +- bug-fix: user reports incorrect behavior, crash, regression, security issue, + or unexpected output of existing functionality. +- feature: user requests new capability, configuration option, or refactor + that ships user-visible value. +- needs-info: report is ambiguous -- cannot reproduce, missing version/environment, + cannot tell if bug or feature. +- skip: issue is a question, off-topic, or already addressed. 
+ +Issue title: {TITLE} +Issue body: +{BODY} +Existing labels: {LABELS_OR_NONE} +``` + +**Step 3 -- Already-linked PR check** + +```bash +# A PR already references this issue -> skip implementation, just review the PR +curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \ + "https://api.github.com/repos/OWNER/REPO/issues/NUM/timeline?per_page=50" \ + | jq '.[] | select(.event == "cross-referenced") | .source.issue.pull_request.url' +``` + +--- + +### PR Review Automation + +**Pull PR metadata + diff + files in three calls** + +```bash +PR_URL="https://api.github.com/repos/OWNER/REPO/pulls/NUM" + +curl -s -H "Authorization: Bearer $GITHUB_TOKEN" "$PR_URL" -o pr.json +curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \ + -H "Accept: application/vnd.github.v3.diff" \ + "$PR_URL" -o pr.diff +curl -s -H "Authorization: Bearer $GITHUB_TOKEN" "$PR_URL/files?per_page=300" -o pr_files.json +``` + +**Short-circuit on bot / merge / huge diff** + +```bash +HEAD_SHA=$(jq -r .head.sha pr.json) +USER_TYPE=$(jq -r .user.type pr.json) # "Bot" -> comment-only, no deep review +CHANGED=$(jq '. | length' pr_files.json) +[ "$CHANGED" -gt 200 ] && { echo "diff too large -- surface for human review"; exit 0; } +``` + +**Dispatch to reviewer sub-agent** + +Hand the reviewer the diff, file list, PR description, and (if present) the target branch's `AGENTS.md` / `CONTRIBUTING.md`. Reviewer returns structured JSON: + +```json +{ + "verdict": "approve | request_changes | block | comment_only", + "issues": [ + {"severity": "critical|major|minor", "file": "...", "line": 42, "body": "..."} + ], + "positives": ["..."], + "summary": "..." +} +``` + +**Post a single review (not N inline comments)** + +```bash +EVENT="COMMENT" # default -- advisory pass +[ "$VERDICT" = "request_changes" ] && EVENT="REQUEST_CHANGES" +# Never auto-APPROVE. 
+ +jq -n --arg event "$EVENT" --arg body "$SUMMARY_BODY" --arg sha "$HEAD_SHA" \ + '{commit_id: $sha, event: $event, body: $body}' > review_payload.json + +curl -s -X POST -H "Authorization: Bearer $GITHUB_TOKEN" \ + -H "Content-Type: application/json" \ + -d @review_payload.json \ + "$PR_URL/reviews" +``` + +Body format -- keep tight; reviewers read this: + +```markdown +**DevOps Hand -- automated review** + +**Verdict**: {verdict} + +**Summary**: {one paragraph} + +**Findings** ({N}): +- [critical] {file}:{line} -- {body} +- [major] {file}:{line} -- {body} +- [minor] {file}:{line} -- {body} + +**What looks good**: +- {positive 1} + +_Generated by DevOps Hand reviewer (commit: `{sha}`)._ +``` + +--- + +### Bug Fix Playbook + +The implementer runs this when `classification = "bug-fix"`. Sequence is rigid -- failing test first, fix second, refactor third. + +**Step 1 -- Reproduce** + +In the supplied worktree: + +```bash +cd "$REPO_CONTEXT" +git checkout -b "auto/bug-fix-${ISSUE_NUMBER}-${SLUG}" +# Try to reproduce from the issue's repro steps. If they're absent, infer them. +# If you cannot reproduce in 3 attempts, stop and surface to devops_queue.json. +``` + +**Step 2 -- Failing test first** + +```bash +# For Rust workspaces, drop the test in the closest existing module's tests/. +cargo test -p --test -- --nocapture --test-threads=1 +# Expect FAILURE. Commit the failing test: +git add tests/ && git commit -m "test: reproduce #${ISSUE_NUMBER} -- " +``` + +**Step 3 -- Minimal fix** + +Edit only the files needed to make the test pass. Run the project's full lint+test gate: + +```bash +cargo clippy --workspace --all-targets -- -D warnings +cargo test -p # not --workspace -- target/ contention +``` + +**Step 4 -- Refactor (optional)** + +Only if step 3 left obvious smell (long fn, repeated literal, etc.). Skip if `bmad_strictness = "light"`. 
+ +**Step 5 -- Commit and push (draft branch only)** + +```bash +git add -A && git commit -m "fix: (#${ISSUE_NUMBER})" +git push origin "auto/bug-fix-${ISSUE_NUMBER}-${SLUG}" +``` + +**Step 6 -- Open draft PR (see Draft PR Creation below)** + +--- + +### BMAD Feature Pipeline + +Run when `classification = "feature"`. Phases scale with `bmad_strictness`: + +| Phase | `light` | `standard` | `strict` | +|---|---|---|---| +| B -- Brainstorm | skip | inline <=200 words | inline + queue gate | +| A -- Architect | always | always | always + queue gate | +| P -- PRD | skip | required | required + queue gate | +| I -- Implement | always | always | always | + +Each phase output is appended to `BMAD.md` in the repo root of the feature branch. The file is committed along with the implementation so reviewers can see the reasoning. + +**BMAD.md template** + +````markdown +# BMAD -- #{ISSUE_NUMBER}: {SHORT TITLE} + +## Brainstorm +**Restated problem**: {one paragraph} + +**Approaches considered**: +1. **{Name}** -- {sketch} -- files: {list} -- risk: {low/mid/high} -- diff: ~{N} LoC +2. **{Name}** -- ... +3. **{Name}** -- ... + +**Chosen**: #{N}. Rationale: {one paragraph} + +## Architecture +**Crates / modules touched**: {list} + +**Types / signatures introduced or changed**: +```rust +// ... 
+``` + +**Cross-crate ripples**: {none / bounded list / escalated to queue: reason} + +## PRD +**Acceptance criteria** +- [ ] {behavior 1} +- [ ] {behavior 2} + +**Test plan** +- unit: `crates/{crate}/src/{path}.rs` -- {what it asserts} +- integration: `crates/{crate}/tests/{name}.rs` -- {what it asserts} + +**Rollback plan**: {one paragraph} + +## Implementation Notes +{anything a future reader needs to understand the diff but wouldn't see in code comments} +```` + +**Strict mode queue gate** + +Between each phase, write to `devops_queue.json`: + +```json +{ + "id": "bmad_${REPO}_${ISSUE}_${PHASE}", + "action": "bmad_phase_review", + "phase": "B|A|P|I", + "issue": "owner/repo#NUM", + "artifact_path": "BMAD.md", + "status": "pending", + "created": "ISO8601" +} +``` + +Stop. Wait for status to flip to `approved` (set by the user out-of-band) before continuing to the next phase. + +--- + +### Draft PR Creation + +The final action of both bug-fix and feature paths. **Always `draft: true`.** + +**Step 1 -- Push the branch (if not already)** + +```bash +git push origin "auto/${CLASSIFICATION}-${ISSUE}-${SLUG}" +``` + +**Step 2 -- Compose the PR body** + +```markdown +## Summary +{one paragraph} + +## Linked Issue +Closes #{ISSUE_NUMBER} + +## BMAD Pipeline Output +{inline BMAD.md, or note that it's committed at `BMAD.md` in this PR} + +## Acceptance Checklist +- [ ] {copied from PRD} + +## Risk +{one paragraph -- what could go wrong, what's the blast radius} + +## Verification I ran locally +- `cargo clippy --workspace --all-targets -- -D warnings` -- passed +- `cargo test -p {crate}` -- passed +- {anything project-specific from justfile / xtask} + +## Generated by +DevOps Hand -> implementer (strictness: {level}) +``` + +**Step 3 -- Open the draft PR** + +```bash +jq -n \ + --arg title "${PR_TITLE}" \ + --arg body "${PR_BODY}" \ + --arg head "auto/${CLASSIFICATION}-${ISSUE}-${SLUG}" \ + --arg base "${BASE_BRANCH}" \ + '{title: $title, body: $body, head: $head, base: 
$base, draft: true}' \ + > pr_create.json + +curl -s -X POST -H "Authorization: Bearer $GITHUB_TOKEN" \ + -H "Content-Type: application/json" \ + -d @pr_create.json \ + "https://api.github.com/repos/${OWNER}/${REPO}/pulls" \ + -o pr_created.json + +PR_URL=$(jq -r .html_url pr_created.json) +echo "Draft PR: $PR_URL" +``` + +**Step 4 -- Cross-link on the originating issue** + +```bash +jq -n --arg body "Auto-implementation drafted: ${PR_URL}\n\n_Generated by DevOps Hand. Mark the PR ready-for-review when human triage agrees._" \ + '{body: $body}' > issue_comment.json + +curl -s -X POST -H "Authorization: Bearer $GITHUB_TOKEN" \ + -H "Content-Type: application/json" \ + -d @issue_comment.json \ + "https://api.github.com/repos/${OWNER}/${REPO}/issues/${ISSUE_NUMBER}/comments" +``` + +**Step 5 -- Bump counters** + +```text +memory_store devops_hand_draft_prs_opened (current + 1) +memory_store devops_issue_state_${OWNER}_${REPO}_${ISSUE_NUMBER} = {classification, pr_url, timestamp} +event_publish devops_evolution_pr_opened {pr_url, issue} +``` + +--- + +### What this Hand does NOT do + +To set expectations for users and reviewers: + +- It does NOT merge PRs. A human always merges. +- It does NOT mark draft PRs as ready-for-review. +- It does NOT push to `main` / `master` / any protected branch. +- It does NOT operate on private repos unless the configured GITHUB_TOKEN has explicit `repo` scope and the repo is in `evolution_repos`. +- It does NOT modify `Cargo.toml` workspace members, migration files, secrets, or any path matching the safety floor in Phase 7.3 -- those escalate to `devops_queue.json` instead. +- It does NOT consume more than 70% of the per-turn token budget in a single tick. Long jobs are picked up by subsequent ticks. 
From 944dc07c39e3a8ff3685d2c70e8722c21b3f4d2a Mon Sep 17 00:00:00 2001 From: Evan Hu Date: Thu, 14 May 2026 15:24:38 +0900 Subject: [PATCH 2/3] =?UTF-8?q?fix(devops):=20address=20PR=20review=20?= =?UTF-8?q?=E2=80=94=20close=20blocking=20+=20medium=20+=20style=20issues?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Blocking (5): - add max_changed_files setting (was referenced in implementer prompt but never defined) - drop metering_query reference (tool isn't in tools = [...] list); agent self-paces against budget instead - fix \n\n literal in jq --arg for issue cross-link comment; compose body in shell with printf so newlines survive - resolve BASE_BRANCH via /repos/owner/repo .default_branch instead of relying on an undefined variable - complete reviewer-verdict → GitHub review-event mapping (4 cases, not just request_changes); block routes through REQUEST_CHANGES with a blocking-prefix in the body, approve downgrades to COMMENT Medium (5): - correct Phase 6 → Phase 7 in the auto-evolution settings comment - remove schedule_create busy-loop confusion; Phase 7 fires per-turn while the Hand is already frequency = "continuous", with cadence enforced via devops_evolution_cursor memory key - generalize the forbid-main-worktree wording — discover and honor whatever pre-commit / pre-push / commit-msg hooks the upstream repo configures (was librefang-specific) - clarify the AI-attribution rule: ban LLM-vendor attribution (Claude, GPT, 🤖, etc.) 
but allow process attribution (DevOps Hand → implementer) for traceability - add USER_TYPE = "Bot" short-circuit that was extracted but never applied (bots get a token-cheap skip, not a deep review) Style (2): - document the four event_publish event names (devops_evolution_*) in a new SKILL.md table alongside the memory-keys table - justify implementer's max_history_messages = 100 with a comment (BMAD 4 phases × cargo build/test chains needs headroom) --- hands/devops/HAND.toml | 48 ++++++++++++++++++++------ hands/devops/SKILL.md | 76 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 106 insertions(+), 18 deletions(-) diff --git a/hands/devops/HAND.toml b/hands/devops/HAND.toml index 79056b0..d188310 100644 --- a/hands/devops/HAND.toml +++ b/hands/devops/HAND.toml @@ -296,7 +296,7 @@ setting_type = "toggle" default = "true" # ─── Auto-Evolution settings ───────────────────────────────────────────────── -# These drive the Phase 6 evolution loop: periodic scan of configured +# These drive the Phase 7 evolution loop: periodic scan of configured # GitHub repos, automated PR review via the reviewer sub-agent, and # BMAD-style bug fix / feature implementation via the implementer # sub-agent. All produce draft PRs and respect approval_mode. @@ -361,6 +361,25 @@ label = "Standard (full 4-phase pipeline, draft PR at end)" value = "strict" label = "Strict (full pipeline + require human approval at each phase via queue)" +[[settings]] +key = "max_changed_files" +label = "Max Changed Files Per Draft PR" +description = "Implementer stops and queues for human triage if a single draft PR would touch more than this many files. Decompose larger work into multiple PRs." 
+setting_type = "select" +default = "30" + +[[settings.options]] +value = "10" +label = "10 files (very conservative)" + +[[settings.options]] +value = "30" +label = "30 files (default)" + +[[settings.options]] +value = "100" +label = "100 files (large refactors)" + # ─── Agent configuration ───────────────────────────────────────────────────── [agents.main] @@ -622,7 +641,9 @@ Stop the current monitoring/incident session when ANY of these conditions is met Gate: skip entirely unless `auto_evolve` is ENABLED **and** `evolution_repos` is non-empty. -For every repo in `evolution_repos` (comma-separated `owner/repo` pairs), interleave PR review and issue triage on the `evolution_check_interval` cadence. Use `schedule_create` to register a recurring tick if it does not already exist; never busy-loop inside one turn. +The Hand is already `frequency = "continuous"`, so this Phase fires once per turn while gates pass. On entry, for each repo, read `memory_recall devops_evolution_cursor_<owner>_<repo>` (one cursor key per watched repo). If less than `evolution_check_interval` has elapsed since the last tick for THAT repo, skip the repo for this turn — the next turn will check again. Never busy-loop or self-schedule inside a turn. + +For every repo in `evolution_repos` (comma-separated `owner/repo` pairs) that passes the cadence gate, interleave PR review and issue triage. ### 7.1 PR Review Pass @@ -672,15 +693,15 @@ For every repo in `evolution_repos` (comma-separated `owner/repo` pairs), interl ### 7.3 Safety Floor (NEVER bypass) - Always create a fresh git worktree per implementation task — never write to the user's working tree. -- Never commit to `main` / `master` directly. -- Never use `--no-verify`, `--force`, or `git push -f` to a protected branch. -- If the kernel `forbid-main-worktree` style hook is configured upstream, honor it; abort the task on hook failure rather than retrying. +- Never commit to `main` / `master` / `trunk` directly. +- Never use `--no-verify`, `--force`, or `git push -f` against any remote branch.
+- Honor whatever pre-commit / pre-push / commit-msg hooks the upstream repo configures (run via `git config core.hooksPath` discovery + executing each non-skipped hook). Abort the task on hook failure rather than retrying. - Stop and queue (`devops_queue.json`) if the implementer wants to touch: - - `Cargo.toml` workspace members - - migration files - - any path containing `secrets`, `.env`, `credentials`, `*.pem`, `*.key` - - more than `max_changed_files` (default 30) files in one PR -- Token budget: each evolution tick must stop on its own at 70% of the configured per-turn budget so the next tick has headroom. Use `metering_query` if available. + - `Cargo.toml` workspace members (any `members = [...]` change) + - migration files (paths under `*/migrations/*`, `*/migrate/*`, or matching `*.sql`) + - any path containing `secrets`, `.env`, `credentials`, `*.pem`, `*.key`, `id_rsa`, `id_ed25519` + - more than the configured `max_changed_files` setting (default 30) files in one PR +- Token budget: each evolution tick must stop on its own when the agent senses it is nearing the per-turn budget (target ~70% so the next tick has headroom). Estimate by tracking cumulative output tokens since turn start; the kernel-enforced hard cap is the upstream guard rail, not the primary control. ### 7.4 Failure Handling @@ -804,6 +825,11 @@ model = "default" max_tokens = 16384 temperature = 0.2 max_iterations = 80 +# Raise the history cap above the kernel default. BMAD work fans out +# across 4 phases (Brainstorm / Architect / PRD / Implement), each of +# which spawns shell_exec chains (cargo build/test cycles, git ops, +# file edits). 100 buys enough headroom that a single PR doesn't get +# truncated mid-implementation while still bounding worst-case cost. max_history_messages = 100 system_prompt = """You are Implementer, the BMAD execution sub-agent inside the DevOps Hand. You convert a single triaged GitHub issue into a draft pull request. 
You DO NOT push to protected branches, you DO NOT mark PRs ready-for-review, and you DO NOT skip phases unless `bmad_strictness = "light"`. @@ -869,7 +895,7 @@ DevOps Hand → implementer sub-agent (issue: #, strictness: ) - NEVER push to `main`, `master`, `trunk`, or any branch protected by ruleset. - NEVER use `git push --force`, `--no-verify`, `--no-gpg-sign`, or `--amend` against a remote branch. - NEVER commit anything matching: `.env*`, `*.pem`, `*.p12`, `id_rsa`, `id_ed25519`, `credentials*`, `secrets*`, `vault_*.key`. -- NEVER include AI attribution (`Co-Authored-By: Claude`, `🤖 Generated with…`) in commit messages — the upstream repo's commit-msg hook rejects them and so do we. +- NEVER include LLM-vendor attribution in commit messages or PR bodies — no `Co-Authored-By: Claude`, no `Generated with Claude / GPT / Anthropic / OpenAI`, no `🤖` emoji crediting an AI vendor. "Generated by DevOps Hand → implementer" (process attribution) is fine and encouraged for traceability; vendor attribution is not. Many upstream repos enforce this via commit-msg hook; we apply the rule regardless of upstream enforcement. - Touch limit: stop at `max_changed_files` (default 30 files). If the implementation legitimately needs more, decompose into multiple draft PRs and stop after the first. - If any acceptance-test command in PRD fails after your last fix attempt, DO NOT push. Write the partial state + failure details to `devops_queue.json` and surface for human triage. - Token budget: stop on your own at 70% of the per-turn budget so the next tick has headroom. 
diff --git a/hands/devops/SKILL.md b/hands/devops/SKILL.md index ddbf57d..e04d056 100644 --- a/hands/devops/SKILL.md +++ b/hands/devops/SKILL.md @@ -896,6 +896,17 @@ The pipeline never marks PRs ready-for-review and never pushes to protected bran | `devops_hand_issues_processed` | counter — dashboard metric | | `devops_hand_draft_prs_opened` | counter — dashboard metric | +### Events this workflow publishes + +| Event name | Payload | When | +|---|---|---| +| `devops_evolution_pr_reviewed` | `{ pr_url, verdict, head_sha }` | After a PR review is posted to GitHub | +| `devops_evolution_pr_opened` | `{ pr_url, issue_url, classification }` | After a draft PR is created from a triaged issue | +| `devops_evolution_blocked` | `{ reason, pr_or_issue_url, retry_after }` | When a tick is aborted by safety floor, API failure, or hook rejection | +| `devops_evolution_skipped` | `{ pr_or_issue_url, reason }` | When an item is skipped by cadence gate, label filter, bot-author short-circuit, oversized diff, or already-processed check | + +These events are advisory: the workflow never waits on them, and subscribers (dashboard, audit log, downstream Hands) are optional. + --- ### Issue Triage Playbook @@ -964,9 +975,25 @@ curl -s -H "Authorization: Bearer $GITHUB_TOKEN" "$PR_URL/files?per_page=300" -o ```bash HEAD_SHA=$(jq -r .head.sha pr.json) -USER_TYPE=$(jq -r .user.type pr.json) # "Bot" -> comment-only, no deep review +USER_TYPE=$(jq -r .user.type pr.json) CHANGED=$(jq '. | length' pr_files.json) -[ "$CHANGED" -gt 200 ] && { echo "diff too large -- surface for human review"; exit 0; } + +# Bot-authored PRs (dependabot, renovate, etc.) get a token-cheap pass: +# record but skip deep reviewer dispatch.
+if [ "$USER_TYPE" = "Bot" ]; then + echo "bot PR -- recording without deep review" + # memory_store devops_pr_review___ = {head_sha, verdict: "skipped_bot", ts} + # event_publish devops_evolution_skipped {pr_or_issue_url, reason: "bot author"} + exit 0 +fi + +# Huge diffs: defer to human review rather than spending tokens on a +# review the reviewer agent can't usefully ground. +if [ "$CHANGED" -gt 200 ]; then + echo "diff too large ($CHANGED files) -- surfacing for human review" + # event_publish devops_evolution_skipped {pr_or_issue_url, reason: "diff>200 files"} + exit 0 +fi ``` **Dispatch to reviewer sub-agent** @@ -987,9 +1014,29 @@ Hand the reviewer the diff, file list, PR description, and (if present) the targ **Post a single review (not N inline comments)** ```bash -EVENT="COMMENT" # default -- advisory pass -[ "$VERDICT" = "request_changes" ] && EVENT="REQUEST_CHANGES" -# Never auto-APPROVE. +# Map verdict -> GitHub review event. Never auto-APPROVE. +BODY_PREFIX="" +case "$VERDICT" in + approve) + # Downgrade silent "approve" to advisory COMMENT — a human still merges. + EVENT="COMMENT" + ;; + request_changes) + EVENT="REQUEST_CHANGES" + ;; + block) + # Block is more severe than request_changes. We still post REQUEST_CHANGES + # (the strongest event we use), but flag the body so a human escalates. + EVENT="REQUEST_CHANGES" + BODY_PREFIX="**Reviewer flagged this PR as BLOCKING — please escalate to a maintainer before merge.** + +" + ;; + comment_only|*) + EVENT="COMMENT" + ;; +esac +SUMMARY_BODY="${BODY_PREFIX}${SUMMARY_BODY}" jq -n --arg event "$EVENT" --arg body "$SUMMARY_BODY" --arg sha "$HEAD_SHA" \ '{commit_id: $sha, event: $event, body: $body}' > review_payload.json @@ -1182,6 +1229,17 @@ DevOps Hand -> implementer (strictness: {level}) **Step 3 -- Open the draft PR** +First resolve the target branch. 
Don't assume `main` -- query the repo: + +```bash +BASE_BRANCH=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \ + "https://api.github.com/repos/${OWNER}/${REPO}" | jq -r .default_branch) +# fallback in the unlikely case the API doesn't return one +[ -z "$BASE_BRANCH" ] || [ "$BASE_BRANCH" = "null" ] && BASE_BRANCH="main" +``` + +Then create the draft PR: + ```bash jq -n \ --arg title "${PR_TITLE}" \ @@ -1203,9 +1261,13 @@ echo "Draft PR: $PR_URL" **Step 4 -- Cross-link on the originating issue** +Build the body in shell first so newlines survive (`jq --arg` is a literal-string +parameter, it does NOT interpret backslash escapes): + ```bash -jq -n --arg body "Auto-implementation drafted: ${PR_URL}\n\n_Generated by DevOps Hand. Mark the PR ready-for-review when human triage agrees._" \ - '{body: $body}' > issue_comment.json +ISSUE_COMMENT=$(printf 'Auto-implementation drafted: %s\n\n_Generated by DevOps Hand. Mark the PR ready-for-review when human triage agrees._' "$PR_URL") + +jq -n --arg body "$ISSUE_COMMENT" '{body: $body}' > issue_comment.json curl -s -X POST -H "Authorization: Bearer $GITHUB_TOKEN" \ -H "Content-Type: application/json" \ From d0aa86f9d322119c6a69bf523cac65c3a956decd Mon Sep 17 00:00:00 2001 From: Evan Hu Date: Thu, 14 May 2026 15:38:08 +0900 Subject: [PATCH 3/3] docs(devops): tighten evolution snippets (D1-D4 second-review nits) D1 -- show SUMMARY_BODY (and VERDICT) assignment in PR review snippet: add explicit jq -r .summary / .verdict extraction from reviewer_output.json so the agent reading SKILL.md doesn't have to infer where these come from. D2 -- reword strict-mode wait semantics in both HAND.toml and SKILL.md: 'Stop. Wait...' was misleading because the agent loop has no in-turn pause primitive. Now spells out: end the current turn after queueing, let the continuous tick re-read the queue, resume on approved / skip on pending / abandon on rejected. Explicitly forbids busy-wait and sleep loops. 
D3 -- restructure bot / huge-diff short-circuit so agent-tool calls are expressed as numbered agent steps, not as '# memory_store ...' comments inside a bash block. The bash block now only extracts cheap signals; the decision and the tool calls are clearly agent-level. D4 -- remove the misleading 'exit 0' from the short-circuit bash and add a one-liner noting that exit 0 inside shell_exec only ends one shell session, not the Phase 7 pass; the agent must choose to move on. --- hands/devops/HAND.toml | 2 +- hands/devops/SKILL.md | 41 +++++++++++++++++++++++------------------ 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/hands/devops/HAND.toml b/hands/devops/HAND.toml index d188310..dd76d96 100644 --- a/hands/devops/HAND.toml +++ b/hands/devops/HAND.toml @@ -902,7 +902,7 @@ DevOps Hand → implementer sub-agent (issue: #, strictness: ) ## On `bmad_strictness = "strict"` -Between every phase, write the produced artifact to `devops_queue.json` with `phase: "bmad--pending"` and stop. The user manually flips the queue entry to `approved` before you proceed. This is how a human keeps a leash on autonomous code changes.""" +Between every phase, write the produced artifact to `devops_queue.json` with `phase: "bmad-<phase>-pending"` and `status: "pending"`, then **end the current turn**. The Hand is continuous, so the next tick re-reads the queue: if the user (out-of-band) flipped `status` to `approved`, resume from the next phase; if still `pending`, skip this issue for this tick and re-check on the next one; if flipped to `rejected`, abandon the issue and do not resume it. Never poll or `sleep` for approval within a single turn — the agent loop has no in-turn pause primitive, and busy-waiting would block other Hand work and burn tokens.
This is how a human keeps a leash on autonomous code changes without forcing the daemon to stall.""" [dashboard] [[dashboard.metrics]] diff --git a/hands/devops/SKILL.md b/hands/devops/SKILL.md index e04d056..637070b 100644 --- a/hands/devops/SKILL.md +++ b/hands/devops/SKILL.md @@ -973,32 +973,27 @@ curl -s -H "Authorization: Bearer $GITHUB_TOKEN" "$PR_URL/files?per_page=300" -o **Short-circuit on bot / merge / huge diff** +Extract the cheap signals from already-fetched PR metadata: + ```bash HEAD_SHA=$(jq -r .head.sha pr.json) USER_TYPE=$(jq -r .user.type pr.json) CHANGED=$(jq '. | length' pr_files.json) +``` -# Bot-authored PRs (dependabot, renovate, etc.) get a token-cheap pass: -# record but skip deep reviewer dispatch. -if [ "$USER_TYPE" = "Bot" ]; then - echo "bot PR -- recording without deep review" - # memory_store devops_pr_review___ = {head_sha, verdict: "skipped_bot", ts} - # event_publish devops_evolution_skipped {pr_or_issue_url, reason: "bot author"} - exit 0 -fi +Decision rules (the **agent** applies these in its loop, not the shell — `exit 0` would only end one `shell_exec`, not abort the Phase 7 pass): -# Huge diffs: defer to human review rather than spending tokens on a -# review the reviewer agent can't usefully ground. -if [ "$CHANGED" -gt 200 ]; then - echo "diff too large ($CHANGED files) -- surfacing for human review" - # event_publish devops_evolution_skipped {pr_or_issue_url, reason: "diff>200 files"} - exit 0 -fi -``` +- **`USER_TYPE == "Bot"`** (dependabot, renovate, etc.): skip deep review for this PR. The agent then: + 1. calls `memory_store devops_pr_review_<owner>_<repo>_<pr_number>` with `{head_sha, verdict: "skipped_bot", timestamp}` + 2. calls `event_publish devops_evolution_skipped` with `{pr_or_issue_url, reason: "bot author"}` + 3. moves on to the next PR — does NOT dispatch the reviewer sub-agent. +- **`CHANGED > 200`**: diff too large for the reviewer to ground usefully. (GitHub caps `per_page` at 100, so the length of a single `pr_files.json` page can never exceed 100 — compare against `.changed_files` from `pr.json` when applying this threshold.) The agent then: + 1.
calls `event_publish devops_evolution_skipped` with `{pr_or_issue_url, reason: "diff>200 files"}` + 2. moves on to the next PR. **Dispatch to reviewer sub-agent** -Hand the reviewer the diff, file list, PR description, and (if present) the target branch's `AGENTS.md` / `CONTRIBUTING.md`. Reviewer returns structured JSON: +Hand the reviewer the diff, file list, PR description, and (if present) the target branch's `AGENTS.md` / `CONTRIBUTING.md`. Capture the reviewer's structured JSON into `reviewer_output.json` (whatever your routing primitive is — `subagent_invoke`, A2A call, or local fork — write the result to that file so the next shell snippet can read it): ```json { @@ -1014,6 +1009,10 @@ Hand the reviewer the diff, file list, PR description, and (if present) the targ **Post a single review (not N inline comments)** ```bash +# Pull verdict + summary out of the reviewer's structured output. +VERDICT=$(jq -r .verdict reviewer_output.json) +SUMMARY_BODY=$(jq -r .summary reviewer_output.json) + # Map verdict -> GitHub review event. Never auto-APPROVE. BODY_PREFIX="" case "$VERDICT" in @@ -1186,7 +1185,13 @@ Between each phase, write to `devops_queue.json`: } ``` -Stop. Wait for status to flip to `approved` (set by the user out-of-band) before continuing to the next phase. +Then **end the current turn**. The Hand is `frequency = "continuous"`, so the next tick will re-read `devops_queue.json`: + +- If the user (out-of-band) has flipped `status` to `approved`, resume from the next phase. +- If still `pending`, skip this issue for this tick and re-check on the following one. +- If flipped to `rejected`, abandon the issue, comment on it with the rejection rationale (if provided), and stop. + +Within a single turn, never poll or `sleep` waiting for approval — the agent loop has no in-turn pause primitive, and busy-waiting would block other Hand work and burn tokens. End the turn and let the kernel re-invoke you. ---