From 30e1b137ec2b61b6f46ba5e9b758779c26d9708a Mon Sep 17 00:00:00 2001
From: Evan Hu <suzukaze.haduki@gmail.com>
Date: Thu, 14 May 2026 15:12:15 +0900
Subject: [PATCH 1/3] feat(devops): add auto-evolution loop (PR review + BMAD
 bug/feature pipeline)

Extends the DevOps Hand to periodically scan configured GitHub repos and:
- review open PRs via the existing code-reviewer sub-agent, posting a
  single COMMENT review back to GitHub (never auto-APPROVE)
- triage open issues via labels first, single-prompt LLM fallback
- dispatch actionable issues (bug-fix / feature) to a new implementer
  sub-agent which runs the BMAD pipeline (Brainstorm -> Architect ->
  PRD -> Implement) scaled by bmad_strictness and produces a DRAFT PR

Safety floor (always on):
- draft PRs only, never auto-ready, never merge
- never push to main/master/protected branches
- escalates to devops_queue.json when touching workspace Cargo.toml,
  migrations, secrets, or >30 changed files
- 70% per-turn token budget cap so subsequent ticks have headroom

New settings: auto_evolve, evolution_repos, evolution_check_interval,
bmad_strictness. New sub-agent: agents.implementer. New SKILL.md
sections: Issue Triage Playbook, PR Review Automation, Bug Fix
Playbook, BMAD Feature Pipeline, Draft PR Creation. Three new
dashboard metrics: prs_reviewed, issues_processed, draft_prs_opened.
---
 hands/devops/HAND.toml | 246 +++++++++++++++++++++++++++
 hands/devops/README.md |  37 ++++-
 hands/devops/SKILL.md  | 365 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 647 insertions(+), 1 deletion(-)

diff --git a/hands/devops/HAND.toml b/hands/devops/HAND.toml
index 4d23172..79056b0 100644
--- a/hands/devops/HAND.toml
+++ b/hands/devops/HAND.toml
@@ -123,6 +123,11 @@ aliases = [
   "infrastructure monitoring",
   "deployment automation",
   "incident response",
+  "auto evolve",
+  "review github prs",
+  "triage issues",
+  "implement issue",
+  "fix bug from issue",
 ]
 weak_aliases = [
   "deploy",
@@ -131,6 +136,10 @@ weak_aliases = [
   "container",
   "terraform",
   "helm",
+  "bug fix",
+  "feature implementation",
+  "bmad",
+  "draft pr",
 ]
 
 # ─── Configurable settings ───────────────────────────────────────────────────
@@ -286,6 +295,72 @@ description = "Queue deployment and infrastructure actions for your review inste
 setting_type = "toggle"
 default = "true"
 
+# ─── Auto-Evolution settings ─────────────────────────────────────────────────
+# These drive the Phase 6 evolution loop: periodic scan of configured
+# GitHub repos, automated PR review via the reviewer sub-agent, and
+# BMAD-style bug fix / feature implementation via the implementer
+# sub-agent. All produce draft PRs and respect approval_mode.
+
+[[settings]]
+key = "auto_evolve"
+label = "Auto Evolution"
+description = "Periodically scan configured GitHub repos and run PR review / issue triage / BMAD implementation"
+setting_type = "toggle"
+default = "false"
+
+[[settings]]
+key = "evolution_repos"
+label = "Evolution Target Repos"
+description = "Comma-separated owner/repo pairs to watch (e.g. librefang/librefang,librefang/librefang-registry)"
+setting_type = "text"
+default = ""
+
+[[settings]]
+key = "evolution_check_interval"
+label = "Evolution Check Interval"
+description = "How often to scan target repos for new PRs and issues"
+setting_type = "select"
+default = "15min"
+
+[[settings.options]]
+value = "5min"
+label = "Every 5 minutes"
+
+[[settings.options]]
+value = "15min"
+label = "Every 15 minutes"
+
+[[settings.options]]
+value = "1hour"
+label = "Every hour"
+
+[[settings.options]]
+value = "6hour"
+label = "Every 6 hours"
+
+[[settings.options]]
+value = "1day"
+label = "Daily"
+
+[[settings]]
+key = "bmad_strictness"
+label = "BMAD Strictness"
+description = "How thoroughly to run the Brainstorm-Architect-PRD-Implement pipeline before producing a draft PR"
+setting_type = "select"
+default = "standard"
+
+[[settings.options]]
+value = "light"
+label = "Light (skip brainstorm, go straight to architect → implement)"
+
+[[settings.options]]
+value = "standard"
+label = "Standard (full 4-phase pipeline, draft PR at end)"
+
+[[settings.options]]
+value = "strict"
+label = "Strict (full pipeline + require human approval at each phase via queue)"
+
 # ─── Agent configuration ─────────────────────────────────────────────────────
 
 [agents.main]
@@ -543,6 +618,78 @@ Stop the current monitoring/incident session when ANY of these conditions is met
 
 ---
 
+## Phase 7 — Evolution Loop (auto_evolve)
+
+Gate: skip entirely unless `auto_evolve` is ENABLED **and** `evolution_repos` is non-empty.
+
+For every repo in `evolution_repos` (comma-separated `owner/repo` pairs), interleave PR review and issue triage on the `evolution_check_interval` cadence. Use `schedule_create` to register a recurring tick if it does not already exist; never busy-loop inside one turn.
+
+### 7.1 PR Review Pass
+
+1. List open PRs (filter out drafts unless explicitly enabled):
+   ```
+   curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \\
+     "https://api.github.com/repos/OWNER/REPO/pulls?state=open&per_page=50" \\
+     -o open_prs.json
+   ```
+2. For each PR, look up `devops_pr_review_<owner>_<repo>_<number>` in memory. Skip if `head_sha` matches the last reviewed sha — already reviewed at this revision.
+3. Fetch the diff + file list:
+   ```
+   curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \\
+     -H "Accept: application/vnd.github.v3.diff" \\
+     "https://api.github.com/repos/OWNER/REPO/pulls/NUM" -o pr.diff
+   curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \\
+     "https://api.github.com/repos/OWNER/REPO/pulls/NUM/files" -o pr_files.json
+   ```
+4. Delegate to the `code-reviewer` sub-agent with: PR title, body, diff, file list, target branch's `AGENTS.md`/`CLAUDE.md` if present. Capture the reviewer's structured output (approve / request changes / block + issues + positives).
+5. Post the review back to GitHub:
+   ```
+   curl -s -X POST -H "Authorization: Bearer $GITHUB_TOKEN" \\
+     -H "Content-Type: application/json" \\
+     -d "$REVIEW_BODY_JSON" \\
+     "https://api.github.com/repos/OWNER/REPO/pulls/NUM/reviews"
+   ```
+   Event `"COMMENT"` for advisory passes. Reserve `"REQUEST_CHANGES"` for blocking findings flagged by the reviewer; never auto-`"APPROVE"`.
+6. Record the result in memory: `memory_store devops_pr_review_<owner>_<repo>_<number>` with `{ head_sha, verdict, timestamp }`. Bump dashboard counter `devops_hand_prs_reviewed`.
+
+### 7.2 Issue Triage + Implementation Pass
+
+1. List open issues that match the configured triage filter (default: issues with no `wontfix` / `duplicate` / `invalid` labels and no existing linked PR):
+   ```
+   curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \\
+     "https://api.github.com/repos/OWNER/REPO/issues?state=open&per_page=50" \\
+     -o open_issues.json
+   ```
+2. For each issue, classify via the **Issue Triage Playbook** (see `SKILL.md`):
+   - Labels first (`bug` / `enhancement` / `feature` / `question`) — cheap, deterministic
+   - LLM fallback only if labels are absent — single short prompt, never multi-turn
+   - Result is one of: `bug-fix`, `feature`, `needs-info`, `skip`
+3. For `bug-fix` and `feature`, dispatch to the `implementer` sub-agent with the BMAD pipeline whose depth is set by `bmad_strictness`.
+4. The implementer produces a **draft PR**. Always draft, never ready-for-review, regardless of `approval_mode` — this is the safety floor on auto-generated code. The user (or another reviewer) marks it ready.
+5. Comment on the originating issue with a link to the draft PR and a one-line summary.
+6. Record `memory_store devops_issue_state_<owner>_<repo>_<number>` with `{ classification, pr_url, timestamp }`. Bump dashboard counter `devops_hand_issues_processed`.
+
+### 7.3 Safety Floor (NEVER bypass)
+
+- Always create a fresh git worktree per implementation task — never write to the user's working tree.
+- Never commit to `main` / `master` directly.
+- Never use `--no-verify`, `--force`, or `git push -f` to a protected branch.
+- If the kernel `forbid-main-worktree` style hook is configured upstream, honor it; abort the task on hook failure rather than retrying.
+- Stop and queue (`devops_queue.json`) if the implementer wants to touch:
+  - `Cargo.toml` workspace members
+  - migration files
+  - any path containing `secrets`, `.env`, `credentials`, `*.pem`, `*.key`
+  - more than `max_changed_files` (default 30) files in one PR
+- Token budget: each evolution tick must stop on its own at 70% of the configured per-turn budget so the next tick has headroom. Use `metering_query` if available.
+
+### 7.4 Failure Handling
+
+- Network / API errors → exponential backoff, max 3 retries, then surface a `devops_evolution_blocked` event and skip this PR/issue for the current tick.
+- Reviewer or implementer sub-agent times out → record a `timed_out` verdict in memory so we don't retry on the same head_sha next tick.
+- `git push` rejected (protected branch, stale, etc.) → do not retry; treat the originating issue as `skip` for this tick and surface the rejection to the user via event.
+
+---
+
 ## Guidelines
 
 - NEVER execute destructive commands without explicit user confirmation
@@ -647,6 +794,90 @@ OUTPUT FORMAT:
 
 Be thorough but constructive. Focus on bugs and risks, not style preferences."""
 
+[agents.implementer]
+invoke_hint = "BMAD pipeline executor — turns an issue into a draft PR via brainstorm, architect, PRD, implement phases"
+name = "implementer"
+description = "BMAD implementer. Takes a triaged issue (bug or feature) and produces a draft PR following the Brainstorm → Architect → PRD → Implement methodology, scaled by `bmad_strictness`."
+module = "builtin:chat"
+provider = "default"
+model = "default"
+max_tokens = 16384
+temperature = 0.2
+max_iterations = 80
+max_history_messages = 100
+system_prompt = """You are Implementer, the BMAD execution sub-agent inside the DevOps Hand. You convert a single triaged GitHub issue into a draft pull request. You DO NOT push to protected branches, you DO NOT mark PRs ready-for-review, and you DO NOT skip phases unless `bmad_strictness = "light"`.
+
+## Inputs
+
+You will receive:
+- `issue`: full GitHub issue payload (title, body, labels, comments)
+- `classification`: `"bug-fix"` or `"feature"`
+- `repo`: `owner/name`
+- `bmad_strictness`: `"light"` | `"standard"` | `"strict"`
+- `repo_context`: workspace root path of a freshly-created git worktree off `origin/<default-branch>` — your sandbox
+
+## Pipeline (skip phases per strictness)
+
+### Phase B — Brainstorm    (skipped when strictness = light)
+- Re-read the issue. What is the actual user-visible problem or capability being asked for? Restate in your own words.
+- Generate 2–3 distinct approaches. For each: rough sketch, files touched, risk level, estimated diff size.
+- Pick ONE. Record the trade-off justification in a `BMAD.md` you'll commit alongside the change. Length: ≤ 200 words.
+
+### Phase A — Architect
+- For the chosen approach, identify exact crates / modules / files to change.
+- Decide types, function signatures, and module boundaries before writing code.
+- Call out any interface changes that ripple to other crates and confirm the ripple is bounded (or escalate via queue if it isn't).
+- Append to `BMAD.md` under `## Architecture`.
+
+### Phase P — PRD     (skipped when strictness = light)
+- Acceptance criteria as a bulleted checklist (what must pass for the PR to be ready).
+- Test plan: enumerate the unit / integration tests you will add or update.
+- Rollback plan: how a reviewer can revert if this lands and breaks something.
+- Append to `BMAD.md` under `## PRD`.
+
+### Phase I — Implement
+- For bug fixes: write a failing test first (TDD), make it pass, then refactor. The failing test must be committed before the fix, in the same PR.
+- For features: write tests alongside code; do not commit untested branches.
+- Use only `shell_exec` + `file_*` tools to edit. Never edit outside `repo_context`.
+- Run the project's own lint/test gate (e.g., `cargo clippy --workspace --all-targets -- -D warnings`, `cargo test -p <crate>`). If the project has a `justfile` or `xtask`, prefer those. Fail-fast: if the gate doesn't pass, fix; if it can't be made to pass within `max_iterations`, stop and surface.
+
+## Output
+
+A **draft PR** (`draft: true`) on `repo` whose body contains:
+
+```
+## Summary
+<one paragraph — what this PR does and why>
+
+## BMAD Pipeline Output
+<inline copy of BMAD.md sections, or link to the committed BMAD.md if too large>
+
+## Acceptance Checklist
+- [ ] <copied from PRD>
+
+## Risk
+<one paragraph>
+
+## Generated By
+DevOps Hand → implementer sub-agent  (issue: #<num>, strictness: <level>)
+```
+
+## Hard Rules (NEVER violate, regardless of strictness)
+
+- ALWAYS work in a fresh git worktree provided as `repo_context`. Never `cd` out of it.
+- ALWAYS create a feature branch named `auto/<classification>-<issue-number>-<slug>`.
+- NEVER push to `main`, `master`, `trunk`, or any branch protected by ruleset.
+- NEVER use `git push --force`, `--no-verify`, `--no-gpg-sign`, or `--amend` against a remote branch.
+- NEVER commit anything matching: `.env*`, `*.pem`, `*.p12`, `id_rsa`, `id_ed25519`, `credentials*`, `secrets*`, `vault_*.key`.
+- NEVER include AI attribution (`Co-Authored-By: Claude`, `🤖 Generated with…`) in commit messages — the upstream repo's commit-msg hook rejects them and so do we.
+- Touch limit: stop at `max_changed_files` (default 30 files). If the implementation legitimately needs more, decompose into multiple draft PRs and stop after the first.
+- If any acceptance-test command in PRD fails after your last fix attempt, DO NOT push. Write the partial state + failure details to `devops_queue.json` and surface for human triage.
+- Token budget: stop on your own at 70% of the per-turn budget so the next tick has headroom.
+
+## On `bmad_strictness = "strict"`
+
+Between every phase, write the produced artifact to `devops_queue.json` with `phase: "bmad-<letter>-pending"` and stop. The user manually flips the queue entry to `approved` before you proceed. This is how a human keeps a leash on autonomous code changes."""
+
 [dashboard]
 [[dashboard.metrics]]
 label = "Health Checks Run"
@@ -668,6 +899,21 @@ label = "Deployments Managed"
 memory_key = "devops_hand_deployments_managed"
 format = "number"
 
+[[dashboard.metrics]]
+label = "PRs Reviewed"
+memory_key = "devops_hand_prs_reviewed"
+format = "number"
+
+[[dashboard.metrics]]
+label = "Issues Processed"
+memory_key = "devops_hand_issues_processed"
+format = "number"
+
+[[dashboard.metrics]]
+label = "Draft PRs Opened"
+memory_key = "devops_hand_draft_prs_opened"
+format = "number"
+
 # ─── Token & Performance Metadata ─────────────────────────────────────────────
 
 [metadata]
diff --git a/hands/devops/README.md b/hands/devops/README.md
index 686fe8f..43c8871 100644
--- a/hands/devops/README.md
+++ b/hands/devops/README.md
@@ -8,7 +8,7 @@ Autonomous DevOps engineer -- CI/CD management, infrastructure monitoring, deplo
 |-------|-------|
 | Category | `development` |
 | Agent | `devops-hand` |
-| Routing | `ci/cd`, `pipeline`, `github actions`, `infrastructure monitoring`, `deployment automation`, `incident response` |
+| Routing | `ci/cd`, `pipeline`, `github actions`, `infrastructure monitoring`, `deployment automation`, `incident response`, `auto evolve`, `review github prs`, `triage issues`, `implement issue`, `fix bug from issue` |
 
 ## Integrations
 
@@ -24,9 +24,44 @@ None required.
 - **Service URLs** -- Comma-separated URLs to monitor
 - **Alert on Failure** -- Publish events on health check failures (default: on)
 - **Rollback Strategy** -- `manual`, `auto_previous`, `blue_green`
+- **Auto Evolution** -- Periodically scan GitHub repos and run PR review / issue triage / BMAD implementation (default: off)
+- **Evolution Target Repos** -- Comma-separated `owner/repo` pairs to watch
+- **Evolution Check Interval** -- `5min`, `15min`, `1hour`, `6hour`, `1day`
+- **BMAD Strictness** -- `light`, `standard`, `strict` -- depth of the Brainstorm-Architect-PRD-Implement pipeline before producing a draft PR
 
 ## Usage
 
 ```bash
 librefang hand run devops
 ```
+
+## Auto-Evolution Mode
+
+When `auto_evolve = true` and `evolution_repos` is set, the Hand's Phase 7 loop fires on `evolution_check_interval` and, for each watched repo:
+
+1. **Reviews open PRs** -- pulls each PR's diff, asks the `code-reviewer` sub-agent for an assessment, posts a single review back on GitHub (`COMMENT` by default, `REQUEST_CHANGES` only for blocking findings, never `APPROVE`). Already-reviewed `head_sha` values are skipped.
+2. **Triages open issues** -- labels first, single-prompt LLM fallback if labels are absent. Result is one of `bug-fix | feature | needs-info | skip`.
+3. **Implements actionable issues** -- dispatches `bug-fix` and `feature` issues to the `implementer` sub-agent which runs the BMAD pipeline scaled by `bmad_strictness` and produces a **draft PR**.
+
+### Safety floor (always on)
+
+- Draft PRs only. The Hand never marks PRs ready-for-review and never merges.
+- Never pushes to `main` / `master` / protected branches.
+- Never `--force` / `--no-verify` / `--amend` against a remote branch.
+- Stops and queues to `devops_queue.json` if the change touches `Cargo.toml` workspace members, migration files, or anything under a `secrets` / credential glob.
+- Hard cap of 30 changed files per PR; larger changes get split.
+- Per-tick token budget capped at 70% so subsequent ticks have headroom.
+
+### Required GitHub token scopes
+
+For public-repo evolution, a fine-grained token with:
+- **Pull requests**: read & write (review posting, draft PR creation)
+- **Issues**: read & write (triage comments, issue cross-links)
+- **Contents**: read & write (branch push)
+- **Metadata**: read
+
+For private repos, grant the fine-grained token access to each target repository (or use a classic token with the `repo` scope) and ensure the repo is listed in `evolution_repos`.
+
+### What it does NOT do
+
+It will never merge a PR, mark a draft as ready, or auto-approve. Human review is always required. See `SKILL.md` -> `What this Hand does NOT do` for the full list.
diff --git a/hands/devops/SKILL.md b/hands/devops/SKILL.md
index 47fd3ad..ddbf57d 100644
--- a/hands/devops/SKILL.md
+++ b/hands/devops/SKILL.md
@@ -868,3 +868,368 @@ rm /tmp/restore.dump
 **Communication template**: Subject `[INCIDENT] Service -- Status`. Body: what happened, impact, current status, ETA, next update time.
 
 **Post-recovery checklist**: health checks passing, data integrity verified, monitoring restored, backups resumed, incident report filed, post-mortem scheduled within 48h.
+
+---
+
+## Auto-Evolution Workflow
+
+The Phase 7 evolution loop (gated by `auto_evolve = true`) periodically scans the repos listed in `evolution_repos` and takes one of three actions per item:
+
+- **Open PR** → review via the `code-reviewer` sub-agent, post a `COMMENT` review back to GitHub.
+- **Open Issue** → triage, then dispatch to the `implementer` sub-agent if actionable.
+- **Anything we've already processed at the same head_sha / issue revision** → skip.
+
+The pipeline never marks PRs ready-for-review and never pushes to protected branches. All produced PRs are drafts.
+
+### When the loop fires
+
+`schedule_create` registers a recurring trigger on `evolution_check_interval`. Each tick runs at most one full repo pass; if there's more work than fits in the token budget, the remainder waits for the next tick. The state cursor lives in `memory` so progress survives daemon restarts.
+
+### Memory keys this workflow owns
+
+| Key pattern | Stored value |
+|---|---|
+| `devops_pr_review_<owner>_<repo>_<num>` | `{ head_sha, verdict, timestamp }` — last review per PR |
+| `devops_issue_state_<owner>_<repo>_<num>` | `{ classification, pr_url, timestamp }` — last triage per issue |
+| `devops_evolution_cursor_<owner>_<repo>` | `{ last_tick_at, last_seen_pr, last_seen_issue }` |
+| `devops_hand_prs_reviewed` | counter — dashboard metric |
+| `devops_hand_issues_processed` | counter — dashboard metric |
+| `devops_hand_draft_prs_opened` | counter — dashboard metric |
+
+---
+
+### Issue Triage Playbook
+
+The goal is to spend zero LLM tokens when labels are enough. LLM fallback is one prompt, never a multi-turn chain.
+
+**Step 1 -- Label-driven (deterministic)**
+
+```text
+Has any of {"bug", "defect", "regression", "broken"}        -> bug-fix
+Has any of {"feature", "enhancement", "rfc", "proposal"}    -> feature
+Has any of {"question", "discussion", "support"}            -> needs-info
+Has any of {"wontfix", "duplicate", "invalid", "stale"}     -> skip
+```
+
+**Step 2 -- LLM fallback (only when no label matches Step 1)**
+
+Single classification prompt. Allowed outputs: exactly one of `bug-fix | feature | needs-info | skip`. Reject any longer answer and re-prompt once before defaulting to `needs-info`.
+
+```text
+You are classifying a GitHub issue for a DevOps Hand evolution pipeline.
+
+Output exactly ONE token from this set: bug-fix | feature | needs-info | skip
+
+Heuristics:
+- bug-fix:    user reports incorrect behavior, crash, regression, security issue,
+              or unexpected output of existing functionality.
+- feature:    user requests new capability, configuration option, or refactor
+              that ships user-visible value.
+- needs-info: report is ambiguous -- cannot reproduce, missing version/environment,
+              cannot tell if bug or feature.
+- skip:       issue is a question, off-topic, or already addressed.
+
+Issue title: {TITLE}
+Issue body:
+{BODY}
+Existing labels: {LABELS_OR_NONE}
+```
+
+**Step 3 -- Already-linked PR check**
+
+```bash
+# A PR already references this issue -> skip implementation, just review the PR
+curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
+  "https://api.github.com/repos/OWNER/REPO/issues/NUM/timeline?per_page=50" \
+  | jq -r '.[] | select(.event == "cross-referenced") | .source.issue.pull_request.url // empty'
+```
+
+---
+
+### PR Review Automation
+
+**Pull PR metadata + diff + files in three calls**
+
+```bash
+PR_URL="https://api.github.com/repos/OWNER/REPO/pulls/NUM"
+
+curl -s -H "Authorization: Bearer $GITHUB_TOKEN" "$PR_URL" -o pr.json
+curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
+     -H "Accept: application/vnd.github.v3.diff" \
+     "$PR_URL" -o pr.diff
+curl -s -H "Authorization: Bearer $GITHUB_TOKEN" "$PR_URL/files?per_page=100" -o pr_files.json
+```
+
+**Short-circuit on bot / merge / huge diff**
+
+```bash
+HEAD_SHA=$(jq -r .head.sha pr.json)
+USER_TYPE=$(jq -r .user.type pr.json)               # "Bot" -> comment-only, no deep review
+CHANGED=$(jq -r .changed_files pr.json)             # total from PR metadata; pr_files.json is paginated (max 100 per page)
+[ "$CHANGED" -gt 200 ] && { echo "diff too large -- surface for human review"; exit 0; }
+```
+
+**Dispatch to reviewer sub-agent**
+
+Hand the reviewer the diff, file list, PR description, and (if present) the target branch's `AGENTS.md` / `CONTRIBUTING.md`. Reviewer returns structured JSON:
+
+```json
+{
+  "verdict": "approve | request_changes | block | comment_only",
+  "issues": [
+    {"severity": "critical|major|minor", "file": "...", "line": 42, "body": "..."}
+  ],
+  "positives": ["..."],
+  "summary": "..."
+}
+```
+
+**Post a single review (not N inline comments)**
+
+```bash
+EVENT="COMMENT"                       # default -- advisory pass
+[ "$VERDICT" = "request_changes" ] && EVENT="REQUEST_CHANGES"
+# Never auto-APPROVE.
+
+jq -n --arg event "$EVENT" --arg body "$SUMMARY_BODY" --arg sha "$HEAD_SHA" \
+  '{commit_id: $sha, event: $event, body: $body}' > review_payload.json
+
+curl -s -X POST -H "Authorization: Bearer $GITHUB_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d @review_payload.json \
+  "$PR_URL/reviews"
+```
+
+Body format -- keep tight; reviewers read this:
+
+```markdown
+**DevOps Hand -- automated review**
+
+**Verdict**: {verdict}
+
+**Summary**: {one paragraph}
+
+**Findings** ({N}):
+- [critical] {file}:{line} -- {body}
+- [major]    {file}:{line} -- {body}
+- [minor]    {file}:{line} -- {body}
+
+**What looks good**:
+- {positive 1}
+
+_Generated by DevOps Hand reviewer (commit: `{sha}`)._
+```
+
+---
+
+### Bug Fix Playbook
+
+The implementer runs this when `classification = "bug-fix"`. Sequence is rigid -- failing test first, fix second, refactor third.
+
+**Step 1 -- Reproduce**
+
+In the supplied worktree:
+
+```bash
+cd "$REPO_CONTEXT"
+git checkout -b "auto/bug-fix-${ISSUE_NUMBER}-${SLUG}"
+# Try to reproduce from the issue's repro steps. If they're absent, infer them.
+# If you cannot reproduce in 3 attempts, stop and surface to devops_queue.json.
+```
+
+**Step 2 -- Failing test first**
+
+```bash
+# For Rust workspaces, drop the test in the closest existing module's tests/.
+cargo test -p <crate> --test <existing_test_file> -- --nocapture --test-threads=1
+# Expect FAILURE. Commit the failing test:
+git add tests/ && git commit -m "test: reproduce #${ISSUE_NUMBER} -- <one-line>"
+```
+
+**Step 3 -- Minimal fix**
+
+Edit only the files needed to make the test pass. Run the project's full lint+test gate:
+
+```bash
+cargo clippy --workspace --all-targets -- -D warnings
+cargo test -p <crate>                  # not --workspace -- target/ contention
+```
+
+**Step 4 -- Refactor (optional)**
+
+Only if step 3 left obvious smell (long fn, repeated literal, etc.). Skip if `bmad_strictness = "light"`.
+
+**Step 5 -- Commit and push (draft branch only)**
+
+```bash
+git add -A && git commit -m "fix: <subject> (#${ISSUE_NUMBER})"
+git push origin "auto/bug-fix-${ISSUE_NUMBER}-${SLUG}"
+```
+
+**Step 6 -- Open draft PR (see Draft PR Creation below)**
+
+---
+
+### BMAD Feature Pipeline
+
+Run when `classification = "feature"`. Phases scale with `bmad_strictness`:
+
+| Phase | `light` | `standard` | `strict` |
+|---|---|---|---|
+| B -- Brainstorm | skip | inline <=200 words | inline + queue gate |
+| A -- Architect | always | always | always + queue gate |
+| P -- PRD | skip | required | required + queue gate |
+| I -- Implement | always | always | always |
+
+Each phase output is appended to `BMAD.md` in the repo root of the feature branch. The file is committed along with the implementation so reviewers can see the reasoning.
+
+**BMAD.md template**
+
+````markdown
+# BMAD -- #{ISSUE_NUMBER}: {SHORT TITLE}
+
+## Brainstorm
+**Restated problem**: {one paragraph}
+
+**Approaches considered**:
+1. **{Name}** -- {sketch} -- files: {list} -- risk: {low/mid/high} -- diff: ~{N} LoC
+2. **{Name}** -- ...
+3. **{Name}** -- ...
+
+**Chosen**: #{N}. Rationale: {one paragraph}
+
+## Architecture
+**Crates / modules touched**: {list}
+
+**Types / signatures introduced or changed**:
+```rust
+// ...
+```
+
+**Cross-crate ripples**: {none / bounded list / escalated to queue: reason}
+
+## PRD
+**Acceptance criteria**
+- [ ] {behavior 1}
+- [ ] {behavior 2}
+
+**Test plan**
+- unit: `crates/{crate}/src/{path}.rs` -- {what it asserts}
+- integration: `crates/{crate}/tests/{name}.rs` -- {what it asserts}
+
+**Rollback plan**: {one paragraph}
+
+## Implementation Notes
+{anything a future reader needs to understand the diff but wouldn't see in code comments}
+````
+
+**Strict mode queue gate**
+
+Between each phase, write to `devops_queue.json`:
+
+```json
+{
+  "id": "bmad_${REPO}_${ISSUE}_${PHASE}",
+  "action": "bmad_phase_review",
+  "phase": "B|A|P|I",
+  "issue": "owner/repo#NUM",
+  "artifact_path": "BMAD.md",
+  "status": "pending",
+  "created": "ISO8601"
+}
+```
+
+Stop. Wait for status to flip to `approved` (set by the user out-of-band) before continuing to the next phase.
+
+---
+
+### Draft PR Creation
+
+The final action of both bug-fix and feature paths. **Always `draft: true`.**
+
+**Step 1 -- Push the branch (if not already)**
+
+```bash
+git push origin "auto/${CLASSIFICATION}-${ISSUE}-${SLUG}"
+```
+
+**Step 2 -- Compose the PR body**
+
+```markdown
+## Summary
+{one paragraph}
+
+## Linked Issue
+Closes #{ISSUE_NUMBER}
+
+## BMAD Pipeline Output
+{inline BMAD.md, or note that it's committed at `BMAD.md` in this PR}
+
+## Acceptance Checklist
+- [ ] {copied from PRD}
+
+## Risk
+{one paragraph -- what could go wrong, what's the blast radius}
+
+## Verification I ran locally
+- `cargo clippy --workspace --all-targets -- -D warnings` -- passed
+- `cargo test -p {crate}` -- passed
+- {anything project-specific from justfile / xtask}
+
+## Generated by
+DevOps Hand -> implementer  (strictness: {level})
+```
+
+**Step 3 -- Open the draft PR**
+
+```bash
+jq -n \
+  --arg title "${PR_TITLE}" \
+  --arg body  "${PR_BODY}" \
+  --arg head  "auto/${CLASSIFICATION}-${ISSUE}-${SLUG}" \
+  --arg base  "${BASE_BRANCH}" \
+  '{title: $title, body: $body, head: $head, base: $base, draft: true}' \
+  > pr_create.json
+
+curl -s -X POST -H "Authorization: Bearer $GITHUB_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d @pr_create.json \
+  "https://api.github.com/repos/${OWNER}/${REPO}/pulls" \
+  -o pr_created.json
+
+PR_URL=$(jq -r .html_url pr_created.json)
+echo "Draft PR: $PR_URL"
+```
+
+**Step 4 -- Cross-link on the originating issue**
+
+```bash
+jq -n --arg body "Auto-implementation drafted: ${PR_URL}\n\n_Generated by DevOps Hand. Mark the PR ready-for-review when human triage agrees._" \
+  '{body: $body}' > issue_comment.json
+
+curl -s -X POST -H "Authorization: Bearer $GITHUB_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d @issue_comment.json \
+  "https://api.github.com/repos/${OWNER}/${REPO}/issues/${ISSUE_NUMBER}/comments"
+```
+
+**Step 5 -- Bump counters**
+
+```text
+memory_store devops_hand_draft_prs_opened (current + 1)
+memory_store devops_issue_state_${OWNER}_${REPO}_${ISSUE_NUMBER} = {classification, pr_url, timestamp}
+event_publish devops_evolution_pr_opened {pr_url, issue}
+```
+
+---
+
+### What this Hand does NOT do
+
+To set expectations for users and reviewers:
+
+- It does NOT merge PRs. A human always merges.
+- It does NOT mark draft PRs as ready-for-review.
+- It does NOT push to `main` / `master` / any protected branch.
+- It does NOT operate on private repos unless the configured GITHUB_TOKEN has been granted access to them (fine-grained repository access, or the classic `repo` scope) and the repo is in `evolution_repos`.
+- It does NOT modify `Cargo.toml` workspace members, migration files, secrets, or any path matching the safety floor in Phase 7.3 -- those escalate to `devops_queue.json` instead.
+- It does NOT consume more than 70% of the per-turn token budget in a single tick. Long jobs are picked up by subsequent ticks.

From 944dc07c39e3a8ff3685d2c70e8722c21b3f4d2a Mon Sep 17 00:00:00 2001
From: Evan Hu <suzukaze.haduki@gmail.com>
Date: Thu, 14 May 2026 15:24:38 +0900
Subject: [PATCH 2/3] =?UTF-8?q?fix(devops):=20address=20PR=20review=20?=
 =?UTF-8?q?=E2=80=94=20close=20blocking=20+=20medium=20+=20style=20issues?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Blocking (5):
- add max_changed_files setting (was referenced in implementer prompt
  but never defined)
- drop metering_query reference (tool isn't in tools = [...] list);
  agent self-paces against budget instead
- fix \n\n literal in jq --arg for issue cross-link comment; compose
  body in shell with printf so newlines survive
- resolve BASE_BRANCH via /repos/owner/repo .default_branch instead
  of relying on an undefined variable
- complete reviewer-verdict → GitHub review-event mapping (4 cases,
  not just request_changes); block routes through REQUEST_CHANGES
  with a blocking-prefix in the body, approve downgrades to COMMENT

Medium (5):
- correct Phase 6 → Phase 7 in the auto-evolution settings comment
- remove schedule_create busy-loop confusion; Phase 7 fires per-turn
  while the Hand is already frequency = "continuous", with cadence
  enforced via devops_evolution_cursor memory key
- generalize the forbid-main-worktree wording — discover and honor
  whatever pre-commit / pre-push / commit-msg hooks the upstream
  repo configures (was librefang-specific)
- clarify the AI-attribution rule: ban LLM-vendor attribution
  (Claude, GPT, 🤖, etc.) but allow process attribution
  (DevOps Hand → implementer) for traceability
- add USER_TYPE = "Bot" short-circuit that was extracted but never
  applied (bots get a token-cheap skip, not a deep review)

Style (2):
- document the four event_publish event names (devops_evolution_*)
  in a new SKILL.md table alongside the memory-keys table
- justify implementer's max_history_messages = 100 with a comment
  (BMAD 4 phases × cargo build/test chains needs headroom)
---
 hands/devops/HAND.toml | 48 ++++++++++++++++++++------
 hands/devops/SKILL.md  | 76 ++++++++++++++++++++++++++++++++++++++----
 2 files changed, 106 insertions(+), 18 deletions(-)

diff --git a/hands/devops/HAND.toml b/hands/devops/HAND.toml
index 79056b0..d188310 100644
--- a/hands/devops/HAND.toml
+++ b/hands/devops/HAND.toml
@@ -296,7 +296,7 @@ setting_type = "toggle"
 default = "true"
 
 # ─── Auto-Evolution settings ─────────────────────────────────────────────────
-# These drive the Phase 6 evolution loop: periodic scan of configured
+# These drive the Phase 7 evolution loop: periodic scan of configured
 # GitHub repos, automated PR review via the reviewer sub-agent, and
 # BMAD-style bug fix / feature implementation via the implementer
 # sub-agent. All produce draft PRs and respect approval_mode.
@@ -361,6 +361,25 @@ label = "Standard (full 4-phase pipeline, draft PR at end)"
 value = "strict"
 label = "Strict (full pipeline + require human approval at each phase via queue)"
 
+[[settings]]
+key = "max_changed_files"
+label = "Max Changed Files Per Draft PR"
+description = "Implementer stops and queues for human triage if a single draft PR would touch more than this many files. Decompose larger work into multiple PRs."
+setting_type = "select"
+default = "30"
+
+[[settings.options]]
+value = "10"
+label = "10 files (very conservative)"
+
+[[settings.options]]
+value = "30"
+label = "30 files (default)"
+
+[[settings.options]]
+value = "100"
+label = "100 files (large refactors)"
+
 # ─── Agent configuration ─────────────────────────────────────────────────────
 
 [agents.main]
@@ -622,7 +641,9 @@ Stop the current monitoring/incident session when ANY of these conditions is met
 
 Gate: skip entirely unless `auto_evolve` is ENABLED **and** `evolution_repos` is non-empty.
 
-For every repo in `evolution_repos` (comma-separated `owner/repo` pairs), interleave PR review and issue triage on the `evolution_check_interval` cadence. Use `schedule_create` to register a recurring tick if it does not already exist; never busy-loop inside one turn.
+The Hand is already `frequency = "continuous"`, so this Phase fires once per turn while gates pass. On entry, for each configured repo, call `memory_recall` on `devops_evolution_cursor_<owner>_<repo>`. If less than `evolution_check_interval` has elapsed since that repo's last tick, skip the repo for this turn — the next turn will check again. Never busy-loop or self-schedule inside a turn.
+
+For every repo in `evolution_repos` (comma-separated `owner/repo` pairs) that passes the cadence gate, interleave PR review and issue triage.
 
 ### 7.1 PR Review Pass
 
@@ -672,15 +693,15 @@ For every repo in `evolution_repos` (comma-separated `owner/repo` pairs), interl
 ### 7.3 Safety Floor (NEVER bypass)
 
 - Always create a fresh git worktree per implementation task — never write to the user's working tree.
-- Never commit to `main` / `master` directly.
-- Never use `--no-verify`, `--force`, or `git push -f` to a protected branch.
-- If the kernel `forbid-main-worktree` style hook is configured upstream, honor it; abort the task on hook failure rather than retrying.
+- Never commit to `main` / `master` / `trunk` directly.
+- Never use `--no-verify`, `--force`, or `git push -f` against any remote branch.
+- Honor whatever pre-commit / pre-push / commit-msg hooks the upstream repo configures (run via `git config core.hooksPath` discovery + executing each non-skipped hook). Abort the task on hook failure rather than retrying.
 - Stop and queue (`devops_queue.json`) if the implementer wants to touch:
-  - `Cargo.toml` workspace members
-  - migration files
-  - any path containing `secrets`, `.env`, `credentials`, `*.pem`, `*.key`
-  - more than `max_changed_files` (default 30) files in one PR
-- Token budget: each evolution tick must stop on its own at 70% of the configured per-turn budget so the next tick has headroom. Use `metering_query` if available.
+  - `Cargo.toml` workspace members (any `members = [...]` change)
+  - migration files (paths under `*/migrations/*`, `*/migrate/*`, or matching `*.sql`)
+  - any path containing `secrets`, `.env`, `credentials`, `*.pem`, `*.key`, `id_rsa`, `id_ed25519`
+  - more than `max_changed_files` files in one PR (configurable setting, default 30)
+- Token budget: each evolution tick must stop on its own when the agent senses it is nearing the per-turn budget (target ~70% so the next tick has headroom). Estimate by tracking cumulative output tokens since turn start; the kernel-enforced hard cap is the upstream guard rail, not the primary control.
 
 ### 7.4 Failure Handling
 
@@ -804,6 +825,11 @@ model = "default"
 max_tokens = 16384
 temperature = 0.2
 max_iterations = 80
+# Raise the history cap above the kernel default. BMAD work fans out
+# across 4 phases (Brainstorm / Architect / PRD / Implement), each of
+# which spawns shell_exec chains (cargo build/test cycles, git ops,
+# file edits). 100 buys enough headroom that a single PR doesn't get
+# truncated mid-implementation while still bounding worst-case cost.
 max_history_messages = 100
 system_prompt = """You are Implementer, the BMAD execution sub-agent inside the DevOps Hand. You convert a single triaged GitHub issue into a draft pull request. You DO NOT push to protected branches, you DO NOT mark PRs ready-for-review, and you DO NOT skip phases unless `bmad_strictness = "light"`.
 
@@ -869,7 +895,7 @@ DevOps Hand → implementer sub-agent  (issue: #<num>, strictness: <level>)
 - NEVER push to `main`, `master`, `trunk`, or any branch protected by ruleset.
 - NEVER use `git push --force`, `--no-verify`, `--no-gpg-sign`, or `--amend` against a remote branch.
 - NEVER commit anything matching: `.env*`, `*.pem`, `*.p12`, `id_rsa`, `id_ed25519`, `credentials*`, `secrets*`, `vault_*.key`.
-- NEVER include AI attribution (`Co-Authored-By: Claude`, `🤖 Generated with…`) in commit messages — the upstream repo's commit-msg hook rejects them and so do we.
+- NEVER include LLM-vendor attribution in commit messages or PR bodies — no `Co-Authored-By: Claude`, no `Generated with Claude / GPT / Anthropic / OpenAI`, no `🤖` emoji crediting an AI vendor. "Generated by DevOps Hand → implementer" (process attribution) is fine and encouraged for traceability; vendor attribution is not. Many upstream repos enforce this via commit-msg hook; we apply the rule regardless of upstream enforcement.
 - Touch limit: stop at `max_changed_files` (default 30 files). If the implementation legitimately needs more, decompose into multiple draft PRs and stop after the first.
 - If any acceptance-test command in PRD fails after your last fix attempt, DO NOT push. Write the partial state + failure details to `devops_queue.json` and surface for human triage.
 - Token budget: stop on your own at 70% of the per-turn budget so the next tick has headroom.
diff --git a/hands/devops/SKILL.md b/hands/devops/SKILL.md
index ddbf57d..e04d056 100644
--- a/hands/devops/SKILL.md
+++ b/hands/devops/SKILL.md
@@ -896,6 +896,17 @@ The pipeline never marks PRs ready-for-review and never pushes to protected bran
 | `devops_hand_issues_processed` | counter — dashboard metric |
 | `devops_hand_draft_prs_opened` | counter — dashboard metric |
 
+### Events this workflow publishes
+
+| Event name | Payload | When |
+|---|---|---|
+| `devops_evolution_pr_reviewed` | `{ pr_url, verdict, head_sha }` | After a PR review is posted to GitHub |
+| `devops_evolution_pr_opened` | `{ pr_url, issue_url, classification }` | After a draft PR is created from a triaged issue |
+| `devops_evolution_blocked` | `{ reason, pr_or_issue_url, retry_after }` | When a tick is aborted by safety floor, API failure, or hook rejection |
+| `devops_evolution_skipped` | `{ pr_or_issue_url, reason }` | When an item is skipped by cadence gate, label filter, already-processed check, or a bot-author / oversized-diff short-circuit |
+
+These are advisory; subscribers (dashboard, audit log, downstream Hands) are optional.
+
 ---
 
 ### Issue Triage Playbook
@@ -964,9 +975,25 @@ curl -s -H "Authorization: Bearer $GITHUB_TOKEN" "$PR_URL/files?per_page=300" -o
 
 ```bash
 HEAD_SHA=$(jq -r .head.sha pr.json)
-USER_TYPE=$(jq -r .user.type pr.json)               # "Bot" -> comment-only, no deep review
+USER_TYPE=$(jq -r .user.type pr.json)
 CHANGED=$(jq '. | length' pr_files.json)
-[ "$CHANGED" -gt 200 ] && { echo "diff too large -- surface for human review"; exit 0; }
+
+# Bot-authored PRs (dependabot, renovate, etc.) get a token-cheap pass:
+# record but skip deep reviewer dispatch.
+if [ "$USER_TYPE" = "Bot" ]; then
+    echo "bot PR -- recording without deep review"
+    # memory_store devops_pr_review_<owner>_<repo>_<num> = {head_sha, verdict: "skipped_bot", ts}
+    # event_publish devops_evolution_skipped {pr_or_issue_url, reason: "bot author"}
+    exit 0
+fi
+
+# Huge diffs: defer to human review rather than spending tokens on a
+# review the reviewer agent can't usefully ground.
+if [ "$CHANGED" -gt 200 ]; then
+    echo "diff too large ($CHANGED files) -- surfacing for human review"
+    # event_publish devops_evolution_skipped {pr_or_issue_url, reason: "diff>200 files"}
+    exit 0
+fi
 ```
 
 **Dispatch to reviewer sub-agent**
@@ -987,9 +1014,29 @@ Hand the reviewer the diff, file list, PR description, and (if present) the targ
 **Post a single review (not N inline comments)**
 
 ```bash
-EVENT="COMMENT"                       # default -- advisory pass
-[ "$VERDICT" = "request_changes" ] && EVENT="REQUEST_CHANGES"
-# Never auto-APPROVE.
+# Map verdict -> GitHub review event. Never auto-APPROVE.
+BODY_PREFIX=""
+case "$VERDICT" in
+  approve)
+    # Downgrade silent "approve" to advisory COMMENT — a human still merges.
+    EVENT="COMMENT"
+    ;;
+  request_changes)
+    EVENT="REQUEST_CHANGES"
+    ;;
+  block)
+    # Block is more severe than request_changes. We still post REQUEST_CHANGES
+    # (the strongest event we use), but flag the body so a human escalates.
+    EVENT="REQUEST_CHANGES"
+    BODY_PREFIX="**Reviewer flagged this PR as BLOCKING — please escalate to a maintainer before merge.**
+
+"
+    ;;
+  comment_only|*)
+    EVENT="COMMENT"
+    ;;
+esac
+SUMMARY_BODY="${BODY_PREFIX}${SUMMARY_BODY}"
 
 jq -n --arg event "$EVENT" --arg body "$SUMMARY_BODY" --arg sha "$HEAD_SHA" \
   '{commit_id: $sha, event: $event, body: $body}' > review_payload.json
@@ -1182,6 +1229,17 @@ DevOps Hand -> implementer  (strictness: {level})
 
 **Step 3 -- Open the draft PR**
 
+First resolve the target branch. Don't assume `main` -- query the repo:
+
+```bash
+BASE_BRANCH=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
+  "https://api.github.com/repos/${OWNER}/${REPO}" | jq -r .default_branch)
+# fallback in the unlikely case the API doesn't return one
+[ -z "$BASE_BRANCH" ] || [ "$BASE_BRANCH" = "null" ] && BASE_BRANCH="main"
+```
+
+Then create the draft PR:
+
 ```bash
 jq -n \
   --arg title "${PR_TITLE}" \
@@ -1203,9 +1261,13 @@ echo "Draft PR: $PR_URL"
 
 **Step 4 -- Cross-link on the originating issue**
 
+Build the body in shell first so newlines survive (`jq --arg` is a literal-string
+parameter, it does NOT interpret backslash escapes):
+
 ```bash
-jq -n --arg body "Auto-implementation drafted: ${PR_URL}\n\n_Generated by DevOps Hand. Mark the PR ready-for-review when human triage agrees._" \
-  '{body: $body}' > issue_comment.json
+ISSUE_COMMENT=$(printf 'Auto-implementation drafted: %s\n\n_Generated by DevOps Hand. Mark the PR ready-for-review when human triage agrees._' "$PR_URL")
+
+jq -n --arg body "$ISSUE_COMMENT" '{body: $body}' > issue_comment.json
 
 curl -s -X POST -H "Authorization: Bearer $GITHUB_TOKEN" \
   -H "Content-Type: application/json" \

From d0aa86f9d322119c6a69bf523cac65c3a956decd Mon Sep 17 00:00:00 2001
From: Evan Hu <suzukaze.haduki@gmail.com>
Date: Thu, 14 May 2026 15:38:08 +0900
Subject: [PATCH 3/3] docs(devops): tighten evolution snippets (D1-D4
 second-review nits)

D1 -- show SUMMARY_BODY (and VERDICT) assignment in PR review snippet:
add explicit jq -r .summary / .verdict extraction from reviewer_output.json
so the agent reading SKILL.md doesn't have to infer where these come from.

D2 -- reword strict-mode wait semantics in both HAND.toml and SKILL.md:
'Stop. Wait...' was misleading because the agent loop has no in-turn
pause primitive. Now spells out: end the current turn after queueing,
let the continuous tick re-read the queue, resume on approved / skip
on pending / abandon on rejected. Explicitly forbids busy-wait and
sleep loops.

D3 -- restructure bot / huge-diff short-circuit so agent-tool calls are
expressed as numbered agent steps, not as '# memory_store ...' comments
inside a bash block. The bash block now only extracts cheap signals;
the decision and the tool calls are clearly agent-level.

D4 -- remove the misleading 'exit 0' from the short-circuit bash and
add a one-liner noting that exit 0 inside shell_exec only ends one
shell session, not the Phase 7 pass; the agent must choose to move on.
---
 hands/devops/HAND.toml |  2 +-
 hands/devops/SKILL.md  | 41 +++++++++++++++++++++++------------------
 2 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/hands/devops/HAND.toml b/hands/devops/HAND.toml
index d188310..dd76d96 100644
--- a/hands/devops/HAND.toml
+++ b/hands/devops/HAND.toml
@@ -902,7 +902,7 @@ DevOps Hand → implementer sub-agent  (issue: #<num>, strictness: <level>)
 
 ## On `bmad_strictness = "strict"`
 
-Between every phase, write the produced artifact to `devops_queue.json` with `phase: "bmad-<letter>-pending"` and stop. The user manually flips the queue entry to `approved` before you proceed. This is how a human keeps a leash on autonomous code changes."""
+Between every phase, write the produced artifact to `devops_queue.json` with `phase: "bmad-<letter>-pending"` and `status: "pending"`, then **end the current turn**. The Hand is continuous, so the next tick re-reads the queue: if the user (out-of-band) flipped `status` to `approved`, resume from the next phase; if still `pending`, skip this issue for this tick and re-check on the next one; if flipped to `rejected`, abandon the issue, comment on it with the rejection rationale (if provided), and stop. Never poll or `sleep` for approval within a single turn — the agent loop has no in-turn pause primitive, and busy-waiting would block other Hand work and burn tokens. This is how a human keeps a leash on autonomous code changes without forcing the daemon to stall."""
 
 [dashboard]
 [[dashboard.metrics]]
diff --git a/hands/devops/SKILL.md b/hands/devops/SKILL.md
index e04d056..637070b 100644
--- a/hands/devops/SKILL.md
+++ b/hands/devops/SKILL.md
@@ -973,32 +973,27 @@ curl -s -H "Authorization: Bearer $GITHUB_TOKEN" "$PR_URL/files?per_page=300" -o
 
 **Short-circuit on bot / merge / huge diff**
 
+Extract the cheap signals from already-fetched PR metadata:
+
 ```bash
 HEAD_SHA=$(jq -r .head.sha pr.json)
 USER_TYPE=$(jq -r .user.type pr.json)
 CHANGED=$(jq '. | length' pr_files.json)
+```
 
-# Bot-authored PRs (dependabot, renovate, etc.) get a token-cheap pass:
-# record but skip deep reviewer dispatch.
-if [ "$USER_TYPE" = "Bot" ]; then
-    echo "bot PR -- recording without deep review"
-    # memory_store devops_pr_review_<owner>_<repo>_<num> = {head_sha, verdict: "skipped_bot", ts}
-    # event_publish devops_evolution_skipped {pr_or_issue_url, reason: "bot author"}
-    exit 0
-fi
+Decision rules (the **agent** applies these in its loop, not the shell — `exit 0` would only end one `shell_exec`, not abort the Phase 7 pass):
 
-# Huge diffs: defer to human review rather than spending tokens on a
-# review the reviewer agent can't usefully ground.
-if [ "$CHANGED" -gt 200 ]; then
-    echo "diff too large ($CHANGED files) -- surfacing for human review"
-    # event_publish devops_evolution_skipped {pr_or_issue_url, reason: "diff>200 files"}
-    exit 0
-fi
-```
+- **`USER_TYPE == "Bot"`** (dependabot, renovate, etc.): skip deep review for this PR. The agent then:
+  1. calls `memory_store devops_pr_review_<owner>_<repo>_<num>` with `{head_sha, verdict: "skipped_bot", timestamp}`
+  2. calls `event_publish devops_evolution_skipped` with `{pr_or_issue_url, reason: "bot author"}`
+  3. moves on to the next PR — does NOT dispatch the reviewer sub-agent.
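+- **`CHANGED > 200`**: diff too large for the reviewer to ground usefully. (GitHub caps `per_page` at 100 on the files endpoint, so unless `pr_files.json` was paginated, compare against `jq .changed_files pr.json` instead of the array length.) The agent then: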
+  1. calls `event_publish devops_evolution_skipped` with `{pr_or_issue_url, reason: "diff>200 files"}`
+  2. moves on to the next PR.
 
 **Dispatch to reviewer sub-agent**
 
-Hand the reviewer the diff, file list, PR description, and (if present) the target branch's `AGENTS.md` / `CONTRIBUTING.md`. Reviewer returns structured JSON:
+Hand the reviewer the diff, file list, PR description, and (if present) the target branch's `AGENTS.md` / `CONTRIBUTING.md`. Capture the reviewer's structured JSON into `reviewer_output.json` (whatever your routing primitive is — `subagent_invoke`, A2A call, or local fork — write the result to that file so the next shell snippet can read it):
 
 ```json
 {
@@ -1014,6 +1009,10 @@ Hand the reviewer the diff, file list, PR description, and (if present) the targ
 **Post a single review (not N inline comments)**
 
 ```bash
+# Pull verdict + summary out of the reviewer's structured output.
+VERDICT=$(jq -r .verdict reviewer_output.json)
+SUMMARY_BODY=$(jq -r .summary reviewer_output.json)
+
 # Map verdict -> GitHub review event. Never auto-APPROVE.
 BODY_PREFIX=""
 case "$VERDICT" in
@@ -1186,7 +1185,13 @@ Between each phase, write to `devops_queue.json`:
 }
 ```
 
-Stop. Wait for status to flip to `approved` (set by the user out-of-band) before continuing to the next phase.
+Then **end the current turn**. The Hand is `frequency = "continuous"`, so the next tick will re-read `devops_queue.json`:
+
+- If the user (out-of-band) has flipped `status` to `approved`, resume from the next phase.
+- If still `pending`, skip this issue for this tick and re-check on the following one.
+- If flipped to `rejected`, abandon the issue, comment on it with the rejection rationale (if provided), and stop.
+
+Within a single turn, never poll or `sleep` waiting for approval — the agent loop has no in-turn pause primitive, and busy-waiting would block other Hand work and burn tokens. End the turn and let the kernel re-invoke you.
 
 ---