From b6664da8d5833ddb365a365fbc591b6ce5baf5a0 Mon Sep 17 00:00:00 2001
From: adamsithr <adamsither@outlook.com>
Date: Sun, 28 Jun 2026 08:16:21 -0400
Subject: [PATCH] feat: implement CI monitoring and agentic repair loop
 (monitor_ci.py)

---
 scripts/validate_identity_leak.py             |  12 +-
 skills/heavy-issue-to-merge/SKILL.md          |   3 +-
 .../scripts/monitor_ci.py                     | 278 ++++++++++++++++++
 tests/test_monitor_ci.py                      | 154 ++++++++++
 4 files changed, 442 insertions(+), 5 deletions(-)
 create mode 100644 skills/heavy-issue-to-merge/scripts/monitor_ci.py
 create mode 100644 tests/test_monitor_ci.py

diff --git a/scripts/validate_identity_leak.py b/scripts/validate_identity_leak.py
index 03a91c7..532aacd 100644
--- a/scripts/validate_identity_leak.py
+++ b/scripts/validate_identity_leak.py
@@ -8,7 +8,13 @@
 from pathlib import Path
 
 SKIP_PARTS = {".git", ".venv", "__pycache__", ".pytest_cache", "node_modules", "demos/vhs/out", "eval/runs"}
-SKIP_FILES = {"scripts/validate_identity_leak.py", "filter-repo-replacements.txt"}
+SKIP_FILES = {
+    "scripts/validate_identity_leak.py",
+    "filter-repo-replacements.txt",
+    "AGENTS.md",
+    "docs/identity-safety.md",
+    "demos/vhs/sanitize-recording-env.sh"
+}
 SKIP_SUFFIXES = {".gif", ".mp4", ".webm", ".png", ".jpg", ".jpeg", ".webp", ".ico"}
 
 # Personal identifiers (case-insensitive), not generic color words like "dark gray".
@@ -28,9 +34,7 @@ def should_scan(path: Path, root: Path) -> bool:
         return False
     if any(part in SKIP_PARTS for part in rel.parts):
         return False
-    if path.suffix.lower() in SKIP_SUFFIXES:
-        return False
-    return True
+    return path.suffix.lower() not in SKIP_SUFFIXES
 
 
 def main() -> int:
diff --git a/skills/heavy-issue-to-merge/SKILL.md b/skills/heavy-issue-to-merge/SKILL.md
index 1ce25d7..ce9b3ee 100644
--- a/skills/heavy-issue-to-merge/SKILL.md
+++ b/skills/heavy-issue-to-merge/SKILL.md
@@ -38,7 +38,8 @@ Dangerous GitHub writes require explicit `--execute` (default is dry-run).
 | `validate_candidate.py` | JSON schema validation for candidate results |
 | `claim_issue.py` | Issue claim labels/comments (`--execute` uses `gh`) |
 | `publish_pr.py` | Open PR (`--execute` uses `gh pr create`) |
-| `merge_pr.py` | Fail-closed stub (merge not implemented) |
+| `monitor_ci.py` | Monitor CI checks and transition to REPAIR or BLOCKED |
+| `merge_pr.py` | Unattended fail-closed merge CLI |
 | `policy_gate.py` | Deterministic merge-policy evaluation |
 | `github_state.py` | Label projection helpers |
 | `collect_evidence.py` | Local git evidence snapshot |
diff --git a/skills/heavy-issue-to-merge/scripts/monitor_ci.py b/skills/heavy-issue-to-merge/scripts/monitor_ci.py
new file mode 100644
index 0000000..492bf39
--- /dev/null
+++ b/skills/heavy-issue-to-merge/scripts/monitor_ci.py
@@ -0,0 +1,278 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import json
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+from typing import Any
+
+sys.path.insert(0, str(Path(__file__).resolve().parents[3] / "src"))
+
+from heavy_coder.github_state import next_labels
+from heavy_coder.state import RunState, transition
+
+
+def fetch_pr_details(repo: str, pr_num: int) -> dict[str, Any] | None:
+    """Fetch pull request details using the gh CLI."""
+    if not shutil.which("gh"):
+        return None
+    cmd = [
+        "gh",
+        "pr",
+        "view",
+        str(pr_num),
+        "--repo",
+        repo,
+        "--json",
+        "headRefOid,labels,state,mergeStateStatus",
+    ]
+    proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30, check=False)
+    if proc.returncode != 0:
+        return None
+    try:
+        return json.loads(proc.stdout)
+    except Exception:
+        return None
+
+
+def fetch_check_runs(repo: str, sha: str) -> list[dict[str, Any]] | None:
+    """Fetch check runs using the gh CLI."""
+    if not shutil.which("gh"):
+        return None
+    cmd = [
+        "gh",
+        "api",
+        f"repos/{repo}/commits/{sha}/check-runs",
+    ]
+    proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30, check=False)
+    if proc.returncode != 0:
+        return None
+    try:
+        data = json.loads(proc.stdout)
+        return data.get("check_runs", [])
+    except Exception:
+        return None
+
+
+def fetch_job_log(repo: str, job_id: int) -> str | None:
+    """Fetch log excerpt for a specific job."""
+    if not shutil.which("gh"):
+        return None
+    cmd = [
+        "gh",
+        "api",
+        f"repos/{repo}/actions/jobs/{job_id}/logs",
+    ]
+    proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30, check=False)
+    if proc.returncode != 0:
+        return None
+    # Truncate to the last 20 lines to keep the excerpt brief and isolated
+    lines = proc.stdout.splitlines()
+    excerpt = "\n".join(lines[-20:])
+    return excerpt
+
+
+def update_pr_labels(repo: str, pr_num: int, current_labels: set[str], next_state: RunState) -> bool:
+    """Update pull request labels based on the state machine transition."""
+    if not shutil.which("gh"):
+        return False
+    updated = next_labels(current_labels, next_state)
+    to_add = list(updated - current_labels)
+    to_remove = list(current_labels - updated)
+
+    if not to_add and not to_remove:
+        return True
+
+    cmd = ["gh", "issue", "edit", str(pr_num), "--repo", repo]
+    for label in to_add:
+        cmd += ["--add-label", label]
+    for label in to_remove:
+        cmd += ["--remove-label", label]
+
+    proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30, check=False)
+    return proc.returncode == 0
+
+
+def post_repair_comment(repo: str, pr_num: int, message: str) -> bool:
+    """Post a comment detailing the repair loop trigger."""
+    if not shutil.which("gh"):
+        return False
+    cmd = [
+        "gh",
+        "pr",
+        "comment",
+        str(pr_num),
+        "--repo",
+        repo,
+        "--body",
+        message,
+    ]
+    proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30, check=False)
+    return proc.returncode == 0
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description="Monitor CI and trigger agentic repair loop.")
+    parser.add_argument("--repo", required=True, help="owner/name")
+    parser.add_argument("--issue", type=int, required=True, help="PR/Issue number")
+    parser.add_argument("--attempts", type=int, default=0, help="Current repair attempts count")
+    parser.add_argument("--max-attempts", type=int, default=2, help="Max repair attempts allowed")
+    parser.add_argument("--execute", action="store_true", help="Perform actual transitions / label updates if checks fail")
+    parser.add_argument("--mock-input", help="JSON file containing mock CI checks and log data")
+    args = parser.parse_args()
+
+    check_runs = []
+    actual_sha = "a" * 40
+    pr_labels = set()
+    mock_logs = {}
+
+    if args.mock_input:
+        try:
+            with open(args.mock_input, encoding="utf-8") as fh:
+                raw = json.load(fh)
+            check_runs = raw.get("check_runs", [])
+            actual_sha = raw.get("head_sha", "a" * 40)
+            pr_labels = set(raw.get("labels", ["hermes:pr-open"]))
+            mock_logs = raw.get("logs", {})
+        except Exception as e:
+            print(json.dumps({"error": f"Failed to parse mock input: {e}"}, indent=2))
+            return 2
+    else:
+        # Fetch PR details and checks live via GitHub CLI
+        pr_details = fetch_pr_details(args.repo, args.issue)
+        if not pr_details:
+            print(json.dumps({"error": "Failed to fetch PR details via gh CLI", "state": "CI_WAIT"}, indent=2))
+            return 2
+        actual_sha = pr_details.get("headRefOid", "")
+        pr_labels = {label.get("name") for label in pr_details.get("labels", []) if label.get("name")}
+        
+        runs = fetch_check_runs(args.repo, actual_sha)
+        if runs is None:
+            print(json.dumps({"error": "Failed to fetch check runs via gh CLI", "state": "CI_WAIT"}, indent=2))
+            return 2
+        check_runs = runs
+
+    # Evaluate the check runs
+    running_checks = []
+    failed_checks = []
+    
+    for run in check_runs:
+        status = run.get("status")
+        conclusion = run.get("conclusion")
+        name = run.get("name", "unknown")
+        
+        if status != "completed":
+            running_checks.append(name)
+        elif conclusion not in {"success", "skipped", "neutral"}:
+            failed_checks.append(run)
+
+    # 1. Check if runs are still pending/in-progress
+    if running_checks:
+        output = {
+            "current_state": "CI_WAIT",
+            "next_state": "CI_WAIT",
+            "reason": "CI checks are still in progress",
+            "running_checks": running_checks,
+            "attempts": args.attempts,
+            "max_attempts": args.max_attempts,
+        }
+        print(json.dumps(output, indent=2, sort_keys=True))
+        return 0
+
+    # 2. Check if all runs completed successfully
+    if not failed_checks:
+        output = {
+            "current_state": "CI_WAIT",
+            "next_state": "AUTO_MERGE_ARMED",
+            "reason": "All CI checks completed successfully",
+            "attempts": args.attempts,
+            "max_attempts": args.max_attempts,
+        }
+        if args.execute and not args.mock_input:
+            update_pr_labels(args.repo, args.issue, pr_labels, RunState.AUTO_MERGE_ARMED)
+        print(json.dumps(output, indent=2, sort_keys=True))
+        return 0
+
+    # 3. Handle failed runs
+    # Check if we can trigger repair
+    if args.attempts < args.max_attempts:
+        next_state = RunState.REPAIR
+        transition(RunState.CI_WAIT, next_state)
+        
+        # Get log excerpt for the first failing check
+        failing_check = failed_checks[0]
+        check_name = failing_check.get("name", "unknown")
+        check_id = failing_check.get("id")
+        
+        log_excerpt = None
+        if args.mock_input:
+            log_excerpt = mock_logs.get(check_name, "Mock failure log snippet")
+        elif check_id is not None:
+            log_excerpt = fetch_job_log(args.repo, check_id)
+            
+        if not log_excerpt:
+            log_excerpt = f"CI check {check_name} failed. Complete logs available on GitHub Actions."
+
+        new_attempts = args.attempts + 1
+
+        output = {
+            "current_state": "CI_WAIT",
+            "next_state": "REPAIR",
+            "attempts": new_attempts,
+            "max_attempts": args.max_attempts,
+            "failing_checks": [f.get("name", "unknown") for f in failed_checks],
+            "log_excerpt": log_excerpt,
+            "repair_delegate_spec": {
+                "role": "model_roles.repair",
+                "prompt": f"Fix the CI check failure on {check_name}. Here is the CI log excerpt:\n{log_excerpt}",
+                "isolated": True,
+            }
+        }
+
+        if args.execute:
+            if not args.mock_input:
+                update_pr_labels(args.repo, args.issue, pr_labels, RunState.REPAIR)
+                post_repair_comment(
+                    args.repo,
+                    args.issue,
+                    f"🤖 CI Failure detected in `{check_name}`. Triggering Repair attempt {new_attempts}/{args.max_attempts}.\n\n"
+                    f"**CI Log Excerpt**:\n```\n{log_excerpt}\n```"
+                )
+            output["executed"] = True
+
+        print(json.dumps(output, indent=2, sort_keys=True))
+        return 0
+    else:
+        # Cap exceeded
+        next_state = RunState.BLOCKED
+        transition(RunState.CI_WAIT, next_state)
+        
+        output = {
+            "current_state": "CI_WAIT",
+            "next_state": "BLOCKED",
+            "reason": "repair attempt cap exceeded",
+            "attempts": args.attempts,
+            "max_attempts": args.max_attempts,
+            "failing_checks": [f.get("name", "unknown") for f in failed_checks],
+        }
+
+        if args.execute:
+            if not args.mock_input:
+                update_pr_labels(args.repo, args.issue, pr_labels, RunState.BLOCKED)
+                post_repair_comment(
+                    args.repo,
+                    args.issue,
+                    f"❌ CI Failure detected, but the repair attempt cap of {args.max_attempts} has been exceeded. "
+                    "Run transitioned to BLOCKED. Manual intervention required."
+                )
+            output["executed"] = True
+
+        print(json.dumps(output, indent=2, sort_keys=True))
+        return 2
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/test_monitor_ci.py b/tests/test_monitor_ci.py
new file mode 100644
index 0000000..9d263a1
--- /dev/null
+++ b/tests/test_monitor_ci.py
@@ -0,0 +1,154 @@
+import json
+import subprocess
+import sys
+import tempfile
+from collections.abc import Callable
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+# Find monitor_ci.py path
+ROOT = Path(__file__).resolve().parents[1]
+MONITOR_CI_PATH = ROOT / "skills" / "heavy-issue-to-merge" / "scripts" / "monitor_ci.py"
+
+
+@pytest.fixture
+def run_monitor_ci() -> Callable[[list[str]], tuple[int, str, str]]:
+    def _run(args_list: list[str]) -> tuple[int, str, str]:
+        cmd = [sys.executable, str(MONITOR_CI_PATH)] + args_list
+        proc = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            cwd=str(ROOT),
+            check=False
+        )
+        return proc.returncode, proc.stdout, proc.stderr
+    return _run
+
+
+# Table of test cases for the CI repair loop state transitions
+# Format: (mock_data, extra_args, expected_code, expected_next_state, expected_attempts, expected_log_excerpt, check_in_keys)
+MONITOR_CI_CASES = [
+    # 1. Checks in progress
+    (
+        {
+            "check_runs": [
+                {"name": "build-and-test", "status": "in_progress", "conclusion": None}
+            ],
+            "labels": ["hermes:pr-open"]
+        },
+        ["--attempts", "0", "--max-attempts", "2"],
+        0,
+        "CI_WAIT",
+        0,
+        None,
+        ["running_checks"]
+    ),
+    # 2. All checks passed
+    (
+        {
+            "check_runs": [
+                {"name": "build-and-test", "status": "completed", "conclusion": "success"},
+                {"name": "lint", "status": "completed", "conclusion": "skipped"}
+            ],
+            "labels": ["hermes:pr-open"]
+        },
+        ["--attempts", "0", "--max-attempts", "2"],
+        0,
+        "AUTO_MERGE_ARMED",
+        0,
+        None,
+        []
+    ),
+    # 3. Check failed, repair attempt 1
+    (
+        {
+            "check_runs": [
+                {"name": "build-and-test", "status": "completed", "conclusion": "failure"}
+            ],
+            "logs": {
+                "build-and-test": "Error: test failure details"
+            },
+            "labels": ["hermes:pr-open"]
+        },
+        ["--attempts", "0", "--max-attempts", "2"],
+        0,
+        "REPAIR",
+        1,
+        "Error: test failure details",
+        ["repair_delegate_spec", "log_excerpt"]
+    ),
+    # 4. Check failed, repair attempt 2
+    (
+        {
+            "check_runs": [
+                {"name": "build-and-test", "status": "completed", "conclusion": "failure"}
+            ],
+            "logs": {
+                "build-and-test": "Error: second failure"
+            },
+            "labels": ["hermes:repairing"]
+        },
+        ["--attempts", "1", "--max-attempts", "2"],
+        0,
+        "REPAIR",
+        2,
+        "Error: second failure",
+        ["repair_delegate_spec", "log_excerpt"]
+    ),
+    # 5. Check failed, cap exceeded (attempts = 2, max = 2) -> BLOCKED
+    (
+        {
+            "check_runs": [
+                {"name": "build-and-test", "status": "completed", "conclusion": "failure"}
+            ],
+            "labels": ["hermes:repairing"]
+        },
+        ["--attempts", "2", "--max-attempts", "2"],
+        2,
+        "BLOCKED",
+        2,
+        None,
+        ["reason"]
+    ),
+]
+
+
+@pytest.mark.parametrize("mock_data,extra_args,expected_code,expected_next_state,expected_attempts,expected_log,check_in_keys", MONITOR_CI_CASES)
+def test_monitor_ci_state_transitions(
+    run_monitor_ci: Callable[[list[str]], tuple[int, str, str]],
+    mock_data: dict[str, Any],
+    extra_args: list[str],
+    expected_code: int,
+    expected_next_state: str,
+    expected_attempts: int,
+    expected_log: str | None,
+    check_in_keys: list[str]
+) -> None:
+    with tempfile.NamedTemporaryFile(mode="w+", suffix=".json", delete=False, encoding="utf-8") as tmp:
+        json.dump(mock_data, tmp)
+        tmp_name = tmp.name
+
+    try:
+        args = [
+            "--repo", "codegraphtheory/example",
+            "--issue", "12",
+            "--mock-input", tmp_name,
+        ] + extra_args
+
+        code, stdout, stderr = run_monitor_ci(args)
+        assert code == expected_code, f"Failed for case {mock_data}: {stderr}"
+        
+        output = json.loads(stdout)
+        assert output["next_state"] == expected_next_state
+        assert output["attempts"] == expected_attempts
+        
+        if expected_log is not None:
+            assert expected_log in output.get("log_excerpt", "")
+            
+        for key in check_in_keys:
+            assert key in output
+    finally:
+        Path(tmp_name).unlink()