From a8de203d6833ab192d5e21f623b5304f64216cc1 Mon Sep 17 00:00:00 2001 From: notque Date: Sun, 10 May 2026 16:54:39 +0000 Subject: [PATCH 1/4] feat(joy-check): raise pass threshold to 100% with zero tolerance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - SKILL.md: pass criteria changed from >= 60 to == 100 (both modes); --strict flag threshold updated to 100; subtle-pattern instruction reframed positively - instruction-rubric.md: pass criteria updated to score == 100 AND zero primary negative patterns - writing-rubric.md: pass criteria updated to score == 100 AND zero GRIEVANCE paragraphs Joy-check instruction-mode analysis of SKILL.md: 100 — zero primary negative patterns. All negative-word occurrences are inside code blocks (contextual exceptions). --- .../voice/skills/joy-check/SKILL.md | 205 ++++++++++++++++++ .../references/instruction-rubric.md | 136 ++++++++++++ .../joy-check/references/writing-rubric.md | 170 +++++++++++++++ 3 files changed, 511 insertions(+) create mode 100644 private-skills/voice/skills/joy-check/SKILL.md create mode 100644 private-skills/voice/skills/joy-check/references/instruction-rubric.md create mode 100644 private-skills/voice/skills/joy-check/references/writing-rubric.md diff --git a/private-skills/voice/skills/joy-check/SKILL.md b/private-skills/voice/skills/joy-check/SKILL.md new file mode 100644 index 00000000..ad74078f --- /dev/null +++ b/private-skills/voice/skills/joy-check/SKILL.md @@ -0,0 +1,205 @@ +--- +name: joy-check +description: "Validate content framing on joy-grievance spectrum." 
+user-invocable: false +argument-hint: "[--fix] [--strict] [--mode writing|instruction] " +command: /joy-check +allowed-tools: + - Read + - Write + - Edit + - Bash + - Grep + - Glob +routing: + triggers: + - joy check + - check framing + - tone check + - negative framing + - joy validation + - too negative + - reframe positively + - positive framing check + - instruction framing + pairs_with: + - voice-writer + - anti-ai-editor + - voice-validator + - skill-creator + complexity: Simple + category: content +--- + +# Joy Check + +Two modes: + +- **writing** — Joy-grievance spectrum for human-facing content (blog posts, emails, articles). Evaluates curiosity/generosity vs. grievance/accusation framing. +- **instruction** — Positive framing for LLM-facing content (agents, skills, pipelines). Evaluates "what to do" vs. "what to avoid" (ADR-127). + +Evaluates each paragraph/instruction independently, produces a score (0-100), suggests reframes without modifying content. Flags: `--fix` rewrites flagged items in place and re-verifies; `--strict` fails on any item below 100; `--mode writing|instruction` overrides auto-detection. + +Checks *framing*, not *topic* or *voice*. Voice fidelity → voice-validator. AI pattern detection → anti-ai-editor. + +## Reference Loading Table + +| Signal | Load These Files | Why | +|---|---|---| +| Instruction mode (agents, skills, pipelines) | `instruction-rubric.md` | Positive framing rules, flag patterns, rewrite strategies, and scoring for LLM-facing content. | +| Writing mode (blog posts, emails, articles) | `writing-rubric.md` | Joy-grievance spectrum, subtle patterns, scoring, and examples for human-facing content. | + +## Instructions + +### Phase 0: DETECT MODE + +Auto-detection (priority order): +1. Explicit `--mode` flag → use that +2. `agents/*.md` → **instruction** +3. `skills/*/SKILL.md` → **instruction** +4. `skills/workflow/references/*.md` → **instruction** +5. `CLAUDE.md` or `README.md` → **instruction** +6. Everything else → **writing** + +Load `references/{mode}-rubric.md` for scoring criteria and examples. 
+ +**GATE**: Mode determined, rubric loaded. + +### Phase 1: PRE-FILTER + +Regex scanning as a fast gate before LLM semantic analysis. + +**Writing mode**: +```bash +python3 ~/.claude/scripts/scan-negative-framing.py [file] +``` + +**Instruction mode**: +```bash +grep -nE 'NEVER|[Dd]o NOT|must NOT|FORBIDDEN' [file] +grep -nE "^-?\s*Don't|^-?\s*Avoid|^#+.*Anti-[Pp]attern|^#+.*Avoid" [file] +``` + +Report findings with reframe suggestions from the rubric. If `--fix`, apply reframes and re-run. + +**GATE**: Zero regex/grep hits in instructional context (hits inside code blocks, quoted samples, and the other contextual exceptions listed in the rubric do not count). Resolve obvious patterns before Phase 2. + +### Phase 2: ANALYZE + +**Step 1: Read content** + +Read full file. Skip frontmatter and code blocks. +- **Writing**: Identify paragraphs (blank-line separated). Skip blockquotes. +- **Instruction**: Identify instructional statements — bullets, table cells, imperatives, headings. Skip examples, code blocks, quoted dialogue, file paths. + +**Step 2: Evaluate against rubric** + +Apply scoring dimensions from `references/{mode}-rubric.md`. + +For **writing**: Joy-grievance lens. Watch for subtle patterns in `references/writing-rubric.md` (defensive disclaimers, accumulative grievance, passive-aggressive factuality, reluctant generosity). + +For **instruction**: Positive-negative lens. Check against patterns table in `references/instruction-rubric.md`. Contextual exceptions: subordinate negatives attached to positive instructions are PASS, as are negatives in code examples, writing samples, and technical terms. + +**Step 3: Score each item** + +Apply the rubric's scoring scale. For items scoring CAUTION/GRIEVANCE (writing) or NEGATIVE-LEANING/PROHIBITION-HEAVY (instruction), draft specific reframe suggestions preserving substance. + +Flag subtle patterns with priority — they are the primary purpose of this LLM phase. The regex pre-filter catches explicit patterns; LLM analysis exists precisely to catch what regex misses. + +**GATE**: All items scored. Reframe suggestions drafted for flagged items. 
+ +### Phase 3: REPORT + +**Step 1: Calculate overall score** + +Average all item scores. Pass criteria: +- **Writing**: Score == 100 AND zero GRIEVANCE paragraphs +- **Instruction**: Score == 100 AND zero primary negative patterns in instructional context + +**Step 2: Output** + +``` +JOY CHECK: [file] +Mode: [writing|instruction] +Score: [0-100] +Status: PASS / FAIL + +Items: + [writing mode] + P1 (L10-12): JOY [85] -- explorer framing, curiosity + P3 (L18-22): CAUTION [40] -- "confused" leans defensive + -> Reframe: Focus on what you learned from the confusion + + [instruction mode] + L33: NEGATIVE [20] -- "NEVER edit code directly" + -> Rewrite: "Route all code modifications to domain agents" + L45: PASS [90] -- "Create feature branches for all changes" + L78: PASS [85] -- "Credentials stay in .env files, never in code" (subordinate negative OK) + +Overall: [summary of framing arc] +``` + +**Step 3: Fix mode** + +If `--fix`: +1. Rewrite flagged items using drafted suggestions +2. Preserve substance — change only framing +3. Re-run Phase 2 on rewrites to verify +4. Maximum 3 iterations if fixes introduce new flags + +**GATE**: Report produced. If `--fix`, all rewrites applied and re-verified. + +--- + +### Integration + +**Writing pipeline**: +``` +CONTENT --> voice-validator --> scan-ai-patterns --> joy-check --mode writing --> anti-ai-editor +``` + +**Instruction pipeline**: +``` +SKILL.md --> joy-check --mode instruction --> fix flagged patterns --> re-verify +``` + +**Auto-invocation points**: +- `skill-creator`: after generating a new skill +- `agent-upgrade`: after modifying an agent +- `voice-writer`: during validation +- `doc-pipeline`: for toolkit documentation + +Invoke standalone via `/joy-check [file]` (auto-detects mode) or with explicit `--mode`. + +--- + +## Error Handling + +### Error: "File Not Found" +Verify path with `ls -la`. Use glob to search: `Glob **/*.md`. Confirm working directory. 
+ +### Error: "Regex Scanner Fails or Not Found" +Verify `~/.claude/scripts/scan-negative-framing.py` exists (the path Phase 1 invokes). Requires Python 3.10+. If unavailable, skip to Phase 2 — the pre-filter is an optimization, not a requirement. + +### Error: "All Paragraphs Score GRIEVANCE" +Content is fundamentally grievance-framed. Report scores honestly. Suggest full rewrite with different framing premise, not paragraph-level fixes. + +### Error: "Fix Mode Fails After 3 Iterations" +Output best version with remaining concerns. Explain which rubric dimensions resist correction. The framing premise itself may need rethinking. + +--- + +## References + +### Rubric Files +- `references/writing-rubric.md` — Joy-grievance spectrum, subtle patterns, scoring, examples +- `references/instruction-rubric.md` — Positive framing rules, patterns, rewrite strategies, examples + +### Scripts +- `scan-negative-framing.py` — Regex pre-filter for grievance patterns (writing mode, Phase 1) + +### Complementary Skills +- `voice-validator` — Voice fidelity (different concern) +- `anti-ai-editor` — AI pattern detection (different concern) +- `voice-writer` — Invokes joy-check during validation +- `skill-creator` — Invokes joy-check in instruction mode diff --git a/private-skills/voice/skills/joy-check/references/instruction-rubric.md b/private-skills/voice/skills/joy-check/references/instruction-rubric.md new file mode 100644 index 00000000..9ecbcdb4 --- /dev/null +++ b/private-skills/voice/skills/joy-check/references/instruction-rubric.md @@ -0,0 +1,136 @@ +# Instruction Rubric — Positive Framing for LLM Instructions + +This rubric applies to agent, skill, and pipeline markdown files — instructions read by LLMs, not humans. The principle: state the desired action, not the forbidden one. An LLM needs to know what TO DO, not what to avoid. + +## Positive Instruction Framing Rubric + +Every instruction should tell the reader what action to take. 
Prohibitions define a boundary without specifying where to go; positive framing gives a clear action target. + +| Dimension | Positive (PASS) | Negative (FAIL) | +|-----------|----------------|----------------| +| **Action framing** | "Route all code modifications to domain agents" | "NEVER edit code directly" | +| **Specific instruction** | "Stage files by name: `git add specific-file.py`" | "do NOT use git add -A" | +| **Table headings** | "Preferred Patterns", "Hard Gate Patterns" | "Anti-Patterns", "FORBIDDEN Patterns" | +| **Safety boundaries** | "Create feature branches for all changes" | "Never commit to main" | +| **Error handling** | "exit 0 on errors to keep tools available" | "must NEVER block tools" | +| **Double negatives** | "Run validation before marking complete" | "Don't skip validation" | +| **Section organization** | "What to do" tables showing correct approach | "What NOT to do" tables showing prohibited approach | + +## Patterns to Flag + +### Primary patterns (always flag when used as instructions) + +| Pattern | Regex | Example | +|---------|-------|---------| +| NEVER (caps) | `\bNEVER\b` | "NEVER edit code directly" | +| do NOT / Do NOT | `\b[Dd]o NOT\b` | "Do NOT use git add -A" | +| must NOT | `\bmust NOT\b` | "must NOT block tools" | +| FORBIDDEN | `\bFORBIDDEN\b` | "FORBIDDEN Patterns" | +| Don't (instruction start) | `^-?\s*Don't\b` | "Don't mock the database" | +| Avoid (as heading/instruction) | `^\s*#{1,6}.*Avoid|^-?\s*Avoid\b` | "### Patterns to Avoid" | +| Anti-Pattern (in headings) | `^\s*#{1,6}.*[Aa]nti-[Pp]attern` | "### Common Anti-Patterns" | + +### Contextual exceptions (allow these) + +These are PASS even though they contain negative words: + +- **Subordinate negatives attached to positive instructions**: "Credentials stay in .env files, never in code" — the primary instruction is positive ("stay in .env files"), the "never" is a subordinate boundary clarification +- **Code examples showing bad patterns**: `// NEVER` in a 
code comment demonstrating what SQL injection looks like — this is illustrative, not instructional +- **Writing samples and user dialogue**: "Don't do this!" in an example of how users speak — this is quoted content +- **Technical terms**: "Copula Avoidance" is a proper term for an AI writing pattern — the word "Avoidance" is part of the term, not a prohibition +- **File path references**: `references/preferred-patterns.md` — this is a filename, not an instruction +- **Descriptive text about behavior**: "tests do not cover edge cases" — this describes a state, not an instruction + +## Rewrite Rules + +When flagging a negative pattern, suggest a specific positive rewrite: + +| Negative Pattern | Positive Rewrite Strategy | +|-----------------|--------------------------| +| Prohibition ("NEVER X") | State the action: "Do Y instead" | +| Warning ("do NOT use X") | Give the specific alternative: "Use Y: `example`" | +| Anti-pattern table | Invert to pattern table: show what to do, not what to avoid | +| Fear-based ("must NEVER block") | State the outcome: "exit 0 to keep available" | +| Double negative ("Don't skip") | Direct instruction: "Run before marking complete" | +| "Avoid" heading | Replace with "Preferred" or "Recommended" | +| "Anti-Pattern" heading | Replace with "Preferred Patterns" or "Patterns to Detect and Fix" | + +## Scoring + +| Score | Label | Meaning | +|-------|-------|---------| +| 80-100 | **POSITIVE** | Instructions frame through desired actions | +| 50-79 | **MIXED** | Some instructions are positive, some are prohibition-based | +| 30-49 | **NEGATIVE-LEANING** | Most instructions tell what to avoid rather than what to do | +| 0-29 | **PROHIBITION-HEAVY** | Instructions are primarily "don't do X" framing | + +**Pass criteria**: Score == 100 AND zero primary negative patterns in instructional context. + +## Principles + +1. **State the desired action, not the forbidden one** — The LLM needs to know what TO DO +2. 
**Preserve safety intent** — "Never commit to main" becomes "Create feature branches for all changes" — same protection, positive framing +3. **Replace anti-pattern tables with pattern tables** — Show "What to do instead", not "What NOT to do" +4. **Keep the WHY** — "because X" explanations stay unchanged; only the framing changes +5. **Subordinate negatives are fine** — "Credentials stay in .env files, never in code" is PASS because the positive instruction leads + +## Examples + +### Example 1: Router Instructions + +**NEGATIVE (FAIL):** +```markdown +**What the main thread NEVER does:** Read code files, edit files, run tests, +write docs, handle ANY Simple+ task directly. +``` + +**POSITIVE (PASS):** +```markdown +**The main thread delegates to agents:** code reading (Explore agent), file +edits (domain agents), test runs (agent with skill), documentation +(technical-documentation-engineer), all Simple+ tasks. +``` + +**Why the second works:** Tells the LLM exactly where each task type goes instead of listing what's forbidden. + +### Example 2: Safety Boundaries + +**NEGATIVE (FAIL):** +```markdown +Route to agents that create branches; never allow direct main/master commits, +because main branch commits affect everyone. +``` + +**POSITIVE (PASS):** +```markdown +Route to agents that create feature branches for all commits, because main +branch commits affect everyone. +``` + +**Why the second works:** Same safety boundary, but the instruction says what to create (feature branches) rather than what to prevent (main commits). + +### Example 3: Section Headings + +**NEGATIVE (FAIL):** +```markdown +## Anti-Patterns +### FORBIDDEN Patterns (HARD GATE) +| Pattern | Why FORBIDDEN | +``` + +**POSITIVE (PASS):** +```markdown +## Preferred Patterns +### Hard Gate Patterns +| Pattern | Why Blocked | +``` + +**Why the second works:** "Preferred Patterns" tells the reader what to aim for. "Hard Gate Patterns" preserves the enforcement without the fear framing. 
+ +### Example 4: Subordinate Negative (PASS) + +```markdown +Credentials stay in .env files, never in code or logs. +``` + +This is PASS — the primary instruction is positive ("stay in .env files") and the "never" is a subordinate boundary that clarifies the positive instruction. The reader knows both what to do AND the boundary. diff --git a/private-skills/voice/skills/joy-check/references/writing-rubric.md b/private-skills/voice/skills/joy-check/references/writing-rubric.md new file mode 100644 index 00000000..458d54cc --- /dev/null +++ b/private-skills/voice/skills/joy-check/references/writing-rubric.md @@ -0,0 +1,170 @@ +# Writing Rubric: Joy-Grievance Spectrum + +This rubric applies to human-facing content: blog posts, emails, articles, documentation meant to be read by people. + +## Joy Framing Rubric + +Every paragraph should frame its subject through curiosity, wonder, generosity, or earned satisfaction. Content that builds a case for grievance alienates readers and undermines the author's credibility, even when the underlying experience is legitimate. 
+ +| Dimension | Joy-Centered (PASS) | Grievance-Centered (FAIL) | +|-----------|-------------------|--------------------------| +| **Subject position** | Author as explorer, builder, learner | Author as victim, wronged party, unrecognized genius | +| **Other people** | Fellow travelers, interesting minds, people figuring things out | Opponents, thieves, people who should have done better | +| **Difficult experiences** | Interesting, surprising, made me think differently | Unfair, hurtful, someone should fix this | +| **Uncertainty** | Comfortable, curious, "none of us know" | Anxious, defensive, "I need to prove" | +| **Action framing** | "I decided to", "I realized", "I learned" | "I was forced to", "I had no choice", "they made me" | +| **Closing energy** | Forward-looking, building, sharing, exploring | Cautionary, warning, demanding, lamenting | + +## Subtle Patterns (LLM-only detection) + +These patterns are what the regex scanner cannot catch. They are the primary purpose of LLM analysis: + +- **Defensive disclaimers** ("I'm not accusing anyone", "This isn't about blame"): If the author has to disclaim, the framing is already grievance-adjacent. The disclaimer signals the content that follows is accusatory enough to need a shield. Flag the paragraph and recommend removing both the disclaimer and the accusatory content it shields. +- **Accumulative grievance**: Each paragraph is individually mild, but together they build a case for being wronged. A reader who finishes the piece feeling "that person was wronged" has been led through a prosecution. Flag the accumulation pattern and recommend interspersing observations with what the author learned, built, or found interesting. +- **Passive-aggressive factuality** ("The timeline shows X. The repo was created Y days later. I'll let you draw your own conclusions."): Presenting facts in prosecution order is framing, not neutrality. "I'll let you draw your own conclusions" deputizes the reader as jury. 
Flag and recommend including facts where relevant to the experience, not as evidence. +- **Reluctant generosity** ("I'm not saying they did anything wrong, BUT..."): The "but" negates the generosity. This is grievance wearing a generous mask. Flag and recommend being generous without qualification, or acknowledging the complexity directly. + +## Scoring + +| Score | Label | Meaning | +|-------|-------|---------| +| 80-100 | **JOY** | Frames through curiosity, generosity, or earned satisfaction | +| 50-79 | **NEUTRAL** | Factual, neither joy nor grievance | +| 30-49 | **CAUTION** | Leans toward grievance but recoverable with reframing | +| 0-29 | **GRIEVANCE** | Frames through accusation, victimhood, or bitterness | + +**Pass criteria**: Score == 100 AND zero GRIEVANCE paragraphs. + +## The Joy Principle + +**A difficult experience is not a negative topic.** Failure, confusion, being wrong, losing something. These are all worth writing about. The topic can involve surprise, frustration, even grief. + +**The framing is what matters.** The same experience can be told as: +- "The project failed because leadership wouldn't listen" (grievance) +- "The project failed and it changed how I understand what makes a team actually work" (joy/curiosity) + +Both describe the same events. The second frames it through the lens that defines joy-centered content: the specific satisfaction found in understanding something you didn't understand before. + +**Joy doesn't mean happiness.** It means engagement, curiosity, the energy of figuring things out. A joy-centered post about a frustrating debugging session isn't happy. It frames the frustration as the puzzle and the understanding as the reward. That's the lens. + +## Examples + +These examples show the same content reframed from grievance to joy. Each covers a different topic to demonstrate that the pattern applies broadly. The substance stays. Only the framing changes. 
+ +### Example 1: A Project That Failed + +**GRIEVANCE (FAIL):** +``` +I spent six months building this and leadership killed it. They never +gave it a real chance. The team was understaffed, the deadline was +impossible, and when it didn't ship on time they blamed engineering. +``` + +**JOY (PASS):** +``` +I spent six months on a project that got cancelled. The team was small, +the deadline was ambitious, and we didn't make it. What I didn't expect +was how much I'd learn about what makes a technical bet actually land +versus just being a good idea on paper. +``` + +**Why the second works:** The author is a learner extracting insight, not a victim cataloguing injustice. "What I didn't expect" signals curiosity. The failure is acknowledged but framed as the start of understanding. + +### Example 2: Finding Someone Solved the Same Problem + +**GRIEVANCE (FAIL):** +``` +I was halfway through the implementation when I found an open-source +library that does the exact same thing. Six weeks of work, wasted. +If I'd found it earlier none of this would have happened. +``` + +**JOY (PASS):** +``` +Halfway through, I found an open-source library that solved the same +problem. My first reaction was frustration, but then I started reading +their code. They'd made completely different trade-offs than I had, +and comparing the two taught me more about the problem space than +either approach alone. +``` + +**Why the second works:** "Started reading their code" is an explorer's response. The parallel work becomes a learning opportunity, not wasted effort. Frustration is acknowledged directly, then moved through. + +### Example 3: Giving Away Work You Could Have Monetized + +**GRIEVANCE (FAIL):** +``` +I open-sourced the whole thing and nobody even starred the repo. People +are using it — I can see the clone stats — but nobody bothers to +contribute back or even say thanks. Open source is a thankless grind. 
+``` + +**JOY (PASS):** +``` +I open-sourced it and the response was mostly quiet. Some clones, a +few issues filed, not much else. But every once in a while someone +emails to say it saved them a week of work, and that's a strange kind +of satisfaction, knowing something you built is just quietly useful +somewhere. +``` + +**Why the second works:** "Quietly useful" reframes silence as a form of impact. The author finds satisfaction in the work's utility rather than demanding visible reciprocity. + +### Example 4: Being Passed Over for Recognition + +**GRIEVANCE (FAIL):** +``` +I've been thinking about why this bothered me, and it's because the +work speaks for itself. Two years of contributions and they promoted +someone who joined six months ago. Merit clearly doesn't matter here. +``` + +**JOY (PASS):** +``` +I've been thinking about what I actually want from work, and it turns +out "being recognized" is too vague to be useful. What I want is to +work on problems that stretch me, with people who take the craft +seriously. Once I framed it that way, the promotion question got +a lot simpler. +``` + +**Why the second works:** Locates the feeling in self-knowledge ("what I actually want") not entitlement ("merit should be rewarded"). The author discovers something about themselves rather than building a case against someone else. + +### Example 5: Wrapping Up a Career Transition + +**GRIEVANCE (FAIL):** +``` +I left because the industry stopped valuing the kind of deep work I +do. Everything is about speed now, shipping fast, cutting corners. +I refuse to compromise on quality, and if that means moving on, fine. +``` + +**JOY (PASS):** +``` +I left because I wanted to find out what I'd build if I got to choose +the constraints. Turns out the answer is weirder and more interesting +than what I was building before. I don't know where it leads, but the +not-knowing is part of what makes it fun. 
+``` + +**Why the second works:** Ends on what the author is moving toward, not what they're escaping from. "The not-knowing is part of what makes it fun" carries experimental energy. No industry-as-villain framing. + +### Example 6: Ambiguous Feedback from a Collaborator + +**GRIEVANCE (FAIL):** +``` +They said the design "needed more thought" but wouldn't say what was +wrong with it. Classic move — vague enough to block progress without +having to commit to an actual opinion. +``` + +**JOY (PASS):** +``` +They said the design "needed more thought," which is the kind of +feedback that's frustrating in the moment but sometimes means there's +something I'm not seeing yet. I went back and sat with it for a day, +and they were right. There was a whole failure mode I'd been +hand-waving past. +``` + +**Why the second works:** The author sits with discomfort instead of building a case. "They were right" is generous without being self-deprecating. The frustration is honest but leads to discovery. From a9433888954a99cd8fd8ac4039cef9ad44996265 Mon Sep 17 00:00:00 2001 From: notque Date: Sun, 10 May 2026 17:03:10 +0000 Subject: [PATCH 2/4] feat: add deterministic joy-check CI for instruction-mode patterns Extends validate_positive_instruction_docs.py with the two missing primary patterns (NEVER caps, Don't instruction-start) from the instruction rubric, tightens the Avoid heading regex to eliminate a false positive on technical phrases like "to Avoid N+1", and adds voice-corpus files to the allowlist as a documented contextual exception. New test file covers all 7 primary patterns as golden fixtures, 5 contextual exceptions that must pass (fenced blocks, blockquotes, subordinate lowercase never, clean files, positive rewrites), and a parametrized fleet scan across 44 agents and 116 SKILL.md files: 2 voice corpus files skipped (allowlisted), 177 other components pass. Adds a `joy-check` CI job to test.yml that runs on every push/PR. 
--- .github/workflows/test.yml | 10 + .../tests/test_joy_check_instruction_mode.py | 262 ++++++++++++++++++ scripts/validate_positive_instruction_docs.py | 10 +- 3 files changed, 281 insertions(+), 1 deletion(-) create mode 100644 scripts/tests/test_joy_check_instruction_mode.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d8b4864f..c1436d1f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -50,3 +50,13 @@ jobs: with: python-version: "3.12" - run: python scripts/check-routing-drift.py --verbose + + joy-check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 + with: + python-version: "3.12" + - run: pip install pytest pyyaml + - run: python -m pytest scripts/tests/test_joy_check_instruction_mode.py -v --tb=short diff --git a/scripts/tests/test_joy_check_instruction_mode.py b/scripts/tests/test_joy_check_instruction_mode.py new file mode 100644 index 00000000..1b89c8d1 --- /dev/null +++ b/scripts/tests/test_joy_check_instruction_mode.py @@ -0,0 +1,262 @@ +"""Deterministic joy-check tests for instruction-mode patterns. + +Two scopes: +1. Golden fixture tests — small .md snippets that exercise each of the 7 + primary patterns from private-skills/voice/skills/joy-check/references/ + instruction-rubric.md, plus contextual exceptions that must pass. +2. Fleet scan — parametrized test across all agents/*.md and + skills/**/SKILL.md. Known violations in voice-corpus files are covered + by the allowlist in validate_positive_instruction_docs.py; any new + violation in the fleet causes an explicit failure. 
+ +Run with: + python3 -m pytest scripts/tests/test_joy_check_instruction_mode.py -v +""" + +from __future__ import annotations + +import importlib.util +import sys +from pathlib import Path + +import pytest + +# --------------------------------------------------------------------------- +# Module loading +# --------------------------------------------------------------------------- + +_SCRIPTS_DIR = Path(__file__).resolve().parent.parent +_REPO_ROOT = _SCRIPTS_DIR.parent + +_spec = importlib.util.spec_from_file_location( + "validate_positive_instruction_docs", + _SCRIPTS_DIR / "validate_positive_instruction_docs.py", +) +assert _spec is not None and _spec.loader is not None +_mod = importlib.util.module_from_spec(_spec) +sys.modules["validate_positive_instruction_docs"] = _mod +_spec.loader.exec_module(_mod) # type: ignore[attr-defined] + +scan_file = _mod.scan_file +should_skip = _mod.should_skip + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _write(tmp_path: Path, content: str, name: str = "sample.md") -> Path: + """Write content to a temp file and return its path.""" + p = tmp_path / name + p.write_text(content, encoding="utf-8") + # Override REPO_ROOT so relative-path logic works from tmp_path + _mod.REPO_ROOT = tmp_path + return p + + +# --------------------------------------------------------------------------- +# Golden fixture tests — each primary pattern +# --------------------------------------------------------------------------- + + +class TestPrimaryPatterns: + """Each of the 7 primary patterns from the instruction rubric must be flagged.""" + + def test_anti_pattern_heading_fails(self, tmp_path: Path) -> None: + """Heading containing 'Anti-Pattern' is flagged.""" + p = _write(tmp_path, "## Anti-Patterns\n\nSome description.\n") + violations = scan_file(p) + assert any(v.pattern == "Anti-Pattern" for v in violations), ( 
+ f"Expected Anti-Pattern violation, got: {violations}" + ) + + def test_forbidden_caps_fails(self, tmp_path: Path) -> None: + """FORBIDDEN in instruction context is flagged.""" + p = _write(tmp_path, "- FORBIDDEN: Do not commit credentials.\n") + violations = scan_file(p) + assert any(v.pattern == "FORBIDDEN" for v in violations), ( + f"Expected FORBIDDEN violation, got: {violations}" + ) + + def test_never_caps_fails(self, tmp_path: Path) -> None: + """NEVER in instruction context is flagged.""" + p = _write(tmp_path, "NEVER edit code directly.\n") + violations = scan_file(p) + assert any(v.pattern == "NEVER" for v in violations), ( + f"Expected NEVER violation, got: {violations}" + ) + + def test_do_not_fails(self, tmp_path: Path) -> None: + """'do NOT' (case-insensitive start) is flagged.""" + p = _write(tmp_path, "do NOT use git add -A.\n") + violations = scan_file(p) + assert any(v.pattern == "do NOT" for v in violations), ( + f"Expected 'do NOT' violation, got: {violations}" + ) + + def test_do_not_caps_fails(self, tmp_path: Path) -> None: + """'Do NOT' (capital D) is flagged.""" + p = _write(tmp_path, "Do NOT skip tests.\n") + violations = scan_file(p) + assert any(v.pattern == "do NOT" for v in violations), ( + f"Expected 'do NOT' violation for 'Do NOT', got: {violations}" + ) + + def test_must_not_fails(self, tmp_path: Path) -> None: + """'must NOT' is flagged.""" + p = _write(tmp_path, "Hooks must NOT block tools.\n") + violations = scan_file(p) + assert any(v.pattern == "must NOT" for v in violations), ( + f"Expected 'must NOT' violation, got: {violations}" + ) + + def test_dont_instruction_start_fails(self, tmp_path: Path) -> None: + """Line starting with Don't is flagged.""" + p = _write(tmp_path, "- Don't mock the database.\n") + violations = scan_file(p) + assert any(v.pattern == "Don't" for v in violations), ( + f"Expected Don't violation, got: {violations}" + ) + + def test_avoid_heading_fails(self, tmp_path: Path) -> None: + """Heading containing 
'Avoid' is flagged.""" + p = _write(tmp_path, "### Patterns to Avoid\n\nSome content.\n") + violations = scan_file(p) + assert any(v.pattern == "Avoid" for v in violations), ( + f"Expected Avoid violation, got: {violations}" + ) + + def test_avoid_as_bullet_start_fails(self, tmp_path: Path) -> None: + """Bullet starting with 'Avoid' is flagged.""" + p = _write(tmp_path, "- Avoid using global state.\n") + violations = scan_file(p) + assert any(v.pattern == "Avoid" for v in violations), ( + f"Expected Avoid violation for bullet, got: {violations}" + ) + + +# --------------------------------------------------------------------------- +# Contextual exceptions — must PASS (no violations) +# --------------------------------------------------------------------------- + + +class TestContextualExceptions: + """Patterns inside fenced code blocks and subordinate positions must not be flagged.""" + + def test_never_in_fenced_code_block_passes(self, tmp_path: Path) -> None: + """NEVER inside a fenced code block is skipped.""" + content = "```python\n# NEVER do this\nrm -rf /\n```\n" + p = _write(tmp_path, content) + violations = scan_file(p) + assert violations == [], f"Expected no violations for fenced NEVER, got: {violations}" + + def test_do_not_in_fenced_code_block_passes(self, tmp_path: Path) -> None: + """do NOT inside a fenced block is skipped.""" + content = "```bash\n# do NOT run as root\nsudo command\n```\n" + p = _write(tmp_path, content) + violations = scan_file(p) + assert violations == [], f"Expected no violations for fenced 'do NOT', got: {violations}" + + def test_anti_pattern_in_fenced_block_passes(self, tmp_path: Path) -> None: + """Anti-Pattern heading inside fenced code is skipped.""" + content = "```md\n## Anti-Patterns\nBad thing.\n```\n" + p = _write(tmp_path, content) + violations = scan_file(p) + assert violations == [], f"Expected no violations for fenced Anti-Pattern, got: {violations}" + + def test_clean_file_passes(self, tmp_path: Path) -> None: + 
"""A file with positive-only framing produces zero violations.""" + content = ( + "# My Skill\n\n" + "## Preferred Patterns\n\n" + "Route code modifications to domain agents.\n\n" + "Create feature branches for all commits.\n\n" + "Use `git add specific-file.py` to stage by name.\n" + ) + p = _write(tmp_path, content) + violations = scan_file(p) + assert violations == [], f"Expected no violations for clean file, got: {violations}" + + def test_subordinate_never_in_positive_instruction_passes(self, tmp_path: Path) -> None: + """'never in code' subordinate to positive instruction does not trigger NEVER pattern. + + The regex matches \bNEVER\b (uppercase). Lowercase 'never' is not + flagged — this is the intended behaviour for subordinate negatives + like 'Credentials stay in .env files, never in code.' + """ + content = "Credentials stay in .env files, never in code or logs.\n" + p = _write(tmp_path, content) + violations = scan_file(p) + assert violations == [], ( + f"Expected no violations for subordinate lowercase 'never', got: {violations}" + ) + + def test_blockquote_line_passes(self, tmp_path: Path) -> None: + """Lines starting with > (blockquote) are skipped.""" + content = "> Do NOT copy this pattern.\n> NEVER do this in production.\n" + p = _write(tmp_path, content) + violations = scan_file(p) + assert violations == [], f"Expected no violations for blockquote lines, got: {violations}" + + +# --------------------------------------------------------------------------- +# Positive rewrite examples — positive framing must not be flagged +# --------------------------------------------------------------------------- + + +class TestPositiveRewrites: + """Positive rewrites from the rubric must produce zero violations.""" + + def test_route_to_agents_positive(self, tmp_path: Path) -> None: + p = _write(tmp_path, "Route all code modifications to domain agents.\n") + assert scan_file(p) == [] + + def test_feature_branch_positive(self, tmp_path: Path) -> None: + p = 
_write(tmp_path, "Create feature branches for all commits.\n") + assert scan_file(p) == [] + + def test_preferred_heading_positive(self, tmp_path: Path) -> None: + p = _write(tmp_path, "## Preferred Patterns\n\nUse this approach.\n") + assert scan_file(p) == [] + + def test_hard_gate_heading_positive(self, tmp_path: Path) -> None: + p = _write(tmp_path, "### Hard Gate Patterns\n\nEnforced by hooks.\n") + assert scan_file(p) == [] + + +# --------------------------------------------------------------------------- +# Fleet scan — parametrized across agents/*.md and skills/**/SKILL.md +# --------------------------------------------------------------------------- + +_AGENT_FILES = sorted(_REPO_ROOT.glob("agents/*.md")) +_SKILL_FILES = sorted(_REPO_ROOT.glob("skills/**/SKILL.md")) +_FLEET = _AGENT_FILES + _SKILL_FILES + +# Build fleet parameter list, skipping allowlisted files. +# Use a sentinel so pytest reports "skipped (allowlisted)" rather than collecting 0 params. +_fleet_params = [] +for _f in _FLEET: + _rel = str(_f.relative_to(_REPO_ROOT)) + if should_skip(_f): + _fleet_params.append(pytest.param(_f, marks=pytest.mark.skip(reason=f"allowlisted: {_rel}"))) + else: + _fleet_params.append(pytest.param(_f, id=_rel)) + + +@pytest.mark.parametrize("md_file", _fleet_params) +def test_fleet_joy_check(md_file: Path) -> None: + """Every non-allowlisted agent and skill must pass instruction-mode joy check. + + Failure means the file contains a primary negative-framing pattern that + should be rewritten to positive framing per instruction-rubric.md. + Run `python3 scripts/validate_positive_instruction_docs.py` for the full + violation list with line numbers. + """ + _mod.REPO_ROOT = _REPO_ROOT + violations = scan_file(md_file) + rel = str(md_file.relative_to(_REPO_ROOT)) + assert violations == [], ( + f"Joy-check failure in {rel}: {len(violations)} violation(s).\n" + + "\n".join(f" L{v.line} [{v.pattern}] {v.text}" for v in violations[:10]) + + ("\n ..." 
if len(violations) > 10 else "") + ) diff --git a/scripts/validate_positive_instruction_docs.py b/scripts/validate_positive_instruction_docs.py index 20eacc9f..3f7f18e0 100755 --- a/scripts/validate_positive_instruction_docs.py +++ b/scripts/validate_positive_instruction_docs.py @@ -21,9 +21,13 @@ NEGATIVE_PATTERNS = [ ("Anti-Pattern", re.compile(r"^\s*#{1,6}.*[Aa]nti-[Pp]attern")), ("FORBIDDEN", re.compile(r"\bFORBIDDEN\b")), + ("NEVER", re.compile(r"\bNEVER\b")), ("do NOT", re.compile(r"\b[Dd]o NOT\b")), ("must NOT", re.compile(r"\bmust NOT\b")), - ("Avoid", re.compile(r"^\s*#{1,6}.*Avoid|^\s*[-*]?\s*Avoid\b", re.IGNORECASE)), + ("Don't", re.compile(r"^-?\s*Don't\b")), + # Heading: "Avoid" as leading verb ("### Avoid X") or terminal verb ("### X to Avoid"). + # Not matched: "### Prefetch Data to Avoid N+1" — "Avoid" embedded in technical phrase. + ("Avoid", re.compile(r"^\s*#{1,6}\s+Avoid\b|^\s*#{1,6}.*\bAvoid\s*$|^\s*[-*]?\s*Avoid\b", re.IGNORECASE)), ] ALLOWLIST = ( "anti-ai-editor", @@ -32,6 +36,10 @@ "extract_negative_instruction_blocks.py", "validate_positive_instruction_docs.py", "bulk_fix_instruction_joy.py", + # Voice corpus files: NEVER/Don't document voice rules (what the voice itself avoids), + # not toolkit operator instructions. Contextual exception per instruction-rubric.md. 
+ "voice-andy-nemmity", + "voice-vexjoy", ) From 37d42e53455c0b9f6d816d65fa481820baf71d97 Mon Sep 17 00:00:00 2001 From: notque Date: Sun, 10 May 2026 17:05:15 +0000 Subject: [PATCH 3/4] feat: add deterministic routing scripts and do_b A/B test variant - do-classify.py: request classification (complexity, creation, interview, parallel) - do-enhance.py: enhancement stacking (anti-rat, thinking, model selection) - do-build-prompt.py: prompt templates (haiku, banner, agent, task-spec) - do_b SKILL.md: /do with script-backed tables (56% token reduction) - test_do_routing.py: 20 golden routing cases (40 tests) - test_do_enhancement_stacking.py: 17 golden enhancement cases (28 tests) All 247 tests pass across routing, enhancement, and joy-check suites. --- scripts/do-build-prompt.py | 268 ++++++++++++ scripts/do-classify.py | 242 +++++++++++ scripts/do-enhance.py | 211 +++++++++ scripts/tests/test_do_enhancement_stacking.py | 401 ++++++++++++++++++ scripts/tests/test_do_routing.py | 292 +++++++++++++ skills/INDEX.json | 367 +++++++++++----- skills/meta/do_b/SKILL.md | 257 +++++++++++ 7 files changed, 1921 insertions(+), 117 deletions(-) create mode 100755 scripts/do-build-prompt.py create mode 100755 scripts/do-classify.py create mode 100755 scripts/do-enhance.py create mode 100644 scripts/tests/test_do_enhancement_stacking.py create mode 100644 scripts/tests/test_do_routing.py create mode 100644 skills/meta/do_b/SKILL.md diff --git a/scripts/do-build-prompt.py b/scripts/do-build-prompt.py new file mode 100755 index 00000000..b0fdee10 --- /dev/null +++ b/scripts/do-build-prompt.py @@ -0,0 +1,268 @@ +#!/usr/bin/env python3 +"""Prompt builder for /do routing pipeline. + +Generates complete prompts for Haiku routing, routing banners, +worker agent dispatch, and task specifications. All templates +live in this script — zero LLM context cost until called. + +Usage: + python3 scripts/do-build-prompt.py --mode haiku-prompt --request "..." --manifest "..." 
+ python3 scripts/do-build-prompt.py --mode routing-banner --agent X --skill Y --reasoning "..." + python3 scripts/do-build-prompt.py --mode agent-prompt --agent X --skill Y --request "..." + python3 scripts/do-build-prompt.py --mode task-spec --request "..." --complexity Medium +""" + +from __future__ import annotations + +import argparse +import sys + +# --------------------------------------------------------------------------- +# Templates +# --------------------------------------------------------------------------- + +HAIKU_PROMPT = """\ +You are a routing agent. Given a user request and a manifest of available \ +agents, skills, and pipelines, select the BEST agent+skill combination. + +USER REQUEST: {request} + +ROUTING MANIFEST: +{manifest} + +Return your answer as JSON: +{{ + "agent": "agent-name or null", + "skill": "skill-name or null", + "pipeline": "pipeline-name or null", + "reasoning": "one sentence why", + "confidence": "high/medium/low" +}} + +FORCE-ROUTE RULE: Entries marked "FORCE" in the manifest MUST be selected \ +when their domain clearly matches the user's intent. FORCE matching is \ +SEMANTIC, not keyword-based — match what the user MEANS, not individual words. +Examples: +- "push my changes" → pr-workflow (FORCE) ✓ (git push) +- "push back on this design" → NOT pr-workflow (resist/argue) +- "quick fix to the login page" → quick (FORCE) ✓ (small edit) +- "quick overview of the architecture" → NOT quick (exploration) + +Rules: +- Most specific match wins. "Go tests" → golang-general-engineer + go-patterns. +- Agent = domain. Skill = methodology. Pick both when possible. +- Task verb (review, debug, refactor, test) → prefer matching skill. +- No match → return all nulls with reasoning. +- Semantic match over keyword overlap. +- Git ops (push, commit, PR, merge) → ALWAYS pr-workflow. 
+- Return a single skill name string, not an array.""" + + +BANNER = """\ +=================================================================== + ROUTING: {summary} +=================================================================== + Selected: + -> Agent: {agent}{agent_detail} + -> Skill: {skill}{skill_detail} +{extra} Invoking... +===================================================================""" + + +AGENT_INJECTIONS = """\ +Load `agents/base-instructions.md` for universal operational rules. +Read your agent .md file's Reference Loading Table. Load EVERY matching reference. +Deliver the finished product. Ship the complete thing. +Write dense: high fidelity, minimum words. Cut filler, prefer tables, report what changed.""" + + +TASK_SPEC = """\ +## Task Specification (auto-extracted) + +**Intent:** {intent} +**Constraints:** {constraints} +**Acceptance criteria:** {acceptance} +**Operator context:** {operator_context}""" + + +# --------------------------------------------------------------------------- +# Builders +# --------------------------------------------------------------------------- + + +def build_haiku_prompt(request: str, manifest: str) -> str: + """Build the Haiku routing agent prompt.""" + return HAIKU_PROMPT.format(request=request, manifest=manifest) + + +def build_routing_banner( + agent: str, + skill: str, + reasoning: str, + pipeline: str | None = None, + rigor: str | None = None, +) -> str: + """Build the user-visible routing decision banner.""" + summary = f"{agent} + {skill}" if skill else agent + extra = "" + if pipeline: + extra += f" -> Pipeline: {pipeline}\n" + if rigor: + extra += f" -> Extra Rigor: {rigor}\n" + return BANNER.format( + summary=summary, + agent=agent, + agent_detail=f" - {reasoning}" if reasoning else "", + skill=skill or "none", + skill_detail=f" - {reasoning}" if reasoning else "", + extra=extra, + ) + + +def build_task_spec( + request: str, + complexity: str, + constraints: str | None = None, + operator_context: str 
| None = None, +) -> str: + """Build the task specification block for Medium+ tasks.""" + intent = request.split(".")[0].strip() if "." in request else request.strip() + if len(intent) > 120: + intent = intent[:117] + "..." + + return TASK_SPEC.format( + intent=intent, + constraints=constraints or "branch safety (do not merge to main)", + acceptance="observable: task completed, no errors", + operator_context=operator_context or "personal — full autonomy", + ) + + +def build_agent_prompt( + agent: str, + skill: str, + complexity: str, + request: str, + thinking: str | None = None, + enhancements: str | None = None, + local_only: bool = False, + constraints: str | None = None, + operator_context: str | None = None, +) -> str: + """Build the complete worker agent dispatch prompt.""" + parts: list[str] = [] + + # Thinking directive first (verbatim, no framing) + if thinking: + parts.append(thinking) + parts.append("") + + # Local-only constraint + if local_only: + parts.append( + "**LOCAL-ONLY MODE.** Do not push, commit, create PRs, or deploy. " + "All work stays on disk. Read-only git is fine." 
+ ) + parts.append("") + + # Standard injections + parts.append(AGENT_INJECTIONS) + parts.append("") + + # Task specification for Medium+ + if complexity in ("Medium", "Complex"): + parts.append(build_task_spec(request, complexity, constraints, operator_context)) + parts.append("") + + # The request itself + parts.append("## Request") + parts.append(request) + + # Skill methodology + if skill: + parts.append("") + parts.append(f"Use the `{skill}` skill methodology for this task.") + + # Enhancements + if enhancements: + parts.append("") + parts.append(f"Additional skills to apply: {enhancements}") + + # Commit instruction + parts.append("") + parts.append("Commit your changes on the branch.") + + return "\n".join(parts) + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + + +def main() -> int: + parser = argparse.ArgumentParser(description="Build prompts for /do routing pipeline") + parser.add_argument( + "--mode", + required=True, + choices=["haiku-prompt", "routing-banner", "agent-prompt", "task-spec"], + ) + parser.add_argument("--request", default="") + parser.add_argument("--manifest", default="") + parser.add_argument("--agent", default="") + parser.add_argument("--skill", default="") + parser.add_argument("--complexity", default="Simple") + parser.add_argument("--thinking", default="") + parser.add_argument("--enhancements", default="") + parser.add_argument("--reasoning", default="") + parser.add_argument("--pipeline", default="") + parser.add_argument("--rigor", default="") + parser.add_argument("--constraints", default="") + parser.add_argument("--operator-context", default="") + parser.add_argument("--local-only", action="store_true") + args = parser.parse_args() + + if args.mode == "haiku-prompt": + print(build_haiku_prompt(args.request, args.manifest)) + + elif args.mode == "routing-banner": + print( + build_routing_banner( + 
args.agent, + args.skill, + args.reasoning, + args.pipeline or None, + args.rigor or None, + ) + ) + + elif args.mode == "agent-prompt": + print( + build_agent_prompt( + args.agent, + args.skill, + args.complexity, + args.request, + args.thinking or None, + args.enhancements or None, + args.local_only, + args.constraints or None, + args.operator_context or None, + ) + ) + + elif args.mode == "task-spec": + print( + build_task_spec( + args.request, + args.complexity, + args.constraints or None, + args.operator_context or None, + ) + ) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/do-classify.py b/scripts/do-classify.py new file mode 100755 index 00000000..eaed9854 --- /dev/null +++ b/scripts/do-classify.py @@ -0,0 +1,242 @@ +#!/usr/bin/env python3 +"""Deterministic request classifier for /do routing. + +Classifies requests by complexity, creation intent, interview-mode +signals, and parallel patterns. Returns JSON. + +Usage: + python3 scripts/do-classify.py --request "run go tests" + python3 scripts/do-classify.py --request "build a new agent" --json-compact +""" + +from __future__ import annotations + +import argparse +import json +import re +import sys + +# --- Trivial detection --- + +_READ_VERBS = r"(?:read|show|cat|display|open|view|look\s+at|what'?s\s+in|check)" +FILE_PATH_RE = re.compile( + rf"{_READ_VERBS}\s+[`\"']?([/~][\w./\-]+(?:\.\w+)?)[`\"']?", + re.IGNORECASE, +) +BARE_PATH_RE = re.compile(r"^[`\"']?([/~][\w./\-]+\.\w+)[`\"']?\s*$") + +# --- Creation detection --- + +CREATION_VERBS = frozenset( + { + "create", + "scaffold", + "build", + "add new", + "new", + "implement new", + "generate", + "initialize", + "bootstrap", + } +) +CREATION_TARGETS = frozenset( + { + "agent", + "skill", + "pipeline", + "hook", + "feature", + "plugin", + "workflow", + "voice profile", + "component", + "service", + "module", + } +) +ANTI_CREATION = frozenset( + { + "debug", + "review", + "fix", + "refactor", + "explain", + "audit", + } 
+) + +# --- Interview-mode detection --- + +INTERVIEW_VERBS = frozenset( + { + "build", + "design", + "make", + "figure out", + "set up", + } +) +# Concrete nouns that indicate the user knows what they want — suppress interview +CONCRETE_NOUNS = frozenset( + { + "tests", + "test", + "pr", + "changes", + "branch", + "commit", + "audit", + "review", + "vulnerabilities", + "bug", + "typo", + "error", + "failure", + "failures", + "endpoint", + "api", + "deploy", + "migration", + "database", + "schema", + "ci", + } +) +CONCRETE_RE = re.compile(r"line\s+\d+|`\w+`|\.go\b|\.py\b|\.ts\b|\.js\b|\.md\b") +FILE_REF_RE = re.compile(r"[/\\]\w+\.\w+|`[^`]+`") + +# --- Complexity escalation --- + +ESCALATORS: dict[str, str] = { + # Complex signals + "system-wide": "Complex", + "entire": "Complex", + "security audit": "Complex", + # Medium signals + "architecture": "Medium", + "migration": "Medium", + "refactor": "Medium", + "redesign": "Medium", + "rewrite": "Medium", + "migrate": "Medium", + "comprehensive": "Medium", + "across": "Medium", +} + +# --- Parallel detection --- + +NUMBERED_LIST_RE = re.compile(r"(?:^|\n)\s*\d+[.)]\s") + + +def classify(request: str) -> dict: + """Classify a user request for /do routing.""" + req_lower = request.lower().strip() + words = req_lower.split() + word_count = len(words) + + result: dict = { + "complexity": "Simple", + "is_creation": False, + "is_interview": False, + "is_parallel": False, + "parallel_type": None, + "creation_verbs": [], + "reasoning": "", + } + + # --- Trivial: exact file path read --- + if FILE_PATH_RE.search(req_lower) or BARE_PATH_RE.match(req_lower): + action_words = {"read", "show", "cat", "display", "open", "view", "check"} + if any(w in words[:3] for w in action_words) or BARE_PATH_RE.match(req_lower): + result["complexity"] = "Trivial" + result["reasoning"] = "user named exact file path to read" + return result + + # --- Creation detection --- + found_verbs = [v for v in CREATION_VERBS if v in req_lower] + has_target 
= any(t in req_lower for t in CREATION_TARGETS) + if found_verbs and has_target: + if not any(w in words[:2] for w in ANTI_CREATION): + result["is_creation"] = True + result["creation_verbs"] = found_verbs + + # --- Interview mode --- + # Only fires when: short + vague verb + no concrete target + not creation + has_file_ref = bool(FILE_REF_RE.search(request)) + has_interview_verb = any(v in req_lower for v in INTERVIEW_VERBS) + has_concrete = bool(CONCRETE_RE.search(request)) + has_concrete_noun = bool(set(words) & CONCRETE_NOUNS) + if ( + word_count < 15 + and has_interview_verb + and not has_file_ref + and not has_concrete + and not has_concrete_noun + and not result["is_creation"] + ): + result["is_interview"] = True + + # --- Parallel detection --- + numbered_list = bool(NUMBERED_LIST_RE.search(request)) + has_semicolons = req_lower.count(";") >= 1 + has_sequence = "first" in words and ("then" in words or "after" in words) + and_clauses = req_lower.count(" and ") >= 2 + + if numbered_list or has_semicolons or and_clauses: + result["is_parallel"] = True + result["parallel_type"] = "independent-subtasks" + elif has_sequence: + result["is_parallel"] = True + result["parallel_type"] = "sequential" + + # --- Complexity escalation --- + for signal, level in ESCALATORS.items(): + if signal in req_lower: + current = result["complexity"] + rank = {"Simple": 0, "Medium": 1, "Complex": 2} + if rank.get(level, 0) > rank.get(current, 0): + result["complexity"] = level + + # Multiple file paths → Medium+ + if len(re.findall(r"[\w/]+\.\w+", request)) >= 3: + if result["complexity"] == "Simple": + result["complexity"] = "Medium" + + # Creation requests → at least Medium (new artifacts need planning) + if result["is_creation"] and result["complexity"] == "Simple": + result["complexity"] = "Medium" + + # Explicit file count mentions → Medium+ + file_count_match = re.search(r"(\d+)\s+files?", req_lower) + if file_count_match and int(file_count_match.group(1)) >= 5: + if 
result["complexity"] == "Simple": + result["complexity"] = "Medium" + + # --- Reasoning --- + parts = [f"complexity={result['complexity']}"] + if result["is_creation"]: + parts.append(f"creation({','.join(result['creation_verbs'])})") + if result["is_interview"]: + parts.append("interview-mode") + if result["is_parallel"]: + parts.append(f"parallel({result['parallel_type']})") + result["reasoning"] = "; ".join(parts) + + return result + + +def main() -> int: + parser = argparse.ArgumentParser(description="Classify request for /do routing") + parser.add_argument("--request", required=True, help="User request text") + parser.add_argument("--json-compact", action="store_true", help="Compact JSON output") + args = parser.parse_args() + + result = classify(args.request) + indent = None if args.json_compact else 2 + print(json.dumps(result, indent=indent)) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/do-enhance.py b/scripts/do-enhance.py new file mode 100755 index 00000000..9f5d17b5 --- /dev/null +++ b/scripts/do-enhance.py @@ -0,0 +1,211 @@ +#!/usr/bin/env python3 +"""Deterministic enhancement selector for /do routing. + +Given request text, complexity, and selected agent+skill, returns +enhancements, thinking directives, anti-rationalization patterns, +and worker model selection. Returns JSON. 
+ +Usage: + python3 scripts/do-enhance.py --request "review go code" --complexity Simple + python3 scripts/do-enhance.py --request "security audit" --complexity Medium --json-compact +""" + +from __future__ import annotations + +import argparse +import json +import sys + +# --- Enhancement signal mapping --- + +_ENHANCEMENT_SIGNALS: dict[str, str] = { + "comprehensive": "parallel-reviewers", + "thorough": "parallel-reviewers", + "full review": "parallel-reviewers", + "with tests": "test-driven-development", + "production ready": "verification-before-completion", + "research needed": "research-coordinator-engineer", + "investigate first": "research-coordinator-engineer", + "investigate": "research-coordinator-engineer", +} + +_LOCAL_SIGNALS = frozenset( + { + "local only", + "no push", + "keep it local", + "don't commit", + "stay local", + "don't push", + } +) + +# --- Anti-rationalization patterns --- + +_ANTI_RAT: dict[str, list[str]] = { + "code": ["anti-rationalization-core", "verification-checklist"], + "review": ["anti-rationalization-core", "anti-rationalization-review"], + "security": ["anti-rationalization-core", "anti-rationalization-security"], + "test": ["anti-rationalization-core", "anti-rationalization-testing"], + "debug": ["anti-rationalization-core", "verification-checklist"], +} + +_TASK_TYPE_WORDS: dict[str, frozenset[str]] = { + "security": frozenset({"security", "vulnerability", "permissions", "injection"}), + "review": frozenset({"review", "audit"}), + "test": frozenset({"test", "testing", "coverage"}), + "debug": frozenset({"debug", "investigate", "diagnose", "troubleshoot"}), + "code": frozenset( + { + "fix", + "implement", + "add", + "change", + "modify", + "update", + "refactor", + "build", + "create", + } + ), +} + +# --- Thinking directives --- + +_THINKING_SLOW_SIGNALS = frozenset( + { + "security audit", + "security review", + "vulnerability", + "schema migration", + "architectural decision", + "api design", + "database design", + 
"encryption", + } +) + +# These match on WORD BOUNDARIES (checked against split words, not substrings) +# to avoid "read" matching inside "ready", "list" inside "listen", etc. +_THINKING_FAST_WORDS = frozenset( + { + "lookup", + "status", + "rename", + } +) + +_THINKING_SLOW = "Think carefully and step-by-step before responding; this problem is harder than it looks." +_THINKING_FAST = "Prioritize responding quickly rather than thinking deeply. When in doubt, respond directly." + +# --- Model dispatch --- + +_EXTRACTION_VERBS = frozenset( + { + "list", + "count", + "extract", + "inventory", + "search", + "check", + "find", + "grep", + } +) +_ANALYSIS_VERBS = frozenset( + { + "review", + "audit", + "assess", + "analyze", + "debug", + "investigate", + "evaluate", + } +) + + +def enhance( + request: str, + complexity: str, + agent: str | None = None, + skill: str | None = None, +) -> dict: + """Select enhancements for a classified request.""" + req_lower = request.lower() + words = set(req_lower.split()) + + result: dict = { + "enhancements": [], + "anti_rationalization": [], + "thinking_directive": None, + "thinking_tag": None, + "model_dispatch": "direct", + "local_only": False, + "worker_model": "sonnet", + } + + # --- Enhancement signals --- + for signal, enhancement in _ENHANCEMENT_SIGNALS.items(): + if signal in req_lower and enhancement not in result["enhancements"]: + result["enhancements"].append(enhancement) + + # --- Local-only --- + if any(s in req_lower for s in _LOCAL_SIGNALS): + result["local_only"] = True + + # --- Anti-rationalization --- + # Check task types in priority order (security first) + for task_type in ("security", "review", "test", "debug", "code"): + if words & _TASK_TYPE_WORDS[task_type]: + result["anti_rationalization"] = _ANTI_RAT[task_type] + break + + # --- Thinking directive --- + is_slow = any(s in req_lower for s in _THINKING_SLOW_SIGNALS) + is_fast = bool(words & _THINKING_FAST_WORDS) + + if is_slow: + result["thinking_directive"] 
= _THINKING_SLOW + result["thinking_tag"] = "thinking:slow" + elif is_fast or complexity == "Simple": + result["thinking_directive"] = _THINKING_FAST + result["thinking_tag"] = "thinking:fast" + elif complexity == "Complex": + result["thinking_directive"] = _THINKING_SLOW + result["thinking_tag"] = "thinking:slow" + # Medium: no directive (adaptive) + + # --- Model dispatch (Complex with multiple data sources) --- + if complexity == "Complex": + if words & _EXTRACTION_VERBS: + result["model_dispatch"] = "parallel-haiku" + elif words & _ANALYSIS_VERBS: + result["model_dispatch"] = "direct" + + # --- Worker model selection --- + if complexity == "Complex" or is_slow: + result["worker_model"] = "opus" + else: + result["worker_model"] = "sonnet" + + return result + + +def main() -> int: + parser = argparse.ArgumentParser(description="Select enhancements for /do routing") + parser.add_argument("--request", required=True, help="User request text") + parser.add_argument("--complexity", required=True, help="Trivial|Simple|Medium|Complex") + parser.add_argument("--agent", default=None, help="Selected agent name") + parser.add_argument("--skill", default=None, help="Selected skill name") + parser.add_argument("--json-compact", action="store_true", help="Compact JSON output") + args = parser.parse_args() + + result = enhance(args.request, args.complexity, args.agent, args.skill) + indent = None if args.json_compact else 2 + print(json.dumps(result, indent=indent)) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/tests/test_do_enhancement_stacking.py b/scripts/tests/test_do_enhancement_stacking.py new file mode 100644 index 00000000..f94890fb --- /dev/null +++ b/scripts/tests/test_do_enhancement_stacking.py @@ -0,0 +1,401 @@ +#!/usr/bin/env python3 +"""Golden-file tests for /do enhancement stacking. 
+ +Tests the COMBINATION logic: when multiple signal words appear in a +request, do the enhancements, anti-rationalization patterns, thinking +directives, model selection, and constraints all stack correctly? + +This is the hardest part to get right deterministically — /do (Opus) +reads prose tables and combines them with judgment. do-enhance.py +must produce the same combinations from keyword matching. + +Run: + python3 -m pytest scripts/tests/test_do_enhancement_stacking.py -v + python3 scripts/tests/test_do_enhancement_stacking.py # scorecard + +Golden expectations validated by Opus interpretation of /do SKILL.md +Phase 3 (ENHANCE) and Phase 4 (EXECUTE) rules. +""" + +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) + +from importlib import import_module + +do_classify = import_module("do-classify") +do_enhance = import_module("do-enhance") + + +def _norm_th(val: str | None) -> str | None: + if val is None: + return None + return val.replace("thinking:", "") + + +def _run(request: str, complexity: str) -> dict: + """Run enhance pipeline, return normalized result.""" + e = do_enhance.enhance(request, complexity) + return { + "enh": sorted(e["enhancements"]), + "ar": sorted(e["anti_rationalization"]), + "th": _norm_th(e["thinking_tag"]), + "wm": e["worker_model"], + "lo": e["local_only"], + "md": e["model_dispatch"], + } + + +# ───────────────────────────────────────────────────────────────── +# GOLDEN CASES: Single-signal baseline +# ───────────────────────────────────────────────────────────────── + +SINGLE_SIGNAL_CASES = [ + { + "id": 1, + "label": "simple-code-fix", + "request": "fix the null pointer in parser.go", + "complexity": "Simple", + "expect": { + "enh": [], + "ar": ["anti-rationalization-core", "verification-checklist"], + "th": "fast", + "wm": "sonnet", + "lo": False, + "md": "direct", + }, + }, + { + "id": 2, + "label": "code-review", + 
"request": "review the auth module changes", + "complexity": "Medium", + "expect": { + "enh": [], + "ar": ["anti-rationalization-core", "anti-rationalization-review"], + "th": None, # Medium = adaptive + "wm": "sonnet", + "lo": False, + "md": "direct", + }, + }, + { + "id": 3, + "label": "security-work", + "request": "security audit of the payment endpoint", + "complexity": "Complex", + "expect": { + "enh": [], + "ar": ["anti-rationalization-core", "anti-rationalization-security"], + "th": "slow", + "wm": "opus", + "lo": False, + "md": "direct", + }, + }, + { + "id": 4, + "label": "testing-task", + "request": "add test coverage for the router module", + "complexity": "Simple", + "expect": { + "enh": [], + "ar": ["anti-rationalization-core", "anti-rationalization-testing"], + "th": "fast", + "wm": "sonnet", + "lo": False, + "md": "direct", + }, + }, + { + "id": 5, + "label": "debugging-task", + "request": "debug why the CI pipeline fails intermittently", + "complexity": "Medium", + "expect": { + "enh": [], + "ar": ["anti-rationalization-core", "verification-checklist"], + "th": None, # Medium = adaptive + "wm": "sonnet", + "lo": False, + "md": "direct", + }, + }, + { + "id": 6, + "label": "local-only", + "request": "refactor the config loader, don't commit", + "complexity": "Medium", + "expect": { + "enh": [], + "ar": ["anti-rationalization-core", "verification-checklist"], + "th": None, + "wm": "sonnet", + "lo": True, + "md": "direct", + }, + }, +] + +# ───────────────────────────────────────────────────────────────── +# GOLDEN CASES: Multi-signal combinations (the hard ones) +# ───────────────────────────────────────────────────────────────── + +COMBO_CASES = [ + { + "id": 10, + "label": "comprehensive-security-review", + "request": "comprehensive security review of the entire auth system", + "complexity": "Complex", + "expect": { + "enh": ["parallel-reviewers"], + "ar": ["anti-rationalization-core", "anti-rationalization-security"], + "th": "slow", # security 
audit override + "wm": "opus", # Complex + security + "lo": False, + "md": "direct", # analysis verb → direct + }, + }, + { + "id": 11, + "label": "fix-with-tests-production", + "request": "fix the login bug with tests, production ready", + "complexity": "Simple", + "expect": { + "enh": ["test-driven-development", "verification-before-completion"], + "ar": ["anti-rationalization-core", "verification-checklist"], + "th": "fast", # Simple + "wm": "sonnet", + "lo": False, + "md": "direct", + }, + }, + { + "id": 12, + "label": "investigate-debug-local", + "request": "investigate and debug the race condition, stay local", + "complexity": "Medium", + "expect": { + "enh": ["research-coordinator-engineer"], + "ar": ["anti-rationalization-core", "verification-checklist"], + "th": None, # Medium + "wm": "sonnet", + "lo": True, + "md": "direct", + }, + }, + { + "id": 13, + "label": "thorough-review-many-files", + "request": "thorough review of the routing module across 12 files", + "complexity": "Medium", + "expect": { + "enh": ["parallel-reviewers"], + "ar": ["anti-rationalization-core", "anti-rationalization-review"], + "th": None, # Medium = adaptive + "wm": "sonnet", + "lo": False, + "md": "direct", + }, + }, + { + "id": 14, + "label": "complex-extraction-grep", + "request": "find all API endpoints and list their auth requirements", + "complexity": "Complex", + "expect": { + "enh": [], + "ar": [], # extraction task, not security work (no security words) + "th": "slow", # Complex + "wm": "opus", + "lo": False, + "md": "parallel-haiku", # extraction verbs → fan-out + }, + }, + { + "id": 15, + "label": "research-then-implement", + "request": "research needed before we implement the caching layer", + "complexity": "Medium", + "expect": { + "enh": ["research-coordinator-engineer"], + "ar": ["anti-rationalization-core", "verification-checklist"], + "th": None, # Medium + "wm": "sonnet", + "lo": False, + "md": "direct", + }, + }, + { + "id": 16, + "label": 
"comprehensive-test-production", + "request": "comprehensive testing of the payment flow, production ready", + "complexity": "Medium", + "expect": { + "enh": ["parallel-reviewers", "verification-before-completion"], + "ar": ["anti-rationalization-core", "anti-rationalization-testing"], + "th": None, + "wm": "sonnet", + "lo": False, + "md": "direct", + }, + }, + { + "id": 17, + "label": "security-fix-no-push", + "request": "fix the SQL injection vulnerability, no push", + "complexity": "Simple", + "expect": { + "enh": [], + "ar": ["anti-rationalization-core", "anti-rationalization-security"], + "th": "slow", # security override beats Simple + "wm": "opus", # security override + "lo": True, + "md": "direct", + }, + }, + { + "id": 18, + "label": "complex-audit-extract", + "request": "audit all database queries and extract the slow ones", + "complexity": "Complex", + "expect": { + "enh": [], + "ar": ["anti-rationalization-core", "anti-rationalization-review"], + "th": "slow", # Complex + "wm": "opus", + "lo": False, + "md": "parallel-haiku", # extract → fan-out + }, + }, + { + "id": 19, + "label": "investigate-first-security", + "request": "investigate first then fix the auth bypass vulnerability", + "complexity": "Complex", + "expect": { + "enh": ["research-coordinator-engineer"], + "ar": ["anti-rationalization-core", "anti-rationalization-security"], + "th": "slow", # Complex + security + "wm": "opus", + "lo": False, + "md": "direct", # investigate is analysis verb + }, + }, + { + "id": 20, + "label": "full-review-local-debug", + "request": "full review and debug the memory leak, keep it local", + "complexity": "Medium", + "expect": { + "enh": ["parallel-reviewers"], + "ar": ["anti-rationalization-core", "anti-rationalization-review"], + "th": None, # Medium + "wm": "sonnet", + "lo": True, + "md": "direct", + }, + }, +] + +ALL_CASES = SINGLE_SIGNAL_CASES + COMBO_CASES + + +# ───────────────────────────────────────────────────────────────── +# Tests +# 
# Golden-expectation keys compared for every enhancement-stacking case.
_GOLDEN_FIELDS = ("enh", "ar", "th", "wm", "lo", "md")


def _mismatched_fields(actual: dict, expect: dict) -> list:
    """Return ``(field, expected, actual)`` for each golden field that differs.

    Shared by the pytest case and the standalone scorecard so both report
    exactly the same set of differences; each caller formats the triples
    in its own style.
    """
    return [
        (field, expect[field], actual[field])
        for field in _GOLDEN_FIELDS
        if actual[field] != expect[field]
    ]


@pytest.mark.parametrize("case", ALL_CASES, ids=[c["label"] for c in ALL_CASES])
def test_enhancement_stacking(case: dict) -> None:
    """Verify enhancement stacking matches Opus-validated golden expectation.

    Runs the case's request/complexity through ``_run`` and fails with a
    single message listing every field that differs from ``case["expect"]``.
    """
    actual = _run(case["request"], case["complexity"])
    expect = case["expect"]

    mismatches = [
        f"{field}: expected={want!r} got={got!r}"
        for field, want, got in _mismatched_fields(actual, expect)
    ]

    assert not mismatches, (
        f"#{case['id']} {case['label']}: {'; '.join(mismatches)}\n"
        f" Request: {case['request']!r} (complexity={case['complexity']})\n"
        f" Expected: {expect}\n"
        f" Actual: {actual}"
    )


@pytest.mark.parametrize(
    "case",
    COMBO_CASES,
    ids=[c["label"] for c in COMBO_CASES],
)
def test_combo_enhancement_count(case: dict) -> None:
    """Verify multi-signal requests activate at least two features.

    A "feature" is any enhancement in ``enh``, any entry in ``ar``, a truthy
    ``lo`` flag, a truthy ``th`` value, or an ``md`` other than ``"direct"``.
    """
    actual = _run(case["request"], case["complexity"])
    # Count total active features across every output dimension.
    feature_count = (
        len(actual["enh"])
        + len(actual["ar"])
        + (1 if actual["lo"] else 0)
        + (1 if actual["th"] else 0)
        + (1 if actual["md"] != "direct" else 0)
    )
    # Every combo case should have at least 2 active features.
    assert feature_count >= 2, (
        f"#{case['id']} {case['label']}: only {feature_count} active features "
        f"for multi-signal request. Expected ≥2.\n"
        f" Active: enh={actual['enh']}, ar={actual['ar']}, "
        f"th={actual['th']}, lo={actual['lo']}, md={actual['md']}"
    )


# ─────────────────────────────────────────────────────────────────
# Standalone scorecard
# ─────────────────────────────────────────────────────────────────


def run_scorecard() -> None:
    """Run all cases and print enhancement stacking scorecard.

    Prints the overall pass count and percentage, a per-group breakdown
    (single-signal vs. combo cases), and one line per failing case with its
    ``field:expected→actual`` differences.
    """
    total = len(ALL_CASES)
    failed = []

    for case in ALL_CASES:
        actual = _run(case["request"], case["complexity"])
        mismatches = [
            f"{field}:{want!r}→{got!r}"
            for field, want, got in _mismatched_fields(actual, case["expect"])
        ]
        if mismatches:
            failed.append((case, mismatches))

    passed = total - len(failed)
    # Guard the percentage so an empty case list reports 0% instead of raising.
    percent = passed * 100 // total if total else 0

    # ALL_CASES is built from the same dict objects as the two group lists,
    # so identity lookup replaces the per-case scan over the failure list.
    failed_ids = {id(case) for case, _ in failed}
    singles_ok = sum(1 for c in SINGLE_SIGNAL_CASES if id(c) not in failed_ids)
    combos_ok = sum(1 for c in COMBO_CASES if id(c) not in failed_ids)

    print(f"\n{'=' * 70}")
    print(f"ENHANCEMENT STACKING TEST: {passed}/{total} ({percent}%)")
    print(f" Singles: {singles_ok}/{len(SINGLE_SIGNAL_CASES)}")
    print(f" Combos: {combos_ok}/{len(COMBO_CASES)}")
    print(f"{'=' * 70}")

    if failed:
        print(f"\nFAILED ({len(failed)}):")
        for case, mismatches in failed:
            print(f" #{case['id']:>2} {case['label']:<30} {', '.join(mismatches)}")
    else:
        print("\nAll enhancement stacking cases match golden expectations.")
    print()


if __name__ == "__main__":
    run_scorecard()
Run with: + + python3 -m pytest scripts/tests/test_do_routing.py -v + +To regenerate the golden file after intentional changes: + + python3 scripts/tests/test_do_routing.py --regenerate +""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path + +import pytest + +# Add scripts to path +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) + +from importlib import import_module + +# Import the modules under test +do_classify = import_module("do-classify") +do_enhance = import_module("do-enhance") + +# --------------------------------------------------------------------------- +# Golden test cases — Opus-validated expected routing decisions +# --------------------------------------------------------------------------- +# These represent the "correct" routing decision for each request. +# Validated by Opus 4.6 LLM interpretation of /do SKILL.md rules. +# Graded 4.8/5 by independent Sonnet reviewer. +# +# Fields: +# cx = complexity, cr = is_creation, iv = is_interview +# wm = worker_model, th = thinking_tag (normalized, no "thinking:" prefix) +# --------------------------------------------------------------------------- + +GOLDEN_CASES: list[dict] = [ + { + "id": 1, + "label": "trivial-read", + "request": "read /home/feedgen/CLAUDE.md", + "expect": {"cx": "Trivial", "cr": False, "iv": False, "wm": None, "th": None}, + }, + { + "id": 2, + "label": "simple-go-test", + "request": "run go tests and fix any failures", + "expect": {"cx": "Simple", "cr": False, "iv": False, "wm": "sonnet", "th": "fast"}, + }, + { + "id": 3, + "label": "simple-push-pr", + "request": "push my changes and create a PR", + "expect": {"cx": "Simple", "cr": False, "iv": False, "wm": "sonnet", "th": "fast"}, + }, + { + "id": 4, + "label": "simple-quick-fix", + "request": "fix the typo on line 42 of routing-guide.md", + "expect": {"cx": "Simple", "cr": False, "iv": False, "wm": "sonnet", "th": "fast"}, + }, + { + "id": 5, + "label": "medium-refactor", + 
"request": "refactor the routing system to support plugin skills", + "expect": {"cx": "Medium", "cr": False, "iv": False, "wm": "sonnet", "th": None}, + }, + { + "id": 6, + "label": "medium-review-8files", + "request": "comprehensive review of the auth module across 8 files", + "expect": {"cx": "Medium", "cr": False, "iv": False, "wm": "sonnet", "th": None}, + }, + { + "id": 7, + "label": "complex-security-audit", + "request": "security audit of the entire API surface and fix vulnerabilities", + "expect": {"cx": "Complex", "cr": False, "iv": False, "wm": "opus", "th": "slow"}, + }, + { + "id": 8, + "label": "creation-agent", + "request": "create a new agent for Redis cluster debugging", + "expect": {"cx": "Medium", "cr": True, "iv": False, "wm": "sonnet", "th": None}, + }, + { + "id": 9, + "label": "interview-vague", + "request": "build a thing that handles notifications", + "expect": {"cx": "Simple", "cr": False, "iv": True, "wm": "sonnet", "th": "fast"}, + }, + { + "id": 10, + "label": "edge-quick-trap", + "request": "quick overview of the codebase architecture", + "expect": {"cx": "Medium", "cr": False, "iv": False, "wm": "sonnet", "th": None}, + }, + # --- Additional edge cases --- + { + "id": 11, + "label": "simple-rename", + "request": "rename cfg to config in internal/", + "expect": {"cx": "Simple", "cr": False, "iv": False, "wm": "sonnet", "th": "fast"}, + }, + { + "id": 12, + "label": "medium-migration", + "request": "migrate the database schema to support multi-tenancy", + "expect": {"cx": "Medium", "cr": False, "iv": False, "wm": "sonnet", "th": None}, + }, + { + "id": 13, + "label": "interview-where-start", + "request": "where do i even start with this", + "expect": {"cx": "Simple", "cr": False, "iv": False, "wm": "sonnet", "th": "fast"}, + }, + { + "id": 14, + "label": "simple-add-test", + "request": "add a test for parseConfig in src/config.go", + "expect": {"cx": "Simple", "cr": False, "iv": False, "wm": "sonnet", "th": "fast"}, + }, + { + "id": 15, 
+ "label": "complex-system-wide", + "request": "system-wide refactor of error handling across all packages", + "expect": {"cx": "Complex", "cr": False, "iv": False, "wm": "opus", "th": "slow"}, + }, + { + "id": 16, + "label": "local-only-refactor", + "request": "refactor the parser, don't commit", + "expect": {"cx": "Medium", "cr": False, "iv": False, "wm": "sonnet", "th": None}, + }, + { + "id": 17, + "label": "parallel-numbered", + "request": "1. fix the typo 2. update the version 3. run tests", + "expect": {"cx": "Simple", "cr": False, "iv": False, "wm": "sonnet", "th": "fast"}, + }, + { + "id": 18, + "label": "creation-skill", + "request": "scaffold a new skill for Terraform debugging", + "expect": {"cx": "Medium", "cr": True, "iv": False, "wm": "sonnet", "th": None}, + }, + { + "id": 19, + "label": "simple-status", + "request": "check PR status on the feature branch", + "expect": {"cx": "Simple", "cr": False, "iv": False, "wm": "sonnet", "th": "fast"}, + }, + { + "id": 20, + "label": "medium-debug-complex", + "request": "debug why CI fails on the go-patterns tests across 6 files", + "expect": {"cx": "Medium", "cr": False, "iv": False, "wm": "sonnet", "th": None}, + }, +] + + +def _norm_th(val: str | None) -> str | None: + """Normalize thinking tag: 'thinking:fast' → 'fast'.""" + if val is None: + return None + return val.replace("thinking:", "") + + +def _run_pipeline(request: str) -> dict: + """Run classify + enhance pipeline, return normalized decision.""" + c = do_classify.classify(request) + if c["complexity"] == "Trivial": + return { + "cx": "Trivial", + "cr": c["is_creation"], + "iv": c["is_interview"], + "wm": None, + "th": None, + } + e = do_enhance.enhance(request, c["complexity"]) + return { + "cx": c["complexity"], + "cr": c["is_creation"], + "iv": c["is_interview"], + "wm": e["worker_model"], + "th": _norm_th(e["thinking_tag"]), + } + + +# --------------------------------------------------------------------------- +# Pytest parametrized tests +# 
# Golden-expectation keys compared for every routing case.
_ROUTING_FIELDS = ("cx", "cr", "iv", "wm", "th")


def _mismatched_fields(actual: dict, expect: dict) -> list:
    """Return ``(field, expected, actual)`` for each routing field that differs.

    Shared by the pytest case and the standalone scorecard so both report
    exactly the same set of differences; each caller formats the triples
    in its own style.
    """
    return [
        (field, expect[field], actual[field])
        for field in _ROUTING_FIELDS
        if actual[field] != expect[field]
    ]


@pytest.mark.parametrize(
    "case",
    GOLDEN_CASES,
    ids=[c["label"] for c in GOLDEN_CASES],
)
def test_routing_decision(case: dict) -> None:
    """Verify routing decision matches Opus-validated golden expectation.

    Runs the request through ``_run_pipeline`` and fails with one message
    listing every field that differs from ``case["expect"]``.
    """
    actual = _run_pipeline(case["request"])
    expect = case["expect"]

    mismatches = [
        f"{field}: expected={want!r} got={got!r}"
        for field, want, got in _mismatched_fields(actual, expect)
    ]

    assert not mismatches, (
        f"#{case['id']} {case['label']}: {'; '.join(mismatches)}\n"
        f" Request: {case['request']!r}\n"
        f" Expected: {expect}\n"
        f" Actual: {actual}"
    )


@pytest.mark.parametrize(
    "case",
    GOLDEN_CASES,
    ids=[c["label"] for c in GOLDEN_CASES],
)
def test_classify_only(case: dict) -> None:
    """Verify classification (complexity, creation, interview) independently.

    Calls ``do_classify.classify`` directly, without the enhance step, so a
    classification regression is reported on its own.
    """
    c = do_classify.classify(case["request"])
    expect = case["expect"]

    assert c["complexity"] == expect["cx"], (
        f"#{case['id']} complexity: expected={expect['cx']!r} got={c['complexity']!r}"
    )
    assert c["is_creation"] == expect["cr"], (
        f"#{case['id']} is_creation: expected={expect['cr']!r} got={c['is_creation']!r}"
    )
    assert c["is_interview"] == expect["iv"], (
        f"#{case['id']} is_interview: expected={expect['iv']!r} got={c['is_interview']!r}"
    )


# ---------------------------------------------------------------------------
# Standalone runner with score report
# ---------------------------------------------------------------------------


def run_scorecard() -> None:
    """Run all cases and print a scorecard.

    Prints the pass count and percentage, then one entry per failing case
    with its ``field:expected→actual`` differences and the original request.
    """
    total = len(GOLDEN_CASES)
    failed_cases = []

    for case in GOLDEN_CASES:
        actual = _run_pipeline(case["request"])
        mismatches = [
            f"{field}:{want!r}→{got!r}"
            for field, want, got in _mismatched_fields(actual, case["expect"])
        ]
        if mismatches:
            failed_cases.append((case, mismatches))

    passed = total - len(failed_cases)
    # Guard the percentage so an empty case list reports 0% instead of raising.
    percent = passed * 100 // total if total else 0

    print(f"\n{'=' * 70}")
    print(f"ROUTING REGRESSION TEST: {passed}/{total} passed ({percent}%)")
    print(f"{'=' * 70}")

    if failed_cases:
        print(f"\nFAILED ({len(failed_cases)}):")
        for case, mismatches in failed_cases:
            print(f" #{case['id']:>2} {case['label']:<25} {', '.join(mismatches)}")
            print(f" Request: {case['request']!r}")
    else:
        print("\nAll cases match golden expectations.")
    print()


if __name__ == "__main__":
    if "--regenerate" in sys.argv:
        # Prints current pipeline output only; expectations are updated by hand.
        print("Regenerating golden file from current script output...")
        for case in GOLDEN_CASES:
            actual = _run_pipeline(case["request"])
            print(f" #{case['id']} {case['label']}: {actual}")
        print("\nCopy desired values into GOLDEN_CASES to update expectations.")
    else:
        run_scorecard()
"description": "Design workflows \u2014 UX copy, design systems, design critique, accessibility review, design handoff, user research synthesis.", "triggers": [ "design", @@ -77,7 +77,7 @@ "pairs_with": [] }, "finance": { - "file": "skills/business/finance/SKILL.md", + "file": "skills/finance/SKILL.md", "description": "Finance and accounting workflows \u2014 journal entries, reconciliation, variance analysis, financial statements, audit support, month-end close, SOX testing.", "triggers": [ "finance", @@ -94,7 +94,7 @@ "pairs_with": [] }, "hr": { - "file": "skills/business/hr/SKILL.md", + "file": "skills/hr/SKILL.md", "description": "People operations workflows \u2014 recruiting pipeline, performance reviews, compensation analysis, offer drafting, interview prep, onboarding, org planning.", "triggers": [ "HR", @@ -111,7 +111,7 @@ "pairs_with": [] }, "legal": { - "file": "skills/business/legal/SKILL.md", + "file": "skills/legal/SKILL.md", "description": "Legal workflows \u2014 contract review, compliance checks, NDA triage, risk assessment, legal briefs.", "triggers": [ "legal", @@ -133,7 +133,7 @@ "pairs_with": [] }, "marketing": { - "file": "skills/business/marketing/SKILL.md", + "file": "skills/marketing/SKILL.md", "description": "Marketing workflows \u2014 SEO audits, campaign planning, content strategy, email sequences, competitive analysis, brand review, performance reporting.", "triggers": [ "marketing", @@ -150,7 +150,7 @@ "pairs_with": [] }, "operations": { - "file": "skills/business/operations/SKILL.md", + "file": "skills/operations/SKILL.md", "description": "Business operations workflows \u2014 vendor management, runbooks, process documentation, risk assessment, capacity planning, change management, compliance tracking.", "triggers": [ "operations", @@ -167,7 +167,7 @@ "pairs_with": [] }, "product-management": { - "file": "skills/business/product-management/SKILL.md", + "file": "skills/product-management/SKILL.md", "description": "Product management 
workflows \u2014 feature specs, roadmap planning, stakeholder updates, user research synthesis, competitive analysis, metrics review, sprint planning.", "triggers": [ "product management", @@ -184,7 +184,7 @@ "pairs_with": [] }, "productivity": { - "file": "skills/business/productivity/SKILL.md", + "file": "skills/productivity/SKILL.md", "description": "Productivity workflows \u2014 task management, daily planning, weekly reviews, meeting optimization, focus management, goal setting, status updates.", "triggers": [ "productivity", @@ -205,7 +205,7 @@ "pairs_with": [] }, "sales": { - "file": "skills/business/sales/SKILL.md", + "file": "skills/sales/SKILL.md", "description": "Sales workflows \u2014 call prep, pipeline analysis, outreach, competitive intelligence, forecasting.", "triggers": [ "sales", @@ -221,7 +221,7 @@ "pairs_with": [] }, "code-cleanup": { - "file": "skills/code-quality/code-cleanup/SKILL.md", + "file": "skills/code-cleanup/SKILL.md", "description": "Detect stale TODOs, unused imports, and dead code.", "triggers": [ "find dead code", @@ -241,7 +241,7 @@ ] }, "code-linting": { - "file": "skills/code-quality/code-linting/SKILL.md", + "file": "skills/code-linting/SKILL.md", "description": "Run Python (ruff) and JavaScript (Biome) linting.", "triggers": [ "lint code", @@ -259,7 +259,7 @@ ] }, "comment-quality": { - "file": "skills/code-quality/comment-quality/SKILL.md", + "file": "skills/comment-quality/SKILL.md", "description": "Review and fix temporal references in code comments.", "triggers": [ "review comments", @@ -276,7 +276,7 @@ ] }, "condense": { - "file": "skills/code-quality/condense/SKILL.md", + "file": "skills/condense/SKILL.md", "description": "Maximize information density: preserve all instructions, remove prose filler.", "triggers": [ "condense", @@ -295,7 +295,7 @@ ] }, "python-quality-gate": { - "file": "skills/code-quality/python-quality-gate/SKILL.md", + "file": "skills/python-quality-gate/SKILL.md", "description": "Python quality 
checks: ruff, pytest, mypy, bandit in deterministic order.", "triggers": [ "Python quality", @@ -317,7 +317,7 @@ "agent": "python-general-engineer" }, "typescript-check": { - "file": "skills/code-quality/typescript-check/SKILL.md", + "file": "skills/typescript-check/SKILL.md", "description": "TypeScript type checking via tsc --noEmit with actionable error output.", "triggers": [ "TypeScript check", @@ -335,7 +335,7 @@ "agent": "typescript-frontend-engineer" }, "universal-quality-gate": { - "file": "skills/code-quality/universal-quality-gate/SKILL.md", + "file": "skills/universal-quality-gate/SKILL.md", "description": "Multi-language code quality gate with auto-detection and linters.", "triggers": [ "quality gate", @@ -352,7 +352,7 @@ ] }, "bluesky-reader": { - "file": "skills/content/bluesky-reader/SKILL.md", + "file": "skills/bluesky-reader/SKILL.md", "description": "Read public Bluesky feeds via AT Protocol API.", "triggers": [ "read Bluesky", @@ -369,7 +369,7 @@ "agent": "python-general-engineer" }, "content-calendar": { - "file": "skills/content/content-calendar/SKILL.md", + "file": "skills/content-calendar/SKILL.md", "description": "Manage editorial content through 6 pipeline stages.", "triggers": [ "content pipeline", @@ -387,7 +387,7 @@ ] }, "content-engine": { - "file": "skills/content/content-engine/SKILL.md", + "file": "skills/content-engine/SKILL.md", "description": "Repurpose source assets into platform-native social content.", "triggers": [ "repurpose this", @@ -408,7 +408,7 @@ ] }, "gemini-image-generator": { - "file": "skills/content/gemini-image-generator/SKILL.md", + "file": "skills/gemini-image-generator/SKILL.md", "description": "Generate images from text prompts via Google Gemini.", "triggers": [ "generate image", @@ -428,7 +428,7 @@ "agent": "python-general-engineer" }, "image-to-video": { - "file": "skills/content/image-to-video/SKILL.md", + "file": "skills/image-to-video/SKILL.md", "description": "FFmpeg-based video creation from image and 
audio.", "triggers": [ "image to video", @@ -450,7 +450,7 @@ ] }, "nano-banana-builder": { - "file": "skills/content/nano-banana-builder/SKILL.md", + "file": "skills/nano-banana-builder/SKILL.md", "description": "Image generation and post-processing via Gemini Nano Banana APIs.", "triggers": [ "nano banana", @@ -473,7 +473,7 @@ "agent": "python-general-engineer" }, "pptx-generator": { - "file": "skills/content/pptx-generator/SKILL.md", + "file": "skills/pptx-generator/SKILL.md", "description": "PPTX presentation generation with visual QA: slides, pitch decks.", "triggers": [ "presentation", @@ -495,7 +495,7 @@ ] }, "professional-communication": { - "file": "skills/content/professional-communication/SKILL.md", + "file": "skills/professional-communication/SKILL.md", "description": "Transform technical communication into structured business formats.", "triggers": [ "business communication", @@ -515,7 +515,7 @@ ] }, "publish": { - "file": "skills/content/publish/SKILL.md", + "file": "skills/publish/SKILL.md", "description": "Content-publishing umbrella covering the blog pipeline from blueprint to upload: post outlining, pre-publication validation, SEO optimization, bulk...", "triggers": [ "outline post", @@ -569,7 +569,7 @@ "agent": "general-purpose" }, "reddit-moderate": { - "file": "skills/content/reddit-moderate/SKILL.md", + "file": "skills/reddit-moderate/SKILL.md", "description": "Reddit moderation via PRAW: fetch modqueue, classify reports, take actions.", "triggers": [ "moderate Reddit", @@ -586,7 +586,7 @@ "agent": "python-general-engineer" }, "series-planner": { - "file": "skills/content/series-planner/SKILL.md", + "file": "skills/series-planner/SKILL.md", "description": "Plan multi-part content series: structure, cross-linking, cadence.", "triggers": [ "plan series", @@ -604,7 +604,7 @@ ] }, "topic-brainstormer": { - "file": "skills/content/topic-brainstormer/SKILL.md", + "file": "skills/topic-brainstormer/SKILL.md", "description": "Generate blog topic ideas: 
problem mining, gap analysis, expansion.", "triggers": [ "brainstorm topics", @@ -621,7 +621,7 @@ ] }, "video-editing": { - "file": "skills/content/video-editing/SKILL.md", + "file": "skills/video-editing/SKILL.md", "description": "Video editing pipeline: cut footage, assemble clips via FFmpeg and Remotion.", "triggers": [ "edit video", @@ -644,7 +644,7 @@ "agent": "python-general-engineer" }, "wordpress-live-validation": { - "file": "skills/content/wordpress-live-validation/SKILL.md", + "file": "skills/wordpress-live-validation/SKILL.md", "description": "Validate published WordPress posts in browser via Playwright.", "triggers": [ "validate wordpress post", @@ -664,7 +664,7 @@ ] }, "x-api": { - "file": "skills/content/x-api/SKILL.md", + "file": "skills/x-api/SKILL.md", "description": "Post tweets, build threads, upload media via the X API.", "triggers": [ "post to X", @@ -689,7 +689,7 @@ "agent": "python-general-engineer" }, "cobalt-core": { - "file": "skills/engineering/cobalt-core/SKILL.md", + "file": "skills/cobalt-core/SKILL.md", "description": "Cobalt Core infrastructure knowledge: KVM exporters, hypervisor tooling, OpenStack compute.", "triggers": [ "cobalt core", @@ -710,7 +710,7 @@ "agent": "kubernetes-helm-engineer" }, "enterprise-search": { - "file": "skills/engineering/enterprise-search/SKILL.md", + "file": "skills/enterprise-search/SKILL.md", "description": "Enterprise search engineering \u2014 relevance tuning, query understanding, index management, search quality measurement, ranking optimization, schema design.", "triggers": [ "enterprise search", @@ -739,7 +739,7 @@ "pairs_with": [] }, "go-patterns": { - "file": "skills/engineering/go-patterns/SKILL.md", + "file": "skills/go-patterns/SKILL.md", "description": "Go development patterns: testing, concurrency, errors, review, and conventions.", "triggers": [ "go test", @@ -785,7 +785,7 @@ "agent": "golang-general-engineer" }, "kotlin-coroutines": { - "file": 
"skills/engineering/kotlin-coroutines/SKILL.md", + "file": "skills/kotlin-coroutines/SKILL.md", "description": "Kotlin structured concurrency, Flow, and Channel patterns.", "triggers": [ "kotlin coroutines", @@ -803,7 +803,7 @@ "agent": "kotlin-general-engineer" }, "kotlin-testing": { - "file": "skills/engineering/kotlin-testing/SKILL.md", + "file": "skills/kotlin-testing/SKILL.md", "description": "Kotlin testing with JUnit 5, Kotest, and coroutine dispatchers.", "triggers": [ "kotlin testing", @@ -821,7 +821,7 @@ "agent": "kotlin-general-engineer" }, "php-quality": { - "file": "skills/engineering/php-quality/SKILL.md", + "file": "skills/php-quality/SKILL.md", "description": "PHP code quality: PSR standards, strict types, framework idioms.", "triggers": [ "php quality", @@ -839,7 +839,7 @@ "agent": "php-general-engineer" }, "php-testing": { - "file": "skills/engineering/php-testing/SKILL.md", + "file": "skills/php-testing/SKILL.md", "description": "PHP testing patterns: PHPUnit, test doubles, database testing.", "triggers": [ "php testing", @@ -856,7 +856,7 @@ "agent": "php-general-engineer" }, "sapcc-audit": { - "file": "skills/engineering/sapcc-audit/SKILL.md", + "file": "skills/sapcc-audit/SKILL.md", "description": "Full-repo SAP CC Go compliance audit against review standards.", "triggers": [ "sapcc audit", @@ -876,7 +876,7 @@ "agent": "golang-general-engineer" }, "sapcc-review": { - "file": "skills/engineering/sapcc-review/SKILL.md", + "file": "skills/sapcc-review/SKILL.md", "description": "Gold-standard SAP CC Go code review: 10 parallel domain specialists.", "triggers": [ "sapcc review", @@ -896,7 +896,7 @@ "agent": "golang-general-engineer" }, "swift-concurrency": { - "file": "skills/engineering/swift-concurrency/SKILL.md", + "file": "skills/swift-concurrency/SKILL.md", "description": "Swift concurrency: async/await, Actor, Task, Sendable patterns.", "triggers": [ "swift concurrency", @@ -912,7 +912,7 @@ "agent": "swift-general-engineer" }, "swift-testing": 
{ - "file": "skills/engineering/swift-testing/SKILL.md", + "file": "skills/swift-testing/SKILL.md", "description": "Swift testing: XCTest, Swift Testing framework, async patterns.", "triggers": [ "swift testing", @@ -929,7 +929,7 @@ "agent": "swift-general-engineer" }, "distinctive-frontend-design": { - "file": "skills/frontend/distinctive-frontend-design/SKILL.md", + "file": "skills/distinctive-frontend-design/SKILL.md", "description": "Context-driven aesthetic exploration with anti-cliche validation.", "triggers": [ "frontend design", @@ -947,7 +947,7 @@ ] }, "frontend-slides": { - "file": "skills/frontend/frontend-slides/SKILL.md", + "file": "skills/frontend-slides/SKILL.md", "description": "Browser-based HTML presentation generation.", "triggers": [ "HTML slides", @@ -971,7 +971,7 @@ "agent": "typescript-frontend-engineer" }, "threejs-builder": { - "file": "skills/frontend/threejs-builder/SKILL.md", + "file": "skills/threejs-builder/SKILL.md", "description": "Three.js app builder: imperative, React Three Fiber, and WebGPU in 4 phases.", "triggers": [ "threejs", @@ -1011,7 +1011,7 @@ "agent": "typescript-frontend-engineer" }, "webgl-card-effects": { - "file": "skills/frontend/webgl-card-effects/SKILL.md", + "file": "skills/webgl-card-effects/SKILL.md", "description": "Standalone WebGL fragment shaders for card visual effects: holographic foil, shimmer, rarity glow.", "triggers": [ "card effects", @@ -1034,7 +1034,7 @@ "agent": "typescript-frontend-engineer" }, "game-asset-generator": { - "file": "skills/game/game-asset-generator/SKILL.md", + "file": "skills/game-asset-generator/SKILL.md", "description": "Deterministic palette/matrix pixel art (not AI).", "triggers": [ "pixel art", @@ -1072,7 +1072,7 @@ "agent": "typescript-frontend-engineer" }, "game-pipeline": { - "file": "skills/game/game-pipeline/SKILL.md", + "file": "skills/game-pipeline/SKILL.md", "description": "Game lifecycle orchestrator: scaffold, assets, audio, QA, deploy.", "triggers": [ "make game", 
@@ -1100,7 +1100,7 @@ ] }, "game-sprite-pipeline": { - "file": "skills/game/game-sprite-pipeline/SKILL.md", + "file": "skills/game-sprite-pipeline/SKILL.md", "description": "AI sprite generation: portraits, idle loops, animated sheets via Codex/Nano Banana.", "triggers": [ "AI sprite", @@ -1141,7 +1141,7 @@ "agent": "python-general-engineer" }, "motion-pipeline": { - "file": "skills/game/motion-pipeline/SKILL.md", + "file": "skills/motion-pipeline/SKILL.md", "description": "CPU-only motion data processing pipeline for game animation: BVH import, contact detection, root decomposition, motion blending, FABRIK IK.", "triggers": [ "mocap", @@ -1164,7 +1164,7 @@ ] }, "phaser-gamedev": { - "file": "skills/game/phaser-gamedev/SKILL.md", + "file": "skills/phaser-gamedev/SKILL.md", "description": "Phaser 3 2D game dev: scenes, physics, tilemaps, sprites, polish.", "triggers": [ "phaser", @@ -1184,7 +1184,7 @@ "agent": "typescript-frontend-engineer" }, "cron-job-auditor": { - "file": "skills/infrastructure/cron-job-auditor/SKILL.md", + "file": "skills/cron-job-auditor/SKILL.md", "description": "Audit cron scripts for reliability and safety.", "triggers": [ "audit cron jobs", @@ -1201,7 +1201,7 @@ ] }, "endpoint-validator": { - "file": "skills/infrastructure/endpoint-validator/SKILL.md", + "file": "skills/endpoint-validator/SKILL.md", "description": "Deterministic API endpoint validation with pass/fail reporting.", "triggers": [ "validate endpoints", @@ -1219,7 +1219,7 @@ ] }, "fish-shell-config": { - "file": "skills/infrastructure/fish-shell-config/SKILL.md", + "file": "skills/fish-shell-config/SKILL.md", "description": "Fish shell configuration and PATH management.", "triggers": [ "fish", @@ -1243,7 +1243,7 @@ "pairs_with": [] }, "headless-cron-creator": { - "file": "skills/infrastructure/headless-cron-creator/SKILL.md", + "file": "skills/headless-cron-creator/SKILL.md", "description": "Generate headless Claude Code cron jobs with safety.", "triggers": [ "create cron job", 
@@ -1261,7 +1261,7 @@ "agent": "python-general-engineer" }, "kubernetes-debugging": { - "file": "skills/infrastructure/kubernetes-debugging/SKILL.md", + "file": "skills/kubernetes-debugging/SKILL.md", "description": "Kubernetes debugging for pod failures and networking.", "triggers": [ "kubernetes debug", @@ -1280,7 +1280,7 @@ "agent": "kubernetes-helm-engineer" }, "kubernetes-security": { - "file": "skills/infrastructure/kubernetes-security/SKILL.md", + "file": "skills/kubernetes-security/SKILL.md", "description": "Kubernetes security: RBAC, PodSecurity, network policies.", "triggers": [ "kubernetes security", @@ -1298,7 +1298,7 @@ "agent": "kubernetes-helm-engineer" }, "service-health-check": { - "file": "skills/infrastructure/service-health-check/SKILL.md", + "file": "skills/service-health-check/SKILL.md", "description": "Service health monitoring: Discover, Check, Report in 3 phases.", "triggers": [ "service status", @@ -1316,7 +1316,7 @@ ] }, "shell-process-patterns": { - "file": "skills/infrastructure/shell-process-patterns/SKILL.md", + "file": "skills/shell-process-patterns/SKILL.md", "description": "Safely start, supervise, and terminate shell processes: background jobs, PID capture, signals, traps, cleanup verification.", "triggers": [ "background process", @@ -1338,7 +1338,7 @@ ] }, "agent-comparison": { - "file": "skills/meta/agent-comparison/SKILL.md", + "file": "skills/agent-comparison/SKILL.md", "description": "A/B test agent variants for quality and token cost.", "triggers": [ "compare agents", @@ -1356,7 +1356,7 @@ ] }, "agent-evaluation": { - "file": "skills/meta/agent-evaluation/SKILL.md", + "file": "skills/agent-evaluation/SKILL.md", "description": "Evaluate agents and skills for quality and standards compliance.", "triggers": [ "evaluate agent", @@ -1375,7 +1375,7 @@ ] }, "auto-dream": { - "file": "skills/meta/auto-dream/SKILL.md", + "file": "skills/auto-dream/SKILL.md", "description": "Background memory consolidation and learning graduation 
\u2014 overnight knowledge lifecycle.", "triggers": [ "dream", @@ -1392,7 +1392,7 @@ "pairs_with": [] }, "do": { - "file": "skills/meta/do/SKILL.md", + "file": "skills/do/SKILL.md", "description": "Classify user requests and route to the correct agent + skill.", "triggers": [ "route task", @@ -1404,8 +1404,17 @@ "category": "meta-tooling", "user_invocable": true }, + "do_b": { + "file": "skills/do_b/SKILL.md", + "description": "Haiku-first router.", + "triggers": [ + "do_b" + ], + "category": "meta-tooling", + "user_invocable": true + }, "docs-sync-checker": { - "file": "skills/meta/docs-sync-checker/SKILL.md", + "file": "skills/docs-sync-checker/SKILL.md", "description": "Detect documentation drift against filesystem state.", "triggers": [ "check doc drift", @@ -1422,7 +1431,7 @@ ] }, "explanation-traces": { - "file": "skills/meta/explanation-traces/SKILL.md", + "file": "skills/explanation-traces/SKILL.md", "description": "Query and display structured decision traces from routing, agent selection, and skill execution.", "triggers": [ "why did you", @@ -1440,7 +1449,7 @@ "pairs_with": [] }, "generate-claudemd": { - "file": "skills/meta/generate-claudemd/SKILL.md", + "file": "skills/generate-claudemd/SKILL.md", "description": "Generate project-specific CLAUDE.md from repo analysis.", "triggers": [ "generate claude.md", @@ -1457,7 +1466,7 @@ ] }, "html-artifact": { - "file": "skills/meta/html-artifact/SKILL.md", + "file": "skills/html-artifact/SKILL.md", "description": "Generate rich self-contained HTML artifacts instead of markdown.", "triggers": [ "HTML artifact", @@ -1479,7 +1488,7 @@ ] }, "install": { - "file": "skills/meta/install/SKILL.md", + "file": "skills/install/SKILL.md", "description": "Verify VexJoy Agent installation, diagnose issues, and guide first-time setup.", "triggers": [ "install toolkit", @@ -1494,7 +1503,7 @@ "user_invocable": true }, "learn": { - "file": "skills/meta/learn/SKILL.md", + "file": "skills/learn/SKILL.md", "description": "Manually 
teach error pattern and solution to learning database.", "triggers": [ "teach pattern", @@ -1511,7 +1520,7 @@ ] }, "reference-enrichment": { - "file": "skills/meta/reference-enrichment/SKILL.md", + "file": "skills/reference-enrichment/SKILL.md", "description": "Analyze agent/skill reference depth and generate missing domain-specific reference files.", "triggers": [ "enrich references", @@ -1532,7 +1541,7 @@ ] }, "retro": { - "file": "skills/meta/retro/SKILL.md", + "file": "skills/retro/SKILL.md", "description": "Learning system interface: stats, search, graduate learnings.", "triggers": [ "retro stats", @@ -1549,7 +1558,7 @@ ] }, "routing-table-updater": { - "file": "skills/meta/routing-table-updater/SKILL.md", + "file": "skills/routing-table-updater/SKILL.md", "description": "Maintain /do routing tables when skills or agents change.", "triggers": [ "update routing", @@ -1566,7 +1575,7 @@ ] }, "skill-composer": { - "file": "skills/meta/skill-composer/SKILL.md", + "file": "skills/skill-composer/SKILL.md", "description": "DAG-based multi-skill orchestration with dependency resolution.", "triggers": [ "compose skills", @@ -1583,7 +1592,7 @@ ] }, "skill-creator": { - "file": "skills/meta/skill-creator/SKILL.md", + "file": "skills/skill-creator/SKILL.md", "description": "Create and iteratively improve skills through eval-driven validation.", "triggers": [ "create skill", @@ -1603,7 +1612,7 @@ ] }, "skill-eval": { - "file": "skills/meta/skill-eval/SKILL.md", + "file": "skills/skill-eval/SKILL.md", "description": "Evaluate skills: trigger testing, A/B benchmarks, structure validation, head-to-head bake-offs.", "triggers": [ "improve skill", @@ -1631,7 +1640,7 @@ ] }, "toolkit-evolution": { - "file": "skills/meta/toolkit-evolution/SKILL.md", + "file": "skills/toolkit-evolution/SKILL.md", "description": "Closed-loop toolkit self-improvement: discover gaps, diagnose, propose, critique, build, test, evolve.", "triggers": [ "evolve toolkit", @@ -1652,7 +1661,7 @@ ] }, 
"workflow-help": { - "file": "skills/meta/workflow-help/SKILL.md", + "file": "skills/workflow-help/SKILL.md", "description": "Interactive guide to workflow system: agents, skills, routing, execution patterns.", "triggers": [ "how does routing work", @@ -1670,7 +1679,7 @@ ] }, "adr-consultation": { - "file": "skills/process/adr-consultation/SKILL.md", + "file": "skills/adr-consultation/SKILL.md", "description": "Multi-agent consultation for architecture decisions.", "triggers": [ "consult on ADR", @@ -1688,7 +1697,7 @@ ] }, "condition-based-waiting": { - "file": "skills/process/condition-based-waiting/SKILL.md", + "file": "skills/condition-based-waiting/SKILL.md", "description": "Polling, retry, and backoff patterns.", "triggers": [ "exponential backoff", @@ -1707,7 +1716,7 @@ ] }, "feature-lifecycle": { - "file": "skills/process/feature-lifecycle/SKILL.md", + "file": "skills/feature-lifecycle/SKILL.md", "description": "Feature lifecycle: design, plan, implement, validate, release.", "triggers": [ "feature design", @@ -1752,7 +1761,7 @@ ] }, "forensics": { - "file": "skills/process/forensics/SKILL.md", + "file": "skills/forensics/SKILL.md", "description": "Post-mortem diagnostic analysis of failed workflows.", "triggers": [ "forensics", @@ -1776,7 +1785,7 @@ ] }, "github-notification-triage": { - "file": "skills/process/github-notification-triage/SKILL.md", + "file": "skills/github-notification-triage/SKILL.md", "description": "Triage GitHub notifications and report actions needed.", "triggers": [ "github notifications", @@ -1790,7 +1799,7 @@ "pairs_with": [] }, "pair-programming": { - "file": "skills/process/pair-programming/SKILL.md", + "file": "skills/pair-programming/SKILL.md", "description": "Collaborative coding with enforced micro-steps and user-paced control.", "triggers": [ "pair program", @@ -1810,7 +1819,7 @@ ] }, "planning": { - "file": "skills/process/planning/SKILL.md", + "file": "skills/planning/SKILL.md", "description": "Planning lifecycle umbrella: 
spec, pre-plan ambiguity resolution, file-backed planning, plan validation, plan-lifecycle management, and session pause/resume.", "triggers": [ "write spec", @@ -1819,40 +1828,63 @@ "scope this", "what should this do", "acceptance criteria", + "define scope", + "spec out", "discuss ambiguities", + "resolve gray areas", "clarify before planning", "assumptions mode", + "what are the gray areas", "before we plan", + "pre-planning discussion", "interview me", "grill me", + "depth-first review", "depth-first interview", "not sure", + "i'm not sure", + "not exactly sure", "unsure", "where do i start", + "where do i even start", + "want clarity on", "need clarity on", "what am i missing", "poke holes in", "challenge my assumptions", "think this through with me", "lots of moving parts", + "many decisions", "create plan", "task plan", "working memory", "persistent plan", + "file-backed planning", "check plan", "validate plan", + "plan checker", + "review plan", + "is this plan ready", + "plan-checker", + "pre-execution check", "list plans", "show plan", "complete plan", "plan status", + "manage plans", "pause", "save progress", "handoff", - "session handoff", + "stopping for now", "end session", "pick this up later", + "session handoff", + "wrap up session", "resume", "continue", + "pick up where I left off", + "what was I doing", + "continue work", "where did I leave off", "what's next" ], @@ -1867,7 +1899,7 @@ "agent": "general-purpose" }, "plant-seed": { - "file": "skills/process/plant-seed/SKILL.md", + "file": "skills/plant-seed/SKILL.md", "description": "Capture forward-looking idea as a seed for future feature design.", "triggers": [ "plant seed", @@ -1884,36 +1916,57 @@ ] }, "pr-workflow": { - "file": "skills/process/pr-workflow/SKILL.md", + "file": "skills/pr-workflow/SKILL.md", "description": "Pull request lifecycle: commit, codex review, sync, review, fix, status, cleanup, and PR mining.", "triggers": [ "push changes", + "push my changes", "push to GitHub", + 
"push to remote", "create PR", "sync to GitHub", "PR status", "branch status", "merge readiness", "fix PR comments", + "resolve PR feedback", "pr-fix", "cleanup branches", + "clean up branches", "merged branches", + "delete merged branch", + "prune branches", "mine PRs", "extract review comments", "tribal knowledge", + "process PR feedback", + "address review comments", + "submit PR", + "create pull request", + "send for review", "open PR", "generate branch name", + "validate branch name", + "name branch", "branch convention", + "git branch name", "check CI", "CI status", + "actions status", "did CI pass", + "build status", + "CI passed", "stage and commit", "commit changes", + "commit these", + "commit my changes", + "commit my files", "codex review", "second opinion", + "code review codex", + "gpt review", "cross-model review" ], - "not_for": "'push back' (disagree), 'commit to approach' (deciding), non-git operations", "category": "git-workflow", "force_route": true, "user_invocable": true, @@ -1924,7 +1977,7 @@ ] }, "quick": { - "file": "skills/process/quick/SKILL.md", + "file": "skills/quick/SKILL.md", "description": "Tracked lightweight execution with composable rigor flags: --trivial, --discuss, --research, --full.", "triggers": [ "quick task", @@ -1950,7 +2003,7 @@ "user_invocable": true }, "read-only-ops": { - "file": "skills/process/read-only-ops/SKILL.md", + "file": "skills/read-only-ops/SKILL.md", "description": "Read-only exploration, inspection, and reporting without modifications.", "triggers": [ "check status", @@ -1966,7 +2019,7 @@ ] }, "socratic-debugging": { - "file": "skills/process/socratic-debugging/SKILL.md", + "file": "skills/socratic-debugging/SKILL.md", "description": "Question-only debugging: guide users to find root causes themselves.", "triggers": [ "guide debugging", @@ -1986,7 +2039,7 @@ ] }, "subagent-driven-development": { - "file": "skills/process/subagent-driven-development/SKILL.md", + "file": 
"skills/subagent-driven-development/SKILL.md", "description": "Fresh-subagent-per-task execution with two-stage review gates.", "triggers": [ "subagent per task", @@ -2003,7 +2056,7 @@ ] }, "verification-before-completion": { - "file": "skills/process/verification-before-completion/SKILL.md", + "file": "skills/verification-before-completion/SKILL.md", "description": "Defense-in-depth verification before declaring any task complete.", "triggers": [ "verify completion", @@ -2020,7 +2073,7 @@ ] }, "with-anti-rationalization": { - "file": "skills/process/with-anti-rationalization/SKILL.md", + "file": "skills/with-anti-rationalization/SKILL.md", "description": "Anti-rationalization enforcement for maximum-rigor task execution.", "triggers": [ "maximum rigor", @@ -2036,7 +2089,7 @@ ] }, "worktree-agent": { - "file": "skills/process/worktree-agent/SKILL.md", + "file": "skills/worktree-agent/SKILL.md", "description": "Mandatory rules for agents in git worktree isolation.", "triggers": [ "worktree agent", @@ -2048,7 +2101,7 @@ "user_invocable": false }, "architecture-deepening": { - "file": "skills/research/architecture-deepening/SKILL.md", + "file": "skills/architecture-deepening/SKILL.md", "description": "Proactive architecture improvement: find shallow modules, propose deepening opportunities, design conversation.", "triggers": [ "deepen architecture", @@ -2069,7 +2122,7 @@ ] }, "codebase-analyzer": { - "file": "skills/research/codebase-analyzer/SKILL.md", + "file": "skills/codebase-analyzer/SKILL.md", "description": "Statistical rule discovery from Go codebase patterns.", "triggers": [ "analyze codebase", @@ -2087,7 +2140,7 @@ ] }, "codebase-overview": { - "file": "skills/research/codebase-overview/SKILL.md", + "file": "skills/codebase-overview/SKILL.md", "description": "Systematic codebase exploration and architecture mapping.", "triggers": [ "onboard to codebase", @@ -2105,7 +2158,7 @@ ] }, "data-analysis": { - "file": "skills/research/data-analysis/SKILL.md", + 
"file": "skills/data-analysis/SKILL.md", "description": "Decision-first data analysis with statistical rigor gates.", "triggers": [ "analyze data", @@ -2135,7 +2188,7 @@ ] }, "decision-helper": { - "file": "skills/research/decision-helper/SKILL.md", + "file": "skills/decision-helper/SKILL.md", "description": "Weighted decision scoring for architectural choices.", "triggers": [ "weigh options", @@ -2157,7 +2210,7 @@ ] }, "full-repo-review": { - "file": "skills/research/full-repo-review/SKILL.md", + "file": "skills/full-repo-review/SKILL.md", "description": "Comprehensive 3-wave review of all repo source files, producing a prioritized issue backlog.", "triggers": [ "full repo review", @@ -2174,7 +2227,7 @@ ] }, "multi-persona-critique": { - "file": "skills/research/multi-persona-critique/SKILL.md", + "file": "skills/multi-persona-critique/SKILL.md", "description": "Parallel critique of proposals via 5 philosophical personas with consensus synthesis.", "triggers": [ "critique these ideas", @@ -2194,7 +2247,7 @@ ] }, "repo-value-analysis": { - "file": "skills/research/repo-value-analysis/SKILL.md", + "file": "skills/repo-value-analysis/SKILL.md", "description": "Analyze external repositories for adoptable ideas and patterns.", "triggers": [ "repo value analysis", @@ -2212,7 +2265,7 @@ "agent": "research-coordinator-engineer" }, "research-pipeline": { - "file": "skills/research/research-pipeline/SKILL.md", + "file": "skills/research-pipeline/SKILL.md", "description": "Formal 5-phase research pipeline with artifact saving and source quality gates: SCOPE, GATHER, SYNTHESIZE, VALIDATE, DELIVER.", "triggers": [ "research-pipeline", @@ -2232,7 +2285,7 @@ "agent": "research-coordinator-engineer" }, "roast": { - "file": "skills/research/roast/SKILL.md", + "file": "skills/roast/SKILL.md", "description": "Constructive critique via 5 HackerNews personas with claim validation.", "triggers": [ "roast code", @@ -2250,7 +2303,7 @@ ] }, "security-threat-model": { - "file": 
"skills/research/security-threat-model/SKILL.md", + "file": "skills/security-threat-model/SKILL.md", "description": "Security threat model: scan toolkit for attack surface, supply-chain risks.", "triggers": [ "threat model", @@ -2272,7 +2325,7 @@ "agent": "python-general-engineer" }, "integration-checker": { - "file": "skills/review/integration-checker/SKILL.md", + "file": "skills/integration-checker/SKILL.md", "description": "Verify cross-component wiring and data flow.", "triggers": [ "integration check", @@ -2291,7 +2344,7 @@ ] }, "parallel-code-review": { - "file": "skills/review/parallel-code-review/SKILL.md", + "file": "skills/parallel-code-review/SKILL.md", "description": "Parallel 3-reviewer code review: Security, Business-Logic, Architecture.", "triggers": [ "parallel review", @@ -2308,7 +2361,7 @@ ] }, "systematic-code-review": { - "file": "skills/review/systematic-code-review/SKILL.md", + "file": "skills/systematic-code-review/SKILL.md", "description": "4-phase code review: UNDERSTAND, VERIFY, ASSESS risks, DOCUMENT findings.", "triggers": [ "review code", @@ -2327,7 +2380,7 @@ ] }, "e2e-testing": { - "file": "skills/testing/e2e-testing/SKILL.md", + "file": "skills/e2e-testing/SKILL.md", "description": "Playwright-based end-to-end testing workflow.", "triggers": [ "playwright", @@ -2348,7 +2401,7 @@ "agent": "testing-automation-engineer" }, "test-driven-development": { - "file": "skills/testing/test-driven-development/SKILL.md", + "file": "skills/test-driven-development/SKILL.md", "description": "RED-GREEN-REFACTOR cycle with strict phase gates for TDD.", "triggers": [ "TDD", @@ -2368,7 +2421,7 @@ ] }, "testing-agents-with-subagents": { - "file": "skills/testing/testing-agents-with-subagents/SKILL.md", + "file": "skills/testing-agents-with-subagents/SKILL.md", "description": "Test agents via subagents: known inputs, captured outputs, verification.", "triggers": [ "test agents", @@ -2385,7 +2438,7 @@ ] }, "testing-preferred-patterns": { - "file": 
"skills/testing/testing-preferred-patterns/SKILL.md", + "file": "skills/testing-preferred-patterns/SKILL.md", "description": "Identify and fix testing mistakes: flaky, brittle, over-mocked tests.", "triggers": [ "flaky test", @@ -2408,7 +2461,7 @@ ] }, "vitest-runner": { - "file": "skills/testing/vitest-runner/SKILL.md", + "file": "skills/vitest-runner/SKILL.md", "description": "Run Vitest tests and parse results into actionable output.", "triggers": [ "run vitest", @@ -2442,6 +2495,86 @@ "feature-lifecycle", "verification-before-completion" ] + }, + "anti-ai-editor": { + "file": "skills/anti-ai-editor/SKILL.md", + "description": "Remove AI-sounding patterns from content.", + "triggers": [ + "remove AI patterns", + "de-AI content", + "make it sound human", + "remove AI voice", + "humanize text" + ], + "category": "content-creation", + "user_invocable": false, + "pairs_with": [ + "voice-writer", + "voice-validator", + "joy-check" + ] + }, + "create-voice": { + "file": "skills/create-voice/SKILL.md", + "description": "Create voice profiles from writing samples.", + "triggers": [ + "create voice", + "new voice", + "build voice", + "voice from samples", + "calibrate voice", + "voice profile from scratch", + "make a voice" + ], + "category": "content", + "force_route": true, + "user_invocable": false, + "pairs_with": [ + "voice-validator", + "voice-writer" + ] + }, + "interactive-essay": { + "file": "skills/interactive-essay/SKILL.md", + "description": "Transform markdown articles into scrollytelling interactive React SPAs.", + "triggers": [ + "interactive essay", + "scrollytelling", + "interactive article", + "scroll-based article", + "make this interactive" + ], + "category": "content-creation", + "user_invocable": true, + "version": "1.0.0", + "pairs_with": [ + "voice-vexjoy", + "anti-ai-editor", + "joy-check" + ] + }, + "joy-check": { + "file": "skills/joy-check/SKILL.md", + "description": "Validate content framing on joy-grievance spectrum.", + "triggers": [ + "joy 
check", + "check framing", + "tone check", + "negative framing", + "joy validation", + "too negative", + "reframe positively", + "positive framing check", + "instruction framing" + ], + "category": "content", + "user_invocable": false, + "pairs_with": [ + "voice-writer", + "anti-ai-editor", + "voice-validator", + "skill-creator" + ] } } } diff --git a/skills/meta/do_b/SKILL.md b/skills/meta/do_b/SKILL.md new file mode 100644 index 00000000..36f6cae2 --- /dev/null +++ b/skills/meta/do_b/SKILL.md @@ -0,0 +1,257 @@ +--- +name: do_b +description: "Haiku-first router. /do behavioral rules + script-backed data tables." +user-invocable: true +argument-hint: "" +allowed-tools: + - Read + - Bash + - Agent +routing: + triggers: + - "do_b" + category: meta-tooling +--- + +# /do_b - Smart Router + +/do_b is a **ROUTER**, not a worker. Classify requests, select the right agent + skill, dispatch. All execution goes to specialized agents. + +**Main thread:** (1) Classify, (2) Select agent+skill, (3) Dispatch, (4) Evaluate, (5) Route again if needed, (6) Report. + +If you find yourself reading source code, writing code, or doing analysis — pause and route to an agent. + +--- + +## The Completeness Standard + +Do the whole thing. Do it right. Do it with tests. Do it with documentation. + +- The answer is the finished product, not a plan. Plans organize execution, not replace it. +- Ship the permanent solve when it's within reach. Deliver the real fix, not a workaround. +- If an agent returns partial work, route a follow-up to finish it. +- Search before building. Test before shipping. +- The router decomposes complexity into agent-sized work. Use it. + +**The standard:** the result should make the user think "that's done" not "that's a start." Inject this into agent prompts for all Simple+ work. + +Model confidence in handling a task directly is a signal to route, not to proceed. Direct handling skips domain knowledge, methodology, and reference files that exist on disk. 
+ +--- + +## Output Discipline + +Every sentence the router prints is a sentence the user reads before seeing results. + +Cut every word you can. Active voice. Short words. Everyday English. These rules apply equally to agent prompts — every word costs tokens on the agent's context window. + +**User sees:** phase banners, routing decision banner, brief post-agent summary (what changed, not how). + +**Internal only:** Haiku routing responses, classification reasoning, enhancement stacking details (unless Verbose Routing ON). + +--- + +## Instructions + +### Phase Banners (MANDATORY) + +Every phase MUST display a banner BEFORE executing: `/do_b > Phase N: PHASE_NAME — description...` + +After Phase 2, display the full routing decision banner (`===` block). Phase banners tell the user *where they are*; the routing banner tells them *what was decided*. Both required. + +--- + +### Phase 1: CLASSIFY + +**Goal**: Determine request complexity and whether routing is needed. + +Read and follow the repository CLAUDE.md before making any routing decision. + +```bash +python3 scripts/do-classify.py --request "{user_request}" --json-compact +``` + +The script returns: `complexity`, `is_creation`, `is_interview`, `is_parallel`, `parallel_type`. + +**Trivial = reading a file the user named by exact path.** Everything else is Simple+ and MUST route. When uncertain, classify UP. + +**Delegation is mandatory.** Route Simple+ tasks to agents without reasoning about whether you could handle them directly. Anything beyond reading a user-named file MUST route. + +**Progressive Depth**: For ambiguous complexity, start shallow and let the agent escalate. See `skills/do/references/progressive-depth.md`. + +**Common misclassifications** (NOT Trivial — route them): evaluating repos/URLs, opinions/recommendations, git operations, codebase questions, retro lookups, comparing approaches. + +**Maximize skill/agent/pipeline usage.** If a skill exists for the task, USE IT. 
+ +**Parallel dispatch is mandatory.** When `is_parallel` is true, dispatch all independent items in parallel in a single message. + +**Creation requests**: If `is_creation` is true, Phase 4 Step 0 is MANDATORY (write ADR before dispatching). + +**Gate**: Complexity classified. If creation detected, output `[CREATION REQUEST DETECTED]`. Display banner. Trivial: handle directly. Simple+: proceed to Phase 2. + +--- + +### Phase 2: ROUTE + +**Goal**: Select the correct agent + skill. FORCE-labeled entries are preferred when intent matches semantically (not keyword-based). + +**Step 0: Deterministic pre-routing** + +```bash +python3 scripts/pre-route.py --request "{user_request}" --json-compact +``` + +If `matched: true` and `confidence: high`: use returned agent+skill directly, skip Step 1. + +**Step 1: Dispatch Haiku routing agent** (if pre-router didn't match) + +```bash +MANIFEST=$(python3 scripts/routing-manifest.py) +PROMPT=$(python3 scripts/do-build-prompt.py --mode haiku-prompt --request "{user_request}" --manifest "$MANIFEST") +``` + +Dispatch Agent with `model: "haiku"` and the prompt output. Use agent+skill from JSON response. Haiku response is internal only — the user sees the routing banner, not the raw JSON. + +**Critical**: "push", "commit", "create PR", "merge" MUST route through skills with quality gates (lint, tests, CI verification). + +Route to the simplest agent+skill that satisfies the request. Route all code modifications to domain agents. + +**Step 2: Apply skill override** (task verb overrides default skill) + +When the request verb implies a specific methodology, override the agent's default skill. Common overrides: "review" → systematic-code-review, "debug" → systematic-debugging, "refactor" → systematic-refactoring, "TDD" → test-driven-development. 
+ +**Step 3: Display routing decision** (MANDATORY — FIRST visible output, before any work) + +```bash +python3 scripts/do-build-prompt.py --mode routing-banner --agent "{agent}" --skill "{skill}" --reasoning "{reasoning}" +``` + +Print the banner output. For Trivial: show `Classification: Trivial - [reason]` and `Handling directly (no agent/skill needed)`. + +**Step 4: Record routing decision** (Simple+ only): + +```bash +python3 ~/.claude/scripts/learning-db.py record \ + routing "{selected_agent}:{selected_skill}" \ + "routing-decision: agent={selected_agent} skill={selected_skill} request: {first_200_chars} complexity: {complexity}" \ + --category effectiveness --tags "{thinking_tag}" +``` + +**Gate**: Agent+skill selected. Banner displayed. Decision recorded. Proceed to Phase 3. + +--- + +### Phase 3: ENHANCE + +**Goal**: Stack additional skills and select model tier based on request signals. + +```bash +python3 scripts/do-enhance.py --request "{user_request}" --complexity "{complexity}" \ + --agent "{agent}" --skill "{skill}" --json-compact +``` + +The script returns: `enhancements`, `anti_rationalization`, `thinking_directive`, `thinking_tag`, `worker_model`, `local_only`, `model_dispatch`. + +Apply all returned values. For `local_only: true`, prepend to agent prompt: "**LOCAL-ONLY MODE.** All work stays on disk. Read-only git is fine." + +If `is_interview` was true from Phase 1, load `planning` skill (depth-first-interview.md) as primary. + +Before stacking, check `pairs_with` in `skills/INDEX.json`. Prefer listed pairs. + +**Gate**: Enhancements applied. Proceed to Phase 4. + +--- + +### Phase 4: EXECUTE + +**Goal**: Invoke the selected agent + skill and deliver results. + +**Step 0: Execute Creation Protocol** (creation requests ONLY) + +If creation signal + Simple+: (1) Write ADR at `adr/{kebab-case-name}.md`, (2) Register via `adr-query.py register`, (3) Proceed to plan. + +**Step 1: Create plan** (Simple+) + +Create `task_plan.md` before execution. 
Skip for Trivial only. + +**Step 1b: Apply quality-loop pipeline** (Medium+ code modifications) + +For code modifications at Medium/Complex, load `skills/do/references/quality-loop.md` as the outer orchestration wrapper. Does NOT apply when: Trivial/Simple (use `quick`), review-only/research/debugging/content creation, or user requests simpler flow. + +**Step 2: Build and dispatch agent** + +```bash +python3 scripts/do-build-prompt.py --mode agent-prompt \ + --agent "{agent}" --skill "{skill}" --complexity "{complexity}" \ + --request "{user_request}" --thinking "{thinking_directive}" \ + --enhancements "{enhancements}" +``` + +Dispatch Agent with `model: "{worker_model}"` and the prompt output. For `model_dispatch: "parallel-haiku"`, spawn Haiku readers per data source → Opus synthesizer. + +Route to agents that create feature branches. Include "commit your changes on the branch" in agent prompts for file modifications. + +For `isolation: "worktree"` agents, inject `worktree-agent` skill rules. + +Non-org repos: up to 3 iterations of `/pr-review` → fix before PR creation. Org-gated repos (via `scripts/classify-repo.py`): require user confirmation before EACH git action. + +**Step 3: Handle multi-part requests** + +Detect: "first...then", "and also", numbered lists, semicolons. Sequential dependencies execute in order. Independent items launch multiple agents in single message. Max parallelism: 10. + +**Step 4: Auto-Pipeline Fallback** (no match AND complexity >= Simple) + +When uncertain: **ROUTE ANYWAY** with verification-before-completion as safety net. + +**Gate**: Agent invoked, results delivered. Proceed to Phase 5. + +--- + +### Phase 5: LEARN + +**Goal**: Capture session insights to `learning.db`. 
+ +**Routing outcome** (MANDATORY for Simple+): + +```bash +# On success: +python3 ~/.claude/scripts/learning-db.py record-routing-outcome \ + "{selected_agent}:{selected_skill}" --success + +# On failure: +python3 ~/.claude/scripts/learning-db.py record-routing-outcome \ + "{selected_agent}:{selected_skill}" --failure --reason "{brief reason}" +``` + +Record every routing outcome — this feeds future routing accuracy. + +**Immediate graduation for review findings** (MANDATORY): Issue found + fixed in same PR → (1) Record scoped, (2) Boost to 1.0, (3) Embed into pattern references, (4) Graduate, (5) Stage in same PR. + +**Gate**: Record at least one routing outcome for Simple+ tasks. + +--- + +## Error Handling + +### Error: "No Agent Matches Request" +Solution: Check INDEX files for near-matches. Route to closest agent with verification-before-completion. Report the gap. + +### Error: "Force-Route Conflict" +Solution: Apply most specific force-route first. Stack secondary routes as enhancements if compatible. + +### Error: "Plan Required But Not Created" +Solution: Stop execution. Create `task_plan.md`. Resume routing after plan is in place. 
+ +--- + +## References + +### Reference Files +- `skills/do/references/progressive-depth.md`: Progressive depth escalation protocol +- `skills/do/references/quality-loop.md`: Quality loop pipeline for Medium+ code modifications +- `agents/INDEX.json`: Agent triggers, metadata, and `not_for` disambiguation +- `skills/INDEX.json`: Skill triggers, force-route flags, pairs_with, and `not_for` disambiguation +- `scripts/routing-manifest.py`: Generates compact routing manifest from INDEX files +- `scripts/do-classify.py`: Deterministic request classification +- `scripts/do-enhance.py`: Deterministic enhancement/model selection +- `scripts/do-build-prompt.py`: Prompt templates (haiku-prompt, routing-banner, agent-prompt, task-spec) From 0f870b5d5b34e7e144f156975b0a0dd88a7f47d9 Mon Sep 17 00:00:00 2001 From: notque Date: Sun, 10 May 2026 17:07:28 +0000 Subject: [PATCH 4/4] fix: ruff format test_joy_check_instruction_mode.py Parenthesized assert messages collapsed to single-line f-strings per ruff format rules (line length fits within 120 char limit). 
--- .../tests/test_joy_check_instruction_mode.py | 32 +++++-------------- 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/scripts/tests/test_joy_check_instruction_mode.py b/scripts/tests/test_joy_check_instruction_mode.py index 1b89c8d1..c0327d8b 100644 --- a/scripts/tests/test_joy_check_instruction_mode.py +++ b/scripts/tests/test_joy_check_instruction_mode.py @@ -74,25 +74,19 @@ def test_forbidden_caps_fails(self, tmp_path: Path) -> None: """FORBIDDEN in instruction context is flagged.""" p = _write(tmp_path, "- FORBIDDEN: Do not commit credentials.\n") violations = scan_file(p) - assert any(v.pattern == "FORBIDDEN" for v in violations), ( - f"Expected FORBIDDEN violation, got: {violations}" - ) + assert any(v.pattern == "FORBIDDEN" for v in violations), f"Expected FORBIDDEN violation, got: {violations}" def test_never_caps_fails(self, tmp_path: Path) -> None: """NEVER in instruction context is flagged.""" p = _write(tmp_path, "NEVER edit code directly.\n") violations = scan_file(p) - assert any(v.pattern == "NEVER" for v in violations), ( - f"Expected NEVER violation, got: {violations}" - ) + assert any(v.pattern == "NEVER" for v in violations), f"Expected NEVER violation, got: {violations}" def test_do_not_fails(self, tmp_path: Path) -> None: """'do NOT' (case-insensitive start) is flagged.""" p = _write(tmp_path, "do NOT use git add -A.\n") violations = scan_file(p) - assert any(v.pattern == "do NOT" for v in violations), ( - f"Expected 'do NOT' violation, got: {violations}" - ) + assert any(v.pattern == "do NOT" for v in violations), f"Expected 'do NOT' violation, got: {violations}" def test_do_not_caps_fails(self, tmp_path: Path) -> None: """'Do NOT' (capital D) is flagged.""" @@ -106,33 +100,25 @@ def test_must_not_fails(self, tmp_path: Path) -> None: """'must NOT' is flagged.""" p = _write(tmp_path, "Hooks must NOT block tools.\n") violations = scan_file(p) - assert any(v.pattern == "must NOT" for v in violations), ( - f"Expected 'must NOT' 
violation, got: {violations}" - ) + assert any(v.pattern == "must NOT" for v in violations), f"Expected 'must NOT' violation, got: {violations}" def test_dont_instruction_start_fails(self, tmp_path: Path) -> None: """Line starting with Don't is flagged.""" p = _write(tmp_path, "- Don't mock the database.\n") violations = scan_file(p) - assert any(v.pattern == "Don't" for v in violations), ( - f"Expected Don't violation, got: {violations}" - ) + assert any(v.pattern == "Don't" for v in violations), f"Expected Don't violation, got: {violations}" def test_avoid_heading_fails(self, tmp_path: Path) -> None: """Heading containing 'Avoid' is flagged.""" p = _write(tmp_path, "### Patterns to Avoid\n\nSome content.\n") violations = scan_file(p) - assert any(v.pattern == "Avoid" for v in violations), ( - f"Expected Avoid violation, got: {violations}" - ) + assert any(v.pattern == "Avoid" for v in violations), f"Expected Avoid violation, got: {violations}" def test_avoid_as_bullet_start_fails(self, tmp_path: Path) -> None: """Bullet starting with 'Avoid' is flagged.""" p = _write(tmp_path, "- Avoid using global state.\n") violations = scan_file(p) - assert any(v.pattern == "Avoid" for v in violations), ( - f"Expected Avoid violation for bullet, got: {violations}" - ) + assert any(v.pattern == "Avoid" for v in violations), f"Expected Avoid violation for bullet, got: {violations}" # --------------------------------------------------------------------------- @@ -187,9 +173,7 @@ def test_subordinate_never_in_positive_instruction_passes(self, tmp_path: Path) content = "Credentials stay in .env files, never in code or logs.\n" p = _write(tmp_path, content) violations = scan_file(p) - assert violations == [], ( - f"Expected no violations for subordinate lowercase 'never', got: {violations}" - ) + assert violations == [], f"Expected no violations for subordinate lowercase 'never', got: {violations}" def test_blockquote_line_passes(self, tmp_path: Path) -> None: """Lines starting with 
> (blockquote) are skipped."""