diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 74c224d..a1576bb 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -12,7 +12,7 @@ { "name": "compound-engineering", "description": "OpenCode-first AI-powered development tools. Includes 29 specialized agents, 26 commands, and 24 skills spanning code review, research, design, and workflow automation, with generated Copilot support and Claude Code compatibility outputs.", - "version": "4.4.0", + "version": "4.5.0", "author": { "name": "The Rabak", "email": "arielvaron@gmail.com", diff --git a/.github/skills/brainstorming/SKILL.md b/.github/skills/brainstorming/SKILL.md index 6c9d44e..719c2a5 100644 --- a/.github/skills/brainstorming/SKILL.md +++ b/.github/skills/brainstorming/SKILL.md @@ -11,7 +11,7 @@ This skill provides detailed process knowledge for effective brainstorming sessi The brainstorm produces three lynchpin artifacts that anchor all downstream phases: 1. **Problem Narrative & User Story** -- the WHY (consumed by plan, work, and review) 2. **Architectural Context Map** -- the WHERE (consumed by execution agents and reviewers) -3. **Design Decisions** -- the WHAT (consumed by plan for task decomposition) +3. **Design Decisions** -- the WHAT (consumed by plan for execution-slice decomposition) ## When to Use This Skill @@ -306,18 +306,18 @@ This prevents wasted effort on misaligned designs. The brainstorm document is the **feature-level spec and handoff contract** for downstream work. 
The project constitution, when present, remains the repo-wide governing artifact: **`/workflows-plan` consumes:** -- Problem narrative and user story -> structures phases around the WHY -- Architectural context -> informs task decomposition, file mapping, dependencies +- Problem narrative and user story -> structures execution slices around the WHY +- Architectural context -> informs slice decomposition, file mapping, dependencies - Success criteria -> becomes the plan's acceptance criteria foundation - Key decisions -> preserved and enriched, not re-decided **`/deepen-plan` consumes:** -- Problem narrative -> evaluates whether deepened tasks still serve the original intent +- Problem narrative -> evaluates whether deepened slices still serve the original intent - Success criteria -> grounds best-practice research in actual goals **`/workflows-work` consumes:** - Architectural context -> populates `{{ARCHITECTURAL_CONTEXT}}` for every execution agent -- User story -> orchestrator validates each task contributes to the story +- User story -> orchestrator validates each slice contributes to the story - Problem narrative -> included in scoped prompts so agents understand purpose **`/workflows-review` consumes:** diff --git a/.github/skills/deepen-plan/SKILL.md b/.github/skills/deepen-plan/SKILL.md index 847200e..9450642 100644 --- a/.github/skills/deepen-plan/SKILL.md +++ b/.github/skills/deepen-plan/SKILL.md @@ -81,7 +81,8 @@ First, read and parse the plan to extract the WHY artifacts (problem narrative, - [ ] Overview/Proposed Solution sections - [ ] Technical Approach/Architecture -- [ ] Implementation phases/steps (noting which user story aspect each phase serves) +- [ ] `execution_shape` frontmatter + `## Execution Shape` section +- [ ] Execution packets / phase wrappers (noting which user story aspect each packet serves) - [ ] Code examples and file references - [ ] Acceptance criteria - [ ] Any UI/UX components mentioned @@ -100,41 +101,60 @@ The "Serves" 
column ensures every deepening activity traces back to WHY we're bu ### 1.1 Validate Execution Readiness -Check if the plan has sufficiently structured execution chunks for the subagent orchestration model in /workflows-work. Plans need per-task success criteria, test commands, and file lists. Also validate that phases trace to the user story. +Check if the plan has sufficiently structured execution packets for the subagent orchestration model in `/workflows-work`. Use `references/execution-shape.md` as the source of truth. Plans need packets that are independently executable, testable, and traceable back to the user story without forcing fake verticality. -**Scan each implementation task/phase for these required fields:** +**Resolve execution shape first:** -- [ ] **Files:** List of files to create or modify -- [ ] **Depends on:** Dependencies on other tasks -- [ ] **Success criteria:** Testable checkboxes defining "done" -- [ ] **Test command:** Exact command to verify completion -- [ ] **TDD alignment:** Task-level test commands collectively satisfy the resolved unit/e2e evidence contract, or the plan records a justified exception with replacement evidence +- [ ] Read `execution_shape.mode`; if missing, default it to `vertical-slices` +- [ ] Read `execution_shape.rationale`; require it when the mode is not `vertical-slices` +- [ ] Ensure the body includes a matching `## Execution Shape` section +- [ ] If the chosen mode looks wrong for the real work, add a `### WHY Reassessment` note instead of silently changing it + +**Scan each execution packet using the required fields from `references/execution-shape.md`:** + +- [ ] **`vertical-slices`:** slice type, serves, demo scenario, scope + scope fence, files, depends on, dependency type, success criteria, test command +- [ ] **`infra-track`:** capability enabled, consumers / downstream work unlocked, scope, files, depends on, risk / rollback, validation command, success criteria +- [ ] **`fix-batch`:** problem, repro 
/ expected outcome, files, depends on, validation command, success criteria +- [ ] **TDD alignment:** packet-level validation commands collectively satisfy the resolved unit/e2e evidence contract, or the plan records a justified exception with replacement evidence **Validate WHY tracing:** -- [ ] **Each phase has a "Serves:" line** stating which user story aspect or success criterion it delivers -- [ ] **Success criteria trace to plan-level success criteria** -- task criteria should be decomposed from the plan's success criteria, not invented independently -- [ ] **No orphan phases** -- every phase should trace to at least one success criterion. If a phase doesn't serve any success criterion, flag it: "Phase [X] doesn't trace to any success criterion. Is it necessary, or is a success criterion missing?" +- [ ] **Each packet has a purpose line** (`Serves`, `Consumers`, or equivalent) tying it to user story value or explicit downstream unlocks +- [ ] **Success criteria trace to plan-level success criteria** -- packet criteria should be decomposed from the plan's success criteria, not invented independently +- [ ] **No orphan packets** -- every packet should trace to at least one success criterion or explicit enabling outcome +- [ ] **Phase wrappers stay optional** -- if the plan uses phases or tracks, confirm they are grouping containers only and do not replace packet-level tracing -**Expected task format:** +**Expected packet format:** ```markdown -##### Task N.1: [Task Name] +##### Slice N.1: [Slice Title] +**Slice type:** tracer-bullet | expansion | hardening +**Serves:** [Which aspect of the user story / which success criterion this slice delivers] +**Demo scenario:** [Smallest end-to-end behavior this slice proves] **Files:** `path/to/file1.php`, `path/to/file2.php` -**Depends on:** Task N-1.2 (or "None") -**Success criteria:** +**Depends on:** Slice N-1.2 (or "None") +**Dependency type:** real | stub-available | parallel-safe + +###### Scope +- **Owns:** [What 
this slice changes] +- **Non-goals:** [What intentionally waits] +- **Scope fence:** [Boundary that keeps the slice thin] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `command to run` + +###### Evidence +- **Test command:** `command to run` ``` **Scoring:** -Count how many implementation tasks have all four fields. Report: +Count how many execution packets have the full structure. Report: ``` -Execution Readiness: X/Y tasks have complete structure (Z%) +Execution Readiness: X/Y packets have complete structure (Z%) ``` **Actions based on score:** @@ -142,50 +162,56 @@ Execution Readiness: X/Y tasks have complete structure (Z%) | Score | Action | |-------|--------| | 80-100% | Plan is execution-ready. Proceed with deepening. | -| 50-79% | Flag incomplete tasks. During deepening, add missing fields. | -| 0-49% | Plan needs significant restructuring. Add an "Execution Readiness" enhancement pass that decomposes vague phases into structured tasks with all required fields. **Note:** `/workflows-work` will refuse to execute plans that lack this structure. | +| 50-79% | Flag incomplete packets. During deepening, add missing fields. | +| 0-49% | Plan needs significant restructuring. Add an "Execution Readiness" enhancement pass that decomposes vague phases/tasks into the packet shape required by the selected mode. **Note:** `/workflows-work` will refuse to execute plans that lack a coherent execution shape unless the user explicitly approves a mode change or legacy adaptation. | -**For tasks missing structure, the deepening process should:** +**For packets missing structure, the deepening process should:** -1. Break vague phases into specific, scoped tasks -2. Identify which files each task will create or modify -3. Write concrete success criteria (not vague goals) -4. Determine the test command (look at existing test patterns in the codebase) -5. 
Make it explicit whether the test command contributes unit evidence, e2e evidence, or both -6. Map dependencies between tasks -7. Add a suggested commit message per task (conventional format: `feat(scope): description`) +1. Pick or confirm the execution shape that best matches the real work +2. Break vague phases or legacy tasks into specific packets for that mode +3. Identify the smallest honest outcome each packet proves or unlocks +4. Identify which files each packet will create or modify +5. Write concrete success criteria (not vague goals) +6. Determine the validation command (look at existing test patterns in the codebase) +7. Make it explicit whether the validation command contributes unit evidence, e2e evidence, or both +8. Map dependencies between packets +9. Add a suggested commit message per packet (conventional format: `feat(scope): description`) -### 1.2 Task Complexity Check +### 1.2 Execution Shape Complexity Check -Check if any tasks are too large for reliable subagent execution. Large tasks with many files or success criteria should be split. +Check if any packets are too large, too vague, or shaped incorrectly for reliable subagent execution. Cross-layer work is allowed in `vertical-slices`; the failure mode is not "touches backend and frontend" but "tries to deliver multiple outcomes or no honest outcome at all." 
-**For each task, check complexity:** +**For each packet, check complexity against the selected mode:** | Metric | Threshold | Action | |--------|-----------|--------| -| Files touched | > 3 files | Flag for splitting | +| Outcomes or unlocks | > 1 meaningful outcome | Flag for splitting | +| Files touched | > 6 files | Flag for review; confirm the packet is still thin | | Success criteria | > 5 criteria | Flag for splitting | -| Multiple concerns | Mixes backend + frontend | Flag for splitting | -| Vague scope | "Implement the feature" | Flag for clarification | +| Scope fence | Missing or vague | Flag for clarification | +| Shape fit | `vertical-slices` used for horizontal-only work, or `infra-track` / `fix-batch` used to hide a real feature slice | Reassess mode | +| Risk controls | `Blast radius: high` with no rollback path | Add safety fields before execution | + +**Important:** A packet that touches backend + frontend is **not automatically too large**. If the same thin slice needs a migration, service method, API handler, and tiny UI change to prove one observable behavior, keep it intact. -**If any tasks exceed thresholds:** +**If any packets exceed thresholds:** Report: ``` -Task Complexity Warning: X tasks may be too large for reliable subagent execution. +Execution Shape Warning: X packets may be too large or incorrectly shaped for reliable subagent execution. 
-Task 2.1: "Build user auth" -- touches 5 files, 7 success criteria - Suggestion: Split into "Create auth service" (3 files) and "Add auth middleware" (2 files) +Slice 2.1: "User can complete first login tracer bullet" -- 2 demo scenarios, 7 success criteria + Suggestion: Split into "User submits credentials and receives success state" and "User sees first authenticated dashboard shell" -Task 3.2: "Build dashboard UI" -- mixes backend API + frontend component - Suggestion: Split into "Create dashboard API endpoint" and "Build dashboard component" +Packet 3.2: "Create auth schema foundation" -- no demo scenario, horizontal-only outcome + Suggestion: Either rewrite as "User can submit credentials and persist the first auth record" or switch this track to `infra-track` if it is truly enablement-only ``` -Suggest splits that create self-contained tasks with non-overlapping file sets. Each split task should be completable by one subagent in a single session. **When splitting, ensure each new task retains its "Serves:" tracing to the user story -- a split should never orphan a task from its purpose.** +Suggest splits that create self-contained packets with clear ownership and non-overlapping file sets where possible. **When splitting, ensure each new packet retains its tracing to the user story or enabling outcome.** -**This validation ensures the plan is ready for `/workflows-work`'s subagent orchestration model**, where each task is delegated to a focused subagent with clear scope and termination criteria. +**This validation ensures the plan is ready for `/workflows-work`'s subagent orchestration model**, where each packet is delegated to a focused subagent with clear scope, proof, and termination criteria. 
### 1.5 Re-fetch Source Documents (if available) @@ -607,7 +633,7 @@ Merge research findings back into the plan, adding depth without changing the or - User Story - Architectural Context - Success Criteria -- Phase "Serves:" lines +- Execution shape contract and packet tracing lines - Handoff frontmatter If research suggests changes to these, add a `### WHY Reassessment` note at the end of the plan for the user to review manually. Do not edit the originals. @@ -622,7 +648,7 @@ If research suggests changes to these, add a `### WHY Reassessment` note at the ```markdown ## [Original Section Title] -[Original content preserved -- including any "Serves:" lines] +[Original content preserved -- including any execution-shape and packet tracing lines] ### Research Insights @@ -664,7 +690,8 @@ At the top of the plan, add a summary section: - User Story: [preserved / flagged for reassessment] - Architectural Context: [preserved / expanded / flagged for reassessment] - Success Criteria: [preserved / flagged for reassessment] -- Phase tracing: [all phases still trace to user story: yes/no] +- Execution shape: [preserved / flagged for reassessment] +- Packet tracing: [all packets still trace to user story or enabling outcome: yes/no] ### TDD Contract Check - Precedence: [plan overrides local / inherit uses local / fallback default noted] @@ -707,16 +734,17 @@ Before finalizing: - [ ] Links are valid and relevant - [ ] No contradictions between sections - [ ] Enhancement summary accurately reflects changes -- [ ] Implementation tasks have execution-ready structure (files, success criteria, test commands, dependencies) -- [ ] TDD contract is explicit, precedence is documented, and unit/e2e evidence stays aligned with task test commands unless an exception says otherwise +- [ ] Execution packets have execution-ready structure for the selected mode +- [ ] TDD contract is explicit, precedence is documented, and unit/e2e evidence stays aligned with packet validation commands unless an 
exception says otherwise **WHY integrity:** - [ ] Problem Narrative, User Story, Success Criteria, and Architectural Context are unmodified from the original plan - [ ] Handoff frontmatter is intact and still accurate -- [ ] Every phase still has its "Serves:" tracing line -- [ ] No new phases added without a "Serves:" line connecting them to the user story +- [ ] `execution_shape` frontmatter and `## Execution Shape` still agree +- [ ] Every packet still has its tracing line +- [ ] No new packets were added without a tracing line connecting them to the user story or enabling outcome - [ ] Enhancements tagged with which success criterion they serve -- [ ] Scope-expanding recommendations flagged in "Scope Warnings" rather than silently added to phases +- [ ] Scope-expanding recommendations flagged in "Scope Warnings" rather than silently added to packets - [ ] If WHY reassessment was needed, it's in a clearly marked section at the end (not inline edits) - [ ] `tdd` frontmatter and `## TDD & Evidence Contract` still agree on precedence, effective loop, evidence, and exceptions diff --git a/.github/skills/orchestrating-swarms/SKILL.md b/.github/skills/orchestrating-swarms/SKILL.md index ba1662d..246c9ff 100644 --- a/.github/skills/orchestrating-swarms/SKILL.md +++ b/.github/skills/orchestrating-swarms/SKILL.md @@ -28,7 +28,7 @@ Use swarms when the work has real parallelism, specialist boundaries, or depende - Keep the team small. Extra workers are justified only when they remove wall-clock time or increase specialist quality. ### Task design -- Write tasks as outcomes, not vague topics. +- Write work items as outcomes, not vague topics. - Keep scopes non-overlapping unless the assignment is an explicit cross-check. - Prefer DAG-style dependencies over ad hoc sequencing. - State what evidence counts as done: files changed, tests run, findings delivered, screenshots captured, or open questions listed. 
@@ -42,7 +42,7 @@ Have workers report in a terse, machine-checkable shape: - `risks`: unresolved concerns ### Leader responsibilities -- Keep the canonical task list and dependency map. +- Keep the canonical slice/work-item list and dependency map. - Resolve blockers instead of letting workers stall silently. - Merge duplicate findings and remove contradictory advice. - Re-run shared verification after integrating worker output. diff --git a/.github/skills/setup/SKILL.md b/.github/skills/setup/SKILL.md index 2af65a2..64cdcae 100644 --- a/.github/skills/setup/SKILL.md +++ b/.github/skills/setup/SKILL.md @@ -211,7 +211,7 @@ options: - `tdd.evidence.unit`: `required` or `optional` - `tdd.evidence.e2e`: `required` or `optional` - `tdd.exceptions`: `[]` by default. Plans must carry any justified exceptions. -- `review_mode`: "bulk" (default), "inline", or "both" (controls per-task review in workflows:work) +- `review_mode`: "bulk" (default), "inline", or "both" (controls per-slice review in workflows:work) Write `compound-engineering.local.md`: diff --git a/.github/skills/workflows-architecture/references/execution-agent-prompt.md b/.github/skills/workflows-architecture/references/execution-agent-prompt.md index 78cd269..9047914 100644 --- a/.github/skills/workflows-architecture/references/execution-agent-prompt.md +++ b/.github/skills/workflows-architecture/references/execution-agent-prompt.md @@ -1,5 +1,10 @@ --- -{} +model: claude-sonnet-4.6 +platforms: + copilot: + model: gpt-5.3-codex + opencode: + model: openrouter/moonshotai/kimi-k2.6 --- # Execution Agent Prompt Template @@ -10,24 +15,32 @@ This template is used by the `workflows:work` orchestrator to construct prompts --- -You are an execution agent implementing a specific task from a work plan. Follow the 4-phase protocol below exactly. +You are an execution agent implementing a specific execution unit from a work plan. Follow the 4-phase protocol below exactly. 
-## Your Task +## Your Unit -**Task:** {{TASK_NAME}} +**Unit:** {{UNIT_TITLE}} -{{TASK_DESCRIPTION}} +{{UNIT_DESCRIPTION}} + +**Unit kind:** {{UNIT_KIND}} + +**Outcome scenario:** {{OUTCOME_SCENARIO}} + +**Scope:** {{UNIT_SCOPE}} + +**Scope fence:** {{UNIT_SCOPE_FENCE}} **Files to create/modify:** {{FILE_LIST}} **Success criteria:** {{SUCCESS_CRITERIA}} -**Test command:** `{{TEST_COMMAND}}` +**Validation command:** `{{VALIDATION_COMMAND}}` **Dependencies completed:** {{COMPLETED_DEPENDENCIES}} -## Why This Task Exists +## Why This Unit Exists {{WHY_CONTEXT}} @@ -35,7 +48,11 @@ You are an execution agent implementing a specific task from a work plan. Follow {{ARCHITECTURAL_CONTEXT}} -## Learnings from Previous Tasks +## Architecture Handoff + +{{ARCHITECTURE_HANDOFF}} + +## Learnings from Previous Units {{LEARNINGS_BRIEF}} @@ -51,7 +68,7 @@ You are an execution agent implementing a specific task from a work plan. Follow ## Phase 1: Understand Before Building -Before writing ANY code, review the task requirements AND the "Why This Task Exists" section carefully. +Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. **If anything is unclear, ambiguous, or could be interpreted multiple ways:** - List your questions explicitly @@ -60,7 +77,7 @@ Before writing ANY code, review the task requirements AND the "Why This Task Exi **If everything is clear:** - State your interpretation of the requirements in 2-3 sentences -- State how this task serves the overall user story (from the WHY context) +- State how this unit serves the overall user story (from the WHY context) - List any assumptions you are making (even obvious ones) - Proceed to Phase 2 @@ -84,7 +101,7 @@ If tests fail after implementation: 1. Read the error message carefully -- understand what failed and why 2. Analyze whether the failure is in your implementation or in the test 3. Fix the issue -4. Re-run the test command +4. Re-run the validation command 5. 
Repeat up to 3 total attempts 6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly @@ -98,7 +115,7 @@ Before reporting back, review your own work with fresh eyes. Go through each che - [ ] Did I miss any requirements? **Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Task Exists" section describes? +- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? - [ ] Would a user achieve the stated outcome with this code? - [ ] Did I build anything that doesn't trace back to the success criteria or user story? @@ -117,7 +134,7 @@ Before reporting back, review your own work with fresh eyes. Go through each che **Testing:** - [ ] Do tests verify actual behavior (not just mock behavior)? - [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? +- [ ] Did I run the validation command and confirm it passes? **Evidence:** - [ ] Can I show actual test output (not just "tests pass")? @@ -133,13 +150,13 @@ Return a structured execution report in exactly this format: Use `commands/workflows/references/tdd-evidence-contract.md` as the single source for the `### TDD Evidence` block. `Red` and `Green` prove behavior coverage. `Post-Refactor Green` proves cleanup safety after refactor. Each `Evidence` line should quote the decisive failing or passing signal in one sentence, not a narrative. 
```markdown -## Execution Report: [Task Name] +## Execution Report: [Unit Title] ### Interpretation [Your 2-3 sentence interpretation of what was asked] ### Purpose Served -[Which user story aspect / success criterion this task delivers, from the WHY context] +[Which user story aspect / success criterion this unit delivers, from the WHY context] ### Assumptions Made - [List each assumption, even if obvious] @@ -154,7 +171,7 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc [Insert the exact Ralph evidence block from `commands/workflows/references/tdd-evidence-contract.md`. Preserve the `Red`, `Green`, and `Post-Refactor Green` headings with their command/result/evidence fields.] ### Test Results -- Command: `[test command]` +- Command: `[validation command]` - Result: PASS/FAIL - Attempts: [n] - Output: @@ -171,7 +188,7 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc [If no problems: "None"] ### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] +- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future units] [If none: "None"] @@ -188,9 +205,9 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc _This section is included only when the resolved TDD contract explicitly allows standard implementation._ 1. Read referenced files and understand existing patterns -2. Implement the task following project conventions +2. Implement the unit following project conventions 3. Write tests matching the success criteria -4. Run the test command: `{{TEST_COMMAND}}` +4. Run the validation command: `{{VALIDATION_COMMAND}}` 5. 
If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) --- diff --git a/.github/skills/workflows-architecture/references/execution-shape.md b/.github/skills/workflows-architecture/references/execution-shape.md new file mode 100644 index 0000000..4ad4d31 --- /dev/null +++ b/.github/skills/workflows-architecture/references/execution-shape.md @@ -0,0 +1,93 @@ +# Execution Shape Contract + +Use this reference when planning, deepening, or executing work so the workflow can preserve judgment without re-explaining the same rules in every prompt. + +## Default + +Choose `vertical-slices` unless that would create fake end-to-end work just to satisfy the template. + +## Allowed modes + +### `vertical-slices` (default) + +Use when a thin, testable, end-to-end behavior exists. + +Required packet: +- `Slice type` +- `Serves` +- `Demo scenario` +- `Scope` +- `Scope fence` +- `Files` +- `Depends on` +- `Dependency type` +- `Success criteria` +- `Test command` + +### `infra-track` + +Use for enabling or foundational work where no honest user-visible tracer bullet exists yet. + +Required packet: +- `Capability enabled` +- `Consumers / downstream work unlocked` +- `Scope` +- `Files` +- `Depends on` +- `Risk / rollback` +- `Validation command` +- `Success criteria` + +### `fix-batch` + +Use for a series of small mostly independent fixes where forcing one vertical slice would blur the real work. + +Required packet: +- `Problem` +- `Repro / expected outcome` +- `Files` +- `Depends on` +- `Validation command` +- `Success criteria` + +## Selection rules + +1. Default to `vertical-slices`. +2. Switch to `infra-track` only when the honest near-term value is enabling capability, not a user-visible behavior. +3. Switch to `fix-batch` only when the work is truly a batch of small fixes, not a feature being split too late. +4. Never force `vertical-slices` if that would create fake verticality. +5. 
If the mode is not the default, record why in `execution_shape.rationale` and in `## Execution Shape`. + +## Plan shape + +Add this frontmatter block to every plan: + +```yaml +execution_shape: + mode: vertical-slices # vertical-slices | infra-track | fix-batch + rationale: "" # required when mode is not vertical-slices +``` + +Add this body section: + +```markdown +## Execution Shape +- **Mode:** [vertical-slices | infra-track | fix-batch] +- **Why:** [1-2 sentences] +``` + +Then use the packet section that matches the chosen mode: +- `## Execution Slices` +- `## Infrastructure Work Packets` +- `## Fix Batch Items` + +## Deepening rules + +- Validate the plan against the chosen mode, not against `vertical-slices` unconditionally. +- You may recommend switching modes if the selected one is clearly wrong, but record that as a `WHY Reassessment` note instead of silently rewriting intent. + +## Execution rules + +- `/workflows:work` must execute the units defined by the chosen mode. +- Do not coerce `infra-track` or `fix-batch` plans into slices unless the user explicitly approves a mode change. +- Session tracking may stay generic (`unit`, `work status`) even when the selected mode is `vertical-slices`. diff --git a/.github/skills/workflows-architecture/references/spec-review-prompt.md b/.github/skills/workflows-architecture/references/spec-review-prompt.md index d6bcaa5..34bcbc1 100644 --- a/.github/skills/workflows-architecture/references/spec-review-prompt.md +++ b/.github/skills/workflows-architecture/references/spec-review-prompt.md @@ -14,15 +14,15 @@ You are a spec compliance reviewer. 
Your job is to verify whether an implementat ## What Was Requested -{{TASK_REQUIREMENTS}} +{{UNIT_REQUIREMENTS}} ## Success Criteria {{SUCCESS_CRITERIA}} -## Task Purpose +## Unit Purpose -{{TASK_SERVES}} +{{UNIT_PURPOSE}} ## What Implementer Claims They Built diff --git a/.github/skills/workflows-brainstorm/references/execution-agent-prompt.md b/.github/skills/workflows-brainstorm/references/execution-agent-prompt.md index 78cd269..9047914 100644 --- a/.github/skills/workflows-brainstorm/references/execution-agent-prompt.md +++ b/.github/skills/workflows-brainstorm/references/execution-agent-prompt.md @@ -1,5 +1,10 @@ --- -{} +model: claude-sonnet-4.6 +platforms: + copilot: + model: gpt-5.3-codex + opencode: + model: openrouter/moonshotai/kimi-k2.6 --- # Execution Agent Prompt Template @@ -10,24 +15,32 @@ This template is used by the `workflows:work` orchestrator to construct prompts --- -You are an execution agent implementing a specific task from a work plan. Follow the 4-phase protocol below exactly. +You are an execution agent implementing a specific execution unit from a work plan. Follow the 4-phase protocol below exactly. -## Your Task +## Your Unit -**Task:** {{TASK_NAME}} +**Unit:** {{UNIT_TITLE}} -{{TASK_DESCRIPTION}} +{{UNIT_DESCRIPTION}} + +**Unit kind:** {{UNIT_KIND}} + +**Outcome scenario:** {{OUTCOME_SCENARIO}} + +**Scope:** {{UNIT_SCOPE}} + +**Scope fence:** {{UNIT_SCOPE_FENCE}} **Files to create/modify:** {{FILE_LIST}} **Success criteria:** {{SUCCESS_CRITERIA}} -**Test command:** `{{TEST_COMMAND}}` +**Validation command:** `{{VALIDATION_COMMAND}}` **Dependencies completed:** {{COMPLETED_DEPENDENCIES}} -## Why This Task Exists +## Why This Unit Exists {{WHY_CONTEXT}} @@ -35,7 +48,11 @@ You are an execution agent implementing a specific task from a work plan. 
Follow {{ARCHITECTURAL_CONTEXT}} -## Learnings from Previous Tasks +## Architecture Handoff + +{{ARCHITECTURE_HANDOFF}} + +## Learnings from Previous Units {{LEARNINGS_BRIEF}} @@ -51,7 +68,7 @@ You are an execution agent implementing a specific task from a work plan. Follow ## Phase 1: Understand Before Building -Before writing ANY code, review the task requirements AND the "Why This Task Exists" section carefully. +Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. **If anything is unclear, ambiguous, or could be interpreted multiple ways:** - List your questions explicitly @@ -60,7 +77,7 @@ Before writing ANY code, review the task requirements AND the "Why This Task Exi **If everything is clear:** - State your interpretation of the requirements in 2-3 sentences -- State how this task serves the overall user story (from the WHY context) +- State how this unit serves the overall user story (from the WHY context) - List any assumptions you are making (even obvious ones) - Proceed to Phase 2 @@ -84,7 +101,7 @@ If tests fail after implementation: 1. Read the error message carefully -- understand what failed and why 2. Analyze whether the failure is in your implementation or in the test 3. Fix the issue -4. Re-run the test command +4. Re-run the validation command 5. Repeat up to 3 total attempts 6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly @@ -98,7 +115,7 @@ Before reporting back, review your own work with fresh eyes. Go through each che - [ ] Did I miss any requirements? **Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Task Exists" section describes? +- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? - [ ] Would a user achieve the stated outcome with this code? - [ ] Did I build anything that doesn't trace back to the success criteria or user story? 
@@ -117,7 +134,7 @@ Before reporting back, review your own work with fresh eyes. Go through each che **Testing:** - [ ] Do tests verify actual behavior (not just mock behavior)? - [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? +- [ ] Did I run the validation command and confirm it passes? **Evidence:** - [ ] Can I show actual test output (not just "tests pass")? @@ -133,13 +150,13 @@ Return a structured execution report in exactly this format: Use `commands/workflows/references/tdd-evidence-contract.md` as the single source for the `### TDD Evidence` block. `Red` and `Green` prove behavior coverage. `Post-Refactor Green` proves cleanup safety after refactor. Each `Evidence` line should quote the decisive failing or passing signal in one sentence, not a narrative. ```markdown -## Execution Report: [Task Name] +## Execution Report: [Unit Title] ### Interpretation [Your 2-3 sentence interpretation of what was asked] ### Purpose Served -[Which user story aspect / success criterion this task delivers, from the WHY context] +[Which user story aspect / success criterion this unit delivers, from the WHY context] ### Assumptions Made - [List each assumption, even if obvious] @@ -154,7 +171,7 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc [Insert the exact Ralph evidence block from `commands/workflows/references/tdd-evidence-contract.md`. Preserve the `Red`, `Green`, and `Post-Refactor Green` headings with their command/result/evidence fields.] 
### Test Results -- Command: `[test command]` +- Command: `[validation command]` - Result: PASS/FAIL - Attempts: [n] - Output: @@ -171,7 +188,7 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc [If no problems: "None"] ### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] +- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future units] [If none: "None"] @@ -188,9 +205,9 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc _This section is included only when the resolved TDD contract explicitly allows standard implementation._ 1. Read referenced files and understand existing patterns -2. Implement the task following project conventions +2. Implement the unit following project conventions 3. Write tests matching the success criteria -4. Run the test command: `{{TEST_COMMAND}}` +4. Run the validation command: `{{VALIDATION_COMMAND}}` 5. If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) --- diff --git a/.github/skills/workflows-brainstorm/references/execution-shape.md b/.github/skills/workflows-brainstorm/references/execution-shape.md new file mode 100644 index 0000000..4ad4d31 --- /dev/null +++ b/.github/skills/workflows-brainstorm/references/execution-shape.md @@ -0,0 +1,93 @@ +# Execution Shape Contract + +Use this reference when planning, deepening, or executing work so the workflow can preserve judgment without re-explaining the same rules in every prompt. + +## Default + +Choose `vertical-slices` unless that would create fake end-to-end work just to satisfy the template. + +## Allowed modes + +### `vertical-slices` (default) + +Use when a thin, testable, end-to-end behavior exists. 
+ +Required packet: +- `Slice type` +- `Serves` +- `Demo scenario` +- `Scope` +- `Scope fence` +- `Files` +- `Depends on` +- `Dependency type` +- `Success criteria` +- `Test command` + +### `infra-track` + +Use for enabling or foundational work where no honest user-visible tracer bullet exists yet. + +Required packet: +- `Capability enabled` +- `Consumers / downstream work unlocked` +- `Scope` +- `Files` +- `Depends on` +- `Risk / rollback` +- `Validation command` +- `Success criteria` + +### `fix-batch` + +Use for a series of small, mostly independent fixes where forcing one vertical slice would blur the real work. + +Required packet: +- `Problem` +- `Repro / expected outcome` +- `Files` +- `Depends on` +- `Validation command` +- `Success criteria` + +## Selection rules + +1. Default to `vertical-slices`. +2. Switch to `infra-track` only when the honest near-term value is enabling capability, not a user-visible behavior. +3. Switch to `fix-batch` only when the work is truly a batch of small fixes, not a feature being split too late. +4. Never force `vertical-slices` if that would create fake verticality. +5. If the mode is not the default, record why in `execution_shape.rationale` and in `## Execution Shape`. + +## Plan shape + +Add this frontmatter block to every plan: + +```yaml +execution_shape: + mode: vertical-slices # vertical-slices | infra-track | fix-batch + rationale: "" # required when mode is not vertical-slices +``` + +Add this body section: + +```markdown +## Execution Shape +- **Mode:** [vertical-slices | infra-track | fix-batch] +- **Why:** [1-2 sentences] +``` + +Then use the packet section that matches the chosen mode: +- `## Execution Slices` +- `## Infrastructure Work Packets` +- `## Fix Batch Items` + +## Deepening rules + +- Validate the plan against the chosen mode, not against `vertical-slices` unconditionally. 
+- You may recommend switching modes if the selected one is clearly wrong, but record that as a `WHY Reassessment` note instead of silently rewriting intent. + +## Execution rules + +- `/workflows:work` must execute the units defined by the chosen mode. +- Do not coerce `infra-track` or `fix-batch` plans into slices unless the user explicitly approves a mode change. +- Session tracking may stay generic (`unit`, `work status`) even when the selected mode is `vertical-slices`. diff --git a/.github/skills/workflows-brainstorm/references/spec-review-prompt.md b/.github/skills/workflows-brainstorm/references/spec-review-prompt.md index d6bcaa5..34bcbc1 100644 --- a/.github/skills/workflows-brainstorm/references/spec-review-prompt.md +++ b/.github/skills/workflows-brainstorm/references/spec-review-prompt.md @@ -14,15 +14,15 @@ You are a spec compliance reviewer. Your job is to verify whether an implementat ## What Was Requested -{{TASK_REQUIREMENTS}} +{{UNIT_REQUIREMENTS}} ## Success Criteria {{SUCCESS_CRITERIA}} -## Task Purpose +## Unit Purpose -{{TASK_SERVES}} +{{UNIT_PURPOSE}} ## What Implementer Claims They Built diff --git a/.github/skills/workflows-compound-refresh/references/execution-agent-prompt.md b/.github/skills/workflows-compound-refresh/references/execution-agent-prompt.md index 78cd269..9047914 100644 --- a/.github/skills/workflows-compound-refresh/references/execution-agent-prompt.md +++ b/.github/skills/workflows-compound-refresh/references/execution-agent-prompt.md @@ -1,5 +1,10 @@ --- -{} +model: claude-sonnet-4.6 +platforms: + copilot: + model: gpt-5.3-codex + opencode: + model: openrouter/moonshotai/kimi-k2.6 --- # Execution Agent Prompt Template @@ -10,24 +15,32 @@ This template is used by the `workflows:work` orchestrator to construct prompts --- -You are an execution agent implementing a specific task from a work plan. Follow the 4-phase protocol below exactly. +You are an execution agent implementing a specific execution unit from a work plan. 
Follow the 4-phase protocol below exactly. -## Your Task +## Your Unit -**Task:** {{TASK_NAME}} +**Unit:** {{UNIT_TITLE}} -{{TASK_DESCRIPTION}} +{{UNIT_DESCRIPTION}} + +**Unit kind:** {{UNIT_KIND}} + +**Outcome scenario:** {{OUTCOME_SCENARIO}} + +**Scope:** {{UNIT_SCOPE}} + +**Scope fence:** {{UNIT_SCOPE_FENCE}} **Files to create/modify:** {{FILE_LIST}} **Success criteria:** {{SUCCESS_CRITERIA}} -**Test command:** `{{TEST_COMMAND}}` +**Validation command:** `{{VALIDATION_COMMAND}}` **Dependencies completed:** {{COMPLETED_DEPENDENCIES}} -## Why This Task Exists +## Why This Unit Exists {{WHY_CONTEXT}} @@ -35,7 +48,11 @@ You are an execution agent implementing a specific task from a work plan. Follow {{ARCHITECTURAL_CONTEXT}} -## Learnings from Previous Tasks +## Architecture Handoff + +{{ARCHITECTURE_HANDOFF}} + +## Learnings from Previous Units {{LEARNINGS_BRIEF}} @@ -51,7 +68,7 @@ You are an execution agent implementing a specific task from a work plan. Follow ## Phase 1: Understand Before Building -Before writing ANY code, review the task requirements AND the "Why This Task Exists" section carefully. +Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. **If anything is unclear, ambiguous, or could be interpreted multiple ways:** - List your questions explicitly @@ -60,7 +77,7 @@ Before writing ANY code, review the task requirements AND the "Why This Task Exi **If everything is clear:** - State your interpretation of the requirements in 2-3 sentences -- State how this task serves the overall user story (from the WHY context) +- State how this unit serves the overall user story (from the WHY context) - List any assumptions you are making (even obvious ones) - Proceed to Phase 2 @@ -84,7 +101,7 @@ If tests fail after implementation: 1. Read the error message carefully -- understand what failed and why 2. Analyze whether the failure is in your implementation or in the test 3. Fix the issue -4. 
Re-run the test command +4. Re-run the validation command 5. Repeat up to 3 total attempts 6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly @@ -98,7 +115,7 @@ Before reporting back, review your own work with fresh eyes. Go through each che - [ ] Did I miss any requirements? **Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Task Exists" section describes? +- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? - [ ] Would a user achieve the stated outcome with this code? - [ ] Did I build anything that doesn't trace back to the success criteria or user story? @@ -117,7 +134,7 @@ Before reporting back, review your own work with fresh eyes. Go through each che **Testing:** - [ ] Do tests verify actual behavior (not just mock behavior)? - [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? +- [ ] Did I run the validation command and confirm it passes? **Evidence:** - [ ] Can I show actual test output (not just "tests pass")? @@ -133,13 +150,13 @@ Return a structured execution report in exactly this format: Use `commands/workflows/references/tdd-evidence-contract.md` as the single source for the `### TDD Evidence` block. `Red` and `Green` prove behavior coverage. `Post-Refactor Green` proves cleanup safety after refactor. Each `Evidence` line should quote the decisive failing or passing signal in one sentence, not a narrative. 
```markdown -## Execution Report: [Task Name] +## Execution Report: [Unit Title] ### Interpretation [Your 2-3 sentence interpretation of what was asked] ### Purpose Served -[Which user story aspect / success criterion this task delivers, from the WHY context] +[Which user story aspect / success criterion this unit delivers, from the WHY context] ### Assumptions Made - [List each assumption, even if obvious] @@ -154,7 +171,7 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc [Insert the exact Ralph evidence block from `commands/workflows/references/tdd-evidence-contract.md`. Preserve the `Red`, `Green`, and `Post-Refactor Green` headings with their command/result/evidence fields.] ### Test Results -- Command: `[test command]` +- Command: `[validation command]` - Result: PASS/FAIL - Attempts: [n] - Output: @@ -171,7 +188,7 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc [If no problems: "None"] ### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] +- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future units] [If none: "None"] @@ -188,9 +205,9 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc _This section is included only when the resolved TDD contract explicitly allows standard implementation._ 1. Read referenced files and understand existing patterns -2. Implement the task following project conventions +2. Implement the unit following project conventions 3. Write tests matching the success criteria -4. Run the test command: `{{TEST_COMMAND}}` +4. Run the validation command: `{{VALIDATION_COMMAND}}` 5. 
If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) --- diff --git a/.github/skills/workflows-compound-refresh/references/execution-shape.md b/.github/skills/workflows-compound-refresh/references/execution-shape.md new file mode 100644 index 0000000..4ad4d31 --- /dev/null +++ b/.github/skills/workflows-compound-refresh/references/execution-shape.md @@ -0,0 +1,93 @@ +# Execution Shape Contract + +Use this reference when planning, deepening, or executing work so the workflow can preserve judgment without re-explaining the same rules in every prompt. + +## Default + +Choose `vertical-slices` unless that would create fake end-to-end work just to satisfy the template. + +## Allowed modes + +### `vertical-slices` (default) + +Use when a thin, testable, end-to-end behavior exists. + +Required packet: +- `Slice type` +- `Serves` +- `Demo scenario` +- `Scope` +- `Scope fence` +- `Files` +- `Depends on` +- `Dependency type` +- `Success criteria` +- `Test command` + +### `infra-track` + +Use for enabling or foundational work where no honest user-visible tracer bullet exists yet. + +Required packet: +- `Capability enabled` +- `Consumers / downstream work unlocked` +- `Scope` +- `Files` +- `Depends on` +- `Risk / rollback` +- `Validation command` +- `Success criteria` + +### `fix-batch` + +Use for a series of small, mostly independent fixes where forcing one vertical slice would blur the real work. + +Required packet: +- `Problem` +- `Repro / expected outcome` +- `Files` +- `Depends on` +- `Validation command` +- `Success criteria` + +## Selection rules + +1. Default to `vertical-slices`. +2. Switch to `infra-track` only when the honest near-term value is enabling capability, not a user-visible behavior. +3. Switch to `fix-batch` only when the work is truly a batch of small fixes, not a feature being split too late. +4. Never force `vertical-slices` if that would create fake verticality. +5. 
If the mode is not the default, record why in `execution_shape.rationale` and in `## Execution Shape`. + +## Plan shape + +Add this frontmatter block to every plan: + +```yaml +execution_shape: + mode: vertical-slices # vertical-slices | infra-track | fix-batch + rationale: "" # required when mode is not vertical-slices +``` + +Add this body section: + +```markdown +## Execution Shape +- **Mode:** [vertical-slices | infra-track | fix-batch] +- **Why:** [1-2 sentences] +``` + +Then use the packet section that matches the chosen mode: +- `## Execution Slices` +- `## Infrastructure Work Packets` +- `## Fix Batch Items` + +## Deepening rules + +- Validate the plan against the chosen mode, not against `vertical-slices` unconditionally. +- You may recommend switching modes if the selected one is clearly wrong, but record that as a `WHY Reassessment` note instead of silently rewriting intent. + +## Execution rules + +- `/workflows:work` must execute the units defined by the chosen mode. +- Do not coerce `infra-track` or `fix-batch` plans into slices unless the user explicitly approves a mode change. +- Session tracking may stay generic (`unit`, `work status`) even when the selected mode is `vertical-slices`. diff --git a/.github/skills/workflows-compound-refresh/references/spec-review-prompt.md b/.github/skills/workflows-compound-refresh/references/spec-review-prompt.md index d6bcaa5..34bcbc1 100644 --- a/.github/skills/workflows-compound-refresh/references/spec-review-prompt.md +++ b/.github/skills/workflows-compound-refresh/references/spec-review-prompt.md @@ -14,15 +14,15 @@ You are a spec compliance reviewer. 
Your job is to verify whether an implementat ## What Was Requested -{{TASK_REQUIREMENTS}} +{{UNIT_REQUIREMENTS}} ## Success Criteria {{SUCCESS_CRITERIA}} -## Task Purpose +## Unit Purpose -{{TASK_SERVES}} +{{UNIT_PURPOSE}} ## What Implementer Claims They Built diff --git a/.github/skills/workflows-compound/references/execution-agent-prompt.md b/.github/skills/workflows-compound/references/execution-agent-prompt.md index 78cd269..9047914 100644 --- a/.github/skills/workflows-compound/references/execution-agent-prompt.md +++ b/.github/skills/workflows-compound/references/execution-agent-prompt.md @@ -1,5 +1,10 @@ --- -{} +model: claude-sonnet-4.6 +platforms: + copilot: + model: gpt-5.3-codex + opencode: + model: openrouter/moonshotai/kimi-k2.6 --- # Execution Agent Prompt Template @@ -10,24 +15,32 @@ This template is used by the `workflows:work` orchestrator to construct prompts --- -You are an execution agent implementing a specific task from a work plan. Follow the 4-phase protocol below exactly. +You are an execution agent implementing a specific execution unit from a work plan. Follow the 4-phase protocol below exactly. -## Your Task +## Your Unit -**Task:** {{TASK_NAME}} +**Unit:** {{UNIT_TITLE}} -{{TASK_DESCRIPTION}} +{{UNIT_DESCRIPTION}} + +**Unit kind:** {{UNIT_KIND}} + +**Outcome scenario:** {{OUTCOME_SCENARIO}} + +**Scope:** {{UNIT_SCOPE}} + +**Scope fence:** {{UNIT_SCOPE_FENCE}} **Files to create/modify:** {{FILE_LIST}} **Success criteria:** {{SUCCESS_CRITERIA}} -**Test command:** `{{TEST_COMMAND}}` +**Validation command:** `{{VALIDATION_COMMAND}}` **Dependencies completed:** {{COMPLETED_DEPENDENCIES}} -## Why This Task Exists +## Why This Unit Exists {{WHY_CONTEXT}} @@ -35,7 +48,11 @@ You are an execution agent implementing a specific task from a work plan. 
Follow {{ARCHITECTURAL_CONTEXT}} -## Learnings from Previous Tasks +## Architecture Handoff + +{{ARCHITECTURE_HANDOFF}} + +## Learnings from Previous Units {{LEARNINGS_BRIEF}} @@ -51,7 +68,7 @@ You are an execution agent implementing a specific task from a work plan. Follow ## Phase 1: Understand Before Building -Before writing ANY code, review the task requirements AND the "Why This Task Exists" section carefully. +Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. **If anything is unclear, ambiguous, or could be interpreted multiple ways:** - List your questions explicitly @@ -60,7 +77,7 @@ Before writing ANY code, review the task requirements AND the "Why This Task Exi **If everything is clear:** - State your interpretation of the requirements in 2-3 sentences -- State how this task serves the overall user story (from the WHY context) +- State how this unit serves the overall user story (from the WHY context) - List any assumptions you are making (even obvious ones) - Proceed to Phase 2 @@ -84,7 +101,7 @@ If tests fail after implementation: 1. Read the error message carefully -- understand what failed and why 2. Analyze whether the failure is in your implementation or in the test 3. Fix the issue -4. Re-run the test command +4. Re-run the validation command 5. Repeat up to 3 total attempts 6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly @@ -98,7 +115,7 @@ Before reporting back, review your own work with fresh eyes. Go through each che - [ ] Did I miss any requirements? **Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Task Exists" section describes? +- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? - [ ] Would a user achieve the stated outcome with this code? - [ ] Did I build anything that doesn't trace back to the success criteria or user story? 
@@ -117,7 +134,7 @@ Before reporting back, review your own work with fresh eyes. Go through each che **Testing:** - [ ] Do tests verify actual behavior (not just mock behavior)? - [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? +- [ ] Did I run the validation command and confirm it passes? **Evidence:** - [ ] Can I show actual test output (not just "tests pass")? @@ -133,13 +150,13 @@ Return a structured execution report in exactly this format: Use `commands/workflows/references/tdd-evidence-contract.md` as the single source for the `### TDD Evidence` block. `Red` and `Green` prove behavior coverage. `Post-Refactor Green` proves cleanup safety after refactor. Each `Evidence` line should quote the decisive failing or passing signal in one sentence, not a narrative. ```markdown -## Execution Report: [Task Name] +## Execution Report: [Unit Title] ### Interpretation [Your 2-3 sentence interpretation of what was asked] ### Purpose Served -[Which user story aspect / success criterion this task delivers, from the WHY context] +[Which user story aspect / success criterion this unit delivers, from the WHY context] ### Assumptions Made - [List each assumption, even if obvious] @@ -154,7 +171,7 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc [Insert the exact Ralph evidence block from `commands/workflows/references/tdd-evidence-contract.md`. Preserve the `Red`, `Green`, and `Post-Refactor Green` headings with their command/result/evidence fields.] 
### Test Results -- Command: `[test command]` +- Command: `[validation command]` - Result: PASS/FAIL - Attempts: [n] - Output: @@ -171,7 +188,7 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc [If no problems: "None"] ### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] +- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future units] [If none: "None"] @@ -188,9 +205,9 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc _This section is included only when the resolved TDD contract explicitly allows standard implementation._ 1. Read referenced files and understand existing patterns -2. Implement the task following project conventions +2. Implement the unit following project conventions 3. Write tests matching the success criteria -4. Run the test command: `{{TEST_COMMAND}}` +4. Run the validation command: `{{VALIDATION_COMMAND}}` 5. If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) --- diff --git a/.github/skills/workflows-compound/references/execution-shape.md b/.github/skills/workflows-compound/references/execution-shape.md new file mode 100644 index 0000000..4ad4d31 --- /dev/null +++ b/.github/skills/workflows-compound/references/execution-shape.md @@ -0,0 +1,93 @@ +# Execution Shape Contract + +Use this reference when planning, deepening, or executing work so the workflow can preserve judgment without re-explaining the same rules in every prompt. + +## Default + +Choose `vertical-slices` unless that would create fake end-to-end work just to satisfy the template. + +## Allowed modes + +### `vertical-slices` (default) + +Use when a thin, testable, end-to-end behavior exists. 
+ +Required packet: +- `Slice type` +- `Serves` +- `Demo scenario` +- `Scope` +- `Scope fence` +- `Files` +- `Depends on` +- `Dependency type` +- `Success criteria` +- `Test command` + +### `infra-track` + +Use for enabling or foundational work where no honest user-visible tracer bullet exists yet. + +Required packet: +- `Capability enabled` +- `Consumers / downstream work unlocked` +- `Scope` +- `Files` +- `Depends on` +- `Risk / rollback` +- `Validation command` +- `Success criteria` + +### `fix-batch` + +Use for a series of small, mostly independent fixes where forcing one vertical slice would blur the real work. + +Required packet: +- `Problem` +- `Repro / expected outcome` +- `Files` +- `Depends on` +- `Validation command` +- `Success criteria` + +## Selection rules + +1. Default to `vertical-slices`. +2. Switch to `infra-track` only when the honest near-term value is enabling capability, not a user-visible behavior. +3. Switch to `fix-batch` only when the work is truly a batch of small fixes, not a feature being split too late. +4. Never force `vertical-slices` if that would create fake verticality. +5. If the mode is not the default, record why in `execution_shape.rationale` and in `## Execution Shape`. + +## Plan shape + +Add this frontmatter block to every plan: + +```yaml +execution_shape: + mode: vertical-slices # vertical-slices | infra-track | fix-batch + rationale: "" # required when mode is not vertical-slices +``` + +Add this body section: + +```markdown +## Execution Shape +- **Mode:** [vertical-slices | infra-track | fix-batch] +- **Why:** [1-2 sentences] +``` + +Then use the packet section that matches the chosen mode: +- `## Execution Slices` +- `## Infrastructure Work Packets` +- `## Fix Batch Items` + +## Deepening rules + +- Validate the plan against the chosen mode, not against `vertical-slices` unconditionally. 
+- You may recommend switching modes if the selected one is clearly wrong, but record that as a `WHY Reassessment` note instead of silently rewriting intent. + +## Execution rules + +- `/workflows:work` must execute the units defined by the chosen mode. +- Do not coerce `infra-track` or `fix-batch` plans into slices unless the user explicitly approves a mode change. +- Session tracking may stay generic (`unit`, `work status`) even when the selected mode is `vertical-slices`. diff --git a/.github/skills/workflows-compound/references/spec-review-prompt.md b/.github/skills/workflows-compound/references/spec-review-prompt.md index d6bcaa5..34bcbc1 100644 --- a/.github/skills/workflows-compound/references/spec-review-prompt.md +++ b/.github/skills/workflows-compound/references/spec-review-prompt.md @@ -14,15 +14,15 @@ You are a spec compliance reviewer. Your job is to verify whether an implementat ## What Was Requested -{{TASK_REQUIREMENTS}} +{{UNIT_REQUIREMENTS}} ## Success Criteria {{SUCCESS_CRITERIA}} -## Task Purpose +## Unit Purpose -{{TASK_SERVES}} +{{UNIT_PURPOSE}} ## What Implementer Claims They Built diff --git a/.github/skills/workflows-constitution/references/execution-agent-prompt.md b/.github/skills/workflows-constitution/references/execution-agent-prompt.md index 78cd269..9047914 100644 --- a/.github/skills/workflows-constitution/references/execution-agent-prompt.md +++ b/.github/skills/workflows-constitution/references/execution-agent-prompt.md @@ -1,5 +1,10 @@ --- -{} +model: claude-sonnet-4.6 +platforms: + copilot: + model: gpt-5.3-codex + opencode: + model: openrouter/moonshotai/kimi-k2.6 --- # Execution Agent Prompt Template @@ -10,24 +15,32 @@ This template is used by the `workflows:work` orchestrator to construct prompts --- -You are an execution agent implementing a specific task from a work plan. Follow the 4-phase protocol below exactly. +You are an execution agent implementing a specific execution unit from a work plan. 
Follow the 4-phase protocol below exactly. -## Your Task +## Your Unit -**Task:** {{TASK_NAME}} +**Unit:** {{UNIT_TITLE}} -{{TASK_DESCRIPTION}} +{{UNIT_DESCRIPTION}} + +**Unit kind:** {{UNIT_KIND}} + +**Outcome scenario:** {{OUTCOME_SCENARIO}} + +**Scope:** {{UNIT_SCOPE}} + +**Scope fence:** {{UNIT_SCOPE_FENCE}} **Files to create/modify:** {{FILE_LIST}} **Success criteria:** {{SUCCESS_CRITERIA}} -**Test command:** `{{TEST_COMMAND}}` +**Validation command:** `{{VALIDATION_COMMAND}}` **Dependencies completed:** {{COMPLETED_DEPENDENCIES}} -## Why This Task Exists +## Why This Unit Exists {{WHY_CONTEXT}} @@ -35,7 +48,11 @@ You are an execution agent implementing a specific task from a work plan. Follow {{ARCHITECTURAL_CONTEXT}} -## Learnings from Previous Tasks +## Architecture Handoff + +{{ARCHITECTURE_HANDOFF}} + +## Learnings from Previous Units {{LEARNINGS_BRIEF}} @@ -51,7 +68,7 @@ You are an execution agent implementing a specific task from a work plan. Follow ## Phase 1: Understand Before Building -Before writing ANY code, review the task requirements AND the "Why This Task Exists" section carefully. +Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. **If anything is unclear, ambiguous, or could be interpreted multiple ways:** - List your questions explicitly @@ -60,7 +77,7 @@ Before writing ANY code, review the task requirements AND the "Why This Task Exi **If everything is clear:** - State your interpretation of the requirements in 2-3 sentences -- State how this task serves the overall user story (from the WHY context) +- State how this unit serves the overall user story (from the WHY context) - List any assumptions you are making (even obvious ones) - Proceed to Phase 2 @@ -84,7 +101,7 @@ If tests fail after implementation: 1. Read the error message carefully -- understand what failed and why 2. Analyze whether the failure is in your implementation or in the test 3. Fix the issue -4. 
Re-run the test command +4. Re-run the validation command 5. Repeat up to 3 total attempts 6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly @@ -98,7 +115,7 @@ Before reporting back, review your own work with fresh eyes. Go through each che - [ ] Did I miss any requirements? **Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Task Exists" section describes? +- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? - [ ] Would a user achieve the stated outcome with this code? - [ ] Did I build anything that doesn't trace back to the success criteria or user story? @@ -117,7 +134,7 @@ Before reporting back, review your own work with fresh eyes. Go through each che **Testing:** - [ ] Do tests verify actual behavior (not just mock behavior)? - [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? +- [ ] Did I run the validation command and confirm it passes? **Evidence:** - [ ] Can I show actual test output (not just "tests pass")? @@ -133,13 +150,13 @@ Return a structured execution report in exactly this format: Use `commands/workflows/references/tdd-evidence-contract.md` as the single source for the `### TDD Evidence` block. `Red` and `Green` prove behavior coverage. `Post-Refactor Green` proves cleanup safety after refactor. Each `Evidence` line should quote the decisive failing or passing signal in one sentence, not a narrative. 
```markdown -## Execution Report: [Task Name] +## Execution Report: [Unit Title] ### Interpretation [Your 2-3 sentence interpretation of what was asked] ### Purpose Served -[Which user story aspect / success criterion this task delivers, from the WHY context] +[Which user story aspect / success criterion this unit delivers, from the WHY context] ### Assumptions Made - [List each assumption, even if obvious] @@ -154,7 +171,7 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc [Insert the exact Ralph evidence block from `commands/workflows/references/tdd-evidence-contract.md`. Preserve the `Red`, `Green`, and `Post-Refactor Green` headings with their command/result/evidence fields.] ### Test Results -- Command: `[test command]` +- Command: `[validation command]` - Result: PASS/FAIL - Attempts: [n] - Output: @@ -171,7 +188,7 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc [If no problems: "None"] ### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] +- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future units] [If none: "None"] @@ -188,9 +205,9 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc _This section is included only when the resolved TDD contract explicitly allows standard implementation._ 1. Read referenced files and understand existing patterns -2. Implement the task following project conventions +2. Implement the unit following project conventions 3. Write tests matching the success criteria -4. Run the test command: `{{TEST_COMMAND}}` +4. Run the validation command: `{{VALIDATION_COMMAND}}` 5. 
If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) --- diff --git a/.github/skills/workflows-constitution/references/execution-shape.md b/.github/skills/workflows-constitution/references/execution-shape.md new file mode 100644 index 0000000..4ad4d31 --- /dev/null +++ b/.github/skills/workflows-constitution/references/execution-shape.md @@ -0,0 +1,93 @@ +# Execution Shape Contract + +Use this reference when planning, deepening, or executing work so the workflow can preserve judgment without re-explaining the same rules in every prompt. + +## Default + +Choose `vertical-slices` unless that would create fake end-to-end work just to satisfy the template. + +## Allowed modes + +### `vertical-slices` (default) + +Use when a thin, testable, end-to-end behavior exists. + +Required packet: +- `Slice type` +- `Serves` +- `Demo scenario` +- `Scope` +- `Scope fence` +- `Files` +- `Depends on` +- `Dependency type` +- `Success criteria` +- `Test command` + +### `infra-track` + +Use for enabling or foundational work where no honest user-visible tracer bullet exists yet. + +Required packet: +- `Capability enabled` +- `Consumers / downstream work unlocked` +- `Scope` +- `Files` +- `Depends on` +- `Risk / rollback` +- `Validation command` +- `Success criteria` + +### `fix-batch` + +Use for a series of small mostly independent fixes where forcing one vertical slice would blur the real work. + +Required packet: +- `Problem` +- `Repro / expected outcome` +- `Files` +- `Depends on` +- `Validation command` +- `Success criteria` + +## Selection rules + +1. Default to `vertical-slices`. +2. Switch to `infra-track` only when the honest near-term value is enabling capability, not a user-visible behavior. +3. Switch to `fix-batch` only when the work is truly a batch of small fixes, not a feature being split too late. +4. Never force `vertical-slices` if that would create fake verticality. +5. 
If the mode is not the default, record why in `execution_shape.rationale` and in `## Execution Shape`. + +## Plan shape + +Add this frontmatter block to every plan: + +```yaml +execution_shape: + mode: vertical-slices # vertical-slices | infra-track | fix-batch + rationale: "" # required when mode is not vertical-slices +``` + +Add this body section: + +```markdown +## Execution Shape +- **Mode:** [vertical-slices | infra-track | fix-batch] +- **Why:** [1-2 sentences] +``` + +Then use the packet section that matches the chosen mode: +- `## Execution Slices` +- `## Infrastructure Work Packets` +- `## Fix Batch Items` + +## Deepening rules + +- Validate the plan against the chosen mode, not against `vertical-slices` unconditionally. +- You may recommend switching modes if the selected one is clearly wrong, but record that as a `WHY Reassessment` note instead of silently rewriting intent. + +## Execution rules + +- `/workflows:work` must execute the units defined by the chosen mode. +- Do not coerce `infra-track` or `fix-batch` plans into slices unless the user explicitly approves a mode change. +- Session tracking may stay generic (`unit`, `work status`) even when the selected mode is `vertical-slices`. diff --git a/.github/skills/workflows-constitution/references/spec-review-prompt.md b/.github/skills/workflows-constitution/references/spec-review-prompt.md index d6bcaa5..34bcbc1 100644 --- a/.github/skills/workflows-constitution/references/spec-review-prompt.md +++ b/.github/skills/workflows-constitution/references/spec-review-prompt.md @@ -14,15 +14,15 @@ You are a spec compliance reviewer. 
Your job is to verify whether an implementat ## What Was Requested -{{TASK_REQUIREMENTS}} +{{UNIT_REQUIREMENTS}} ## Success Criteria {{SUCCESS_CRITERIA}} -## Task Purpose +## Unit Purpose -{{TASK_SERVES}} +{{UNIT_PURPOSE}} ## What Implementer Claims They Built diff --git a/.github/skills/workflows-ideate/references/execution-agent-prompt.md b/.github/skills/workflows-ideate/references/execution-agent-prompt.md index 78cd269..9047914 100644 --- a/.github/skills/workflows-ideate/references/execution-agent-prompt.md +++ b/.github/skills/workflows-ideate/references/execution-agent-prompt.md @@ -1,5 +1,10 @@ --- -{} +model: claude-sonnet-4.6 +platforms: + copilot: + model: gpt-5.3-codex + opencode: + model: openrouter/moonshotai/kimi-k2.6 --- # Execution Agent Prompt Template @@ -10,24 +15,32 @@ This template is used by the `workflows:work` orchestrator to construct prompts --- -You are an execution agent implementing a specific task from a work plan. Follow the 4-phase protocol below exactly. +You are an execution agent implementing a specific execution unit from a work plan. Follow the 4-phase protocol below exactly. -## Your Task +## Your Unit -**Task:** {{TASK_NAME}} +**Unit:** {{UNIT_TITLE}} -{{TASK_DESCRIPTION}} +{{UNIT_DESCRIPTION}} + +**Unit kind:** {{UNIT_KIND}} + +**Outcome scenario:** {{OUTCOME_SCENARIO}} + +**Scope:** {{UNIT_SCOPE}} + +**Scope fence:** {{UNIT_SCOPE_FENCE}} **Files to create/modify:** {{FILE_LIST}} **Success criteria:** {{SUCCESS_CRITERIA}} -**Test command:** `{{TEST_COMMAND}}` +**Validation command:** `{{VALIDATION_COMMAND}}` **Dependencies completed:** {{COMPLETED_DEPENDENCIES}} -## Why This Task Exists +## Why This Unit Exists {{WHY_CONTEXT}} @@ -35,7 +48,11 @@ You are an execution agent implementing a specific task from a work plan. 
Follow {{ARCHITECTURAL_CONTEXT}} -## Learnings from Previous Tasks +## Architecture Handoff + +{{ARCHITECTURE_HANDOFF}} + +## Learnings from Previous Units {{LEARNINGS_BRIEF}} @@ -51,7 +68,7 @@ You are an execution agent implementing a specific task from a work plan. Follow ## Phase 1: Understand Before Building -Before writing ANY code, review the task requirements AND the "Why This Task Exists" section carefully. +Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. **If anything is unclear, ambiguous, or could be interpreted multiple ways:** - List your questions explicitly @@ -60,7 +77,7 @@ Before writing ANY code, review the task requirements AND the "Why This Task Exi **If everything is clear:** - State your interpretation of the requirements in 2-3 sentences -- State how this task serves the overall user story (from the WHY context) +- State how this unit serves the overall user story (from the WHY context) - List any assumptions you are making (even obvious ones) - Proceed to Phase 2 @@ -84,7 +101,7 @@ If tests fail after implementation: 1. Read the error message carefully -- understand what failed and why 2. Analyze whether the failure is in your implementation or in the test 3. Fix the issue -4. Re-run the test command +4. Re-run the validation command 5. Repeat up to 3 total attempts 6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly @@ -98,7 +115,7 @@ Before reporting back, review your own work with fresh eyes. Go through each che - [ ] Did I miss any requirements? **Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Task Exists" section describes? +- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? - [ ] Would a user achieve the stated outcome with this code? - [ ] Did I build anything that doesn't trace back to the success criteria or user story? 
@@ -117,7 +134,7 @@ Before reporting back, review your own work with fresh eyes. Go through each che **Testing:** - [ ] Do tests verify actual behavior (not just mock behavior)? - [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? +- [ ] Did I run the validation command and confirm it passes? **Evidence:** - [ ] Can I show actual test output (not just "tests pass")? @@ -133,13 +150,13 @@ Return a structured execution report in exactly this format: Use `commands/workflows/references/tdd-evidence-contract.md` as the single source for the `### TDD Evidence` block. `Red` and `Green` prove behavior coverage. `Post-Refactor Green` proves cleanup safety after refactor. Each `Evidence` line should quote the decisive failing or passing signal in one sentence, not a narrative. ```markdown -## Execution Report: [Task Name] +## Execution Report: [Unit Title] ### Interpretation [Your 2-3 sentence interpretation of what was asked] ### Purpose Served -[Which user story aspect / success criterion this task delivers, from the WHY context] +[Which user story aspect / success criterion this unit delivers, from the WHY context] ### Assumptions Made - [List each assumption, even if obvious] @@ -154,7 +171,7 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc [Insert the exact Ralph evidence block from `commands/workflows/references/tdd-evidence-contract.md`. Preserve the `Red`, `Green`, and `Post-Refactor Green` headings with their command/result/evidence fields.] 
### Test Results -- Command: `[test command]` +- Command: `[validation command]` - Result: PASS/FAIL - Attempts: [n] - Output: @@ -171,7 +188,7 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc [If no problems: "None"] ### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] +- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future units] [If none: "None"] @@ -188,9 +205,9 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc _This section is included only when the resolved TDD contract explicitly allows standard implementation._ 1. Read referenced files and understand existing patterns -2. Implement the task following project conventions +2. Implement the unit following project conventions 3. Write tests matching the success criteria -4. Run the test command: `{{TEST_COMMAND}}` +4. Run the validation command: `{{VALIDATION_COMMAND}}` 5. If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) --- diff --git a/.github/skills/workflows-ideate/references/execution-shape.md b/.github/skills/workflows-ideate/references/execution-shape.md new file mode 100644 index 0000000..4ad4d31 --- /dev/null +++ b/.github/skills/workflows-ideate/references/execution-shape.md @@ -0,0 +1,93 @@ +# Execution Shape Contract + +Use this reference when planning, deepening, or executing work so the workflow can preserve judgment without re-explaining the same rules in every prompt. + +## Default + +Choose `vertical-slices` unless that would create fake end-to-end work just to satisfy the template. + +## Allowed modes + +### `vertical-slices` (default) + +Use when a thin, testable, end-to-end behavior exists. 
+ +Required packet: +- `Slice type` +- `Serves` +- `Demo scenario` +- `Scope` +- `Scope fence` +- `Files` +- `Depends on` +- `Dependency type` +- `Success criteria` +- `Test command` + +### `infra-track` + +Use for enabling or foundational work where no honest user-visible tracer bullet exists yet. + +Required packet: +- `Capability enabled` +- `Consumers / downstream work unlocked` +- `Scope` +- `Files` +- `Depends on` +- `Risk / rollback` +- `Validation command` +- `Success criteria` + +### `fix-batch` + +Use for a series of small mostly independent fixes where forcing one vertical slice would blur the real work. + +Required packet: +- `Problem` +- `Repro / expected outcome` +- `Files` +- `Depends on` +- `Validation command` +- `Success criteria` + +## Selection rules + +1. Default to `vertical-slices`. +2. Switch to `infra-track` only when the honest near-term value is enabling capability, not a user-visible behavior. +3. Switch to `fix-batch` only when the work is truly a batch of small fixes, not a feature being split too late. +4. Never force `vertical-slices` if that would create fake verticality. +5. If the mode is not the default, record why in `execution_shape.rationale` and in `## Execution Shape`. + +## Plan shape + +Add this frontmatter block to every plan: + +```yaml +execution_shape: + mode: vertical-slices # vertical-slices | infra-track | fix-batch + rationale: "" # required when mode is not vertical-slices +``` + +Add this body section: + +```markdown +## Execution Shape +- **Mode:** [vertical-slices | infra-track | fix-batch] +- **Why:** [1-2 sentences] +``` + +Then use the packet section that matches the chosen mode: +- `## Execution Slices` +- `## Infrastructure Work Packets` +- `## Fix Batch Items` + +## Deepening rules + +- Validate the plan against the chosen mode, not against `vertical-slices` unconditionally. 
+- You may recommend switching modes if the selected one is clearly wrong, but record that as a `WHY Reassessment` note instead of silently rewriting intent. + +## Execution rules + +- `/workflows:work` must execute the units defined by the chosen mode. +- Do not coerce `infra-track` or `fix-batch` plans into slices unless the user explicitly approves a mode change. +- Session tracking may stay generic (`unit`, `work status`) even when the selected mode is `vertical-slices`. diff --git a/.github/skills/workflows-ideate/references/spec-review-prompt.md b/.github/skills/workflows-ideate/references/spec-review-prompt.md index d6bcaa5..34bcbc1 100644 --- a/.github/skills/workflows-ideate/references/spec-review-prompt.md +++ b/.github/skills/workflows-ideate/references/spec-review-prompt.md @@ -14,15 +14,15 @@ You are a spec compliance reviewer. Your job is to verify whether an implementat ## What Was Requested -{{TASK_REQUIREMENTS}} +{{UNIT_REQUIREMENTS}} ## Success Criteria {{SUCCESS_CRITERIA}} -## Task Purpose +## Unit Purpose -{{TASK_SERVES}} +{{UNIT_PURPOSE}} ## What Implementer Claims They Built diff --git a/.github/skills/workflows-plan/SKILL.md b/.github/skills/workflows-plan/SKILL.md index 93971d8..384d9af 100644 --- a/.github/skills/workflows-plan/SKILL.md +++ b/.github/skills/workflows-plan/SKILL.md @@ -14,13 +14,14 @@ description: Transform feature descriptions into structured project plans anchor Transform feature descriptions, bug reports, or improvement ideas into well-structured, execution-ready plans that: 1. **Anchor to WHY** -- every plan traces back to a user story and problem narrative -2. **Map WHERE** -- architectural context grounds task decomposition in the system's structure +2. **Map WHERE** -- architectural context grounds slice decomposition in the system's structure 3. **Define DONE** -- success criteria tied to user outcomes, not just technical checkboxes 4. 
**Honor project guardrails** -- constitution principles, baselines, and approval rules are made explicit 5. **Make TDD explicit** -- the plan declares the Ralph/default loop, required unit + e2e evidence, and any justified exceptions -6. **Enable architecture-first execution** -- `/workflows-architecture` turns the plan into a dedicated architecture artifact before `/deepen-plan`, `/workflows-work`, and `/workflows-review` harden or execute it +6. **Choose the right execution shape** -- vertical slices are the default, but infra tracks and fix batches are valid when they fit the real work better +7. **Enable architecture-first execution** -- `/workflows-architecture` turns the plan into a dedicated architecture artifact before `/deepen-plan`, `/workflows-work`, and `/workflows-review` harden or execute it -Plans consume the project constitution from `/workflows-constitution` when available, plus lynchpin artifacts from `/workflows-brainstorm` when available, or construct feature context fresh when running standalone. Either way, the plan document carries forward the WHY, WHERE, DONE, GUARDRAIL, and TDD contract that all downstream phases depend on. After the plan is written, the next explicit step is `/workflows-architecture`, not direct deepening. +Plans consume the project constitution from `/workflows-constitution` when available, plus lynchpin artifacts from `/workflows-brainstorm` when available, or construct feature context fresh when running standalone. Either way, the plan document carries forward the WHY, WHERE, DONE, GUARDRAIL, TDD, and **execution shape** contract that all downstream phases depend on. After the plan is written, the next explicit step is `/workflows-architecture`, not direct deepening. 
## Feature Description @@ -72,6 +73,15 @@ Every plan must then write its own `tdd:` frontmatter block plus a `## TDD & Evi - **Exception rule:** Any deviation from the resolved default loop or evidence requirements must be explicit and justified in `tdd.exceptions` and in the plan body. - **Shared source of truth:** Reuse `references/tdd-evidence-contract.md` for contract resolution, the `## TDD & Evidence Contract` section shape, Ralph evidence semantics, and exception handling. +#### Execution Shape Baseline (Runs Before Path A/B/C) + +Use `references/execution-shape.md` as the single source for choosing and documenting the execution shape. + +- **Default mode:** `vertical-slices` +- **Allowed overrides:** `infra-track`, `fix-batch` +- **Override rule:** Any non-default mode must include a short rationale in frontmatter and in the plan body +- **Anti-coercion rule:** Do not force work into slices if that would create fake end-to-end structure + #### Path A: Spec/Plan File Provided **Check if arguments contain a plan or spec file:** @@ -80,7 +90,7 @@ If the feature description (`#$ARGUMENTS`) is or contains a path to a `.md` file 1. Read the file 2. Announce: "Found existing plan/spec: `[file path]`. Using as foundation." -3. Extract: title, problem statement, proposed approach, acceptance criteria, implementation phases, and any existing tasks +3. Extract: title, problem statement, proposed approach, acceptance criteria, execution shape (if any), and any existing execution units 4. **Check for brainstorm reference** -- look for a `brainstorm_ref` field in frontmatter, or search `docs/brainstorms/` for a matching topic. If found, read and extract lynchpin artifacts (see Path B). 5. **Extract or construct WHY artifacts from the spec:** - If the spec has a Problem Narrative / User Story / Architectural Context -- use them directly @@ -92,7 +102,7 @@ If the feature description (`#$ARGUMENTS`) is or contains a path to a `.md` file 6. 
**Skip free-form idea refinement** -- the spec defines WHAT to build 7. Proceed to Step 0.5 to gather any additional project inputs, then to research -In Step 2 (Issue Planning), **build upon the existing plan structure** -- preserve its sections, fill gaps, add execution-readiness fields (Files, Depends on, Success criteria, Test command) to any tasks that lack them, and enrich with research findings. Do NOT discard or rewrite sections that are already well-defined. +In Step 2 (Issue Planning), **build upon the existing plan structure** -- preserve its sections, fill gaps, add the execution-shape contract and execution-readiness fields to any legacy execution units that lack them, and enrich with research findings. Do NOT discard or rewrite sections that are already well-defined. #### Path B: Brainstorm Document Found @@ -115,11 +125,11 @@ ls -la docs/brainstorms/*.md 2>/dev/null | head -10 3. Announce: "Found brainstorm from [date]: [topic]. Consuming lynchpin artifacts." 4. **Extract and surface all lynchpin sections:** - **Problem Narrative** -- the synthesized WHY (carry forward verbatim into plan) - - **User Story** -- the north star (carry forward, plan tasks must trace to this) + - **User Story** -- the north star (carry forward, plan slices must trace to this) - **Architectural Context** -- the WHERE map (feeds `{{ARCHITECTURAL_CONTEXT}}` in work.md) - **Success Criteria** -- the DONE definition (plan acceptance criteria must include these) - **Stakeholder Impact** -- who is affected (informs stakeholder analysis) - - **Chosen Approach** and **Key Decisions** -- the WHAT (informs task decomposition) + - **Chosen Approach** and **Key Decisions** -- the WHAT (informs slice decomposition) - **Open Questions** -- must be resolved before planning proceeds 5. **If any handoff fields are `false` or sections are empty**, flag them: "Brainstorm is missing [X]. I'll construct this during planning." 6. 
**Resolve open questions** -- if the brainstorm has unresolved questions, use **AskUserQuestion tool** to resolve each one before proceeding @@ -332,7 +342,7 @@ Now that we have concrete codebase knowledge, refine the WHY artifacts establish Explicitly state how research findings confirm, challenge, or refine the planned approach relative to the user story. Examples: - "Codebase already has a similar pattern in `app/Services/AuthService.php` -- we should follow it for consistency, which aligns with the user story because..." - "Learnings doc warns about [gotcha] -- this affects our approach because..." -- "No existing patterns found for this -- higher risk, may need more tasks for validation." +- "No existing patterns found for this -- higher risk, may need more slices for validation." - "Constitution requires [baseline] -- the plan must make that visible in acceptance criteria or approvals." **Optional validation:** Briefly summarize the refined WHY artifacts and key research findings, then ask if anything looks off or missing before proceeding to planning. @@ -367,28 +377,34 @@ Think like a product manager -- what would make this issue clear, actionable, an - [ ] Gather supporting materials (error logs, screenshots, design mockups) - [ ] Prepare code examples or reproduction steps if applicable, name the mock filenames in the lists -**Phase Decomposition (traced to user story):** +**Execution Shape Selection (traced to user story):** + +Use `references/execution-shape.md` as the source of truth for selecting and documenting the plan's execution shape. -Each implementation phase must state **what aspect of the user story it serves**. 
This creates a traceable chain: -- User Story → Phase → Tasks → Files +Default to **`vertical-slices`**: +- User Story → Phase/Track (optional grouping) → Slice → Files +- Start with the thinnest tracer bullet +- Slice vertically across layers when needed +- Treat phases as wrappers, not executable units +- Forbid horizontal slice titles unless they still produce a demoable outcome -When decomposing into phases: -- **Group by user-facing capability**, not by technical layer. "User can log in" is a phase; "Create database tables" is a task within a phase. -- **Each phase should deliver a testable slice** of the user story where possible -- **Each subphase/task should be a self-contained execution unit** -- after its listed dependencies are satisfied, the executor should have the context, scope, relevant files, success criteria, and verification command needed to complete it without reconstructing intent from neighboring phases -- **Cross-reference success criteria** -- map each success criterion to the phase(s) that deliver it -- **Architectural context informs boundaries** -- use the WHERE map to identify natural phase boundaries (e.g., service boundaries, module boundaries) +Switch only when that default would be fake: +- **`infra-track`** for enabling/foundation work with no honest user-visible tracer bullet yet +- **`fix-batch`** for a batch of small mostly independent fixes + +Every plan must record: +- `execution_shape.mode` +- `execution_shape.rationale` (required when mode is not `vertical-slices`) +- A matching `## Execution Shape` section in the body **Execution Readiness:** -For plans that will be executed via `/workflows-work`, ensure each implementation task includes: -- **Scope:** What this task owns, what it changes, and any important boundary or non-goal that keeps the slice contained -- **Files:** List of files to create or modify -- **Depends on:** Which other tasks must complete first (or "None") -- **Success criteria:** Testable checkboxes that 
define "done" -- **Test command:** The exact command to verify the task is complete. Across the plan, these commands must satisfy the plan-level TDD evidence contract. +For plans that will be executed via `/workflows-work`, the plan must include the packet section required by the selected mode: +- `## Execution Slices` +- `## Infrastructure Work Packets` +- `## Fix Batch Items` -This structured format enables the `/workflows-work` orchestrator to delegate each task to a focused subagent with clear scope and termination criteria. Treat every task as a mini-handoff packet: if an executor had only that task plus the shared WHY/architecture context, they should still know what to touch, what not to touch, and how to prove it is done. Plans without this structure will be flagged for refinement before execution begins. +Each packet must include the fields defined in `references/execution-shape.md`. Plans without a declared shape and packet structure will be flagged for refinement before execution begins. **TDD & Evidence Contract (mandatory):** @@ -408,10 +424,10 @@ Apply the shared `Named Agent Dispatch` protocol from `references/orchestration- - Use the spec-flow-analyzer skill to: feature_description, user_story, success_criteria, research_findings The SpecFlow Analyzer should evaluate: -- Do the planned phases cover all aspects of the user story? +- Do the planned slices cover all aspects of the user story? - Are there user flows implied by the user story that the plan doesn't address? - Do edge cases threaten any of the success criteria? -- Are there gaps between what the user needs (story) and what the plan delivers (tasks)? +- Are there gaps between what the user needs (story) and what the plan delivers (slices)? **SpecFlow Analyzer Output:** @@ -422,7 +438,7 @@ The SpecFlow Analyzer should evaluate: ### 4. 
Choose Implementation Detail Level -**Important for `/workflows-work` compatibility:** All detail levels can be executed, but the MORE and A LOT levels produce plans with structured execution chunks (per-task scope, success criteria, test commands, and file lists) that enable the subagent orchestration model in `/workflows-work`. MINIMAL plans work but may require the orchestrator to decompose tasks further before delegating to subagents and supply any missing containment details. +**Important for `/workflows-work` compatibility:** All detail levels can be executed, but each level must still declare an execution shape and produce the matching packet section. `vertical-slices` is the default and usually the best choice. MORE and A LOT provide the richest packets (scope fence, dependencies, evidence, and safety notes) and therefore give the most predictable subagent orchestration. **All detail levels include WHY sections.** The Problem Narrative, User Story, Architectural Context, and Success Criteria are mandatory at every level -- they are the contract that downstream phases depend on. The difference between levels is how much implementation detail surrounds them. @@ -438,7 +454,7 @@ Select how comprehensive you want the issue to be, simpler is mostly better. - Basic acceptance criteria - Essential context only -**Note:** MINIMAL plans may need to be enriched with per-task success criteria before running `/workflows-work`. The orchestrator can handle this decomposition automatically, but providing structured tasks up front leads to more predictable execution. +**Note:** MINIMAL plans may still contain only a few units, but they must include at least one execution-ready packet from the selected mode before `/workflows-work` should execute them. When in doubt, choose a tracer-bullet slice. 
**Structure:** @@ -469,6 +485,9 @@ tdd: unit: inherit # inherit | required | optional e2e: inherit # inherit | required | optional exceptions: [] # [{ scope, reason, replacement_evidence }] +execution_shape: + mode: vertical-slices # vertical-slices | infra-track | fix-batch + rationale: "" --- # [Issue Title] @@ -501,6 +520,11 @@ which causes [impact]. Use the exact section shape from `references/tdd-evidence-contract.md` with the resolved values for this plan. Do not omit any bullet, and make every deviation explicit with `replacement_evidence`. +## Execution Shape + +- **Mode:** vertical-slices +- **Why:** The work has a real tracer-bullet path, so default to end-to-end slices. + ## Constitution Alignment - **Relevant principles:** [Project rules that apply to this work] @@ -511,6 +535,34 @@ Use the exact section shape from `references/tdd-evidence-contract.md` with the [Brief description of what to build and how] +## Execution Slices + +Use this default section only when `execution_shape.mode` is `vertical-slices`. If the selected mode is `infra-track` or `fix-batch`, replace this section with the matching packet section from `references/execution-shape.md`. 
+ +##### Slice 1.1: [Tracer Bullet Slice Title] +**Slice type:** tracer-bullet +**Serves:** [Which aspect of the user story / which success criterion this slice proves] +**Demo scenario:** [Describe the smallest end-to-end behavior this slice makes observable] +**Files:** `path/to/file1.php`, `path/to/file2.php` +**Depends on:** None +**Dependency type:** real | stub-available | parallel-safe + +###### What to build +[Brief description of the thin end-to-end path] + +###### Scope +- **Owns:** [What this slice is responsible for] +- **Non-goals:** [What this slice intentionally does not solve yet] +- **Scope fence:** [What would count as widening the slice too far] + +###### Acceptance criteria +- [ ] Criterion 1 +- [ ] Criterion 2 + +###### Evidence +- **Test command:** `[exact command that verifies this slice]` +- **Evidence focus:** [What this command proves for the tracer bullet] + ## References - Related issue: #[issue_number] @@ -525,7 +577,7 @@ Use the exact section shape from `references/tdd-evidence-contract.md` with the - Detailed background and motivation - Technical considerations -- Phased implementation with story tracing +- Issue-shaped execution slices with story tracing - Success metrics - Dependencies and risks @@ -558,6 +610,9 @@ tdd: unit: inherit # inherit | required | optional e2e: inherit # inherit | required | optional exceptions: [] # [{ scope, reason, replacement_evidence }] +execution_shape: + mode: vertical-slices # vertical-slices | infra-track | fix-batch + rationale: "" --- # [Issue Title] @@ -594,6 +649,11 @@ which causes [impact]. Use the exact section shape from `references/tdd-evidence-contract.md` with the resolved values for this plan. Do not omit any bullet, and make every deviation explicit with `replacement_evidence`. +## Execution Shape + +- **Mode:** vertical-slices +- **Why:** The default tracer-bullet decomposition matches the real behavior being delivered. 
+ ## Constitution Alignment - **Relevant principles:** [Project rules that apply to this work] @@ -611,37 +671,88 @@ Use the exact section shape from `references/tdd-evidence-contract.md` with the - Performance implications - Security considerations -## Implementation Phases +## Execution Slices + +Use this default section only when `execution_shape.mode` is `vertical-slices`. If the selected mode is `infra-track` or `fix-batch`, replace this section with the matching packet section from `references/execution-shape.md`. -#### Phase 1: [Phase Name] -**Serves:** [Which aspect of the user story / which success criterion this phase delivers] +#### Phase 1: [Optional grouping / milestone] +**Purpose:** [Why these slices belong together or why this track exists] +**Not executable by itself:** `/workflows-work` executes the slices below, not the phase wrapper. -##### Task 1.1: [Task Name] +##### Slice 1.1: [Tracer Bullet Slice Title] +**Slice type:** tracer-bullet +**Serves:** [Which aspect of the user story / which success criterion this slice proves] +**Demo scenario:** [Describe the smallest end-to-end behavior this slice makes observable] **Files:** `path/to/file1.php`, `path/to/file2.php` **Depends on:** None -**Success criteria:** +**Dependency type:** parallel-safe + +###### What to build +[Describe the thin vertical cut through the system] + +###### Scope +- **Owns:** [What this slice is responsible for] +- **Non-goals:** [What it intentionally does not solve yet] +- **Scope fence:** [What would widen the slice too far] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -##### Task 1.2: [Task Name] +###### Evidence +- **Test command:** `[exact command that verifies this slice]` +- **Evidence focus:** [What the command proves] + +##### Slice 1.2: [Expansion Slice Title] +**Slice type:** expansion +**Serves:** [Which aspect of the user story / which success criterion this slice extends] +**Demo scenario:** [Describe the next observable behavior] **Files:** 
`path/to/file3.php` -**Depends on:** Task 1.1 -**Success criteria:** +**Depends on:** Slice 1.1 +**Dependency type:** real + +###### What to build +[Describe the next thin vertical cut] + +###### Scope +- **Owns:** [What this slice changes] +- **Non-goals:** [What stays out] +- **Scope fence:** [Boundary for this expansion] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -#### Phase 2: [Phase Name] -**Serves:** [Which aspect of the user story / which success criterion this phase delivers] +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] -##### Task 2.1: [Task Name] +#### Phase 2: [Optional grouping / milestone] +**Purpose:** [Why the next slices are grouped here] + +##### Slice 2.1: [Hardening or follow-on slice] +**Slice type:** expansion | hardening +**Serves:** [Which aspect of the user story / which success criterion this slice delivers] +**Demo scenario:** [Describe the observable behavior or guardrail added here] **Files:** `path/to/file4.php` -**Depends on:** Task 1.2 -**Success criteria:** +**Depends on:** Slice 1.2 +**Dependency type:** real + +###### What to build +[Describe the slice] + +###### Scope +- **Owns:** [What this slice changes] +- **Non-goals:** [What stays out] +- **Scope fence:** [Boundary for this slice] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` + +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] ## Acceptance Criteria @@ -670,7 +781,7 @@ Use the exact section shape from `references/tdd-evidence-contract.md` with the **Includes everything from MORE plus:** -- Detailed implementation plan with phases +- Detailed implementation plan with slice groups - Alternative approaches considered (traced to user story) - Extensive technical specifications - Resource requirements and timeline @@ -707,6 +818,9 @@ tdd: unit: inherit # inherit | required | optional e2e: inherit # inherit | 
required | optional exceptions: [] # [{ scope, reason, replacement_evidence }] +execution_shape: + mode: vertical-slices # vertical-slices | infra-track | fix-batch + rationale: "" --- # [Issue Title] @@ -750,6 +864,11 @@ As a [persona 2], I need to [action] so that [outcome]. Use the exact section shape from `references/tdd-evidence-contract.md` with the resolved values for this plan. Do not omit any bullet, and make every deviation explicit with `replacement_evidence`. +## Execution Shape + +- **Mode:** vertical-slices +- **Why:** The plan delivers meaningful user-visible tracer bullets, so slices stay the best default. + ## Constitution Alignment - **Relevant principles:** [Project rules that apply to this work] @@ -778,65 +897,162 @@ Use the exact section shape from `references/tdd-evidence-contract.md` with the [Detailed technical design, grounded in the architectural context map] -### Implementation Phases +### Execution Slices -#### Phase 1: [Foundation] -**Serves:** [Which aspect of the user story / which success criteria this phase delivers] -**Rationale:** [Why this phase comes first -- what it enables for subsequent phases] +Use this default section only when `execution_shape.mode` is `vertical-slices`. If the selected mode is `infra-track` or `fix-batch`, replace this section with the matching packet section from `references/execution-shape.md`. 
-##### Task 1.1: [Task Name] +#### Phase 1: [Tracer bullet track] +**Purpose:** [Why these slices come first] +**Rationale:** [What this track proves before later widening] + +##### Slice 1.1: [Tracer Bullet Slice Title] +**Slice type:** tracer-bullet +**Serves:** [Which aspect of the user story / which success criteria this slice proves] +**Demo scenario:** [Describe the smallest end-to-end behavior] **Files:** `path/to/file1.php`, `path/to/file2.php` **Depends on:** None -**Success criteria:** +**Dependency type:** real | stub-available | parallel-safe +**Blast radius:** low | medium | high +**Shared state changes:** [None, or list] +**Rollback path:** [How to back out safely if risky] + +###### What to build +[Describe the tracer bullet as an issue-sized vertical slice] + +###### Scope +- **Owns:** [What this slice is responsible for] +- **Non-goals:** [What intentionally waits for later slices] +- **Scope fence:** [What would widen the slice too far] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -##### Task 1.2: [Task Name] +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] + +##### Slice 1.2: [Follow-on expansion slice] +**Slice type:** expansion +**Serves:** [Which aspect of the user story / which success criteria this slice extends] +**Demo scenario:** [Describe the next observable behavior] **Files:** `path/to/file3.php` -**Depends on:** Task 1.1 -**Success criteria:** +**Depends on:** Slice 1.1 +**Dependency type:** real +**Blast radius:** low | medium | high +**Shared state changes:** [None, or list] +**Rollback path:** [How to back out safely if risky] + +###### What to build +[Describe the slice] + +###### Scope +- **Owns:** [What this slice changes] +- **Non-goals:** [What stays out] +- **Scope fence:** [Boundary for this slice] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -#### Phase 2: [Core Implementation] -**Serves:** [Which 
aspect of the user story / which success criteria this phase delivers] -**Rationale:** [Why this phase order -- what it builds on from Phase 1] +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] + +#### Phase 2: [Core widening track] +**Purpose:** [Why these slices come after the tracer bullet] +**Rationale:** [What this track widens or hardens] -##### Task 2.1: [Task Name] +##### Slice 2.1: [Core slice] +**Slice type:** expansion | hardening +**Serves:** [Which aspect of the user story / which success criteria this slice delivers] +**Demo scenario:** [Describe the user-visible behavior] **Files:** `path/to/file4.php`, `path/to/file5.php` -**Depends on:** Task 1.2 -**Success criteria:** +**Depends on:** Slice 1.2 +**Dependency type:** real +**Blast radius:** low | medium | high +**Shared state changes:** [None, or list] +**Rollback path:** [How to back out safely if risky] + +###### What to build +[Describe the slice] + +###### Scope +- **Owns:** [What this slice changes] +- **Non-goals:** [What stays out] +- **Scope fence:** [Boundary for this slice] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -##### Task 2.2: [Task Name] +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] + +##### Slice 2.2: [Parallel-safe or stub-removal slice] +**Slice type:** expansion | hardening +**Serves:** [Which aspect of the user story / which success criteria this slice delivers] +**Demo scenario:** [Describe the observable outcome] **Files:** `path/to/file6.php` -**Depends on:** Task 2.1 -**Success criteria:** +**Depends on:** Slice 2.1 +**Dependency type:** real | stub-available | parallel-safe +**Blast radius:** low | medium | high +**Shared state changes:** [None, or list] +**Rollback path:** [How to back out safely if risky] + +###### What to build +[Describe the slice] + +###### Scope +- **Owns:** [What this slice changes] +- **Non-goals:** [What stays out] +- 
**Scope fence:** [Boundary for this slice] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -#### Phase 3: [Polish & Optimization] -**Serves:** [Which success criteria / quality aspects this phase delivers] +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] -##### Task 3.1: [Task Name] +#### Phase 3: [Hardening / rollout track] +**Purpose:** [Why these slices close the loop] + +##### Slice 3.1: [Hardening slice] +**Slice type:** hardening +**Serves:** [Which success criteria / quality aspects this slice delivers] +**Demo scenario:** [Describe the behavior or safety improvement] **Files:** `path/to/file7.php` -**Depends on:** Task 2.2 -**Success criteria:** +**Depends on:** Slice 2.2 +**Dependency type:** real +**Blast radius:** low | medium | high +**Shared state changes:** [None, or list] +**Rollback path:** [How to back out safely if risky] + +###### What to build +[Describe the slice] + +###### Scope +- **Owns:** [What this slice changes] +- **Non-goals:** [What stays out] +- **Scope fence:** [Boundary for this slice] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -### Phase-to-Story Traceability +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] + +### Slice-to-Story Traceability -| Success Criterion | Delivered by Phase(s) | Key Tasks | +| Success Criterion | Delivered by Slice(s) | Demo scenarios | |---|---|---| -| [Criterion 1 from Success Criteria] | Phase 1, Phase 2 | Task 1.1, Task 2.1 | -| [Criterion 2 from Success Criteria] | Phase 2 | Task 2.1, Task 2.2 | +| [Criterion 1 from Success Criteria] | Slice 1.1, Slice 2.1 | [Scenario names] | +| [Criterion 2 from Success Criteria] | Slice 2.1, Slice 2.2 | [Scenario names] | ## Alternative Approaches Considered @@ -971,10 +1187,12 @@ public function processUser(User $user): array - [ ] Architectural Context is grounded in actual repo research (not 
hypothetical) - [ ] Success Criteria are tied to user outcomes, not just technical checkboxes - [ ] If `docs/constitution.md` exists, Constitution Alignment names the applicable rules, approvals, and any waivers explicitly -- [ ] Every implementation phase states which user story aspect / success criterion it serves +- [ ] Every execution slice states which user story aspect / success criterion it serves - [ ] `handoff` frontmatter fields are all `true` - [ ] `tdd` frontmatter is present and the precedence rule is explicit - [ ] `## TDD & Evidence Contract` names the effective loop, required evidence, and any justified exceptions +- [ ] `execution_shape` frontmatter is present and matches the body section +- [ ] Non-default execution shapes include an explicit rationale **Content Quality:** @@ -988,14 +1206,17 @@ public function processUser(User $user): array **Execution Readiness (for `/workflows-work`):** -- [ ] Each task is a self-contained execution unit once dependencies are met -- [ ] Each task has: Files, Depends on, Success criteria, Test command -- [ ] Each task scope is explicit enough that an executor does not need to infer missing boundaries from adjacent phases -- [ ] Task success criteria are testable (not vague) -- [ ] Dependencies between tasks are explicit +- [ ] The selected execution shape matches the real work instead of forcing fake verticality +- [ ] The plan includes the packet section required by the selected mode +- [ ] Every packet includes the required fields from `references/execution-shape.md` +- [ ] If mode is `vertical-slices`, the first slice is a tracer bullet, not a broad foundation phase +- [ ] If mode is `vertical-slices`, no slice is a disguised horizontal layer bucket unless it still delivers a demoable outcome +- [ ] Packet scope is explicit enough that an executor does not need to infer missing boundaries from adjacent packets +- [ ] Packet success criteria are testable (not vague) +- [ ] Dependencies are explicit wherever 
ordering matters - [ ] Architectural context is specific enough to fill `{{ARCHITECTURAL_CONTEXT}}` in execution agent prompts - [ ] The plan declares unit + e2e evidence by default, or records a justified exception with replacement evidence -- [ ] Task test commands collectively satisfy the resolved TDD contract +- [ ] Validation/test commands collectively satisfy the resolved TDD contract ## Directory Setup & Gitignore @@ -1081,12 +1302,12 @@ The plan document is a structured contract consumed by all downstream phases. He **`/workflows-architecture`** reads: - Problem Narrative, User Story, Success Criteria, and Architectural Context -- the WHY/WHERE contract it must preserve -- Implementation phases and tasks -- identifies the deepening candidates that need structural clarification +- Execution shape plus execution packets -- identifies the deepening candidates and boundaries that need structural clarification - Constitution Alignment / waivers / brainstorm decisions -- keeps architecture decisions inside approved project guardrails - **Must write**: a dedicated artifact in `docs/architecture/` plus an `architecture_ref` back into the plan **`/deepen-plan`** reads: -- Implementation phases and tasks -- enriches each with parallel research (best practices, performance, UI patterns) +- Execution shape plus execution packets -- enriches each with parallel research and splits, merges, or reshapes packets when the current mode is weak - Success criteria -- validates they are testable and complete - Architectural Context -- uses it to ground research in the right part of the system - `tdd` frontmatter and `## TDD & Evidence Contract` -- preserves the effective Ralph/default loop, evidence requirements, and any justified exceptions @@ -1094,12 +1315,12 @@ The plan document is a structured contract consumed by all downstream phases. 
He - **Must preserve**: Problem Narrative, User Story, and handoff contract unchanged **`/workflows-work`** reads: -- **Problem Narrative & User Story** -- the orchestrator uses these to validate task outcomes make sense in context, not just pass tests +- **Problem Narrative & User Story** -- the orchestrator uses these to validate slice outcomes make sense in context, not just pass tests - **Architectural Context** -- feeds directly into `{{ARCHITECTURAL_CONTEXT}}` in each execution agent's prompt. This is WHY grounded arch context matters -- every subagent gets system-level awareness - **`architecture_ref` / `docs/architecture/` artifact / explicit architecture handoff contract** -- feeds deletion-test decisions, interfaces as test surfaces, seams, adapters, and contracts into execution so subagents do not invent structure ad hoc - **`tdd` frontmatter + `## TDD & Evidence Contract`** -- plan-level values win; `inherit` falls back to `compound-engineering.local.md`; if neither exists, execution should assume Ralph-driven unit + e2e evidence -- **Implementation phases & tasks** -- the execution chunk structure (Files, Depends on, Success criteria, Test command) -- **Success Criteria** -- the orchestrator checks final outcomes against these, not just individual task passes +- **`execution_shape` + execution packets** -- tells the orchestrator whether to execute slices, infrastructure packets, or fix-batch items, and which fields each unit must respect +- **Success Criteria** -- the orchestrator checks final outcomes against these, not just individual unit passes - **`constitution_version` / `constitution_waivers` / Constitution Alignment** -- the execution phase enforces repo-wide guardrails and knows which exceptions were approved - **`brainstorm_ref`** -- if present, the orchestrator can read the original brainstorm for additional context @@ -1109,6 +1330,7 @@ The plan document is a structured contract consumed by all downstream phases. 
He - **Architectural Context** -- used to evaluate whether the implementation respects system boundaries and integration points - **`architecture_ref` / `docs/architecture/` artifact / explicit architecture handoff contract** -- supplies the architecture intent, deletion-test outcomes, interfaces, seams, adapters, and contracts that reviewers must verify or flag as drift - **`tdd` frontmatter + `## TDD & Evidence Contract`** -- review must verify the declared evidence exists and that any deviation from Ralph/unit+e2e is explicitly justified +- **`execution_shape` + execution packets** -- review uses the chosen mode to judge whether the work was decomposed honestly and executed completely - **Constitution Alignment and waivers** -- used to distinguish approved exceptions from blocking constitution violations - **Stakeholder Impact** (A LOT level) -- informs stakeholder-perspective review diff --git a/.github/skills/workflows-plan/references/execution-agent-prompt.md b/.github/skills/workflows-plan/references/execution-agent-prompt.md index 78cd269..9047914 100644 --- a/.github/skills/workflows-plan/references/execution-agent-prompt.md +++ b/.github/skills/workflows-plan/references/execution-agent-prompt.md @@ -1,5 +1,10 @@ --- -{} +model: claude-sonnet-4.6 +platforms: + copilot: + model: gpt-5.3-codex + opencode: + model: openrouter/moonshotai/kimi-k2.6 --- # Execution Agent Prompt Template @@ -10,24 +15,32 @@ This template is used by the `workflows:work` orchestrator to construct prompts --- -You are an execution agent implementing a specific task from a work plan. Follow the 4-phase protocol below exactly. +You are an execution agent implementing a specific execution unit from a work plan. Follow the 4-phase protocol below exactly. 
-## Your Task +## Your Unit -**Task:** {{TASK_NAME}} +**Unit:** {{UNIT_TITLE}} -{{TASK_DESCRIPTION}} +{{UNIT_DESCRIPTION}} + +**Unit kind:** {{UNIT_KIND}} + +**Outcome scenario:** {{OUTCOME_SCENARIO}} + +**Scope:** {{UNIT_SCOPE}} + +**Scope fence:** {{UNIT_SCOPE_FENCE}} **Files to create/modify:** {{FILE_LIST}} **Success criteria:** {{SUCCESS_CRITERIA}} -**Test command:** `{{TEST_COMMAND}}` +**Validation command:** `{{VALIDATION_COMMAND}}` **Dependencies completed:** {{COMPLETED_DEPENDENCIES}} -## Why This Task Exists +## Why This Unit Exists {{WHY_CONTEXT}} @@ -35,7 +48,11 @@ You are an execution agent implementing a specific task from a work plan. Follow {{ARCHITECTURAL_CONTEXT}} -## Learnings from Previous Tasks +## Architecture Handoff + +{{ARCHITECTURE_HANDOFF}} + +## Learnings from Previous Units {{LEARNINGS_BRIEF}} @@ -51,7 +68,7 @@ You are an execution agent implementing a specific task from a work plan. Follow ## Phase 1: Understand Before Building -Before writing ANY code, review the task requirements AND the "Why This Task Exists" section carefully. +Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. **If anything is unclear, ambiguous, or could be interpreted multiple ways:** - List your questions explicitly @@ -60,7 +77,7 @@ Before writing ANY code, review the task requirements AND the "Why This Task Exi **If everything is clear:** - State your interpretation of the requirements in 2-3 sentences -- State how this task serves the overall user story (from the WHY context) +- State how this unit serves the overall user story (from the WHY context) - List any assumptions you are making (even obvious ones) - Proceed to Phase 2 @@ -84,7 +101,7 @@ If tests fail after implementation: 1. Read the error message carefully -- understand what failed and why 2. Analyze whether the failure is in your implementation or in the test 3. Fix the issue -4. Re-run the test command +4. Re-run the validation command 5. 
Repeat up to 3 total attempts 6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly @@ -98,7 +115,7 @@ Before reporting back, review your own work with fresh eyes. Go through each che - [ ] Did I miss any requirements? **Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Task Exists" section describes? +- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? - [ ] Would a user achieve the stated outcome with this code? - [ ] Did I build anything that doesn't trace back to the success criteria or user story? @@ -117,7 +134,7 @@ Before reporting back, review your own work with fresh eyes. Go through each che **Testing:** - [ ] Do tests verify actual behavior (not just mock behavior)? - [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? +- [ ] Did I run the validation command and confirm it passes? **Evidence:** - [ ] Can I show actual test output (not just "tests pass")? @@ -133,13 +150,13 @@ Return a structured execution report in exactly this format: Use `commands/workflows/references/tdd-evidence-contract.md` as the single source for the `### TDD Evidence` block. `Red` and `Green` prove behavior coverage. `Post-Refactor Green` proves cleanup safety after refactor. Each `Evidence` line should quote the decisive failing or passing signal in one sentence, not a narrative. 
```markdown -## Execution Report: [Task Name] +## Execution Report: [Unit Title] ### Interpretation [Your 2-3 sentence interpretation of what was asked] ### Purpose Served -[Which user story aspect / success criterion this task delivers, from the WHY context] +[Which user story aspect / success criterion this unit delivers, from the WHY context] ### Assumptions Made - [List each assumption, even if obvious] @@ -154,7 +171,7 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc [Insert the exact Ralph evidence block from `commands/workflows/references/tdd-evidence-contract.md`. Preserve the `Red`, `Green`, and `Post-Refactor Green` headings with their command/result/evidence fields.] ### Test Results -- Command: `[test command]` +- Command: `[validation command]` - Result: PASS/FAIL - Attempts: [n] - Output: @@ -171,7 +188,7 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc [If no problems: "None"] ### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] +- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future units] [If none: "None"] @@ -188,9 +205,9 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc _This section is included only when the resolved TDD contract explicitly allows standard implementation._ 1. Read referenced files and understand existing patterns -2. Implement the task following project conventions +2. Implement the unit following project conventions 3. Write tests matching the success criteria -4. Run the test command: `{{TEST_COMMAND}}` +4. Run the validation command: `{{VALIDATION_COMMAND}}` 5. 
If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) --- diff --git a/.github/skills/workflows-plan/references/execution-shape.md b/.github/skills/workflows-plan/references/execution-shape.md new file mode 100644 index 0000000..4ad4d31 --- /dev/null +++ b/.github/skills/workflows-plan/references/execution-shape.md @@ -0,0 +1,93 @@ +# Execution Shape Contract + +Use this reference when planning, deepening, or executing work so the workflow can preserve judgment without re-explaining the same rules in every prompt. + +## Default + +Choose `vertical-slices` unless that would create fake end-to-end work just to satisfy the template. + +## Allowed modes + +### `vertical-slices` (default) + +Use when a thin, testable, end-to-end behavior exists. + +Required packet: +- `Slice type` +- `Serves` +- `Demo scenario` +- `Scope` +- `Scope fence` +- `Files` +- `Depends on` +- `Dependency type` +- `Success criteria` +- `Test command` + +### `infra-track` + +Use for enabling or foundational work where no honest user-visible tracer bullet exists yet. + +Required packet: +- `Capability enabled` +- `Consumers / downstream work unlocked` +- `Scope` +- `Files` +- `Depends on` +- `Risk / rollback` +- `Validation command` +- `Success criteria` + +### `fix-batch` + +Use for a series of small, mostly independent fixes where forcing one vertical slice would blur the real work. + +Required packet: +- `Problem` +- `Repro / expected outcome` +- `Files` +- `Depends on` +- `Validation command` +- `Success criteria` + +## Selection rules + +1. Default to `vertical-slices`. +2. Switch to `infra-track` only when the honest near-term value is enabling capability, not a user-visible behavior. +3. Switch to `fix-batch` only when the work is truly a batch of small fixes, not a feature being split too late. +4. Never force `vertical-slices` if that would create fake verticality. +5.
If the mode is not the default, record why in `execution_shape.rationale` and in `## Execution Shape`. + +## Plan shape + +Add this frontmatter block to every plan: + +```yaml +execution_shape: + mode: vertical-slices # vertical-slices | infra-track | fix-batch + rationale: "" # required when mode is not vertical-slices +``` + +Add this body section: + +```markdown +## Execution Shape +- **Mode:** [vertical-slices | infra-track | fix-batch] +- **Why:** [1-2 sentences] +``` + +Then use the packet section that matches the chosen mode: +- `## Execution Slices` +- `## Infrastructure Work Packets` +- `## Fix Batch Items` + +## Deepening rules + +- Validate the plan against the chosen mode, not against `vertical-slices` unconditionally. +- You may recommend switching modes if the selected one is clearly wrong, but record that as a `WHY Reassessment` note instead of silently rewriting intent. + +## Execution rules + +- `/workflows-work` must execute the units defined by the chosen mode. +- Do not coerce `infra-track` or `fix-batch` plans into slices unless the user explicitly approves a mode change. +- Session tracking may stay generic (`unit`, `work status`) even when the selected mode is `vertical-slices`. diff --git a/.github/skills/workflows-plan/references/spec-review-prompt.md b/.github/skills/workflows-plan/references/spec-review-prompt.md index d6bcaa5..34bcbc1 100644 --- a/.github/skills/workflows-plan/references/spec-review-prompt.md +++ b/.github/skills/workflows-plan/references/spec-review-prompt.md @@ -14,15 +14,15 @@ You are a spec compliance reviewer.
Your job is to verify whether an implementat ## What Was Requested -{{TASK_REQUIREMENTS}} +{{UNIT_REQUIREMENTS}} ## Success Criteria {{SUCCESS_CRITERIA}} -## Task Purpose +## Unit Purpose -{{TASK_SERVES}} +{{UNIT_PURPOSE}} ## What Implementer Claims They Built diff --git a/.github/skills/workflows-review/SKILL.md b/.github/skills/workflows-review/SKILL.md index 842959b..d2c778e 100644 --- a/.github/skills/workflows-review/SKILL.md +++ b/.github/skills/workflows-review/SKILL.md @@ -143,7 +143,7 @@ This context is passed to EVERY review agent below. It is not optional. #### TDD Evidence Gate (BEFORE reviewer dispatch) -If a `docs/execution-sessions/work-*/state.md` file exists for this branch, read the completed task session files before dispatching review agents and build a terse evidence ledger. +If a `docs/execution-sessions/work-*/state.md` file exists for this branch, read the completed execution unit session files before dispatching review agents and build a terse evidence ledger. Apply `references/tdd-evidence-contract.md` as the source of truth for the Ralph evidence block and review-gate classifications. Verify the plan's approved exception contract instead of improvising replacement evidence rules. 
@@ -521,8 +521,8 @@ After creating all todo files, present comprehensive summary: ### TDD Evidence Gate -- **Behavior coverage:** PASS / FAIL — [task/session refs with weak or missing `Red`/`Green` evidence] -- **Cleanup after refactor:** PASS / FAIL — [task/session refs with weak or missing `Post-Refactor Green` evidence] +- **Behavior coverage:** PASS / FAIL — [unit/session refs with weak or missing `Red`/`Green` evidence] +- **Cleanup after refactor:** PASS / FAIL — [unit/session refs with weak or missing `Post-Refactor Green` evidence] [If PARTIALLY or NO:] **Gaps:** diff --git a/.github/skills/workflows-review/references/execution-agent-prompt.md b/.github/skills/workflows-review/references/execution-agent-prompt.md index 78cd269..9047914 100644 --- a/.github/skills/workflows-review/references/execution-agent-prompt.md +++ b/.github/skills/workflows-review/references/execution-agent-prompt.md @@ -1,5 +1,10 @@ --- -{} +model: claude-sonnet-4.6 +platforms: + copilot: + model: gpt-5.3-codex + opencode: + model: openrouter/moonshotai/kimi-k2.6 --- # Execution Agent Prompt Template @@ -10,24 +15,32 @@ This template is used by the `workflows:work` orchestrator to construct prompts --- -You are an execution agent implementing a specific task from a work plan. Follow the 4-phase protocol below exactly. +You are an execution agent implementing a specific execution unit from a work plan. Follow the 4-phase protocol below exactly. 
-## Your Task +## Your Unit -**Task:** {{TASK_NAME}} +**Unit:** {{UNIT_TITLE}} -{{TASK_DESCRIPTION}} +{{UNIT_DESCRIPTION}} + +**Unit kind:** {{UNIT_KIND}} + +**Outcome scenario:** {{OUTCOME_SCENARIO}} + +**Scope:** {{UNIT_SCOPE}} + +**Scope fence:** {{UNIT_SCOPE_FENCE}} **Files to create/modify:** {{FILE_LIST}} **Success criteria:** {{SUCCESS_CRITERIA}} -**Test command:** `{{TEST_COMMAND}}` +**Validation command:** `{{VALIDATION_COMMAND}}` **Dependencies completed:** {{COMPLETED_DEPENDENCIES}} -## Why This Task Exists +## Why This Unit Exists {{WHY_CONTEXT}} @@ -35,7 +48,11 @@ You are an execution agent implementing a specific task from a work plan. Follow {{ARCHITECTURAL_CONTEXT}} -## Learnings from Previous Tasks +## Architecture Handoff + +{{ARCHITECTURE_HANDOFF}} + +## Learnings from Previous Units {{LEARNINGS_BRIEF}} @@ -51,7 +68,7 @@ You are an execution agent implementing a specific task from a work plan. Follow ## Phase 1: Understand Before Building -Before writing ANY code, review the task requirements AND the "Why This Task Exists" section carefully. +Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. **If anything is unclear, ambiguous, or could be interpreted multiple ways:** - List your questions explicitly @@ -60,7 +77,7 @@ Before writing ANY code, review the task requirements AND the "Why This Task Exi **If everything is clear:** - State your interpretation of the requirements in 2-3 sentences -- State how this task serves the overall user story (from the WHY context) +- State how this unit serves the overall user story (from the WHY context) - List any assumptions you are making (even obvious ones) - Proceed to Phase 2 @@ -84,7 +101,7 @@ If tests fail after implementation: 1. Read the error message carefully -- understand what failed and why 2. Analyze whether the failure is in your implementation or in the test 3. Fix the issue -4. Re-run the test command +4. Re-run the validation command 5. 
Repeat up to 3 total attempts 6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly @@ -98,7 +115,7 @@ Before reporting back, review your own work with fresh eyes. Go through each che - [ ] Did I miss any requirements? **Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Task Exists" section describes? +- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? - [ ] Would a user achieve the stated outcome with this code? - [ ] Did I build anything that doesn't trace back to the success criteria or user story? @@ -117,7 +134,7 @@ Before reporting back, review your own work with fresh eyes. Go through each che **Testing:** - [ ] Do tests verify actual behavior (not just mock behavior)? - [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? +- [ ] Did I run the validation command and confirm it passes? **Evidence:** - [ ] Can I show actual test output (not just "tests pass")? @@ -133,13 +150,13 @@ Return a structured execution report in exactly this format: Use `commands/workflows/references/tdd-evidence-contract.md` as the single source for the `### TDD Evidence` block. `Red` and `Green` prove behavior coverage. `Post-Refactor Green` proves cleanup safety after refactor. Each `Evidence` line should quote the decisive failing or passing signal in one sentence, not a narrative. 
```markdown -## Execution Report: [Task Name] +## Execution Report: [Unit Title] ### Interpretation [Your 2-3 sentence interpretation of what was asked] ### Purpose Served -[Which user story aspect / success criterion this task delivers, from the WHY context] +[Which user story aspect / success criterion this unit delivers, from the WHY context] ### Assumptions Made - [List each assumption, even if obvious] @@ -154,7 +171,7 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc [Insert the exact Ralph evidence block from `commands/workflows/references/tdd-evidence-contract.md`. Preserve the `Red`, `Green`, and `Post-Refactor Green` headings with their command/result/evidence fields.] ### Test Results -- Command: `[test command]` +- Command: `[validation command]` - Result: PASS/FAIL - Attempts: [n] - Output: @@ -171,7 +188,7 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc [If no problems: "None"] ### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] +- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future units] [If none: "None"] @@ -188,9 +205,9 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc _This section is included only when the resolved TDD contract explicitly allows standard implementation._ 1. Read referenced files and understand existing patterns -2. Implement the task following project conventions +2. Implement the unit following project conventions 3. Write tests matching the success criteria -4. Run the test command: `{{TEST_COMMAND}}` +4. Run the validation command: `{{VALIDATION_COMMAND}}` 5. 
If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) --- diff --git a/.github/skills/workflows-review/references/execution-shape.md b/.github/skills/workflows-review/references/execution-shape.md new file mode 100644 index 0000000..4ad4d31 --- /dev/null +++ b/.github/skills/workflows-review/references/execution-shape.md @@ -0,0 +1,93 @@ +# Execution Shape Contract + +Use this reference when planning, deepening, or executing work so the workflow can preserve judgment without re-explaining the same rules in every prompt. + +## Default + +Choose `vertical-slices` unless that would create fake end-to-end work just to satisfy the template. + +## Allowed modes + +### `vertical-slices` (default) + +Use when a thin, testable, end-to-end behavior exists. + +Required packet: +- `Slice type` +- `Serves` +- `Demo scenario` +- `Scope` +- `Scope fence` +- `Files` +- `Depends on` +- `Dependency type` +- `Success criteria` +- `Validation command` + +### `infra-track` + +Use for enabling or foundational work where no honest user-visible tracer bullet exists yet. + +Required packet: +- `Capability enabled` +- `Consumers / downstream work unlocked` +- `Scope` +- `Files` +- `Depends on` +- `Risk / rollback` +- `Validation command` +- `Success criteria` + +### `fix-batch` + +Use for a series of small mostly independent fixes where forcing one vertical slice would blur the real work. + +Required packet: +- `Problem` +- `Repro / expected outcome` +- `Files` +- `Depends on` +- `Validation command` +- `Success criteria` + +## Selection rules + +1. Default to `vertical-slices`. +2. Switch to `infra-track` only when the honest near-term value is enabling capability, not a user-visible behavior. +3. Switch to `fix-batch` only when the work is truly a batch of small fixes, not a feature being split too late. +4. Never force `vertical-slices` if that would create fake verticality. +5. 
If the mode is not the default, record why in `execution_shape.rationale` and in `## Execution Shape`. + +## Plan shape + +Add this frontmatter block to every plan: + +```yaml +execution_shape: + mode: vertical-slices # vertical-slices | infra-track | fix-batch + rationale: "" # required when mode is not vertical-slices +``` + +Add this body section: + +```markdown +## Execution Shape +- **Mode:** [vertical-slices | infra-track | fix-batch] +- **Why:** [1-2 sentences] +``` + +Then use the packet section that matches the chosen mode: +- `## Execution Slices` +- `## Infrastructure Work Packets` +- `## Fix Batch Items` + +## Deepening rules + +- Validate the plan against the chosen mode, not against `vertical-slices` unconditionally. +- You may recommend switching modes if the selected one is clearly wrong, but record that as a `WHY Reassessment` note instead of silently rewriting intent. + +## Execution rules + +- `/workflows-work` must execute the units defined by the chosen mode. +- Do not coerce `infra-track` or `fix-batch` plans into slices unless the user explicitly approves a mode change. +- Session tracking may stay generic (`unit`, `work status`) even when the selected mode is `vertical-slices`. diff --git a/.github/skills/workflows-review/references/spec-review-prompt.md b/.github/skills/workflows-review/references/spec-review-prompt.md index d6bcaa5..34bcbc1 100644 --- a/.github/skills/workflows-review/references/spec-review-prompt.md +++ b/.github/skills/workflows-review/references/spec-review-prompt.md @@ -14,15 +14,15 @@ You are a spec compliance reviewer. 
Your job is to verify whether an implementat ## What Was Requested -{{TASK_REQUIREMENTS}} +{{UNIT_REQUIREMENTS}} ## Success Criteria {{SUCCESS_CRITERIA}} -## Task Purpose +## Unit Purpose -{{TASK_SERVES}} +{{UNIT_PURPOSE}} ## What Implementer Claims They Built diff --git a/.github/skills/workflows-work/SKILL.md b/.github/skills/workflows-work/SKILL.md index e44f304..27158ad 100644 --- a/.github/skills/workflows-work/SKILL.md +++ b/.github/skills/workflows-work/SKILL.md @@ -14,17 +14,17 @@ Execute a work plan while maintaining WHY tracing from problem narrative through ## Introduction -This command takes a work document (plan, specification, or todo file) and executes it systematically using a **subagent orchestration model**. The orchestrator (this conversation) decomposes the plan into scoped chunks and delegates each to a focused subagent. Each subagent follows a standardized 4-phase protocol (understand, implement, self-review, report) defined in the execution agent prompt template. +This command takes a work document (plan, specification, or todo file) and executes it systematically using a **subagent orchestration model**. The orchestrator (this conversation) loads or adapts the plan into execution units and delegates each unit to a focused subagent. `vertical-slices` is the default execution shape, but `infra-track` and `fix-batch` are also valid when declared by the plan. Each subagent follows a standardized 4-phase protocol (understand, implement, self-review, report) defined in the execution agent prompt template. -**WHY-grounded execution:** Every subagent receives the plan's WHY context -- the problem narrative, user story, architectural context, the architecture handoff contract, and which success criterion their specific task serves. This prevents implementation drift where technically correct code fails to deliver the user's actual need. 
The orchestrator is the guardian of WHY: it extracts purpose from the plan, threads it through every task prompt, and validates that the combined output delivers the stated user story. +**WHY-grounded execution:** Every subagent receives the plan's WHY context -- the problem narrative, user story, architectural context, the architecture handoff contract, and which success criterion their specific unit serves. This prevents implementation drift where technically correct code fails to deliver the user's actual need. The orchestrator is the guardian of WHY: it extracts purpose from the plan, threads it through every unit prompt, and validates that the combined output delivers the stated user story. ### Review Mode This command supports a `--review-mode` argument that controls when code review happens: -- **`bulk`** (default) -- Review happens after ALL tasks complete, using `/workflows-review`. This is the standard behavior and is fastest for most work. -- **`inline`** -- After each task, a lightweight two-stage review (spec compliance then code quality) runs automatically. Catches spec drift early but adds 2-4 extra subagent calls per task. -- **`both`** -- Inline review per task AND comprehensive `/workflows-review` at the end. Maximum quality assurance. +- **`bulk`** (default) -- Review happens after ALL units complete, using `/workflows-review`. This is the standard behavior and is fastest for most work. +- **`inline`** -- After each unit, a lightweight two-stage review (spec compliance then code quality) runs automatically. Catches spec drift early but adds 2-4 extra subagent calls per unit. +- **`both`** -- Inline review per unit AND comprehensive `/workflows-review` at the end. Maximum quality assurance. If no `--review-mode` is specified, check `compound-engineering.local.md` for a `review_mode` setting. If not found there either, default to `bulk`. @@ -38,15 +38,16 @@ If no `--review-mode` is specified, check `compound-engineering.local.md` for a 1. 
**Read Plan and Extract WHY + Guardrail Context** - - Read the work document completely - - **Extract WHY artifacts** from the plan (these ground everything that follows): - - **Problem Narrative** -- why this work exists, what pain it solves - - **User Story** -- who benefits and what outcome they get - - **Architectural Context** -- how the solution fits in the system - - **Success Criteria** -- measurable conditions that define "done" - - **Phase-to-story tracing** -- each phase's "Serves:" line showing what user story aspect it delivers - - **Constitution alignment** -- relevant principles, required approvals, and any approved waivers - - **`tdd` frontmatter + `## TDD & Evidence Contract`** -- resolve the effective TDD contract using `references/tdd-evidence-contract.md` (plan overrides local, `inherit` falls back, and no-local-config defaults to Ralph-driven `red-green-refactor` with unit + e2e evidence required) + - Read the work document completely + - **Extract WHY artifacts** from the plan (these ground everything that follows): + - **Problem Narrative** -- why this work exists, what pain it solves + - **User Story** -- who benefits and what outcome they get + - **Architectural Context** -- how the solution fits in the system + - **Success Criteria** -- measurable conditions that define "done" + - **Execution shape** -- resolve it using `references/execution-shape.md` + - **Unit tracing** -- each packet's `Serves`, `Consumers`, or equivalent purpose line showing what outcome it delivers or unlocks + - **Constitution alignment** -- relevant principles, required approvals, and any approved waivers + - **`tdd` frontmatter + `## TDD & Evidence Contract`** -- resolve the effective TDD contract using `references/tdd-evidence-contract.md` (plan overrides local, `inherit` falls back, and no-local-config defaults to Ralph-driven `red-green-refactor` with unit + e2e evidence required) - Check for `handoff:` frontmatter in the plan. 
If present, verify all flags are `true` (problem_narrative, user_story, architectural_context, success_criteria). If any are `false`, warn the user that WHY context is incomplete and suggest running `/workflows-brainstorm` or `/workflows-plan` first. - If the resolved contract weakens Ralph/unit+e2e without a justified exception in the plan, stop and ask for the plan contract to be corrected before execution - If `docs/constitution.md` exists, read it and extract the active constitution version, applicable principles, execution baselines, and approval rules. If the plan lists `constitution_waivers`, honor only those explicit exceptions. @@ -55,7 +56,8 @@ If no `--review-mode` is specified, check `compound-engineering.local.md` for a - If no architecture artifact is recorded, assemble an explicit architecture handoff contract from the plan's Architectural Context, Key Decisions, Constitution Alignment, brainstorm context, and execution constraints. Tell the user this is a fallback and recommend `/workflows-architecture` if boundaries are still unsettled. - Review any other references or links provided in the plan - If the constitution requires explicit approval for any part of the planned work (for example, risky writes, schema changes, auth changes, or scope expansions), surface that before execution starts - - If anything is unclear or ambiguous, ask clarifying questions now + - If the document is not already in a declared execution shape, treat it as **legacy input**. Before spawning subagents, adapt it into execution units in STATE.md. If that adaptation materially changes scope or ordering, ask the user to approve the adapted unit backlog before proceeding. 
+ - If anything is unclear or ambiguous, ask clarifying questions now - Get user approval to proceed - **Do not skip this** - better to ask questions now than build the wrong thing @@ -103,27 +105,27 @@ If no `--review-mode` is specified, check `compound-engineering.local.md` for a - You want to keep the default branch clean while experimenting - You plan to switch between branches frequently -3. **Preview Task Breakdown** - - Mentally identify the major tasks from the plan +3. **Preview Unit Breakdown** + - Mentally identify the major execution units from the plan - Note any questions about dependencies or scope - - The formal task decomposition happens in Phase 2 Step 4 (STATE.md), which is the persistent record of progress + - The formal unit decomposition happens in Phase 2 Step 4 (STATE.md), which is the persistent record of progress - TodoWrite can be used for in-conversation progress tracking if helpful, but STATE.md is the source of truth ### Phase 2: Orchestrated Execution -Phase 2 is where the orchestrator (this conversation) decomposes the plan into scoped chunks and delegates each to a focused subagent. The orchestrator does NOT implement code itself -- it decomposes, delegates, records, and routes. +Phase 2 is where the orchestrator (this conversation) resolves the plan's execution shape, decomposes the work into execution units, and delegates each to a focused subagent. The orchestrator does NOT implement code itself -- it decomposes, delegates, records, and routes. #### Step 1: Validate Plan Readiness -Before executing, validate four things: **structural readiness** (tasks are granular and testable), **WHY readiness** (the plan carries purpose context), **TDD readiness** (the execution contract is explicit and enforceable), and **guardrail readiness** (repo-wide rules are visible and actionable). 
+Before executing, validate four things: **structural readiness** (the selected execution shape is honest and its units are testable), **WHY readiness** (the plan carries purpose context), **TDD readiness** (the execution contract is explicit and enforceable), and **guardrail readiness** (repo-wide rules are visible and actionable). -**Structural readiness** -- each implementation task should have: +**Structural readiness** -- first resolve `execution_shape` using `references/execution-shape.md`, then verify the units for that mode: -- **Task description** -- what needs to be done -- **Files to create/modify** -- specific file paths -- **Success criteria** -- checkboxes that define "done" -- **Test command** -- how to verify the task works -- **Dependencies** -- which other tasks must complete first +- **`vertical-slices`** -- slice type, serves, demo scenario, scope fence, files, success criteria, validation command, dependencies, dependency type +- **`infra-track`** -- capability enabled, consumers / downstream work unlocked, scope, files, risk / rollback, success criteria, validation command, dependencies +- **`fix-batch`** -- problem, repro / expected outcome, files, success criteria, validation command, dependencies +- **Default rule** -- if `execution_shape` is missing, assume `vertical-slices` +- **Anti-coercion rule** -- do not force infra or fix-batch work into slices if that would create fake verticality **Guardrail readiness** -- when the project has `docs/constitution.md`, the plan should make repo-wide rules visible: @@ -138,25 +140,25 @@ Before executing, validate four things: **structural readiness** (tasks are gran - **`## TDD & Evidence Contract` present** -- states the resolved execution path in plain language - **Effective mode resolved** -- Ralph-driven by default unless the plan explicitly approves a standard-mode exception - **Required evidence resolved** -- unit + e2e by default, or justified replacement evidence when explicitly waived -- 
**Report contract visible** -- Ralph-driven tasks must emit stable red, green, and post-refactor green evidence blocks +- **Report contract visible** -- Ralph-driven units must emit stable red, green, and post-refactor green evidence blocks **WHY readiness** -- the plan should have: - **Problem Narrative** -- present and non-empty - **User Story** -- present with clear "As a... I want... So that..." - **Architectural Context** -- present, describing system fit -- **Success Criteria** -- present at plan level (not just task level) -- **Phase tracing** -- each phase has a "Serves:" line connecting it to the user story +- **Success Criteria** -- present at plan level (not just unit level) +- **Unit tracing** -- each execution unit has a purpose line connecting it to the user story or explicit enabling outcome -If the plan lacks structural details, or if no architecture artifact / handoff contract can explain the boundaries, refuse to proceed and suggest running `/workflows-architecture` first if the boundaries are still fuzzy, then `/deepen-plan`, or manually breaking down the plan. +If the plan lacks structural details, or if no architecture artifact / handoff contract can explain the boundaries, refuse to proceed and suggest running `/workflows-architecture` first if the boundaries are still fuzzy, then `/deepen-plan`, or manually breaking down the plan into execution units. If the plan lacks the `tdd` block or `## TDD & Evidence Contract`, or if the resolved contract is ambiguous, refuse to proceed and suggest `/workflows-plan` or `/deepen-plan` to repair the execution contract before spawning subagents. If the plan lacks WHY artifacts, the orchestrator should **construct minimal WHY context** before proceeding: 1. Ask the user: "This plan doesn't include a problem narrative or user story. In one sentence, what problem are we solving and for whom?" -2. Infer success criteria from the task-level criteria +2. Infer success criteria from the unit-level criteria 3. 
Infer architectural context from the file paths and technologies mentioned -4. Record these in STATE.md (see Step 3) so they're available for all tasks +4. Record these in STATE.md (see Step 3) so they're available for all units #### Step 2: Check for Resumable Session @@ -169,7 +171,7 @@ ls docs/execution-sessions/work-*/state.md 2>/dev/null If a previous session exists for the same plan file and has `status: in_progress`: - Ask the user: "Found incomplete session `[session_id]` for this plan. Resume where you left off, or start fresh?" -- **If resume**: Read STATE.md, load the WHY Context section plus the Architecture Handoff section, skip completed tasks, load the learnings brief, and continue from `current_task` +- **If resume**: Read STATE.md, load the WHY Context section plus the Architecture Handoff section, skip completed units, load the learnings brief, and continue from `current_unit` - **If fresh**: Archive the old session directory (rename with `-archived` suffix), then start a new session If no resumable session exists, proceed to Step 3. 
@@ -191,8 +193,9 @@ plan_file: [path to plan] brainstorm_ref: [path to brainstorm, if available] started: [ISO timestamp] status: in_progress -current_task: 0 -total_tasks: [count] +execution_shape: [vertical-slices | infra-track | fix-batch] +current_unit: 0 +total_units: [count] session_id: [SESSION_ID] --- @@ -227,57 +230,68 @@ session_id: [SESSION_ID] - Seams / adapters / contracts: [boundaries this execution must honor] - Review guidance: [what `/workflows-review` must verify later] -## Task Status -| # | Task | Serves | Status | Attempts | Session File | -|---|------|--------|--------|----------|--------------| -| 1 | [task name] | [which user story aspect] | pending | -- | -- | -| 2 | [task name] | [which user story aspect] | pending | -- | -- | +## Work Status +| # | Unit | Kind | Serves / Unlocks | Status | Attempts | Session File | +|---|------|------|------------------|--------|----------|--------------| +| 1 | [unit title] | tracer-bullet | [which user story aspect or enabling outcome] | pending | -- | -- | +| 2 | [unit title] | expansion | [which user story aspect or enabling outcome] | pending | -- | -- | ... ## Learnings Brief _No learnings yet._ ``` -#### Step 4: Decompose Plan into Execution Chunks +#### Step 4: Load or Adapt Execution Units -The orchestrator parses the plan and creates a list of execution chunks. Each chunk is a self-contained unit of work. The orchestrator does the heavy lifting here: +The orchestrator parses the plan and creates a list of execution units. Each unit is a self-contained packet of work defined by the selected execution shape. The orchestrator does the heavy lifting here: -- **Break large phases** into smaller tasks if needed (each task should be completable in one subagent session) -- **Preserve WHY tracing** -- when splitting a phase, each resulting task inherits the parent phase's "Serves:" line. Never create an orphan task with no connection to the user story. 
-- **Identify file dependencies** between tasks (Task B modifies a file created by Task A) -- **Determine parallelizable tasks** -- tasks with non-overlapping file sets can run simultaneously -- **Ensure each chunk has clear success criteria** -- if the plan already defines them, use them directly; otherwise, the orchestrator must create them -- **Map each task to its purpose** -- record which success criterion or user story aspect each task delivers (this goes in STATE.md's "Serves" column) +- **Prefer plan-defined units directly** -- if the plan already declares a coherent execution shape, execute those packets as written +- **Adapt legacy phase/task plans into units before coding** -- do not execute raw task lists directly once the shape contract is available +- **Break oversized units** into smaller units if needed (each unit should be completable in one subagent session) +- **Preserve WHY tracing** -- when splitting a unit, each resulting unit inherits or refines the parent unit's purpose line. Never create an orphan unit with no connection to the user story. +- **Identify file dependencies** between units +- **Determine parallelizable units** -- only units with non-overlapping file sets and compatible dependencies can run simultaneously +- **Ensure each unit has clear success criteria** -- if the plan already defines them, use them directly; otherwise, the orchestrator must create them +- **Map each unit to its purpose** -- record which success criterion or enabling outcome each unit delivers (this goes in STATE.md's "Serves / Unlocks" column) -If the plan already has well-defined tasks with success criteria, use them directly. If not, the orchestrator must create them before proceeding. 
+Mode-specific rules: +- **`vertical-slices`** -- execute slices directly; keep the first unit a tracer bullet +- **`infra-track`** -- execute infrastructure work packets directly; do not coerce them into fake slices +- **`fix-batch`** -- execute fix items directly; keep each one narrow and independently verifiable -#### Step 5: Execute Task Loop +If the plan already has well-defined units with success criteria, use them directly. If not, the orchestrator must create them before proceeding. -For each task (or parallel batch of tasks), follow this cycle: +#### Step 5: Execute Unit Loop + +For each unit (or parallel batch of units), follow this cycle: ##### a. Build Scoped Prompt -For each task, the orchestrator constructs a focused prompt by loading the **execution agent prompt template** from `references/execution-agent-prompt.md` and filling in the context blocks. +For each unit, the orchestrator constructs a focused prompt by loading the **execution agent prompt template** from `references/execution-agent-prompt.md` and filling in the context blocks. Before building `scoped_prompt`, apply the shared `Reference Template Loading` protocol in `references/orchestration-protocol.md` to `execution-agent-prompt.md`. Fill the placeholders from the loaded template and do not reconstruct the prompt from memory. -- **{{TASK_NAME}}** and **{{TASK_DESCRIPTION}}** -- from the plan +- **{{UNIT_TITLE}}** and **{{UNIT_DESCRIPTION}}** -- from the plan +- **{{UNIT_KIND}}** -- from the plan (`tracer-bullet`, `infra-packet`, `fix-item`, etc.) 
+- **{{OUTCOME_SCENARIO}}** -- the observable behavior or enabling outcome this unit proves +- **{{UNIT_SCOPE}}** -- what the unit owns and excludes +- **{{UNIT_SCOPE_FENCE}}** -- the boundary that keeps the unit thin - **{{FILE_LIST}}** -- files to create/modify from the plan - **{{SUCCESS_CRITERIA}}** -- checkboxes that define "done" -- **{{TEST_COMMAND}}** -- how to verify the task works -- **{{COMPLETED_DEPENDENCIES}}** -- list of already-completed tasks this depends on +- **{{VALIDATION_COMMAND}}** -- how to verify the unit works +- **{{COMPLETED_DEPENDENCIES}}** -- list of already-completed units this depends on - **{{WHY_CONTEXT}}** -- the purpose grounding block (constructed by orchestrator): ``` - ## Why This Task Exists + ## Why This Unit Exists **Problem:** [problem narrative from plan -- 1-2 sentences] **User Story:** [user story from plan] - **This task serves:** [the "Serves:" line from this task's parent phase -- which user story aspect or success criterion this delivers] + **This unit serves:** [the packet purpose line from this unit -- which user story aspect, success criterion, or enabling outcome this delivers] **Overall success criteria:** [plan-level success criteria list] **Guardrails:** [relevant constitution principles, approval rules, and approved waivers] ``` -- **{{ARCHITECTURAL_CONTEXT}}** -- from the plan's Architectural Context section, filtered to what's relevant for this task's files and domain -- **{{ARCHITECTURE_HANDOFF}}** -- from the `docs/architecture/` artifact or explicit plan-derived handoff contract; include deletion-test decisions, interfaces as test surfaces, seams, adapters, contracts, and downstream review guidance relevant to this task -- **{{LEARNINGS_BRIEF}}** -- from previous tasks, filtered by domain relevance (only include backend learnings for backend tasks, frontend learnings for frontend tasks, etc.) 
+- **{{ARCHITECTURAL_CONTEXT}}** -- from the plan's Architectural Context section, filtered to what's relevant for this unit's files and domain +- **{{ARCHITECTURE_HANDOFF}}** -- from the `docs/architecture/` artifact or explicit plan-derived handoff contract; include deletion-test decisions, interfaces as test surfaces, seams, adapters, contracts, and downstream review guidance relevant to this unit +- **{{LEARNINGS_BRIEF}}** -- from previous units, filtered by domain relevance - **{{PROJECT_CONVENTIONS}}** -- from CLAUDE.md plus relevant constitution baselines - **{{TDD_CONTRACT}}** -- the resolved execution contract: effective mode, Ralph/default loop, required unit/e2e evidence, and any explicit exceptions - **{{TDD_SECTION}}** -- if the resolved effective mode is Ralph-driven, include the Ralph/TDD Implementation Section from the template; otherwise include the Standard Implementation Section. Do not treat Ralph as an adjacent side command when it is the resolved default. @@ -290,7 +304,7 @@ The execution agent template instructs each subagent to follow a 4-phase protoco ##### b. Spawn Subagent -Delegate the task to a focused subagent: +Delegate the unit to a focused subagent: ``` Task(general-purpose, prompt=scoped_prompt) @@ -312,23 +326,23 @@ The subagent prompt is constructed from the loaded execution agent template (`re - Final test results (pass/fail) - Attempt count -**For parallel tasks**: Spawn multiple subagents simultaneously. Only parallelize tasks with non-overlapping file sets. Before parallelizing, verify file sets do not overlap. +**For parallel units**: Spawn multiple subagents simultaneously only when their file sets do not overlap and their dependency types allow parallel work. Before parallelizing, verify file sets do not overlap and no unit claims shared mutable state without an explicit guard. **Example scoped prompt:** ``` -You are implementing Task 3 of a feature plan. 
Here is your scoped context: +You are implementing Unit 3 of a feature plan. Here is your scoped context: -## Why This Task Exists +## Why This Unit Exists **Problem:** Users currently cannot authenticate, forcing manual session management that's error-prone and insecure. **User Story:** As a user, I want to log in with my credentials so that I can access my personalized dashboard securely. -**This task serves:** "Secure authentication flow" -- implementing the core token generation that enables the login experience. +**This unit serves:** "Secure authentication flow" -- implementing the first thin end-to-end login path. **Overall success criteria:** - Users can log in and receive a JWT token - Invalid credentials are rejected with clear error messages - Tokens expire after the configured TTL -## Task +## Unit Create the UserAuthService with JWT token generation and validation. ## Files to Create/Modify @@ -342,11 +356,11 @@ Create the UserAuthService with JWT token generation and validation. - [ ] authenticate() throws AuthenticationError for invalid credentials - [ ] Token validation works for valid and expired tokens -## Test Command +## Validation Command npm test -- --filter UserAuthService ## Architectural Context -JWT-based stateless auth. Tokens issued by UserAuthService, validated by middleware (Task 4). No server-side session storage. +JWT-based stateless auth. Tokens issued by UserAuthService, validated by middleware (Unit 4). No server-side session storage. ## TDD Execution Contract - Effective mode: Ralph-driven TDD @@ -359,7 +373,7 @@ JWT-based stateless auth. 
Tokens issued by UserAuthService, validated by middlew - Variables are camelCase - Type annotations on all parameters and return types -## Learnings from Previous Tasks +## Learnings from Previous Units - [backend] Use jest.mock() for module mocking - [backend] Factory pattern: createUser() helper not new User() - [testing] Use expect().toThrow() for error assertions @@ -377,15 +391,16 @@ JWT-based stateless auth. Tokens issued by UserAuthService, validated by middlew When the subagent returns, the orchestrator processes the results: -**0. Validate the execution contract evidence** -- audit the report against `references/tdd-evidence-contract.md`. If a Ralph-driven task is missing stable `Red`, `Green`, and `Post-Refactor Green` evidence blocks, treat the report as incomplete and send it back for correction before marking the task complete. +**0. Validate the execution contract evidence** -- audit the report against `references/tdd-evidence-contract.md`. If a Ralph-driven unit is missing stable `Red`, `Green`, and `Post-Refactor Green` evidence blocks, treat the report as incomplete and send it back for correction before marking the unit complete. -**1. Write session file** to `docs/execution-sessions/${SESSION_ID}/task-{nn}-{slug}.md`: +**1. Write session file** to `docs/execution-sessions/${SESSION_ID}/unit-{nn}-{slug}.md`: ```markdown --- -task: "[task name]" -task_number: [n] -serves: "[which user story aspect / success criterion this task delivers]" +unit: "[unit title]" +unit_number: [n] +unit_kind: [tracer-bullet|expansion|hardening|infra-packet|fix-item] +serves: "[which user story aspect / success criterion / enabling outcome this unit delivers]" status: [completed|failed] attempt_count: [n] domains: [backend, frontend, testing, database, etc.] @@ -422,22 +437,22 @@ session_id: [SESSION_ID] **2. 
Inline Review (when `--review-mode inline` or `--review-mode both`)** - If the `--review-mode` argument is `inline` or `both`, perform a two-stage inline review before proceeding to the next task. If `--review-mode` is `bulk` (the default), skip this step. +If the `--review-mode` argument is `inline` or `both`, perform a two-stage inline review before proceeding to the next unit. If `--review-mode` is `bulk` (the default), skip this step. **Stage 1: Spec Compliance Review** Apply the shared `Reference Template Loading` protocol from `references/orchestration-protocol.md`, substituting `spec-review-prompt.md`. If the template cannot be loaded and quoted, stop the inline review loop and report the missing template instead of improvising. Then fill in: - - `{{TASK_REQUIREMENTS}}` -- the task description and success criteria + - `{{UNIT_REQUIREMENTS}}` -- the unit description, outcome scenario, scope fence, and success criteria - `{{SUCCESS_CRITERIA}}` -- the success criteria checkboxes - `{{IMPLEMENTER_REPORT}}` -- the execution report from the subagent - - `{{TASK_SERVES}}` -- what user story aspect this task delivers (from the task's "Serves:" line) + - `{{UNIT_PURPOSE}}` -- what user story aspect or enabling outcome this unit delivers (from the unit's purpose line) Spawn a spec reviewer subagent: ``` Task(general-purpose, prompt=filled_spec_review_prompt) ``` - The spec reviewer should check not just checkbox compliance but whether the implementation actually delivers on the purpose stated in "Serves:". A task can pass all checkboxes but miss the intent. + The spec reviewer should check not just checkbox compliance but whether the implementation actually delivers on the recorded purpose. A unit can pass all checkboxes but miss the intent. - If **PASS**: proceed to Stage 2 - If **FAIL**: spawn a new execution subagent with the specific issues to fix, then re-run the spec reviewer (max 2 fix-review cycles). 
If still failing after 2 cycles, log the issues and ask the user how to proceed. @@ -455,26 +470,26 @@ session_id: [SESSION_ID] - If **PASS**: proceed to next steps - If **FAIL** with Critical issues: spawn fix subagent, re-review (max 2 cycles) - - If **FAIL** with only Important/Minor issues: log them for the orchestrator's attention but proceed to next task (these will also be caught by `/workflows-review` if run later) + - If **FAIL** with only Important/Minor issues: log them for the orchestrator's attention but proceed to next unit (these will also be caught by `/workflows-review` if run later) - **Note:** Inline review is a lightweight per-task check. It does NOT replace the comprehensive `/workflows-review` multi-agent review. When `--review-mode both` is active, inline review runs per-task AND `/workflows-review` runs after all tasks complete. + **Note:** Inline review is a lightweight per-unit check. It does NOT replace the comprehensive `/workflows-review` multi-agent review. When `--review-mode both` is active, inline review runs per-unit AND `/workflows-review` runs after all units complete. -**3. Update STATE.md** -- mark the task status, increment `current_task`, update the task status table +**3. Update STATE.md** -- mark the unit status, increment `current_unit`, update the work status table -**4. Update learnings brief** -- add new learnings from this task, tagged by domain, deduplicated against existing learnings +**4. Update learnings brief** -- add new learnings from this unit, tagged by domain, deduplicated against existing learnings **5. Update plan file** -- check off completed items (`[ ]` to `[x]`) in the original plan document -**6. Regression guard** -- run test commands from ALL previously completed tasks. If any regress: - - Log the regression in the current task's session file +**6. Regression guard** -- run validation commands from ALL previously completed units. 
If any regress: + - Log the regression in the current unit's session file - Spawn a fix subagent with context about what broke and why - - Do not proceed to the next task until the regression is fixed + - Do not proceed to the next unit until the regression is fixed **7. Incremental commit** if appropriate (logical unit complete, tests pass): | Commit when... | Don't commit when... | |----------------|---------------------| - | Logical unit complete (model, service, component) | Small part of a larger unit | + | Logical unit complete (one observable outcome) | Small part of a larger unit | | Tests pass + meaningful progress | Tests failing | | About to switch contexts (backend to frontend) | Purely scaffolding with no behavior | | About to attempt risky/uncertain changes | Would need a "WIP" commit message | @@ -495,10 +510,10 @@ session_id: [SESSION_ID] If a subagent fails after its internal retries: -1. **Reframe**: Can the task be broken down differently? Try spawning a new subagent with a different approach or smaller scope. -2. **Ask user**: Use AskUserQuestion -- "Task [name] failed after 3 attempts. [error summary]. How should I proceed?" - - Options: "Retry with different approach", "Skip and continue", "Stop pipeline", "I'll fix it manually" -3. **Skip and continue**: Mark task as `skipped` in STATE.md. Note it as a blocker for any dependent tasks. Dependent tasks are also skipped automatically. +1. **Reframe**: Can the unit be broken down differently? Try spawning a new subagent with a different approach or smaller scope. +2. **Ask user**: Use AskUserQuestion -- "Unit [name] failed after 3 attempts. [error summary]. How should I proceed?" + - Options: "Retry with different approach", "Skip and continue", "Stop pipeline", "I'll fix it manually" +3. **Skip and continue**: Mark the unit as `skipped` in STATE.md. Note it as a blocker for any dependent units. Dependent units are also skipped automatically. 4. 
**Stop pipeline**: Save all state to STATE.md with `status: paused`, present a summary of what was completed and what remains. ### Phase 3: Quality Check @@ -519,11 +534,11 @@ If a subagent fails after its internal retries: Before mechanical quality checks, validate that the combined work delivers on the WHY: - - **User story delivered?** -- Review the user story from STATE.md. Can a user actually achieve the stated outcome with what was built? If any success criterion is unmet or any task was skipped, note the gap. - - **Architectural integrity?** -- Does the implementation match the architectural context from the plan? Flag any deviations (e.g., plan said "stateless JWT" but implementation uses server sessions). - - **Constitution honored?** -- Does the implementation respect the constitution baselines and approval rules captured in STATE.md? Flag any unwaived violations. - - **Ralph evidence complete?** -- For Ralph-driven tasks, does every session file include Red, Green, and Post-Refactor Green evidence aligned to the resolved unit/e2e contract or an explicitly approved exception? - - **No orphan code** -- Is there any implemented code that doesn't trace back to the user story or success criteria? This may indicate scope creep during execution. + - **User story delivered?** -- Review the user story from STATE.md. Can a user actually achieve the stated outcome with what was built? If any success criterion is unmet or any unit was skipped, note the gap. + - **Architectural integrity?** -- Does the implementation match the architectural context from the plan? Flag any deviations (e.g., plan said "stateless JWT" but implementation uses server sessions). + - **Constitution honored?** -- Does the implementation respect the constitution baselines and approval rules captured in STATE.md? Flag any unwaived violations. 
+ - **Ralph evidence complete?** -- For Ralph-driven units, does every session file include Red, Green, and Post-Refactor Green evidence aligned to the resolved unit-test/e2e contract or an explicitly approved exception? + - **No orphan code** -- Is there any implemented code that doesn't trace back to the user story or success criteria? This may indicate scope creep during execution. If purpose validation reveals gaps, present them to the user before proceeding to PR. @@ -536,8 +551,8 @@ If a subagent fails after its internal retries: Run configured agents in parallel with Task tool. **Pass the WHY context (problem narrative, user story, success criteria) to reviewer agents** so they can evaluate fitness for purpose, not just code quality. Present findings and address critical issues. 4. **Final Validation** - - All tasks in STATE.md marked `completed` (or explicitly `skipped` with user approval) - - All tests pass (including regression tests from every completed task) + - All units in STATE.md marked `completed` (or explicitly `skipped` with user approval) + - All tests pass (including regression tests from every completed unit) - Linting passes - Code follows existing patterns - Purpose validation passed (user story deliverable, architecture intact) @@ -632,8 +647,8 @@ If the `finishing-branch` skill is not available, follow the manual steps below: - **Key decisions made:** [architectural or design choices] ## Success Criteria Status - - [x] [criterion 1 from plan] -- delivered by Task N - - [x] [criterion 2 from plan] -- delivered by Task N + - [x] [criterion 1 from plan] -- delivered by Unit N + - [x] [criterion 2 from plan] -- delivered by Unit N - [ ] [criterion 3 if skipped] -- skipped: [reason] ## Testing @@ -692,7 +707,7 @@ If the `finishing-branch` skill is not available, follow the manual steps below: 5. 
**Notify User** - Summarize what was completed - Link to PR - - Highlight any tasks that were skipped and why + - Highlight any units that were skipped and why - Reference the execution session directory for detailed logs - Note any follow-up work needed - Suggest next steps if applicable @@ -707,7 +722,7 @@ For complex plans with multiple independent workstreams, enable swarm mode for p | Use Swarm Mode when... | Use Standard Mode when... | |------------------------|---------------------------| -| Plan has 5+ independent tasks | Plan is linear/sequential | +| Plan has 5+ independent units | Plan is linear/sequential | | Multiple specialists needed (review + test + implement) | Single-focus work | | Want maximum parallelism | Simpler mental model preferred | | Large feature with clear phases | Small feature or bug fix | @@ -716,7 +731,7 @@ For complex plans with multiple independent workstreams, enable swarm mode for p To trigger swarm execution, say: -> "Make a Task list and launch an army of agent swarm subagents to build the plan" +> "Make a unit list and launch an army of agent swarm subagents to build the plan" Or explicitly request: "Use swarm mode for this work" @@ -729,10 +744,10 @@ When swarm mode is enabled, the workflow changes: Teammate({ operation: "spawnTeam", team_name: "work-{timestamp}" }) ``` -2. **Create Task List with Dependencies** - - Parse plan into TaskCreate items +2. **Create Unit List with Dependencies** + - Parse plan into execution work items - Set up blockedBy relationships for sequential dependencies - - Independent tasks have no blockers (can run in parallel) + - Independent units have no blockers (can run in parallel) 3. 
**Spawn Specialized Teammates** ``` @@ -740,7 +755,7 @@ When swarm mode is enabled, the workflow changes: team_name: "work-{timestamp}", name: "implementer", subagent_type: "general-purpose", - prompt: "Claim implementation tasks, execute, mark complete", + prompt: "Claim implementation units, execute, mark complete", run_in_background: true }) @@ -748,13 +763,13 @@ When swarm mode is enabled, the workflow changes: team_name: "work-{timestamp}", name: "tester", subagent_type: "general-purpose", - prompt: "Claim testing tasks, run tests, mark complete", + prompt: "Claim testing units, run tests, mark complete", run_in_background: true }) ``` 4. **Coordinate and Monitor** - - Team lead monitors task completion + - Team lead monitors unit completion - Spawn additional workers as phases unblock - Handle plan approval if required @@ -775,7 +790,7 @@ See the `orchestrating-swarms` skill for detailed swarm patterns and best practi ### WHY Grounds Everything -- Every subagent knows why its task exists, not just what to build +- Every subagent knows why its unit exists, not just what to build - The orchestrator is the guardian of WHY: it extracts, threads, and validates purpose - Purpose drift is caught by inline reviews and Phase 3 validation, not just at the end - If the combined work doesn't deliver the user story, passing tests don't matter @@ -784,7 +799,7 @@ See the `orchestrating-swarms` skill for detailed swarm patterns and best practi - The orchestrator decomposes, delegates, records, and routes. It does NOT implement code itself. - Each subagent gets only the context it needs. No conversation history pollution. -- Learnings compound: each task benefits from everything learned in previous tasks. +- Learnings compound: each unit benefits from everything learned in previous units. 
### Start Fast, Execute Faster @@ -814,7 +829,7 @@ See the `orchestrating-swarms` skill for detailed swarm patterns and best practi ### Ship Complete Features -- Mark all tasks completed before moving on +- Mark all units completed before moving on - Don't leave features 80% done - A finished feature that ships beats a perfect feature that doesn't @@ -822,7 +837,7 @@ See the `orchestrating-swarms` skill for detailed swarm patterns and best practi - Escalation path (reframe, ask user, skip, stop) -- not infinite loops - Progress is persistent: STATE.md means you can resume after crashes -- Regression is caught early: previous tests re-run after each task +- Regression is caught early: previous tests re-run after each unit - When debugging unexpected errors, use the `systematic-debugging` skill for structured root-cause analysis instead of trial-and-error ## Quality Checklist @@ -830,12 +845,12 @@ See the `orchestrating-swarms` skill for detailed swarm patterns and best practi Before creating PR, verify: - [ ] All clarifying questions asked and answered -- [ ] All tasks in STATE.md marked completed (or explicitly skipped with user approval) +- [ ] All units in STATE.md marked completed (or explicitly skipped with user approval) - [ ] **User story deliverable** -- the combined work enables the stated user outcome - [ ] **Success criteria met** -- every plan-level success criterion addressed (or gap documented) - [ ] **Architecture intact** -- implementation matches the plan's architectural context - [ ] Tests pass (run project's test command) -- [ ] Regression tests from all completed tasks pass +- [ ] Regression tests from all completed units pass - [ ] Linting passes (use linting-agent) - [ ] Code follows existing patterns - [ ] Figma designs match implementation (if applicable) @@ -861,14 +876,14 @@ For most features: tests + linting + following patterns is sufficient. 
## Common Pitfalls to Avoid - **Losing the WHY** - Subagents build what's specified but miss the intent. Always pass WHY context. -- **Purpose drift** - Tasks individually pass but combined output doesn't deliver the user story. Validate at Phase 3. +- **Purpose drift** - Units individually pass but combined output doesn't deliver the user story. Validate at Phase 3. - **Analysis paralysis** - Don't overthink, read the plan and execute - **Skipping clarifying questions** - Ask now, not after building wrong thing - **Ignoring plan references** - The plan has links for a reason - **Testing at the end** - Test continuously or suffer later - **Orchestrator doing implementation** - Delegate to subagents, don't implement inline -- **Skipping regression checks** - A passing task that breaks previous work is not progress -- **Losing session state** - Always write to STATE.md before and after each task +- **Skipping regression checks** - A passing unit that breaks previous work is not progress +- **Losing session state** - Always write to STATE.md before and after each unit - **Dumping all session files into subagent context** - Use the learnings brief, filtered by domain - **Over-reviewing simple changes** - Save reviewer agents for complex work - **80% done syndrome** - Finish the feature, don't move on early diff --git a/.github/skills/workflows-work/references/execution-agent-prompt.md b/.github/skills/workflows-work/references/execution-agent-prompt.md index 78cd269..9047914 100644 --- a/.github/skills/workflows-work/references/execution-agent-prompt.md +++ b/.github/skills/workflows-work/references/execution-agent-prompt.md @@ -1,5 +1,10 @@ --- -{} +model: claude-sonnet-4.6 +platforms: + copilot: + model: gpt-5.3-codex + opencode: + model: openrouter/moonshotai/kimi-k2.6 --- # Execution Agent Prompt Template @@ -10,24 +15,32 @@ This template is used by the `workflows:work` orchestrator to construct prompts --- -You are an execution agent implementing a specific task 
from a work plan. Follow the 4-phase protocol below exactly. +You are an execution agent implementing a specific execution unit from a work plan. Follow the 4-phase protocol below exactly. -## Your Task +## Your Unit -**Task:** {{TASK_NAME}} +**Unit:** {{UNIT_TITLE}} -{{TASK_DESCRIPTION}} +{{UNIT_DESCRIPTION}} + +**Unit kind:** {{UNIT_KIND}} + +**Outcome scenario:** {{OUTCOME_SCENARIO}} + +**Scope:** {{UNIT_SCOPE}} + +**Scope fence:** {{UNIT_SCOPE_FENCE}} **Files to create/modify:** {{FILE_LIST}} **Success criteria:** {{SUCCESS_CRITERIA}} -**Test command:** `{{TEST_COMMAND}}` +**Validation command:** `{{VALIDATION_COMMAND}}` **Dependencies completed:** {{COMPLETED_DEPENDENCIES}} -## Why This Task Exists +## Why This Unit Exists {{WHY_CONTEXT}} @@ -35,7 +48,11 @@ You are an execution agent implementing a specific task from a work plan. Follow {{ARCHITECTURAL_CONTEXT}} -## Learnings from Previous Tasks +## Architecture Handoff + +{{ARCHITECTURE_HANDOFF}} + +## Learnings from Previous Units {{LEARNINGS_BRIEF}} @@ -51,7 +68,7 @@ You are an execution agent implementing a specific task from a work plan. Follow ## Phase 1: Understand Before Building -Before writing ANY code, review the task requirements AND the "Why This Task Exists" section carefully. +Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. **If anything is unclear, ambiguous, or could be interpreted multiple ways:** - List your questions explicitly @@ -60,7 +77,7 @@ Before writing ANY code, review the task requirements AND the "Why This Task Exi **If everything is clear:** - State your interpretation of the requirements in 2-3 sentences -- State how this task serves the overall user story (from the WHY context) +- State how this unit serves the overall user story (from the WHY context) - List any assumptions you are making (even obvious ones) - Proceed to Phase 2 @@ -84,7 +101,7 @@ If tests fail after implementation: 1. 
Read the error message carefully -- understand what failed and why 2. Analyze whether the failure is in your implementation or in the test 3. Fix the issue -4. Re-run the test command +4. Re-run the validation command 5. Repeat up to 3 total attempts 6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly @@ -98,7 +115,7 @@ Before reporting back, review your own work with fresh eyes. Go through each che - [ ] Did I miss any requirements? **Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Task Exists" section describes? +- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? - [ ] Would a user achieve the stated outcome with this code? - [ ] Did I build anything that doesn't trace back to the success criteria or user story? @@ -117,7 +134,7 @@ Before reporting back, review your own work with fresh eyes. Go through each che **Testing:** - [ ] Do tests verify actual behavior (not just mock behavior)? - [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? +- [ ] Did I run the validation command and confirm it passes? **Evidence:** - [ ] Can I show actual test output (not just "tests pass")? @@ -133,13 +150,13 @@ Return a structured execution report in exactly this format: Use `commands/workflows/references/tdd-evidence-contract.md` as the single source for the `### TDD Evidence` block. `Red` and `Green` prove behavior coverage. `Post-Refactor Green` proves cleanup safety after refactor. Each `Evidence` line should quote the decisive failing or passing signal in one sentence, not a narrative. 
```markdown -## Execution Report: [Task Name] +## Execution Report: [Unit Title] ### Interpretation [Your 2-3 sentence interpretation of what was asked] ### Purpose Served -[Which user story aspect / success criterion this task delivers, from the WHY context] +[Which user story aspect / success criterion this unit delivers, from the WHY context] ### Assumptions Made - [List each assumption, even if obvious] @@ -154,7 +171,7 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc [Insert the exact Ralph evidence block from `commands/workflows/references/tdd-evidence-contract.md`. Preserve the `Red`, `Green`, and `Post-Refactor Green` headings with their command/result/evidence fields.] ### Test Results -- Command: `[test command]` +- Command: `[validation command]` - Result: PASS/FAIL - Attempts: [n] - Output: @@ -171,7 +188,7 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc [If no problems: "None"] ### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] +- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future units] [If none: "None"] @@ -188,9 +205,9 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc _This section is included only when the resolved TDD contract explicitly allows standard implementation._ 1. Read referenced files and understand existing patterns -2. Implement the task following project conventions +2. Implement the unit following project conventions 3. Write tests matching the success criteria -4. Run the test command: `{{TEST_COMMAND}}` +4. Run the validation command: `{{VALIDATION_COMMAND}}` 5. 
If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) --- diff --git a/.github/skills/workflows-work/references/execution-shape.md b/.github/skills/workflows-work/references/execution-shape.md new file mode 100644 index 0000000..4ad4d31 --- /dev/null +++ b/.github/skills/workflows-work/references/execution-shape.md @@ -0,0 +1,93 @@ +# Execution Shape Contract + +Use this reference when planning, deepening, or executing work so the workflow can preserve judgment without re-explaining the same rules in every prompt. + +## Default + +Choose `vertical-slices` unless that would create fake end-to-end work just to satisfy the template. + +## Allowed modes + +### `vertical-slices` (default) + +Use when a thin, testable, end-to-end behavior exists. + +Required packet: +- `Slice type` +- `Serves` +- `Demo scenario` +- `Scope` +- `Scope fence` +- `Files` +- `Depends on` +- `Dependency type` +- `Success criteria` +- `Test command` + +### `infra-track` + +Use for enabling or foundational work where no honest user-visible tracer bullet exists yet. + +Required packet: +- `Capability enabled` +- `Consumers / downstream work unlocked` +- `Scope` +- `Files` +- `Depends on` +- `Risk / rollback` +- `Validation command` +- `Success criteria` + +### `fix-batch` + +Use for a series of small mostly independent fixes where forcing one vertical slice would blur the real work. + +Required packet: +- `Problem` +- `Repro / expected outcome` +- `Files` +- `Depends on` +- `Validation command` +- `Success criteria` + +## Selection rules + +1. Default to `vertical-slices`. +2. Switch to `infra-track` only when the honest near-term value is enabling capability, not a user-visible behavior. +3. Switch to `fix-batch` only when the work is truly a batch of small fixes, not a feature being split too late. +4. Never force `vertical-slices` if that would create fake verticality. +5. 
If the mode is not the default, record why in `execution_shape.rationale` and in `## Execution Shape`. + +## Plan shape + +Add this frontmatter block to every plan: + +```yaml +execution_shape: + mode: vertical-slices # vertical-slices | infra-track | fix-batch + rationale: "" # required when mode is not vertical-slices +``` + +Add this body section: + +```markdown +## Execution Shape +- **Mode:** [vertical-slices | infra-track | fix-batch] +- **Why:** [1-2 sentences] +``` + +Then use the packet section that matches the chosen mode: +- `## Execution Slices` +- `## Infrastructure Work Packets` +- `## Fix Batch Items` + +## Deepening rules + +- Validate the plan against the chosen mode, not against `vertical-slices` unconditionally. +- You may recommend switching modes if the selected one is clearly wrong, but record that as a `WHY Reassessment` note instead of silently rewriting intent. + +## Execution rules + +- `/workflows:work` must execute the units defined by the chosen mode. +- Do not coerce `infra-track` or `fix-batch` plans into slices unless the user explicitly approves a mode change. +- Session tracking may stay generic (`unit`, `work status`) even when the selected mode is `vertical-slices`. diff --git a/.github/skills/workflows-work/references/spec-review-prompt.md b/.github/skills/workflows-work/references/spec-review-prompt.md index d6bcaa5..34bcbc1 100644 --- a/.github/skills/workflows-work/references/spec-review-prompt.md +++ b/.github/skills/workflows-work/references/spec-review-prompt.md @@ -14,15 +14,15 @@ You are a spec compliance reviewer. 
Your job is to verify whether an implementat ## What Was Requested -{{TASK_REQUIREMENTS}} +{{UNIT_REQUIREMENTS}} ## Success Criteria {{SUCCESS_CRITERIA}} -## Task Purpose +## Unit Purpose -{{TASK_SERVES}} +{{UNIT_PURPOSE}} ## What Implementer Claims They Built diff --git a/README.md b/README.md index a775fee..75c6977 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,8 @@ The core value is the workflow: Each phase has a defined purpose, handoff, and artifact. The system is designed to reduce drift between what you intended, what you built, and what got reviewed. +Planning, deepening, and execution now default to **issue-shaped vertical slices**. The first slice should be a tracer bullet, and later slices widen or harden the feature without regressing into layer-by-layer planning. When the work is honestly better represented as enablement or a tiny-fix batch, the workflow can switch to explicit `infra-track` or `fix-batch` execution shapes instead of faking verticality. + ### 2. Architecture happens before execution hardening `/workflows:architecture` is a first-class phase, not an afterthought. It creates an architecture artifact in `docs/architecture/` that captures deletion tests, interfaces, seams, adapters, contracts, and deepening candidates. 
@@ -84,10 +86,10 @@ This repo is built for: | `/workflows:constitution` | repo-wide principles and guardrails | keeps project-wide policy out of feature-specific docs | | `/workflows:ideate` | grounded candidate directions | avoids rushing into the first idea | | `/workflows:brainstorm` | feature-level WHY / WHAT / WHERE handoff | clarifies the problem and intended outcome | -| `/workflows:plan` | execution-ready HOW | breaks work into phases, tasks, dependencies, and success criteria | +| `/workflows:plan` | execution-ready HOW | chooses an execution shape, then breaks work into slices or other execution packets with dependencies and success criteria | | `/workflows:architecture` | architecture artifact in `docs/architecture/` | forces the important structural decisions into the open | -| `/deepen-plan` | stronger plan with research and review input | hardens the plan before execution | -| `/workflows:work` | executed implementation with session state and learnings | drives the Ralph-first build loop through scoped subagents | +| `/deepen-plan` | stronger plan with research and review input | hardens the selected execution backlog before execution | +| `/workflows:work` | executed implementation with session state and learnings | drives the Ralph-first build loop by executing the selected units through scoped subagents | | `/workflows:review` | purpose-aware review against code, architecture, and evidence | checks fit, not just syntax | | `/workflows:compound` | reusable solution docs and refreshed learnings | turns one solved problem into future leverage | @@ -107,6 +109,7 @@ For most serious work: - `/technical_review` is gone - `/workflows:architecture` is now the supported architecture handoff +- plan/deepen/work now default to issue-shaped vertical slices and tracer-bullet sequencing, while still allowing explicit `infra-track` and `fix-batch` modes when slices would be fake - Ralph-driven TDD is explicit across setup, planning, execution, and review - 
workflow prompts are slimmer because shared contracts now live in reusable references - heavyweight agent and skill prompts were tightened around a shared concise structure diff --git a/plugins/compound-engineering/.claude-plugin/plugin.json b/plugins/compound-engineering/.claude-plugin/plugin.json index c961b18..6efb22c 100644 --- a/plugins/compound-engineering/.claude-plugin/plugin.json +++ b/plugins/compound-engineering/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "compound-engineering", - "version": "4.4.0", + "version": "4.5.0", "description": "OpenCode-first AI-powered development tools. Includes 29 specialized agents, 26 commands, and 24 skills spanning code review, research, design, and workflow automation, with generated Copilot support and Claude Code compatibility outputs.", "author": { "name": "The Rabak", diff --git a/plugins/compound-engineering/CHANGELOG.md b/plugins/compound-engineering/CHANGELOG.md index 7a34365..ac61d16 100644 --- a/plugins/compound-engineering/CHANGELOG.md +++ b/plugins/compound-engineering/CHANGELOG.md @@ -5,6 +5,22 @@ All notable changes to the compound-engineering plugin will be documented in thi The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [4.5.0] - 2026-04-29 + +### Changed + +- **Vertical-slice workflow contract** -- `/workflows:plan`, `/deepen-plan`, and `/workflows:work` now default to issue-shaped execution slices as the canonical unit of planning and execution. Phases remain optional grouping wrappers, while slices carry the real handoff contract: slice type, serves, demo scenario, scope fence, files, dependency type, success criteria, and test command. 
+- **Tracer-bullet planning rules** -- Planning and deepening now explicitly require the first slice to be the thinnest demoable tracer bullet, forbid horizontal layer-only slices unless they still prove an observable outcome, and allow thin cross-layer backend/frontend slices when they improve testability and feedback loops. +- **Execution-shape flexibility** -- Planning, deepening, and execution now share a compact `execution_shape` contract with `vertical-slices` as the default and explicit `infra-track` / `fix-batch` modes for infrastructure-heavy or tiny-fix work where forcing slices would be misleading. +- **Execution session state** -- `/workflows:work` now tracks generic execution units in `## Work Status`, writes `unit-*.md` execution logs, and executes the declared plan shape instead of assuming every packet is a slice. +- **Execution/review prompt handoff** -- The execution agent prompt now receives explicit `## Architecture Handoff` context, inline review prompts consume unit-specific placeholders, and `/workflows:review` reads completed execution session files when building the TDD evidence ledger. + +### Migration notes + +- **Plan shape** -- Prefer `## Execution Slices` with issue-shaped slice blocks, but switch to `## Infrastructure Work Packets` or `## Fix Batch Items` when that better matches the real work. +- **Execution semantics** -- `/workflows:work` executes the declared execution units directly; any legacy phase/task input should be translated into the selected shape before spawning subagents. +- **Review mode semantics** -- `inline` and `both` review modes now operate per execution unit, not per task. 
+ ## [4.4.0] - 2026-04-27 ### Added diff --git a/plugins/compound-engineering/README.md b/plugins/compound-engineering/README.md index db59e45..190ffd1 100644 --- a/plugins/compound-engineering/README.md +++ b/plugins/compound-engineering/README.md @@ -15,6 +15,7 @@ This repository also ships generated Copilot assets under the repo root `.github ## Workflow contract highlights - `/workflows:architecture` is the architecture-improvement handoff between planning and `/deepen-plan`. +- `/workflows:plan`, `/deepen-plan`, and `/workflows:work` now default to issue-shaped execution slices, with the first slice acting as the tracer bullet, while still allowing explicit `infra-track` and `fix-batch` modes when slices would be fake. - `/workflows:work` is the Ralph-first execution path; `/ralph-loop` and `/cancel-ralph` are helpers, not a detached workflow. - Plans default to unit + e2e evidence unless an explicit exception documents replacement evidence. @@ -107,10 +108,10 @@ Core workflow commands use `workflows:` prefix to avoid collisions with built-in | `/workflows:constitution` | Create or update the repo-level constitution that governs downstream workflows | | `/workflows:ideate` | Generate and rank grounded improvement ideas before selecting one to brainstorm | | `/workflows:brainstorm` | Explore requirements and approaches before planning | -| `/workflows:plan` | Create implementation plans with structured project inputs (tickets, docs, designs) | +| `/workflows:plan` | Create implementation plans with issue-shaped execution slices and structured project inputs | | `/workflows:architecture` | Produce a dedicated architecture improvement artifact before deepening and execution | | `/workflows:review` | Run comprehensive code reviews | -| `/workflows:work` | Execute work items systematically | +| `/workflows:work` | Execute the plan's execution units (slices by default) systematically | | `/workflows:compound` | Document solved problems to compound team knowledge | | `/workflows:compound-refresh` 
| Refresh stale learnings and pattern docs in `docs/solutions/` | @@ -120,7 +121,7 @@ Core workflow commands use `workflows:` prefix to avoid collisions with built-in |---------|-------------| | `/lrj` | Full autonomous engineering workflow | | `/slrj` | Full autonomous workflow with swarm mode for parallel execution | -| `/deepen-plan` | Enhance plans with parallel research agents for each section | +| `/deepen-plan` | Enhance plans and harden execution slices with parallel research | | `/changelog` | Create engaging changelogs for recent merges | | `/create-agent-skill` | Create or edit Claude Code skills | | `/generate_command` | Generate new slash commands | diff --git a/plugins/compound-engineering/commands/deepen-plan.md b/plugins/compound-engineering/commands/deepen-plan.md index 7b88437..c497844 100644 --- a/plugins/compound-engineering/commands/deepen-plan.md +++ b/plugins/compound-engineering/commands/deepen-plan.md @@ -79,7 +79,8 @@ First, read and parse the plan to extract the WHY artifacts (problem narrative, - [ ] Overview/Proposed Solution sections - [ ] Technical Approach/Architecture -- [ ] Implementation phases/steps (noting which user story aspect each phase serves) +- [ ] `execution_shape` frontmatter + `## Execution Shape` section +- [ ] Execution packets / phase wrappers (noting which user story aspect each packet serves) - [ ] Code examples and file references - [ ] Acceptance criteria - [ ] Any UI/UX components mentioned @@ -98,41 +99,60 @@ The "Serves" column ensures every deepening activity traces back to WHY we're bu ### 1.1 Validate Execution Readiness -Check if the plan has sufficiently structured execution chunks for the subagent orchestration model in /workflows:work. Plans need per-task success criteria, test commands, and file lists. Also validate that phases trace to the user story. +Check if the plan has sufficiently structured execution packets for the subagent orchestration model in `/workflows:work`. 
Use `commands/workflows/references/execution-shape.md` as the source of truth. Plans need packets that are independently executable, testable, and traceable back to the user story without forcing fake verticality. -**Scan each implementation task/phase for these required fields:** +**Resolve execution shape first:** -- [ ] **Files:** List of files to create or modify -- [ ] **Depends on:** Dependencies on other tasks -- [ ] **Success criteria:** Testable checkboxes defining "done" -- [ ] **Test command:** Exact command to verify completion -- [ ] **TDD alignment:** Task-level test commands collectively satisfy the resolved unit/e2e evidence contract, or the plan records a justified exception with replacement evidence +- [ ] Read `execution_shape.mode`; if missing, default it to `vertical-slices` +- [ ] Read `execution_shape.rationale`; require it when the mode is not `vertical-slices` +- [ ] Ensure the body includes a matching `## Execution Shape` section +- [ ] If the chosen mode looks wrong for the real work, add a `### WHY Reassessment` note instead of silently changing it + +**Scan each execution packet using the required fields from `commands/workflows/references/execution-shape.md`:** + +- [ ] **`vertical-slices`:** slice type, serves, demo scenario, scope + scope fence, files, depends on, dependency type, success criteria, test command +- [ ] **`infra-track`:** capability enabled, consumers / downstream work unlocked, scope, files, depends on, risk / rollback, validation command, success criteria +- [ ] **`fix-batch`:** problem, repro / expected outcome, files, depends on, validation command, success criteria +- [ ] **TDD alignment:** packet-level validation commands collectively satisfy the resolved unit/e2e evidence contract, or the plan records a justified exception with replacement evidence **Validate WHY tracing:** -- [ ] **Each phase has a "Serves:" line** stating which user story aspect or success criterion it delivers -- [ ] **Success criteria trace 
to plan-level success criteria** -- task criteria should be decomposed from the plan's success criteria, not invented independently -- [ ] **No orphan phases** -- every phase should trace to at least one success criterion. If a phase doesn't serve any success criterion, flag it: "Phase [X] doesn't trace to any success criterion. Is it necessary, or is a success criterion missing?" +- [ ] **Each packet has a purpose line** (`Serves`, `Consumers`, or equivalent) tying it to user story value or explicit downstream unlocks +- [ ] **Success criteria trace to plan-level success criteria** -- packet criteria should be decomposed from the plan's success criteria, not invented independently +- [ ] **No orphan packets** -- every packet should trace to at least one success criterion or explicit enabling outcome +- [ ] **Phase wrappers stay optional** -- if the plan uses phases or tracks, confirm they are grouping containers only and do not replace packet-level tracing -**Expected task format:** +**Expected packet format:** ```markdown -##### Task N.1: [Task Name] +##### Slice N.1: [Slice Title] +**Slice type:** tracer-bullet | expansion | hardening +**Serves:** [Which aspect of the user story / which success criterion this slice delivers] +**Demo scenario:** [Smallest end-to-end behavior this slice proves] **Files:** `path/to/file1.php`, `path/to/file2.php` -**Depends on:** Task N-1.2 (or "None") -**Success criteria:** +**Depends on:** Slice N-1.2 (or "None") +**Dependency type:** real | stub-available | parallel-safe + +###### Scope +- **Owns:** [What this slice changes] +- **Non-goals:** [What intentionally waits] +- **Scope fence:** [Boundary that keeps the slice thin] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `command to run` + +###### Evidence +- **Test command:** `command to run` ``` **Scoring:** -Count how many implementation tasks have all four fields. Report: +Count how many execution packets have the full structure. 
Report: ``` -Execution Readiness: X/Y tasks have complete structure (Z%) +Execution Readiness: X/Y packets have complete structure (Z%) ``` **Actions based on score:** @@ -140,50 +160,56 @@ Execution Readiness: X/Y tasks have complete structure (Z%) | Score | Action | |-------|--------| | 80-100% | Plan is execution-ready. Proceed with deepening. | -| 50-79% | Flag incomplete tasks. During deepening, add missing fields. | -| 0-49% | Plan needs significant restructuring. Add an "Execution Readiness" enhancement pass that decomposes vague phases into structured tasks with all required fields. **Note:** `/workflows:work` will refuse to execute plans that lack this structure. | +| 50-79% | Flag incomplete packets. During deepening, add missing fields. | +| 0-49% | Plan needs significant restructuring. Add an "Execution Readiness" enhancement pass that decomposes vague phases/tasks into the packet shape required by the selected mode. **Note:** `/workflows:work` will refuse to execute plans that lack a coherent execution shape unless the user explicitly approves a mode change or legacy adaptation. | -**For tasks missing structure, the deepening process should:** +**For packets missing structure, the deepening process should:** -1. Break vague phases into specific, scoped tasks -2. Identify which files each task will create or modify -3. Write concrete success criteria (not vague goals) -4. Determine the test command (look at existing test patterns in the codebase) -5. Make it explicit whether the test command contributes unit evidence, e2e evidence, or both -6. Map dependencies between tasks -7. Add a suggested commit message per task (conventional format: `feat(scope): description`) +1. Pick or confirm the execution shape that best matches the real work +2. Break vague phases or legacy tasks into specific packets for that mode +3. Identify the smallest honest outcome each packet proves or unlocks +4. Identify which files each packet will create or modify +5. 
Write concrete success criteria (not vague goals) +6. Determine the validation command (look at existing test patterns in the codebase) +7. Make it explicit whether the validation command contributes unit evidence, e2e evidence, or both +8. Map dependencies between packets +9. Add a suggested commit message per packet (conventional format: `feat(scope): description`) -### 1.2 Task Complexity Check +### 1.2 Execution Shape Complexity Check -Check if any tasks are too large for reliable subagent execution. Large tasks with many files or success criteria should be split. +Check if any packets are too large, too vague, or shaped incorrectly for reliable subagent execution. Cross-layer work is allowed in `vertical-slices`; the failure mode is not "touches backend and frontend" but "tries to deliver multiple outcomes or no honest outcome at all." -**For each task, check complexity:** +**For each packet, check complexity against the selected mode:** | Metric | Threshold | Action | |--------|-----------|--------| -| Files touched | > 3 files | Flag for splitting | +| Outcomes or unlocks | > 1 meaningful outcome | Flag for splitting | +| Files touched | > 6 files | Flag for review; confirm the packet is still thin | | Success criteria | > 5 criteria | Flag for splitting | -| Multiple concerns | Mixes backend + frontend | Flag for splitting | -| Vague scope | "Implement the feature" | Flag for clarification | +| Scope fence | Missing or vague | Flag for clarification | +| Shape fit | `vertical-slices` used for horizontal-only work, or `infra-track` / `fix-batch` used to hide a real feature slice | Reassess mode | +| Risk controls | `Blast radius: high` with no rollback path | Add safety fields before execution | + +**Important:** A packet that touches backend + frontend is **not automatically too large**. If the same thin slice needs a migration, service method, API handler, and tiny UI change to prove one observable behavior, keep it intact. 
-**If any tasks exceed thresholds:** +**If any packets exceed thresholds:** Report: ``` -Task Complexity Warning: X tasks may be too large for reliable subagent execution. +Execution Shape Warning: X packets may be too large or incorrectly shaped for reliable subagent execution. -Task 2.1: "Build user auth" -- touches 5 files, 7 success criteria - Suggestion: Split into "Create auth service" (3 files) and "Add auth middleware" (2 files) +Slice 2.1: "User can complete first login tracer bullet" -- 2 demo scenarios, 7 success criteria + Suggestion: Split into "User submits credentials and receives success state" and "User sees first authenticated dashboard shell" -Task 3.2: "Build dashboard UI" -- mixes backend API + frontend component - Suggestion: Split into "Create dashboard API endpoint" and "Build dashboard component" +Packet 3.2: "Create auth schema foundation" -- no demo scenario, horizontal-only outcome + Suggestion: Either rewrite as "User can submit credentials and persist the first auth record" or switch this track to `infra-track` if it is truly enablement-only ``` -Suggest splits that create self-contained tasks with non-overlapping file sets. Each split task should be completable by one subagent in a single session. **When splitting, ensure each new task retains its "Serves:" tracing to the user story -- a split should never orphan a task from its purpose.** +Suggest splits that create self-contained packets with clear ownership and non-overlapping file sets where possible. **When splitting, ensure each new packet retains its tracing to the user story or enabling outcome.** -**This validation ensures the plan is ready for `/workflows:work`'s subagent orchestration model**, where each task is delegated to a focused subagent with clear scope and termination criteria.
+**This validation ensures the plan is ready for `/workflows:work`'s subagent orchestration model**, where each packet is delegated to a focused subagent with clear scope, proof, and termination criteria. ### 1.5 Re-fetch Source Documents (if available) @@ -605,7 +631,7 @@ Merge research findings back into the plan, adding depth without changing the or - User Story - Architectural Context - Success Criteria -- Phase "Serves:" lines +- Execution shape contract and packet tracing lines - Handoff frontmatter If research suggests changes to these, add a `### WHY Reassessment` note at the end of the plan for the user to review manually. Do not edit the originals. @@ -620,7 +646,7 @@ If research suggests changes to these, add a `### WHY Reassessment` note at the ```markdown ## [Original Section Title] -[Original content preserved -- including any "Serves:" lines] +[Original content preserved -- including any execution-shape and packet tracing lines] ### Research Insights @@ -662,7 +688,8 @@ At the top of the plan, add a summary section: - User Story: [preserved / flagged for reassessment] - Architectural Context: [preserved / expanded / flagged for reassessment] - Success Criteria: [preserved / flagged for reassessment] -- Phase tracing: [all phases still trace to user story: yes/no] +- Execution shape: [preserved / flagged for reassessment] +- Packet tracing: [all packets still trace to user story or enabling outcome: yes/no] ### TDD Contract Check - Precedence: [plan overrides local / inherit uses local / fallback default noted] @@ -705,16 +732,17 @@ Before finalizing: - [ ] Links are valid and relevant - [ ] No contradictions between sections - [ ] Enhancement summary accurately reflects changes -- [ ] Implementation tasks have execution-ready structure (files, success criteria, test commands, dependencies) -- [ ] TDD contract is explicit, precedence is documented, and unit/e2e evidence stays aligned with task test commands unless an exception says otherwise +- [ ] 
Execution packets have execution-ready structure for the selected mode +- [ ] TDD contract is explicit, precedence is documented, and unit/e2e evidence stays aligned with packet validation commands unless an exception says otherwise **WHY integrity:** - [ ] Problem Narrative, User Story, Success Criteria, and Architectural Context are unmodified from the original plan - [ ] Handoff frontmatter is intact and still accurate -- [ ] Every phase still has its "Serves:" tracing line -- [ ] No new phases added without a "Serves:" line connecting them to the user story +- [ ] `execution_shape` frontmatter and `## Execution Shape` still agree +- [ ] Every packet still has its tracing line +- [ ] No new packets were added without a tracing line connecting them to the user story or enabling outcome - [ ] Enhancements tagged with which success criterion they serve -- [ ] Scope-expanding recommendations flagged in "Scope Warnings" rather than silently added to phases +- [ ] Scope-expanding recommendations flagged in "Scope Warnings" rather than silently added to packets - [ ] If WHY reassessment was needed, it's in a clearly marked section at the end (not inline edits) - [ ] `tdd` frontmatter and `## TDD & Evidence Contract` still agree on precedence, effective loop, evidence, and exceptions diff --git a/plugins/compound-engineering/commands/workflows/plan.md b/plugins/compound-engineering/commands/workflows/plan.md index e5a500d..4a6e7a8 100644 --- a/plugins/compound-engineering/commands/workflows/plan.md +++ b/plugins/compound-engineering/commands/workflows/plan.md @@ -12,13 +12,14 @@ argument-hint: "[feature description, bug report, or improvement idea]" Transform feature descriptions, bug reports, or improvement ideas into well-structured, execution-ready plans that: 1. **Anchor to WHY** -- every plan traces back to a user story and problem narrative -2. **Map WHERE** -- architectural context grounds task decomposition in the system's structure +2. 
**Map WHERE** -- architectural context grounds slice decomposition in the system's structure 3. **Define DONE** -- success criteria tied to user outcomes, not just technical checkboxes 4. **Honor project guardrails** -- constitution principles, baselines, and approval rules are made explicit 5. **Make TDD explicit** -- the plan declares the Ralph/default loop, required unit + e2e evidence, and any justified exceptions -6. **Enable architecture-first execution** -- `/workflows:architecture` turns the plan into a dedicated architecture artifact before `/deepen-plan`, `/workflows:work`, and `/workflows:review` harden or execute it +6. **Choose the right execution shape** -- vertical slices are the default, but infra tracks and fix batches are valid when they fit the real work better +7. **Enable architecture-first execution** -- `/workflows:architecture` turns the plan into a dedicated architecture artifact before `/deepen-plan`, `/workflows:work`, and `/workflows:review` harden or execute it -Plans consume the project constitution from `/workflows:constitution` when available, plus lynchpin artifacts from `/workflows:brainstorm` when available, or construct feature context fresh when running standalone. Either way, the plan document carries forward the WHY, WHERE, DONE, GUARDRAIL, and TDD contract that all downstream phases depend on. After the plan is written, the next explicit step is `/workflows:architecture`, not direct deepening. +Plans consume the project constitution from `/workflows:constitution` when available, plus lynchpin artifacts from `/workflows:brainstorm` when available, or construct feature context fresh when running standalone. Either way, the plan document carries forward the WHY, WHERE, DONE, GUARDRAIL, TDD, and **execution shape** contract that all downstream phases depend on. After the plan is written, the next explicit step is `/workflows:architecture`, not direct deepening. 
## Feature Description @@ -70,6 +71,15 @@ Every plan must then write its own `tdd:` frontmatter block plus a `## TDD & Evi - **Exception rule:** Any deviation from the resolved default loop or evidence requirements must be explicit and justified in `tdd.exceptions` and in the plan body. - **Shared source of truth:** Reuse `commands/workflows/references/tdd-evidence-contract.md` for contract resolution, the `## TDD & Evidence Contract` section shape, Ralph evidence semantics, and exception handling. +#### Execution Shape Baseline (Runs Before Path A/B/C) + +Use `commands/workflows/references/execution-shape.md` as the single source for choosing and documenting the execution shape. + +- **Default mode:** `vertical-slices` +- **Allowed overrides:** `infra-track`, `fix-batch` +- **Override rule:** Any non-default mode must include a short rationale in frontmatter and in the plan body +- **Anti-coercion rule:** Do not force work into slices if that would create fake end-to-end structure + #### Path A: Spec/Plan File Provided **Check if arguments contain a plan or spec file:** @@ -78,7 +88,7 @@ If the feature description (`#$ARGUMENTS`) is or contains a path to a `.md` file 1. Read the file 2. Announce: "Found existing plan/spec: `[file path]`. Using as foundation." -3. Extract: title, problem statement, proposed approach, acceptance criteria, implementation phases, and any existing tasks +3. Extract: title, problem statement, proposed approach, acceptance criteria, execution shape (if any), and any existing execution units 4. **Check for brainstorm reference** -- look for a `brainstorm_ref` field in frontmatter, or search `docs/brainstorms/` for a matching topic. If found, read and extract lynchpin artifacts (see Path B). 5. 
**Extract or construct WHY artifacts from the spec:** - If the spec has a Problem Narrative / User Story / Architectural Context -- use them directly @@ -90,7 +100,7 @@ If the feature description (`#$ARGUMENTS`) is or contains a path to a `.md` file 6. **Skip free-form idea refinement** -- the spec defines WHAT to build 7. Proceed to Step 0.5 to gather any additional project inputs, then to research -In Step 2 (Issue Planning), **build upon the existing plan structure** -- preserve its sections, fill gaps, add execution-readiness fields (Files, Depends on, Success criteria, Test command) to any tasks that lack them, and enrich with research findings. Do NOT discard or rewrite sections that are already well-defined. +In Step 2 (Issue Planning), **build upon the existing plan structure** -- preserve its sections, fill gaps, add the execution-shape contract and execution-readiness fields to any legacy execution units that lack them, and enrich with research findings. Do NOT discard or rewrite sections that are already well-defined. #### Path B: Brainstorm Document Found @@ -113,11 +123,11 @@ ls -la docs/brainstorms/*.md 2>/dev/null | head -10 3. Announce: "Found brainstorm from [date]: [topic]. Consuming lynchpin artifacts." 4. 
**Extract and surface all lynchpin sections:** - **Problem Narrative** -- the synthesized WHY (carry forward verbatim into plan) - - **User Story** -- the north star (carry forward, plan tasks must trace to this) + - **User Story** -- the north star (carry forward, plan slices must trace to this) - **Architectural Context** -- the WHERE map (feeds `{{ARCHITECTURAL_CONTEXT}}` in work.md) - **Success Criteria** -- the DONE definition (plan acceptance criteria must include these) - **Stakeholder Impact** -- who is affected (informs stakeholder analysis) - - **Chosen Approach** and **Key Decisions** -- the WHAT (informs task decomposition) + - **Chosen Approach** and **Key Decisions** -- the WHAT (informs slice decomposition) - **Open Questions** -- must be resolved before planning proceeds 5. **If any handoff fields are `false` or sections are empty**, flag them: "Brainstorm is missing [X]. I'll construct this during planning." 6. **Resolve open questions** -- if the brainstorm has unresolved questions, use **AskUserQuestion tool** to resolve each one before proceeding @@ -330,7 +340,7 @@ Now that we have concrete codebase knowledge, refine the WHY artifacts establish Explicitly state how research findings confirm, challenge, or refine the planned approach relative to the user story. Examples: - "Codebase already has a similar pattern in `app/Services/AuthService.php` -- we should follow it for consistency, which aligns with the user story because..." - "Learnings doc warns about [gotcha] -- this affects our approach because..." -- "No existing patterns found for this -- higher risk, may need more tasks for validation." +- "No existing patterns found for this -- higher risk, may need more slices for validation." - "Constitution requires [baseline] -- the plan must make that visible in acceptance criteria or approvals." 
**Optional validation:** Briefly summarize the refined WHY artifacts and key research findings, then ask if anything looks off or missing before proceeding to planning. @@ -365,28 +375,34 @@ Think like a product manager -- what would make this issue clear, actionable, an - [ ] Gather supporting materials (error logs, screenshots, design mockups) - [ ] Prepare code examples or reproduction steps if applicable, name the mock filenames in the lists -**Phase Decomposition (traced to user story):** +**Execution Shape Selection (traced to user story):** + +Use `commands/workflows/references/execution-shape.md` as the source of truth for selecting and documenting the plan's execution shape. -Each implementation phase must state **what aspect of the user story it serves**. This creates a traceable chain: -- User Story → Phase → Tasks → Files +Default to **`vertical-slices`**: +- User Story → Phase/Track (optional grouping) → Slice → Files +- Start with the thinnest tracer bullet +- Slice vertically across layers when needed +- Treat phases as wrappers, not executable units +- Forbid horizontal slice titles unless they still produce a demoable outcome -When decomposing into phases: -- **Group by user-facing capability**, not by technical layer. "User can log in" is a phase; "Create database tables" is a task within a phase. 
-- **Each phase should deliver a testable slice** of the user story where possible -- **Each subphase/task should be a self-contained execution unit** -- after its listed dependencies are satisfied, the executor should have the context, scope, relevant files, success criteria, and verification command needed to complete it without reconstructing intent from neighboring phases -- **Cross-reference success criteria** -- map each success criterion to the phase(s) that deliver it -- **Architectural context informs boundaries** -- use the WHERE map to identify natural phase boundaries (e.g., service boundaries, module boundaries) +Switch only when that default would be fake: +- **`infra-track`** for enabling/foundation work with no honest user-visible tracer bullet yet +- **`fix-batch`** for a batch of small mostly independent fixes + +Every plan must record: +- `execution_shape.mode` +- `execution_shape.rationale` (required when mode is not `vertical-slices`) +- A matching `## Execution Shape` section in the body **Execution Readiness:** -For plans that will be executed via `/workflows:work`, ensure each implementation task includes: -- **Scope:** What this task owns, what it changes, and any important boundary or non-goal that keeps the slice contained -- **Files:** List of files to create or modify -- **Depends on:** Which other tasks must complete first (or "None") -- **Success criteria:** Testable checkboxes that define "done" -- **Test command:** The exact command to verify the task is complete. Across the plan, these commands must satisfy the plan-level TDD evidence contract. +For plans that will be executed via `/workflows:work`, the plan must include the packet section required by the selected mode: +- `## Execution Slices` +- `## Infrastructure Work Packets` +- `## Fix Batch Items` -This structured format enables the `/workflows:work` orchestrator to delegate each task to a focused subagent with clear scope and termination criteria. 
Treat every task as a mini-handoff packet: if an executor had only that task plus the shared WHY/architecture context, they should still know what to touch, what not to touch, and how to prove it is done. Plans without this structure will be flagged for refinement before execution begins. +Each packet must include the fields defined in `commands/workflows/references/execution-shape.md`. Plans without a declared shape and packet structure will be flagged for refinement before execution begins. **TDD & Evidence Contract (mandatory):** @@ -406,10 +422,10 @@ Apply the shared `Named Agent Dispatch` protocol from `commands/workflows/refere - Task spec-flow-analyzer(feature_description, user_story, success_criteria, research_findings) The SpecFlow Analyzer should evaluate: -- Do the planned phases cover all aspects of the user story? +- Do the planned slices cover all aspects of the user story? - Are there user flows implied by the user story that the plan doesn't address? - Do edge cases threaten any of the success criteria? -- Are there gaps between what the user needs (story) and what the plan delivers (tasks)? +- Are there gaps between what the user needs (story) and what the plan delivers (slices)? **SpecFlow Analyzer Output:** @@ -420,7 +436,7 @@ The SpecFlow Analyzer should evaluate: ### 4. Choose Implementation Detail Level -**Important for `/workflows:work` compatibility:** All detail levels can be executed, but the MORE and A LOT levels produce plans with structured execution chunks (per-task scope, success criteria, test commands, and file lists) that enable the subagent orchestration model in `/workflows:work`. MINIMAL plans work but may require the orchestrator to decompose tasks further before delegating to subagents and supply any missing containment details. +**Important for `/workflows:work` compatibility:** All detail levels can be executed, but each level must still declare an execution shape and produce the matching packet section. 
`vertical-slices` is the default and usually the best choice. MORE and A LOT provide the richest packets (scope fence, dependencies, evidence, and safety notes) and therefore give the most predictable subagent orchestration. **All detail levels include WHY sections.** The Problem Narrative, User Story, Architectural Context, and Success Criteria are mandatory at every level -- they are the contract that downstream phases depend on. The difference between levels is how much implementation detail surrounds them. @@ -436,7 +452,7 @@ Select how comprehensive you want the issue to be, simpler is mostly better. - Basic acceptance criteria - Essential context only -**Note:** MINIMAL plans may need to be enriched with per-task success criteria before running `/workflows:work`. The orchestrator can handle this decomposition automatically, but providing structured tasks up front leads to more predictable execution. +**Note:** MINIMAL plans may still contain only a few units, but they must include at least one execution-ready packet from the selected mode before `/workflows:work` should execute them. When in doubt, choose a tracer-bullet slice. **Structure:** @@ -467,6 +483,9 @@ tdd: unit: inherit # inherit | required | optional e2e: inherit # inherit | required | optional exceptions: [] # [{ scope, reason, replacement_evidence }] +execution_shape: + mode: vertical-slices # vertical-slices | infra-track | fix-batch + rationale: "" --- # [Issue Title] @@ -499,6 +518,11 @@ which causes [impact]. Use the exact section shape from `commands/workflows/references/tdd-evidence-contract.md` with the resolved values for this plan. Do not omit any bullet, and make every deviation explicit with `replacement_evidence`. +## Execution Shape + +- **Mode:** vertical-slices +- **Why:** The work has a real tracer-bullet path, so default to end-to-end slices. 
+ ## Constitution Alignment - **Relevant principles:** [Project rules that apply to this work] @@ -509,6 +533,34 @@ Use the exact section shape from `commands/workflows/references/tdd-evidence-con [Brief description of what to build and how] +## Execution Slices + +Use this default section only when `execution_shape.mode` is `vertical-slices`. If the selected mode is `infra-track` or `fix-batch`, replace this section with the matching packet section from `commands/workflows/references/execution-shape.md`. + +##### Slice 1.1: [Tracer Bullet Slice Title] +**Slice type:** tracer-bullet +**Serves:** [Which aspect of the user story / which success criterion this slice proves] +**Demo scenario:** [Describe the smallest end-to-end behavior this slice makes observable] +**Files:** `path/to/file1.php`, `path/to/file2.php` +**Depends on:** None +**Dependency type:** real | stub-available | parallel-safe + +###### What to build +[Brief description of the thin end-to-end path] + +###### Scope +- **Owns:** [What this slice is responsible for] +- **Non-goals:** [What this slice intentionally does not solve yet] +- **Scope fence:** [What would count as widening the slice too far] + +###### Acceptance criteria +- [ ] Criterion 1 +- [ ] Criterion 2 + +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What this command proves for the tracer bullet] + ## References - Related issue: #[issue_number] @@ -523,7 +575,7 @@ Use the exact section shape from `commands/workflows/references/tdd-evidence-con - Detailed background and motivation - Technical considerations -- Phased implementation with story tracing +- Issue-shaped execution slices with story tracing - Success metrics - Dependencies and risks @@ -556,6 +608,9 @@ tdd: unit: inherit # inherit | required | optional e2e: inherit # inherit | required | optional exceptions: [] # [{ scope, reason, replacement_evidence }] +execution_shape: + mode: vertical-slices # vertical-slices | infra-track | fix-batch + rationale: "" 
--- # [Issue Title] @@ -592,6 +647,11 @@ which causes [impact]. Use the exact section shape from `commands/workflows/references/tdd-evidence-contract.md` with the resolved values for this plan. Do not omit any bullet, and make every deviation explicit with `replacement_evidence`. +## Execution Shape + +- **Mode:** vertical-slices +- **Why:** The default tracer-bullet decomposition matches the real behavior being delivered. + ## Constitution Alignment - **Relevant principles:** [Project rules that apply to this work] @@ -609,37 +669,88 @@ Use the exact section shape from `commands/workflows/references/tdd-evidence-con - Performance implications - Security considerations -## Implementation Phases +## Execution Slices + +Use this default section only when `execution_shape.mode` is `vertical-slices`. If the selected mode is `infra-track` or `fix-batch`, replace this section with the matching packet section from `commands/workflows/references/execution-shape.md`. -#### Phase 1: [Phase Name] -**Serves:** [Which aspect of the user story / which success criterion this phase delivers] +#### Phase 1: [Optional grouping / milestone] +**Purpose:** [Why these slices belong together or why this track exists] +**Not executable by itself:** `/workflows:work` executes the slices below, not the phase wrapper. 
-##### Task 1.1: [Task Name] +##### Slice 1.1: [Tracer Bullet Slice Title] +**Slice type:** tracer-bullet +**Serves:** [Which aspect of the user story / which success criterion this slice proves] +**Demo scenario:** [Describe the smallest end-to-end behavior this slice makes observable] **Files:** `path/to/file1.php`, `path/to/file2.php` **Depends on:** None -**Success criteria:** +**Dependency type:** parallel-safe + +###### What to build +[Describe the thin vertical cut through the system] + +###### Scope +- **Owns:** [What this slice is responsible for] +- **Non-goals:** [What it intentionally does not solve yet] +- **Scope fence:** [What would widen the slice too far] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -##### Task 1.2: [Task Name] +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] + +##### Slice 1.2: [Expansion Slice Title] +**Slice type:** expansion +**Serves:** [Which aspect of the user story / which success criterion this slice extends] +**Demo scenario:** [Describe the next observable behavior] **Files:** `path/to/file3.php` -**Depends on:** Task 1.1 -**Success criteria:** +**Depends on:** Slice 1.1 +**Dependency type:** real + +###### What to build +[Describe the next thin vertical cut] + +###### Scope +- **Owns:** [What this slice changes] +- **Non-goals:** [What stays out] +- **Scope fence:** [Boundary for this expansion] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -#### Phase 2: [Phase Name] -**Serves:** [Which aspect of the user story / which success criterion this phase delivers] +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] -##### Task 2.1: [Task Name] +#### Phase 2: [Optional grouping / milestone] +**Purpose:** [Why the next slices are grouped here] + +##### Slice 2.1: [Hardening or follow-on slice] +**Slice type:** expansion | hardening +**Serves:** [Which aspect of the 
user story / which success criterion this slice delivers] +**Demo scenario:** [Describe the observable behavior or guardrail added here] **Files:** `path/to/file4.php` -**Depends on:** Task 1.2 -**Success criteria:** +**Depends on:** Slice 1.2 +**Dependency type:** real + +###### What to build +[Describe the slice] + +###### Scope +- **Owns:** [What this slice changes] +- **Non-goals:** [What stays out] +- **Scope fence:** [Boundary for this slice] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` + +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] ## Acceptance Criteria @@ -668,7 +779,7 @@ Use the exact section shape from `commands/workflows/references/tdd-evidence-con **Includes everything from MORE plus:** -- Detailed implementation plan with phases +- Detailed implementation plan with slice groups - Alternative approaches considered (traced to user story) - Extensive technical specifications - Resource requirements and timeline @@ -705,6 +816,9 @@ tdd: unit: inherit # inherit | required | optional e2e: inherit # inherit | required | optional exceptions: [] # [{ scope, reason, replacement_evidence }] +execution_shape: + mode: vertical-slices # vertical-slices | infra-track | fix-batch + rationale: "" --- # [Issue Title] @@ -748,6 +862,11 @@ As a [persona 2], I need to [action] so that [outcome]. Use the exact section shape from `commands/workflows/references/tdd-evidence-contract.md` with the resolved values for this plan. Do not omit any bullet, and make every deviation explicit with `replacement_evidence`. +## Execution Shape + +- **Mode:** vertical-slices +- **Why:** The plan delivers meaningful user-visible tracer bullets, so slices stay the best default. 
+ ## Constitution Alignment - **Relevant principles:** [Project rules that apply to this work] @@ -776,65 +895,162 @@ Use the exact section shape from `commands/workflows/references/tdd-evidence-con [Detailed technical design, grounded in the architectural context map] -### Implementation Phases +### Execution Slices -#### Phase 1: [Foundation] -**Serves:** [Which aspect of the user story / which success criteria this phase delivers] -**Rationale:** [Why this phase comes first -- what it enables for subsequent phases] +Use this default section only when `execution_shape.mode` is `vertical-slices`. If the selected mode is `infra-track` or `fix-batch`, replace this section with the matching packet section from `commands/workflows/references/execution-shape.md`. -##### Task 1.1: [Task Name] +#### Phase 1: [Tracer bullet track] +**Purpose:** [Why these slices come first] +**Rationale:** [What this track proves before later widening] + +##### Slice 1.1: [Tracer Bullet Slice Title] +**Slice type:** tracer-bullet +**Serves:** [Which aspect of the user story / which success criteria this slice proves] +**Demo scenario:** [Describe the smallest end-to-end behavior] **Files:** `path/to/file1.php`, `path/to/file2.php` **Depends on:** None -**Success criteria:** +**Dependency type:** real | stub-available | parallel-safe +**Blast radius:** low | medium | high +**Shared state changes:** [None, or list] +**Rollback path:** [How to back out safely if risky] + +###### What to build +[Describe the tracer bullet as an issue-sized vertical slice] + +###### Scope +- **Owns:** [What this slice is responsible for] +- **Non-goals:** [What intentionally waits for later slices] +- **Scope fence:** [What would widen the slice too far] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -##### Task 1.2: [Task Name] +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] + +##### Slice 1.2: [Follow-on expansion slice] 
+**Slice type:** expansion +**Serves:** [Which aspect of the user story / which success criteria this slice extends] +**Demo scenario:** [Describe the next observable behavior] **Files:** `path/to/file3.php` -**Depends on:** Task 1.1 -**Success criteria:** +**Depends on:** Slice 1.1 +**Dependency type:** real +**Blast radius:** low | medium | high +**Shared state changes:** [None, or list] +**Rollback path:** [How to back out safely if risky] + +###### What to build +[Describe the slice] + +###### Scope +- **Owns:** [What this slice changes] +- **Non-goals:** [What stays out] +- **Scope fence:** [Boundary for this slice] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -#### Phase 2: [Core Implementation] -**Serves:** [Which aspect of the user story / which success criteria this phase delivers] -**Rationale:** [Why this phase order -- what it builds on from Phase 1] +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] + +#### Phase 2: [Core widening track] +**Purpose:** [Why these slices come after the tracer bullet] +**Rationale:** [What this track widens or hardens] -##### Task 2.1: [Task Name] +##### Slice 2.1: [Core slice] +**Slice type:** expansion | hardening +**Serves:** [Which aspect of the user story / which success criteria this slice delivers] +**Demo scenario:** [Describe the user-visible behavior] **Files:** `path/to/file4.php`, `path/to/file5.php` -**Depends on:** Task 1.2 -**Success criteria:** +**Depends on:** Slice 1.2 +**Dependency type:** real +**Blast radius:** low | medium | high +**Shared state changes:** [None, or list] +**Rollback path:** [How to back out safely if risky] + +###### What to build +[Describe the slice] + +###### Scope +- **Owns:** [What this slice changes] +- **Non-goals:** [What stays out] +- **Scope fence:** [Boundary for this slice] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -##### Task 2.2: [Task Name] 
+###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] + +##### Slice 2.2: [Parallel-safe or stub-removal slice] +**Slice type:** expansion | hardening +**Serves:** [Which aspect of the user story / which success criteria this slice delivers] +**Demo scenario:** [Describe the observable outcome] **Files:** `path/to/file6.php` -**Depends on:** Task 2.1 -**Success criteria:** +**Depends on:** Slice 2.1 +**Dependency type:** real | stub-available | parallel-safe +**Blast radius:** low | medium | high +**Shared state changes:** [None, or list] +**Rollback path:** [How to back out safely if risky] + +###### What to build +[Describe the slice] + +###### Scope +- **Owns:** [What this slice changes] +- **Non-goals:** [What stays out] +- **Scope fence:** [Boundary for this slice] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -#### Phase 3: [Polish & Optimization] -**Serves:** [Which success criteria / quality aspects this phase delivers] +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] -##### Task 3.1: [Task Name] +#### Phase 3: [Hardening / rollout track] +**Purpose:** [Why these slices close the loop] + +##### Slice 3.1: [Hardening slice] +**Slice type:** hardening +**Serves:** [Which success criteria / quality aspects this slice delivers] +**Demo scenario:** [Describe the behavior or safety improvement] **Files:** `path/to/file7.php` -**Depends on:** Task 2.2 -**Success criteria:** +**Depends on:** Slice 2.2 +**Dependency type:** real +**Blast radius:** low | medium | high +**Shared state changes:** [None, or list] +**Rollback path:** [How to back out safely if risky] + +###### What to build +[Describe the slice] + +###### Scope +- **Owns:** [What this slice changes] +- **Non-goals:** [What stays out] +- **Scope fence:** [Boundary for this slice] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -### Phase-to-Story 
Traceability +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] + +### Slice-to-Story Traceability -| Success Criterion | Delivered by Phase(s) | Key Tasks | +| Success Criterion | Delivered by Slice(s) | Demo scenarios | |---|---|---| -| [Criterion 1 from Success Criteria] | Phase 1, Phase 2 | Task 1.1, Task 2.1 | -| [Criterion 2 from Success Criteria] | Phase 2 | Task 2.1, Task 2.2 | +| [Criterion 1 from Success Criteria] | Slice 1.1, Slice 2.1 | [Scenario names] | +| [Criterion 2 from Success Criteria] | Slice 2.1, Slice 2.2 | [Scenario names] | ## Alternative Approaches Considered @@ -969,10 +1185,12 @@ public function processUser(User $user): array - [ ] Architectural Context is grounded in actual repo research (not hypothetical) - [ ] Success Criteria are tied to user outcomes, not just technical checkboxes - [ ] If `docs/constitution.md` exists, Constitution Alignment names the applicable rules, approvals, and any waivers explicitly -- [ ] Every implementation phase states which user story aspect / success criterion it serves +- [ ] Every execution slice states which user story aspect / success criterion it serves - [ ] `handoff` frontmatter fields are all `true` - [ ] `tdd` frontmatter is present and the precedence rule is explicit - [ ] `## TDD & Evidence Contract` names the effective loop, required evidence, and any justified exceptions +- [ ] `execution_shape` frontmatter is present and matches the body section +- [ ] Non-default execution shapes include an explicit rationale **Content Quality:** @@ -986,14 +1204,17 @@ public function processUser(User $user): array **Execution Readiness (for `/workflows:work`):** -- [ ] Each task is a self-contained execution unit once dependencies are met -- [ ] Each task has: Files, Depends on, Success criteria, Test command -- [ ] Each task scope is explicit enough that an executor does not need to infer missing boundaries from adjacent phases -- [ ] Task success criteria are 
testable (not vague) -- [ ] Dependencies between tasks are explicit +- [ ] The selected execution shape matches the real work instead of forcing fake verticality +- [ ] The plan includes the packet section required by the selected mode +- [ ] Every packet includes the required fields from `commands/workflows/references/execution-shape.md` +- [ ] If mode is `vertical-slices`, the first slice is a tracer bullet, not a broad foundation phase +- [ ] If mode is `vertical-slices`, every slice delivers a demoable outcome, including any slice that mostly touches a single horizontal layer +- [ ] Packet scope is explicit enough that an executor does not need to infer missing boundaries from adjacent packets +- [ ] Packet success criteria are testable (not vague) +- [ ] Dependencies are explicit wherever ordering matters - [ ] Architectural context is specific enough to fill `{{ARCHITECTURAL_CONTEXT}}` in execution agent prompts - [ ] The plan declares unit + e2e evidence by default, or records a justified exception with replacement evidence -- [ ] Task test commands collectively satisfy the resolved TDD contract +- [ ] Validation/test commands collectively satisfy the resolved TDD contract ## Directory Setup & Gitignore @@ -1079,12 +1300,12 @@ The plan document is a structured contract consumed by all downstream phases.
He **`/workflows:architecture`** reads: - Problem Narrative, User Story, Success Criteria, and Architectural Context -- the WHY/WHERE contract it must preserve -- Implementation phases and tasks -- identifies the deepening candidates that need structural clarification +- Execution shape plus execution packets -- identifies the deepening candidates and boundaries that need structural clarification - Constitution Alignment / waivers / brainstorm decisions -- keeps architecture decisions inside approved project guardrails - **Must write**: a dedicated artifact in `docs/architecture/` plus an `architecture_ref` back into the plan **`/deepen-plan`** reads: -- Implementation phases and tasks -- enriches each with parallel research (best practices, performance, UI patterns) +- Execution shape plus execution packets -- enriches each with parallel research and splits, merges, or reshapes packets when the current mode is weak - Success criteria -- validates they are testable and complete - Architectural Context -- uses it to ground research in the right part of the system - `tdd` frontmatter and `## TDD & Evidence Contract` -- preserves the effective Ralph/default loop, evidence requirements, and any justified exceptions @@ -1092,12 +1313,12 @@ The plan document is a structured contract consumed by all downstream phases. He - **Must preserve**: Problem Narrative, User Story, and handoff contract unchanged **`/workflows:work`** reads: -- **Problem Narrative & User Story** -- the orchestrator uses these to validate task outcomes make sense in context, not just pass tests +- **Problem Narrative & User Story** -- the orchestrator uses these to validate slice outcomes make sense in context, not just pass tests - **Architectural Context** -- feeds directly into `{{ARCHITECTURAL_CONTEXT}}` in each execution agent's prompt. 
This is WHY grounded arch context matters -- every subagent gets system-level awareness - **`architecture_ref` / `docs/architecture/` artifact / explicit architecture handoff contract** -- feeds deletion-test decisions, interfaces as test surfaces, seams, adapters, and contracts into execution so subagents do not invent structure ad hoc - **`tdd` frontmatter + `## TDD & Evidence Contract`** -- plan-level values win; `inherit` falls back to `compound-engineering.local.md`; if neither exists, execution should assume Ralph-driven unit + e2e evidence -- **Implementation phases & tasks** -- the execution chunk structure (Files, Depends on, Success criteria, Test command) -- **Success Criteria** -- the orchestrator checks final outcomes against these, not just individual task passes +- **`execution_shape` + execution packets** -- tells the orchestrator whether to execute slices, infrastructure packets, or fix-batch items, and which fields each unit must respect +- **Success Criteria** -- the orchestrator checks final outcomes against these, not just individual unit passes - **`constitution_version` / `constitution_waivers` / Constitution Alignment** -- the execution phase enforces repo-wide guardrails and knows which exceptions were approved - **`brainstorm_ref`** -- if present, the orchestrator can read the original brainstorm for additional context @@ -1107,6 +1328,7 @@ The plan document is a structured contract consumed by all downstream phases. 
He - **Architectural Context** -- used to evaluate whether the implementation respects system boundaries and integration points - **`architecture_ref` / `docs/architecture/` artifact / explicit architecture handoff contract** -- supplies the architecture intent, deletion-test outcomes, interfaces, seams, adapters, and contracts that reviewers must verify or flag as drift - **`tdd` frontmatter + `## TDD & Evidence Contract`** -- review must verify the declared evidence exists and that any deviation from Ralph/unit+e2e is explicitly justified +- **`execution_shape` + execution packets** -- review uses the chosen mode to judge whether the work was decomposed honestly and executed completely - **Constitution Alignment and waivers** -- used to distinguish approved exceptions from blocking constitution violations - **Stakeholder Impact** (A LOT level) -- informs stakeholder-perspective review diff --git a/plugins/compound-engineering/commands/workflows/review.md b/plugins/compound-engineering/commands/workflows/review.md index db2d362..13698e7 100644 --- a/plugins/compound-engineering/commands/workflows/review.md +++ b/plugins/compound-engineering/commands/workflows/review.md @@ -141,7 +141,7 @@ This context is passed to EVERY review agent below. It is not optional. #### TDD Evidence Gate (BEFORE reviewer dispatch) -If a `docs/execution-sessions/work-*/STATE.md` file exists for this branch, read the completed task session files before dispatching review agents and build a terse evidence ledger. +If a `docs/execution-sessions/work-*/STATE.md` file exists for this branch, read the completed execution unit session files before dispatching review agents and build a terse evidence ledger. Apply `commands/workflows/references/tdd-evidence-contract.md` as the source of truth for the Ralph evidence block and review-gate classifications. Verify the plan's approved exception contract instead of improvising replacement evidence rules. 
@@ -519,8 +519,8 @@ After creating all todo files, present comprehensive summary: ### TDD Evidence Gate -- **Behavior coverage:** PASS / FAIL — [task/session refs with weak or missing `Red`/`Green` evidence] -- **Cleanup after refactor:** PASS / FAIL — [task/session refs with weak or missing `Post-Refactor Green` evidence] +- **Behavior coverage:** PASS / FAIL — [unit/session refs with weak or missing `Red`/`Green` evidence] +- **Cleanup after refactor:** PASS / FAIL — [unit/session refs with weak or missing `Post-Refactor Green` evidence] [If PARTIALLY or NO:] **Gaps:** diff --git a/plugins/compound-engineering/commands/workflows/work.md b/plugins/compound-engineering/commands/workflows/work.md index 217e4ac..270e790 100644 --- a/plugins/compound-engineering/commands/workflows/work.md +++ b/plugins/compound-engineering/commands/workflows/work.md @@ -12,17 +12,17 @@ Execute a work plan while maintaining WHY tracing from problem narrative through ## Introduction -This command takes a work document (plan, specification, or todo file) and executes it systematically using a **subagent orchestration model**. The orchestrator (this conversation) decomposes the plan into scoped chunks and delegates each to a focused subagent. Each subagent follows a standardized 4-phase protocol (understand, implement, self-review, report) defined in the execution agent prompt template. +This command takes a work document (plan, specification, or todo file) and executes it systematically using a **subagent orchestration model**. The orchestrator (this conversation) loads or adapts the plan into execution units and delegates each unit to a focused subagent. `vertical-slices` is the default execution shape, but `infra-track` and `fix-batch` are also valid when declared by the plan. Each subagent follows a standardized 4-phase protocol (understand, implement, self-review, report) defined in the execution agent prompt template. 
-**WHY-grounded execution:** Every subagent receives the plan's WHY context -- the problem narrative, user story, architectural context, the architecture handoff contract, and which success criterion their specific task serves. This prevents implementation drift where technically correct code fails to deliver the user's actual need. The orchestrator is the guardian of WHY: it extracts purpose from the plan, threads it through every task prompt, and validates that the combined output delivers the stated user story. +**WHY-grounded execution:** Every subagent receives the plan's WHY context -- the problem narrative, user story, architectural context, the architecture handoff contract, and which success criterion their specific unit serves. This prevents implementation drift where technically correct code fails to deliver the user's actual need. The orchestrator is the guardian of WHY: it extracts purpose from the plan, threads it through every unit prompt, and validates that the combined output delivers the stated user story. ### Review Mode This command supports a `--review-mode` argument that controls when code review happens: -- **`bulk`** (default) -- Review happens after ALL tasks complete, using `/workflows:review`. This is the standard behavior and is fastest for most work. -- **`inline`** -- After each task, a lightweight two-stage review (spec compliance then code quality) runs automatically. Catches spec drift early but adds 2-4 extra subagent calls per task. -- **`both`** -- Inline review per task AND comprehensive `/workflows:review` at the end. Maximum quality assurance. +- **`bulk`** (default) -- Review happens after ALL units complete, using `/workflows:review`. This is the standard behavior and is fastest for most work. +- **`inline`** -- After each unit, a lightweight two-stage review (spec compliance then code quality) runs automatically. Catches spec drift early but adds 2-4 extra subagent calls per unit. 
+- **`both`** -- Inline review per unit AND comprehensive `/workflows:review` at the end. Maximum quality assurance. If no `--review-mode` is specified, check `compound-engineering.local.md` for a `review_mode` setting. If not found there either, default to `bulk`. @@ -36,15 +36,16 @@ If no `--review-mode` is specified, check `compound-engineering.local.md` for a 1. **Read Plan and Extract WHY + Guardrail Context** - - Read the work document completely - - **Extract WHY artifacts** from the plan (these ground everything that follows): - - **Problem Narrative** -- why this work exists, what pain it solves - - **User Story** -- who benefits and what outcome they get - - **Architectural Context** -- how the solution fits in the system - - **Success Criteria** -- measurable conditions that define "done" - - **Phase-to-story tracing** -- each phase's "Serves:" line showing what user story aspect it delivers - - **Constitution alignment** -- relevant principles, required approvals, and any approved waivers - - **`tdd` frontmatter + `## TDD & Evidence Contract`** -- resolve the effective TDD contract using `commands/workflows/references/tdd-evidence-contract.md` (plan overrides local, `inherit` falls back, and no-local-config defaults to Ralph-driven `red-green-refactor` with unit + e2e evidence required) + - Read the work document completely + - **Extract WHY artifacts** from the plan (these ground everything that follows): + - **Problem Narrative** -- why this work exists, what pain it solves + - **User Story** -- who benefits and what outcome they get + - **Architectural Context** -- how the solution fits in the system + - **Success Criteria** -- measurable conditions that define "done" + - **Execution shape** -- resolve it using `commands/workflows/references/execution-shape.md` + - **Unit tracing** -- each packet's `Serves`, `Consumers`, or equivalent purpose line showing what outcome it delivers or unlocks + - **Constitution alignment** -- relevant principles, 
required approvals, and any approved waivers + - **`tdd` frontmatter + `## TDD & Evidence Contract`** -- resolve the effective TDD contract using `commands/workflows/references/tdd-evidence-contract.md` (plan overrides local, `inherit` falls back, and no-local-config defaults to Ralph-driven `red-green-refactor` with unit + e2e evidence required) - Check for `handoff:` frontmatter in the plan. If present, verify all flags are `true` (problem_narrative, user_story, architectural_context, success_criteria). If any are `false`, warn the user that WHY context is incomplete and suggest running `/workflows:brainstorm` or `/workflows:plan` first. - If the resolved contract weakens Ralph/unit+e2e without a justified exception in the plan, stop and ask for the plan contract to be corrected before execution - If `docs/constitution.md` exists, read it and extract the active constitution version, applicable principles, execution baselines, and approval rules. If the plan lists `constitution_waivers`, honor only those explicit exceptions. @@ -53,7 +54,8 @@ If no `--review-mode` is specified, check `compound-engineering.local.md` for a - If no architecture artifact is recorded, assemble an explicit architecture handoff contract from the plan's Architectural Context, Key Decisions, Constitution Alignment, brainstorm context, and execution constraints. Tell the user this is a fallback and recommend `/workflows:architecture` if boundaries are still unsettled. - Review any other references or links provided in the plan - If the constitution requires explicit approval for any part of the planned work (for example, risky writes, schema changes, auth changes, or scope expansions), surface that before execution starts - - If anything is unclear or ambiguous, ask clarifying questions now + - If the document is not already in a declared execution shape, treat it as **legacy input**. Before spawning subagents, adapt it into execution units in STATE.md. 
If that adaptation materially changes scope or ordering, ask the user to approve the adapted unit backlog before proceeding. + - If anything is unclear or ambiguous, ask clarifying questions now - Get user approval to proceed - **Do not skip this** - better to ask questions now than build the wrong thing @@ -101,27 +103,27 @@ If no `--review-mode` is specified, check `compound-engineering.local.md` for a - You want to keep the default branch clean while experimenting - You plan to switch between branches frequently -3. **Preview Task Breakdown** - - Mentally identify the major tasks from the plan +3. **Preview Unit Breakdown** + - Mentally identify the major execution units from the plan - Note any questions about dependencies or scope - - The formal task decomposition happens in Phase 2 Step 4 (STATE.md), which is the persistent record of progress + - The formal unit decomposition happens in Phase 2 Step 4 (STATE.md), which is the persistent record of progress - TodoWrite can be used for in-conversation progress tracking if helpful, but STATE.md is the source of truth ### Phase 2: Orchestrated Execution -Phase 2 is where the orchestrator (this conversation) decomposes the plan into scoped chunks and delegates each to a focused subagent. The orchestrator does NOT implement code itself -- it decomposes, delegates, records, and routes. +Phase 2 is where the orchestrator (this conversation) resolves the plan's execution shape, decomposes the work into execution units, and delegates each to a focused subagent. The orchestrator does NOT implement code itself -- it decomposes, delegates, records, and routes. #### Step 1: Validate Plan Readiness -Before executing, validate four things: **structural readiness** (tasks are granular and testable), **WHY readiness** (the plan carries purpose context), **TDD readiness** (the execution contract is explicit and enforceable), and **guardrail readiness** (repo-wide rules are visible and actionable). 
+Before executing, validate four things: **structural readiness** (the selected execution shape is honest and its units are testable), **WHY readiness** (the plan carries purpose context), **TDD readiness** (the execution contract is explicit and enforceable), and **guardrail readiness** (repo-wide rules are visible and actionable). -**Structural readiness** -- each implementation task should have: +**Structural readiness** -- first resolve `execution_shape` using `commands/workflows/references/execution-shape.md`, then verify the units for that mode: -- **Task description** -- what needs to be done -- **Files to create/modify** -- specific file paths -- **Success criteria** -- checkboxes that define "done" -- **Test command** -- how to verify the task works -- **Dependencies** -- which other tasks must complete first +- **`vertical-slices`** -- slice type, serves, demo scenario, scope fence, files, success criteria, validation command, dependencies, dependency type +- **`infra-track`** -- capability enabled, consumers / downstream work unlocked, scope, files, risk / rollback, success criteria, validation command, dependencies +- **`fix-batch`** -- problem, repro / expected outcome, files, success criteria, validation command, dependencies +- **Default rule** -- if `execution_shape` is missing, assume `vertical-slices` +- **Anti-coercion rule** -- do not force infra or fix-batch work into slices if that would create fake verticality **Guardrail readiness** -- when the project has `docs/constitution.md`, the plan should make repo-wide rules visible: @@ -136,25 +138,25 @@ Before executing, validate four things: **structural readiness** (tasks are gran - **`## TDD & Evidence Contract` present** -- states the resolved execution path in plain language - **Effective mode resolved** -- Ralph-driven by default unless the plan explicitly approves a standard-mode exception - **Required evidence resolved** -- unit + e2e by default, or justified replacement evidence when 
explicitly waived -- **Report contract visible** -- Ralph-driven tasks must emit stable red, green, and post-refactor green evidence blocks +- **Report contract visible** -- Ralph-driven units must emit stable red, green, and post-refactor green evidence blocks **WHY readiness** -- the plan should have: - **Problem Narrative** -- present and non-empty - **User Story** -- present with clear "As a... I want... So that..." - **Architectural Context** -- present, describing system fit -- **Success Criteria** -- present at plan level (not just task level) -- **Phase tracing** -- each phase has a "Serves:" line connecting it to the user story +- **Success Criteria** -- present at plan level (not just unit level) +- **Unit tracing** -- each execution unit has a purpose line connecting it to the user story or explicit enabling outcome -If the plan lacks structural details, or if no architecture artifact / handoff contract can explain the boundaries, refuse to proceed and suggest running `/workflows:architecture` first if the boundaries are still fuzzy, then `/deepen-plan`, or manually breaking down the plan. +If the plan lacks structural details, or if no architecture artifact / handoff contract can explain the boundaries, refuse to proceed and suggest running `/workflows:architecture` first if the boundaries are still fuzzy, then `/deepen-plan`, or manually breaking down the plan into execution units. If the plan lacks the `tdd` block or `## TDD & Evidence Contract`, or if the resolved contract is ambiguous, refuse to proceed and suggest `/workflows:plan` or `/deepen-plan` to repair the execution contract before spawning subagents. If the plan lacks WHY artifacts, the orchestrator should **construct minimal WHY context** before proceeding: 1. Ask the user: "This plan doesn't include a problem narrative or user story. In one sentence, what problem are we solving and for whom?" -2. Infer success criteria from the task-level criteria +2. 
Infer success criteria from the unit-level criteria 3. Infer architectural context from the file paths and technologies mentioned -4. Record these in STATE.md (see Step 3) so they're available for all tasks +4. Record these in STATE.md (see Step 3) so they're available for all units #### Step 2: Check for Resumable Session @@ -167,7 +169,7 @@ ls docs/execution-sessions/work-*/STATE.md 2>/dev/null If a previous session exists for the same plan file and has `status: in_progress`: - Ask the user: "Found incomplete session `[session_id]` for this plan. Resume where you left off, or start fresh?" -- **If resume**: Read STATE.md, load the WHY Context section plus the Architecture Handoff section, skip completed tasks, load the learnings brief, and continue from `current_task` +- **If resume**: Read STATE.md, load the WHY Context section plus the Architecture Handoff section, skip completed units, load the learnings brief, and continue from `current_unit` - **If fresh**: Archive the old session directory (rename with `-archived` suffix), then start a new session If no resumable session exists, proceed to Step 3. 
@@ -189,8 +191,9 @@ plan_file: [path to plan] brainstorm_ref: [path to brainstorm, if available] started: [ISO timestamp] status: in_progress -current_task: 0 -total_tasks: [count] +execution_shape: [vertical-slices | infra-track | fix-batch] +current_unit: 0 +total_units: [count] session_id: [SESSION_ID] --- @@ -225,57 +228,68 @@ session_id: [SESSION_ID] - Seams / adapters / contracts: [boundaries this execution must honor] - Review guidance: [what `/workflows:review` must verify later] -## Task Status -| # | Task | Serves | Status | Attempts | Session File | -|---|------|--------|--------|----------|--------------| -| 1 | [task name] | [which user story aspect] | pending | -- | -- | -| 2 | [task name] | [which user story aspect] | pending | -- | -- | +## Work Status +| # | Unit | Kind | Serves / Unlocks | Status | Attempts | Session File | +|---|------|------|------------------|--------|----------|--------------| +| 1 | [unit title] | tracer-bullet | [which user story aspect or enabling outcome] | pending | -- | -- | +| 2 | [unit title] | expansion | [which user story aspect or enabling outcome] | pending | -- | -- | ... ## Learnings Brief _No learnings yet._ ``` -#### Step 4: Decompose Plan into Execution Chunks +#### Step 4: Load or Adapt Execution Units -The orchestrator parses the plan and creates a list of execution chunks. Each chunk is a self-contained unit of work. The orchestrator does the heavy lifting here: +The orchestrator parses the plan and creates a list of execution units. Each unit is a self-contained packet of work defined by the selected execution shape. The orchestrator does the heavy lifting here: -- **Break large phases** into smaller tasks if needed (each task should be completable in one subagent session) -- **Preserve WHY tracing** -- when splitting a phase, each resulting task inherits the parent phase's "Serves:" line. Never create an orphan task with no connection to the user story. 
-- **Identify file dependencies** between tasks (Task B modifies a file created by Task A) -- **Determine parallelizable tasks** -- tasks with non-overlapping file sets can run simultaneously -- **Ensure each chunk has clear success criteria** -- if the plan already defines them, use them directly; otherwise, the orchestrator must create them -- **Map each task to its purpose** -- record which success criterion or user story aspect each task delivers (this goes in STATE.md's "Serves" column) +- **Prefer plan-defined units directly** -- if the plan already declares a coherent execution shape, execute those packets as written +- **Adapt legacy phase/task plans into units before coding** -- do not execute raw task lists directly once the shape contract is available +- **Break oversized units** into smaller units if needed (each unit should be completable in one subagent session) +- **Preserve WHY tracing** -- when splitting a unit, each resulting unit inherits or refines the parent unit's purpose line. Never create an orphan unit with no connection to the user story. +- **Identify file dependencies** between units +- **Determine parallelizable units** -- only units with non-overlapping file sets and compatible dependencies can run simultaneously +- **Ensure each unit has clear success criteria** -- if the plan already defines them, use them directly; otherwise, the orchestrator must create them +- **Map each unit to its purpose** -- record which success criterion or enabling outcome each unit delivers (this goes in STATE.md's "Serves / Unlocks" column) -If the plan already has well-defined tasks with success criteria, use them directly. If not, the orchestrator must create them before proceeding. 
+Mode-specific rules: +- **`vertical-slices`** -- execute slices directly; keep the first unit a tracer bullet +- **`infra-track`** -- execute infrastructure work packets directly; do not coerce them into fake slices +- **`fix-batch`** -- execute fix items directly; keep each one narrow and independently verifiable -#### Step 5: Execute Task Loop +If the plan already has well-defined units with success criteria, use them directly. If not, the orchestrator must create them before proceeding. -For each task (or parallel batch of tasks), follow this cycle: +#### Step 5: Execute Unit Loop + +For each unit (or parallel batch of units), follow this cycle: ##### a. Build Scoped Prompt -For each task, the orchestrator constructs a focused prompt by loading the **execution agent prompt template** from `commands/workflows/references/execution-agent-prompt.md` and filling in the context blocks. +For each unit, the orchestrator constructs a focused prompt by loading the **execution agent prompt template** from `commands/workflows/references/execution-agent-prompt.md` and filling in the context blocks. Before building `scoped_prompt`, apply the shared `Reference Template Loading` protocol in `commands/workflows/references/orchestration-protocol.md` to `execution-agent-prompt.md`. Fill the placeholders from the loaded template and do not reconstruct the prompt from memory. -- **{{TASK_NAME}}** and **{{TASK_DESCRIPTION}}** -- from the plan +- **{{UNIT_TITLE}}** and **{{UNIT_DESCRIPTION}}** -- from the plan +- **{{UNIT_KIND}}** -- from the plan (`tracer-bullet`, `infra-packet`, `fix-item`, etc.) 
+- **{{OUTCOME_SCENARIO}}** -- the observable behavior or enabling outcome this unit proves +- **{{UNIT_SCOPE}}** -- what the unit owns and excludes +- **{{UNIT_SCOPE_FENCE}}** -- the boundary that keeps the unit thin - **{{FILE_LIST}}** -- files to create/modify from the plan - **{{SUCCESS_CRITERIA}}** -- checkboxes that define "done" -- **{{TEST_COMMAND}}** -- how to verify the task works -- **{{COMPLETED_DEPENDENCIES}}** -- list of already-completed tasks this depends on +- **{{VALIDATION_COMMAND}}** -- how to verify the unit works +- **{{COMPLETED_DEPENDENCIES}}** -- list of already-completed units this depends on - **{{WHY_CONTEXT}}** -- the purpose grounding block (constructed by orchestrator): ``` - ## Why This Task Exists + ## Why This Unit Exists **Problem:** [problem narrative from plan -- 1-2 sentences] **User Story:** [user story from plan] - **This task serves:** [the "Serves:" line from this task's parent phase -- which user story aspect or success criterion this delivers] + **This unit serves:** [the packet purpose line from this unit -- which user story aspect, success criterion, or enabling outcome this delivers] **Overall success criteria:** [plan-level success criteria list] **Guardrails:** [relevant constitution principles, approval rules, and approved waivers] ``` -- **{{ARCHITECTURAL_CONTEXT}}** -- from the plan's Architectural Context section, filtered to what's relevant for this task's files and domain -- **{{ARCHITECTURE_HANDOFF}}** -- from the `docs/architecture/` artifact or explicit plan-derived handoff contract; include deletion-test decisions, interfaces as test surfaces, seams, adapters, contracts, and downstream review guidance relevant to this task -- **{{LEARNINGS_BRIEF}}** -- from previous tasks, filtered by domain relevance (only include backend learnings for backend tasks, frontend learnings for frontend tasks, etc.) 
+- **{{ARCHITECTURAL_CONTEXT}}** -- from the plan's Architectural Context section, filtered to what's relevant for this unit's files and domain +- **{{ARCHITECTURE_HANDOFF}}** -- from the `docs/architecture/` artifact or explicit plan-derived handoff contract; include deletion-test decisions, interfaces as test surfaces, seams, adapters, contracts, and downstream review guidance relevant to this unit +- **{{LEARNINGS_BRIEF}}** -- from previous units, filtered by domain relevance - **{{PROJECT_CONVENTIONS}}** -- from CLAUDE.md plus relevant constitution baselines - **{{TDD_CONTRACT}}** -- the resolved execution contract: effective mode, Ralph/default loop, required unit/e2e evidence, and any explicit exceptions - **{{TDD_SECTION}}** -- if the resolved effective mode is Ralph-driven, include the Ralph/TDD Implementation Section from the template; otherwise include the Standard Implementation Section. Do not treat Ralph as an adjacent side command when it is the resolved default. @@ -288,7 +302,7 @@ The execution agent template instructs each subagent to follow a 4-phase protoco ##### b. Spawn Subagent -Delegate the task to a focused subagent: +Delegate the unit to a focused subagent: ``` Task(general-purpose, prompt=scoped_prompt) @@ -310,23 +324,23 @@ The subagent prompt is constructed from the loaded execution agent template (`co - Final test results (pass/fail) - Attempt count -**For parallel tasks**: Spawn multiple subagents simultaneously. Only parallelize tasks with non-overlapping file sets. Before parallelizing, verify file sets do not overlap. +**For parallel units**: Spawn multiple subagents simultaneously only when their file sets do not overlap and their dependency types allow parallel work. Before parallelizing, verify file sets do not overlap and no unit claims shared mutable state without an explicit guard. **Example scoped prompt:** ``` -You are implementing Task 3 of a feature plan. 
Here is your scoped context: +You are implementing Unit 3 of a feature plan. Here is your scoped context: -## Why This Task Exists +## Why This Unit Exists **Problem:** Users currently cannot authenticate, forcing manual session management that's error-prone and insecure. **User Story:** As a user, I want to log in with my credentials so that I can access my personalized dashboard securely. -**This task serves:** "Secure authentication flow" -- implementing the core token generation that enables the login experience. +**This unit serves:** "Secure authentication flow" -- implementing the first thin end-to-end login path. **Overall success criteria:** - Users can log in and receive a JWT token - Invalid credentials are rejected with clear error messages - Tokens expire after the configured TTL -## Task +## Unit Create the UserAuthService with JWT token generation and validation. ## Files to Create/Modify @@ -340,11 +354,11 @@ Create the UserAuthService with JWT token generation and validation. - [ ] authenticate() throws AuthenticationError for invalid credentials - [ ] Token validation works for valid and expired tokens -## Test Command +## Validation Command npm test -- --filter UserAuthService ## Architectural Context -JWT-based stateless auth. Tokens issued by UserAuthService, validated by middleware (Task 4). No server-side session storage. +JWT-based stateless auth. Tokens issued by UserAuthService, validated by middleware (Unit 4). No server-side session storage. ## TDD Execution Contract - Effective mode: Ralph-driven TDD @@ -357,7 +371,7 @@ JWT-based stateless auth. 
Tokens issued by UserAuthService, validated by middlew - Variables are camelCase - Type annotations on all parameters and return types -## Learnings from Previous Tasks +## Learnings from Previous Units - [backend] Use jest.mock() for module mocking - [backend] Factory pattern: createUser() helper not new User() - [testing] Use expect().toThrow() for error assertions @@ -375,15 +389,16 @@ JWT-based stateless auth. Tokens issued by UserAuthService, validated by middlew When the subagent returns, the orchestrator processes the results: -**0. Validate the execution contract evidence** -- audit the report against `commands/workflows/references/tdd-evidence-contract.md`. If a Ralph-driven task is missing stable `Red`, `Green`, and `Post-Refactor Green` evidence blocks, treat the report as incomplete and send it back for correction before marking the task complete. +**0. Validate the execution contract evidence** -- audit the report against `commands/workflows/references/tdd-evidence-contract.md`. If a Ralph-driven unit is missing stable `Red`, `Green`, and `Post-Refactor Green` evidence blocks, treat the report as incomplete and send it back for correction before marking the unit complete. -**1. Write session file** to `docs/execution-sessions/${SESSION_ID}/task-{nn}-{slug}.md`: +**1. Write session file** to `docs/execution-sessions/${SESSION_ID}/unit-{nn}-{slug}.md`: ```markdown --- -task: "[task name]" -task_number: [n] -serves: "[which user story aspect / success criterion this task delivers]" +unit: "[unit title]" +unit_number: [n] +unit_kind: [tracer-bullet|expansion|hardening|infra-packet|fix-item] +serves: "[which user story aspect / success criterion / enabling outcome this unit delivers]" status: [completed|failed] attempt_count: [n] domains: [backend, frontend, testing, database, etc.] @@ -420,22 +435,22 @@ session_id: [SESSION_ID] **2. 
Inline Review (when `--review-mode inline` or `--review-mode both`)** - If the `--review-mode` argument is `inline` or `both`, perform a two-stage inline review before proceeding to the next task. If `--review-mode` is `bulk` (the default), skip this step. +If the `--review-mode` argument is `inline` or `both`, perform a two-stage inline review before proceeding to the next unit. If `--review-mode` is `bulk` (the default), skip this step. **Stage 1: Spec Compliance Review** Apply the shared `Reference Template Loading` protocol from `commands/workflows/references/orchestration-protocol.md`, substituting `spec-review-prompt.md`. If the template cannot be loaded and quoted, stop the inline review loop and report the missing template instead of improvising. Then fill in: - - `{{TASK_REQUIREMENTS}}` -- the task description and success criteria + - `{{UNIT_REQUIREMENTS}}` -- the unit description, outcome scenario, scope fence, and success criteria - `{{SUCCESS_CRITERIA}}` -- the success criteria checkboxes - `{{IMPLEMENTER_REPORT}}` -- the execution report from the subagent - - `{{TASK_SERVES}}` -- what user story aspect this task delivers (from the task's "Serves:" line) + - `{{UNIT_PURPOSE}}` -- what user story aspect or enabling outcome this unit delivers (from the unit's purpose line) Spawn a spec reviewer subagent: ``` Task(general-purpose, prompt=filled_spec_review_prompt) ``` - The spec reviewer should check not just checkbox compliance but whether the implementation actually delivers on the purpose stated in "Serves:". A task can pass all checkboxes but miss the intent. + The spec reviewer should check not just checkbox compliance but whether the implementation actually delivers on the recorded purpose. A unit can pass all checkboxes but miss the intent. - If **PASS**: proceed to Stage 2 - If **FAIL**: spawn a new execution subagent with the specific issues to fix, then re-run the spec reviewer (max 2 fix-review cycles). 
If still failing after 2 cycles, log the issues and ask the user how to proceed. @@ -453,26 +468,26 @@ session_id: [SESSION_ID] - If **PASS**: proceed to next steps - If **FAIL** with Critical issues: spawn fix subagent, re-review (max 2 cycles) - - If **FAIL** with only Important/Minor issues: log them for the orchestrator's attention but proceed to next task (these will also be caught by `/workflows:review` if run later) + - If **FAIL** with only Important/Minor issues: log them for the orchestrator's attention but proceed to next unit (these will also be caught by `/workflows:review` if run later) - **Note:** Inline review is a lightweight per-task check. It does NOT replace the comprehensive `/workflows:review` multi-agent review. When `--review-mode both` is active, inline review runs per-task AND `/workflows:review` runs after all tasks complete. + **Note:** Inline review is a lightweight per-unit check. It does NOT replace the comprehensive `/workflows:review` multi-agent review. When `--review-mode both` is active, inline review runs per-unit AND `/workflows:review` runs after all units complete. -**3. Update STATE.md** -- mark the task status, increment `current_task`, update the task status table +**3. Update STATE.md** -- mark the unit status, increment `current_unit`, update the work status table -**4. Update learnings brief** -- add new learnings from this task, tagged by domain, deduplicated against existing learnings +**4. Update learnings brief** -- add new learnings from this unit, tagged by domain, deduplicated against existing learnings **5. Update plan file** -- check off completed items (`[ ]` to `[x]`) in the original plan document -**6. Regression guard** -- run test commands from ALL previously completed tasks. If any regress: - - Log the regression in the current task's session file +**6. Regression guard** -- run validation commands from ALL previously completed units. 
If any regress: + - Log the regression in the current unit's session file - Spawn a fix subagent with context about what broke and why - - Do not proceed to the next task until the regression is fixed + - Do not proceed to the next unit until the regression is fixed **7. Incremental commit** if appropriate (logical unit complete, tests pass): | Commit when... | Don't commit when... | |----------------|---------------------| - | Logical unit complete (model, service, component) | Small part of a larger unit | + | Logical unit complete (one observable outcome) | Small part of a larger unit | | Tests pass + meaningful progress | Tests failing | | About to switch contexts (backend to frontend) | Purely scaffolding with no behavior | | About to attempt risky/uncertain changes | Would need a "WIP" commit message | @@ -493,10 +508,10 @@ session_id: [SESSION_ID] If a subagent fails after its internal retries: -1. **Reframe**: Can the task be broken down differently? Try spawning a new subagent with a different approach or smaller scope. -2. **Ask user**: Use AskUserQuestion -- "Task [name] failed after 3 attempts. [error summary]. How should I proceed?" - - Options: "Retry with different approach", "Skip and continue", "Stop pipeline", "I'll fix it manually" -3. **Skip and continue**: Mark task as `skipped` in STATE.md. Note it as a blocker for any dependent tasks. Dependent tasks are also skipped automatically. +1. **Reframe**: Can the unit be broken down differently? Try spawning a new subagent with a different approach or smaller scope. +2. **Ask user**: Use AskUserQuestion -- "Unit [name] failed after 3 attempts. [error summary]. How should I proceed?" + - Options: "Retry with different approach", "Skip and continue", "Stop pipeline", "I'll fix it manually" +3. **Skip and continue**: Mark the unit as `skipped` in STATE.md. Note it as a blocker for any dependent units. Dependent units are also skipped automatically. 4. 
**Stop pipeline**: Save all state to STATE.md with `status: paused`, present a summary of what was completed and what remains. ### Phase 3: Quality Check @@ -517,11 +532,11 @@ If a subagent fails after its internal retries: Before mechanical quality checks, validate that the combined work delivers on the WHY: - - **User story delivered?** -- Review the user story from STATE.md. Can a user actually achieve the stated outcome with what was built? If any success criterion is unmet or any task was skipped, note the gap. - - **Architectural integrity?** -- Does the implementation match the architectural context from the plan? Flag any deviations (e.g., plan said "stateless JWT" but implementation uses server sessions). - - **Constitution honored?** -- Does the implementation respect the constitution baselines and approval rules captured in STATE.md? Flag any unwaived violations. - - **Ralph evidence complete?** -- For Ralph-driven tasks, does every session file include Red, Green, and Post-Refactor Green evidence aligned to the resolved unit/e2e contract or an explicitly approved exception? - - **No orphan code** -- Is there any implemented code that doesn't trace back to the user story or success criteria? This may indicate scope creep during execution. + - **User story delivered?** -- Review the user story from STATE.md. Can a user actually achieve the stated outcome with what was built? If any success criterion is unmet or any unit was skipped, note the gap. + - **Architectural integrity?** -- Does the implementation match the architectural context from the plan? Flag any deviations (e.g., plan said "stateless JWT" but implementation uses server sessions). + - **Constitution honored?** -- Does the implementation respect the constitution baselines and approval rules captured in STATE.md? Flag any unwaived violations. 
+ - **Ralph evidence complete?** -- For Ralph-driven units, does every session file include Red, Green, and Post-Refactor Green evidence aligned to the resolved unit/e2e contract or an explicitly approved exception? + - **No orphan code** -- Is there any implemented code that doesn't trace back to the user story or success criteria? This may indicate scope creep during execution. If purpose validation reveals gaps, present them to the user before proceeding to PR. @@ -534,8 +549,8 @@ If a subagent fails after its internal retries: Run configured agents in parallel with Task tool. **Pass the WHY context (problem narrative, user story, success criteria) to reviewer agents** so they can evaluate fitness for purpose, not just code quality. Present findings and address critical issues. 4. **Final Validation** - - All tasks in STATE.md marked `completed` (or explicitly `skipped` with user approval) - - All tests pass (including regression tests from every completed task) + - All units in STATE.md marked `completed` (or explicitly `skipped` with user approval) + - All tests pass (including regression tests from every completed unit) - Linting passes - Code follows existing patterns - Purpose validation passed (user story deliverable, architecture intact) @@ -630,8 +645,8 @@ If the `finishing-branch` skill is not available, follow the manual steps below: - **Key decisions made:** [architectural or design choices] ## Success Criteria Status - - [x] [criterion 1 from plan] -- delivered by Task N - - [x] [criterion 2 from plan] -- delivered by Task N + - [x] [criterion 1 from plan] -- delivered by Unit N + - [x] [criterion 2 from plan] -- delivered by Unit N - [ ] [criterion 3 if skipped] -- skipped: [reason] ## Testing @@ -690,7 +705,7 @@ If the `finishing-branch` skill is not available, follow the manual steps below: 5. 
**Notify User** - Summarize what was completed - Link to PR - - Highlight any tasks that were skipped and why + - Highlight any units that were skipped and why - Reference the execution session directory for detailed logs - Note any follow-up work needed - Suggest next steps if applicable @@ -705,7 +720,7 @@ For complex plans with multiple independent workstreams, enable swarm mode for p | Use Swarm Mode when... | Use Standard Mode when... | |------------------------|---------------------------| -| Plan has 5+ independent tasks | Plan is linear/sequential | +| Plan has 5+ independent units | Plan is linear/sequential | | Multiple specialists needed (review + test + implement) | Single-focus work | | Want maximum parallelism | Simpler mental model preferred | | Large feature with clear phases | Small feature or bug fix | @@ -714,7 +729,7 @@ For complex plans with multiple independent workstreams, enable swarm mode for p To trigger swarm execution, say: -> "Make a Task list and launch an army of agent swarm subagents to build the plan" +> "Make a unit list and launch an army of agent swarm subagents to build the plan" Or explicitly request: "Use swarm mode for this work" @@ -727,10 +742,10 @@ When swarm mode is enabled, the workflow changes: Teammate({ operation: "spawnTeam", team_name: "work-{timestamp}" }) ``` -2. **Create Task List with Dependencies** - - Parse plan into TaskCreate items +2. **Create Unit List with Dependencies** + - Parse plan into execution work items - Set up blockedBy relationships for sequential dependencies - - Independent tasks have no blockers (can run in parallel) + - Independent units have no blockers (can run in parallel) 3. 
**Spawn Specialized Teammates** ``` @@ -738,7 +753,7 @@ When swarm mode is enabled, the workflow changes: team_name: "work-{timestamp}", name: "implementer", subagent_type: "general-purpose", - prompt: "Claim implementation tasks, execute, mark complete", + prompt: "Claim implementation units, execute, mark complete", run_in_background: true }) @@ -746,13 +761,13 @@ When swarm mode is enabled, the workflow changes: team_name: "work-{timestamp}", name: "tester", subagent_type: "general-purpose", - prompt: "Claim testing tasks, run tests, mark complete", + prompt: "Claim testing units, run tests, mark complete", run_in_background: true }) ``` 4. **Coordinate and Monitor** - - Team lead monitors task completion + - Team lead monitors unit completion - Spawn additional workers as phases unblock - Handle plan approval if required @@ -773,7 +788,7 @@ See the `orchestrating-swarms` skill for detailed swarm patterns and best practi ### WHY Grounds Everything -- Every subagent knows why its task exists, not just what to build +- Every subagent knows why its unit exists, not just what to build - The orchestrator is the guardian of WHY: it extracts, threads, and validates purpose - Purpose drift is caught by inline reviews and Phase 3 validation, not just at the end - If the combined work doesn't deliver the user story, passing tests don't matter @@ -782,7 +797,7 @@ See the `orchestrating-swarms` skill for detailed swarm patterns and best practi - The orchestrator decomposes, delegates, records, and routes. It does NOT implement code itself. - Each subagent gets only the context it needs. No conversation history pollution. -- Learnings compound: each task benefits from everything learned in previous tasks. +- Learnings compound: each unit benefits from everything learned in previous units. 
### Start Fast, Execute Faster @@ -812,7 +827,7 @@ See the `orchestrating-swarms` skill for detailed swarm patterns and best practi ### Ship Complete Features -- Mark all tasks completed before moving on +- Mark all units completed before moving on - Don't leave features 80% done - A finished feature that ships beats a perfect feature that doesn't @@ -820,7 +835,7 @@ See the `orchestrating-swarms` skill for detailed swarm patterns and best practi - Escalation path (reframe, ask user, skip, stop) -- not infinite loops - Progress is persistent: STATE.md means you can resume after crashes -- Regression is caught early: previous tests re-run after each task +- Regression is caught early: previous tests re-run after each unit - When debugging unexpected errors, use the `systematic-debugging` skill for structured root-cause analysis instead of trial-and-error ## Quality Checklist @@ -828,12 +843,12 @@ See the `orchestrating-swarms` skill for detailed swarm patterns and best practi Before creating PR, verify: - [ ] All clarifying questions asked and answered -- [ ] All tasks in STATE.md marked completed (or explicitly skipped with user approval) +- [ ] All units in STATE.md marked completed (or explicitly skipped with user approval) - [ ] **User story deliverable** -- the combined work enables the stated user outcome - [ ] **Success criteria met** -- every plan-level success criterion addressed (or gap documented) - [ ] **Architecture intact** -- implementation matches the plan's architectural context - [ ] Tests pass (run project's test command) -- [ ] Regression tests from all completed tasks pass +- [ ] Regression tests from all completed units pass - [ ] Linting passes (use linting-agent) - [ ] Code follows existing patterns - [ ] Figma designs match implementation (if applicable) @@ -859,14 +874,14 @@ For most features: tests + linting + following patterns is sufficient. 
## Common Pitfalls to Avoid - **Losing the WHY** - Subagents build what's specified but miss the intent. Always pass WHY context. -- **Purpose drift** - Tasks individually pass but combined output doesn't deliver the user story. Validate at Phase 3. +- **Purpose drift** - Units individually pass but combined output doesn't deliver the user story. Validate at Phase 3. - **Analysis paralysis** - Don't overthink, read the plan and execute - **Skipping clarifying questions** - Ask now, not after building wrong thing - **Ignoring plan references** - The plan has links for a reason - **Testing at the end** - Test continuously or suffer later - **Orchestrator doing implementation** - Delegate to subagents, don't implement inline -- **Skipping regression checks** - A passing task that breaks previous work is not progress -- **Losing session state** - Always write to STATE.md before and after each task +- **Skipping regression checks** - A passing unit that breaks previous work is not progress +- **Losing session state** - Always write to STATE.md before and after each unit - **Dumping all session files into subagent context** - Use the learnings brief, filtered by domain - **Over-reviewing simple changes** - Save reviewer agents for complex work - **80% done syndrome** - Finish the feature, don't move on early diff --git a/plugins/compound-engineering/skills/brainstorming/SKILL.md b/plugins/compound-engineering/skills/brainstorming/SKILL.md index 8d5b970..d86a478 100644 --- a/plugins/compound-engineering/skills/brainstorming/SKILL.md +++ b/plugins/compound-engineering/skills/brainstorming/SKILL.md @@ -11,7 +11,7 @@ This skill provides detailed process knowledge for effective brainstorming sessi The brainstorm produces three lynchpin artifacts that anchor all downstream phases: 1. **Problem Narrative & User Story** -- the WHY (consumed by plan, work, and review) 2. **Architectural Context Map** -- the WHERE (consumed by execution agents and reviewers) -3. 
**Design Decisions** -- the WHAT (consumed by plan for task decomposition) +3. **Design Decisions** -- the WHAT (consumed by plan for execution-slice decomposition) ## When to Use This Skill @@ -306,18 +306,18 @@ This prevents wasted effort on misaligned designs. The brainstorm document is the **feature-level spec and handoff contract** for downstream work. The project constitution, when present, remains the repo-wide governing artifact: **`/workflows:plan` consumes:** -- Problem narrative and user story -> structures phases around the WHY -- Architectural context -> informs task decomposition, file mapping, dependencies +- Problem narrative and user story -> structures execution slices around the WHY +- Architectural context -> informs slice decomposition, file mapping, dependencies - Success criteria -> becomes the plan's acceptance criteria foundation - Key decisions -> preserved and enriched, not re-decided **`/deepen-plan` consumes:** -- Problem narrative -> evaluates whether deepened tasks still serve the original intent +- Problem narrative -> evaluates whether deepened slices still serve the original intent - Success criteria -> grounds best-practice research in actual goals **`/workflows:work` consumes:** - Architectural context -> populates `{{ARCHITECTURAL_CONTEXT}}` for every execution agent -- User story -> orchestrator validates each task contributes to the story +- User story -> orchestrator validates each slice contributes to the story - Problem narrative -> included in scoped prompts so agents understand purpose **`/workflows:review` consumes:** diff --git a/plugins/compound-engineering/skills/orchestrating-swarms/SKILL.md b/plugins/compound-engineering/skills/orchestrating-swarms/SKILL.md index 37a5bf0..d3993ff 100644 --- a/plugins/compound-engineering/skills/orchestrating-swarms/SKILL.md +++ b/plugins/compound-engineering/skills/orchestrating-swarms/SKILL.md @@ -29,7 +29,7 @@ Use swarms when the work has real parallelism, specialist boundaries, 
or depende - Keep the team small. Extra workers are justified only when they remove wall-clock time or increase specialist quality. ### Task design -- Write tasks as outcomes, not vague topics. +- Write work items as outcomes, not vague topics. - Keep scopes non-overlapping unless the assignment is an explicit cross-check. - Prefer DAG-style dependencies over ad hoc sequencing. - State what evidence counts as done: files changed, tests run, findings delivered, screenshots captured, or open questions listed. @@ -43,7 +43,7 @@ Have workers report in a terse, machine-checkable shape: - `risks`: unresolved concerns ### Leader responsibilities -- Keep the canonical task list and dependency map. +- Keep the canonical slice/work-item list and dependency map. - Resolve blockers instead of letting workers stall silently. - Merge duplicate findings and remove contradictory advice. - Re-run shared verification after integrating worker output. diff --git a/plugins/compound-engineering/skills/setup/SKILL.md b/plugins/compound-engineering/skills/setup/SKILL.md index aeef015..ce8de50 100644 --- a/plugins/compound-engineering/skills/setup/SKILL.md +++ b/plugins/compound-engineering/skills/setup/SKILL.md @@ -212,7 +212,7 @@ options: - `tdd.evidence.unit`: `required` or `optional` - `tdd.evidence.e2e`: `required` or `optional` - `tdd.exceptions`: `[]` by default. Plans must carry any justified exceptions. 
-- `review_mode`: "bulk" (default), "inline", or "both" (controls per-task review in workflows:work) +- `review_mode`: "bulk" (default), "inline", or "both" (controls per-slice review in workflows:work) Write `compound-engineering.local.md`: diff --git a/portable/compound-engineering/commands/deepen-plan.md b/portable/compound-engineering/commands/deepen-plan.md index 5eb2f93..d15c1d2 100644 --- a/portable/compound-engineering/commands/deepen-plan.md +++ b/portable/compound-engineering/commands/deepen-plan.md @@ -80,7 +80,8 @@ First, read and parse the plan to extract the WHY artifacts (problem narrative, - [ ] Overview/Proposed Solution sections - [ ] Technical Approach/Architecture -- [ ] Implementation phases/steps (noting which user story aspect each phase serves) +- [ ] `execution_shape` frontmatter + `## Execution Shape` section +- [ ] Execution packets / phase wrappers (noting which user story aspect each packet serves) - [ ] Code examples and file references - [ ] Acceptance criteria - [ ] Any UI/UX components mentioned @@ -99,41 +100,60 @@ The "Serves" column ensures every deepening activity traces back to WHY we're bu ### 1.1 Validate Execution Readiness -Check if the plan has sufficiently structured execution chunks for the subagent orchestration model in /workflows:work. Plans need per-task success criteria, test commands, and file lists. Also validate that phases trace to the user story. +Check if the plan has sufficiently structured execution packets for the subagent orchestration model in `/workflows:work`. Use `commands/workflows/references/execution-shape.md` as the source of truth. Plans need packets that are independently executable, testable, and traceable back to the user story without forcing fake verticality. 
-**Scan each implementation task/phase for these required fields:** +**Resolve execution shape first:** -- [ ] **Files:** List of files to create or modify -- [ ] **Depends on:** Dependencies on other tasks -- [ ] **Success criteria:** Testable checkboxes defining "done" -- [ ] **Test command:** Exact command to verify completion -- [ ] **TDD alignment:** Task-level test commands collectively satisfy the resolved unit/e2e evidence contract, or the plan records a justified exception with replacement evidence +- [ ] Read `execution_shape.mode`; if missing, default it to `vertical-slices` +- [ ] Read `execution_shape.rationale`; require it when the mode is not `vertical-slices` +- [ ] Ensure the body includes a matching `## Execution Shape` section +- [ ] If the chosen mode looks wrong for the real work, add a `### WHY Reassessment` note instead of silently changing it + +**Scan each execution packet using the required fields from `commands/workflows/references/execution-shape.md`:** + +- [ ] **`vertical-slices`:** slice type, serves, demo scenario, scope + scope fence, files, depends on, dependency type, success criteria, test command +- [ ] **`infra-track`:** capability enabled, consumers / downstream work unlocked, scope, files, depends on, risk / rollback, validation command, success criteria +- [ ] **`fix-batch`:** problem, repro / expected outcome, files, depends on, validation command, success criteria +- [ ] **TDD alignment:** packet-level validation commands collectively satisfy the resolved unit/e2e evidence contract, or the plan records a justified exception with replacement evidence **Validate WHY tracing:** -- [ ] **Each phase has a "Serves:" line** stating which user story aspect or success criterion it delivers -- [ ] **Success criteria trace to plan-level success criteria** -- task criteria should be decomposed from the plan's success criteria, not invented independently -- [ ] **No orphan phases** -- every phase should trace to at least one success 
criterion. If a phase doesn't serve any success criterion, flag it: "Phase [X] doesn't trace to any success criterion. Is it necessary, or is a success criterion missing?" +- [ ] **Each packet has a purpose line** (`Serves`, `Consumers`, or equivalent) tying it to user story value or explicit downstream unlocks +- [ ] **Success criteria trace to plan-level success criteria** -- packet criteria should be decomposed from the plan's success criteria, not invented independently +- [ ] **No orphan packets** -- every packet should trace to at least one success criterion or explicit enabling outcome +- [ ] **Phase wrappers stay optional** -- if the plan uses phases or tracks, confirm they are grouping containers only and do not replace packet-level tracing -**Expected task format:** +**Expected packet format:** ```markdown -##### Task N.1: [Task Name] +##### Slice N.1: [Slice Title] +**Slice type:** tracer-bullet | expansion | hardening +**Serves:** [Which aspect of the user story / which success criterion this slice delivers] +**Demo scenario:** [Smallest end-to-end behavior this slice proves] **Files:** `path/to/file1.php`, `path/to/file2.php` -**Depends on:** Task N-1.2 (or "None") -**Success criteria:** +**Depends on:** Slice N-1.2 (or "None") +**Dependency type:** real | stub-available | parallel-safe + +###### Scope +- **Owns:** [What this slice changes] +- **Non-goals:** [What intentionally waits] +- **Scope fence:** [Boundary that keeps the slice thin] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `command to run` + +###### Evidence +- **Test command:** `command to run` ``` **Scoring:** -Count how many implementation tasks have all four fields. Report: +Count how many execution packets have the full structure. 
Report: ``` -Execution Readiness: X/Y tasks have complete structure (Z%) +Execution Readiness: X/Y packets have complete structure (Z%) ``` **Actions based on score:** @@ -141,50 +161,56 @@ Execution Readiness: X/Y tasks have complete structure (Z%) | Score | Action | |-------|--------| | 80-100% | Plan is execution-ready. Proceed with deepening. | -| 50-79% | Flag incomplete tasks. During deepening, add missing fields. | -| 0-49% | Plan needs significant restructuring. Add an "Execution Readiness" enhancement pass that decomposes vague phases into structured tasks with all required fields. **Note:** `/workflows:work` will refuse to execute plans that lack this structure. | +| 50-79% | Flag incomplete packets. During deepening, add missing fields. | +| 0-49% | Plan needs significant restructuring. Add an "Execution Readiness" enhancement pass that decomposes vague phases/tasks into the packet shape required by the selected mode. **Note:** `/workflows:work` will refuse to execute plans that lack a coherent execution shape unless the user explicitly approves a mode change or legacy adaptation. | -**For tasks missing structure, the deepening process should:** +**For packets missing structure, the deepening process should:** -1. Break vague phases into specific, scoped tasks -2. Identify which files each task will create or modify -3. Write concrete success criteria (not vague goals) -4. Determine the test command (look at existing test patterns in the codebase) -5. Make it explicit whether the test command contributes unit evidence, e2e evidence, or both -6. Map dependencies between tasks -7. Add a suggested commit message per task (conventional format: `feat(scope): description`) +1. Pick or confirm the execution shape that best matches the real work +2. Break vague phases or legacy tasks into specific packets for that mode +3. Identify the smallest honest outcome each packet proves or unlocks +4. Identify which files each packet will create or modify +5. 
Write concrete success criteria (not vague goals) +6. Determine the validation command (look at existing test patterns in the codebase) +7. Make it explicit whether the validation command contributes unit evidence, e2e evidence, or both +8. Map dependencies between packets +9. Add a suggested commit message per packet (conventional format: `feat(scope): description`) -### 1.2 Task Complexity Check +### 1.2 Execution Shape Complexity Check -Check if any tasks are too large for reliable subagent execution. Large tasks with many files or success criteria should be split. +Check if any packets are too large, too vague, or shaped incorrectly for reliable subagent execution. Cross-layer work is allowed in `vertical-slices`; the failure mode is not "touches backend and frontend" but "tries to deliver multiple outcomes or no honest outcome at all." -**For each task, check complexity:** +**For each packet, check complexity against the selected mode:** | Metric | Threshold | Action | |--------|-----------|--------| -| Files touched | > 3 files | Flag for splitting | +| Outcomes or unlocks | > 1 meaningful outcome | Flag for splitting | +| Files touched | > 6 files | Flag for review; confirm the packet is still thin | | Success criteria | > 5 criteria | Flag for splitting | -| Multiple concerns | Mixes backend + frontend | Flag for splitting | -| Vague scope | "Implement the feature" | Flag for clarification | +| Scope fence | Missing or vague | Flag for clarification | +| Shape fit | `vertical-slices` used for horizontal-only work, or `infra-track` / `fix-batch` used to hide a real feature slice | Reassess mode | +| Risk controls | `Blast radius: high` with no rollback path | Add safety fields before execution | + +**Important:** A packet that touches backend + frontend is **not automatically too large**. If the same thin slice needs a migration, service method, API handler, and tiny UI change to prove one observable behavior, keep it intact. 
-**If any tasks exceed thresholds:** +**If any packets exceed thresholds:** Report: ``` -Task Complexity Warning: X tasks may be too large for reliable subagent execution. +Execution Shape Warning: X packets may be too large or incorrectly shaped for reliable subagent execution. -Task 2.1: "Build user auth" -- touches 5 files, 7 success criteria - Suggestion: Split into "Create auth service" (3 files) and "Add auth middleware" (2 files) +Slice 2.1: "User can complete first login tracer bullet" -- 2 demo scenarios, 7 success criteria + Suggestion: Split into "User submits credentials and receives success state" and "User sees first authenticated dashboard shell" -Task 3.2: "Build dashboard UI" -- mixes backend API + frontend component - Suggestion: Split into "Create dashboard API endpoint" and "Build dashboard component" +Packet 3.2: "Create auth schema foundation" -- no demo scenario, horizontal-only outcome + Suggestion: Either rewrite as "User can submit credentials and persist the first auth record" or switch this track to `infra-track` if it is truly enablement-only ``` -Suggest splits that create self-contained tasks with non-overlapping file sets. Each split task should be completable by one subagent in a single session. **When splitting, ensure each new task retains its "Serves:" tracing to the user story -- a split should never orphan a task from its purpose.** +Suggest splits that create self-contained packets with clear ownership and non-overlapping file sets where possible. **When splitting, ensure each new packet retains its tracing to the user story or enabling outcome.** -**This validation ensures the plan is ready for `/workflows:work`'s subagent orchestration model**, where each task is delegated to a focused subagent with clear scope and termination criteria. 
+**This validation ensures the plan is ready for `/workflows:work`'s subagent orchestration model**, where each packet is delegated to a focused subagent with clear scope, proof, and termination criteria. ### 1.5 Re-fetch Source Documents (if available) @@ -606,7 +632,7 @@ Merge research findings back into the plan, adding depth without changing the or - User Story - Architectural Context - Success Criteria -- Phase "Serves:" lines +- Execution shape contract and packet tracing lines - Handoff frontmatter If research suggests changes to these, add a `### WHY Reassessment` note at the end of the plan for the user to review manually. Do not edit the originals. @@ -621,7 +647,7 @@ If research suggests changes to these, add a `### WHY Reassessment` note at the ```markdown ## [Original Section Title] -[Original content preserved -- including any "Serves:" lines] +[Original content preserved -- including any execution-shape and packet tracing lines] ### Research Insights @@ -663,7 +689,8 @@ At the top of the plan, add a summary section: - User Story: [preserved / flagged for reassessment] - Architectural Context: [preserved / expanded / flagged for reassessment] - Success Criteria: [preserved / flagged for reassessment] -- Phase tracing: [all phases still trace to user story: yes/no] +- Execution shape: [preserved / flagged for reassessment] +- Packet tracing: [all packets still trace to user story or enabling outcome: yes/no] ### TDD Contract Check - Precedence: [plan overrides local / inherit uses local / fallback default noted] @@ -706,16 +733,17 @@ Before finalizing: - [ ] Links are valid and relevant - [ ] No contradictions between sections - [ ] Enhancement summary accurately reflects changes -- [ ] Implementation tasks have execution-ready structure (files, success criteria, test commands, dependencies) -- [ ] TDD contract is explicit, precedence is documented, and unit/e2e evidence stays aligned with task test commands unless an exception says otherwise +- [ ] 
Execution packets have execution-ready structure for the selected mode +- [ ] TDD contract is explicit, precedence is documented, and unit/e2e evidence stays aligned with packet validation commands unless an exception says otherwise **WHY integrity:** - [ ] Problem Narrative, User Story, Success Criteria, and Architectural Context are unmodified from the original plan - [ ] Handoff frontmatter is intact and still accurate -- [ ] Every phase still has its "Serves:" tracing line -- [ ] No new phases added without a "Serves:" line connecting them to the user story +- [ ] `execution_shape` frontmatter and `## Execution Shape` still agree +- [ ] Every packet still has its tracing line +- [ ] No new packets were added without a tracing line connecting them to the user story or enabling outcome - [ ] Enhancements tagged with which success criterion they serve -- [ ] Scope-expanding recommendations flagged in "Scope Warnings" rather than silently added to phases +- [ ] Scope-expanding recommendations flagged in "Scope Warnings" rather than silently added to packets - [ ] If WHY reassessment was needed, it's in a clearly marked section at the end (not inline edits) - [ ] `tdd` frontmatter and `## TDD & Evidence Contract` still agree on precedence, effective loop, evidence, and exceptions diff --git a/portable/compound-engineering/commands/workflows/plan.md b/portable/compound-engineering/commands/workflows/plan.md index ffa39af..4198723 100644 --- a/portable/compound-engineering/commands/workflows/plan.md +++ b/portable/compound-engineering/commands/workflows/plan.md @@ -13,13 +13,14 @@ argument-hint: '[feature description, bug report, or improvement idea]' Transform feature descriptions, bug reports, or improvement ideas into well-structured, execution-ready plans that: 1. **Anchor to WHY** -- every plan traces back to a user story and problem narrative -2. **Map WHERE** -- architectural context grounds task decomposition in the system's structure +2. 
**Map WHERE** -- architectural context grounds slice decomposition in the system's structure 3. **Define DONE** -- success criteria tied to user outcomes, not just technical checkboxes 4. **Honor project guardrails** -- constitution principles, baselines, and approval rules are made explicit 5. **Make TDD explicit** -- the plan declares the Ralph/default loop, required unit + e2e evidence, and any justified exceptions -6. **Enable architecture-first execution** -- `/workflows:architecture` turns the plan into a dedicated architecture artifact before `/deepen-plan`, `/workflows:work`, and `/workflows:review` harden or execute it +6. **Choose the right execution shape** -- vertical slices are the default, but infra tracks and fix batches are valid when they fit the real work better +7. **Enable architecture-first execution** -- `/workflows:architecture` turns the plan into a dedicated architecture artifact before `/deepen-plan`, `/workflows:work`, and `/workflows:review` harden or execute it -Plans consume the project constitution from `/workflows:constitution` when available, plus lynchpin artifacts from `/workflows:brainstorm` when available, or construct feature context fresh when running standalone. Either way, the plan document carries forward the WHY, WHERE, DONE, GUARDRAIL, and TDD contract that all downstream phases depend on. After the plan is written, the next explicit step is `/workflows:architecture`, not direct deepening. +Plans consume the project constitution from `/workflows:constitution` when available, plus lynchpin artifacts from `/workflows:brainstorm` when available, or construct feature context fresh when running standalone. Either way, the plan document carries forward the WHY, WHERE, DONE, GUARDRAIL, TDD, and **execution shape** contract that all downstream phases depend on. After the plan is written, the next explicit step is `/workflows:architecture`, not direct deepening. 
## Feature Description @@ -71,6 +72,15 @@ Every plan must then write its own `tdd:` frontmatter block plus a `## TDD & Evi - **Exception rule:** Any deviation from the resolved default loop or evidence requirements must be explicit and justified in `tdd.exceptions` and in the plan body. - **Shared source of truth:** Reuse `commands/workflows/references/tdd-evidence-contract.md` for contract resolution, the `## TDD & Evidence Contract` section shape, Ralph evidence semantics, and exception handling. +#### Execution Shape Baseline (Runs Before Path A/B/C) + +Use `commands/workflows/references/execution-shape.md` as the single source for choosing and documenting the execution shape. + +- **Default mode:** `vertical-slices` +- **Allowed overrides:** `infra-track`, `fix-batch` +- **Override rule:** Any non-default mode must include a short rationale in frontmatter and in the plan body +- **Anti-coercion rule:** Do not force work into slices if that would create fake end-to-end structure + #### Path A: Spec/Plan File Provided **Check if arguments contain a plan or spec file:** @@ -79,7 +89,7 @@ If the feature description (`#$ARGUMENTS`) is or contains a path to a `.md` file 1. Read the file 2. Announce: "Found existing plan/spec: `[file path]`. Using as foundation." -3. Extract: title, problem statement, proposed approach, acceptance criteria, implementation phases, and any existing tasks +3. Extract: title, problem statement, proposed approach, acceptance criteria, execution shape (if any), and any existing execution units 4. **Check for brainstorm reference** -- look for a `brainstorm_ref` field in frontmatter, or search `docs/brainstorms/` for a matching topic. If found, read and extract lynchpin artifacts (see Path B). 5. 
**Extract or construct WHY artifacts from the spec:** - If the spec has a Problem Narrative / User Story / Architectural Context -- use them directly @@ -91,7 +101,7 @@ If the feature description (`#$ARGUMENTS`) is or contains a path to a `.md` file 6. **Skip free-form idea refinement** -- the spec defines WHAT to build 7. Proceed to Step 0.5 to gather any additional project inputs, then to research -In Step 2 (Issue Planning), **build upon the existing plan structure** -- preserve its sections, fill gaps, add execution-readiness fields (Files, Depends on, Success criteria, Test command) to any tasks that lack them, and enrich with research findings. Do NOT discard or rewrite sections that are already well-defined. +In Step 2 (Issue Planning), **build upon the existing plan structure** -- preserve its sections, fill gaps, add the execution-shape contract and execution-readiness fields to any legacy execution units that lack them, and enrich with research findings. Do NOT discard or rewrite sections that are already well-defined. #### Path B: Brainstorm Document Found @@ -114,11 +124,11 @@ ls -la docs/brainstorms/*.md 2>/dev/null | head -10 3. Announce: "Found brainstorm from [date]: [topic]. Consuming lynchpin artifacts." 4. 
**Extract and surface all lynchpin sections:** - **Problem Narrative** -- the synthesized WHY (carry forward verbatim into plan) - - **User Story** -- the north star (carry forward, plan tasks must trace to this) + - **User Story** -- the north star (carry forward, plan slices must trace to this) - **Architectural Context** -- the WHERE map (feeds `{{ARCHITECTURAL_CONTEXT}}` in work.md) - **Success Criteria** -- the DONE definition (plan acceptance criteria must include these) - **Stakeholder Impact** -- who is affected (informs stakeholder analysis) - - **Chosen Approach** and **Key Decisions** -- the WHAT (informs task decomposition) + - **Chosen Approach** and **Key Decisions** -- the WHAT (informs slice decomposition) - **Open Questions** -- must be resolved before planning proceeds 5. **If any handoff fields are `false` or sections are empty**, flag them: "Brainstorm is missing [X]. I'll construct this during planning." 6. **Resolve open questions** -- if the brainstorm has unresolved questions, use **AskUserQuestion tool** to resolve each one before proceeding @@ -331,7 +341,7 @@ Now that we have concrete codebase knowledge, refine the WHY artifacts establish Explicitly state how research findings confirm, challenge, or refine the planned approach relative to the user story. Examples: - "Codebase already has a similar pattern in `app/Services/AuthService.php` -- we should follow it for consistency, which aligns with the user story because..." - "Learnings doc warns about [gotcha] -- this affects our approach because..." -- "No existing patterns found for this -- higher risk, may need more tasks for validation." +- "No existing patterns found for this -- higher risk, may need more slices for validation." - "Constitution requires [baseline] -- the plan must make that visible in acceptance criteria or approvals." 
**Optional validation:** Briefly summarize the refined WHY artifacts and key research findings, then ask if anything looks off or missing before proceeding to planning. @@ -366,28 +376,34 @@ Think like a product manager -- what would make this issue clear, actionable, an - [ ] Gather supporting materials (error logs, screenshots, design mockups) - [ ] Prepare code examples or reproduction steps if applicable, name the mock filenames in the lists -**Phase Decomposition (traced to user story):** +**Execution Shape Selection (traced to user story):** + +Use `commands/workflows/references/execution-shape.md` as the source of truth for selecting and documenting the plan's execution shape. -Each implementation phase must state **what aspect of the user story it serves**. This creates a traceable chain: -- User Story → Phase → Tasks → Files +Default to **`vertical-slices`**: +- User Story → Phase/Track (optional grouping) → Slice → Files +- Start with the thinnest tracer bullet +- Slice vertically across layers when needed +- Treat phases as wrappers, not executable units +- Forbid horizontal slice titles unless they still produce a demoable outcome -When decomposing into phases: -- **Group by user-facing capability**, not by technical layer. "User can log in" is a phase; "Create database tables" is a task within a phase. 
-- **Each phase should deliver a testable slice** of the user story where possible -- **Each subphase/task should be a self-contained execution unit** -- after its listed dependencies are satisfied, the executor should have the context, scope, relevant files, success criteria, and verification command needed to complete it without reconstructing intent from neighboring phases -- **Cross-reference success criteria** -- map each success criterion to the phase(s) that deliver it -- **Architectural context informs boundaries** -- use the WHERE map to identify natural phase boundaries (e.g., service boundaries, module boundaries) +Switch only when that default would be fake: +- **`infra-track`** for enabling/foundation work with no honest user-visible tracer bullet yet +- **`fix-batch`** for a batch of small mostly independent fixes + +Every plan must record: +- `execution_shape.mode` +- `execution_shape.rationale` (required when mode is not `vertical-slices`) +- A matching `## Execution Shape` section in the body **Execution Readiness:** -For plans that will be executed via `/workflows:work`, ensure each implementation task includes: -- **Scope:** What this task owns, what it changes, and any important boundary or non-goal that keeps the slice contained -- **Files:** List of files to create or modify -- **Depends on:** Which other tasks must complete first (or "None") -- **Success criteria:** Testable checkboxes that define "done" -- **Test command:** The exact command to verify the task is complete. Across the plan, these commands must satisfy the plan-level TDD evidence contract. +For plans that will be executed via `/workflows:work`, the plan must include the packet section required by the selected mode: +- `## Execution Slices` +- `## Infrastructure Work Packets` +- `## Fix Batch Items` -This structured format enables the `/workflows:work` orchestrator to delegate each task to a focused subagent with clear scope and termination criteria. 
Treat every task as a mini-handoff packet: if an executor had only that task plus the shared WHY/architecture context, they should still know what to touch, what not to touch, and how to prove it is done. Plans without this structure will be flagged for refinement before execution begins. +Each packet must include the fields defined in `commands/workflows/references/execution-shape.md`. Plans without a declared shape and packet structure will be flagged for refinement before execution begins. **TDD & Evidence Contract (mandatory):** @@ -407,10 +423,10 @@ Apply the shared `Named Agent Dispatch` protocol from `commands/workflows/refere - Task spec-flow-analyzer(feature_description, user_story, success_criteria, research_findings) The SpecFlow Analyzer should evaluate: -- Do the planned phases cover all aspects of the user story? +- Do the planned slices cover all aspects of the user story? - Are there user flows implied by the user story that the plan doesn't address? - Do edge cases threaten any of the success criteria? -- Are there gaps between what the user needs (story) and what the plan delivers (tasks)? +- Are there gaps between what the user needs (story) and what the plan delivers (slices)? **SpecFlow Analyzer Output:** @@ -421,7 +437,7 @@ The SpecFlow Analyzer should evaluate: ### 4. Choose Implementation Detail Level -**Important for `/workflows:work` compatibility:** All detail levels can be executed, but the MORE and A LOT levels produce plans with structured execution chunks (per-task scope, success criteria, test commands, and file lists) that enable the subagent orchestration model in `/workflows:work`. MINIMAL plans work but may require the orchestrator to decompose tasks further before delegating to subagents and supply any missing containment details. +**Important for `/workflows:work` compatibility:** All detail levels can be executed, but each level must still declare an execution shape and produce the matching packet section. 
`vertical-slices` is the default and usually the best choice. MORE and A LOT provide the richest packets (scope fence, dependencies, evidence, and safety notes) and therefore give the most predictable subagent orchestration. **All detail levels include WHY sections.** The Problem Narrative, User Story, Architectural Context, and Success Criteria are mandatory at every level -- they are the contract that downstream phases depend on. The difference between levels is how much implementation detail surrounds them. @@ -437,7 +453,7 @@ Select how comprehensive you want the issue to be, simpler is mostly better. - Basic acceptance criteria - Essential context only -**Note:** MINIMAL plans may need to be enriched with per-task success criteria before running `/workflows:work`. The orchestrator can handle this decomposition automatically, but providing structured tasks up front leads to more predictable execution. +**Note:** MINIMAL plans may still contain only a few units, but they must include at least one execution-ready packet from the selected mode before `/workflows:work` should execute them. When in doubt, choose a tracer-bullet slice. **Structure:** @@ -468,6 +484,9 @@ tdd: unit: inherit # inherit | required | optional e2e: inherit # inherit | required | optional exceptions: [] # [{ scope, reason, replacement_evidence }] +execution_shape: + mode: vertical-slices # vertical-slices | infra-track | fix-batch + rationale: "" --- # [Issue Title] @@ -500,6 +519,11 @@ which causes [impact]. Use the exact section shape from `commands/workflows/references/tdd-evidence-contract.md` with the resolved values for this plan. Do not omit any bullet, and make every deviation explicit with `replacement_evidence`. +## Execution Shape + +- **Mode:** vertical-slices +- **Why:** The work has a real tracer-bullet path, so default to end-to-end slices. 
+ ## Constitution Alignment - **Relevant principles:** [Project rules that apply to this work] @@ -510,6 +534,34 @@ Use the exact section shape from `commands/workflows/references/tdd-evidence-con [Brief description of what to build and how] +## Execution Slices + +Use this default section only when `execution_shape.mode` is `vertical-slices`. If the selected mode is `infra-track` or `fix-batch`, replace this section with the matching packet section from `commands/workflows/references/execution-shape.md`. + +##### Slice 1.1: [Tracer Bullet Slice Title] +**Slice type:** tracer-bullet +**Serves:** [Which aspect of the user story / which success criterion this slice proves] +**Demo scenario:** [Describe the smallest end-to-end behavior this slice makes observable] +**Files:** `path/to/file1.php`, `path/to/file2.php` +**Depends on:** None +**Dependency type:** real | stub-available | parallel-safe + +###### What to build +[Brief description of the thin end-to-end path] + +###### Scope +- **Owns:** [What this slice is responsible for] +- **Non-goals:** [What this slice intentionally does not solve yet] +- **Scope fence:** [What would count as widening the slice too far] + +###### Acceptance criteria +- [ ] Criterion 1 +- [ ] Criterion 2 + +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What this command proves for the tracer bullet] + ## References - Related issue: #[issue_number] @@ -524,7 +576,7 @@ Use the exact section shape from `commands/workflows/references/tdd-evidence-con - Detailed background and motivation - Technical considerations -- Phased implementation with story tracing +- Issue-shaped execution slices with story tracing - Success metrics - Dependencies and risks @@ -557,6 +609,9 @@ tdd: unit: inherit # inherit | required | optional e2e: inherit # inherit | required | optional exceptions: [] # [{ scope, reason, replacement_evidence }] +execution_shape: + mode: vertical-slices # vertical-slices | infra-track | fix-batch + rationale: "" 
--- # [Issue Title] @@ -593,6 +648,11 @@ which causes [impact]. Use the exact section shape from `commands/workflows/references/tdd-evidence-contract.md` with the resolved values for this plan. Do not omit any bullet, and make every deviation explicit with `replacement_evidence`. +## Execution Shape + +- **Mode:** vertical-slices +- **Why:** The default tracer-bullet decomposition matches the real behavior being delivered. + ## Constitution Alignment - **Relevant principles:** [Project rules that apply to this work] @@ -610,37 +670,88 @@ Use the exact section shape from `commands/workflows/references/tdd-evidence-con - Performance implications - Security considerations -## Implementation Phases +## Execution Slices + +Use this default section only when `execution_shape.mode` is `vertical-slices`. If the selected mode is `infra-track` or `fix-batch`, replace this section with the matching packet section from `commands/workflows/references/execution-shape.md`. -#### Phase 1: [Phase Name] -**Serves:** [Which aspect of the user story / which success criterion this phase delivers] +#### Phase 1: [Optional grouping / milestone] +**Purpose:** [Why these slices belong together or why this track exists] +**Not executable by itself:** `/workflows:work` executes the slices below, not the phase wrapper. 
-##### Task 1.1: [Task Name] +##### Slice 1.1: [Tracer Bullet Slice Title] +**Slice type:** tracer-bullet +**Serves:** [Which aspect of the user story / which success criterion this slice proves] +**Demo scenario:** [Describe the smallest end-to-end behavior this slice makes observable] **Files:** `path/to/file1.php`, `path/to/file2.php` **Depends on:** None -**Success criteria:** +**Dependency type:** parallel-safe + +###### What to build +[Describe the thin vertical cut through the system] + +###### Scope +- **Owns:** [What this slice is responsible for] +- **Non-goals:** [What it intentionally does not solve yet] +- **Scope fence:** [What would widen the slice too far] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -##### Task 1.2: [Task Name] +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] + +##### Slice 1.2: [Expansion Slice Title] +**Slice type:** expansion +**Serves:** [Which aspect of the user story / which success criterion this slice extends] +**Demo scenario:** [Describe the next observable behavior] **Files:** `path/to/file3.php` -**Depends on:** Task 1.1 -**Success criteria:** +**Depends on:** Slice 1.1 +**Dependency type:** real + +###### What to build +[Describe the next thin vertical cut] + +###### Scope +- **Owns:** [What this slice changes] +- **Non-goals:** [What stays out] +- **Scope fence:** [Boundary for this expansion] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -#### Phase 2: [Phase Name] -**Serves:** [Which aspect of the user story / which success criterion this phase delivers] +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] -##### Task 2.1: [Task Name] +#### Phase 2: [Optional grouping / milestone] +**Purpose:** [Why the next slices are grouped here] + +##### Slice 2.1: [Hardening or follow-on slice] +**Slice type:** expansion | hardening +**Serves:** [Which aspect of the 
user story / which success criterion this slice delivers] +**Demo scenario:** [Describe the observable behavior or guardrail added here] **Files:** `path/to/file4.php` -**Depends on:** Task 1.2 -**Success criteria:** +**Depends on:** Slice 1.2 +**Dependency type:** real + +###### What to build +[Describe the slice] + +###### Scope +- **Owns:** [What this slice changes] +- **Non-goals:** [What stays out] +- **Scope fence:** [Boundary for this slice] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` + +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] ## Acceptance Criteria @@ -669,7 +780,7 @@ Use the exact section shape from `commands/workflows/references/tdd-evidence-con **Includes everything from MORE plus:** -- Detailed implementation plan with phases +- Detailed implementation plan with slice groups - Alternative approaches considered (traced to user story) - Extensive technical specifications - Resource requirements and timeline @@ -706,6 +817,9 @@ tdd: unit: inherit # inherit | required | optional e2e: inherit # inherit | required | optional exceptions: [] # [{ scope, reason, replacement_evidence }] +execution_shape: + mode: vertical-slices # vertical-slices | infra-track | fix-batch + rationale: "" --- # [Issue Title] @@ -749,6 +863,11 @@ As a [persona 2], I need to [action] so that [outcome]. Use the exact section shape from `commands/workflows/references/tdd-evidence-contract.md` with the resolved values for this plan. Do not omit any bullet, and make every deviation explicit with `replacement_evidence`. +## Execution Shape + +- **Mode:** vertical-slices +- **Why:** The plan delivers meaningful user-visible tracer bullets, so slices stay the best default. 
+ ## Constitution Alignment - **Relevant principles:** [Project rules that apply to this work] @@ -777,65 +896,162 @@ Use the exact section shape from `commands/workflows/references/tdd-evidence-con [Detailed technical design, grounded in the architectural context map] -### Implementation Phases +### Execution Slices -#### Phase 1: [Foundation] -**Serves:** [Which aspect of the user story / which success criteria this phase delivers] -**Rationale:** [Why this phase comes first -- what it enables for subsequent phases] +Use this default section only when `execution_shape.mode` is `vertical-slices`. If the selected mode is `infra-track` or `fix-batch`, replace this section with the matching packet section from `commands/workflows/references/execution-shape.md`. -##### Task 1.1: [Task Name] +#### Phase 1: [Tracer bullet track] +**Purpose:** [Why these slices come first] +**Rationale:** [What this track proves before later widening] + +##### Slice 1.1: [Tracer Bullet Slice Title] +**Slice type:** tracer-bullet +**Serves:** [Which aspect of the user story / which success criteria this slice proves] +**Demo scenario:** [Describe the smallest end-to-end behavior] **Files:** `path/to/file1.php`, `path/to/file2.php` **Depends on:** None -**Success criteria:** +**Dependency type:** real | stub-available | parallel-safe +**Blast radius:** low | medium | high +**Shared state changes:** [None, or list] +**Rollback path:** [How to back out safely if risky] + +###### What to build +[Describe the tracer bullet as an issue-sized vertical slice] + +###### Scope +- **Owns:** [What this slice is responsible for] +- **Non-goals:** [What intentionally waits for later slices] +- **Scope fence:** [What would widen the slice too far] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -##### Task 1.2: [Task Name] +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] + +##### Slice 1.2: [Follow-on expansion slice] 
+**Slice type:** expansion +**Serves:** [Which aspect of the user story / which success criteria this slice extends] +**Demo scenario:** [Describe the next observable behavior] **Files:** `path/to/file3.php` -**Depends on:** Task 1.1 -**Success criteria:** +**Depends on:** Slice 1.1 +**Dependency type:** real +**Blast radius:** low | medium | high +**Shared state changes:** [None, or list] +**Rollback path:** [How to back out safely if risky] + +###### What to build +[Describe the slice] + +###### Scope +- **Owns:** [What this slice changes] +- **Non-goals:** [What stays out] +- **Scope fence:** [Boundary for this slice] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -#### Phase 2: [Core Implementation] -**Serves:** [Which aspect of the user story / which success criteria this phase delivers] -**Rationale:** [Why this phase order -- what it builds on from Phase 1] +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] + +#### Phase 2: [Core widening track] +**Purpose:** [Why these slices come after the tracer bullet] +**Rationale:** [What this track widens or hardens] -##### Task 2.1: [Task Name] +##### Slice 2.1: [Core slice] +**Slice type:** expansion | hardening +**Serves:** [Which aspect of the user story / which success criteria this slice delivers] +**Demo scenario:** [Describe the user-visible behavior] **Files:** `path/to/file4.php`, `path/to/file5.php` -**Depends on:** Task 1.2 -**Success criteria:** +**Depends on:** Slice 1.2 +**Dependency type:** real +**Blast radius:** low | medium | high +**Shared state changes:** [None, or list] +**Rollback path:** [How to back out safely if risky] + +###### What to build +[Describe the slice] + +###### Scope +- **Owns:** [What this slice changes] +- **Non-goals:** [What stays out] +- **Scope fence:** [Boundary for this slice] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -##### Task 2.2: [Task Name] 
+###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] + +##### Slice 2.2: [Parallel-safe or stub-removal slice] +**Slice type:** expansion | hardening +**Serves:** [Which aspect of the user story / which success criteria this slice delivers] +**Demo scenario:** [Describe the observable outcome] **Files:** `path/to/file6.php` -**Depends on:** Task 2.1 -**Success criteria:** +**Depends on:** Slice 2.1 +**Dependency type:** real | stub-available | parallel-safe +**Blast radius:** low | medium | high +**Shared state changes:** [None, or list] +**Rollback path:** [How to back out safely if risky] + +###### What to build +[Describe the slice] + +###### Scope +- **Owns:** [What this slice changes] +- **Non-goals:** [What stays out] +- **Scope fence:** [Boundary for this slice] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -#### Phase 3: [Polish & Optimization] -**Serves:** [Which success criteria / quality aspects this phase delivers] +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] -##### Task 3.1: [Task Name] +#### Phase 3: [Hardening / rollout track] +**Purpose:** [Why these slices close the loop] + +##### Slice 3.1: [Hardening slice] +**Slice type:** hardening +**Serves:** [Which success criteria / quality aspects this slice delivers] +**Demo scenario:** [Describe the behavior or safety improvement] **Files:** `path/to/file7.php` -**Depends on:** Task 2.2 -**Success criteria:** +**Depends on:** Slice 2.2 +**Dependency type:** real +**Blast radius:** low | medium | high +**Shared state changes:** [None, or list] +**Rollback path:** [How to back out safely if risky] + +###### What to build +[Describe the slice] + +###### Scope +- **Owns:** [What this slice changes] +- **Non-goals:** [What stays out] +- **Scope fence:** [Boundary for this slice] + +###### Acceptance criteria - [ ] Criterion 1 - [ ] Criterion 2 -**Test command:** `` -### Phase-to-Story 
Traceability +###### Evidence +- **Test command:** `` +- **Evidence focus:** [What the command proves] + +### Slice-to-Story Traceability -| Success Criterion | Delivered by Phase(s) | Key Tasks | +| Success Criterion | Delivered by Slice(s) | Demo scenarios | |---|---|---| -| [Criterion 1 from Success Criteria] | Phase 1, Phase 2 | Task 1.1, Task 2.1 | -| [Criterion 2 from Success Criteria] | Phase 2 | Task 2.1, Task 2.2 | +| [Criterion 1 from Success Criteria] | Slice 1.1, Slice 2.1 | [Scenario names] | +| [Criterion 2 from Success Criteria] | Slice 2.1, Slice 2.2 | [Scenario names] | ## Alternative Approaches Considered @@ -970,10 +1186,12 @@ public function processUser(User $user): array - [ ] Architectural Context is grounded in actual repo research (not hypothetical) - [ ] Success Criteria are tied to user outcomes, not just technical checkboxes - [ ] If `docs/constitution.md` exists, Constitution Alignment names the applicable rules, approvals, and any waivers explicitly -- [ ] Every implementation phase states which user story aspect / success criterion it serves +- [ ] Every execution slice states which user story aspect / success criterion it serves - [ ] `handoff` frontmatter fields are all `true` - [ ] `tdd` frontmatter is present and the precedence rule is explicit - [ ] `## TDD & Evidence Contract` names the effective loop, required evidence, and any justified exceptions +- [ ] `execution_shape` frontmatter is present and matches the body section +- [ ] Non-default execution shapes include an explicit rationale **Content Quality:** @@ -987,14 +1205,17 @@ public function processUser(User $user): array **Execution Readiness (for `/workflows:work`):** -- [ ] Each task is a self-contained execution unit once dependencies are met -- [ ] Each task has: Files, Depends on, Success criteria, Test command -- [ ] Each task scope is explicit enough that an executor does not need to infer missing boundaries from adjacent phases -- [ ] Task success criteria are 
testable (not vague) -- [ ] Dependencies between tasks are explicit +- [ ] The selected execution shape matches the real work instead of forcing fake verticality +- [ ] The plan includes the packet section required by the selected mode +- [ ] Every packet includes the required fields from `commands/workflows/references/execution-shape.md` +- [ ] If mode is `vertical-slices`, the first slice is a tracer bullet, not a broad foundation phase +- [ ] If mode is `vertical-slices`, no slice is a disguised horizontal layer bucket unless it still delivers a demoable outcome +- [ ] Packet scope is explicit enough that an executor does not need to infer missing boundaries from adjacent packets +- [ ] Packet success criteria are testable (not vague) +- [ ] Dependencies are explicit wherever ordering matters - [ ] Architectural context is specific enough to fill `{{ARCHITECTURAL_CONTEXT}}` in execution agent prompts - [ ] The plan declares unit + e2e evidence by default, or records a justified exception with replacement evidence -- [ ] Task test commands collectively satisfy the resolved TDD contract +- [ ] Validation/test commands collectively satisfy the resolved TDD contract ## Directory Setup & Gitignore @@ -1080,12 +1301,12 @@ The plan document is a structured contract consumed by all downstream phases. 
He **`/workflows:architecture`** reads: - Problem Narrative, User Story, Success Criteria, and Architectural Context -- the WHY/WHERE contract it must preserve -- Implementation phases and tasks -- identifies the deepening candidates that need structural clarification +- Execution shape plus execution packets -- identifies the deepening candidates and boundaries that need structural clarification - Constitution Alignment / waivers / brainstorm decisions -- keeps architecture decisions inside approved project guardrails - **Must write**: a dedicated artifact in `docs/architecture/` plus an `architecture_ref` back into the plan **`/deepen-plan`** reads: -- Implementation phases and tasks -- enriches each with parallel research (best practices, performance, UI patterns) +- Execution shape plus execution packets -- enriches each with parallel research and splits, merges, or reshapes packets when the current mode is weak - Success criteria -- validates they are testable and complete - Architectural Context -- uses it to ground research in the right part of the system - `tdd` frontmatter and `## TDD & Evidence Contract` -- preserves the effective Ralph/default loop, evidence requirements, and any justified exceptions @@ -1093,12 +1314,12 @@ The plan document is a structured contract consumed by all downstream phases. He - **Must preserve**: Problem Narrative, User Story, and handoff contract unchanged **`/workflows:work`** reads: -- **Problem Narrative & User Story** -- the orchestrator uses these to validate task outcomes make sense in context, not just pass tests +- **Problem Narrative & User Story** -- the orchestrator uses these to validate slice outcomes make sense in context, not just pass tests - **Architectural Context** -- feeds directly into `{{ARCHITECTURAL_CONTEXT}}` in each execution agent's prompt. 
This is WHY grounded arch context matters -- every subagent gets system-level awareness - **`architecture_ref` / `docs/architecture/` artifact / explicit architecture handoff contract** -- feeds deletion-test decisions, interfaces as test surfaces, seams, adapters, and contracts into execution so subagents do not invent structure ad hoc - **`tdd` frontmatter + `## TDD & Evidence Contract`** -- plan-level values win; `inherit` falls back to `compound-engineering.local.md`; if neither exists, execution should assume Ralph-driven unit + e2e evidence -- **Implementation phases & tasks** -- the execution chunk structure (Files, Depends on, Success criteria, Test command) -- **Success Criteria** -- the orchestrator checks final outcomes against these, not just individual task passes +- **`execution_shape` + execution packets** -- tells the orchestrator whether to execute slices, infrastructure packets, or fix-batch items, and which fields each unit must respect +- **Success Criteria** -- the orchestrator checks final outcomes against these, not just individual unit passes - **`constitution_version` / `constitution_waivers` / Constitution Alignment** -- the execution phase enforces repo-wide guardrails and knows which exceptions were approved - **`brainstorm_ref`** -- if present, the orchestrator can read the original brainstorm for additional context @@ -1108,6 +1329,7 @@ The plan document is a structured contract consumed by all downstream phases. 
He - **Architectural Context** -- used to evaluate whether the implementation respects system boundaries and integration points - **`architecture_ref` / `docs/architecture/` artifact / explicit architecture handoff contract** -- supplies the architecture intent, deletion-test outcomes, interfaces, seams, adapters, and contracts that reviewers must verify or flag as drift - **`tdd` frontmatter + `## TDD & Evidence Contract`** -- review must verify the declared evidence exists and that any deviation from Ralph/unit+e2e is explicitly justified +- **`execution_shape` + execution packets** -- review uses the chosen mode to judge whether the work was decomposed honestly and executed completely - **Constitution Alignment and waivers** -- used to distinguish approved exceptions from blocking constitution violations - **Stakeholder Impact** (A LOT level) -- informs stakeholder-perspective review diff --git a/portable/compound-engineering/commands/workflows/references/execution-agent-prompt.md b/portable/compound-engineering/commands/workflows/references/execution-agent-prompt.md index 78cd269..9047914 100644 --- a/portable/compound-engineering/commands/workflows/references/execution-agent-prompt.md +++ b/portable/compound-engineering/commands/workflows/references/execution-agent-prompt.md @@ -1,5 +1,10 @@ --- -{} +model: claude-sonnet-4.6 +platforms: + copilot: + model: gpt-5.3-codex + opencode: + model: openrouter/moonshotai/kimi-k2.6 --- # Execution Agent Prompt Template @@ -10,24 +15,32 @@ This template is used by the `workflows:work` orchestrator to construct prompts --- -You are an execution agent implementing a specific task from a work plan. Follow the 4-phase protocol below exactly. +You are an execution agent implementing a specific execution unit from a work plan. Follow the 4-phase protocol below exactly. 
-## Your Task +## Your Unit -**Task:** {{TASK_NAME}} +**Unit:** {{UNIT_TITLE}} -{{TASK_DESCRIPTION}} +{{UNIT_DESCRIPTION}} + +**Unit kind:** {{UNIT_KIND}} + +**Outcome scenario:** {{OUTCOME_SCENARIO}} + +**Scope:** {{UNIT_SCOPE}} + +**Scope fence:** {{UNIT_SCOPE_FENCE}} **Files to create/modify:** {{FILE_LIST}} **Success criteria:** {{SUCCESS_CRITERIA}} -**Test command:** `{{TEST_COMMAND}}` +**Validation command:** `{{VALIDATION_COMMAND}}` **Dependencies completed:** {{COMPLETED_DEPENDENCIES}} -## Why This Task Exists +## Why This Unit Exists {{WHY_CONTEXT}} @@ -35,7 +48,11 @@ You are an execution agent implementing a specific task from a work plan. Follow {{ARCHITECTURAL_CONTEXT}} -## Learnings from Previous Tasks +## Architecture Handoff + +{{ARCHITECTURE_HANDOFF}} + +## Learnings from Previous Units {{LEARNINGS_BRIEF}} @@ -51,7 +68,7 @@ You are an execution agent implementing a specific task from a work plan. Follow ## Phase 1: Understand Before Building -Before writing ANY code, review the task requirements AND the "Why This Task Exists" section carefully. +Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. **If anything is unclear, ambiguous, or could be interpreted multiple ways:** - List your questions explicitly @@ -60,7 +77,7 @@ Before writing ANY code, review the task requirements AND the "Why This Task Exi **If everything is clear:** - State your interpretation of the requirements in 2-3 sentences -- State how this task serves the overall user story (from the WHY context) +- State how this unit serves the overall user story (from the WHY context) - List any assumptions you are making (even obvious ones) - Proceed to Phase 2 @@ -84,7 +101,7 @@ If tests fail after implementation: 1. Read the error message carefully -- understand what failed and why 2. Analyze whether the failure is in your implementation or in the test 3. Fix the issue -4. Re-run the test command +4. Re-run the validation command 5. 
Repeat up to 3 total attempts 6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly @@ -98,7 +115,7 @@ Before reporting back, review your own work with fresh eyes. Go through each che - [ ] Did I miss any requirements? **Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Task Exists" section describes? +- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? - [ ] Would a user achieve the stated outcome with this code? - [ ] Did I build anything that doesn't trace back to the success criteria or user story? @@ -117,7 +134,7 @@ Before reporting back, review your own work with fresh eyes. Go through each che **Testing:** - [ ] Do tests verify actual behavior (not just mock behavior)? - [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? +- [ ] Did I run the validation command and confirm it passes? **Evidence:** - [ ] Can I show actual test output (not just "tests pass")? @@ -133,13 +150,13 @@ Return a structured execution report in exactly this format: Use `commands/workflows/references/tdd-evidence-contract.md` as the single source for the `### TDD Evidence` block. `Red` and `Green` prove behavior coverage. `Post-Refactor Green` proves cleanup safety after refactor. Each `Evidence` line should quote the decisive failing or passing signal in one sentence, not a narrative. 
```markdown -## Execution Report: [Task Name] +## Execution Report: [Unit Title] ### Interpretation [Your 2-3 sentence interpretation of what was asked] ### Purpose Served -[Which user story aspect / success criterion this task delivers, from the WHY context] +[Which user story aspect / success criterion this unit delivers, from the WHY context] ### Assumptions Made - [List each assumption, even if obvious] @@ -154,7 +171,7 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc [Insert the exact Ralph evidence block from `commands/workflows/references/tdd-evidence-contract.md`. Preserve the `Red`, `Green`, and `Post-Refactor Green` headings with their command/result/evidence fields.] ### Test Results -- Command: `[test command]` +- Command: `[validation command]` - Result: PASS/FAIL - Attempts: [n] - Output: @@ -171,7 +188,7 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc [If no problems: "None"] ### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] +- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future units] [If none: "None"] @@ -188,9 +205,9 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the single sourc _This section is included only when the resolved TDD contract explicitly allows standard implementation._ 1. Read referenced files and understand existing patterns -2. Implement the task following project conventions +2. Implement the unit following project conventions 3. Write tests matching the success criteria -4. Run the test command: `{{TEST_COMMAND}}` +4. Run the validation command: `{{VALIDATION_COMMAND}}` 5. 
If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) --- diff --git a/portable/compound-engineering/commands/workflows/references/execution-shape.md b/portable/compound-engineering/commands/workflows/references/execution-shape.md new file mode 100644 index 0000000..4ad4d31 --- /dev/null +++ b/portable/compound-engineering/commands/workflows/references/execution-shape.md @@ -0,0 +1,93 @@ +# Execution Shape Contract + +Use this reference when planning, deepening, or executing work so the workflow can preserve judgment without re-explaining the same rules in every prompt. + +## Default + +Choose `vertical-slices` unless that would create fake end-to-end work just to satisfy the template. + +## Allowed modes + +### `vertical-slices` (default) + +Use when a thin, testable, end-to-end behavior exists. + +Required packet: +- `Slice type` +- `Serves` +- `Demo scenario` +- `Scope` +- `Scope fence` +- `Files` +- `Depends on` +- `Dependency type` +- `Success criteria` (written as the slice's `Acceptance criteria` checklist in the plan templates) +- `Test command` (listed under the slice's `Evidence` heading in the plan templates) + +### `infra-track` + +Use for enabling or foundational work where no honest user-visible tracer bullet exists yet. + +Required packet: +- `Capability enabled` +- `Consumers / downstream work unlocked` +- `Scope` +- `Files` +- `Depends on` +- `Risk / rollback` +- `Validation command` +- `Success criteria` + +### `fix-batch` + +Use for a series of small, mostly independent fixes where forcing one vertical slice would blur the real work. + +Required packet: +- `Problem` +- `Repro / expected outcome` +- `Files` +- `Depends on` +- `Validation command` +- `Success criteria` + +## Selection rules + +1. Default to `vertical-slices`. +2. Switch to `infra-track` only when the honest near-term value is enabling capability, not a user-visible behavior. +3. Switch to `fix-batch` only when the work is truly a batch of small fixes, not a feature being split too late. +4. Never force `vertical-slices` if that would create fake verticality. +5. 
If the mode is not the default, record why in `execution_shape.rationale` and in `## Execution Shape`. + +## Plan shape + +Add this frontmatter block to every plan: + +```yaml +execution_shape: + mode: vertical-slices # vertical-slices | infra-track | fix-batch + rationale: "" # required when mode is not vertical-slices +``` + +Add this body section: + +```markdown +## Execution Shape +- **Mode:** [vertical-slices | infra-track | fix-batch] +- **Why:** [1-2 sentences] +``` + +Then use the packet section that matches the chosen mode: +- `## Execution Slices` +- `## Infrastructure Work Packets` +- `## Fix Batch Items` + +## Deepening rules + +- Validate the plan against the chosen mode, not against `vertical-slices` unconditionally. +- You may recommend switching modes if the selected one is clearly wrong, but record that as a `WHY Reassessment` note instead of silently rewriting intent. + +## Execution rules + +- `/workflows:work` must execute the units defined by the chosen mode. +- Do not coerce `infra-track` or `fix-batch` plans into slices unless the user explicitly approves a mode change. +- Session tracking may stay generic (`unit`, `work status`) even when the selected mode is `vertical-slices`. diff --git a/portable/compound-engineering/commands/workflows/references/spec-review-prompt.md b/portable/compound-engineering/commands/workflows/references/spec-review-prompt.md index d6bcaa5..34bcbc1 100644 --- a/portable/compound-engineering/commands/workflows/references/spec-review-prompt.md +++ b/portable/compound-engineering/commands/workflows/references/spec-review-prompt.md @@ -14,15 +14,15 @@ You are a spec compliance reviewer. 
Your job is to verify whether an implementat ## What Was Requested -{{TASK_REQUIREMENTS}} +{{UNIT_REQUIREMENTS}} ## Success Criteria {{SUCCESS_CRITERIA}} -## Task Purpose +## Unit Purpose -{{TASK_SERVES}} +{{UNIT_PURPOSE}} ## What Implementer Claims They Built diff --git a/portable/compound-engineering/commands/workflows/review.md b/portable/compound-engineering/commands/workflows/review.md index 087b219..715639f 100644 --- a/portable/compound-engineering/commands/workflows/review.md +++ b/portable/compound-engineering/commands/workflows/review.md @@ -143,7 +143,7 @@ This context is passed to EVERY review agent below. It is not optional. #### TDD Evidence Gate (BEFORE reviewer dispatch) -If a `docs/execution-sessions/work-*/STATE.md` file exists for this branch, read the completed task session files before dispatching review agents and build a terse evidence ledger. +If a `docs/execution-sessions/work-*/STATE.md` file exists for this branch, read the session files for completed execution units before dispatching review agents and build a terse evidence ledger. Apply `commands/workflows/references/tdd-evidence-contract.md` as the source of truth for the Ralph evidence block and review-gate classifications. Verify the plan's approved exception contract instead of improvising replacement evidence rules. 
@@ -521,8 +521,8 @@ After creating all todo files, present comprehensive summary: ### TDD Evidence Gate -- **Behavior coverage:** PASS / FAIL — [task/session refs with weak or missing `Red`/`Green` evidence] -- **Cleanup after refactor:** PASS / FAIL — [task/session refs with weak or missing `Post-Refactor Green` evidence] +- **Behavior coverage:** PASS / FAIL — [unit/session refs with weak or missing `Red`/`Green` evidence] +- **Cleanup after refactor:** PASS / FAIL — [unit/session refs with weak or missing `Post-Refactor Green` evidence] [If PARTIALLY or NO:] **Gaps:** diff --git a/portable/compound-engineering/commands/workflows/work.md b/portable/compound-engineering/commands/workflows/work.md index d011386..81783e1 100644 --- a/portable/compound-engineering/commands/workflows/work.md +++ b/portable/compound-engineering/commands/workflows/work.md @@ -14,17 +14,17 @@ Execute a work plan while maintaining WHY tracing from problem narrative through ## Introduction -This command takes a work document (plan, specification, or todo file) and executes it systematically using a **subagent orchestration model**. The orchestrator (this conversation) decomposes the plan into scoped chunks and delegates each to a focused subagent. Each subagent follows a standardized 4-phase protocol (understand, implement, self-review, report) defined in the execution agent prompt template. +This command takes a work document (plan, specification, or todo file) and executes it systematically using a **subagent orchestration model**. The orchestrator (this conversation) loads or adapts the plan into execution units and delegates each unit to a focused subagent. `vertical-slices` is the default execution shape, but `infra-track` and `fix-batch` are also valid when declared by the plan. Each subagent follows a standardized 4-phase protocol (understand, implement, self-review, report) defined in the execution agent prompt template. 
-**WHY-grounded execution:** Every subagent receives the plan's WHY context -- the problem narrative, user story, architectural context, the architecture handoff contract, and which success criterion their specific task serves. This prevents implementation drift where technically correct code fails to deliver the user's actual need. The orchestrator is the guardian of WHY: it extracts purpose from the plan, threads it through every task prompt, and validates that the combined output delivers the stated user story. +**WHY-grounded execution:** Every subagent receives the plan's WHY context -- the problem narrative, user story, architectural context, the architecture handoff contract, and which success criterion their specific unit serves. This prevents implementation drift where technically correct code fails to deliver the user's actual need. The orchestrator is the guardian of WHY: it extracts purpose from the plan, threads it through every unit prompt, and validates that the combined output delivers the stated user story. ### Review Mode This command supports a `--review-mode` argument that controls when code review happens: -- **`bulk`** (default) -- Review happens after ALL tasks complete, using `/workflows:review`. This is the standard behavior and is fastest for most work. -- **`inline`** -- After each task, a lightweight two-stage review (spec compliance then code quality) runs automatically. Catches spec drift early but adds 2-4 extra subagent calls per task. -- **`both`** -- Inline review per task AND comprehensive `/workflows:review` at the end. Maximum quality assurance. +- **`bulk`** (default) -- Review happens after ALL units complete, using `/workflows:review`. This is the standard behavior and is fastest for most work. +- **`inline`** -- After each unit, a lightweight two-stage review (spec compliance then code quality) runs automatically. Catches spec drift early but adds 2-4 extra subagent calls per unit. 
+- **`both`** -- Inline review per unit AND comprehensive `/workflows:review` at the end. Maximum quality assurance. If no `--review-mode` is specified, check `compound-engineering.local.md` for a `review_mode` setting. If not found there either, default to `bulk`. @@ -38,15 +38,16 @@ If no `--review-mode` is specified, check `compound-engineering.local.md` for a 1. **Read Plan and Extract WHY + Guardrail Context** - - Read the work document completely - - **Extract WHY artifacts** from the plan (these ground everything that follows): - - **Problem Narrative** -- why this work exists, what pain it solves - - **User Story** -- who benefits and what outcome they get - - **Architectural Context** -- how the solution fits in the system - - **Success Criteria** -- measurable conditions that define "done" - - **Phase-to-story tracing** -- each phase's "Serves:" line showing what user story aspect it delivers - - **Constitution alignment** -- relevant principles, required approvals, and any approved waivers - - **`tdd` frontmatter + `## TDD & Evidence Contract`** -- resolve the effective TDD contract using `commands/workflows/references/tdd-evidence-contract.md` (plan overrides local, `inherit` falls back, and no-local-config defaults to Ralph-driven `red-green-refactor` with unit + e2e evidence required) + - Read the work document completely + - **Extract WHY artifacts** from the plan (these ground everything that follows): + - **Problem Narrative** -- why this work exists, what pain it solves + - **User Story** -- who benefits and what outcome they get + - **Architectural Context** -- how the solution fits in the system + - **Success Criteria** -- measurable conditions that define "done" + - **Execution shape** -- resolve it using `commands/workflows/references/execution-shape.md` + - **Unit tracing** -- each packet's `Serves`, `Consumers`, or equivalent purpose line showing what outcome it delivers or unlocks + - **Constitution alignment** -- relevant principles, 
required approvals, and any approved waivers + - **`tdd` frontmatter + `## TDD & Evidence Contract`** -- resolve the effective TDD contract using `commands/workflows/references/tdd-evidence-contract.md` (plan overrides local, `inherit` falls back, and no-local-config defaults to Ralph-driven `red-green-refactor` with unit + e2e evidence required) - Check for `handoff:` frontmatter in the plan. If present, verify all flags are `true` (problem_narrative, user_story, architectural_context, success_criteria). If any are `false`, warn the user that WHY context is incomplete and suggest running `/workflows:brainstorm` or `/workflows:plan` first. - If the resolved contract weakens Ralph/unit+e2e without a justified exception in the plan, stop and ask for the plan contract to be corrected before execution - If `docs/constitution.md` exists, read it and extract the active constitution version, applicable principles, execution baselines, and approval rules. If the plan lists `constitution_waivers`, honor only those explicit exceptions. @@ -55,7 +56,8 @@ If no `--review-mode` is specified, check `compound-engineering.local.md` for a - If no architecture artifact is recorded, assemble an explicit architecture handoff contract from the plan's Architectural Context, Key Decisions, Constitution Alignment, brainstorm context, and execution constraints. Tell the user this is a fallback and recommend `/workflows:architecture` if boundaries are still unsettled. - Review any other references or links provided in the plan - If the constitution requires explicit approval for any part of the planned work (for example, risky writes, schema changes, auth changes, or scope expansions), surface that before execution starts - - If anything is unclear or ambiguous, ask clarifying questions now + - If the document is not already in a declared execution shape, treat it as **legacy input**. Before spawning subagents, adapt it into execution units in STATE.md. 
If that adaptation materially changes scope or ordering, ask the user to approve the adapted unit backlog before proceeding. + - If anything is unclear or ambiguous, ask clarifying questions now - Get user approval to proceed - **Do not skip this** - better to ask questions now than build the wrong thing @@ -103,27 +105,27 @@ If no `--review-mode` is specified, check `compound-engineering.local.md` for a - You want to keep the default branch clean while experimenting - You plan to switch between branches frequently -3. **Preview Task Breakdown** - - Mentally identify the major tasks from the plan +3. **Preview Unit Breakdown** + - Mentally identify the major execution units from the plan - Note any questions about dependencies or scope - - The formal task decomposition happens in Phase 2 Step 4 (STATE.md), which is the persistent record of progress + - The formal unit decomposition happens in Phase 2 Step 4 (STATE.md), which is the persistent record of progress - TodoWrite can be used for in-conversation progress tracking if helpful, but STATE.md is the source of truth ### Phase 2: Orchestrated Execution -Phase 2 is where the orchestrator (this conversation) decomposes the plan into scoped chunks and delegates each to a focused subagent. The orchestrator does NOT implement code itself -- it decomposes, delegates, records, and routes. +Phase 2 is where the orchestrator (this conversation) resolves the plan's execution shape, decomposes the work into execution units, and delegates each to a focused subagent. The orchestrator does NOT implement code itself -- it decomposes, delegates, records, and routes. #### Step 1: Validate Plan Readiness -Before executing, validate four things: **structural readiness** (tasks are granular and testable), **WHY readiness** (the plan carries purpose context), **TDD readiness** (the execution contract is explicit and enforceable), and **guardrail readiness** (repo-wide rules are visible and actionable). 
+Before executing, validate four things: **structural readiness** (the selected execution shape is honest and its units are testable), **WHY readiness** (the plan carries purpose context), **TDD readiness** (the execution contract is explicit and enforceable), and **guardrail readiness** (repo-wide rules are visible and actionable). -**Structural readiness** -- each implementation task should have: +**Structural readiness** -- first resolve `execution_shape` using `commands/workflows/references/execution-shape.md`, then verify the units for that mode: -- **Task description** -- what needs to be done -- **Files to create/modify** -- specific file paths -- **Success criteria** -- checkboxes that define "done" -- **Test command** -- how to verify the task works -- **Dependencies** -- which other tasks must complete first +- **`vertical-slices`** -- slice type, serves, demo scenario, scope fence, files, success criteria, validation command, dependencies, dependency type +- **`infra-track`** -- capability enabled, consumers / downstream work unlocked, scope, files, risk / rollback, success criteria, validation command, dependencies +- **`fix-batch`** -- problem, repro / expected outcome, files, success criteria, validation command, dependencies +- **Default rule** -- if `execution_shape` is missing, assume `vertical-slices` +- **Anti-coercion rule** -- do not force infra or fix-batch work into slices if that would create fake verticality **Guardrail readiness** -- when the project has `docs/constitution.md`, the plan should make repo-wide rules visible: @@ -138,25 +140,25 @@ Before executing, validate four things: **structural readiness** (tasks are gran - **`## TDD & Evidence Contract` present** -- states the resolved execution path in plain language - **Effective mode resolved** -- Ralph-driven by default unless the plan explicitly approves a standard-mode exception - **Required evidence resolved** -- unit + e2e by default, or justified replacement evidence when 
explicitly waived -- **Report contract visible** -- Ralph-driven tasks must emit stable red, green, and post-refactor green evidence blocks +- **Report contract visible** -- Ralph-driven units must emit stable red, green, and post-refactor green evidence blocks **WHY readiness** -- the plan should have: - **Problem Narrative** -- present and non-empty - **User Story** -- present with clear "As a... I want... So that..." - **Architectural Context** -- present, describing system fit -- **Success Criteria** -- present at plan level (not just task level) -- **Phase tracing** -- each phase has a "Serves:" line connecting it to the user story +- **Success Criteria** -- present at plan level (not just unit level) +- **Unit tracing** -- each execution unit has a purpose line connecting it to the user story or explicit enabling outcome -If the plan lacks structural details, or if no architecture artifact / handoff contract can explain the boundaries, refuse to proceed and suggest running `/workflows:architecture` first if the boundaries are still fuzzy, then `/deepen-plan`, or manually breaking down the plan. +If the plan lacks structural details, or if no architecture artifact / handoff contract can explain the boundaries, refuse to proceed and suggest running `/workflows:architecture` first if the boundaries are still fuzzy, then `/deepen-plan`, or manually breaking down the plan into execution units. If the plan lacks the `tdd` block or `## TDD & Evidence Contract`, or if the resolved contract is ambiguous, refuse to proceed and suggest `/workflows:plan` or `/deepen-plan` to repair the execution contract before spawning subagents. If the plan lacks WHY artifacts, the orchestrator should **construct minimal WHY context** before proceeding: 1. Ask the user: "This plan doesn't include a problem narrative or user story. In one sentence, what problem are we solving and for whom?" -2. Infer success criteria from the task-level criteria +2. 
Infer success criteria from the unit-level criteria 3. Infer architectural context from the file paths and technologies mentioned -4. Record these in STATE.md (see Step 3) so they're available for all tasks +4. Record these in STATE.md (see Step 3) so they're available for all units #### Step 2: Check for Resumable Session @@ -169,7 +171,7 @@ ls docs/execution-sessions/work-*/STATE.md 2>/dev/null If a previous session exists for the same plan file and has `status: in_progress`: - Ask the user: "Found incomplete session `[session_id]` for this plan. Resume where you left off, or start fresh?" -- **If resume**: Read STATE.md, load the WHY Context section plus the Architecture Handoff section, skip completed tasks, load the learnings brief, and continue from `current_task` +- **If resume**: Read STATE.md, load the WHY Context section plus the Architecture Handoff section, skip completed units, load the learnings brief, and continue from `current_unit` - **If fresh**: Archive the old session directory (rename with `-archived` suffix), then start a new session If no resumable session exists, proceed to Step 3. 
@@ -191,8 +193,9 @@ plan_file: [path to plan] brainstorm_ref: [path to brainstorm, if available] started: [ISO timestamp] status: in_progress -current_task: 0 -total_tasks: [count] +execution_shape: [vertical-slices | infra-track | fix-batch] +current_unit: 0 +total_units: [count] session_id: [SESSION_ID] --- @@ -227,57 +230,68 @@ session_id: [SESSION_ID] - Seams / adapters / contracts: [boundaries this execution must honor] - Review guidance: [what `/workflows:review` must verify later] -## Task Status -| # | Task | Serves | Status | Attempts | Session File | -|---|------|--------|--------|----------|--------------| -| 1 | [task name] | [which user story aspect] | pending | -- | -- | -| 2 | [task name] | [which user story aspect] | pending | -- | -- | +## Work Status +| # | Unit | Kind | Serves / Unlocks | Status | Attempts | Session File | +|---|------|------|------------------|--------|----------|--------------| +| 1 | [unit title] | tracer-bullet | [which user story aspect or enabling outcome] | pending | -- | -- | +| 2 | [unit title] | expansion | [which user story aspect or enabling outcome] | pending | -- | -- | ... ## Learnings Brief _No learnings yet._ ``` -#### Step 4: Decompose Plan into Execution Chunks +#### Step 4: Load or Adapt Execution Units -The orchestrator parses the plan and creates a list of execution chunks. Each chunk is a self-contained unit of work. The orchestrator does the heavy lifting here: +The orchestrator parses the plan and creates a list of execution units. Each unit is a self-contained packet of work defined by the selected execution shape. The orchestrator does the heavy lifting here: -- **Break large phases** into smaller tasks if needed (each task should be completable in one subagent session) -- **Preserve WHY tracing** -- when splitting a phase, each resulting task inherits the parent phase's "Serves:" line. Never create an orphan task with no connection to the user story. 
-- **Identify file dependencies** between tasks (Task B modifies a file created by Task A) -- **Determine parallelizable tasks** -- tasks with non-overlapping file sets can run simultaneously -- **Ensure each chunk has clear success criteria** -- if the plan already defines them, use them directly; otherwise, the orchestrator must create them -- **Map each task to its purpose** -- record which success criterion or user story aspect each task delivers (this goes in STATE.md's "Serves" column) +- **Prefer plan-defined units directly** -- if the plan already declares a coherent execution shape, execute those packets as written +- **Adapt legacy phase/task plans into units before coding** -- do not execute raw task lists directly once the shape contract is available +- **Break oversized units** into smaller units if needed (each unit should be completable in one subagent session) +- **Preserve WHY tracing** -- when splitting a unit, each resulting unit inherits or refines the parent unit's purpose line. Never create an orphan unit with no connection to the user story. +- **Identify file dependencies** between units +- **Determine parallelizable units** -- only units with non-overlapping file sets and compatible dependencies can run simultaneously +- **Ensure each unit has clear success criteria** -- if the plan already defines them, use them directly; otherwise, the orchestrator must create them +- **Map each unit to its purpose** -- record which success criterion or enabling outcome each unit delivers (this goes in STATE.md's "Serves / Unlocks" column) -If the plan already has well-defined tasks with success criteria, use them directly. If not, the orchestrator must create them before proceeding. 
+Mode-specific rules: +- **`vertical-slices`** -- execute slices directly; keep the first unit a tracer bullet +- **`infra-track`** -- execute infrastructure work packets directly; do not coerce them into fake slices +- **`fix-batch`** -- execute fix items directly; keep each one narrow and independently verifiable -#### Step 5: Execute Task Loop +If the plan already has well-defined units with success criteria, use them directly. If not, the orchestrator must create them before proceeding. -For each task (or parallel batch of tasks), follow this cycle: +#### Step 5: Execute Unit Loop + +For each unit (or parallel batch of units), follow this cycle: ##### a. Build Scoped Prompt -For each task, the orchestrator constructs a focused prompt by loading the **execution agent prompt template** from `commands/workflows/references/execution-agent-prompt.md` and filling in the context blocks. +For each unit, the orchestrator constructs a focused prompt by loading the **execution agent prompt template** from `commands/workflows/references/execution-agent-prompt.md` and filling in the context blocks. Before building `scoped_prompt`, apply the shared `Reference Template Loading` protocol in `commands/workflows/references/orchestration-protocol.md` to `execution-agent-prompt.md`. Fill the placeholders from the loaded template and do not reconstruct the prompt from memory. -- **{{TASK_NAME}}** and **{{TASK_DESCRIPTION}}** -- from the plan +- **{{UNIT_TITLE}}** and **{{UNIT_DESCRIPTION}}** -- from the plan +- **{{UNIT_KIND}}** -- from the plan (`tracer-bullet`, `infra-packet`, `fix-item`, etc.) 
+- **{{OUTCOME_SCENARIO}}** -- the observable behavior or enabling outcome this unit proves +- **{{UNIT_SCOPE}}** -- what the unit owns and excludes +- **{{UNIT_SCOPE_FENCE}}** -- the boundary that keeps the unit thin - **{{FILE_LIST}}** -- files to create/modify from the plan - **{{SUCCESS_CRITERIA}}** -- checkboxes that define "done" -- **{{TEST_COMMAND}}** -- how to verify the task works -- **{{COMPLETED_DEPENDENCIES}}** -- list of already-completed tasks this depends on +- **{{VALIDATION_COMMAND}}** -- how to verify the unit works +- **{{COMPLETED_DEPENDENCIES}}** -- list of already-completed units this depends on - **{{WHY_CONTEXT}}** -- the purpose grounding block (constructed by orchestrator): ``` - ## Why This Task Exists + ## Why This Unit Exists **Problem:** [problem narrative from plan -- 1-2 sentences] **User Story:** [user story from plan] - **This task serves:** [the "Serves:" line from this task's parent phase -- which user story aspect or success criterion this delivers] + **This unit serves:** [the packet purpose line from this unit -- which user story aspect, success criterion, or enabling outcome this delivers] **Overall success criteria:** [plan-level success criteria list] **Guardrails:** [relevant constitution principles, approval rules, and approved waivers] ``` -- **{{ARCHITECTURAL_CONTEXT}}** -- from the plan's Architectural Context section, filtered to what's relevant for this task's files and domain -- **{{ARCHITECTURE_HANDOFF}}** -- from the `docs/architecture/` artifact or explicit plan-derived handoff contract; include deletion-test decisions, interfaces as test surfaces, seams, adapters, contracts, and downstream review guidance relevant to this task -- **{{LEARNINGS_BRIEF}}** -- from previous tasks, filtered by domain relevance (only include backend learnings for backend tasks, frontend learnings for frontend tasks, etc.) 
+- **{{ARCHITECTURAL_CONTEXT}}** -- from the plan's Architectural Context section, filtered to what's relevant for this unit's files and domain +- **{{ARCHITECTURE_HANDOFF}}** -- from the `docs/architecture/` artifact or explicit plan-derived handoff contract; include deletion-test decisions, interfaces as test surfaces, seams, adapters, contracts, and downstream review guidance relevant to this unit +- **{{LEARNINGS_BRIEF}}** -- from previous units, filtered by domain relevance - **{{PROJECT_CONVENTIONS}}** -- from CLAUDE.md plus relevant constitution baselines - **{{TDD_CONTRACT}}** -- the resolved execution contract: effective mode, Ralph/default loop, required unit/e2e evidence, and any explicit exceptions - **{{TDD_SECTION}}** -- if the resolved effective mode is Ralph-driven, include the Ralph/TDD Implementation Section from the template; otherwise include the Standard Implementation Section. Do not treat Ralph as an adjacent side command when it is the resolved default. @@ -290,7 +304,7 @@ The execution agent template instructs each subagent to follow a 4-phase protoco ##### b. Spawn Subagent -Delegate the task to a focused subagent: +Delegate the unit to a focused subagent: ``` Task(general-purpose, prompt=scoped_prompt) @@ -312,23 +326,23 @@ The subagent prompt is constructed from the loaded execution agent template (`co - Final test results (pass/fail) - Attempt count -**For parallel tasks**: Spawn multiple subagents simultaneously. Only parallelize tasks with non-overlapping file sets. Before parallelizing, verify file sets do not overlap. +**For parallel units**: Spawn multiple subagents simultaneously only when their file sets do not overlap and their dependency types allow parallel work. Before parallelizing, verify file sets do not overlap and no unit claims shared mutable state without an explicit guard. **Example scoped prompt:** ``` -You are implementing Task 3 of a feature plan. 
Here is your scoped context: +You are implementing Unit 3 of a feature plan. Here is your scoped context: -## Why This Task Exists +## Why This Unit Exists **Problem:** Users currently cannot authenticate, forcing manual session management that's error-prone and insecure. **User Story:** As a user, I want to log in with my credentials so that I can access my personalized dashboard securely. -**This task serves:** "Secure authentication flow" -- implementing the core token generation that enables the login experience. +**This unit serves:** "Secure authentication flow" -- extending the thin end-to-end login path with token generation and validation. **Overall success criteria:** - Users can log in and receive a JWT token - Invalid credentials are rejected with clear error messages - Tokens expire after the configured TTL -## Task +## Unit Create the UserAuthService with JWT token generation and validation. ## Files to Create/Modify @@ -342,11 +356,11 @@ Create the UserAuthService with JWT token generation and validation. - [ ] authenticate() throws AuthenticationError for invalid credentials - [ ] Token validation works for valid and expired tokens -## Test Command +## Validation Command npm test -- --filter UserAuthService ## Architectural Context -JWT-based stateless auth. Tokens issued by UserAuthService, validated by middleware (Task 4). No server-side session storage. +JWT-based stateless auth. Tokens issued by UserAuthService, validated by middleware (Unit 4). No server-side session storage. ## TDD Execution Contract - Effective mode: Ralph-driven TDD @@ -359,7 +373,7 @@ JWT-based stateless auth. 
Tokens issued by UserAuthService, validated by middlew - Variables are camelCase - Type annotations on all parameters and return types -## Learnings from Previous Tasks +## Learnings from Previous Units - [backend] Use jest.mock() for module mocking - [backend] Factory pattern: createUser() helper not new User() - [testing] Use expect().toThrow() for error assertions @@ -377,15 +391,16 @@ JWT-based stateless auth. Tokens issued by UserAuthService, validated by middlew When the subagent returns, the orchestrator processes the results: -**0. Validate the execution contract evidence** -- audit the report against `commands/workflows/references/tdd-evidence-contract.md`. If a Ralph-driven task is missing stable `Red`, `Green`, and `Post-Refactor Green` evidence blocks, treat the report as incomplete and send it back for correction before marking the task complete. +**0. Validate the execution contract evidence** -- audit the report against `commands/workflows/references/tdd-evidence-contract.md`. If a Ralph-driven unit is missing stable `Red`, `Green`, and `Post-Refactor Green` evidence blocks, treat the report as incomplete and send it back for correction before marking the unit complete. -**1. Write session file** to `docs/execution-sessions/${SESSION_ID}/task-{nn}-{slug}.md`: +**1. Write session file** to `docs/execution-sessions/${SESSION_ID}/unit-{nn}-{slug}.md`: ```markdown --- -task: "[task name]" -task_number: [n] -serves: "[which user story aspect / success criterion this task delivers]" +unit: "[unit title]" +unit_number: [n] +unit_kind: [tracer-bullet|expansion|hardening|infra-packet|fix-item] +serves: "[which user story aspect / success criterion / enabling outcome this unit delivers]" status: [completed|failed] attempt_count: [n] domains: [backend, frontend, testing, database, etc.] @@ -422,22 +437,22 @@ session_id: [SESSION_ID] **2. 
Inline Review (when `--review-mode inline` or `--review-mode both`)** - If the `--review-mode` argument is `inline` or `both`, perform a two-stage inline review before proceeding to the next task. If `--review-mode` is `bulk` (the default), skip this step. +If the `--review-mode` argument is `inline` or `both`, perform a two-stage inline review before proceeding to the next unit. If `--review-mode` is `bulk` (the default), skip this step. **Stage 1: Spec Compliance Review** Apply the shared `Reference Template Loading` protocol from `commands/workflows/references/orchestration-protocol.md`, substituting `spec-review-prompt.md`. If the template cannot be loaded and quoted, stop the inline review loop and report the missing template instead of improvising. Then fill in: - - `{{TASK_REQUIREMENTS}}` -- the task description and success criteria + - `{{UNIT_REQUIREMENTS}}` -- the unit description, outcome scenario, scope fence, and success criteria - `{{SUCCESS_CRITERIA}}` -- the success criteria checkboxes - `{{IMPLEMENTER_REPORT}}` -- the execution report from the subagent - - `{{TASK_SERVES}}` -- what user story aspect this task delivers (from the task's "Serves:" line) + - `{{UNIT_PURPOSE}}` -- what user story aspect or enabling outcome this unit delivers (from the unit's purpose line) Spawn a spec reviewer subagent: ``` Task(general-purpose, prompt=filled_spec_review_prompt) ``` - The spec reviewer should check not just checkbox compliance but whether the implementation actually delivers on the purpose stated in "Serves:". A task can pass all checkboxes but miss the intent. + The spec reviewer should check not just checkbox compliance but whether the implementation actually delivers on the recorded purpose. A unit can pass all checkboxes but miss the intent. - If **PASS**: proceed to Stage 2 - If **FAIL**: spawn a new execution subagent with the specific issues to fix, then re-run the spec reviewer (max 2 fix-review cycles). 
If still failing after 2 cycles, log the issues and ask the user how to proceed. @@ -455,26 +470,26 @@ session_id: [SESSION_ID] - If **PASS**: proceed to next steps - If **FAIL** with Critical issues: spawn fix subagent, re-review (max 2 cycles) - - If **FAIL** with only Important/Minor issues: log them for the orchestrator's attention but proceed to next task (these will also be caught by `/workflows:review` if run later) + - If **FAIL** with only Important/Minor issues: log them for the orchestrator's attention but proceed to next unit (these will also be caught by `/workflows:review` if run later) - **Note:** Inline review is a lightweight per-task check. It does NOT replace the comprehensive `/workflows:review` multi-agent review. When `--review-mode both` is active, inline review runs per-task AND `/workflows:review` runs after all tasks complete. + **Note:** Inline review is a lightweight per-unit check. It does NOT replace the comprehensive `/workflows:review` multi-agent review. When `--review-mode both` is active, inline review runs per-unit AND `/workflows:review` runs after all units complete. -**3. Update STATE.md** -- mark the task status, increment `current_task`, update the task status table +**3. Update STATE.md** -- mark the unit status, increment `current_unit`, update the work status table -**4. Update learnings brief** -- add new learnings from this task, tagged by domain, deduplicated against existing learnings +**4. Update learnings brief** -- add new learnings from this unit, tagged by domain, deduplicated against existing learnings **5. Update plan file** -- check off completed items (`[ ]` to `[x]`) in the original plan document -**6. Regression guard** -- run test commands from ALL previously completed tasks. If any regress: - - Log the regression in the current task's session file +**6. Regression guard** -- run validation commands from ALL previously completed units. 
If any regress: + - Log the regression in the current unit's session file - Spawn a fix subagent with context about what broke and why - - Do not proceed to the next task until the regression is fixed + - Do not proceed to the next unit until the regression is fixed **7. Incremental commit** if appropriate (logical unit complete, tests pass): | Commit when... | Don't commit when... | |----------------|---------------------| - | Logical unit complete (model, service, component) | Small part of a larger unit | + | Logical unit complete (one observable outcome) | Small part of a larger unit | | Tests pass + meaningful progress | Tests failing | | About to switch contexts (backend to frontend) | Purely scaffolding with no behavior | | About to attempt risky/uncertain changes | Would need a "WIP" commit message | @@ -495,10 +510,10 @@ session_id: [SESSION_ID] If a subagent fails after its internal retries: -1. **Reframe**: Can the task be broken down differently? Try spawning a new subagent with a different approach or smaller scope. -2. **Ask user**: Use AskUserQuestion -- "Task [name] failed after 3 attempts. [error summary]. How should I proceed?" - - Options: "Retry with different approach", "Skip and continue", "Stop pipeline", "I'll fix it manually" -3. **Skip and continue**: Mark task as `skipped` in STATE.md. Note it as a blocker for any dependent tasks. Dependent tasks are also skipped automatically. +1. **Reframe**: Can the unit be broken down differently? Try spawning a new subagent with a different approach or smaller scope. +2. **Ask user**: Use AskUserQuestion -- "Unit [name] failed after 3 attempts. [error summary]. How should I proceed?" + - Options: "Retry with different approach", "Skip and continue", "Stop pipeline", "I'll fix it manually" +3. **Skip and continue**: Mark the unit as `skipped` in STATE.md. Note it as a blocker for any dependent units. Dependent units are also skipped automatically. 4. 
**Stop pipeline**: Save all state to STATE.md with `status: paused`, present a summary of what was completed and what remains. ### Phase 3: Quality Check @@ -519,11 +534,11 @@ If a subagent fails after its internal retries: Before mechanical quality checks, validate that the combined work delivers on the WHY: - - **User story delivered?** -- Review the user story from STATE.md. Can a user actually achieve the stated outcome with what was built? If any success criterion is unmet or any task was skipped, note the gap. - - **Architectural integrity?** -- Does the implementation match the architectural context from the plan? Flag any deviations (e.g., plan said "stateless JWT" but implementation uses server sessions). - - **Constitution honored?** -- Does the implementation respect the constitution baselines and approval rules captured in STATE.md? Flag any unwaived violations. - - **Ralph evidence complete?** -- For Ralph-driven tasks, does every session file include Red, Green, and Post-Refactor Green evidence aligned to the resolved unit/e2e contract or an explicitly approved exception? - - **No orphan code** -- Is there any implemented code that doesn't trace back to the user story or success criteria? This may indicate scope creep during execution. + - **User story delivered?** -- Review the user story from STATE.md. Can a user actually achieve the stated outcome with what was built? If any success criterion is unmet or any unit was skipped, note the gap. + - **Architectural integrity?** -- Does the implementation match the architectural context from the plan? Flag any deviations (e.g., plan said "stateless JWT" but implementation uses server sessions). + - **Constitution honored?** -- Does the implementation respect the constitution baselines and approval rules captured in STATE.md? Flag any unwaived violations. 
+ - **Ralph evidence complete?** -- For Ralph-driven units, does every session file include Red, Green, and Post-Refactor Green evidence aligned to the resolved unit/e2e contract or an explicitly approved exception? + - **No orphan code** -- Is there any implemented code that doesn't trace back to the user story or success criteria? This may indicate scope creep during execution. If purpose validation reveals gaps, present them to the user before proceeding to PR. @@ -536,8 +551,8 @@ If a subagent fails after its internal retries: Run configured agents in parallel with Task tool. **Pass the WHY context (problem narrative, user story, success criteria) to reviewer agents** so they can evaluate fitness for purpose, not just code quality. Present findings and address critical issues. 4. **Final Validation** - - All tasks in STATE.md marked `completed` (or explicitly `skipped` with user approval) - - All tests pass (including regression tests from every completed task) + - All units in STATE.md marked `completed` (or explicitly `skipped` with user approval) + - All tests pass (including regression tests from every completed unit) - Linting passes - Code follows existing patterns - Purpose validation passed (user story deliverable, architecture intact) @@ -632,8 +647,8 @@ If the `finishing-branch` skill is not available, follow the manual steps below: - **Key decisions made:** [architectural or design choices] ## Success Criteria Status - - [x] [criterion 1 from plan] -- delivered by Task N - - [x] [criterion 2 from plan] -- delivered by Task N + - [x] [criterion 1 from plan] -- delivered by Unit N + - [x] [criterion 2 from plan] -- delivered by Unit N - [ ] [criterion 3 if skipped] -- skipped: [reason] ## Testing @@ -692,7 +707,7 @@ If the `finishing-branch` skill is not available, follow the manual steps below: 5. 
**Notify User** - Summarize what was completed - Link to PR - - Highlight any tasks that were skipped and why + - Highlight any units that were skipped and why - Reference the execution session directory for detailed logs - Note any follow-up work needed - Suggest next steps if applicable @@ -707,7 +722,7 @@ For complex plans with multiple independent workstreams, enable swarm mode for p | Use Swarm Mode when... | Use Standard Mode when... | |------------------------|---------------------------| -| Plan has 5+ independent tasks | Plan is linear/sequential | +| Plan has 5+ independent units | Plan is linear/sequential | | Multiple specialists needed (review + test + implement) | Single-focus work | | Want maximum parallelism | Simpler mental model preferred | | Large feature with clear phases | Small feature or bug fix | @@ -716,7 +731,7 @@ For complex plans with multiple independent workstreams, enable swarm mode for p To trigger swarm execution, say: -> "Make a Task list and launch an army of agent swarm subagents to build the plan" +> "Make a unit list and launch an army of agent swarm subagents to build the plan" Or explicitly request: "Use swarm mode for this work" @@ -729,10 +744,10 @@ When swarm mode is enabled, the workflow changes: Teammate({ operation: "spawnTeam", team_name: "work-{timestamp}" }) ``` -2. **Create Task List with Dependencies** - - Parse plan into TaskCreate items +2. **Create Unit List with Dependencies** + - Parse plan into execution work items - Set up blockedBy relationships for sequential dependencies - - Independent tasks have no blockers (can run in parallel) + - Independent units have no blockers (can run in parallel) 3. 
**Spawn Specialized Teammates** ``` @@ -740,7 +755,7 @@ When swarm mode is enabled, the workflow changes: team_name: "work-{timestamp}", name: "implementer", subagent_type: "general-purpose", - prompt: "Claim implementation tasks, execute, mark complete", + prompt: "Claim implementation units, execute, mark complete", run_in_background: true }) @@ -748,13 +763,13 @@ When swarm mode is enabled, the workflow changes: team_name: "work-{timestamp}", name: "tester", subagent_type: "general-purpose", - prompt: "Claim testing tasks, run tests, mark complete", + prompt: "Claim testing units, run tests, mark complete", run_in_background: true }) ``` 4. **Coordinate and Monitor** - - Team lead monitors task completion + - Team lead monitors unit completion - Spawn additional workers as phases unblock - Handle plan approval if required @@ -775,7 +790,7 @@ See the `orchestrating-swarms` skill for detailed swarm patterns and best practi ### WHY Grounds Everything -- Every subagent knows why its task exists, not just what to build +- Every subagent knows why its unit exists, not just what to build - The orchestrator is the guardian of WHY: it extracts, threads, and validates purpose - Purpose drift is caught by inline reviews and Phase 3 validation, not just at the end - If the combined work doesn't deliver the user story, passing tests don't matter @@ -784,7 +799,7 @@ See the `orchestrating-swarms` skill for detailed swarm patterns and best practi - The orchestrator decomposes, delegates, records, and routes. It does NOT implement code itself. - Each subagent gets only the context it needs. No conversation history pollution. -- Learnings compound: each task benefits from everything learned in previous tasks. +- Learnings compound: each unit benefits from everything learned in previous units. 
### Start Fast, Execute Faster @@ -814,7 +829,7 @@ See the `orchestrating-swarms` skill for detailed swarm patterns and best practi ### Ship Complete Features -- Mark all tasks completed before moving on +- Mark all units completed before moving on - Don't leave features 80% done - A finished feature that ships beats a perfect feature that doesn't @@ -822,7 +837,7 @@ See the `orchestrating-swarms` skill for detailed swarm patterns and best practi - Escalation path (reframe, ask user, skip, stop) -- not infinite loops - Progress is persistent: STATE.md means you can resume after crashes -- Regression is caught early: previous tests re-run after each task +- Regression is caught early: previous tests re-run after each unit - When debugging unexpected errors, use the `systematic-debugging` skill for structured root-cause analysis instead of trial-and-error ## Quality Checklist @@ -830,12 +845,12 @@ See the `orchestrating-swarms` skill for detailed swarm patterns and best practi Before creating PR, verify: - [ ] All clarifying questions asked and answered -- [ ] All tasks in STATE.md marked completed (or explicitly skipped with user approval) +- [ ] All units in STATE.md marked completed (or explicitly skipped with user approval) - [ ] **User story deliverable** -- the combined work enables the stated user outcome - [ ] **Success criteria met** -- every plan-level success criterion addressed (or gap documented) - [ ] **Architecture intact** -- implementation matches the plan's architectural context - [ ] Tests pass (run project's test command) -- [ ] Regression tests from all completed tasks pass +- [ ] Regression tests from all completed units pass - [ ] Linting passes (use linting-agent) - [ ] Code follows existing patterns - [ ] Figma designs match implementation (if applicable) @@ -861,14 +876,14 @@ For most features: tests + linting + following patterns is sufficient. 
## Common Pitfalls to Avoid - **Losing the WHY** - Subagents build what's specified but miss the intent. Always pass WHY context. -- **Purpose drift** - Tasks individually pass but combined output doesn't deliver the user story. Validate at Phase 3. +- **Purpose drift** - Units individually pass but combined output doesn't deliver the user story. Validate at Phase 3. - **Analysis paralysis** - Don't overthink, read the plan and execute - **Skipping clarifying questions** - Ask now, not after building wrong thing - **Ignoring plan references** - The plan has links for a reason - **Testing at the end** - Test continuously or suffer later - **Orchestrator doing implementation** - Delegate to subagents, don't implement inline -- **Skipping regression checks** - A passing task that breaks previous work is not progress -- **Losing session state** - Always write to STATE.md before and after each task +- **Skipping regression checks** - A passing unit that breaks previous work is not progress +- **Losing session state** - Always write to STATE.md before and after each unit - **Dumping all session files into subagent context** - Use the learnings brief, filtered by domain - **Over-reviewing simple changes** - Save reviewer agents for complex work - **80% done syndrome** - Finish the feature, don't move on early diff --git a/portable/compound-engineering/plugin.yaml b/portable/compound-engineering/plugin.yaml index cec0465..7c96302 100644 --- a/portable/compound-engineering/plugin.yaml +++ b/portable/compound-engineering/plugin.yaml @@ -1,5 +1,5 @@ name: compound-engineering -version: 4.4.0 +version: 4.5.0 description: lead: OpenCode-first AI-powered development tools. suffix: spanning code review, research, design, and workflow automation, with generated Copilot support and Claude Code compatibility outputs. 
diff --git a/portable/compound-engineering/skills/brainstorming/SKILL.md b/portable/compound-engineering/skills/brainstorming/SKILL.md index 1831171..6d655df 100644 --- a/portable/compound-engineering/skills/brainstorming/SKILL.md +++ b/portable/compound-engineering/skills/brainstorming/SKILL.md @@ -21,7 +21,7 @@ This skill provides detailed process knowledge for effective brainstorming sessi The brainstorm produces three lynchpin artifacts that anchor all downstream phases: 1. **Problem Narrative & User Story** -- the WHY (consumed by plan, work, and review) 2. **Architectural Context Map** -- the WHERE (consumed by execution agents and reviewers) -3. **Design Decisions** -- the WHAT (consumed by plan for task decomposition) +3. **Design Decisions** -- the WHAT (consumed by plan for execution-slice decomposition) ## When to Use This Skill @@ -316,18 +316,18 @@ This prevents wasted effort on misaligned designs. The brainstorm document is the **feature-level spec and handoff contract** for downstream work. 
The project constitution, when present, remains the repo-wide governing artifact: **`/workflows:plan` consumes:** -- Problem narrative and user story -> structures phases around the WHY -- Architectural context -> informs task decomposition, file mapping, dependencies +- Problem narrative and user story -> structures execution slices around the WHY +- Architectural context -> informs slice decomposition, file mapping, dependencies - Success criteria -> becomes the plan's acceptance criteria foundation - Key decisions -> preserved and enriched, not re-decided **`/deepen-plan` consumes:** -- Problem narrative -> evaluates whether deepened tasks still serve the original intent +- Problem narrative -> evaluates whether deepened slices still serve the original intent - Success criteria -> grounds best-practice research in actual goals **`/workflows:work` consumes:** - Architectural context -> populates `{{ARCHITECTURAL_CONTEXT}}` for every execution agent -- User story -> orchestrator validates each task contributes to the story +- User story -> orchestrator validates each slice contributes to the story - Problem narrative -> included in scoped prompts so agents understand purpose **`/workflows:review` consumes:** diff --git a/portable/compound-engineering/skills/orchestrating-swarms/SKILL.md b/portable/compound-engineering/skills/orchestrating-swarms/SKILL.md index 97b75c6..d69b37c 100644 --- a/portable/compound-engineering/skills/orchestrating-swarms/SKILL.md +++ b/portable/compound-engineering/skills/orchestrating-swarms/SKILL.md @@ -39,7 +39,7 @@ Use swarms when the work has real parallelism, specialist boundaries, or depende - Keep the team small. Extra workers are justified only when they remove wall-clock time or increase specialist quality. ### Task design -- Write tasks as outcomes, not vague topics. +- Write work items as outcomes, not vague topics. - Keep scopes non-overlapping unless the assignment is an explicit cross-check. 
- Prefer DAG-style dependencies over ad hoc sequencing. - State what evidence counts as done: files changed, tests run, findings delivered, screenshots captured, or open questions listed. @@ -53,7 +53,7 @@ Have workers report in a terse, machine-checkable shape: - `risks`: unresolved concerns ### Leader responsibilities -- Keep the canonical task list and dependency map. +- Keep the canonical slice/work-item list and dependency map. - Resolve blockers instead of letting workers stall silently. - Merge duplicate findings and remove contradictory advice. - Re-run shared verification after integrating worker output. diff --git a/portable/compound-engineering/skills/setup/SKILL.md b/portable/compound-engineering/skills/setup/SKILL.md index 70fff91..2e6d5fa 100644 --- a/portable/compound-engineering/skills/setup/SKILL.md +++ b/portable/compound-engineering/skills/setup/SKILL.md @@ -220,7 +220,7 @@ options: - `tdd.evidence.unit`: `required` or `optional` - `tdd.evidence.e2e`: `required` or `optional` - `tdd.exceptions`: `[]` by default. Plans must carry any justified exceptions. 
-- `review_mode`: "bulk" (default), "inline", or "both" (controls per-task review in workflows:work) +- `review_mode`: "bulk" (default), "inline", or "both" (controls per-unit review in workflows:work) Write `compound-engineering.local.md`: diff --git a/tests/architecture-workflow-contract.test.ts b/tests/architecture-workflow-contract.test.ts index 6a9973c..fe8c3f6 100644 --- a/tests/architecture-workflow-contract.test.ts +++ b/tests/architecture-workflow-contract.test.ts @@ -97,6 +97,14 @@ describe("architecture workflow contract", () => { "workflows", "review.md", ) + const executionPrompt = await readRepoFile( + "portable", + "compound-engineering", + "commands", + "workflows", + "references", + "execution-agent-prompt.md", + ) const rootReadme = await readRepoFile("README.md") const pluginChangelog = await readRepoFile("plugins", "compound-engineering", "CHANGELOG.md") @@ -104,6 +112,8 @@ describe("architecture workflow contract", () => { expect(deepenPrompt).toContain("Read `architecture_ref`") expect(workPrompt).toContain("### Architecture Handoff") expect(workPrompt).toContain("{{ARCHITECTURE_HANDOFF}}") + expect(executionPrompt).toContain("## Architecture Handoff") + expect(executionPrompt).toContain("{{ARCHITECTURE_HANDOFF}}") expect(reviewPrompt).toContain("Architecture Artifact") expect(reviewPrompt).toContain("Architecture Handoff") expect(reviewPrompt).toContain("docs/architecture/*.md") diff --git a/tests/tdd-contract.test.ts b/tests/tdd-contract.test.ts index a0859c6..905b5c8 100644 --- a/tests/tdd-contract.test.ts +++ b/tests/tdd-contract.test.ts @@ -38,8 +38,10 @@ describe("TDD contract surfaces", () => { expect(planPrompt).toContain("Plan-level `tdd` values override `compound-engineering.local.md`") expect(planPrompt).toContain("unit + e2e evidence") expect(planPrompt).toContain("replacement_evidence") - expect(planPrompt).toContain("Each subphase/task should be a self-contained execution unit") - expect(planPrompt).toContain("Treat every task
as a mini-handoff packet") + expect(planPrompt).toContain("commands/workflows/references/execution-shape.md") + expect(planPrompt).toContain("execution_shape:") + expect(planPrompt).toContain("## Execution Shape") + expect(planPrompt).toContain("vertical-slices") }) test("deepen-plan preserves and validates the resolved TDD contract", async () => { diff --git a/tests/workflow-orchestration-reference.test.ts b/tests/workflow-orchestration-reference.test.ts index 016ce8b..250c7f4 100644 --- a/tests/workflow-orchestration-reference.test.ts +++ b/tests/workflow-orchestration-reference.test.ts @@ -26,6 +26,14 @@ describe("workflow orchestration references", () => { "references", "tdd-evidence-contract.md", ) + const executionShape = await readRepoFile( + "portable", + "compound-engineering", + "commands", + "workflows", + "references", + "execution-shape.md", + ) expect(orchestration).toContain("## Reference Template Loading") expect(orchestration).toContain("## Named Agent Dispatch") @@ -38,6 +46,11 @@ describe("workflow orchestration references", () => { expect(tdd).toContain("## Review Gate Classifications") expect(tdd).toContain("replacement_evidence") expect(tdd).toContain("Missing cleanup after refactor") + expect(executionShape).toContain("## Default") + expect(executionShape).toContain("vertical-slices") + expect(executionShape).toContain("infra-track") + expect(executionShape).toContain("fix-batch") + expect(executionShape).toContain("## Plan shape") }) test("plan, deepen, work, and review reference the shared orchestration rules instead of duplicating them", async () => { @@ -159,4 +172,37 @@ describe("workflow orchestration references", () => { expect(specPrompt).toContain("Missing behavior coverage") expect(qualityPrompt).toContain("Missing cleanup after refactor") }) + + test("plan, deepen, and work share the execution-shape contract", async () => { + const planPrompt = await readRepoFile( + "portable", + "compound-engineering", + "commands", + "workflows", 
+ "plan.md", + ) + const deepenPrompt = await readRepoFile( + "portable", + "compound-engineering", + "commands", + "deepen-plan.md", + ) + const workPrompt = await readRepoFile( + "portable", + "compound-engineering", + "commands", + "workflows", + "work.md", + ) + + for (const prompt of [planPrompt, deepenPrompt, workPrompt]) { + expect(prompt).toContain("commands/workflows/references/execution-shape.md") + } + + expect(planPrompt).toContain("execution_shape:") + expect(planPrompt).toContain("## Execution Shape") + expect(deepenPrompt).toContain("Resolve execution shape first") + expect(workPrompt).toContain("execution_shape") + expect(workPrompt).toContain("execution units") + }) })