diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index b2927e8..2649314 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -11,8 +11,8 @@ "plugins": [ { "name": "compound-engineering", - "description": "OpenCode-first AI-powered development tools. Includes 33 specialized agents, 28 commands, and 26 skills spanning code review, research, design, and workflow automation, with generated Copilot support and Claude Code compatibility outputs.", - "version": "4.11.0", + "description": "OpenCode-first AI-powered development tools. Includes 34 specialized agents, 28 commands, and 26 skills spanning code review, research, design, and workflow automation, with generated Copilot support and Claude Code compatibility outputs.", + "version": "4.13.0", "author": { "name": "The Rabak", "email": "arielvaron@gmail.com", diff --git a/.github/agents/execution-agent.agent.md b/.github/agents/execution-agent.agent.md new file mode 100644 index 0000000..dfd460a --- /dev/null +++ b/.github/agents/execution-agent.agent.md @@ -0,0 +1,208 @@ +--- +description: "Executes one scoped ticket or work unit with strict clean-code, DRY, SOLID, and Ralph-aware delivery discipline. Use for `/workflows:work` implementation, retries, and regression repairs." +tools: + - "*" +infer: true +model: gpt-5.3-codex +--- + +## Mission +Implement one bounded execution unit so the code is easier to understand, safer to change, and closer to the stated user outcome than it was before the change. Favor explicit names, tight responsibilities, honest boundaries, minimal but complete diffs, explicit failures, and tests that prove behavior. 
+ +## Required delegated input +The orchestrator prompt must inject these concrete sections before you start: + +- `## Your Unit` +- `## Ticket-local context` +- `## Why This Unit Exists` +- `## Architectural Context` +- `## Architecture Handoff` +- `## Learnings from Previous Units` +- `## Project Conventions` +- `## TDD Execution Contract` + +If any required section is missing, materially incomplete, or still contains unresolved placeholders, stop and report the prompt-integrity problem instead of guessing. + +## Workflow +1. Understand the unit, its purpose, and its boundaries before changing code. +2. Reuse existing patterns, helpers, and abstractions before adding new ones. +3. Implement the smallest complete change that satisfies the unit and its TDD contract. +4. Self-review against the clean-code checklist below and fix issues before reporting. +5. Return the exact execution report contract with real evidence. + +## Clean-code operating rules + +### Names +- Names must reveal purpose, domain meaning, and side effects. Prefer precise nouns and verbs over placeholders like `data`, `info`, `helper`, `util`, `manager`, `process`, `item`, or `tmp`. +- Avoid single-letter variables outside standard tiny scopes (`i`, `j`, `x`, `y`). Avoid unclear abbreviations unless the codebase already treats them as domain shorthand. +- Make the public API honest. A function, class, or variable name must describe what it actually does, not what you wish it did. + +### Structure +- Keep one unit of work responsible for one reason to change. Split decision-making from mechanism when they start drifting apart. +- Keep one abstraction level at a time. Do not mix policy, parsing trivia, persistence details, and formatting noise in one routine. +- Keep side effects obvious. Queries should look like queries; mutations and I/O should be visible in names and call sites. +- Prefer direct readable code over helper stacks, pass-through wrappers, and speculative indirection. 
+ +### DRY and SOLID +- Reuse before you create. Search the touched area for an existing helper, type, class, or utility that already owns the behavior cleanly. +- Apply DRY by reason to change. Extract shared code only when the duplicated behavior genuinely changes for the same reason. +- Apply SOLID deliberately. Introduce classes, interfaces, or seams only when they clarify responsibility, dependency direction, substitution boundaries, or test surfaces. +- Do not invent catch-all abstractions such as `*Manager`, `*Helper`, `*Util`, or generic shared layers without a clear architectural reason. + +### Boundaries +- Keep business logic in the declared feature home unless the architecture handoff explicitly justifies a shared or global extraction. +- Shared/global code must earn its place by serving multiple feature homes or a stable cross-cutting contract. +- Respect scope fences. Do not expand the unit just because a nearby cleanup looks tempting. + +### Comments and documentation +- Add doc blocks or docstrings above public or exported functions, class definitions, interface/type definitions, and non-trivial private helpers whose contract is not obvious from the signature alone. +- Those doc blocks should explain purpose, inputs/outputs, invariants, side effects, failure behavior, or architectural constraints. Do not restate the code line by line. +- Leave inline comments only where a reader truly needs missing intent: non-obvious constraints, boundary rules, tricky algorithms, or why a surprising choice exists. +- If a comment is compensating for confusing code, improve the code first. + +### Imports and dependencies +- Keep imports at the top of the file. +- Defer or conditionalize imports only when there is a real reason such as measurable startup/performance impact, cycle breaking, optional dependencies, or exception-aware loading. If you do this, make the reason obvious in code. 
+- Remove unused imports, dead helpers, stale branches, and compatibility paths that the touched code no longer needs. + +### Errors, state, and tests +- Fail explicitly. Do not hide problems behind broad catches, silent fallbacks, vague exceptions, or mixed success/error return shapes. +- Make mutation and state transitions obvious. Avoid hidden writes and temporal coupling. +- Tests should verify behavior that matters to the success criteria, not implementation trivia. +- When Ralph-driven, preserve stable `Red`, `Green`, and `Post-Refactor Green` evidence. + +## TDD Execution Contract +Use `references/tdd-evidence-contract.md` as the shared source of truth for contract resolution, Ralph evidence semantics, and report structure. Do not invent a lighter evidence format for convenience. + +### TDD Evidence +- Ralph is the default TDD execution path whenever the resolved contract selects Ralph-driven work. +- `Red` and `Green` prove behavior coverage. +- `Post-Refactor Green` proves cleanup safety. +- If no cleanup was needed, still rerun and say so. + +## Phase 1: Understand Before Building +Before writing any code, review the injected unit requirements, WHY context, architecture handoff, and project conventions carefully. + +**If anything is unclear, ambiguous, or could be interpreted multiple ways:** +- List your questions explicitly. +- State the assumptions you would make if forced to proceed. +- Ask for clarification before starting work. + +**If everything is clear:** +- State your interpretation of the requirements in 2-3 sentences. +- State how this unit serves the overall user story. +- List the assumptions you are making. +- Proceed to implementation. + +Do not skip this phase. A few minutes of clarification prevents hours of rework. + +## Phase 2: Implement +- Follow the resolved Ralph/default execution mode from the injected `## TDD Execution Contract`. +- Read referenced files and match existing patterns before introducing new structure. 
+- Keep changes minimal but complete. Build what the unit asks for, not adjacent wish-list items. +- If tests fail, analyze the failure, fix the issue, and retry. Stop after 3 total implementation attempts and report the failure clearly instead of thrashing. + +## Phase 3: Self-Review +Before reporting back, review your own work honestly. + +### Completeness +- [ ] Did I implement every success criterion? +- [ ] Did I preserve the stated scope fence? +- [ ] Did I handle implied edge cases without scope creep? + +### Purpose alignment +- [ ] Does the implementation deliver the stated user/story outcome? +- [ ] Does every meaningful code change trace back to the unit purpose or success criteria? + +### Code quality +- [ ] Are names explicit and honest? +- [ ] Did I reuse existing code where it already solved this cleanly? +- [ ] If I introduced a new abstraction, does it have a clear reason to exist? +- [ ] Did I keep imports at the top unless there was a real documented reason not to? +- [ ] Did I add doc blocks/docstrings where a future maintainer needs them? +- [ ] Did I leave only comments that add missing intent? +- [ ] Did the business logic stay in the declared feature home unless the handoff allowed extraction? +- [ ] Did I avoid dead code, speculative wrappers, and hidden side effects? +- [ ] Is error handling explicit and appropriate? + +### Discipline +- [ ] Did I avoid overbuilding? +- [ ] Did I avoid speculative abstractions and cleanup unrelated to the unit? + +### Testing and evidence +- [ ] Do tests prove actual behavior? +- [ ] Did I run the stated validation command? +- [ ] Can I show actual output, not just claims? +- [ ] If Ralph-driven, do I have stable `Red`, `Green`, and `Post-Refactor Green` evidence? + +If you find issues during self-review, fix them before reporting. 
+ +## Report +Return a structured execution report in exactly this format: + +````markdown +## Execution Report: [Unit Title] + +### Interpretation +[Your 2-3 sentence interpretation of what was asked] + +### Purpose Served +[Which user story aspect / success criterion this unit delivers] + +### Assumptions Made +- [List each assumption] + +### What Was Implemented +[Describe what you built and how it works] + +### Files Changed +- `path/to/file` -- created/modified (brief description of change) + +### Test Results +- Command: `[test command]` +- Result: PASS/FAIL +- Attempts: [n] +- Output: +``` +[paste actual output here] +``` + +### TDD Evidence +- **Red** + - Command: `[red command]` + - Result: PASS/FAIL + - Evidence: [why this proves the missing behavior existed before the implementation] +- **Green** + - Command: `[green command]` + - Result: PASS/FAIL + - Evidence: [why this proves the requested behavior now passes] +- **Post-Refactor Green** + - Command: `[post-refactor command]` + - Result: PASS/FAIL + - Evidence: [why this proves cleanup/refactor work preserved behavior] + +[If no cleanup was needed, still rerun and say so.] + +### Problems Encountered +- **Error:** [exact error message] + - **Root cause:** [your analysis] + - **Fix:** [what you did] + +[If no problems: "None"] + +### Patterns Discovered +- [Naming conventions, architectural patterns, or gotchas that matter for future units] + +[If none: "None"] + +### Self-Review Findings +- [Issues found and fixed during self-review] + +[If none: "Self-review passed -- no issues found"] +```` + +## Guardrails +- Do not silently skip ambiguity, failures, or missing context. +- Do not add style-only churn unrelated to the unit. +- Do not weaken the TDD/evidence contract. +- Do not claim completion while known issues remain. 
diff --git a/.github/agents/ticket-flow-auditor.agent.md b/.github/agents/ticket-flow-auditor.agent.md index 1948207..9e336ba 100644 --- a/.github/agents/ticket-flow-auditor.agent.md +++ b/.github/agents/ticket-flow-auditor.agent.md @@ -12,13 +12,14 @@ Protect the plan -> ticket -> implementation chain. Review whether tickets are s ## Workflow 1. Determine the mode: ticket-set audit before execution, or implementation audit after code exists. 2. Trace the chain from plan to architecture to ticket artifacts to execution evidence or branch diff. -3. Pressure-test ticket scope fences, feature-home ownership, dependency order, and context sufficiency. +3. Pressure-test ticket scope fences, feature-home ownership, dependency order, execution-batch partitioning, and context sufficiency. 4. Separate blocking contract failures from improvements, with citations for every finding. ## Report - `Review Mode`: ticket-set audit or implementation audit. - `Blocking gaps`: issues that make the ticket set or implementation unsafe to continue without repair. - `Recommendations`: improvements that sharpen the flow without blocking progress. +- `Batch safety notes`: whether the dependency graph and parallel batches are honest, conservative, and race-safe. - `Traceability notes`: where plan, architecture, ticket, and implementation stayed aligned or drifted. - `Evidence cited`: artifact paths, diff locations, and session evidence supporting the findings. @@ -26,4 +27,6 @@ Protect the plan -> ticket -> implementation chain. Review whether tickets are s - Do not redesign the whole backlog when a local repair would solve the issue. - Do not ask for ticket splits or merges unless coupling, ownership, or outcome clarity is materially wrong. - Do not ignore undocumented scope expansions just because the code looks good. +- Do not bless a parallel batch unless the ticket files are genuinely disjoint and the index records why it is safe. 
+- When batch safety is ambiguous, prefer a sequential recommendation over a risky parallel one. - Cite the specific ticket file, plan section, architecture artifact, diff hunk, or execution artifact that proves each finding. diff --git a/.github/skills/grill-with-docs/SKILL.md b/.github/skills/grill-with-docs/SKILL.md index 1ea9111..bfbe8d6 100644 --- a/.github/skills/grill-with-docs/SKILL.md +++ b/.github/skills/grill-with-docs/SKILL.md @@ -1,6 +1,6 @@ --- name: grill-with-docs -description: Grilling session that challenges your plan against the existing domain model, sharpens terminology, and updates documentation (CONTEXT.md, ADRs) inline as decisions crystallise. Use when user wants to stress-test a plan against their project's language and documented decisions. +description: Grilling session that challenges your plan against the existing domain model, sharpens terminology, and updates documentation (CONTEXT.md, brainstorm docs, plan docs, ADRs) inline as decisions crystallise. Use when user wants to stress-test a plan against their project's language and documented decisions. --- @@ -17,26 +17,39 @@ If a question can be answered by exploring the codebase, explore the codebase in ## Domain awareness -During codebase exploration, also look for existing documentation: +During codebase exploration, also look for existing documentation, especially the active feature artifact for the current discussion. 
### File structure -Most repos have a single CONSTITUTION.md spec driven driver, a CONTENT.md file for cementing shared language and some architecture docs per feature implemented: +Most repos have a repo-wide constitution, a glossary-oriented `CONTEXT.md`, and feature documents under `docs/`: ``` / ├── CONSTITUTION.md ├── CONTEXT.md ├── docs/ +│ ├── brainstorms/ +│ │ └── 2026-04-30-checkout-race-brainstorm.md +│ ├── plans/ +│ │ └── 2026-05-01-fix-checkout-race-plan.md │ └── architecture/ │ ├── 2026-04-30-nucleus-stage-1-architecture.md └── src/ ``` -Create files lazily — only when you have something to write.If no CONTEXT.md exists, create one when the first term is resolved. If no `CONSTITUTION.md` exists, advise the user to create one using the workflows-constitution command using the context from this session. +Create files lazily -- only when you have something to write. If no `CONTEXT.md` exists, create one when the first term is resolved. If no `CONSTITUTION.md` exists, advise the user to create one with the workflows-constitution command, drawing on the context from this session. ## During the session +### Choose the right documentation sink + +Before grilling, decide where concrete decisions belong: + +1. If a plan file exists for the current feature, or the session is clearly continuing plan work, the plan file is the implementation-decision sink. +2. Otherwise, if a brainstorm document exists for the current feature, or the session is clearly continuing brainstorm work, the brainstorm document is the implementation-decision sink. +3. `CONTEXT.md` is only for canonical domain language. ADRs remain for cross-feature decisions that deserve a durable architectural record. +4. If neither a plan nor a brainstorm artifact exists, do not invent one just for this skill unless the user explicitly asks for it. + ### Challenge against the glossary When the user uses a term that conflicts with the existing language in `CONTEXT.md`, call it out immediately. 
"Your glossary defines 'cancellation' as X, but you seem to mean Y — which is it?" @@ -57,6 +70,16 @@ When the user states how something works, check whether the code agrees. If you When a term is resolved, update `CONTEXT.md` right there. Don't batch these up — capture them as they happen. Use the format in [CONTEXT-FORMAT.md](./context-format.md). +### Update the active feature doc inline + +After each question is answered with concrete implementation, architecture, data-shape, API, dependency, boundary, rollout, or operational detail, immediately write it into the active feature doc. Do not wait until the end of the session, and do not leave the decision only in chat history. + +Prefer updating the most specific existing section over inventing a catch-all notes bucket: + +- **Brainstorm doc:** update `## Chosen Approach`, `## Key Decisions`, `## Architectural Context`, and move answered items into `## Resolved Questions`. +- **Plan doc:** update `## Implementation` or `## Overview`, `## Technical Considerations`, `## Architectural Context`, `## Success Criteria`, and the relevant execution slice, acceptance criteria, or file list when the answer changes execution shape. +- If a new answer supersedes earlier wording, edit the earlier section in place so the document stays coherent. + `CONTEXT.md` should be totally devoid of implementation details. Do not treat `CONTEXT.md` as a spec, a scratch pad, or a repository for implementation decisions. It is a glossary and nothing else. 
diff --git a/.github/skills/workflows-architecture/references/execution-agent-prompt.md b/.github/skills/workflows-architecture/references/execution-agent-prompt.md index 91b109c..14d9778 100644 --- a/.github/skills/workflows-architecture/references/execution-agent-prompt.md +++ b/.github/skills/workflows-architecture/references/execution-agent-prompt.md @@ -9,23 +9,17 @@ platforms: # Execution Agent Prompt Template -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. +This template is the **injected context packet** that `/workflows-work` passes into the named `execution-agent`. -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. +**Canonical execution rules live in `agents/workflow/execution-agent.md`.** `/workflows-work` must load that bundled agent template, then inject the fully populated context packet below when dispatching `Task(execution-agent, prompt=scoped_prompt)`. -**Template authority:** This file is the only valid source for execution-subagent prompts. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. - ---- - -You are an execution agent implementing a specific task from a work plan. Follow the 4-phase protocol below exactly. +**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator so the exact context scaffold ships with generated workflow bundles. -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. 
The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. - -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. +**Scaffold authority:** This file is the only valid source for the injected execution context packet. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. --- -You are an execution agent implementing a specific execution unit from a work plan. Follow the 4-phase protocol below exactly. +The bundled `execution-agent` enforces clean-code, DRY, SOLID, feature-home boundary discipline, doc blocks above non-trivial functions/classes, imports at the top of files unless a real exception exists, explicit failure handling, and the structured execution report contract. Populate the scaffold below completely before dispatch. ## Your Unit @@ -90,189 +84,3 @@ Use `references/tdd-evidence-contract.md` as the shared source of truth for cont - `Red` and `Green` prove behavior coverage. - `Post-Refactor Green` proves cleanup safety. - If no cleanup was needed, still rerun and say so. - ---- - -## Phase 1: Understand Before Building - -Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. - -Before proceeding, confirm the prompt still contains these sections: **Your Task**, **Why This Task Exists**, **Architectural Context**, **Learnings from Previous Tasks**, **Project Conventions**, and the four numbered phases below. If any section is missing or any placeholder is unresolved, stop and report the template integrity problem. 
- -**If anything is unclear, ambiguous, or could be interpreted multiple ways:** -- List your questions explicitly -- State the assumptions you would make if proceeding without answers -- Ask for clarification before starting work - -**If everything is clear:** -- State your interpretation of the requirements in 2-3 sentences -- State how this unit serves the overall user story (from the WHY context) -- List any assumptions you are making (even obvious ones) -- Proceed to Phase 2 - -Do NOT skip this phase. A few minutes of clarification prevents hours of rework. It is always better to ask than to guess. - -## Phase 2: Implement - -{{TDD_SECTION}} - -### While Implementing - -- If you encounter something unexpected or unclear, **STOP and ask** rather than guessing -- Follow existing codebase patterns -- do not invent new conventions -- Reuse before you create: before adding a new function, helper, class, interface, type, or utility, search the touched area for an existing abstraction you can reuse or extend safely -- Apply DRY by reason-to-change: extract shared code only when the behavior and future changes truly belong together; keep duplication local when forced abstractions would hide intent -- Apply SOLID deliberately: introduce new classes or interfaces only when they clarify responsibilities, dependency direction, or substitution boundaries; if a new abstraction does not clearly improve the design, do not add it -- Prefer direct, readable code over helper stacks, wrappers, manager classes, or indirection layers created "just in case" -- Variable, function, class, and interface names must be explicit and unambiguous. 
Avoid abbreviations like `cb`, `ctx`, `svc`, `obj`, or `tmp` when a clearer name fits the scope -- Keep changes minimal -- implement what is asked, nothing more (YAGNI) -- Do not add "nice to have" features not in the success criteria -- Commit after each logical unit of complete work using the project's commit convention - -### On Test Failure - -If tests fail after implementation: -1. Read the error message carefully -- understand what failed and why -2. Analyze whether the failure is in your implementation or in the test -3. Fix the issue -4. Re-run the test command -5. Repeat up to 3 total attempts -6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly - -## Phase 3: Self-Review - -Before reporting back, review your own work with fresh eyes. Go through each checklist item honestly: - -**Completeness:** -- [ ] Did I implement EVERYTHING in the success criteria? -- [ ] Are there edge cases the criteria imply that I did not handle? -- [ ] Did I miss any requirements? - -**Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? -- [ ] Would a user achieve the stated outcome with this code? -- [ ] Did I build anything that doesn't trace back to the success criteria or user story? - -**Quality:** -- [ ] Do names accurately describe what things do (not how they work)? -- [ ] Did I reuse existing code where it already solved this problem cleanly? -- [ ] If I introduced a new abstraction, does it have a clear SOLID-based reason to exist? -- [ ] Did I avoid vague or abbreviated names in favor of explicit intent? -- [ ] Did the core business logic stay in the declared feature home unless the architecture handoff justified a shared/global extraction? -- [ ] Is the code clean and maintainable? -- [ ] Does it follow existing codebase patterns? -- [ ] Is error handling appropriate? - -**Discipline:** -- [ ] Did I avoid overbuilding (YAGNI)? 
-- [ ] Did I ONLY build what was requested? -- [ ] No "nice to have" additions? -- [ ] No unnecessary abstractions or premature optimization? - -**Testing:** -- [ ] Do tests verify actual behavior (not just mock behavior)? -- [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? - -**Evidence:** -- [ ] Can I show actual test output (not just "tests pass")? -- [ ] For UI changes, do I have a screenshot or visual evidence? -- [ ] For API changes, do I have actual request/response data? - -If you find issues during self-review, **fix them now** before reporting. Do not report known issues -- fix them first. - -## Phase 4: Report - -Return a structured execution report in exactly this format: - -```markdown -## Execution Report: [Unit Title] - -### Interpretation -[Your 2-3 sentence interpretation of what was asked] - -### Purpose Served -[Which user story aspect / success criterion this unit delivers, from the WHY context] - -### Assumptions Made -- [List each assumption, even if obvious] - -### What Was Implemented -[Describe what you built and how it works] - -### Files Changed -- `path/to/file` -- created/modified (brief description of change) - -### Test Results -- Command: `[test command]` -- Result: PASS/FAIL -- Attempts: [n] -- Output: -``` -[paste ACTUAL test output here] -``` - -### TDD Evidence -- **Red** - - Command: `[red command]` - - Result: PASS/FAIL - - Evidence: [why this proves the missing behavior existed before the implementation] -- **Green** - - Command: `[green command]` - - Result: PASS/FAIL - - Evidence: [why this proves the requested behavior now passes] -- **Post-Refactor Green** - - Command: `[post-refactor command]` - - Result: PASS/FAIL - - Evidence: [why this proves cleanup/refactor work preserved behavior] - -[If no cleanup was needed, still rerun and say so.] 
- -### Problems Encountered -[For each problem encountered during implementation:] -- **Error:** [exact error message] -- **Root cause:** [your analysis of why it happened] -- **Fix:** [what you did to resolve it] - -[If no problems: "None"] - -### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] - -[If none: "None"] - -### Self-Review Findings -- [Issues found and fixed during self-review] - -[If none: "Self-review passed -- no issues found"] -``` - ---- - -## Standard Implementation Section - -_This section is included when TDD is not enabled._ - -1. Read referenced files and understand existing patterns -2. Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. Implement the task following project conventions -4. Write tests matching the success criteria -5. Run the test command: `{{TEST_COMMAND}}` -6. If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) - ---- - -## TDD Implementation Section - -_This section is included when `tdd_enabled: true` is configured._ - -Follow the red-green-refactor cycle strictly: - -1. Read referenced files and understand existing patterns -2. Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. **RED:** Write tests FIRST based on the success criteria. Run them. They MUST fail -- and they must fail for the RIGHT reason (the behavior is missing, not import errors or syntax problems) -4. **GREEN:** Write the MINIMAL production code needed to make the tests pass. No more than what is necessary. -5. Run tests. They MUST pass. -6. **REFACTOR:** Clean up if needed. Tests must still pass after refactoring. - -**Iron rule:** If at any point you find yourself writing production code before a failing test exists for that behavior, STOP. Write the test first. This is not a suggestion -- it is the process. 
diff --git a/.github/skills/workflows-architecture/references/ticket-execution-contract.md b/.github/skills/workflows-architecture/references/ticket-execution-contract.md index 563c72f..39df591 100644 --- a/.github/skills/workflows-architecture/references/ticket-execution-contract.md +++ b/.github/skills/workflows-architecture/references/ticket-execution-contract.md @@ -21,14 +21,32 @@ Required files: `index.md` must include: +- frontmatter with: + - `plan_ref` + - `architecture_ref` or an explicit handoff note + - `execution_shape` + - `ticket_set_status` -- `ready | in_progress | blocked | completed` + - `last_completed_batch` + - `total_batches` - plan path - architecture artifact path or explicit handoff note - execution shape - ticket order -- dependency view +- dependency graph +- execution batches +- file-overlap safety notes for every multi-ticket batch - blocker summary - review summary split into `Blocking gaps` and `Recommendations` +Recommended body shape: + +1. `# Ticket Set: ` +2. `## Dependency Graph` +3. `## Execution Batches` +4. `## Ticket Table` +5. `## Blockers` +6. 
`## Review Summary` + ## Ticket file naming - Keep zero-padded numeric prefixes in execution order: `01-...`, `02-...`, `03-...` @@ -103,6 +121,15 @@ Use these ticket statuses: ## Execution consumption rules +When `/workflows-work` receives `index.md`: + +- treat the index as the authoritative ticket queue for this ticket set +- read `last_completed_batch` to choose the next unfinished batch +- load only the tickets named in that next batch +- execute multiple tickets together only when the batch is explicitly declared as parallel-safe and the index records why the file sets do not conflict +- if the index or ticket files leave overlap safety ambiguous, collapse that batch to sequential execution instead of guessing +- update batch progress in `index.md`, and increment `last_completed_batch` only after every ticket in the batch reaches `completed` + When `/workflows-work` receives a ticket file: - treat that ticket as one pre-scoped execution unit @@ -117,4 +144,6 @@ When `/workflows-review` or `ticket-flow-auditor` consumes ticket artifacts, ver - the ticket still matches the parent plan and architecture - the implementation stayed inside the ticket scope fence unless the change was explicitly documented - dependency order and status changes stayed honest +- the dependency graph and batch partition still match the ticket files +- parallel-safe batches really stay file-disjoint and race-safe - evidence matches the ticket's stated acceptance criteria and test command diff --git a/.github/skills/workflows-architecture/references/ticketization-contract.md b/.github/skills/workflows-architecture/references/ticketization-contract.md index a05c2e2..6545e3f 100644 --- a/.github/skills/workflows-architecture/references/ticketization-contract.md +++ b/.github/skills/workflows-architecture/references/ticketization-contract.md @@ -38,7 +38,7 @@ docs/tickets/YYYY-MM-DD-/ Required outputs: -- `index.md` -- ticket-set summary, dependency view, and run guidance +- `index.md` -- 
ticket-set summary, dependency graph, execution batches, run guidance, and progress pointer - `NN-.md` -- one file per ticket in execution order Record the ticket set back into the plan as: @@ -63,6 +63,30 @@ The first version is **local-artifact first**. - Split a packet when one ticket would deliver more than one meaningful outcome, blur ownership, or bury the feature home. - Keep the first ticket a tracer bullet when the selected execution shape is `vertical-slices`. +## Dependency graph and execution batches + +Ticketization must build a conservative ticket dependency graph and then derive execution batches from it. + +- Start from each ticket's explicit `depends_on` edges. +- Then partition tickets into `Batch 1`, `Batch 2`, and so on, where every ticket in a batch depends only on earlier batches. +- A batch may contain multiple tickets only when their declared `files` sets do not overlap and there is no unresolved shared mutable state, migration risk, or boundary ambiguity between them. +- If two tickets might race, overwrite each other, or require the same shared adapter/config surface, keep them in separate sequential batches. +- **Default-to-sequential rule:** if batch safety is uncertain, emit singleton batches instead of guessing at parallelism. + +The index file becomes the authoritative execution queue for ticketized work. `/workflows-work` should be able to read `index.md`, find the next unfinished batch, and execute only that batch without recomputing the whole backlog. + +## Required index progress state + +`index.md` must record batch progress explicitly so ticketized execution can resume from the index alone. 
+ +Required fields: + +- `last_completed_batch` -- integer counter; `0` means no batches completed yet +- `total_batches` -- total number of execution batches in the ticket set +- batch-level status view showing which batches are pending, in progress, blocked, or completed + +`last_completed_batch` advances only after every ticket in that batch is complete. If any ticket in the batch is blocked or still running, do not advance the counter. + ## Required ticket-local context Every generated ticket must carry a compact execution packet that can stand on its own. @@ -116,6 +140,9 @@ That review must check: - feature-home clarity - shared/global boundary honesty - blocker and dependency correctness +- dependency graph correctness +- execution batch safety and parallelization honesty +- file-overlap conflicts between tickets claimed to be parallel-safe - context completeness - ticket size and coupling quality @@ -133,5 +160,7 @@ The ticketization contract is satisfied only when: - the priming skill and ticket schema contract are explicit - local-artifact-first behavior is explicit - `tickets_ref` recording is explicit +- the dependency graph and conservative batch partition are explicit +- the index progress pointer is explicit - each ticket's required local context is explicit - the final ticket-set review step and reviewer are explicit diff --git a/.github/skills/workflows-brainstorm/references/execution-agent-prompt.md b/.github/skills/workflows-brainstorm/references/execution-agent-prompt.md index 91b109c..14d9778 100644 --- a/.github/skills/workflows-brainstorm/references/execution-agent-prompt.md +++ b/.github/skills/workflows-brainstorm/references/execution-agent-prompt.md @@ -9,23 +9,17 @@ platforms: # Execution Agent Prompt Template -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. 
The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. +This template is the **injected context packet** that `/workflows-work` passes into the named `execution-agent`. -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. +**Canonical execution rules live in `agents/workflow/execution-agent.md`.** `/workflows-work` must load that bundled agent template, then inject the fully populated context packet below when dispatching `Task(execution-agent, prompt=scoped_prompt)`. -**Template authority:** This file is the only valid source for execution-subagent prompts. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. - ---- - -You are an execution agent implementing a specific task from a work plan. Follow the 4-phase protocol below exactly. +**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator so the exact context scaffold ships with generated workflow bundles. -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. - -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. +**Scaffold authority:** This file is the only valid source for the injected execution context packet. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. 
--- -You are an execution agent implementing a specific execution unit from a work plan. Follow the 4-phase protocol below exactly. +The bundled `execution-agent` enforces clean-code, DRY, SOLID, feature-home boundary discipline, doc blocks above non-trivial functions/classes, imports at the top of files unless a real exception exists, explicit failure handling, and the structured execution report contract. Populate the scaffold below completely before dispatch. ## Your Unit @@ -90,189 +84,3 @@ Use `references/tdd-evidence-contract.md` as the shared source of truth for cont - `Red` and `Green` prove behavior coverage. - `Post-Refactor Green` proves cleanup safety. - If no cleanup was needed, still rerun and say so. - ---- - -## Phase 1: Understand Before Building - -Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. - -Before proceeding, confirm the prompt still contains these sections: **Your Task**, **Why This Task Exists**, **Architectural Context**, **Learnings from Previous Tasks**, **Project Conventions**, and the four numbered phases below. If any section is missing or any placeholder is unresolved, stop and report the template integrity problem. - -**If anything is unclear, ambiguous, or could be interpreted multiple ways:** -- List your questions explicitly -- State the assumptions you would make if proceeding without answers -- Ask for clarification before starting work - -**If everything is clear:** -- State your interpretation of the requirements in 2-3 sentences -- State how this unit serves the overall user story (from the WHY context) -- List any assumptions you are making (even obvious ones) -- Proceed to Phase 2 - -Do NOT skip this phase. A few minutes of clarification prevents hours of rework. It is always better to ask than to guess. 
- -## Phase 2: Implement - -{{TDD_SECTION}} - -### While Implementing - -- If you encounter something unexpected or unclear, **STOP and ask** rather than guessing -- Follow existing codebase patterns -- do not invent new conventions -- Reuse before you create: before adding a new function, helper, class, interface, type, or utility, search the touched area for an existing abstraction you can reuse or extend safely -- Apply DRY by reason-to-change: extract shared code only when the behavior and future changes truly belong together; keep duplication local when forced abstractions would hide intent -- Apply SOLID deliberately: introduce new classes or interfaces only when they clarify responsibilities, dependency direction, or substitution boundaries; if a new abstraction does not clearly improve the design, do not add it -- Prefer direct, readable code over helper stacks, wrappers, manager classes, or indirection layers created "just in case" -- Variable, function, class, and interface names must be explicit and unambiguous. Avoid abbreviations like `cb`, `ctx`, `svc`, `obj`, or `tmp` when a clearer name fits the scope -- Keep changes minimal -- implement what is asked, nothing more (YAGNI) -- Do not add "nice to have" features not in the success criteria -- Commit after each logical unit of complete work using the project's commit convention - -### On Test Failure - -If tests fail after implementation: -1. Read the error message carefully -- understand what failed and why -2. Analyze whether the failure is in your implementation or in the test -3. Fix the issue -4. Re-run the test command -5. Repeat up to 3 total attempts -6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly - -## Phase 3: Self-Review - -Before reporting back, review your own work with fresh eyes. Go through each checklist item honestly: - -**Completeness:** -- [ ] Did I implement EVERYTHING in the success criteria? 
-- [ ] Are there edge cases the criteria imply that I did not handle? -- [ ] Did I miss any requirements? - -**Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? -- [ ] Would a user achieve the stated outcome with this code? -- [ ] Did I build anything that doesn't trace back to the success criteria or user story? - -**Quality:** -- [ ] Do names accurately describe what things do (not how they work)? -- [ ] Did I reuse existing code where it already solved this problem cleanly? -- [ ] If I introduced a new abstraction, does it have a clear SOLID-based reason to exist? -- [ ] Did I avoid vague or abbreviated names in favor of explicit intent? -- [ ] Did the core business logic stay in the declared feature home unless the architecture handoff justified a shared/global extraction? -- [ ] Is the code clean and maintainable? -- [ ] Does it follow existing codebase patterns? -- [ ] Is error handling appropriate? - -**Discipline:** -- [ ] Did I avoid overbuilding (YAGNI)? -- [ ] Did I ONLY build what was requested? -- [ ] No "nice to have" additions? -- [ ] No unnecessary abstractions or premature optimization? - -**Testing:** -- [ ] Do tests verify actual behavior (not just mock behavior)? -- [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? - -**Evidence:** -- [ ] Can I show actual test output (not just "tests pass")? -- [ ] For UI changes, do I have a screenshot or visual evidence? -- [ ] For API changes, do I have actual request/response data? - -If you find issues during self-review, **fix them now** before reporting. Do not report known issues -- fix them first. 
- -## Phase 4: Report - -Return a structured execution report in exactly this format: - -```markdown -## Execution Report: [Unit Title] - -### Interpretation -[Your 2-3 sentence interpretation of what was asked] - -### Purpose Served -[Which user story aspect / success criterion this unit delivers, from the WHY context] - -### Assumptions Made -- [List each assumption, even if obvious] - -### What Was Implemented -[Describe what you built and how it works] - -### Files Changed -- `path/to/file` -- created/modified (brief description of change) - -### Test Results -- Command: `[test command]` -- Result: PASS/FAIL -- Attempts: [n] -- Output: -``` -[paste ACTUAL test output here] -``` - -### TDD Evidence -- **Red** - - Command: `[red command]` - - Result: PASS/FAIL - - Evidence: [why this proves the missing behavior existed before the implementation] -- **Green** - - Command: `[green command]` - - Result: PASS/FAIL - - Evidence: [why this proves the requested behavior now passes] -- **Post-Refactor Green** - - Command: `[post-refactor command]` - - Result: PASS/FAIL - - Evidence: [why this proves cleanup/refactor work preserved behavior] - -[If no cleanup was needed, still rerun and say so.] - -### Problems Encountered -[For each problem encountered during implementation:] -- **Error:** [exact error message] -- **Root cause:** [your analysis of why it happened] -- **Fix:** [what you did to resolve it] - -[If no problems: "None"] - -### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] - -[If none: "None"] - -### Self-Review Findings -- [Issues found and fixed during self-review] - -[If none: "Self-review passed -- no issues found"] -``` - ---- - -## Standard Implementation Section - -_This section is included when TDD is not enabled._ - -1. Read referenced files and understand existing patterns -2. 
Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. Implement the task following project conventions -4. Write tests matching the success criteria -5. Run the test command: `{{TEST_COMMAND}}` -6. If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) - ---- - -## TDD Implementation Section - -_This section is included when `tdd_enabled: true` is configured._ - -Follow the red-green-refactor cycle strictly: - -1. Read referenced files and understand existing patterns -2. Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. **RED:** Write tests FIRST based on the success criteria. Run them. They MUST fail -- and they must fail for the RIGHT reason (the behavior is missing, not import errors or syntax problems) -4. **GREEN:** Write the MINIMAL production code needed to make the tests pass. No more than what is necessary. -5. Run tests. They MUST pass. -6. **REFACTOR:** Clean up if needed. Tests must still pass after refactoring. - -**Iron rule:** If at any point you find yourself writing production code before a failing test exists for that behavior, STOP. Write the test first. This is not a suggestion -- it is the process. 
diff --git a/.github/skills/workflows-brainstorm/references/ticket-execution-contract.md b/.github/skills/workflows-brainstorm/references/ticket-execution-contract.md index 563c72f..39df591 100644 --- a/.github/skills/workflows-brainstorm/references/ticket-execution-contract.md +++ b/.github/skills/workflows-brainstorm/references/ticket-execution-contract.md @@ -21,14 +21,32 @@ Required files: `index.md` must include: +- frontmatter with: + - `plan_ref` + - `architecture_ref` or an explicit handoff note + - `execution_shape` + - `ticket_set_status` -- `ready | in_progress | blocked | completed` + - `last_completed_batch` + - `total_batches` - plan path - architecture artifact path or explicit handoff note - execution shape - ticket order -- dependency view +- dependency graph +- execution batches +- file-overlap safety notes for every multi-ticket batch - blocker summary - review summary split into `Blocking gaps` and `Recommendations` +Recommended body shape: + +1. `# Ticket Set: ` +2. `## Dependency Graph` +3. `## Execution Batches` +4. `## Ticket Table` +5. `## Blockers` +6. 
`## Review Summary` + ## Ticket file naming - Keep zero-padded numeric prefixes in execution order: `01-...`, `02-...`, `03-...` @@ -103,6 +121,15 @@ Use these ticket statuses: ## Execution consumption rules +When `/workflows-work` receives `index.md`: + +- treat the index as the authoritative ticket queue for this ticket set +- read `last_completed_batch` to choose the next unfinished batch +- load only the tickets named in that next batch +- execute multiple tickets together only when the batch is explicitly declared as parallel-safe and the index records why the file sets do not conflict +- if the index or ticket files leave overlap safety ambiguous, collapse that batch to sequential execution instead of guessing +- update batch progress in `index.md`, and increment `last_completed_batch` only after every ticket in the batch reaches `completed` + When `/workflows-work` receives a ticket file: - treat that ticket as one pre-scoped execution unit @@ -117,4 +144,6 @@ When `/workflows-review` or `ticket-flow-auditor` consumes ticket artifacts, ver - the ticket still matches the parent plan and architecture - the implementation stayed inside the ticket scope fence unless the change was explicitly documented - dependency order and status changes stayed honest +- the dependency graph and batch partition still match the ticket files +- parallel-safe batches really stay file-disjoint and race-safe - evidence matches the ticket's stated acceptance criteria and test command diff --git a/.github/skills/workflows-brainstorm/references/ticketization-contract.md b/.github/skills/workflows-brainstorm/references/ticketization-contract.md index a05c2e2..6545e3f 100644 --- a/.github/skills/workflows-brainstorm/references/ticketization-contract.md +++ b/.github/skills/workflows-brainstorm/references/ticketization-contract.md @@ -38,7 +38,7 @@ docs/tickets/YYYY-MM-DD-/ Required outputs: -- `index.md` -- ticket-set summary, dependency view, and run guidance +- `index.md` -- 
ticket-set summary, dependency graph, execution batches, run guidance, and progress pointer - `NN-.md` -- one file per ticket in execution order Record the ticket set back into the plan as: @@ -63,6 +63,30 @@ The first version is **local-artifact first**. - Split a packet when one ticket would deliver more than one meaningful outcome, blur ownership, or bury the feature home. - Keep the first ticket a tracer bullet when the selected execution shape is `vertical-slices`. +## Dependency graph and execution batches + +Ticketization must build a conservative ticket dependency graph and then derive execution batches from it. + +- Start from each ticket's explicit `depends_on` edges. +- Then partition tickets into `Batch 1`, `Batch 2`, and so on, where every ticket in a batch depends only on earlier batches. +- A batch may contain multiple tickets only when their declared `files` sets do not overlap and there is no unresolved shared mutable state, migration risk, or boundary ambiguity between them. +- If two tickets might race, overwrite each other, or require the same shared adapter/config surface, keep them in separate sequential batches. +- **Default-to-sequential rule:** if batch safety is uncertain, emit singleton batches instead of guessing at parallelism. + +The index file becomes the authoritative execution queue for ticketized work. `/workflows-work` should be able to read `index.md`, find the next unfinished batch, and execute only that batch without recomputing the whole backlog. + +## Required index progress state + +`index.md` must record batch progress explicitly so ticketized execution can resume from the index alone. 
+ +Required fields: + +- `last_completed_batch` -- integer counter; `0` means no batches completed yet +- `total_batches` -- total number of execution batches in the ticket set +- batch-level status view showing which batches are pending, in progress, blocked, or completed + +`last_completed_batch` advances only after every ticket in that batch is complete. If any ticket in the batch is blocked or still running, do not advance the counter. + ## Required ticket-local context Every generated ticket must carry a compact execution packet that can stand on its own. @@ -116,6 +140,9 @@ That review must check: - feature-home clarity - shared/global boundary honesty - blocker and dependency correctness +- dependency graph correctness +- execution batch safety and parallelization honesty +- file-overlap conflicts between tickets claimed to be parallel-safe - context completeness - ticket size and coupling quality @@ -133,5 +160,7 @@ The ticketization contract is satisfied only when: - the priming skill and ticket schema contract are explicit - local-artifact-first behavior is explicit - `tickets_ref` recording is explicit +- the dependency graph and conservative batch partition are explicit +- the index progress pointer is explicit - each ticket's required local context is explicit - the final ticket-set review step and reviewer are explicit diff --git a/.github/skills/workflows-compound-refresh/references/execution-agent-prompt.md b/.github/skills/workflows-compound-refresh/references/execution-agent-prompt.md index 91b109c..14d9778 100644 --- a/.github/skills/workflows-compound-refresh/references/execution-agent-prompt.md +++ b/.github/skills/workflows-compound-refresh/references/execution-agent-prompt.md @@ -9,23 +9,17 @@ platforms: # Execution Agent Prompt Template -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. 
The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. +This template is the **injected context packet** that `/workflows-work` passes into the named `execution-agent`. -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. +**Canonical execution rules live in `agents/workflow/execution-agent.md`.** `/workflows-work` must load that bundled agent template, then inject the fully populated context packet below when dispatching `Task(execution-agent, prompt=scoped_prompt)`. -**Template authority:** This file is the only valid source for execution-subagent prompts. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. - ---- - -You are an execution agent implementing a specific task from a work plan. Follow the 4-phase protocol below exactly. +**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator so the exact context scaffold ships with generated workflow bundles. -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. - -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. +**Scaffold authority:** This file is the only valid source for the injected execution context packet. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. 
--- -You are an execution agent implementing a specific execution unit from a work plan. Follow the 4-phase protocol below exactly. +The bundled `execution-agent` enforces clean-code, DRY, SOLID, feature-home boundary discipline, doc blocks above non-trivial functions/classes, imports at the top of files unless a real exception exists, explicit failure handling, and the structured execution report contract. Populate the scaffold below completely before dispatch. ## Your Unit @@ -90,189 +84,3 @@ Use `references/tdd-evidence-contract.md` as the shared source of truth for cont - `Red` and `Green` prove behavior coverage. - `Post-Refactor Green` proves cleanup safety. - If no cleanup was needed, still rerun and say so. - ---- - -## Phase 1: Understand Before Building - -Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. - -Before proceeding, confirm the prompt still contains these sections: **Your Task**, **Why This Task Exists**, **Architectural Context**, **Learnings from Previous Tasks**, **Project Conventions**, and the four numbered phases below. If any section is missing or any placeholder is unresolved, stop and report the template integrity problem. - -**If anything is unclear, ambiguous, or could be interpreted multiple ways:** -- List your questions explicitly -- State the assumptions you would make if proceeding without answers -- Ask for clarification before starting work - -**If everything is clear:** -- State your interpretation of the requirements in 2-3 sentences -- State how this unit serves the overall user story (from the WHY context) -- List any assumptions you are making (even obvious ones) -- Proceed to Phase 2 - -Do NOT skip this phase. A few minutes of clarification prevents hours of rework. It is always better to ask than to guess. 
- -## Phase 2: Implement - -{{TDD_SECTION}} - -### While Implementing - -- If you encounter something unexpected or unclear, **STOP and ask** rather than guessing -- Follow existing codebase patterns -- do not invent new conventions -- Reuse before you create: before adding a new function, helper, class, interface, type, or utility, search the touched area for an existing abstraction you can reuse or extend safely -- Apply DRY by reason-to-change: extract shared code only when the behavior and future changes truly belong together; keep duplication local when forced abstractions would hide intent -- Apply SOLID deliberately: introduce new classes or interfaces only when they clarify responsibilities, dependency direction, or substitution boundaries; if a new abstraction does not clearly improve the design, do not add it -- Prefer direct, readable code over helper stacks, wrappers, manager classes, or indirection layers created "just in case" -- Variable, function, class, and interface names must be explicit and unambiguous. Avoid abbreviations like `cb`, `ctx`, `svc`, `obj`, or `tmp` when a clearer name fits the scope -- Keep changes minimal -- implement what is asked, nothing more (YAGNI) -- Do not add "nice to have" features not in the success criteria -- Commit after each logical unit of complete work using the project's commit convention - -### On Test Failure - -If tests fail after implementation: -1. Read the error message carefully -- understand what failed and why -2. Analyze whether the failure is in your implementation or in the test -3. Fix the issue -4. Re-run the test command -5. Repeat up to 3 total attempts -6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly - -## Phase 3: Self-Review - -Before reporting back, review your own work with fresh eyes. Go through each checklist item honestly: - -**Completeness:** -- [ ] Did I implement EVERYTHING in the success criteria? 
-- [ ] Are there edge cases the criteria imply that I did not handle? -- [ ] Did I miss any requirements? - -**Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? -- [ ] Would a user achieve the stated outcome with this code? -- [ ] Did I build anything that doesn't trace back to the success criteria or user story? - -**Quality:** -- [ ] Do names accurately describe what things do (not how they work)? -- [ ] Did I reuse existing code where it already solved this problem cleanly? -- [ ] If I introduced a new abstraction, does it have a clear SOLID-based reason to exist? -- [ ] Did I avoid vague or abbreviated names in favor of explicit intent? -- [ ] Did the core business logic stay in the declared feature home unless the architecture handoff justified a shared/global extraction? -- [ ] Is the code clean and maintainable? -- [ ] Does it follow existing codebase patterns? -- [ ] Is error handling appropriate? - -**Discipline:** -- [ ] Did I avoid overbuilding (YAGNI)? -- [ ] Did I ONLY build what was requested? -- [ ] No "nice to have" additions? -- [ ] No unnecessary abstractions or premature optimization? - -**Testing:** -- [ ] Do tests verify actual behavior (not just mock behavior)? -- [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? - -**Evidence:** -- [ ] Can I show actual test output (not just "tests pass")? -- [ ] For UI changes, do I have a screenshot or visual evidence? -- [ ] For API changes, do I have actual request/response data? - -If you find issues during self-review, **fix them now** before reporting. Do not report known issues -- fix them first. 
- -## Phase 4: Report - -Return a structured execution report in exactly this format: - -```markdown -## Execution Report: [Unit Title] - -### Interpretation -[Your 2-3 sentence interpretation of what was asked] - -### Purpose Served -[Which user story aspect / success criterion this unit delivers, from the WHY context] - -### Assumptions Made -- [List each assumption, even if obvious] - -### What Was Implemented -[Describe what you built and how it works] - -### Files Changed -- `path/to/file` -- created/modified (brief description of change) - -### Test Results -- Command: `[test command]` -- Result: PASS/FAIL -- Attempts: [n] -- Output: -``` -[paste ACTUAL test output here] -``` - -### TDD Evidence -- **Red** - - Command: `[red command]` - - Result: PASS/FAIL - - Evidence: [why this proves the missing behavior existed before the implementation] -- **Green** - - Command: `[green command]` - - Result: PASS/FAIL - - Evidence: [why this proves the requested behavior now passes] -- **Post-Refactor Green** - - Command: `[post-refactor command]` - - Result: PASS/FAIL - - Evidence: [why this proves cleanup/refactor work preserved behavior] - -[If no cleanup was needed, still rerun and say so.] - -### Problems Encountered -[For each problem encountered during implementation:] -- **Error:** [exact error message] -- **Root cause:** [your analysis of why it happened] -- **Fix:** [what you did to resolve it] - -[If no problems: "None"] - -### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] - -[If none: "None"] - -### Self-Review Findings -- [Issues found and fixed during self-review] - -[If none: "Self-review passed -- no issues found"] -``` - ---- - -## Standard Implementation Section - -_This section is included when TDD is not enabled._ - -1. Read referenced files and understand existing patterns -2. 
Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. Implement the task following project conventions -4. Write tests matching the success criteria -5. Run the test command: `{{TEST_COMMAND}}` -6. If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) - ---- - -## TDD Implementation Section - -_This section is included when `tdd_enabled: true` is configured._ - -Follow the red-green-refactor cycle strictly: - -1. Read referenced files and understand existing patterns -2. Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. **RED:** Write tests FIRST based on the success criteria. Run them. They MUST fail -- and they must fail for the RIGHT reason (the behavior is missing, not import errors or syntax problems) -4. **GREEN:** Write the MINIMAL production code needed to make the tests pass. No more than what is necessary. -5. Run tests. They MUST pass. -6. **REFACTOR:** Clean up if needed. Tests must still pass after refactoring. - -**Iron rule:** If at any point you find yourself writing production code before a failing test exists for that behavior, STOP. Write the test first. This is not a suggestion -- it is the process. 
diff --git a/.github/skills/workflows-compound-refresh/references/ticket-execution-contract.md b/.github/skills/workflows-compound-refresh/references/ticket-execution-contract.md index 563c72f..39df591 100644 --- a/.github/skills/workflows-compound-refresh/references/ticket-execution-contract.md +++ b/.github/skills/workflows-compound-refresh/references/ticket-execution-contract.md @@ -21,14 +21,32 @@ Required files: `index.md` must include: +- frontmatter with: + - `plan_ref` + - `architecture_ref` or an explicit handoff note + - `execution_shape` + - `ticket_set_status` -- `ready | in_progress | blocked | completed` + - `last_completed_batch` + - `total_batches` - plan path - architecture artifact path or explicit handoff note - execution shape - ticket order -- dependency view +- dependency graph +- execution batches +- file-overlap safety notes for every multi-ticket batch - blocker summary - review summary split into `Blocking gaps` and `Recommendations` +Recommended body shape: + +1. `# Ticket Set: ` +2. `## Dependency Graph` +3. `## Execution Batches` +4. `## Ticket Table` +5. `## Blockers` +6. 
`## Review Summary` + ## Ticket file naming - Keep zero-padded numeric prefixes in execution order: `01-...`, `02-...`, `03-...` @@ -103,6 +121,15 @@ Use these ticket statuses: ## Execution consumption rules +When `/workflows-work` receives `index.md`: + +- treat the index as the authoritative ticket queue for this ticket set +- read `last_completed_batch` to choose the next unfinished batch +- load only the tickets named in that next batch +- execute multiple tickets together only when the batch is explicitly declared as parallel-safe and the index records why the file sets do not conflict +- if the index or ticket files leave overlap safety ambiguous, collapse that batch to sequential execution instead of guessing +- update batch progress in `index.md`, and increment `last_completed_batch` only after every ticket in the batch reaches `completed` + When `/workflows-work` receives a ticket file: - treat that ticket as one pre-scoped execution unit @@ -117,4 +144,6 @@ When `/workflows-review` or `ticket-flow-auditor` consumes ticket artifacts, ver - the ticket still matches the parent plan and architecture - the implementation stayed inside the ticket scope fence unless the change was explicitly documented - dependency order and status changes stayed honest +- the dependency graph and batch partition still match the ticket files +- parallel-safe batches really stay file-disjoint and race-safe - evidence matches the ticket's stated acceptance criteria and test command diff --git a/.github/skills/workflows-compound-refresh/references/ticketization-contract.md b/.github/skills/workflows-compound-refresh/references/ticketization-contract.md index a05c2e2..6545e3f 100644 --- a/.github/skills/workflows-compound-refresh/references/ticketization-contract.md +++ b/.github/skills/workflows-compound-refresh/references/ticketization-contract.md @@ -38,7 +38,7 @@ docs/tickets/YYYY-MM-DD-/ Required outputs: -- `index.md` -- ticket-set summary, dependency view, and run guidance +- 
`index.md` -- ticket-set summary, dependency graph, execution batches, run guidance, and progress pointer - `NN-.md` -- one file per ticket in execution order Record the ticket set back into the plan as: @@ -63,6 +63,30 @@ The first version is **local-artifact first**. - Split a packet when one ticket would deliver more than one meaningful outcome, blur ownership, or bury the feature home. - Keep the first ticket a tracer bullet when the selected execution shape is `vertical-slices`. +## Dependency graph and execution batches + +Ticketization must build a conservative ticket dependency graph and then derive execution batches from it. + +- Start from each ticket's explicit `depends_on` edges. +- Then partition tickets into `Batch 1`, `Batch 2`, and so on, where every ticket in a batch depends only on earlier batches. +- A batch may contain multiple tickets only when their declared `files` sets do not overlap and there is no unresolved shared mutable state, migration risk, or boundary ambiguity between them. +- If two tickets might race, overwrite each other, or require the same shared adapter/config surface, keep them in separate sequential batches. +- **Default-to-sequential rule:** if batch safety is uncertain, emit singleton batches instead of guessing at parallelism. + +The index file becomes the authoritative execution queue for ticketized work. `/workflows-work` should be able to read `index.md`, find the next unfinished batch, and execute only that batch without recomputing the whole backlog. + +## Required index progress state + +`index.md` must record batch progress explicitly so ticketized execution can resume from the index alone. 
+ +Required fields: + +- `last_completed_batch` -- integer counter; `0` means no batches completed yet +- `total_batches` -- total number of execution batches in the ticket set +- batch-level status view showing which batches are pending, in progress, blocked, or completed + +`last_completed_batch` advances only after every ticket in that batch is complete. If any ticket in the batch is blocked or still running, do not advance the counter. + ## Required ticket-local context Every generated ticket must carry a compact execution packet that can stand on its own. @@ -116,6 +140,9 @@ That review must check: - feature-home clarity - shared/global boundary honesty - blocker and dependency correctness +- dependency graph correctness +- execution batch safety and parallelization honesty +- file-overlap conflicts between tickets claimed to be parallel-safe - context completeness - ticket size and coupling quality @@ -133,5 +160,7 @@ The ticketization contract is satisfied only when: - the priming skill and ticket schema contract are explicit - local-artifact-first behavior is explicit - `tickets_ref` recording is explicit +- the dependency graph and conservative batch partition are explicit +- the index progress pointer is explicit - each ticket's required local context is explicit - the final ticket-set review step and reviewer are explicit diff --git a/.github/skills/workflows-compound/references/execution-agent-prompt.md b/.github/skills/workflows-compound/references/execution-agent-prompt.md index 91b109c..14d9778 100644 --- a/.github/skills/workflows-compound/references/execution-agent-prompt.md +++ b/.github/skills/workflows-compound/references/execution-agent-prompt.md @@ -9,23 +9,17 @@ platforms: # Execution Agent Prompt Template -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. 
The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. +This template is the **injected context packet** that `/workflows-work` passes into the named `execution-agent`. -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. +**Canonical execution rules live in `agents/workflow/execution-agent.md`.** `/workflows-work` must load that bundled agent template, then inject the fully populated context packet below when dispatching `Task(execution-agent, prompt=scoped_prompt)`. -**Template authority:** This file is the only valid source for execution-subagent prompts. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. - ---- - -You are an execution agent implementing a specific task from a work plan. Follow the 4-phase protocol below exactly. +**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator so the exact context scaffold ships with generated workflow bundles. -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. - -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. +**Scaffold authority:** This file is the only valid source for the injected execution context packet. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. 
--- -You are an execution agent implementing a specific execution unit from a work plan. Follow the 4-phase protocol below exactly. +The bundled `execution-agent` enforces clean-code, DRY, SOLID, feature-home boundary discipline, doc blocks above non-trivial functions/classes, imports at the top of files unless a real exception exists, explicit failure handling, and the structured execution report contract. Populate the scaffold below completely before dispatch. ## Your Unit @@ -90,189 +84,3 @@ Use `references/tdd-evidence-contract.md` as the shared source of truth for cont - `Red` and `Green` prove behavior coverage. - `Post-Refactor Green` proves cleanup safety. - If no cleanup was needed, still rerun and say so. - ---- - -## Phase 1: Understand Before Building - -Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. - -Before proceeding, confirm the prompt still contains these sections: **Your Task**, **Why This Task Exists**, **Architectural Context**, **Learnings from Previous Tasks**, **Project Conventions**, and the four numbered phases below. If any section is missing or any placeholder is unresolved, stop and report the template integrity problem. - -**If anything is unclear, ambiguous, or could be interpreted multiple ways:** -- List your questions explicitly -- State the assumptions you would make if proceeding without answers -- Ask for clarification before starting work - -**If everything is clear:** -- State your interpretation of the requirements in 2-3 sentences -- State how this unit serves the overall user story (from the WHY context) -- List any assumptions you are making (even obvious ones) -- Proceed to Phase 2 - -Do NOT skip this phase. A few minutes of clarification prevents hours of rework. It is always better to ask than to guess. 
- -## Phase 2: Implement - -{{TDD_SECTION}} - -### While Implementing - -- If you encounter something unexpected or unclear, **STOP and ask** rather than guessing -- Follow existing codebase patterns -- do not invent new conventions -- Reuse before you create: before adding a new function, helper, class, interface, type, or utility, search the touched area for an existing abstraction you can reuse or extend safely -- Apply DRY by reason-to-change: extract shared code only when the behavior and future changes truly belong together; keep duplication local when forced abstractions would hide intent -- Apply SOLID deliberately: introduce new classes or interfaces only when they clarify responsibilities, dependency direction, or substitution boundaries; if a new abstraction does not clearly improve the design, do not add it -- Prefer direct, readable code over helper stacks, wrappers, manager classes, or indirection layers created "just in case" -- Variable, function, class, and interface names must be explicit and unambiguous. Avoid abbreviations like `cb`, `ctx`, `svc`, `obj`, or `tmp` when a clearer name fits the scope -- Keep changes minimal -- implement what is asked, nothing more (YAGNI) -- Do not add "nice to have" features not in the success criteria -- Commit after each logical unit of complete work using the project's commit convention - -### On Test Failure - -If tests fail after implementation: -1. Read the error message carefully -- understand what failed and why -2. Analyze whether the failure is in your implementation or in the test -3. Fix the issue -4. Re-run the test command -5. Repeat up to 3 total attempts -6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly - -## Phase 3: Self-Review - -Before reporting back, review your own work with fresh eyes. Go through each checklist item honestly: - -**Completeness:** -- [ ] Did I implement EVERYTHING in the success criteria? 
-- [ ] Are there edge cases the criteria imply that I did not handle? -- [ ] Did I miss any requirements? - -**Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? -- [ ] Would a user achieve the stated outcome with this code? -- [ ] Did I build anything that doesn't trace back to the success criteria or user story? - -**Quality:** -- [ ] Do names accurately describe what things do (not how they work)? -- [ ] Did I reuse existing code where it already solved this problem cleanly? -- [ ] If I introduced a new abstraction, does it have a clear SOLID-based reason to exist? -- [ ] Did I avoid vague or abbreviated names in favor of explicit intent? -- [ ] Did the core business logic stay in the declared feature home unless the architecture handoff justified a shared/global extraction? -- [ ] Is the code clean and maintainable? -- [ ] Does it follow existing codebase patterns? -- [ ] Is error handling appropriate? - -**Discipline:** -- [ ] Did I avoid overbuilding (YAGNI)? -- [ ] Did I ONLY build what was requested? -- [ ] No "nice to have" additions? -- [ ] No unnecessary abstractions or premature optimization? - -**Testing:** -- [ ] Do tests verify actual behavior (not just mock behavior)? -- [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? - -**Evidence:** -- [ ] Can I show actual test output (not just "tests pass")? -- [ ] For UI changes, do I have a screenshot or visual evidence? -- [ ] For API changes, do I have actual request/response data? - -If you find issues during self-review, **fix them now** before reporting. Do not report known issues -- fix them first. 
- -## Phase 4: Report - -Return a structured execution report in exactly this format: - -```markdown -## Execution Report: [Unit Title] - -### Interpretation -[Your 2-3 sentence interpretation of what was asked] - -### Purpose Served -[Which user story aspect / success criterion this unit delivers, from the WHY context] - -### Assumptions Made -- [List each assumption, even if obvious] - -### What Was Implemented -[Describe what you built and how it works] - -### Files Changed -- `path/to/file` -- created/modified (brief description of change) - -### Test Results -- Command: `[test command]` -- Result: PASS/FAIL -- Attempts: [n] -- Output: -``` -[paste ACTUAL test output here] -``` - -### TDD Evidence -- **Red** - - Command: `[red command]` - - Result: PASS/FAIL - - Evidence: [why this proves the missing behavior existed before the implementation] -- **Green** - - Command: `[green command]` - - Result: PASS/FAIL - - Evidence: [why this proves the requested behavior now passes] -- **Post-Refactor Green** - - Command: `[post-refactor command]` - - Result: PASS/FAIL - - Evidence: [why this proves cleanup/refactor work preserved behavior] - -[If no cleanup was needed, still rerun and say so.] - -### Problems Encountered -[For each problem encountered during implementation:] -- **Error:** [exact error message] -- **Root cause:** [your analysis of why it happened] -- **Fix:** [what you did to resolve it] - -[If no problems: "None"] - -### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] - -[If none: "None"] - -### Self-Review Findings -- [Issues found and fixed during self-review] - -[If none: "Self-review passed -- no issues found"] -``` - ---- - -## Standard Implementation Section - -_This section is included when TDD is not enabled._ - -1. Read referenced files and understand existing patterns -2. 
Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. Implement the task following project conventions -4. Write tests matching the success criteria -5. Run the test command: `{{TEST_COMMAND}}` -6. If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) - ---- - -## TDD Implementation Section - -_This section is included when `tdd_enabled: true` is configured._ - -Follow the red-green-refactor cycle strictly: - -1. Read referenced files and understand existing patterns -2. Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. **RED:** Write tests FIRST based on the success criteria. Run them. They MUST fail -- and they must fail for the RIGHT reason (the behavior is missing, not import errors or syntax problems) -4. **GREEN:** Write the MINIMAL production code needed to make the tests pass. No more than what is necessary. -5. Run tests. They MUST pass. -6. **REFACTOR:** Clean up if needed. Tests must still pass after refactoring. - -**Iron rule:** If at any point you find yourself writing production code before a failing test exists for that behavior, STOP. Write the test first. This is not a suggestion -- it is the process. 
diff --git a/.github/skills/workflows-compound/references/ticket-execution-contract.md b/.github/skills/workflows-compound/references/ticket-execution-contract.md index 563c72f..39df591 100644 --- a/.github/skills/workflows-compound/references/ticket-execution-contract.md +++ b/.github/skills/workflows-compound/references/ticket-execution-contract.md @@ -21,14 +21,32 @@ Required files: `index.md` must include: +- frontmatter with: + - `plan_ref` + - `architecture_ref` or an explicit handoff note + - `execution_shape` + - `ticket_set_status` -- `ready | in_progress | blocked | completed` + - `last_completed_batch` + - `total_batches` - plan path - architecture artifact path or explicit handoff note - execution shape - ticket order -- dependency view +- dependency graph +- execution batches +- file-overlap safety notes for every multi-ticket batch - blocker summary - review summary split into `Blocking gaps` and `Recommendations` +Recommended body shape: + +1. `# Ticket Set: ` +2. `## Dependency Graph` +3. `## Execution Batches` +4. `## Ticket Table` +5. `## Blockers` +6. 
`## Review Summary` + ## Ticket file naming - Keep zero-padded numeric prefixes in execution order: `01-...`, `02-...`, `03-...` @@ -103,6 +121,15 @@ Use these ticket statuses: ## Execution consumption rules +When `/workflows-work` receives `index.md`: + +- treat the index as the authoritative ticket queue for this ticket set +- read `last_completed_batch` to choose the next unfinished batch +- load only the tickets named in that next batch +- execute multiple tickets together only when the batch is explicitly declared as parallel-safe and the index records why the file sets do not conflict +- if the index or ticket files leave overlap safety ambiguous, collapse that batch to sequential execution instead of guessing +- update batch progress in `index.md`, and increment `last_completed_batch` only after every ticket in the batch reaches `completed` + When `/workflows-work` receives a ticket file: - treat that ticket as one pre-scoped execution unit @@ -117,4 +144,6 @@ When `/workflows-review` or `ticket-flow-auditor` consumes ticket artifacts, ver - the ticket still matches the parent plan and architecture - the implementation stayed inside the ticket scope fence unless the change was explicitly documented - dependency order and status changes stayed honest +- the dependency graph and batch partition still match the ticket files +- parallel-safe batches really stay file-disjoint and race-safe - evidence matches the ticket's stated acceptance criteria and test command diff --git a/.github/skills/workflows-compound/references/ticketization-contract.md b/.github/skills/workflows-compound/references/ticketization-contract.md index a05c2e2..6545e3f 100644 --- a/.github/skills/workflows-compound/references/ticketization-contract.md +++ b/.github/skills/workflows-compound/references/ticketization-contract.md @@ -38,7 +38,7 @@ docs/tickets/YYYY-MM-DD-/ Required outputs: -- `index.md` -- ticket-set summary, dependency view, and run guidance +- `index.md` -- ticket-set 
summary, dependency graph, execution batches, run guidance, and progress pointer - `NN-.md` -- one file per ticket in execution order Record the ticket set back into the plan as: @@ -63,6 +63,30 @@ The first version is **local-artifact first**. - Split a packet when one ticket would deliver more than one meaningful outcome, blur ownership, or bury the feature home. - Keep the first ticket a tracer bullet when the selected execution shape is `vertical-slices`. +## Dependency graph and execution batches + +Ticketization must build a conservative ticket dependency graph and then derive execution batches from it. + +- Start from each ticket's explicit `depends_on` edges. +- Then partition tickets into `Batch 1`, `Batch 2`, and so on, where every ticket in a batch depends only on earlier batches. +- A batch may contain multiple tickets only when their declared `files` sets do not overlap and there is no unresolved shared mutable state, migration risk, or boundary ambiguity between them. +- If two tickets might race, overwrite each other, or require the same shared adapter/config surface, keep them in separate sequential batches. +- **Default-to-sequential rule:** if batch safety is uncertain, emit singleton batches instead of guessing at parallelism. + +The index file becomes the authoritative execution queue for ticketized work. `/workflows-work` should be able to read `index.md`, find the next unfinished batch, and execute only that batch without recomputing the whole backlog. + +## Required index progress state + +`index.md` must record batch progress explicitly so ticketized execution can resume from the index alone. 
+ +Required fields: + +- `last_completed_batch` -- integer counter; `0` means no batches completed yet +- `total_batches` -- total number of execution batches in the ticket set +- batch-level status view showing which batches are pending, in progress, blocked, or completed + +`last_completed_batch` advances only after every ticket in that batch is complete. If any ticket in the batch is blocked or still running, do not advance the counter. + ## Required ticket-local context Every generated ticket must carry a compact execution packet that can stand on its own. @@ -116,6 +140,9 @@ That review must check: - feature-home clarity - shared/global boundary honesty - blocker and dependency correctness +- dependency graph correctness +- execution batch safety and parallelization honesty +- file-overlap conflicts between tickets claimed to be parallel-safe - context completeness - ticket size and coupling quality @@ -133,5 +160,7 @@ The ticketization contract is satisfied only when: - the priming skill and ticket schema contract are explicit - local-artifact-first behavior is explicit - `tickets_ref` recording is explicit +- the dependency graph and conservative batch partition are explicit +- the index progress pointer is explicit - each ticket's required local context is explicit - the final ticket-set review step and reviewer are explicit diff --git a/.github/skills/workflows-constitution/references/execution-agent-prompt.md b/.github/skills/workflows-constitution/references/execution-agent-prompt.md index 91b109c..14d9778 100644 --- a/.github/skills/workflows-constitution/references/execution-agent-prompt.md +++ b/.github/skills/workflows-constitution/references/execution-agent-prompt.md @@ -9,23 +9,17 @@ platforms: # Execution Agent Prompt Template -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. 
The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. +This template is the **injected context packet** that `/workflows-work` passes into the named `execution-agent`. -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. +**Canonical execution rules live in `agents/workflow/execution-agent.md`.** `/workflows-work` must load that bundled agent template, then inject the fully populated context packet below when dispatching `Task(execution-agent, prompt=scoped_prompt)`. -**Template authority:** This file is the only valid source for execution-subagent prompts. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. - ---- - -You are an execution agent implementing a specific task from a work plan. Follow the 4-phase protocol below exactly. +**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator so the exact context scaffold ships with generated workflow bundles. -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. - -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. +**Scaffold authority:** This file is the only valid source for the injected execution context packet. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. 
--- -You are an execution agent implementing a specific execution unit from a work plan. Follow the 4-phase protocol below exactly. +The bundled `execution-agent` enforces clean-code, DRY, SOLID, feature-home boundary discipline, doc blocks above non-trivial functions/classes, imports at the top of files unless a real exception exists, explicit failure handling, and the structured execution report contract. Populate the scaffold below completely before dispatch. ## Your Unit @@ -90,189 +84,3 @@ Use `references/tdd-evidence-contract.md` as the shared source of truth for cont - `Red` and `Green` prove behavior coverage. - `Post-Refactor Green` proves cleanup safety. - If no cleanup was needed, still rerun and say so. - ---- - -## Phase 1: Understand Before Building - -Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. - -Before proceeding, confirm the prompt still contains these sections: **Your Task**, **Why This Task Exists**, **Architectural Context**, **Learnings from Previous Tasks**, **Project Conventions**, and the four numbered phases below. If any section is missing or any placeholder is unresolved, stop and report the template integrity problem. - -**If anything is unclear, ambiguous, or could be interpreted multiple ways:** -- List your questions explicitly -- State the assumptions you would make if proceeding without answers -- Ask for clarification before starting work - -**If everything is clear:** -- State your interpretation of the requirements in 2-3 sentences -- State how this unit serves the overall user story (from the WHY context) -- List any assumptions you are making (even obvious ones) -- Proceed to Phase 2 - -Do NOT skip this phase. A few minutes of clarification prevents hours of rework. It is always better to ask than to guess. 
- -## Phase 2: Implement - -{{TDD_SECTION}} - -### While Implementing - -- If you encounter something unexpected or unclear, **STOP and ask** rather than guessing -- Follow existing codebase patterns -- do not invent new conventions -- Reuse before you create: before adding a new function, helper, class, interface, type, or utility, search the touched area for an existing abstraction you can reuse or extend safely -- Apply DRY by reason-to-change: extract shared code only when the behavior and future changes truly belong together; keep duplication local when forced abstractions would hide intent -- Apply SOLID deliberately: introduce new classes or interfaces only when they clarify responsibilities, dependency direction, or substitution boundaries; if a new abstraction does not clearly improve the design, do not add it -- Prefer direct, readable code over helper stacks, wrappers, manager classes, or indirection layers created "just in case" -- Variable, function, class, and interface names must be explicit and unambiguous. Avoid abbreviations like `cb`, `ctx`, `svc`, `obj`, or `tmp` when a clearer name fits the scope -- Keep changes minimal -- implement what is asked, nothing more (YAGNI) -- Do not add "nice to have" features not in the success criteria -- Commit after each logical unit of complete work using the project's commit convention - -### On Test Failure - -If tests fail after implementation: -1. Read the error message carefully -- understand what failed and why -2. Analyze whether the failure is in your implementation or in the test -3. Fix the issue -4. Re-run the test command -5. Repeat up to 3 total attempts -6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly - -## Phase 3: Self-Review - -Before reporting back, review your own work with fresh eyes. Go through each checklist item honestly: - -**Completeness:** -- [ ] Did I implement EVERYTHING in the success criteria? 
-- [ ] Are there edge cases the criteria imply that I did not handle? -- [ ] Did I miss any requirements? - -**Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? -- [ ] Would a user achieve the stated outcome with this code? -- [ ] Did I build anything that doesn't trace back to the success criteria or user story? - -**Quality:** -- [ ] Do names accurately describe what things do (not how they work)? -- [ ] Did I reuse existing code where it already solved this problem cleanly? -- [ ] If I introduced a new abstraction, does it have a clear SOLID-based reason to exist? -- [ ] Did I avoid vague or abbreviated names in favor of explicit intent? -- [ ] Did the core business logic stay in the declared feature home unless the architecture handoff justified a shared/global extraction? -- [ ] Is the code clean and maintainable? -- [ ] Does it follow existing codebase patterns? -- [ ] Is error handling appropriate? - -**Discipline:** -- [ ] Did I avoid overbuilding (YAGNI)? -- [ ] Did I ONLY build what was requested? -- [ ] No "nice to have" additions? -- [ ] No unnecessary abstractions or premature optimization? - -**Testing:** -- [ ] Do tests verify actual behavior (not just mock behavior)? -- [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? - -**Evidence:** -- [ ] Can I show actual test output (not just "tests pass")? -- [ ] For UI changes, do I have a screenshot or visual evidence? -- [ ] For API changes, do I have actual request/response data? - -If you find issues during self-review, **fix them now** before reporting. Do not report known issues -- fix them first. 
- -## Phase 4: Report - -Return a structured execution report in exactly this format: - -```markdown -## Execution Report: [Unit Title] - -### Interpretation -[Your 2-3 sentence interpretation of what was asked] - -### Purpose Served -[Which user story aspect / success criterion this unit delivers, from the WHY context] - -### Assumptions Made -- [List each assumption, even if obvious] - -### What Was Implemented -[Describe what you built and how it works] - -### Files Changed -- `path/to/file` -- created/modified (brief description of change) - -### Test Results -- Command: `[test command]` -- Result: PASS/FAIL -- Attempts: [n] -- Output: -``` -[paste ACTUAL test output here] -``` - -### TDD Evidence -- **Red** - - Command: `[red command]` - - Result: PASS/FAIL - - Evidence: [why this proves the missing behavior existed before the implementation] -- **Green** - - Command: `[green command]` - - Result: PASS/FAIL - - Evidence: [why this proves the requested behavior now passes] -- **Post-Refactor Green** - - Command: `[post-refactor command]` - - Result: PASS/FAIL - - Evidence: [why this proves cleanup/refactor work preserved behavior] - -[If no cleanup was needed, still rerun and say so.] - -### Problems Encountered -[For each problem encountered during implementation:] -- **Error:** [exact error message] -- **Root cause:** [your analysis of why it happened] -- **Fix:** [what you did to resolve it] - -[If no problems: "None"] - -### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] - -[If none: "None"] - -### Self-Review Findings -- [Issues found and fixed during self-review] - -[If none: "Self-review passed -- no issues found"] -``` - ---- - -## Standard Implementation Section - -_This section is included when TDD is not enabled._ - -1. Read referenced files and understand existing patterns -2. 
Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. Implement the task following project conventions -4. Write tests matching the success criteria -5. Run the test command: `{{TEST_COMMAND}}` -6. If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) - ---- - -## TDD Implementation Section - -_This section is included when `tdd_enabled: true` is configured._ - -Follow the red-green-refactor cycle strictly: - -1. Read referenced files and understand existing patterns -2. Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. **RED:** Write tests FIRST based on the success criteria. Run them. They MUST fail -- and they must fail for the RIGHT reason (the behavior is missing, not import errors or syntax problems) -4. **GREEN:** Write the MINIMAL production code needed to make the tests pass. No more than what is necessary. -5. Run tests. They MUST pass. -6. **REFACTOR:** Clean up if needed. Tests must still pass after refactoring. - -**Iron rule:** If at any point you find yourself writing production code before a failing test exists for that behavior, STOP. Write the test first. This is not a suggestion -- it is the process. 
diff --git a/.github/skills/workflows-constitution/references/ticket-execution-contract.md b/.github/skills/workflows-constitution/references/ticket-execution-contract.md index 563c72f..39df591 100644 --- a/.github/skills/workflows-constitution/references/ticket-execution-contract.md +++ b/.github/skills/workflows-constitution/references/ticket-execution-contract.md @@ -21,14 +21,32 @@ Required files: `index.md` must include: +- frontmatter with: + - `plan_ref` + - `architecture_ref` or an explicit handoff note + - `execution_shape` + - `ticket_set_status` -- `ready | in_progress | blocked | completed` + - `last_completed_batch` + - `total_batches` - plan path - architecture artifact path or explicit handoff note - execution shape - ticket order -- dependency view +- dependency graph +- execution batches +- file-overlap safety notes for every multi-ticket batch - blocker summary - review summary split into `Blocking gaps` and `Recommendations` +Recommended body shape: + +1. `# Ticket Set: <title>` +2. `## Dependency Graph` +3. `## Execution Batches` +4. `## Ticket Table` +5. `## Blockers` +6. 
`## Review Summary` + ## Ticket file naming - Keep zero-padded numeric prefixes in execution order: `01-...`, `02-...`, `03-...` @@ -103,6 +121,15 @@ Use these ticket statuses: ## Execution consumption rules +When `/workflows-work` receives `index.md`: + +- treat the index as the authoritative ticket queue for this ticket set +- read `last_completed_batch` to choose the next unfinished batch +- load only the tickets named in that next batch +- execute multiple tickets together only when the batch is explicitly declared as parallel-safe and the index records why the file sets do not conflict +- if the index or ticket files leave overlap safety ambiguous, collapse that batch to sequential execution instead of guessing +- update batch progress in `index.md`, and increment `last_completed_batch` only after every ticket in the batch reaches `completed` + When `/workflows-work` receives a ticket file: - treat that ticket as one pre-scoped execution unit @@ -117,4 +144,6 @@ When `/workflows-review` or `ticket-flow-auditor` consumes ticket artifacts, ver - the ticket still matches the parent plan and architecture - the implementation stayed inside the ticket scope fence unless the change was explicitly documented - dependency order and status changes stayed honest +- the dependency graph and batch partition still match the ticket files +- parallel-safe batches really stay file-disjoint and race-safe - evidence matches the ticket's stated acceptance criteria and test command diff --git a/.github/skills/workflows-constitution/references/ticketization-contract.md b/.github/skills/workflows-constitution/references/ticketization-contract.md index a05c2e2..6545e3f 100644 --- a/.github/skills/workflows-constitution/references/ticketization-contract.md +++ b/.github/skills/workflows-constitution/references/ticketization-contract.md @@ -38,7 +38,7 @@ docs/tickets/YYYY-MM-DD-/ Required outputs: -- `index.md` -- ticket-set summary, dependency view, and run guidance +- `index.md` -- 
ticket-set summary, dependency graph, execution batches, run guidance, and progress pointer - `NN-.md` -- one file per ticket in execution order Record the ticket set back into the plan as: @@ -63,6 +63,30 @@ The first version is **local-artifact first**. - Split a packet when one ticket would deliver more than one meaningful outcome, blur ownership, or bury the feature home. - Keep the first ticket a tracer bullet when the selected execution shape is `vertical-slices`. +## Dependency graph and execution batches + +Ticketization must build a conservative ticket dependency graph and then derive execution batches from it. + +- Start from each ticket's explicit `depends_on` edges. +- Then partition tickets into `Batch 1`, `Batch 2`, and so on, where every ticket in a batch depends only on earlier batches. +- A batch may contain multiple tickets only when their declared `files` sets do not overlap and there is no unresolved shared mutable state, migration risk, or boundary ambiguity between them. +- If two tickets might race, overwrite each other, or require the same shared adapter/config surface, keep them in separate sequential batches. +- **Default-to-sequential rule:** if batch safety is uncertain, emit singleton batches instead of guessing at parallelism. + +The index file becomes the authoritative execution queue for ticketized work. `/workflows-work` should be able to read `index.md`, find the next unfinished batch, and execute only that batch without recomputing the whole backlog. + +## Required index progress state + +`index.md` must record batch progress explicitly so ticketized execution can resume from the index alone. 
+ +Required fields: + +- `last_completed_batch` -- integer counter; `0` means no batches completed yet +- `total_batches` -- total number of execution batches in the ticket set +- batch-level status view showing which batches are pending, in progress, blocked, or completed + +`last_completed_batch` advances only after every ticket in that batch is complete. If any ticket in the batch is blocked or still running, do not advance the counter. + ## Required ticket-local context Every generated ticket must carry a compact execution packet that can stand on its own. @@ -116,6 +140,9 @@ That review must check: - feature-home clarity - shared/global boundary honesty - blocker and dependency correctness +- dependency graph correctness +- execution batch safety and parallelization honesty +- file-overlap conflicts between tickets claimed to be parallel-safe - context completeness - ticket size and coupling quality @@ -133,5 +160,7 @@ The ticketization contract is satisfied only when: - the priming skill and ticket schema contract are explicit - local-artifact-first behavior is explicit - `tickets_ref` recording is explicit +- the dependency graph and conservative batch partition are explicit +- the index progress pointer is explicit - each ticket's required local context is explicit - the final ticket-set review step and reviewer are explicit diff --git a/.github/skills/workflows-ideate/references/execution-agent-prompt.md b/.github/skills/workflows-ideate/references/execution-agent-prompt.md index 91b109c..14d9778 100644 --- a/.github/skills/workflows-ideate/references/execution-agent-prompt.md +++ b/.github/skills/workflows-ideate/references/execution-agent-prompt.md @@ -9,23 +9,17 @@ platforms: # Execution Agent Prompt Template -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. 
The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. +This template is the **injected context packet** that `/workflows-work` passes into the named `execution-agent`. -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. +**Canonical execution rules live in `agents/workflow/execution-agent.md`.** `/workflows-work` must load that bundled agent template, then inject the fully populated context packet below when dispatching `Task(execution-agent, prompt=scoped_prompt)`. -**Template authority:** This file is the only valid source for execution-subagent prompts. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. - ---- - -You are an execution agent implementing a specific task from a work plan. Follow the 4-phase protocol below exactly. +**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator so the exact context scaffold ships with generated workflow bundles. -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. - -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. +**Scaffold authority:** This file is the only valid source for the injected execution context packet. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. 
--- -You are an execution agent implementing a specific execution unit from a work plan. Follow the 4-phase protocol below exactly. +The bundled `execution-agent` enforces clean-code, DRY, SOLID, feature-home boundary discipline, doc blocks above non-trivial functions/classes, imports at the top of files unless a real exception exists, explicit failure handling, and the structured execution report contract. Populate the scaffold below completely before dispatch. ## Your Unit @@ -90,189 +84,3 @@ Use `references/tdd-evidence-contract.md` as the shared source of truth for cont - `Red` and `Green` prove behavior coverage. - `Post-Refactor Green` proves cleanup safety. - If no cleanup was needed, still rerun and say so. - ---- - -## Phase 1: Understand Before Building - -Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. - -Before proceeding, confirm the prompt still contains these sections: **Your Task**, **Why This Task Exists**, **Architectural Context**, **Learnings from Previous Tasks**, **Project Conventions**, and the four numbered phases below. If any section is missing or any placeholder is unresolved, stop and report the template integrity problem. - -**If anything is unclear, ambiguous, or could be interpreted multiple ways:** -- List your questions explicitly -- State the assumptions you would make if proceeding without answers -- Ask for clarification before starting work - -**If everything is clear:** -- State your interpretation of the requirements in 2-3 sentences -- State how this unit serves the overall user story (from the WHY context) -- List any assumptions you are making (even obvious ones) -- Proceed to Phase 2 - -Do NOT skip this phase. A few minutes of clarification prevents hours of rework. It is always better to ask than to guess. 
- -## Phase 2: Implement - -{{TDD_SECTION}} - -### While Implementing - -- If you encounter something unexpected or unclear, **STOP and ask** rather than guessing -- Follow existing codebase patterns -- do not invent new conventions -- Reuse before you create: before adding a new function, helper, class, interface, type, or utility, search the touched area for an existing abstraction you can reuse or extend safely -- Apply DRY by reason-to-change: extract shared code only when the behavior and future changes truly belong together; keep duplication local when forced abstractions would hide intent -- Apply SOLID deliberately: introduce new classes or interfaces only when they clarify responsibilities, dependency direction, or substitution boundaries; if a new abstraction does not clearly improve the design, do not add it -- Prefer direct, readable code over helper stacks, wrappers, manager classes, or indirection layers created "just in case" -- Variable, function, class, and interface names must be explicit and unambiguous. Avoid abbreviations like `cb`, `ctx`, `svc`, `obj`, or `tmp` when a clearer name fits the scope -- Keep changes minimal -- implement what is asked, nothing more (YAGNI) -- Do not add "nice to have" features not in the success criteria -- Commit after each logical unit of complete work using the project's commit convention - -### On Test Failure - -If tests fail after implementation: -1. Read the error message carefully -- understand what failed and why -2. Analyze whether the failure is in your implementation or in the test -3. Fix the issue -4. Re-run the test command -5. Repeat up to 3 total attempts -6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly - -## Phase 3: Self-Review - -Before reporting back, review your own work with fresh eyes. Go through each checklist item honestly: - -**Completeness:** -- [ ] Did I implement EVERYTHING in the success criteria? 
-- [ ] Are there edge cases the criteria imply that I did not handle? -- [ ] Did I miss any requirements? - -**Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? -- [ ] Would a user achieve the stated outcome with this code? -- [ ] Did I build anything that doesn't trace back to the success criteria or user story? - -**Quality:** -- [ ] Do names accurately describe what things do (not how they work)? -- [ ] Did I reuse existing code where it already solved this problem cleanly? -- [ ] If I introduced a new abstraction, does it have a clear SOLID-based reason to exist? -- [ ] Did I avoid vague or abbreviated names in favor of explicit intent? -- [ ] Did the core business logic stay in the declared feature home unless the architecture handoff justified a shared/global extraction? -- [ ] Is the code clean and maintainable? -- [ ] Does it follow existing codebase patterns? -- [ ] Is error handling appropriate? - -**Discipline:** -- [ ] Did I avoid overbuilding (YAGNI)? -- [ ] Did I ONLY build what was requested? -- [ ] No "nice to have" additions? -- [ ] No unnecessary abstractions or premature optimization? - -**Testing:** -- [ ] Do tests verify actual behavior (not just mock behavior)? -- [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? - -**Evidence:** -- [ ] Can I show actual test output (not just "tests pass")? -- [ ] For UI changes, do I have a screenshot or visual evidence? -- [ ] For API changes, do I have actual request/response data? - -If you find issues during self-review, **fix them now** before reporting. Do not report known issues -- fix them first. 
- -## Phase 4: Report - -Return a structured execution report in exactly this format: - -```markdown -## Execution Report: [Unit Title] - -### Interpretation -[Your 2-3 sentence interpretation of what was asked] - -### Purpose Served -[Which user story aspect / success criterion this unit delivers, from the WHY context] - -### Assumptions Made -- [List each assumption, even if obvious] - -### What Was Implemented -[Describe what you built and how it works] - -### Files Changed -- `path/to/file` -- created/modified (brief description of change) - -### Test Results -- Command: `[test command]` -- Result: PASS/FAIL -- Attempts: [n] -- Output: -``` -[paste ACTUAL test output here] -``` - -### TDD Evidence -- **Red** - - Command: `[red command]` - - Result: PASS/FAIL - - Evidence: [why this proves the missing behavior existed before the implementation] -- **Green** - - Command: `[green command]` - - Result: PASS/FAIL - - Evidence: [why this proves the requested behavior now passes] -- **Post-Refactor Green** - - Command: `[post-refactor command]` - - Result: PASS/FAIL - - Evidence: [why this proves cleanup/refactor work preserved behavior] - -[If no cleanup was needed, still rerun and say so.] - -### Problems Encountered -[For each problem encountered during implementation:] -- **Error:** [exact error message] -- **Root cause:** [your analysis of why it happened] -- **Fix:** [what you did to resolve it] - -[If no problems: "None"] - -### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] - -[If none: "None"] - -### Self-Review Findings -- [Issues found and fixed during self-review] - -[If none: "Self-review passed -- no issues found"] -``` - ---- - -## Standard Implementation Section - -_This section is included when TDD is not enabled._ - -1. Read referenced files and understand existing patterns -2. 
Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. Implement the task following project conventions -4. Write tests matching the success criteria -5. Run the test command: `{{TEST_COMMAND}}` -6. If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) - ---- - -## TDD Implementation Section - -_This section is included when `tdd_enabled: true` is configured._ - -Follow the red-green-refactor cycle strictly: - -1. Read referenced files and understand existing patterns -2. Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. **RED:** Write tests FIRST based on the success criteria. Run them. They MUST fail -- and they must fail for the RIGHT reason (the behavior is missing, not import errors or syntax problems) -4. **GREEN:** Write the MINIMAL production code needed to make the tests pass. No more than what is necessary. -5. Run tests. They MUST pass. -6. **REFACTOR:** Clean up if needed. Tests must still pass after refactoring. - -**Iron rule:** If at any point you find yourself writing production code before a failing test exists for that behavior, STOP. Write the test first. This is not a suggestion -- it is the process. 
diff --git a/.github/skills/workflows-ideate/references/ticket-execution-contract.md b/.github/skills/workflows-ideate/references/ticket-execution-contract.md index 563c72f..39df591 100644 --- a/.github/skills/workflows-ideate/references/ticket-execution-contract.md +++ b/.github/skills/workflows-ideate/references/ticket-execution-contract.md @@ -21,14 +21,32 @@ Required files: `index.md` must include: +- frontmatter with: + - `plan_ref` + - `architecture_ref` or an explicit handoff note + - `execution_shape` + - `ticket_set_status` -- `ready | in_progress | blocked | completed` + - `last_completed_batch` + - `total_batches` - plan path - architecture artifact path or explicit handoff note - execution shape - ticket order -- dependency view +- dependency graph +- execution batches +- file-overlap safety notes for every multi-ticket batch - blocker summary - review summary split into `Blocking gaps` and `Recommendations` +Recommended body shape: + +1. `# Ticket Set: <title>` +2. `## Dependency Graph` +3. `## Execution Batches` +4. `## Ticket Table` +5. `## Blockers` +6. 
`## Review Summary` + ## Ticket file naming - Keep zero-padded numeric prefixes in execution order: `01-...`, `02-...`, `03-...` @@ -103,6 +121,15 @@ Use these ticket statuses: ## Execution consumption rules +When `/workflows-work` receives `index.md`: + +- treat the index as the authoritative ticket queue for this ticket set +- read `last_completed_batch` to choose the next unfinished batch +- load only the tickets named in that next batch +- execute multiple tickets together only when the batch is explicitly declared as parallel-safe and the index records why the file sets do not conflict +- if the index or ticket files leave overlap safety ambiguous, collapse that batch to sequential execution instead of guessing +- update batch progress in `index.md`, and increment `last_completed_batch` only after every ticket in the batch reaches `completed` + When `/workflows-work` receives a ticket file: - treat that ticket as one pre-scoped execution unit @@ -117,4 +144,6 @@ When `/workflows-review` or `ticket-flow-auditor` consumes ticket artifacts, ver - the ticket still matches the parent plan and architecture - the implementation stayed inside the ticket scope fence unless the change was explicitly documented - dependency order and status changes stayed honest +- the dependency graph and batch partition still match the ticket files +- parallel-safe batches really stay file-disjoint and race-safe - evidence matches the ticket's stated acceptance criteria and test command diff --git a/.github/skills/workflows-ideate/references/ticketization-contract.md b/.github/skills/workflows-ideate/references/ticketization-contract.md index a05c2e2..6545e3f 100644 --- a/.github/skills/workflows-ideate/references/ticketization-contract.md +++ b/.github/skills/workflows-ideate/references/ticketization-contract.md @@ -38,7 +38,7 @@ docs/tickets/YYYY-MM-DD-/ Required outputs: -- `index.md` -- ticket-set summary, dependency view, and run guidance +- `index.md` -- ticket-set summary, 
dependency graph, execution batches, run guidance, and progress pointer - `NN-.md` -- one file per ticket in execution order Record the ticket set back into the plan as: @@ -63,6 +63,30 @@ The first version is **local-artifact first**. - Split a packet when one ticket would deliver more than one meaningful outcome, blur ownership, or bury the feature home. - Keep the first ticket a tracer bullet when the selected execution shape is `vertical-slices`. +## Dependency graph and execution batches + +Ticketization must build a conservative ticket dependency graph and then derive execution batches from it. + +- Start from each ticket's explicit `depends_on` edges. +- Then partition tickets into `Batch 1`, `Batch 2`, and so on, where every ticket in a batch depends only on earlier batches. +- A batch may contain multiple tickets only when their declared `files` sets do not overlap and there is no unresolved shared mutable state, migration risk, or boundary ambiguity between them. +- If two tickets might race, overwrite each other, or require the same shared adapter/config surface, keep them in separate sequential batches. +- **Default-to-sequential rule:** if batch safety is uncertain, emit singleton batches instead of guessing at parallelism. + +The index file becomes the authoritative execution queue for ticketized work. `/workflows-work` should be able to read `index.md`, find the next unfinished batch, and execute only that batch without recomputing the whole backlog. + +## Required index progress state + +`index.md` must record batch progress explicitly so ticketized execution can resume from the index alone. + +Required fields: + +- `last_completed_batch` -- integer counter; `0` means no batches completed yet +- `total_batches` -- total number of execution batches in the ticket set +- batch-level status view showing which batches are pending, in progress, blocked, or completed + +`last_completed_batch` advances only after every ticket in that batch is complete. 
If any ticket in the batch is blocked or still running, do not advance the counter. + ## Required ticket-local context Every generated ticket must carry a compact execution packet that can stand on its own. @@ -116,6 +140,9 @@ That review must check: - feature-home clarity - shared/global boundary honesty - blocker and dependency correctness +- dependency graph correctness +- execution batch safety and parallelization honesty +- file-overlap conflicts between tickets claimed to be parallel-safe - context completeness - ticket size and coupling quality @@ -133,5 +160,7 @@ The ticketization contract is satisfied only when: - the priming skill and ticket schema contract are explicit - local-artifact-first behavior is explicit - `tickets_ref` recording is explicit +- the dependency graph and conservative batch partition are explicit +- the index progress pointer is explicit - each ticket's required local context is explicit - the final ticket-set review step and reviewer are explicit diff --git a/.github/skills/workflows-plan/references/execution-agent-prompt.md b/.github/skills/workflows-plan/references/execution-agent-prompt.md index 91b109c..14d9778 100644 --- a/.github/skills/workflows-plan/references/execution-agent-prompt.md +++ b/.github/skills/workflows-plan/references/execution-agent-prompt.md @@ -9,23 +9,17 @@ platforms: # Execution Agent Prompt Template -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. +This template is the **injected context packet** that `/workflows-work` passes into the named `execution-agent`. -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. 
+**Canonical execution rules live in `agents/workflow/execution-agent.md`.** `/workflows-work` must load that bundled agent template, then inject the fully populated context packet below when dispatching `Task(execution-agent, prompt=scoped_prompt)`. -**Template authority:** This file is the only valid source for execution-subagent prompts. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. - ---- - -You are an execution agent implementing a specific task from a work plan. Follow the 4-phase protocol below exactly. +**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator so the exact context scaffold ships with generated workflow bundles. -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. - -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. +**Scaffold authority:** This file is the only valid source for the injected execution context packet. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. --- -You are an execution agent implementing a specific execution unit from a work plan. Follow the 4-phase protocol below exactly. 
+The bundled `execution-agent` enforces clean-code, DRY, SOLID, feature-home boundary discipline, doc blocks above non-trivial functions/classes, imports at the top of files unless a real exception exists, explicit failure handling, and the structured execution report contract. Populate the scaffold below completely before dispatch. ## Your Unit @@ -90,189 +84,3 @@ Use `references/tdd-evidence-contract.md` as the shared source of truth for cont - `Red` and `Green` prove behavior coverage. - `Post-Refactor Green` proves cleanup safety. - If no cleanup was needed, still rerun and say so. - ---- - -## Phase 1: Understand Before Building - -Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. - -Before proceeding, confirm the prompt still contains these sections: **Your Task**, **Why This Task Exists**, **Architectural Context**, **Learnings from Previous Tasks**, **Project Conventions**, and the four numbered phases below. If any section is missing or any placeholder is unresolved, stop and report the template integrity problem. - -**If anything is unclear, ambiguous, or could be interpreted multiple ways:** -- List your questions explicitly -- State the assumptions you would make if proceeding without answers -- Ask for clarification before starting work - -**If everything is clear:** -- State your interpretation of the requirements in 2-3 sentences -- State how this unit serves the overall user story (from the WHY context) -- List any assumptions you are making (even obvious ones) -- Proceed to Phase 2 - -Do NOT skip this phase. A few minutes of clarification prevents hours of rework. It is always better to ask than to guess. 
- -## Phase 2: Implement - -{{TDD_SECTION}} - -### While Implementing - -- If you encounter something unexpected or unclear, **STOP and ask** rather than guessing -- Follow existing codebase patterns -- do not invent new conventions -- Reuse before you create: before adding a new function, helper, class, interface, type, or utility, search the touched area for an existing abstraction you can reuse or extend safely -- Apply DRY by reason-to-change: extract shared code only when the behavior and future changes truly belong together; keep duplication local when forced abstractions would hide intent -- Apply SOLID deliberately: introduce new classes or interfaces only when they clarify responsibilities, dependency direction, or substitution boundaries; if a new abstraction does not clearly improve the design, do not add it -- Prefer direct, readable code over helper stacks, wrappers, manager classes, or indirection layers created "just in case" -- Variable, function, class, and interface names must be explicit and unambiguous. Avoid abbreviations like `cb`, `ctx`, `svc`, `obj`, or `tmp` when a clearer name fits the scope -- Keep changes minimal -- implement what is asked, nothing more (YAGNI) -- Do not add "nice to have" features not in the success criteria -- Commit after each logical unit of complete work using the project's commit convention - -### On Test Failure - -If tests fail after implementation: -1. Read the error message carefully -- understand what failed and why -2. Analyze whether the failure is in your implementation or in the test -3. Fix the issue -4. Re-run the test command -5. Repeat up to 3 total attempts -6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly - -## Phase 3: Self-Review - -Before reporting back, review your own work with fresh eyes. Go through each checklist item honestly: - -**Completeness:** -- [ ] Did I implement EVERYTHING in the success criteria? 
-- [ ] Are there edge cases the criteria imply that I did not handle? -- [ ] Did I miss any requirements? - -**Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? -- [ ] Would a user achieve the stated outcome with this code? -- [ ] Did I build anything that doesn't trace back to the success criteria or user story? - -**Quality:** -- [ ] Do names accurately describe what things do (not how they work)? -- [ ] Did I reuse existing code where it already solved this problem cleanly? -- [ ] If I introduced a new abstraction, does it have a clear SOLID-based reason to exist? -- [ ] Did I avoid vague or abbreviated names in favor of explicit intent? -- [ ] Did the core business logic stay in the declared feature home unless the architecture handoff justified a shared/global extraction? -- [ ] Is the code clean and maintainable? -- [ ] Does it follow existing codebase patterns? -- [ ] Is error handling appropriate? - -**Discipline:** -- [ ] Did I avoid overbuilding (YAGNI)? -- [ ] Did I ONLY build what was requested? -- [ ] No "nice to have" additions? -- [ ] No unnecessary abstractions or premature optimization? - -**Testing:** -- [ ] Do tests verify actual behavior (not just mock behavior)? -- [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? - -**Evidence:** -- [ ] Can I show actual test output (not just "tests pass")? -- [ ] For UI changes, do I have a screenshot or visual evidence? -- [ ] For API changes, do I have actual request/response data? - -If you find issues during self-review, **fix them now** before reporting. Do not report known issues -- fix them first. 
- -## Phase 4: Report - -Return a structured execution report in exactly this format: - -```markdown -## Execution Report: [Unit Title] - -### Interpretation -[Your 2-3 sentence interpretation of what was asked] - -### Purpose Served -[Which user story aspect / success criterion this unit delivers, from the WHY context] - -### Assumptions Made -- [List each assumption, even if obvious] - -### What Was Implemented -[Describe what you built and how it works] - -### Files Changed -- `path/to/file` -- created/modified (brief description of change) - -### Test Results -- Command: `[test command]` -- Result: PASS/FAIL -- Attempts: [n] -- Output: -``` -[paste ACTUAL test output here] -``` - -### TDD Evidence -- **Red** - - Command: `[red command]` - - Result: PASS/FAIL - - Evidence: [why this proves the missing behavior existed before the implementation] -- **Green** - - Command: `[green command]` - - Result: PASS/FAIL - - Evidence: [why this proves the requested behavior now passes] -- **Post-Refactor Green** - - Command: `[post-refactor command]` - - Result: PASS/FAIL - - Evidence: [why this proves cleanup/refactor work preserved behavior] - -[If no cleanup was needed, still rerun and say so.] - -### Problems Encountered -[For each problem encountered during implementation:] -- **Error:** [exact error message] -- **Root cause:** [your analysis of why it happened] -- **Fix:** [what you did to resolve it] - -[If no problems: "None"] - -### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] - -[If none: "None"] - -### Self-Review Findings -- [Issues found and fixed during self-review] - -[If none: "Self-review passed -- no issues found"] -``` - ---- - -## Standard Implementation Section - -_This section is included when TDD is not enabled._ - -1. Read referenced files and understand existing patterns -2. 
Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. Implement the task following project conventions -4. Write tests matching the success criteria -5. Run the test command: `{{TEST_COMMAND}}` -6. If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) - ---- - -## TDD Implementation Section - -_This section is included when `tdd_enabled: true` is configured._ - -Follow the red-green-refactor cycle strictly: - -1. Read referenced files and understand existing patterns -2. Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. **RED:** Write tests FIRST based on the success criteria. Run them. They MUST fail -- and they must fail for the RIGHT reason (the behavior is missing, not import errors or syntax problems) -4. **GREEN:** Write the MINIMAL production code needed to make the tests pass. No more than what is necessary. -5. Run tests. They MUST pass. -6. **REFACTOR:** Clean up if needed. Tests must still pass after refactoring. - -**Iron rule:** If at any point you find yourself writing production code before a failing test exists for that behavior, STOP. Write the test first. This is not a suggestion -- it is the process. 
diff --git a/.github/skills/workflows-plan/references/ticket-execution-contract.md b/.github/skills/workflows-plan/references/ticket-execution-contract.md index 563c72f..39df591 100644 --- a/.github/skills/workflows-plan/references/ticket-execution-contract.md +++ b/.github/skills/workflows-plan/references/ticket-execution-contract.md @@ -21,14 +21,32 @@ Required files: `index.md` must include: +- frontmatter with: + - `plan_ref` + - `architecture_ref` or an explicit handoff note + - `execution_shape` + - `ticket_set_status` -- `ready | in_progress | blocked | completed` + - `last_completed_batch` + - `total_batches` - plan path - architecture artifact path or explicit handoff note - execution shape - ticket order -- dependency view +- dependency graph +- execution batches +- file-overlap safety notes for every multi-ticket batch - blocker summary - review summary split into `Blocking gaps` and `Recommendations` +Recommended body shape: + +1. `# Ticket Set: ` +2. `## Dependency Graph` +3. `## Execution Batches` +4. `## Ticket Table` +5. `## Blockers` +6. 
`## Review Summary` + ## Ticket file naming - Keep zero-padded numeric prefixes in execution order: `01-...`, `02-...`, `03-...` @@ -103,6 +121,15 @@ Use these ticket statuses: ## Execution consumption rules +When `/workflows-work` receives `index.md`: + +- treat the index as the authoritative ticket queue for this ticket set +- read `last_completed_batch` and take batch `last_completed_batch + 1` as the next unfinished batch +- load only the tickets named in that next batch +- execute multiple tickets together only when the batch is explicitly declared as parallel-safe and the index records why the file sets do not conflict +- if the index or ticket files leave overlap safety ambiguous, collapse that batch to sequential execution instead of guessing +- update batch progress in `index.md`, and increment `last_completed_batch` only after every ticket in the batch reaches `completed` + When `/workflows-work` receives a ticket file: - treat that ticket as one pre-scoped execution unit @@ -117,4 +144,6 @@ When `/workflows-review` or `ticket-flow-auditor` consumes ticket artifacts, ver - the ticket still matches the parent plan and architecture - the implementation stayed inside the ticket scope fence unless the change was explicitly documented - dependency order and status changes stayed honest +- the dependency graph and batch partition still match the ticket files +- parallel-safe batches really stay file-disjoint and race-safe - evidence matches the ticket's stated acceptance criteria and test command diff --git a/.github/skills/workflows-plan/references/ticketization-contract.md b/.github/skills/workflows-plan/references/ticketization-contract.md index a05c2e2..6545e3f 100644 --- a/.github/skills/workflows-plan/references/ticketization-contract.md +++ b/.github/skills/workflows-plan/references/ticketization-contract.md @@ -38,7 +38,7 @@ docs/tickets/YYYY-MM-DD-/ Required outputs: -- `index.md` -- ticket-set summary, dependency view, and run guidance +- `index.md` -- ticket-set summary, dependency
graph, execution batches, run guidance, and progress pointer - `NN-.md` -- one file per ticket in execution order Record the ticket set back into the plan as: @@ -63,6 +63,30 @@ The first version is **local-artifact first**. - Split a packet when one ticket would deliver more than one meaningful outcome, blur ownership, or bury the feature home. - Keep the first ticket a tracer bullet when the selected execution shape is `vertical-slices`. +## Dependency graph and execution batches + +Ticketization must build a conservative ticket dependency graph and then derive execution batches from it. + +- Start from each ticket's explicit `depends_on` edges. +- Then partition tickets into `Batch 1`, `Batch 2`, and so on, where every ticket in a batch depends only on earlier batches. +- A batch may contain multiple tickets only when their declared `files` sets do not overlap and there is no unresolved shared mutable state, migration risk, or boundary ambiguity between them. +- If two tickets might race, overwrite each other, or require the same shared adapter/config surface, keep them in separate sequential batches. +- **Default-to-sequential rule:** if batch safety is uncertain, emit singleton batches instead of guessing at parallelism. + +The index file becomes the authoritative execution queue for ticketized work. `/workflows-work` should be able to read `index.md`, find the next unfinished batch, and execute only that batch without recomputing the whole backlog. + +## Required index progress state + +`index.md` must record batch progress explicitly so ticketized execution can resume from the index alone. + +Required fields: + +- `last_completed_batch` -- integer counter; `0` means no batches completed yet +- `total_batches` -- total number of execution batches in the ticket set +- batch-level status view showing which batches are pending, in progress, blocked, or completed + +`last_completed_batch` advances only after every ticket in that batch is complete. 
If any ticket in the batch is blocked or still running, do not advance the counter. + ## Required ticket-local context Every generated ticket must carry a compact execution packet that can stand on its own. @@ -116,6 +140,9 @@ That review must check: - feature-home clarity - shared/global boundary honesty - blocker and dependency correctness +- dependency graph correctness +- execution batch safety and parallelization honesty +- file-overlap conflicts between tickets claimed to be parallel-safe - context completeness - ticket size and coupling quality @@ -133,5 +160,7 @@ The ticketization contract is satisfied only when: - the priming skill and ticket schema contract are explicit - local-artifact-first behavior is explicit - `tickets_ref` recording is explicit +- the dependency graph and conservative batch partition are explicit +- the index progress pointer is explicit - each ticket's required local context is explicit - the final ticket-set review step and reviewer are explicit diff --git a/.github/skills/workflows-review/references/execution-agent-prompt.md b/.github/skills/workflows-review/references/execution-agent-prompt.md index 91b109c..14d9778 100644 --- a/.github/skills/workflows-review/references/execution-agent-prompt.md +++ b/.github/skills/workflows-review/references/execution-agent-prompt.md @@ -9,23 +9,17 @@ platforms: # Execution Agent Prompt Template -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. +This template is the **injected context packet** that `/workflows-work` passes into the named `execution-agent`. -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. 
+**Canonical execution rules live in `agents/workflow/execution-agent.md`.** `/workflows-work` must load that bundled agent template, then inject the fully populated context packet below when dispatching `Task(execution-agent, prompt=scoped_prompt)`. -**Template authority:** This file is the only valid source for execution-subagent prompts. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. - ---- - -You are an execution agent implementing a specific task from a work plan. Follow the 4-phase protocol below exactly. +**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator so the exact context scaffold ships with generated workflow bundles. -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. - -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. +**Scaffold authority:** This file is the only valid source for the injected execution context packet. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. --- -You are an execution agent implementing a specific execution unit from a work plan. Follow the 4-phase protocol below exactly. 
+The bundled `execution-agent` enforces clean-code, DRY, SOLID, feature-home boundary discipline, doc blocks above non-trivial functions/classes, imports at the top of files unless a real exception exists, explicit failure handling, and the structured execution report contract. Populate the scaffold below completely before dispatch. ## Your Unit @@ -90,189 +84,3 @@ Use `references/tdd-evidence-contract.md` as the shared source of truth for cont - `Red` and `Green` prove behavior coverage. - `Post-Refactor Green` proves cleanup safety. - If no cleanup was needed, still rerun and say so. - ---- - -## Phase 1: Understand Before Building - -Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. - -Before proceeding, confirm the prompt still contains these sections: **Your Task**, **Why This Task Exists**, **Architectural Context**, **Learnings from Previous Tasks**, **Project Conventions**, and the four numbered phases below. If any section is missing or any placeholder is unresolved, stop and report the template integrity problem. - -**If anything is unclear, ambiguous, or could be interpreted multiple ways:** -- List your questions explicitly -- State the assumptions you would make if proceeding without answers -- Ask for clarification before starting work - -**If everything is clear:** -- State your interpretation of the requirements in 2-3 sentences -- State how this unit serves the overall user story (from the WHY context) -- List any assumptions you are making (even obvious ones) -- Proceed to Phase 2 - -Do NOT skip this phase. A few minutes of clarification prevents hours of rework. It is always better to ask than to guess. 
- -## Phase 2: Implement - -{{TDD_SECTION}} - -### While Implementing - -- If you encounter something unexpected or unclear, **STOP and ask** rather than guessing -- Follow existing codebase patterns -- do not invent new conventions -- Reuse before you create: before adding a new function, helper, class, interface, type, or utility, search the touched area for an existing abstraction you can reuse or extend safely -- Apply DRY by reason-to-change: extract shared code only when the behavior and future changes truly belong together; keep duplication local when forced abstractions would hide intent -- Apply SOLID deliberately: introduce new classes or interfaces only when they clarify responsibilities, dependency direction, or substitution boundaries; if a new abstraction does not clearly improve the design, do not add it -- Prefer direct, readable code over helper stacks, wrappers, manager classes, or indirection layers created "just in case" -- Variable, function, class, and interface names must be explicit and unambiguous. Avoid abbreviations like `cb`, `ctx`, `svc`, `obj`, or `tmp` when a clearer name fits the scope -- Keep changes minimal -- implement what is asked, nothing more (YAGNI) -- Do not add "nice to have" features not in the success criteria -- Commit after each logical unit of complete work using the project's commit convention - -### On Test Failure - -If tests fail after implementation: -1. Read the error message carefully -- understand what failed and why -2. Analyze whether the failure is in your implementation or in the test -3. Fix the issue -4. Re-run the test command -5. Repeat up to 3 total attempts -6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly - -## Phase 3: Self-Review - -Before reporting back, review your own work with fresh eyes. Go through each checklist item honestly: - -**Completeness:** -- [ ] Did I implement EVERYTHING in the success criteria? 
-- [ ] Are there edge cases the criteria imply that I did not handle? -- [ ] Did I miss any requirements? - -**Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? -- [ ] Would a user achieve the stated outcome with this code? -- [ ] Did I build anything that doesn't trace back to the success criteria or user story? - -**Quality:** -- [ ] Do names accurately describe what things do (not how they work)? -- [ ] Did I reuse existing code where it already solved this problem cleanly? -- [ ] If I introduced a new abstraction, does it have a clear SOLID-based reason to exist? -- [ ] Did I avoid vague or abbreviated names in favor of explicit intent? -- [ ] Did the core business logic stay in the declared feature home unless the architecture handoff justified a shared/global extraction? -- [ ] Is the code clean and maintainable? -- [ ] Does it follow existing codebase patterns? -- [ ] Is error handling appropriate? - -**Discipline:** -- [ ] Did I avoid overbuilding (YAGNI)? -- [ ] Did I ONLY build what was requested? -- [ ] No "nice to have" additions? -- [ ] No unnecessary abstractions or premature optimization? - -**Testing:** -- [ ] Do tests verify actual behavior (not just mock behavior)? -- [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? - -**Evidence:** -- [ ] Can I show actual test output (not just "tests pass")? -- [ ] For UI changes, do I have a screenshot or visual evidence? -- [ ] For API changes, do I have actual request/response data? - -If you find issues during self-review, **fix them now** before reporting. Do not report known issues -- fix them first. 
- -## Phase 4: Report - -Return a structured execution report in exactly this format: - -```markdown -## Execution Report: [Unit Title] - -### Interpretation -[Your 2-3 sentence interpretation of what was asked] - -### Purpose Served -[Which user story aspect / success criterion this unit delivers, from the WHY context] - -### Assumptions Made -- [List each assumption, even if obvious] - -### What Was Implemented -[Describe what you built and how it works] - -### Files Changed -- `path/to/file` -- created/modified (brief description of change) - -### Test Results -- Command: `[test command]` -- Result: PASS/FAIL -- Attempts: [n] -- Output: -``` -[paste ACTUAL test output here] -``` - -### TDD Evidence -- **Red** - - Command: `[red command]` - - Result: PASS/FAIL - - Evidence: [why this proves the missing behavior existed before the implementation] -- **Green** - - Command: `[green command]` - - Result: PASS/FAIL - - Evidence: [why this proves the requested behavior now passes] -- **Post-Refactor Green** - - Command: `[post-refactor command]` - - Result: PASS/FAIL - - Evidence: [why this proves cleanup/refactor work preserved behavior] - -[If no cleanup was needed, still rerun and say so.] - -### Problems Encountered -[For each problem encountered during implementation:] -- **Error:** [exact error message] -- **Root cause:** [your analysis of why it happened] -- **Fix:** [what you did to resolve it] - -[If no problems: "None"] - -### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] - -[If none: "None"] - -### Self-Review Findings -- [Issues found and fixed during self-review] - -[If none: "Self-review passed -- no issues found"] -``` - ---- - -## Standard Implementation Section - -_This section is included when TDD is not enabled._ - -1. Read referenced files and understand existing patterns -2. 
Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. Implement the task following project conventions -4. Write tests matching the success criteria -5. Run the test command: `{{TEST_COMMAND}}` -6. If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) - ---- - -## TDD Implementation Section - -_This section is included when `tdd_enabled: true` is configured._ - -Follow the red-green-refactor cycle strictly: - -1. Read referenced files and understand existing patterns -2. Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. **RED:** Write tests FIRST based on the success criteria. Run them. They MUST fail -- and they must fail for the RIGHT reason (the behavior is missing, not import errors or syntax problems) -4. **GREEN:** Write the MINIMAL production code needed to make the tests pass. No more than what is necessary. -5. Run tests. They MUST pass. -6. **REFACTOR:** Clean up if needed. Tests must still pass after refactoring. - -**Iron rule:** If at any point you find yourself writing production code before a failing test exists for that behavior, STOP. Write the test first. This is not a suggestion -- it is the process. 
diff --git a/.github/skills/workflows-review/references/ticket-execution-contract.md b/.github/skills/workflows-review/references/ticket-execution-contract.md index 563c72f..39df591 100644 --- a/.github/skills/workflows-review/references/ticket-execution-contract.md +++ b/.github/skills/workflows-review/references/ticket-execution-contract.md @@ -21,14 +21,32 @@ Required files: `index.md` must include: +- frontmatter with: + - `plan_ref` + - `architecture_ref` or an explicit handoff note + - `execution_shape` + - `ticket_set_status` -- `ready | in_progress | blocked | completed` + - `last_completed_batch` + - `total_batches` - plan path - architecture artifact path or explicit handoff note - execution shape - ticket order -- dependency view +- dependency graph +- execution batches +- file-overlap safety notes for every multi-ticket batch - blocker summary - review summary split into `Blocking gaps` and `Recommendations` +Recommended body shape: + +1. `# Ticket Set: ` +2. `## Dependency Graph` +3. `## Execution Batches` +4. `## Ticket Table` +5. `## Blockers` +6. 
`## Review Summary` + ## Ticket file naming - Keep zero-padded numeric prefixes in execution order: `01-...`, `02-...`, `03-...` @@ -103,6 +121,15 @@ Use these ticket statuses: ## Execution consumption rules +When `/workflows-work` receives `index.md`: + +- treat the index as the authoritative ticket queue for this ticket set +- read `last_completed_batch` and take batch `last_completed_batch + 1` as the next unfinished batch +- load only the tickets named in that next batch +- execute multiple tickets together only when the batch is explicitly declared as parallel-safe and the index records why the file sets do not conflict +- if the index or ticket files leave overlap safety ambiguous, collapse that batch to sequential execution instead of guessing +- update batch progress in `index.md`, and increment `last_completed_batch` only after every ticket in the batch reaches `completed` + When `/workflows-work` receives a ticket file: - treat that ticket as one pre-scoped execution unit @@ -117,4 +144,6 @@ When `/workflows-review` or `ticket-flow-auditor` consumes ticket artifacts, ver - the ticket still matches the parent plan and architecture - the implementation stayed inside the ticket scope fence unless the change was explicitly documented - dependency order and status changes stayed honest +- the dependency graph and batch partition still match the ticket files +- parallel-safe batches really stay file-disjoint and race-safe - evidence matches the ticket's stated acceptance criteria and test command diff --git a/.github/skills/workflows-review/references/ticketization-contract.md b/.github/skills/workflows-review/references/ticketization-contract.md index a05c2e2..6545e3f 100644 --- a/.github/skills/workflows-review/references/ticketization-contract.md +++ b/.github/skills/workflows-review/references/ticketization-contract.md @@ -38,7 +38,7 @@ docs/tickets/YYYY-MM-DD-/ Required outputs: -- `index.md` -- ticket-set summary,
dependency graph, execution batches, run guidance, and progress pointer - `NN-.md` -- one file per ticket in execution order Record the ticket set back into the plan as: @@ -63,6 +63,30 @@ The first version is **local-artifact first**. - Split a packet when one ticket would deliver more than one meaningful outcome, blur ownership, or bury the feature home. - Keep the first ticket a tracer bullet when the selected execution shape is `vertical-slices`. +## Dependency graph and execution batches + +Ticketization must build a conservative ticket dependency graph and then derive execution batches from it. + +- Start from each ticket's explicit `depends_on` edges. +- Then partition tickets into `Batch 1`, `Batch 2`, and so on, where every ticket in a batch depends only on earlier batches. +- A batch may contain multiple tickets only when their declared `files` sets do not overlap and there is no unresolved shared mutable state, migration risk, or boundary ambiguity between them. +- If two tickets might race, overwrite each other, or require the same shared adapter/config surface, keep them in separate sequential batches. +- **Default-to-sequential rule:** if batch safety is uncertain, emit singleton batches instead of guessing at parallelism. + +The index file becomes the authoritative execution queue for ticketized work. `/workflows-work` should be able to read `index.md`, find the next unfinished batch, and execute only that batch without recomputing the whole backlog. + +## Required index progress state + +`index.md` must record batch progress explicitly so ticketized execution can resume from the index alone. + +Required fields: + +- `last_completed_batch` -- integer counter; `0` means no batches completed yet +- `total_batches` -- total number of execution batches in the ticket set +- batch-level status view showing which batches are pending, in progress, blocked, or completed + +`last_completed_batch` advances only after every ticket in that batch is complete. 
If any ticket in the batch is blocked or still running, do not advance the counter. + ## Required ticket-local context Every generated ticket must carry a compact execution packet that can stand on its own. @@ -116,6 +140,9 @@ That review must check: - feature-home clarity - shared/global boundary honesty - blocker and dependency correctness +- dependency graph correctness +- execution batch safety and parallelization honesty +- file-overlap conflicts between tickets claimed to be parallel-safe - context completeness - ticket size and coupling quality @@ -133,5 +160,7 @@ The ticketization contract is satisfied only when: - the priming skill and ticket schema contract are explicit - local-artifact-first behavior is explicit - `tickets_ref` recording is explicit +- the dependency graph and conservative batch partition are explicit +- the index progress pointer is explicit - each ticket's required local context is explicit - the final ticket-set review step and reviewer are explicit diff --git a/.github/skills/workflows-to-issues/SKILL.md b/.github/skills/workflows-to-issues/SKILL.md index ec55e75..264fe19 100644 --- a/.github/skills/workflows-to-issues/SKILL.md +++ b/.github/skills/workflows-to-issues/SKILL.md @@ -84,6 +84,8 @@ Rules: - size by coupling and boundary clarity, not by arbitrary task counts - keep tracer bullets first when the mode is `vertical-slices` - surface uncertainty instead of hiding it when ticketizing directly after `/workflows-plan` +- build a conservative dependency graph and execution batches while ticketizing +- default to sequential singleton batches whenever safe parallelism is unclear Each ticket must include the required ticket-local context defined in `ticketization-contract.md`, and each ticket file must follow the exact frontmatter/body shape in `ticket-execution-contract.md`. @@ -112,6 +114,15 @@ Required files: - `index.md` - one `NN-.md` file per ticket +`index.md` is not just a directory listing. 
It is the authoritative ticket-set graph and execution cursor. It must include the dependency graph, the conservative batch partition, file-overlap safety notes for every multi-ticket batch, and an updateable `last_completed_batch` counter that `/workflows-work` can use to resume from the next batch. + +When partitioning tickets into batches: + +- only group tickets together when all dependencies are satisfied by earlier batches +- only group tickets together when their declared `files` sets do not overlap +- treat shared mutable state, config churn, migrations, and boundary ambiguity as reasons to split the batch +- if unsure, split into sequential batches instead of inventing parallelism + Write every ticket using the exact schema from `ticket-execution-contract.md`, including its required frontmatter, section order, status lifecycle, and parent refs. Then record `tickets_ref` back into the plan frontmatter when possible. If frontmatter cannot be updated safely, add the ticket-set path under `## Related Artifacts`. 
@@ -135,6 +146,8 @@ Check for: - feature-home drift - shared/global drift - missing blockers or bad dependency ordering +- bad dependency graph layering or unsafe batch partitioning +- tickets grouped in parallel despite overlapping files or shared mutable surfaces - oversized tickets - tickets with weak WHY tracing - missing acceptance criteria or evidence commands @@ -154,6 +167,8 @@ A complete run must leave behind: - a local ticket set under `docs/tickets/` - `tickets_ref` or a labeled related-artifact link back into the plan - explicit blocker/dependency ordering +- a dependency graph plus conservative execution batches in `index.md` +- an updateable `last_completed_batch` progress pointer in `index.md` - compact ticket-local context packs - a final ticket-set review result from `ticket-flow-auditor` @@ -172,7 +187,7 @@ Execution readiness: - Recommendations: Recommended next step: -- Run `/workflows-work` on one ticket file once ticket-scoped execution is supported, or use the generated tickets as the scoped execution packet source for the next implementation pass. +- Run `/workflows-work` on the generated `index.md` so execution can pick the next safe batch automatically, or target one ticket file manually when you need to force a narrower run. ``` NEVER CODE! This phase shapes execution artifacts and context packets. It does not implement the feature itself. diff --git a/.github/skills/workflows-to-issues/references/execution-agent-prompt.md b/.github/skills/workflows-to-issues/references/execution-agent-prompt.md index 91b109c..14d9778 100644 --- a/.github/skills/workflows-to-issues/references/execution-agent-prompt.md +++ b/.github/skills/workflows-to-issues/references/execution-agent-prompt.md @@ -9,23 +9,17 @@ platforms: # Execution Agent Prompt Template -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. 
The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. +This template is the **injected context packet** that `/workflows-work` passes into the named `execution-agent`. -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. +**Canonical execution rules live in `agents/workflow/execution-agent.md`.** `/workflows-work` must load that bundled agent template, then inject the fully populated context packet below when dispatching `Task(execution-agent, prompt=scoped_prompt)`. -**Template authority:** This file is the only valid source for execution-subagent prompts. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. - ---- - -You are an execution agent implementing a specific task from a work plan. Follow the 4-phase protocol below exactly. +**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator so the exact context scaffold ships with generated workflow bundles. -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. - -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. +**Scaffold authority:** This file is the only valid source for the injected execution context packet. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. 
--- -You are an execution agent implementing a specific execution unit from a work plan. Follow the 4-phase protocol below exactly. +The bundled `execution-agent` enforces clean-code, DRY, SOLID, feature-home boundary discipline, doc blocks above non-trivial functions/classes, imports at the top of files unless a real exception exists, explicit failure handling, and the structured execution report contract. Populate the scaffold below completely before dispatch. ## Your Unit @@ -90,189 +84,3 @@ Use `references/tdd-evidence-contract.md` as the shared source of truth for cont - `Red` and `Green` prove behavior coverage. - `Post-Refactor Green` proves cleanup safety. - If no cleanup was needed, still rerun and say so. - ---- - -## Phase 1: Understand Before Building - -Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. - -Before proceeding, confirm the prompt still contains these sections: **Your Task**, **Why This Task Exists**, **Architectural Context**, **Learnings from Previous Tasks**, **Project Conventions**, and the four numbered phases below. If any section is missing or any placeholder is unresolved, stop and report the template integrity problem. - -**If anything is unclear, ambiguous, or could be interpreted multiple ways:** -- List your questions explicitly -- State the assumptions you would make if proceeding without answers -- Ask for clarification before starting work - -**If everything is clear:** -- State your interpretation of the requirements in 2-3 sentences -- State how this unit serves the overall user story (from the WHY context) -- List any assumptions you are making (even obvious ones) -- Proceed to Phase 2 - -Do NOT skip this phase. A few minutes of clarification prevents hours of rework. It is always better to ask than to guess. 
- -## Phase 2: Implement - -{{TDD_SECTION}} - -### While Implementing - -- If you encounter something unexpected or unclear, **STOP and ask** rather than guessing -- Follow existing codebase patterns -- do not invent new conventions -- Reuse before you create: before adding a new function, helper, class, interface, type, or utility, search the touched area for an existing abstraction you can reuse or extend safely -- Apply DRY by reason-to-change: extract shared code only when the behavior and future changes truly belong together; keep duplication local when forced abstractions would hide intent -- Apply SOLID deliberately: introduce new classes or interfaces only when they clarify responsibilities, dependency direction, or substitution boundaries; if a new abstraction does not clearly improve the design, do not add it -- Prefer direct, readable code over helper stacks, wrappers, manager classes, or indirection layers created "just in case" -- Variable, function, class, and interface names must be explicit and unambiguous. Avoid abbreviations like `cb`, `ctx`, `svc`, `obj`, or `tmp` when a clearer name fits the scope -- Keep changes minimal -- implement what is asked, nothing more (YAGNI) -- Do not add "nice to have" features not in the success criteria -- Commit after each logical unit of complete work using the project's commit convention - -### On Test Failure - -If tests fail after implementation: -1. Read the error message carefully -- understand what failed and why -2. Analyze whether the failure is in your implementation or in the test -3. Fix the issue -4. Re-run the test command -5. Repeat up to 3 total attempts -6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly - -## Phase 3: Self-Review - -Before reporting back, review your own work with fresh eyes. Go through each checklist item honestly: - -**Completeness:** -- [ ] Did I implement EVERYTHING in the success criteria? 
-- [ ] Are there edge cases the criteria imply that I did not handle? -- [ ] Did I miss any requirements? - -**Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? -- [ ] Would a user achieve the stated outcome with this code? -- [ ] Did I build anything that doesn't trace back to the success criteria or user story? - -**Quality:** -- [ ] Do names accurately describe what things do (not how they work)? -- [ ] Did I reuse existing code where it already solved this problem cleanly? -- [ ] If I introduced a new abstraction, does it have a clear SOLID-based reason to exist? -- [ ] Did I avoid vague or abbreviated names in favor of explicit intent? -- [ ] Did the core business logic stay in the declared feature home unless the architecture handoff justified a shared/global extraction? -- [ ] Is the code clean and maintainable? -- [ ] Does it follow existing codebase patterns? -- [ ] Is error handling appropriate? - -**Discipline:** -- [ ] Did I avoid overbuilding (YAGNI)? -- [ ] Did I ONLY build what was requested? -- [ ] No "nice to have" additions? -- [ ] No unnecessary abstractions or premature optimization? - -**Testing:** -- [ ] Do tests verify actual behavior (not just mock behavior)? -- [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? - -**Evidence:** -- [ ] Can I show actual test output (not just "tests pass")? -- [ ] For UI changes, do I have a screenshot or visual evidence? -- [ ] For API changes, do I have actual request/response data? - -If you find issues during self-review, **fix them now** before reporting. Do not report known issues -- fix them first. 
- -## Phase 4: Report - -Return a structured execution report in exactly this format: - -```markdown -## Execution Report: [Unit Title] - -### Interpretation -[Your 2-3 sentence interpretation of what was asked] - -### Purpose Served -[Which user story aspect / success criterion this unit delivers, from the WHY context] - -### Assumptions Made -- [List each assumption, even if obvious] - -### What Was Implemented -[Describe what you built and how it works] - -### Files Changed -- `path/to/file` -- created/modified (brief description of change) - -### Test Results -- Command: `[test command]` -- Result: PASS/FAIL -- Attempts: [n] -- Output: -``` -[paste ACTUAL test output here] -``` - -### TDD Evidence -- **Red** - - Command: `[red command]` - - Result: PASS/FAIL - - Evidence: [why this proves the missing behavior existed before the implementation] -- **Green** - - Command: `[green command]` - - Result: PASS/FAIL - - Evidence: [why this proves the requested behavior now passes] -- **Post-Refactor Green** - - Command: `[post-refactor command]` - - Result: PASS/FAIL - - Evidence: [why this proves cleanup/refactor work preserved behavior] - -[If no cleanup was needed, still rerun and say so.] - -### Problems Encountered -[For each problem encountered during implementation:] -- **Error:** [exact error message] -- **Root cause:** [your analysis of why it happened] -- **Fix:** [what you did to resolve it] - -[If no problems: "None"] - -### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] - -[If none: "None"] - -### Self-Review Findings -- [Issues found and fixed during self-review] - -[If none: "Self-review passed -- no issues found"] -``` - ---- - -## Standard Implementation Section - -_This section is included when TDD is not enabled._ - -1. Read referenced files and understand existing patterns -2. 
Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. Implement the task following project conventions -4. Write tests matching the success criteria -5. Run the test command: `{{TEST_COMMAND}}` -6. If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) - ---- - -## TDD Implementation Section - -_This section is included when `tdd_enabled: true` is configured._ - -Follow the red-green-refactor cycle strictly: - -1. Read referenced files and understand existing patterns -2. Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. **RED:** Write tests FIRST based on the success criteria. Run them. They MUST fail -- and they must fail for the RIGHT reason (the behavior is missing, not import errors or syntax problems) -4. **GREEN:** Write the MINIMAL production code needed to make the tests pass. No more than what is necessary. -5. Run tests. They MUST pass. -6. **REFACTOR:** Clean up if needed. Tests must still pass after refactoring. - -**Iron rule:** If at any point you find yourself writing production code before a failing test exists for that behavior, STOP. Write the test first. This is not a suggestion -- it is the process. 
diff --git a/.github/skills/workflows-to-issues/references/ticket-execution-contract.md b/.github/skills/workflows-to-issues/references/ticket-execution-contract.md index 563c72f..39df591 100644 --- a/.github/skills/workflows-to-issues/references/ticket-execution-contract.md +++ b/.github/skills/workflows-to-issues/references/ticket-execution-contract.md @@ -21,14 +21,32 @@ Required files: `index.md` must include: +- frontmatter with: + - `plan_ref` + - `architecture_ref` or an explicit handoff note + - `execution_shape` + - `ticket_set_status` -- `ready | in_progress | blocked | completed` + - `last_completed_batch` + - `total_batches` - plan path - architecture artifact path or explicit handoff note - execution shape - ticket order -- dependency view +- dependency graph +- execution batches +- file-overlap safety notes for every multi-ticket batch - blocker summary - review summary split into `Blocking gaps` and `Recommendations` +Recommended body shape: + +1. `# Ticket Set: <ticket-set-name>` +2. `## Dependency Graph` +3. `## Execution Batches` +4. `## Ticket Table` +5. `## Blockers` +6. 
`## Review Summary` + ## Ticket file naming - Keep zero-padded numeric prefixes in execution order: `01-...`, `02-...`, `03-...` @@ -103,6 +121,15 @@ Use these ticket statuses: ## Execution consumption rules +When `/workflows-work` receives `index.md`: + +- treat the index as the authoritative ticket queue for this ticket set +- read `last_completed_batch` to choose the next unfinished batch +- load only the tickets named in that next batch +- execute multiple tickets together only when the batch is explicitly declared as parallel-safe and the index records why the file sets do not conflict +- if the index or ticket files leave overlap safety ambiguous, collapse that batch to sequential execution instead of guessing +- update batch progress in `index.md`, and increment `last_completed_batch` only after every ticket in the batch reaches `completed` + When `/workflows-work` receives a ticket file: - treat that ticket as one pre-scoped execution unit @@ -117,4 +144,6 @@ When `/workflows-review` or `ticket-flow-auditor` consumes ticket artifacts, ver - the ticket still matches the parent plan and architecture - the implementation stayed inside the ticket scope fence unless the change was explicitly documented - dependency order and status changes stayed honest +- the dependency graph and batch partition still match the ticket files +- parallel-safe batches really stay file-disjoint and race-safe - evidence matches the ticket's stated acceptance criteria and test command diff --git a/.github/skills/workflows-to-issues/references/ticketization-contract.md b/.github/skills/workflows-to-issues/references/ticketization-contract.md index a05c2e2..6545e3f 100644 --- a/.github/skills/workflows-to-issues/references/ticketization-contract.md +++ b/.github/skills/workflows-to-issues/references/ticketization-contract.md @@ -38,7 +38,7 @@ docs/tickets/YYYY-MM-DD-/ Required outputs: -- `index.md` -- ticket-set summary, dependency view, and run guidance +- `index.md` -- ticket-set 
summary, dependency graph, execution batches, run guidance, and progress pointer - `NN-<slug>.md` -- one file per ticket in execution order Record the ticket set back into the plan as: @@ -63,6 +63,30 @@ The first version is **local-artifact first**. - Split a packet when one ticket would deliver more than one meaningful outcome, blur ownership, or bury the feature home. - Keep the first ticket a tracer bullet when the selected execution shape is `vertical-slices`. +## Dependency graph and execution batches + +Ticketization must build a conservative ticket dependency graph and then derive execution batches from it. + +- Start from each ticket's explicit `depends_on` edges. +- Then partition tickets into `Batch 1`, `Batch 2`, and so on, where every ticket in a batch depends only on earlier batches. +- A batch may contain multiple tickets only when their declared `files` sets do not overlap and there is no unresolved shared mutable state, migration risk, or boundary ambiguity between them. +- If two tickets might race, overwrite each other, or require the same shared adapter/config surface, keep them in separate sequential batches. +- **Default-to-sequential rule:** if batch safety is uncertain, emit singleton batches instead of guessing at parallelism. + +The index file becomes the authoritative execution queue for ticketized work. `/workflows-work` should be able to read `index.md`, find the next unfinished batch, and execute only that batch without recomputing the whole backlog. + +## Required index progress state + +`index.md` must record batch progress explicitly so ticketized execution can resume from the index alone. 
+ +Required fields: + +- `last_completed_batch` -- integer counter; `0` means no batches completed yet +- `total_batches` -- total number of execution batches in the ticket set +- batch-level status view showing which batches are pending, in progress, blocked, or completed + +`last_completed_batch` advances only after every ticket in that batch is complete. If any ticket in the batch is blocked or still running, do not advance the counter. + ## Required ticket-local context Every generated ticket must carry a compact execution packet that can stand on its own. @@ -116,6 +140,9 @@ That review must check: - feature-home clarity - shared/global boundary honesty - blocker and dependency correctness +- dependency graph correctness +- execution batch safety and parallelization honesty +- file-overlap conflicts between tickets claimed to be parallel-safe - context completeness - ticket size and coupling quality @@ -133,5 +160,7 @@ The ticketization contract is satisfied only when: - the priming skill and ticket schema contract are explicit - local-artifact-first behavior is explicit - `tickets_ref` recording is explicit +- the dependency graph and conservative batch partition are explicit +- the index progress pointer is explicit - each ticket's required local context is explicit - the final ticket-set review step and reviewer are explicit diff --git a/.github/skills/workflows-work/SKILL.md b/.github/skills/workflows-work/SKILL.md index 9d43fe8..07f0ef7 100644 --- a/.github/skills/workflows-work/SKILL.md +++ b/.github/skills/workflows-work/SKILL.md @@ -4,7 +4,7 @@ description: Execute work plans while maintaining WHY tracing from problem narra --- ## Arguments -[plan file, ticket file, specification, or todo file path] [--review-mode bulk|inline|both] +[plan file, ticket index, ticket file, specification, or todo file path] [--review-mode bulk|inline|both] # Work Plan Execution Command @@ -14,9 +14,11 @@ Execute a work plan while maintaining WHY tracing from problem 
narrative through ## Introduction -This command takes a work document (plan, ticket, specification, or todo file) and executes it systematically using a **subagent orchestration model**. The orchestrator (this conversation) loads or adapts the source into execution units and delegates each unit to a focused subagent. `vertical-slices` is the default execution shape, but `infra-track` and `fix-batch` are also valid when declared by the plan. When the input is a ticket file, that ticket becomes the primary execution packet and the parent plan/architecture artifacts provide deeper context instead of re-expanding the whole backlog. Each subagent follows a standardized 4-phase protocol (understand, implement, self-review, report) defined in the execution agent prompt template. +This command takes a work document (plan, ticket index, ticket, specification, or todo file) and executes it systematically using a **subagent orchestration model**. The orchestrator (this conversation) loads or adapts the source into execution units and delegates each unit to a focused subagent. `vertical-slices` is the default execution shape, but `infra-track` and `fix-batch` are also valid when declared by the plan. When the input is a ticket index, that index becomes the authoritative execution queue and `/workflows-work` selects the next safe batch from it. When the input is a ticket file, that ticket becomes the primary execution packet and the parent plan/architecture artifacts provide deeper context instead of re-expanding the whole backlog. Every implementation unit, retry, and regression repair in this workflow is delegated through the named `execution-agent`, which follows a standardized 4-phase protocol (understand, implement, self-review, report). -**WHY-grounded execution:** Every subagent receives the source plan's WHY context -- the problem narrative, user story, architectural context, the architecture handoff contract, and which success criterion their specific unit serves. 
When execution starts from a ticket, the ticket's local context packet stays primary, while `plan_ref`, `tickets_ref`, and `architecture_ref` remain the deeper-dive path. This prevents implementation drift where technically correct code fails to deliver the user's actual need. The orchestrator is the guardian of WHY: it extracts purpose from the plan, threads it through every unit prompt, and validates that the combined output delivers the stated user story. +**WHY-grounded execution:** Every subagent receives the source plan's WHY context -- the problem narrative, user story, architectural context, the architecture handoff contract, and which success criterion their specific unit serves. When execution starts from a ticket index, the index decides the next batch while the selected tickets provide the local execution packets. When execution starts from a ticket file, the ticket's local context packet stays primary, while `plan_ref`, `tickets_ref`, and `architecture_ref` remain the deeper-dive path. This prevents implementation drift where technically correct code fails to deliver the user's actual need. The orchestrator is the guardian of WHY: it extracts purpose from the plan, threads it through every unit prompt, and validates that the combined output delivers the stated user story. + +**Execution delegation rule:** Ticket execution must always go through the bundled `execution-agent`. Do not route ticket implementation, ticket fix loops, or ticket regression repairs through `general-purpose` or any ad hoc worker prompt. ### Review Mode @@ -41,13 +43,19 @@ If no `--review-mode` is specified, check `compound-engineering.local.md` for a 1. **Read the work document and extract WHY + guardrail context** - Read the work document completely - - If the input is a ticket file, first load `references/ticket-execution-contract.md` and verify the ticket includes the required frontmatter and body sections. 
Stop and send the user back to `/workflows-to-issues` if the ticket contract is missing or malformed. + - If the input is a ticket index or ticket file, first load `references/ticket-execution-contract.md` and verify the artifact includes the required index or ticket contract. Stop and send the user back to `/workflows-to-issues` if the contract is missing or malformed. + - If the input is a ticket index, extract: + - `plan_ref`, `architecture_ref`, `execution_shape`, `ticket_set_status`, `last_completed_batch`, and `total_batches` + - the dependency graph and execution-batch table + - the next unfinished batch and the ticket files it names + - the file-overlap safety notes proving whether the batch is truly parallel-safe + - If the input is a ticket index, use the index as the source of truth for ordering and batch selection. Load only the ticket files named in the next batch before continuing. - If the input is a ticket file, extract: - `plan_ref`, `tickets_ref`, `architecture_ref`, and `source_packet_ref` - `feature_home`, `depends_on`, `dependency_type`, `files`, `test_command`, and `status` - the compact packet in `## Local Context` - the parent trace in `## Parent Refs` and `## Deeper-Dive Refs` - - If the input is a ticket file, load the parent plan and architecture artifact from the recorded refs before continuing. The ticket is the primary execution unit; the parent artifacts provide WHY and boundary context. + - If the input is a ticket index or ticket file, load the parent plan and architecture artifact from the recorded refs before continuing. The index chooses the batch; the ticket files remain the execution packets; the parent artifacts provide WHY and boundary context. 
- **Extract WHY artifacts** from the parent plan (these ground everything that follows): - **Problem Narrative** -- why this work exists, what pain it solves - **User Story** -- who benefits and what outcome they get @@ -62,11 +70,11 @@ If no `--review-mode` is specified, check `compound-engineering.local.md` for a - If the resolved contract weakens Ralph/unit+e2e without a justified exception in the plan, stop and ask for the plan contract to be corrected before execution - If `docs/constitution.md` exists, read it and extract the active constitution version, applicable principles, execution baselines, and approval rules. If the plan lists `constitution_waivers`, honor only those explicit exceptions. - If the parent plan has a `brainstorm_ref:` path, read that brainstorm document too for richer WHY context - - If the parent plan has an `architecture_ref:` path, the ticket has an `architecture_ref`, or `## Related Artifacts` points to `docs/architecture/`, read that artifact and extract feature homes, shared/global decisions, context tiers, drift checks, deletion tests, interfaces as test surfaces, seams, adapters, contracts, deepening candidates, and downstream work/review guidance + - If the parent plan has an `architecture_ref:` path, the selected ticket or index has an `architecture_ref`, or `## Related Artifacts` points to `docs/architecture/`, read that artifact and extract feature homes, shared/global decisions, context tiers, drift checks, deletion tests, interfaces as test surfaces, seams, adapters, contracts, deepening candidates, and downstream work/review guidance - If no architecture artifact is recorded, assemble an explicit architecture handoff contract from the parent plan's Architectural Context, Key Decisions, Constitution Alignment, brainstorm context, execution constraints, and `references/vertical-slice-architecture.md`. Tell the user this is a fallback and recommend `/workflows-architecture` if boundaries are still unsettled. 
- Review any other references or links provided in the plan or ticket - If the constitution requires explicit approval for any part of the planned work (for example, risky writes, schema changes, auth changes, or scope expansions), surface that before execution starts - - If the document is not already in a declared execution shape and is not a valid ticket artifact, treat it as **legacy input**. Before spawning subagents, adapt it into execution units in STATE.md. If that adaptation materially changes scope or ordering, ask the user to approve the adapted unit backlog before proceeding. + - If the document is not already in a declared execution shape and is not a valid ticket index or ticket artifact, treat it as **legacy input**. Before spawning subagents, adapt it into execution units in STATE.md. If that adaptation materially changes scope or ordering, ask the user to approve the adapted unit backlog before proceeding. - If anything is unclear or ambiguous, ask clarifying questions now - Get user approval to proceed - **Do not skip this** - better to ask questions now than build the wrong thing @@ -117,6 +125,7 @@ If no `--review-mode` is specified, check `compound-engineering.local.md` for a 3. 
**Preview Unit Breakdown** - Mentally identify the major execution units from the source document + - If the input is a ticket index, preview the next batch from the index and whether it is sequential or parallel-safe - If the input is a ticket file, preview exactly one execution unit unless the user explicitly asks to re-split it - Note any questions about dependencies or scope - The formal unit decomposition happens in Phase 2 Step 4 (STATE.md), which is the persistent record of progress @@ -135,6 +144,7 @@ Before executing, validate four things: **structural readiness** (the selected e - **`vertical-slices`** -- slice type, serves, demo scenario, feature home, scope fence, files, success criteria, validation command, dependencies, dependency type - **`infra-track`** -- capability enabled, consumers / downstream work unlocked, scope, files, risk / rollback, success criteria, validation command, dependencies - **`fix-batch`** -- problem, repro / expected outcome, files, success criteria, validation command, dependencies +- **Ticket index input** -- valid `ticket-execution-contract.md` index contract, batch table, file-overlap safety notes, and progress pointer - **Ticket input** -- valid `ticket-execution-contract.md` frontmatter/body, parent refs, feature home, scope fence, acceptance criteria, test command, and compact local context - **Default rule** -- if `execution_shape` is missing, assume `vertical-slices` - **Anti-coercion rule** -- do not force infra or fix-batch work into slices if that would create fake verticality @@ -162,7 +172,7 @@ Before executing, validate four things: **structural readiness** (the selected e - **Success Criteria** -- present at plan level (not just unit level) - **Unit tracing** -- each execution unit has a purpose line connecting it to the user story or explicit enabling outcome -If the plan lacks structural details, the ticket lacks the ticket execution contract, or no architecture artifact / handoff contract can explain the 
boundaries, refuse to proceed and suggest running `/workflows-architecture` first if the boundaries are still fuzzy, then `/deepen-plan` or `/workflows-to-issues`, or manually repairing the execution packet. +If the plan lacks structural details, the ticket index or ticket lacks the ticket execution contract, or no architecture artifact / handoff contract can explain the boundaries, refuse to proceed and suggest running `/workflows-architecture` first if the boundaries are still fuzzy, then `/deepen-plan` or `/workflows-to-issues`, or manually repairing the execution packet. If the plan lacks the `tdd` block or `## TDD & Evidence Contract`, or if the resolved contract is ambiguous, refuse to proceed and suggest `/workflows-plan` or `/deepen-plan` to repair the execution contract before spawning subagents. @@ -174,7 +184,7 @@ If the plan lacks WHY artifacts, the orchestrator should **construct minimal WHY #### Step 2: Check for Resumable Session -Before creating a new session, check for existing incomplete sessions for the same plan or ticket: +Before creating a new session, check for existing incomplete sessions for the same plan, ticket index, or ticket: ```bash ls docs/execution-sessions/work-*/state.md 2>/dev/null @@ -201,8 +211,9 @@ Create a `STATE.md` file in the session directory: ```markdown --- -source_type: [plan | ticket | specification | todo] +source_type: [plan | ticket-index | ticket | specification | todo] plan_file: [path to plan] +ticket_index: [path to ticket index, if applicable] ticket_file: [path to ticket, if applicable] tickets_ref: [path to ticket index, if applicable] source_packet_ref: [plan packet ref or ticket packet ref] @@ -265,12 +276,13 @@ _No learnings yet._ The orchestrator parses the source artifact and creates a list of execution units. Each unit is a self-contained packet of work defined by the selected execution shape or the ticket contract. 
The orchestrator does the heavy lifting here: - **Prefer plan-defined units directly** -- if the plan already declares a coherent execution shape, execute those packets as written +- **Prefer index-defined batches directly** -- if the input is a valid ticket index, execute the next unresolved batch as written and do not re-batch it unless the index is missing safety evidence or the user explicitly approves a change - **Prefer ticket-defined unit directly** -- if the input is a valid ticket artifact, execute that one ticket as one unit and do not re-split it unless the user explicitly approves a change - **Adapt legacy phase/task plans into units before coding** -- do not execute raw task lists directly once the shape contract is available - **Break oversized units** into smaller units if needed (each unit should be completable in one subagent session) - **Preserve WHY tracing** -- when splitting a unit, each resulting unit inherits or refines the parent unit's purpose line. Never create an orphan unit with no connection to the user story. - **Identify file dependencies** between units -- **Determine parallelizable units** -- only units with non-overlapping file sets and compatible dependencies can run simultaneously +- **Determine parallelizable units** -- only units with non-overlapping file sets and compatible dependencies can run simultaneously. For ticket-index input, trust only the explicit batch partition recorded in `index.md`; if the index leaves overlap safety ambiguous, collapse the batch to sequential execution. - **Ensure each unit has clear success criteria** -- if the plan already defines them, use them directly; otherwise, the orchestrator must create them - **Map each unit to its purpose** -- record which success criterion or enabling outcome each unit delivers (this goes in STATE.md's "Serves / Unlocks" column) @@ -287,27 +299,33 @@ For each unit (or parallel batch of units), follow this cycle: ##### a. 
Build Scoped Prompt -For each unit, the orchestrator constructs a focused prompt by loading the **execution agent prompt template** from `references/execution-agent-prompt.md` and filling in the context blocks. - -Apply the shared `Reference Template Loading` protocol from `references/orchestration-protocol.md`, substituting `execution-agent-prompt.md`. - -- Quote the first non-empty line of the loaded template before continuing. -- Every execution, retry, fix, and regression-repair subagent in this workflow must start from a freshly loaded copy of that same template. -- Fill the placeholders from the loaded template only. Do not reconstruct the prompt from memory, paraphrase it into a shorter prompt, or drop mandatory sections. -- If the template cannot be loaded, quoted, or fully populated without unresolved `{{PLACEHOLDER}}` values, stop and resolve the missing context before spawning the subagent. - -- **{{UNIT_TITLE}}** and **{{UNIT_DESCRIPTION}}** -- from the plan packet or ticket artifact -- **{{UNIT_KIND}}** -- from the plan or ticket (`tracer-bullet`, `expansion`, `hardening`, `infra-packet`, `fix-item`, etc.) 
-- **{{OUTCOME_SCENARIO}}** -- the observable behavior or enabling outcome this unit proves -- **{{UNIT_SCOPE}}** -- what the unit owns and excludes -- **{{UNIT_SCOPE_FENCE}}** -- the boundary that keeps the unit thin -- **{{FILE_LIST}}** -- files to create/modify from the plan or ticket -- **{{SUCCESS_CRITERIA}}** -- checkboxes that define "done" -- **{{VALIDATION_COMMAND}}** -- how to verify the unit works -- **{{COMPLETED_DEPENDENCIES}}** -- list of already-completed units this depends on -- **{{PARENT_REFS}}** -- plan, ticket set, architecture, and source packet refs that anchor this unit -- **{{TICKET_LOCAL_CONTEXT}}** -- the ticket-local execution packet when the source is a ticket; otherwise a compact packet derived from the plan unit -- **{{WHY_CONTEXT}}** -- the purpose grounding block (constructed by orchestrator): +For each unit, the orchestrator constructs a focused prompt for the named `execution-agent`. + +Apply the shared `Named Agent Dispatch` protocol from `references/orchestration-protocol.md`, substituting `execution-agent`. + +- Quote the first non-empty line of the loaded bundled agent template before continuing. +- Every execution, retry, fix, and regression-repair subagent in this workflow must start from a freshly loaded copy of that same agent template. +- Build `scoped_prompt` by injecting the full loaded `execution-agent` template plus the resolved context packet below. Do not summarize, abbreviate, or paraphrase the agent template. +- Apply the shared `Reference Template Loading` protocol from `references/orchestration-protocol.md`, substituting `execution-agent-prompt.md`, and quote the first non-empty line of that scaffold before continuing. +- Also load `references/execution-agent-prompt.md` as the scaffold for the context packet so the injected headings stay stable across retries and follow-up fixes. +- Fill the scaffold completely. Do not continue if any required section is missing or any `{{PLACEHOLDER}}` value is unresolved. 
+ +- **`## Your Unit`** + - **{{UNIT_TITLE}}** and **{{UNIT_DESCRIPTION}}** -- from the plan packet or ticket artifact + - **{{UNIT_KIND}}** -- from the plan or ticket (`tracer-bullet`, `expansion`, `hardening`, `infra-packet`, `fix-item`, etc.) + - **{{OUTCOME_SCENARIO}}** -- the observable behavior or enabling outcome this unit proves + - **{{FEATURE_HOME}}** -- the primary feature home or owning module + - **{{UNIT_SCOPE}}** -- what the unit owns and excludes + - **{{UNIT_SCOPE_FENCE}}** -- the boundary that keeps the unit thin + - **{{FILE_LIST}}** -- files to create/modify from the plan or ticket + - **{{SUCCESS_CRITERIA}}** -- checkboxes that define "done" + - **{{VALIDATION_COMMAND}}** -- how to verify the unit works + - **{{COMPLETED_DEPENDENCIES}}** -- list of already-completed units this depends on + - **{{PARENT_REFS}}** -- plan, ticket set, architecture, and source packet refs that anchor this unit +- **`## Ticket-local context`** + - **{{TICKET_LOCAL_CONTEXT}}** -- the ticket-local execution packet when the source is a ticket; otherwise a compact packet derived from the plan unit +- **`## Why This Unit Exists`** + - **{{WHY_CONTEXT}}** -- the purpose grounding block (constructed by orchestrator): ``` ## Why This Unit Exists **Problem:** [problem narrative from plan -- 1-2 sentences] @@ -316,14 +334,18 @@ Apply the shared `Reference Template Loading` protocol from `references/orchestr **Overall success criteria:** [plan-level success criteria list] **Guardrails:** [relevant constitution principles, approval rules, and approved waivers] ``` -- **{{ARCHITECTURAL_CONTEXT}}** -- from the plan's Architectural Context section, filtered to what's relevant for this unit's files and domain -- **{{ARCHITECTURE_HANDOFF}}** -- from the `docs/architecture/` artifact or explicit plan-derived handoff contract; include deletion-test decisions, interfaces as test surfaces, seams, adapters, contracts, and downstream review guidance relevant to this unit -- 
**{{LEARNINGS_BRIEF}}** -- from previous units, filtered by domain relevance -- **{{PROJECT_CONVENTIONS}}** -- from CLAUDE.md plus relevant constitution baselines -- **{{TDD_CONTRACT}}** -- the resolved execution contract: effective mode, Ralph/default loop, required unit/e2e evidence, and any explicit exceptions -- **{{TDD_SECTION}}** -- if the resolved effective mode is Ralph-driven, include the Ralph/TDD Implementation Section from the template; otherwise include the Standard Implementation Section. Do not treat Ralph as an adjacent side command when it is the resolved default. - -The execution agent template instructs each subagent to follow a 4-phase protocol: +- **`## Architectural Context`** + - **{{ARCHITECTURAL_CONTEXT}}** -- from the plan's Architectural Context section, filtered to what's relevant for this unit's files and domain +- **`## Architecture Handoff`** + - **{{ARCHITECTURE_HANDOFF}}** -- from the `docs/architecture/` artifact or explicit plan-derived handoff contract; include deletion-test decisions, interfaces as test surfaces, seams, adapters, contracts, and downstream review guidance relevant to this unit +- **`## Learnings from Previous Units`** + - **{{LEARNINGS_BRIEF}}** -- from previous units, filtered by domain relevance +- **`## Project Conventions`** + - **{{PROJECT_CONVENTIONS}}** -- from CLAUDE.md plus relevant constitution baselines +- **`## TDD Execution Contract`** + - **{{TDD_CONTRACT}}** -- the resolved execution contract: effective mode, Ralph/default loop, required unit/e2e evidence, any explicit exceptions, and any fix/regression context that the retried unit must address + +The loaded `execution-agent` template instructs each subagent to follow a 4-phase protocol: 1. **Understand** -- review requirements, surface ambiguities, state assumptions before coding 2. **Implement** -- follow the resolved Ralph/default execution mode, retry on failure (up to 3 attempts) 3. 
**Self-review** -- check completeness, quality, discipline, testing, and evidence @@ -334,10 +356,10 @@ The execution agent template instructs each subagent to follow a 4-phase protoco Delegate the unit to a focused subagent: ``` -Task(general-purpose, prompt=scoped_prompt) +Task(execution-agent, prompt=scoped_prompt) ``` -The subagent prompt is constructed from the loaded execution agent template (`references/execution-agent-prompt.md`). The template already includes instructions for the 4-phase protocol (understand, implement, self-review, report). The orchestrator fills in the context blocks and passes the result. Do not substitute a custom summary prompt for any execution worker: +The subagent prompt is constructed from the loaded bundled `execution-agent` template plus the fully injected context packet scaffold from `references/execution-agent-prompt.md`. Do not substitute a custom summary prompt for any execution worker, and do not dispatch ticket implementation through `general-purpose`. Each `execution-agent` run must: 1. Read referenced files and understand existing patterns 2. Follow the resolved Ralph/default execution contract @@ -353,7 +375,7 @@ The subagent prompt is constructed from the loaded execution agent template (`re - Final test results (pass/fail) - Attempt count -**For parallel units**: Spawn multiple subagents simultaneously only when their file sets do not overlap and their dependency types allow parallel work. Before parallelizing, verify file sets do not overlap and no unit claims shared mutable state without an explicit guard. +**For parallel units**: Spawn multiple subagents simultaneously only when their file sets do not overlap and their dependency types allow parallel work. For ticket-index input, parallelize only when the selected batch is explicitly marked safe by the index's file-overlap notes. Before parallelizing, verify file sets do not overlap and no unit claims shared mutable state without an explicit guard. If there is doubt, execute sequentially. 
**Example scoped prompt:** @@ -483,7 +505,7 @@ If the `--review-mode` argument is `inline` or `both`, perform a two-stage inlin The spec reviewer should check not just checkbox compliance but whether the implementation actually delivers on the recorded purpose. A unit can pass all checkboxes but miss the intent. - If **PASS**: proceed to Stage 2 - - If **FAIL**: reload `execution-agent-prompt.md`, rebuild the scoped execution prompt with the review findings added as fix context, then spawn a new execution subagent. Re-run the spec reviewer afterward (max 2 fix-review cycles). If still failing after 2 cycles, log the issues and ask the user how to proceed. + - If **FAIL**: reload the bundled `execution-agent` template plus `execution-agent-prompt.md`, rebuild the scoped execution prompt with the review findings added as fix context, then spawn a new `execution-agent` run. Re-run the spec reviewer afterward (max 2 fix-review cycles). If still failing after 2 cycles, log the issues and ask the user how to proceed. **Stage 2: Code Quality Review** (only after spec compliance passes) @@ -497,7 +519,7 @@ If the `--review-mode` argument is `inline` or `both`, perform a two-stage inlin ``` - If **PASS**: proceed to next steps - - If **FAIL** with Critical issues: reload `execution-agent-prompt.md`, rebuild the scoped execution prompt with the quality findings added as fix context, spawn a fix subagent, then re-review (max 2 cycles) + - If **FAIL** with Critical issues: reload the bundled `execution-agent` template plus `execution-agent-prompt.md`, rebuild the scoped execution prompt with the quality findings added as fix context, spawn an `execution-agent` fix run, then re-review (max 2 cycles) - If **FAIL** with only Important/Minor issues: log them for the orchestrator's attention but proceed to next task (these will also be caught by `/workflows-review` if run later) **Note:** Inline review is a lightweight per-unit check. 
It does NOT replace the comprehensive `/workflows-review` multi-agent review. When `--review-mode both` is active, inline review runs per-unit AND `/workflows-review` runs after all units complete. @@ -508,12 +530,14 @@ If the `--review-mode` argument is `inline` or `both`, perform a two-stage inlin **5. Update source work artifact** -- keep the execution source honest: - plan input: check off completed items (`[ ]` to `[x]`) in the original plan document + - ticket-index input: update the selected batch status in `index.md`, keep the ticket table honest, and increment `last_completed_batch` only after every ticket in that batch is `completed` - ticket input: update the ticket `status` field (`ready` -> `completed` or `blocked`) and preserve parent refs + - if both an index and ticket files can be updated safely, update both without inventing new status fields - if both a ticket and plan backlog can be updated safely, update both without inventing new status fields **6. Regression guard** -- run test commands from ALL previously completed tasks. If any regress: - Log the regression in the current task's session file - - Reload `execution-agent-prompt.md`, rebuild the scoped execution prompt with context about what broke and why, and spawn a fix subagent + - Reload the bundled `execution-agent` template plus `execution-agent-prompt.md`, rebuild the scoped execution prompt with context about what broke and why, and spawn an `execution-agent` fix run - Do not proceed to the next task until the regression is fixed **7. 
Incremental commit** if appropriate (logical unit complete, tests pass): diff --git a/.github/skills/workflows-work/references/execution-agent-prompt.md b/.github/skills/workflows-work/references/execution-agent-prompt.md index 91b109c..14d9778 100644 --- a/.github/skills/workflows-work/references/execution-agent-prompt.md +++ b/.github/skills/workflows-work/references/execution-agent-prompt.md @@ -9,23 +9,17 @@ platforms: # Execution Agent Prompt Template -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. +This template is the **injected context packet** that `/workflows-work` passes into the named `execution-agent`. -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. +**Canonical execution rules live in `agents/workflow/execution-agent.md`.** `/workflows-work` must load that bundled agent template, then inject the fully populated context packet below when dispatching `Task(execution-agent, prompt=scoped_prompt)`. -**Template authority:** This file is the only valid source for execution-subagent prompts. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. - ---- - -You are an execution agent implementing a specific task from a work plan. Follow the 4-phase protocol below exactly. +**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator so the exact context scaffold ships with generated workflow bundles. -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. 
The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. - -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. +**Scaffold authority:** This file is the only valid source for the injected execution context packet. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. --- -You are an execution agent implementing a specific execution unit from a work plan. Follow the 4-phase protocol below exactly. +The bundled `execution-agent` enforces clean-code, DRY, SOLID, feature-home boundary discipline, doc blocks above non-trivial functions/classes, imports at the top of files unless a real exception exists, explicit failure handling, and the structured execution report contract. Populate the scaffold below completely before dispatch. ## Your Unit @@ -90,189 +84,3 @@ Use `references/tdd-evidence-contract.md` as the shared source of truth for cont - `Red` and `Green` prove behavior coverage. - `Post-Refactor Green` proves cleanup safety. - If no cleanup was needed, still rerun and say so. - ---- - -## Phase 1: Understand Before Building - -Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. - -Before proceeding, confirm the prompt still contains these sections: **Your Task**, **Why This Task Exists**, **Architectural Context**, **Learnings from Previous Tasks**, **Project Conventions**, and the four numbered phases below. If any section is missing or any placeholder is unresolved, stop and report the template integrity problem. 
- -**If anything is unclear, ambiguous, or could be interpreted multiple ways:** -- List your questions explicitly -- State the assumptions you would make if proceeding without answers -- Ask for clarification before starting work - -**If everything is clear:** -- State your interpretation of the requirements in 2-3 sentences -- State how this unit serves the overall user story (from the WHY context) -- List any assumptions you are making (even obvious ones) -- Proceed to Phase 2 - -Do NOT skip this phase. A few minutes of clarification prevents hours of rework. It is always better to ask than to guess. - -## Phase 2: Implement - -{{TDD_SECTION}} - -### While Implementing - -- If you encounter something unexpected or unclear, **STOP and ask** rather than guessing -- Follow existing codebase patterns -- do not invent new conventions -- Reuse before you create: before adding a new function, helper, class, interface, type, or utility, search the touched area for an existing abstraction you can reuse or extend safely -- Apply DRY by reason-to-change: extract shared code only when the behavior and future changes truly belong together; keep duplication local when forced abstractions would hide intent -- Apply SOLID deliberately: introduce new classes or interfaces only when they clarify responsibilities, dependency direction, or substitution boundaries; if a new abstraction does not clearly improve the design, do not add it -- Prefer direct, readable code over helper stacks, wrappers, manager classes, or indirection layers created "just in case" -- Variable, function, class, and interface names must be explicit and unambiguous. 
Avoid abbreviations like `cb`, `ctx`, `svc`, `obj`, or `tmp` when a clearer name fits the scope -- Keep changes minimal -- implement what is asked, nothing more (YAGNI) -- Do not add "nice to have" features not in the success criteria -- Commit after each logical unit of complete work using the project's commit convention - -### On Test Failure - -If tests fail after implementation: -1. Read the error message carefully -- understand what failed and why -2. Analyze whether the failure is in your implementation or in the test -3. Fix the issue -4. Re-run the test command -5. Repeat up to 3 total attempts -6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly - -## Phase 3: Self-Review - -Before reporting back, review your own work with fresh eyes. Go through each checklist item honestly: - -**Completeness:** -- [ ] Did I implement EVERYTHING in the success criteria? -- [ ] Are there edge cases the criteria imply that I did not handle? -- [ ] Did I miss any requirements? - -**Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? -- [ ] Would a user achieve the stated outcome with this code? -- [ ] Did I build anything that doesn't trace back to the success criteria or user story? - -**Quality:** -- [ ] Do names accurately describe what things do (not how they work)? -- [ ] Did I reuse existing code where it already solved this problem cleanly? -- [ ] If I introduced a new abstraction, does it have a clear SOLID-based reason to exist? -- [ ] Did I avoid vague or abbreviated names in favor of explicit intent? -- [ ] Did the core business logic stay in the declared feature home unless the architecture handoff justified a shared/global extraction? -- [ ] Is the code clean and maintainable? -- [ ] Does it follow existing codebase patterns? -- [ ] Is error handling appropriate? - -**Discipline:** -- [ ] Did I avoid overbuilding (YAGNI)? 
-- [ ] Did I ONLY build what was requested? -- [ ] No "nice to have" additions? -- [ ] No unnecessary abstractions or premature optimization? - -**Testing:** -- [ ] Do tests verify actual behavior (not just mock behavior)? -- [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? - -**Evidence:** -- [ ] Can I show actual test output (not just "tests pass")? -- [ ] For UI changes, do I have a screenshot or visual evidence? -- [ ] For API changes, do I have actual request/response data? - -If you find issues during self-review, **fix them now** before reporting. Do not report known issues -- fix them first. - -## Phase 4: Report - -Return a structured execution report in exactly this format: - -```markdown -## Execution Report: [Unit Title] - -### Interpretation -[Your 2-3 sentence interpretation of what was asked] - -### Purpose Served -[Which user story aspect / success criterion this unit delivers, from the WHY context] - -### Assumptions Made -- [List each assumption, even if obvious] - -### What Was Implemented -[Describe what you built and how it works] - -### Files Changed -- `path/to/file` -- created/modified (brief description of change) - -### Test Results -- Command: `[test command]` -- Result: PASS/FAIL -- Attempts: [n] -- Output: -``` -[paste ACTUAL test output here] -``` - -### TDD Evidence -- **Red** - - Command: `[red command]` - - Result: PASS/FAIL - - Evidence: [why this proves the missing behavior existed before the implementation] -- **Green** - - Command: `[green command]` - - Result: PASS/FAIL - - Evidence: [why this proves the requested behavior now passes] -- **Post-Refactor Green** - - Command: `[post-refactor command]` - - Result: PASS/FAIL - - Evidence: [why this proves cleanup/refactor work preserved behavior] - -[If no cleanup was needed, still rerun and say so.] 
- -### Problems Encountered -[For each problem encountered during implementation:] -- **Error:** [exact error message] -- **Root cause:** [your analysis of why it happened] -- **Fix:** [what you did to resolve it] - -[If no problems: "None"] - -### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] - -[If none: "None"] - -### Self-Review Findings -- [Issues found and fixed during self-review] - -[If none: "Self-review passed -- no issues found"] -``` - ---- - -## Standard Implementation Section - -_This section is included when TDD is not enabled._ - -1. Read referenced files and understand existing patterns -2. Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. Implement the task following project conventions -4. Write tests matching the success criteria -5. Run the test command: `{{TEST_COMMAND}}` -6. If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) - ---- - -## TDD Implementation Section - -_This section is included when `tdd_enabled: true` is configured._ - -Follow the red-green-refactor cycle strictly: - -1. Read referenced files and understand existing patterns -2. Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. **RED:** Write tests FIRST based on the success criteria. Run them. They MUST fail -- and they must fail for the RIGHT reason (the behavior is missing, not import errors or syntax problems) -4. **GREEN:** Write the MINIMAL production code needed to make the tests pass. No more than what is necessary. -5. Run tests. They MUST pass. -6. **REFACTOR:** Clean up if needed. Tests must still pass after refactoring. - -**Iron rule:** If at any point you find yourself writing production code before a failing test exists for that behavior, STOP. Write the test first. This is not a suggestion -- it is the process. 
diff --git a/.github/skills/workflows-work/references/ticket-execution-contract.md b/.github/skills/workflows-work/references/ticket-execution-contract.md index 563c72f..39df591 100644 --- a/.github/skills/workflows-work/references/ticket-execution-contract.md +++ b/.github/skills/workflows-work/references/ticket-execution-contract.md @@ -21,14 +21,32 @@ Required files: `index.md` must include: +- frontmatter with: + - `plan_ref` + - `architecture_ref` or an explicit handoff note + - `execution_shape` + - `ticket_set_status` -- `ready | in_progress | blocked | completed` + - `last_completed_batch` + - `total_batches` - plan path - architecture artifact path or explicit handoff note - execution shape - ticket order -- dependency view +- dependency graph +- execution batches +- file-overlap safety notes for every multi-ticket batch - blocker summary - review summary split into `Blocking gaps` and `Recommendations` +Recommended body shape: + +1. `# Ticket Set: <ticket-set-name>` +2. `## Dependency Graph` +3. `## Execution Batches` +4. `## Ticket Table` +5. `## Blockers` +6. 
`## Review Summary` + ## Ticket file naming - Keep zero-padded numeric prefixes in execution order: `01-...`, `02-...`, `03-...` @@ -103,6 +121,15 @@ Use these ticket statuses: ## Execution consumption rules +When `/workflows-work` receives `index.md`: + +- treat the index as the authoritative ticket queue for this ticket set +- read `last_completed_batch` to choose the next unfinished batch +- load only the tickets named in that next batch +- execute multiple tickets together only when the batch is explicitly declared as parallel-safe and the index records why the file sets do not conflict +- if the index or ticket files leave overlap safety ambiguous, collapse that batch to sequential execution instead of guessing +- update batch progress in `index.md`, and increment `last_completed_batch` only after every ticket in the batch reaches `completed` + When `/workflows-work` receives a ticket file: - treat that ticket as one pre-scoped execution unit @@ -117,4 +144,6 @@ When `/workflows-review` or `ticket-flow-auditor` consumes ticket artifacts, ver - the ticket still matches the parent plan and architecture - the implementation stayed inside the ticket scope fence unless the change was explicitly documented - dependency order and status changes stayed honest +- the dependency graph and batch partition still match the ticket files +- parallel-safe batches really stay file-disjoint and race-safe - evidence matches the ticket's stated acceptance criteria and test command diff --git a/.github/skills/workflows-work/references/ticketization-contract.md b/.github/skills/workflows-work/references/ticketization-contract.md index a05c2e2..6545e3f 100644 --- a/.github/skills/workflows-work/references/ticketization-contract.md +++ b/.github/skills/workflows-work/references/ticketization-contract.md @@ -38,7 +38,7 @@ docs/tickets/YYYY-MM-DD-/ Required outputs: -- `index.md` -- ticket-set summary, dependency view, and run guidance +- `index.md` -- ticket-set summary, dependency 
graph, execution batches, run guidance, and progress pointer - `NN-.md` -- one file per ticket in execution order Record the ticket set back into the plan as: @@ -63,6 +63,30 @@ The first version is **local-artifact first**. - Split a packet when one ticket would deliver more than one meaningful outcome, blur ownership, or bury the feature home. - Keep the first ticket a tracer bullet when the selected execution shape is `vertical-slices`. +## Dependency graph and execution batches + +Ticketization must build a conservative ticket dependency graph and then derive execution batches from it. + +- Start from each ticket's explicit `depends_on` edges. +- Then partition tickets into `Batch 1`, `Batch 2`, and so on, where every ticket in a batch depends only on earlier batches. +- A batch may contain multiple tickets only when their declared `files` sets do not overlap and there is no unresolved shared mutable state, migration risk, or boundary ambiguity between them. +- If two tickets might race, overwrite each other, or require the same shared adapter/config surface, keep them in separate sequential batches. +- **Default-to-sequential rule:** if batch safety is uncertain, emit singleton batches instead of guessing at parallelism. + +The index file becomes the authoritative execution queue for ticketized work. `/workflows-work` should be able to read `index.md`, find the next unfinished batch, and execute only that batch without recomputing the whole backlog. + +## Required index progress state + +`index.md` must record batch progress explicitly so ticketized execution can resume from the index alone. + +Required fields: + +- `last_completed_batch` -- integer counter; `0` means no batches completed yet +- `total_batches` -- total number of execution batches in the ticket set +- batch-level status view showing which batches are pending, in progress, blocked, or completed + +`last_completed_batch` advances only after every ticket in that batch is complete. 
If any ticket in the batch is blocked or still running, do not advance the counter. + ## Required ticket-local context Every generated ticket must carry a compact execution packet that can stand on its own. @@ -116,6 +140,9 @@ That review must check: - feature-home clarity - shared/global boundary honesty - blocker and dependency correctness +- dependency graph correctness +- execution batch safety and parallelization honesty +- file-overlap conflicts between tickets claimed to be parallel-safe - context completeness - ticket size and coupling quality @@ -133,5 +160,7 @@ The ticketization contract is satisfied only when: - the priming skill and ticket schema contract are explicit - local-artifact-first behavior is explicit - `tickets_ref` recording is explicit +- the dependency graph and conservative batch partition are explicit +- the index progress pointer is explicit - each ticket's required local context is explicit - the final ticket-set review step and reviewer are explicit diff --git a/README.md b/README.md index aaababf..b173066 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Portable source-of-truth and OpenCode-first release repository for the `compound-engineering` plugin. -The project packages a constitution-first, spec-driven workflow system for AI-assisted engineering: **33 specialized agents, 28 commands, and 26 skills** built from one portable definition set and emitted into multiple agent harnesses. +The project packages a constitution-first, spec-driven workflow system for AI-assisted engineering: **34 specialized agents, 28 commands, and 26 skills** built from one portable definition set and emitted into multiple agent harnesses. 
- **OpenCode** -- first-class authoring and daily-driver surface - **GitHub Copilot** -- second-class generated output @@ -89,8 +89,8 @@ This repo is built for: | `/workflows:plan` | execution-ready HOW | chooses an execution shape, then breaks work into slices or other execution packets with dependencies and success criteria | | `/workflows:architecture` | architecture artifact in `docs/architecture/` | forces the important structural decisions into the open | | `/deepen-plan` | stronger plan with research and review input | hardens the selected execution backlog before execution | -| `/workflows:to-issues` | local ticket set in `docs/tickets/` | turns one large plan into smaller vertical-slice execution packets by applying the focused ticket-priming skill, then gates the set with `ticket-flow-auditor` | -| `/workflows:work` | executed implementation with session state and learnings | drives the Ralph-first build loop by executing either plan units or one ticket artifact through scoped subagents | +| `/workflows:to-issues` | local ticket set in `docs/tickets/` | turns one large plan into smaller vertical-slice execution packets, builds a conservative dependency graph plus parallel-safe batches, then gates the set with `ticket-flow-auditor` | +| `/workflows:work` | executed implementation with session state and learnings | drives the Ralph-first build loop by executing either plan units or the next safe batch from a ticket index through scoped subagents | | `/workflows:review` | purpose-aware review against code, architecture, tickets, and evidence | checks fit, not just syntax | | `/workflows:compound` | reusable solution docs and refreshed learnings | turns one solved problem into future leverage | @@ -110,9 +110,9 @@ For most serious work: ### What is new in the ticketized flow - **`focused-ticket-priming`** turns one execution packet into one compact ticket-local packet instead of copying the whole plan into every task. 
-- **`ticket-execution-contract.md`** gives ticket creation, ticket execution, and review one shared schema for frontmatter, sections, refs, and ticket status. +- **`ticket-execution-contract.md`** gives ticket creation, ticket execution, and review one shared schema for frontmatter, sections, refs, ticket status, index batching, and resumable progress. - **`ticket-flow-auditor`** now closes `/workflows:to-issues` and also runs inside `/workflows:review`, so the same reviewer checks both pre-execution ticket quality and post-implementation ticket drift. -- **Ticket-scoped `/workflows:work`** can execute one ticket file directly using `plan_ref`, `tickets_ref`, and `architecture_ref` as deeper-dive context instead of reloading the full backlog every time. +- **Index-driven `/workflows:work`** can execute the next safe ticket batch directly from `docs/tickets/.../index.md`, using the index as the execution cursor and the selected ticket files as the local packets. ## Full updated workflow guidance @@ -124,15 +124,15 @@ Use the full chain when you want the plugin to take a feature from vague intent | 2 | `/workflows:plan` | `docs/plans/...` | Choose an honest execution shape and define packets with dependencies and evidence expectations. | | 3 | `/workflows:architecture` | `docs/architecture/...` | Lock feature homes, shared/global boundaries, context tiers, seams, and drift checks before hardening the plan. | | 4 | `/deepen-plan` | updated `docs/plans/...` | Stress-test the chosen backlog with research and reviewers until boundaries and execution packets are solid. | -| 5 | `/workflows:to-issues` | `docs/tickets/.../index.md` + ticket files | Use `focused-ticket-priming` to shrink each packet into one execution-ready ticket, then let `ticket-flow-auditor` classify blocking gaps vs recommendations. | -| 6 | `/workflows:work ` | `docs/execution-sessions/...` | Prefer one ticket file at a time. 
The ticket stays primary, and the parent plan plus architecture stay as deeper-dive refs. | +| 5 | `/workflows:to-issues` | `docs/tickets/.../index.md` + ticket files | Use `focused-ticket-priming` to shrink each packet into one execution-ready ticket, then write the dependency graph, conservative execution batches, and `last_completed_batch` cursor into the index before `ticket-flow-auditor` signs off. | +| 6 | `/workflows:work ` | `docs/execution-sessions/...` | Prefer the ticket index as the execution entrypoint. `/workflows:work` reads the next batch from the index, runs only that safe batch, and advances the index cursor when the batch is complete. | | 7 | `/workflows:review` | review findings | Review against code, architecture, ticket artifacts, and TDD evidence. This is where post-implementation ticket drift is checked. | | 8 | `/workflows:compound` | `docs/solutions/...` | Capture the solved pattern so the next task starts from accumulated knowledge instead of chat history. | ### Practical usage rules 1. Run **`/workflows:to-issues` after `/deepen-plan`** when you want the cleanest execution packets. Run it right after `/workflows:plan` only when you explicitly want earlier backlog shaping and you are willing to preserve visible uncertainty. -2. Treat **tickets as the default execution boundary**. If the plan is large, do not hand the entire plan to every execution run once ticket artifacts exist. +2. Treat **the ticket index as the default execution entrypoint** once ticket artifacts exist. Let `/workflows:work` pick the next batch from `index.md` instead of hand-selecting from the full plan every time. 3. Keep **business logic inside the feature home** named by the architecture artifact. Only move code into shared/global space when the reason to change is truly cross-feature. 4. 
Let **`ticket-flow-auditor` findings block execution** when it reports missing dependency order, weak WHY tracing, oversized tickets, or scope fences that are too vague to enforce. 5. Use **`/brownfield-maintenance`** outside the happy path when the repo already exists and the AI-layer docs, prompts, or review contracts need repair before you can trust the workflow. @@ -142,7 +142,7 @@ Use the full chain when you want the plugin to take a feature from vague intent - `/technical_review` is gone - `/workflows:architecture` is now the supported architecture handoff - `/workflows:to-issues` is the local-artifact-first ticketization step between deepening and execution, now powered by the `focused-ticket-priming` skill and the reusable `ticket-flow-auditor` -- `/workflows:work` can execute one `docs/tickets/.../*.md` artifact directly instead of dragging the full plan into every run +- `/workflows:work` can execute the next safe batch directly from `docs/tickets/.../index.md`, while still allowing a single ticket file when you need a narrower manual run - plan/deepen/work now default to issue-shaped vertical slices and tracer-bullet sequencing, while still allowing explicit `infra-track` and `fix-batch` modes when slices would be fake - `/brownfield-maintenance` is the on-demand repair path for inherited repos whose AI-layer docs, prompts, and reviewer coverage have drifted - Ralph-driven TDD is explicit across setup, planning, execution, and review diff --git a/plugins/compound-engineering/.claude-plugin/plugin.json b/plugins/compound-engineering/.claude-plugin/plugin.json index c68e3f1..fc470aa 100644 --- a/plugins/compound-engineering/.claude-plugin/plugin.json +++ b/plugins/compound-engineering/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "compound-engineering", - "version": "4.11.0", - "description": "OpenCode-first AI-powered development tools. 
Includes 33 specialized agents, 28 commands, and 26 skills spanning code review, research, design, and workflow automation, with generated Copilot support and Claude Code compatibility outputs.", + "version": "4.13.0", + "description": "OpenCode-first AI-powered development tools. Includes 34 specialized agents, 28 commands, and 26 skills spanning code review, research, design, and workflow automation, with generated Copilot support and Claude Code compatibility outputs.", "author": { "name": "The Rabak", "email": "arielvaron@gmail.com", diff --git a/plugins/compound-engineering/CHANGELOG.md b/plugins/compound-engineering/CHANGELOG.md index 7677324..73b5c4e 100644 --- a/plugins/compound-engineering/CHANGELOG.md +++ b/plugins/compound-engineering/CHANGELOG.md @@ -5,6 +5,31 @@ All notable changes to the compound-engineering plugin will be documented in thi The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [4.13.0] - 2026-05-24 + +### Added + +- **`execution-agent` agent** -- New workflow execution agent for `/workflows:work` ticket and unit delivery, with strict clean-code, DRY, SOLID, doc-block, import-order, and Ralph evidence guardrails. + +### Changed + +- **`/workflows:work`** -- Ticket execution, retries, and regression repairs now dispatch through the named `execution-agent`, with the full bundled agent template plus injected execution packet context loaded on every run. +- **Execution prompt contract** -- `execution-agent-prompt.md` now serves as the injected context scaffold for the named `execution-agent` instead of acting like a standalone worker prompt. + +## [4.12.0] - 2026-05-24 + +### Changed + +- **`/workflows:to-issues`** -- Ticket sets now require a conservative dependency graph, explicit parallel-safe execution batches, and an index-level `last_completed_batch` cursor so ticketized execution can resume from `index.md`. 
+- **`/workflows:work`** -- Now accepts ticket indexes as the default ticketized entrypoint, selects the next safe batch from `docs/tickets/.../index.md`, and advances the batch cursor only after the whole batch completes. +- **`ticket-flow-auditor` and ticket contracts** -- Review and shared ticket contracts now verify batch safety, dependency layering, and file-overlap honesty so parallel execution stays race-safe. + +## [4.11.1] - 2026-05-24 + +### Changed + +- **`grill-with-docs` / `grill-me` skill** -- Concrete implementation and architecture decisions now get written inline into the active brainstorm or plan artifact during the grilling session, while `CONTEXT.md` stays glossary-only. + ## [4.11.0] - 2026-05-20 ### Added diff --git a/plugins/compound-engineering/README.md b/plugins/compound-engineering/README.md index a4b7d21..111597d 100644 --- a/plugins/compound-engineering/README.md +++ b/plugins/compound-engineering/README.md @@ -1,6 +1,6 @@ # Compounding Engineering Plugin -AI-powered development tools that get smarter with every use. Make each unit of engineering work easier than the last. Includes 33 specialized agents, 28 commands, and 26 skills. +AI-powered development tools that get smarter with every use. Make each unit of engineering work easier than the last. Includes 34 specialized agents, 28 commands, and 26 skills. This repository also ships generated Copilot assets under the repo root `.github/`, built from the canonical portable source in `portable/compound-engineering/`. @@ -14,9 +14,9 @@ This repository also ships generated Copilot assets under the repo root `.github ## Workflow contract highlights - `/workflows:architecture` is the architecture-improvement handoff between planning and `/deepen-plan`. -- `/workflows:to-issues` turns plans into local ticket artifacts with compact execution context before implementation, using `focused-ticket-priming` and `ticket-flow-auditor`. 
+- `/workflows:to-issues` turns plans into local ticket artifacts with compact execution context, a dependency graph, and conservative execution batches before implementation, using `focused-ticket-priming` and `ticket-flow-auditor`. - `/workflows:plan`, `/deepen-plan`, and `/workflows:work` now default to issue-shaped execution slices, with the first slice acting as the tracer bullet, while still allowing explicit `infra-track` and `fix-batch` modes when slices would be fake. -- `/workflows:work` can execute a single ticket artifact directly while preserving parent plan and architecture refs. +- `/workflows:work` can execute the next safe batch directly from a ticket index while preserving parent plan and architecture refs. - Vertical slices now carry a feature-home module contract: feature business logic stays co-located, while truly shared utilities and adapters stay global. - `/brownfield-maintenance` is the on-demand repair path for inherited repos whose AI-layer docs, prompts, and reviewer coverage have drifted. - `/workflows:work` is the Ralph-first execution path; `/ralph-loop` and `/cancel-ralph` are helpers, not a detached workflow. @@ -30,15 +30,15 @@ Use this sequence when you want the full compound workflow instead of an ad hoc 2. `/workflows:plan` -- choose the execution shape and define packets. 3. `/workflows:architecture` -- lock feature homes, shared/global boundaries, and architecture handoff details. 4. `/deepen-plan` -- harden the plan with research and review. -5. `/workflows:to-issues` -- generate `docs/tickets/...` with `focused-ticket-priming`, then gate the ticket set with `ticket-flow-auditor`. -6. `/workflows:work ` -- execute one ticket artifact at a time while preserving parent plan and architecture refs. +5. `/workflows:to-issues` -- generate `docs/tickets/...` with `focused-ticket-priming`, then write the dependency graph and batch cursor into `index.md` before `ticket-flow-auditor` gates the set. +6. 
`/workflows:work ` -- execute the next safe batch from the ticket index while preserving parent plan and architecture refs. 7. `/workflows:review` -- review code, ticket drift, architecture fit, and TDD evidence together. 8. `/workflows:compound` -- turn the result into reusable team knowledge. ### Ticketized execution guidance - Prefer `/workflows:to-issues` after `/deepen-plan` for the sharpest execution packets. -- Use ticket files as the default `/workflows:work` input once they exist. +- Use the ticket index as the default `/workflows:work` input once it exists. - Treat feature-home ownership and scope fences as hard boundaries, not suggestions. - Use `/brownfield-maintenance` separately when an inherited repo needs workflow repair before normal feature delivery. @@ -61,7 +61,7 @@ bun test | Component | Count | |-----------|-------| -| Agents | 33 | +| Agents | 34 | | Commands | 28 | | Skills | 26 | | Hooks | 0 | @@ -71,7 +71,7 @@ bun test Agents are organized into categories for easier discovery. -### Review (20) +### Review (21) | Agent | Description | |-------|-------------| @@ -116,11 +116,12 @@ Agents are organized into categories for easier discovery. 
| `design-iterator` | Iteratively refine UI through systematic design iterations | | `figma-design-sync` | Synchronize web implementations with Figma designs | -### Workflow (3) +### Workflow (4) | Agent | Description | |-------|-------------| | `bug-reproduction-validator` | Systematically reproduce and validate bug reports | +| `execution-agent` | Execute scoped `/workflows:work` tickets and units with strict clean-code, DRY, SOLID, and Ralph-aware delivery discipline | | `pr-comment-resolver` | Address PR comments and implement fixes | | `spec-flow-analyzer` | Analyze user flows and identify gaps in specifications | diff --git a/plugins/compound-engineering/agents/review/ticket-flow-auditor.md b/plugins/compound-engineering/agents/review/ticket-flow-auditor.md index 414238c..87a877f 100644 --- a/plugins/compound-engineering/agents/review/ticket-flow-auditor.md +++ b/plugins/compound-engineering/agents/review/ticket-flow-auditor.md @@ -10,13 +10,14 @@ Protect the plan -> ticket -> implementation chain. Review whether tickets are s ## Workflow 1. Determine the mode: ticket-set audit before execution, or implementation audit after code exists. 2. Trace the chain from plan to architecture to ticket artifacts to execution evidence or branch diff. -3. Pressure-test ticket scope fences, feature-home ownership, dependency order, and context sufficiency. +3. Pressure-test ticket scope fences, feature-home ownership, dependency order, execution-batch partitioning, and context sufficiency. 4. Separate blocking contract failures from improvements, with citations for every finding. ## Report - `Review Mode`: ticket-set audit or implementation audit. - `Blocking gaps`: issues that make the ticket set or implementation unsafe to continue without repair. - `Recommendations`: improvements that sharpen the flow without blocking progress. +- `Batch safety notes`: whether the dependency graph and parallel batches are honest, conservative, and race-safe. 
- `Traceability notes`: where plan, architecture, ticket, and implementation stayed aligned or drifted. - `Evidence cited`: artifact paths, diff locations, and session evidence supporting the findings. @@ -24,4 +25,6 @@ Protect the plan -> ticket -> implementation chain. Review whether tickets are s - Do not redesign the whole backlog when a local repair would solve the issue. - Do not ask for ticket splits or merges unless coupling, ownership, or outcome clarity is materially wrong. - Do not ignore undocumented scope expansions just because the code looks good. +- Do not bless a parallel batch unless the ticket files are genuinely disjoint and the index records why it is safe. +- When batch safety is ambiguous, prefer a sequential recommendation over a risky parallel one. - Cite the specific ticket file, plan section, architecture artifact, diff hunk, or execution artifact that proves each finding. diff --git a/plugins/compound-engineering/agents/workflow/execution-agent.md b/plugins/compound-engineering/agents/workflow/execution-agent.md new file mode 100644 index 0000000..013b8fe --- /dev/null +++ b/plugins/compound-engineering/agents/workflow/execution-agent.md @@ -0,0 +1,206 @@ +--- +name: execution-agent +description: "Executes one scoped ticket or work unit with strict clean-code, DRY, SOLID, and Ralph-aware delivery discipline. Use for `/workflows:work` implementation, retries, and regression repairs." +model: claude-sonnet-4-6 +--- + +## Mission +Implement one bounded execution unit so the code is easier to understand, safer to change, and closer to the stated user outcome than it was before the change. Favor explicit names, tight responsibilities, honest boundaries, minimal but complete diffs, explicit failures, and tests that prove behavior. 
+ +## Required delegated input +The orchestrator prompt must inject these concrete sections before you start: + +- `## Your Unit` +- `## Ticket-local context` +- `## Why This Unit Exists` +- `## Architectural Context` +- `## Architecture Handoff` +- `## Learnings from Previous Units` +- `## Project Conventions` +- `## TDD Execution Contract` + +If any required section is missing, materially incomplete, or still contains unresolved placeholders, stop and report the prompt-integrity problem instead of guessing. + +## Workflow +1. Understand the unit, its purpose, and its boundaries before changing code. +2. Reuse existing patterns, helpers, and abstractions before adding new ones. +3. Implement the smallest complete change that satisfies the unit and its TDD contract. +4. Self-review against the clean-code checklist below and fix issues before reporting. +5. Return the exact execution report contract with real evidence. + +## Clean-code operating rules + +### Names +- Names must reveal purpose, domain meaning, and side effects. Prefer precise nouns and verbs over placeholders like `data`, `info`, `helper`, `util`, `manager`, `process`, `item`, or `tmp`. +- Avoid single-letter variables outside standard tiny scopes (`i`, `j`, `x`, `y`). Avoid unclear abbreviations unless the codebase already treats them as domain shorthand. +- Make the public API honest. A function, class, or variable name must describe what it actually does, not what you wish it did. + +### Structure +- Keep one unit of work responsible for one reason to change. Split decision-making from mechanism when they start drifting apart. +- Keep one abstraction level at a time. Do not mix policy, parsing trivia, persistence details, and formatting noise in one routine. +- Keep side effects obvious. Queries should look like queries; mutations and I/O should be visible in names and call sites. +- Prefer direct readable code over helper stacks, pass-through wrappers, and speculative indirection. 
+ +### DRY and SOLID +- Reuse before you create. Search the touched area for an existing helper, type, class, or utility that already owns the behavior cleanly. +- Apply DRY by reason to change. Extract shared code only when the duplicated behavior genuinely changes for the same reason. +- Apply SOLID deliberately. Introduce classes, interfaces, or seams only when they clarify responsibility, dependency direction, substitution boundaries, or test surfaces. +- Do not invent catch-all abstractions such as `*Manager`, `*Helper`, `*Util`, or generic shared layers without a clear architectural reason. + +### Boundaries +- Keep business logic in the declared feature home unless the architecture handoff explicitly justifies a shared or global extraction. +- Shared/global code must earn its place by serving multiple feature homes or a stable cross-cutting contract. +- Respect scope fences. Do not expand the unit just because a nearby cleanup looks tempting. + +### Comments and documentation +- Add doc blocks or docstrings above public or exported functions, class definitions, interface/type definitions, and non-trivial private helpers whose contract is not obvious from the signature alone. +- Those doc blocks should explain purpose, inputs/outputs, invariants, side effects, failure behavior, or architectural constraints. Do not restate the code line by line. +- Leave inline comments only where a reader truly needs missing intent: non-obvious constraints, boundary rules, tricky algorithms, or why a surprising choice exists. +- If a comment is compensating for confusing code, improve the code first. + +### Imports and dependencies +- Keep imports at the top of the file. +- Defer or conditionalize imports only when there is a real reason such as measurable startup/performance impact, cycle breaking, optional dependencies, or exception-aware loading. If you do this, make the reason obvious in code. 
+- Remove unused imports, dead helpers, stale branches, and compatibility paths that the touched code no longer needs. + +### Errors, state, and tests +- Fail explicitly. Do not hide problems behind broad catches, silent fallbacks, vague exceptions, or mixed success/error return shapes. +- Make mutation and state transitions obvious. Avoid hidden writes and temporal coupling. +- Tests should verify behavior that matters to the success criteria, not implementation trivia. +- When Ralph-driven, preserve stable `Red`, `Green`, and `Post-Refactor Green` evidence. + +## TDD Execution Contract +Use `commands/workflows/references/tdd-evidence-contract.md` as the shared source of truth for contract resolution, Ralph evidence semantics, and report structure. Do not invent a lighter evidence format for convenience. + +### TDD Evidence +- Ralph is the default TDD execution path whenever the resolved contract selects Ralph-driven work. +- `Red` and `Green` prove behavior coverage. +- `Post-Refactor Green` proves cleanup safety. +- If no cleanup was needed, still rerun and say so. + +## Phase 1: Understand Before Building +Before writing any code, review the injected unit requirements, WHY context, architecture handoff, and project conventions carefully. + +**If anything is unclear, ambiguous, or could be interpreted multiple ways:** +- List your questions explicitly. +- State the assumptions you would make if forced to proceed. +- Ask for clarification before starting work. + +**If everything is clear:** +- State your interpretation of the requirements in 2-3 sentences. +- State how this unit serves the overall user story. +- List the assumptions you are making. +- Proceed to implementation. + +Do not skip this phase. A few minutes of clarification prevents hours of rework. + +## Phase 2: Implement +- Follow the resolved Ralph/default execution mode from the injected `## TDD Execution Contract`. 
+- Read referenced files and match existing patterns before introducing new structure. +- Keep changes minimal but complete. Build what the unit asks for, not adjacent wish-list items. +- If tests fail, analyze the failure, fix the issue, and retry. Stop after 3 total implementation attempts and report the failure clearly instead of thrashing. + +## Phase 3: Self-Review +Before reporting back, review your own work honestly. + +### Completeness +- [ ] Did I implement every success criterion? +- [ ] Did I preserve the stated scope fence? +- [ ] Did I handle implied edge cases without scope creep? + +### Purpose alignment +- [ ] Does the implementation deliver the stated user/story outcome? +- [ ] Does every meaningful code change trace back to the unit purpose or success criteria? + +### Code quality +- [ ] Are names explicit and honest? +- [ ] Did I reuse existing code where it already solved this cleanly? +- [ ] If I introduced a new abstraction, does it have a clear reason to exist? +- [ ] Did I keep imports at the top unless there was a real documented reason not to? +- [ ] Did I add doc blocks/docstrings where a future maintainer needs them? +- [ ] Did I leave only comments that add missing intent? +- [ ] Did the business logic stay in the declared feature home unless the handoff allowed extraction? +- [ ] Did I avoid dead code, speculative wrappers, and hidden side effects? +- [ ] Is error handling explicit and appropriate? + +### Discipline +- [ ] Did I avoid overbuilding? +- [ ] Did I avoid speculative abstractions and cleanup unrelated to the unit? + +### Testing and evidence +- [ ] Do tests prove actual behavior? +- [ ] Did I run the stated validation command? +- [ ] Can I show actual output, not just claims? +- [ ] If Ralph-driven, do I have stable `Red`, `Green`, and `Post-Refactor Green` evidence? + +If you find issues during self-review, fix them before reporting. 
+ +## Report +Return a structured execution report in exactly this format: + +````markdown +## Execution Report: [Unit Title] + +### Interpretation +[Your 2-3 sentence interpretation of what was asked] + +### Purpose Served +[Which user story aspect / success criterion this unit delivers] + +### Assumptions Made +- [List each assumption] + +### What Was Implemented +[Describe what you built and how it works] + +### Files Changed +- `path/to/file` -- created/modified (brief description of change) + +### Test Results +- Command: `[test command]` +- Result: PASS/FAIL +- Attempts: [n] +- Output: +``` +[paste actual output here] +``` + +### TDD Evidence +- **Red** + - Command: `[red command]` + - Result: PASS/FAIL + - Evidence: [why this proves the missing behavior existed before the implementation] +- **Green** + - Command: `[green command]` + - Result: PASS/FAIL + - Evidence: [why this proves the requested behavior now passes] +- **Post-Refactor Green** + - Command: `[post-refactor command]` + - Result: PASS/FAIL + - Evidence: [why this proves cleanup/refactor work preserved behavior] + +[If no cleanup was needed, still rerun and say so.] + +### Problems Encountered +- **Error:** [exact error message] + - **Root cause:** [your analysis] + - **Fix:** [what you did] + +[If no problems: "None"] + +### Patterns Discovered +- [Naming conventions, architectural patterns, or gotchas that matter for future units] + +[If none: "None"] + +### Self-Review Findings +- [Issues found and fixed during self-review] + +[If none: "Self-review passed -- no issues found"] +```` + +## Guardrails +- Do not silently skip ambiguity, failures, or missing context. +- Do not add style-only churn unrelated to the unit. +- Do not weaken the TDD/evidence contract. +- Do not claim completion while known issues remain.
diff --git a/plugins/compound-engineering/commands/workflows/to-issues.md b/plugins/compound-engineering/commands/workflows/to-issues.md index b481410..f1a46a6 100644 --- a/plugins/compound-engineering/commands/workflows/to-issues.md +++ b/plugins/compound-engineering/commands/workflows/to-issues.md @@ -82,6 +82,8 @@ Rules: - size by coupling and boundary clarity, not by arbitrary task counts - keep tracer bullets first when the mode is `vertical-slices` - surface uncertainty instead of hiding it when ticketizing directly after `/workflows:plan` +- build a conservative dependency graph and execution batches while ticketizing +- default to sequential singleton batches whenever safe parallelism is unclear Each ticket must include the required ticket-local context defined in `ticketization-contract.md`, and each ticket file must follow the exact frontmatter/body shape in `ticket-execution-contract.md`. @@ -110,6 +112,15 @@ Required files: - `index.md` - one `NN-.md` file per ticket +`index.md` is not just a directory listing. It is the authoritative ticket-set graph and execution cursor. It must include the dependency graph, the conservative batch partition, file-overlap safety notes for every multi-ticket batch, and an updateable `last_completed_batch` counter that `/workflows:work` can use to resume from the next batch. + +When partitioning tickets into batches: + +- only group tickets together when all dependencies are satisfied by earlier batches +- only group tickets together when their declared `files` sets do not overlap +- treat shared mutable state, config churn, migrations, and boundary ambiguity as reasons to split the batch +- if unsure, split into sequential batches instead of inventing parallelism + Write every ticket using the exact schema from `ticket-execution-contract.md`, including its required frontmatter, section order, status lifecycle, and parent refs. Then record `tickets_ref` back into the plan frontmatter when possible. 
If frontmatter cannot be updated safely, add the ticket-set path under `## Related Artifacts`. @@ -133,6 +144,8 @@ Check for: - feature-home drift - shared/global drift - missing blockers or bad dependency ordering +- bad dependency graph layering or unsafe batch partitioning +- tickets grouped in parallel despite overlapping files or shared mutable surfaces - oversized tickets - tickets with weak WHY tracing - missing acceptance criteria or evidence commands @@ -152,6 +165,8 @@ A complete run must leave behind: - a local ticket set under `docs/tickets/` - `tickets_ref` or a labeled related-artifact link back into the plan - explicit blocker/dependency ordering +- a dependency graph plus conservative execution batches in `index.md` +- an updateable `last_completed_batch` progress pointer in `index.md` - compact ticket-local context packs - a final ticket-set review result from `ticket-flow-auditor` @@ -170,7 +185,7 @@ Execution readiness: - Recommendations: Recommended next step: -- Run `/workflows:work` on one ticket file once ticket-scoped execution is supported, or use the generated tickets as the scoped execution packet source for the next implementation pass. +- Run `/workflows:work` on the generated `index.md` so execution can pick the next safe batch automatically, or target one ticket file manually when you need to force a narrower run. ``` NEVER CODE! This phase shapes execution artifacts and context packets. It does not implement the feature itself. diff --git a/plugins/compound-engineering/commands/workflows/work.md b/plugins/compound-engineering/commands/workflows/work.md index 33efafd..7ec5dad 100644 --- a/plugins/compound-engineering/commands/workflows/work.md +++ b/plugins/compound-engineering/commands/workflows/work.md @@ -1,7 +1,7 @@ --- name: "workflows:work" description: Execute work plans while maintaining WHY tracing from problem narrative through user story to implementation. Grounds every subagent in purpose. 
-argument-hint: "[plan file, ticket file, specification, or todo file path] [--review-mode bulk|inline|both]" +argument-hint: "[plan file, ticket index, ticket file, specification, or todo file path] [--review-mode bulk|inline|both]" --- # Work Plan Execution Command @@ -12,9 +12,11 @@ Execute a work plan while maintaining WHY tracing from problem narrative through ## Introduction -This command takes a work document (plan, ticket, specification, or todo file) and executes it systematically using a **subagent orchestration model**. The orchestrator (this conversation) loads or adapts the source into execution units and delegates each unit to a focused subagent. `vertical-slices` is the default execution shape, but `infra-track` and `fix-batch` are also valid when declared by the plan. When the input is a ticket file, that ticket becomes the primary execution packet and the parent plan/architecture artifacts provide deeper context instead of re-expanding the whole backlog. Each subagent follows a standardized 4-phase protocol (understand, implement, self-review, report) defined in the execution agent prompt template. +This command takes a work document (plan, ticket index, ticket, specification, or todo file) and executes it systematically using a **subagent orchestration model**. The orchestrator (this conversation) loads or adapts the source into execution units and delegates each unit to a focused subagent. `vertical-slices` is the default execution shape, but `infra-track` and `fix-batch` are also valid when declared by the plan. When the input is a ticket index, that index becomes the authoritative execution queue and `/workflows:work` selects the next safe batch from it. When the input is a ticket file, that ticket becomes the primary execution packet and the parent plan/architecture artifacts provide deeper context instead of re-expanding the whole backlog. 
Every implementation unit, retry, and regression repair in this workflow is delegated through the named `execution-agent`, which follows a standardized 4-phase protocol (understand, implement, self-review, report). -**WHY-grounded execution:** Every subagent receives the source plan's WHY context -- the problem narrative, user story, architectural context, the architecture handoff contract, and which success criterion their specific unit serves. When execution starts from a ticket, the ticket's local context packet stays primary, while `plan_ref`, `tickets_ref`, and `architecture_ref` remain the deeper-dive path. This prevents implementation drift where technically correct code fails to deliver the user's actual need. The orchestrator is the guardian of WHY: it extracts purpose from the plan, threads it through every unit prompt, and validates that the combined output delivers the stated user story. +**WHY-grounded execution:** Every subagent receives the source plan's WHY context -- the problem narrative, user story, architectural context, the architecture handoff contract, and which success criterion their specific unit serves. When execution starts from a ticket index, the index decides the next batch while the selected tickets provide the local execution packets. When execution starts from a ticket file, the ticket's local context packet stays primary, while `plan_ref`, `tickets_ref`, and `architecture_ref` remain the deeper-dive path. This prevents implementation drift where technically correct code fails to deliver the user's actual need. The orchestrator is the guardian of WHY: it extracts purpose from the plan, threads it through every unit prompt, and validates that the combined output delivers the stated user story. + +**Execution delegation rule:** Ticket execution must always go through the bundled `execution-agent`. Do not route ticket implementation, ticket fix loops, or ticket regression repairs through `general-purpose` or any ad hoc worker prompt. 
### Review Mode @@ -39,13 +41,19 @@ If no `--review-mode` is specified, check `compound-engineering.local.md` for a 1. **Read the work document and extract WHY + guardrail context** - Read the work document completely - - If the input is a ticket file, first load `commands/workflows/references/ticket-execution-contract.md` and verify the ticket includes the required frontmatter and body sections. Stop and send the user back to `/workflows:to-issues` if the ticket contract is missing or malformed. + - If the input is a ticket index or ticket file, first load `commands/workflows/references/ticket-execution-contract.md` and verify the artifact includes the required index or ticket contract. Stop and send the user back to `/workflows:to-issues` if the contract is missing or malformed. + - If the input is a ticket index, extract: + - `plan_ref`, `architecture_ref`, `execution_shape`, `ticket_set_status`, `last_completed_batch`, and `total_batches` + - the dependency graph and execution-batch table + - the next unfinished batch and the ticket files it names + - the file-overlap safety notes proving whether the batch is truly parallel-safe + - If the input is a ticket index, use the index as the source of truth for ordering and batch selection. Load only the ticket files named in the next batch before continuing. - If the input is a ticket file, extract: - `plan_ref`, `tickets_ref`, `architecture_ref`, and `source_packet_ref` - `feature_home`, `depends_on`, `dependency_type`, `files`, `test_command`, and `status` - the compact packet in `## Local Context` - the parent trace in `## Parent Refs` and `## Deeper-Dive Refs` - - If the input is a ticket file, load the parent plan and architecture artifact from the recorded refs before continuing. The ticket is the primary execution unit; the parent artifacts provide WHY and boundary context. 
+ - If the input is a ticket index or ticket file, load the parent plan and architecture artifact from the recorded refs before continuing. The index chooses the batch; the ticket files remain the execution packets; the parent artifacts provide WHY and boundary context. - **Extract WHY artifacts** from the parent plan (these ground everything that follows): - **Problem Narrative** -- why this work exists, what pain it solves - **User Story** -- who benefits and what outcome they get @@ -60,11 +68,11 @@ If no `--review-mode` is specified, check `compound-engineering.local.md` for a - If the resolved contract weakens Ralph/unit+e2e without a justified exception in the plan, stop and ask for the plan contract to be corrected before execution - If `docs/constitution.md` exists, read it and extract the active constitution version, applicable principles, execution baselines, and approval rules. If the plan lists `constitution_waivers`, honor only those explicit exceptions. - If the parent plan has a `brainstorm_ref:` path, read that brainstorm document too for richer WHY context - - If the parent plan has an `architecture_ref:` path, the ticket has an `architecture_ref`, or `## Related Artifacts` points to `docs/architecture/`, read that artifact and extract feature homes, shared/global decisions, context tiers, drift checks, deletion tests, interfaces as test surfaces, seams, adapters, contracts, deepening candidates, and downstream work/review guidance + - If the parent plan has an `architecture_ref:` path, the selected ticket or index has an `architecture_ref`, or `## Related Artifacts` points to `docs/architecture/`, read that artifact and extract feature homes, shared/global decisions, context tiers, drift checks, deletion tests, interfaces as test surfaces, seams, adapters, contracts, deepening candidates, and downstream work/review guidance - If no architecture artifact is recorded, assemble an explicit architecture handoff contract from the parent plan's 
Architectural Context, Key Decisions, Constitution Alignment, brainstorm context, execution constraints, and `commands/workflows/references/vertical-slice-architecture.md`. Tell the user this is a fallback and recommend `/workflows:architecture` if boundaries are still unsettled. - Review any other references or links provided in the plan or ticket - If the constitution requires explicit approval for any part of the planned work (for example, risky writes, schema changes, auth changes, or scope expansions), surface that before execution starts - - If the document is not already in a declared execution shape and is not a valid ticket artifact, treat it as **legacy input**. Before spawning subagents, adapt it into execution units in STATE.md. If that adaptation materially changes scope or ordering, ask the user to approve the adapted unit backlog before proceeding. + - If the document is not already in a declared execution shape and is not a valid ticket index or ticket artifact, treat it as **legacy input**. Before spawning subagents, adapt it into execution units in STATE.md. If that adaptation materially changes scope or ordering, ask the user to approve the adapted unit backlog before proceeding. - If anything is unclear or ambiguous, ask clarifying questions now - Get user approval to proceed - **Do not skip this** - better to ask questions now than build the wrong thing @@ -115,6 +123,7 @@ If no `--review-mode` is specified, check `compound-engineering.local.md` for a 3. 
**Preview Unit Breakdown** - Mentally identify the major execution units from the source document + - If the input is a ticket index, preview the next batch from the index and whether it is sequential or parallel-safe - If the input is a ticket file, preview exactly one execution unit unless the user explicitly asks to re-split it - Note any questions about dependencies or scope - The formal unit decomposition happens in Phase 2 Step 4 (STATE.md), which is the persistent record of progress @@ -133,6 +142,7 @@ Before executing, validate four things: **structural readiness** (the selected e - **`vertical-slices`** -- slice type, serves, demo scenario, feature home, scope fence, files, success criteria, validation command, dependencies, dependency type - **`infra-track`** -- capability enabled, consumers / downstream work unlocked, scope, files, risk / rollback, success criteria, validation command, dependencies - **`fix-batch`** -- problem, repro / expected outcome, files, success criteria, validation command, dependencies +- **Ticket index input** -- valid `ticket-execution-contract.md` index contract, batch table, file-overlap safety notes, and progress pointer - **Ticket input** -- valid `ticket-execution-contract.md` frontmatter/body, parent refs, feature home, scope fence, acceptance criteria, test command, and compact local context - **Default rule** -- if `execution_shape` is missing, assume `vertical-slices` - **Anti-coercion rule** -- do not force infra or fix-batch work into slices if that would create fake verticality @@ -160,7 +170,7 @@ Before executing, validate four things: **structural readiness** (the selected e - **Success Criteria** -- present at plan level (not just unit level) - **Unit tracing** -- each execution unit has a purpose line connecting it to the user story or explicit enabling outcome -If the plan lacks structural details, the ticket lacks the ticket execution contract, or no architecture artifact / handoff contract can explain the 
boundaries, refuse to proceed and suggest running `/workflows:architecture` first if the boundaries are still fuzzy, then `/deepen-plan` or `/workflows:to-issues`, or manually repairing the execution packet. +If the plan lacks structural details, the ticket index or ticket lacks the ticket execution contract, or no architecture artifact / handoff contract can explain the boundaries, refuse to proceed and suggest running `/workflows:architecture` first if the boundaries are still fuzzy, then `/deepen-plan` or `/workflows:to-issues`, or manually repairing the execution packet. If the plan lacks the `tdd` block or `## TDD & Evidence Contract`, or if the resolved contract is ambiguous, refuse to proceed and suggest `/workflows:plan` or `/deepen-plan` to repair the execution contract before spawning subagents. @@ -172,7 +182,7 @@ If the plan lacks WHY artifacts, the orchestrator should **construct minimal WHY #### Step 2: Check for Resumable Session -Before creating a new session, check for existing incomplete sessions for the same plan or ticket: +Before creating a new session, check for existing incomplete sessions for the same plan, ticket index, or ticket: ```bash ls docs/execution-sessions/work-*/STATE.md 2>/dev/null @@ -199,8 +209,9 @@ Create a `STATE.md` file in the session directory: ```markdown --- -source_type: [plan | ticket | specification | todo] +source_type: [plan | ticket-index | ticket | specification | todo] plan_file: [path to plan] +ticket_index: [path to ticket index, if applicable] ticket_file: [path to ticket, if applicable] tickets_ref: [path to ticket index, if applicable] source_packet_ref: [plan packet ref or ticket packet ref] @@ -263,12 +274,13 @@ _No learnings yet._ The orchestrator parses the source artifact and creates a list of execution units. Each unit is a self-contained packet of work defined by the selected execution shape or the ticket contract. 
The orchestrator does the heavy lifting here: - **Prefer plan-defined units directly** -- if the plan already declares a coherent execution shape, execute those packets as written +- **Prefer index-defined batches directly** -- if the input is a valid ticket index, execute the next unresolved batch as written and do not re-batch it unless the index is missing safety evidence or the user explicitly approves a change - **Prefer ticket-defined unit directly** -- if the input is a valid ticket artifact, execute that one ticket as one unit and do not re-split it unless the user explicitly approves a change - **Adapt legacy phase/task plans into units before coding** -- do not execute raw task lists directly once the shape contract is available - **Break oversized units** into smaller units if needed (each unit should be completable in one subagent session) - **Preserve WHY tracing** -- when splitting a unit, each resulting unit inherits or refines the parent unit's purpose line. Never create an orphan unit with no connection to the user story. - **Identify file dependencies** between units -- **Determine parallelizable units** -- only units with non-overlapping file sets and compatible dependencies can run simultaneously +- **Determine parallelizable units** -- only units with non-overlapping file sets and compatible dependencies can run simultaneously. For ticket-index input, trust only the explicit batch partition recorded in `index.md`; if the index leaves overlap safety ambiguous, collapse the batch to sequential execution. - **Ensure each unit has clear success criteria** -- if the plan already defines them, use them directly; otherwise, the orchestrator must create them - **Map each unit to its purpose** -- record which success criterion or enabling outcome each unit delivers (this goes in STATE.md's "Serves / Unlocks" column) @@ -285,27 +297,33 @@ For each unit (or parallel batch of units), follow this cycle: ##### a. 
Build Scoped Prompt -For each unit, the orchestrator constructs a focused prompt by loading the **execution agent prompt template** from `commands/workflows/references/execution-agent-prompt.md` and filling in the context blocks. - -Apply the shared `Reference Template Loading` protocol from `commands/workflows/references/orchestration-protocol.md`, substituting `execution-agent-prompt.md`. - -- Quote the first non-empty line of the loaded template before continuing. -- Every execution, retry, fix, and regression-repair subagent in this workflow must start from a freshly loaded copy of that same template. -- Fill the placeholders from the loaded template only. Do not reconstruct the prompt from memory, paraphrase it into a shorter prompt, or drop mandatory sections. -- If the template cannot be loaded, quoted, or fully populated without unresolved `{{PLACEHOLDER}}` values, stop and resolve the missing context before spawning the subagent. - -- **{{UNIT_TITLE}}** and **{{UNIT_DESCRIPTION}}** -- from the plan packet or ticket artifact -- **{{UNIT_KIND}}** -- from the plan or ticket (`tracer-bullet`, `expansion`, `hardening`, `infra-packet`, `fix-item`, etc.) 
-- **{{OUTCOME_SCENARIO}}** -- the observable behavior or enabling outcome this unit proves -- **{{UNIT_SCOPE}}** -- what the unit owns and excludes -- **{{UNIT_SCOPE_FENCE}}** -- the boundary that keeps the unit thin -- **{{FILE_LIST}}** -- files to create/modify from the plan or ticket -- **{{SUCCESS_CRITERIA}}** -- checkboxes that define "done" -- **{{VALIDATION_COMMAND}}** -- how to verify the unit works -- **{{COMPLETED_DEPENDENCIES}}** -- list of already-completed units this depends on -- **{{PARENT_REFS}}** -- plan, ticket set, architecture, and source packet refs that anchor this unit -- **{{TICKET_LOCAL_CONTEXT}}** -- the ticket-local execution packet when the source is a ticket; otherwise a compact packet derived from the plan unit -- **{{WHY_CONTEXT}}** -- the purpose grounding block (constructed by orchestrator): +For each unit, the orchestrator constructs a focused prompt for the named `execution-agent`. + +Apply the shared `Named Agent Dispatch` protocol from `commands/workflows/references/orchestration-protocol.md`, substituting `execution-agent`. + +- Quote the first non-empty line of the loaded bundled agent template before continuing. +- Every execution, retry, fix, and regression-repair subagent in this workflow must start from a freshly loaded copy of that same agent template. +- Build `scoped_prompt` by injecting the full loaded `execution-agent` template plus the resolved context packet below. Do not summarize, abbreviate, or paraphrase the agent template. +- Apply the shared `Reference Template Loading` protocol from `commands/workflows/references/orchestration-protocol.md`, substituting `execution-agent-prompt.md`, and quote the first non-empty line of the loaded file before continuing. +- Use the loaded `commands/workflows/references/execution-agent-prompt.md` as the scaffold for the context packet so the injected headings stay stable across retries and follow-up fixes. +- Fill the scaffold completely.
Do not continue if any required section is missing or any `{{PLACEHOLDER}}` value is unresolved. + +- **`## Your Unit`** + - **{{UNIT_TITLE}}** and **{{UNIT_DESCRIPTION}}** -- from the plan packet or ticket artifact + - **{{UNIT_KIND}}** -- from the plan or ticket (`tracer-bullet`, `expansion`, `hardening`, `infra-packet`, `fix-item`, etc.) + - **{{OUTCOME_SCENARIO}}** -- the observable behavior or enabling outcome this unit proves + - **{{FEATURE_HOME}}** -- the primary feature home or owning module + - **{{UNIT_SCOPE}}** -- what the unit owns and excludes + - **{{UNIT_SCOPE_FENCE}}** -- the boundary that keeps the unit thin + - **{{FILE_LIST}}** -- files to create/modify from the plan or ticket + - **{{SUCCESS_CRITERIA}}** -- checkboxes that define "done" + - **{{VALIDATION_COMMAND}}** -- how to verify the unit works + - **{{COMPLETED_DEPENDENCIES}}** -- list of already-completed units this depends on + - **{{PARENT_REFS}}** -- plan, ticket set, architecture, and source packet refs that anchor this unit +- **`## Ticket-local context`** + - **{{TICKET_LOCAL_CONTEXT}}** -- the ticket-local execution packet when the source is a ticket; otherwise a compact packet derived from the plan unit +- **`## Why This Unit Exists`** + - **{{WHY_CONTEXT}}** -- the purpose grounding block (constructed by orchestrator): ``` ## Why This Unit Exists **Problem:** [problem narrative from plan -- 1-2 sentences] @@ -314,14 +332,18 @@ Apply the shared `Reference Template Loading` protocol from `commands/workflows/ **Overall success criteria:** [plan-level success criteria list] **Guardrails:** [relevant constitution principles, approval rules, and approved waivers] ``` -- **{{ARCHITECTURAL_CONTEXT}}** -- from the plan's Architectural Context section, filtered to what's relevant for this unit's files and domain -- **{{ARCHITECTURE_HANDOFF}}** -- from the `docs/architecture/` artifact or explicit plan-derived handoff contract; include deletion-test decisions, interfaces as test surfaces, 
seams, adapters, contracts, and downstream review guidance relevant to this unit -- **{{LEARNINGS_BRIEF}}** -- from previous units, filtered by domain relevance -- **{{PROJECT_CONVENTIONS}}** -- from CLAUDE.md plus relevant constitution baselines -- **{{TDD_CONTRACT}}** -- the resolved execution contract: effective mode, Ralph/default loop, required unit/e2e evidence, and any explicit exceptions -- **{{TDD_SECTION}}** -- if the resolved effective mode is Ralph-driven, include the Ralph/TDD Implementation Section from the template; otherwise include the Standard Implementation Section. Do not treat Ralph as an adjacent side command when it is the resolved default. - -The execution agent template instructs each subagent to follow a 4-phase protocol: +- **`## Architectural Context`** + - **{{ARCHITECTURAL_CONTEXT}}** -- from the plan's Architectural Context section, filtered to what's relevant for this unit's files and domain +- **`## Architecture Handoff`** + - **{{ARCHITECTURE_HANDOFF}}** -- from the `docs/architecture/` artifact or explicit plan-derived handoff contract; include deletion-test decisions, interfaces as test surfaces, seams, adapters, contracts, and downstream review guidance relevant to this unit +- **`## Learnings from Previous Units`** + - **{{LEARNINGS_BRIEF}}** -- from previous units, filtered by domain relevance +- **`## Project Conventions`** + - **{{PROJECT_CONVENTIONS}}** -- from CLAUDE.md plus relevant constitution baselines +- **`## TDD Execution Contract`** + - **{{TDD_CONTRACT}}** -- the resolved execution contract: effective mode, Ralph/default loop, required unit/e2e evidence, any explicit exceptions, and any fix/regression context that the retried unit must address + +The loaded `execution-agent` template instructs each subagent to follow a 4-phase protocol: 1. **Understand** -- review requirements, surface ambiguities, state assumptions before coding 2. 
**Implement** -- follow the resolved Ralph/default execution mode, retry on failure (up to 3 attempts) 3. **Self-review** -- check completeness, quality, discipline, testing, and evidence @@ -332,10 +354,10 @@ The execution agent template instructs each subagent to follow a 4-phase protoco Delegate the unit to a focused subagent: ``` -Task(general-purpose, prompt=scoped_prompt) +Task(execution-agent, prompt=scoped_prompt) ``` -The subagent prompt is constructed from the loaded execution agent template (`commands/workflows/references/execution-agent-prompt.md`). The template already includes instructions for the 4-phase protocol (understand, implement, self-review, report). The orchestrator fills in the context blocks and passes the result. Do not substitute a custom summary prompt for any execution worker: +The subagent prompt is constructed from the loaded bundled `execution-agent` template plus the fully injected context packet scaffold from `commands/workflows/references/execution-agent-prompt.md`. Do not substitute a custom summary prompt for any execution worker, and do not dispatch ticket implementation through `general-purpose`. The loaded template directs each execution worker to: 1. Read referenced files and understand existing patterns 2. Follow the resolved Ralph/default execution contract @@ -351,7 +373,7 @@ The subagent prompt is constructed from the loaded execution agent template (`co - Final test results (pass/fail) - Attempt count -**For parallel units**: Spawn multiple subagents simultaneously only when their file sets do not overlap and their dependency types allow parallel work. Before parallelizing, verify file sets do not overlap and no unit claims shared mutable state without an explicit guard. +**For parallel units**: Spawn multiple subagents simultaneously only when their file sets do not overlap and their dependency types allow parallel work. For ticket-index input, parallelize only when the selected batch is explicitly marked safe by the index's file-overlap notes.
Before parallelizing, verify file sets do not overlap and no unit claims shared mutable state without an explicit guard. If there is doubt, execute sequentially. **Example scoped prompt:** @@ -481,7 +503,7 @@ If the `--review-mode` argument is `inline` or `both`, perform a two-stage inlin The spec reviewer should check not just checkbox compliance but whether the implementation actually delivers on the recorded purpose. A unit can pass all checkboxes but miss the intent. - If **PASS**: proceed to Stage 2 - - If **FAIL**: reload `execution-agent-prompt.md`, rebuild the scoped execution prompt with the review findings added as fix context, then spawn a new execution subagent. Re-run the spec reviewer afterward (max 2 fix-review cycles). If still failing after 2 cycles, log the issues and ask the user how to proceed. + - If **FAIL**: reload the bundled `execution-agent` template plus `execution-agent-prompt.md`, rebuild the scoped execution prompt with the review findings added as fix context, then spawn a new `execution-agent` run. Re-run the spec reviewer afterward (max 2 fix-review cycles). If still failing after 2 cycles, log the issues and ask the user how to proceed. 
**Stage 2: Code Quality Review** (only after spec compliance passes) @@ -495,7 +517,7 @@ If the `--review-mode` argument is `inline` or `both`, perform a two-stage inlin ``` - If **PASS**: proceed to next steps - - If **FAIL** with Critical issues: reload `execution-agent-prompt.md`, rebuild the scoped execution prompt with the quality findings added as fix context, spawn a fix subagent, then re-review (max 2 cycles) + - If **FAIL** with Critical issues: reload the bundled `execution-agent` template plus `execution-agent-prompt.md`, rebuild the scoped execution prompt with the quality findings added as fix context, spawn an `execution-agent` fix run, then re-review (max 2 cycles) - If **FAIL** with only Important/Minor issues: log them for the orchestrator's attention but proceed to next task (these will also be caught by `/workflows:review` if run later) **Note:** Inline review is a lightweight per-unit check. It does NOT replace the comprehensive `/workflows:review` multi-agent review. When `--review-mode both` is active, inline review runs per-unit AND `/workflows:review` runs after all units complete. @@ -506,12 +528,14 @@ If the `--review-mode` argument is `inline` or `both`, perform a two-stage inlin **5. Update source work artifact** -- keep the execution source honest: - plan input: check off completed items (`[ ]` to `[x]`) in the original plan document + - ticket-index input: update the selected batch status in `index.md`, keep the ticket table honest, and increment `last_completed_batch` only after every ticket in that batch is `completed` - ticket input: update the ticket `status` field (`ready` -> `completed` or `blocked`) and preserve parent refs + - if both an index and ticket files can be updated safely, update both without inventing new status fields - if both a ticket and plan backlog can be updated safely, update both without inventing new status fields **6. Regression guard** -- run test commands from ALL previously completed tasks. 
If any regress: - Log the regression in the current task's session file - - Reload `execution-agent-prompt.md`, rebuild the scoped execution prompt with context about what broke and why, and spawn a fix subagent + - Reload the bundled `execution-agent` template plus `execution-agent-prompt.md`, rebuild the scoped execution prompt with context about what broke and why, and spawn an `execution-agent` fix run - Do not proceed to the next task until the regression is fixed **7. Incremental commit** if appropriate (logical unit complete, tests pass): diff --git a/plugins/compound-engineering/skills/grill-me/SKILL.md b/plugins/compound-engineering/skills/grill-me/SKILL.md index 5ca5126..4d8d1b5 100644 --- a/plugins/compound-engineering/skills/grill-me/SKILL.md +++ b/plugins/compound-engineering/skills/grill-me/SKILL.md @@ -1,6 +1,6 @@ --- name: grill-with-docs -description: Grilling session that challenges your plan against the existing domain model, sharpens terminology, and updates documentation (CONTEXT.md, ADRs) inline as decisions crystallise. Use when user wants to stress-test a plan against their project's language and documented decisions. +description: Grilling session that challenges your plan against the existing domain model, sharpens terminology, and updates documentation (CONTEXT.md, brainstorm docs, plan docs, ADRs) inline as decisions crystallise. Use when user wants to stress-test a plan against their project's language and documented decisions. --- @@ -17,26 +17,39 @@ If a question can be answered by exploring the codebase, explore the codebase in ## Domain awareness -During codebase exploration, also look for existing documentation: +During codebase exploration, also look for existing documentation, especially the active feature artifact for the current discussion. 
### File structure -Most repos have a single CONSTITUTION.md spec driven driver, a CONTENT.md file for cementing shared language and some architecture docs per feature implemented: +Most repos have a repo-wide constitution, a glossary-oriented `CONTEXT.md`, and feature documents under `docs/`: ``` / ├── CONSTITUTION.md ├── CONTEXT.md ├── docs/ +│ ├── brainstorms/ +│ │ └── 2026-04-30-checkout-race-brainstorm.md +│ ├── plans/ +│ │ └── 2026-05-01-fix-checkout-race-plan.md │ └── architecture/ │ ├── 2026-04-30-nucleus-stage-1-architecture.md └── src/ ``` -Create files lazily — only when you have something to write.If no CONTEXT.md exists, create one when the first term is resolved. If no `CONSTITUTION.md` exists, advise the user to create one using the workflows-constitution command using the context from this session. +Create files lazily -- only when you have something to write. If no `CONTEXT.md` exists, create one when the first term is resolved. If no `CONSTITUTION.md` exists, advise the user to create one with the workflows-constitution command, drawing on the context from this session. ## During the session +### Choose the right documentation sink + +Before grilling, decide where concrete decisions belong: + +1. If a plan file exists for the current feature, or the session is clearly continuing plan work, the plan file is the implementation-decision sink. +2. Otherwise, if a brainstorm document exists for the current feature, or the session is clearly continuing brainstorm work, the brainstorm document is the implementation-decision sink. +3. `CONTEXT.md` is only for canonical domain language. ADRs remain for cross-feature decisions that deserve a durable architectural record. +4. If neither a plan nor a brainstorm artifact exists, do not invent one just for this skill unless the user explicitly asks for it. + ### Challenge against the glossary When the user uses a term that conflicts with the existing language in `CONTEXT.md`, call it out immediately.
"Your glossary defines 'cancellation' as X, but you seem to mean Y — which is it?" @@ -57,6 +70,16 @@ When the user states how something works, check whether the code agrees. If you When a term is resolved, update `CONTEXT.md` right there. Don't batch these up — capture them as they happen. Use the format in [CONTEXT-FORMAT.md](./CONTEXT-FORMAT.md). +### Update the active feature doc inline + +After each question is answered with concrete implementation, architecture, data-shape, API, dependency, boundary, rollout, or operational detail, immediately write it into the active feature doc. Do not wait until the end of the session, and do not leave the decision only in chat history. + +Prefer updating the most specific existing section over inventing a catch-all notes bucket: + +- **Brainstorm doc:** update `## Chosen Approach`, `## Key Decisions`, `## Architectural Context`, and move answered items into `## Resolved Questions`. +- **Plan doc:** update `## Implementation` or `## Overview`, `## Technical Considerations`, `## Architectural Context`, `## Success Criteria`, and the relevant execution slice, acceptance criteria, or file list when the answer changes execution shape. +- If a new answer supersedes earlier wording, edit the earlier section in place so the document stays coherent. + `CONTEXT.md` should be totally devoid of implementation details. Do not treat `CONTEXT.md` as a spec, a scratch pad, or a repository for implementation decisions. It is a glossary and nothing else. diff --git a/portable/compound-engineering/agents/review/ticket-flow-auditor.md b/portable/compound-engineering/agents/review/ticket-flow-auditor.md index 75bbb22..16389e4 100644 --- a/portable/compound-engineering/agents/review/ticket-flow-auditor.md +++ b/portable/compound-engineering/agents/review/ticket-flow-auditor.md @@ -18,13 +18,14 @@ Protect the plan -> ticket -> implementation chain. Review whether tickets are s ## Workflow 1. 
Determine the mode: ticket-set audit before execution, or implementation audit after code exists. 2. Trace the chain from plan to architecture to ticket artifacts to execution evidence or branch diff. -3. Pressure-test ticket scope fences, feature-home ownership, dependency order, and context sufficiency. +3. Pressure-test ticket scope fences, feature-home ownership, dependency order, execution-batch partitioning, and context sufficiency. 4. Separate blocking contract failures from improvements, with citations for every finding. ## Report - `Review Mode`: ticket-set audit or implementation audit. - `Blocking gaps`: issues that make the ticket set or implementation unsafe to continue without repair. - `Recommendations`: improvements that sharpen the flow without blocking progress. +- `Batch safety notes`: whether the dependency graph and parallel batches are honest, conservative, and race-safe. - `Traceability notes`: where plan, architecture, ticket, and implementation stayed aligned or drifted. - `Evidence cited`: artifact paths, diff locations, and session evidence supporting the findings. @@ -32,4 +33,6 @@ Protect the plan -> ticket -> implementation chain. Review whether tickets are s - Do not redesign the whole backlog when a local repair would solve the issue. - Do not ask for ticket splits or merges unless coupling, ownership, or outcome clarity is materially wrong. - Do not ignore undocumented scope expansions just because the code looks good. +- Do not bless a parallel batch unless the ticket files are genuinely disjoint and the index records why it is safe. +- When batch safety is ambiguous, prefer a sequential recommendation over a risky parallel one. - Cite the specific ticket file, plan section, architecture artifact, diff hunk, or execution artifact that proves each finding. 
diff --git a/portable/compound-engineering/agents/workflow/execution-agent.md b/portable/compound-engineering/agents/workflow/execution-agent.md new file mode 100644 index 0000000..8a9ba03 --- /dev/null +++ b/portable/compound-engineering/agents/workflow/execution-agent.md @@ -0,0 +1,214 @@ +--- +name: execution-agent +description: >- + Executes one scoped ticket or work unit with strict clean-code, DRY, SOLID, + and Ralph-aware delivery discipline. Use for `/workflows:work` + implementation, retries, and regression repairs. +model: claude-sonnet-4-6 +platforms: + copilot: + model: gpt-5.3-codex + opencode: + model: openrouter/moonshotai/kimi-k2.6 +--- + +## Mission +Implement one bounded execution unit so the code is easier to understand, safer to change, and closer to the stated user outcome than it was before the change. Favor explicit names, tight responsibilities, honest boundaries, minimal but complete diffs, explicit failures, and tests that prove behavior. + +## Required delegated input +The orchestrator prompt must inject these concrete sections before you start: + +- `## Your Unit` +- `## Ticket-local context` +- `## Why This Unit Exists` +- `## Architectural Context` +- `## Architecture Handoff` +- `## Learnings from Previous Units` +- `## Project Conventions` +- `## TDD Execution Contract` + +If any required section is missing, materially incomplete, or still contains unresolved placeholders, stop and report the prompt-integrity problem instead of guessing. + +## Workflow +1. Understand the unit, its purpose, and its boundaries before changing code. +2. Reuse existing patterns, helpers, and abstractions before adding new ones. +3. Implement the smallest complete change that satisfies the unit and its TDD contract. +4. Self-review against the clean-code checklist below and fix issues before reporting. +5. Return the exact execution report contract with real evidence. 
+ +## Clean-code operating rules + +### Names +- Names must reveal purpose, domain meaning, and side effects. Prefer precise nouns and verbs over placeholders like `data`, `info`, `helper`, `util`, `manager`, `process`, `item`, or `tmp`. +- Avoid single-letter variables outside standard tiny scopes (`i`, `j`, `x`, `y`). Avoid unclear abbreviations unless the codebase already treats them as domain shorthand. +- Make the public API honest. A function, class, or variable name must describe what it actually does, not what you wish it did. + +### Structure +- Keep one unit of work responsible for one reason to change. Split decision-making from mechanism when they start drifting apart. +- Keep one abstraction level at a time. Do not mix policy, parsing trivia, persistence details, and formatting noise in one routine. +- Keep side effects obvious. Queries should look like queries; mutations and I/O should be visible in names and call sites. +- Prefer direct readable code over helper stacks, pass-through wrappers, and speculative indirection. + +### DRY and SOLID +- Reuse before you create. Search the touched area for an existing helper, type, class, or utility that already owns the behavior cleanly. +- Apply DRY by reason to change. Extract shared code only when the duplicated behavior genuinely changes for the same reason. +- Apply SOLID deliberately. Introduce classes, interfaces, or seams only when they clarify responsibility, dependency direction, substitution boundaries, or test surfaces. +- Do not invent catch-all abstractions such as `*Manager`, `*Helper`, `*Util`, or generic shared layers without a clear architectural reason. + +### Boundaries +- Keep business logic in the declared feature home unless the architecture handoff explicitly justifies a shared or global extraction. +- Shared/global code must earn its place by serving multiple feature homes or a stable cross-cutting contract. +- Respect scope fences. 
Do not expand the unit just because a nearby cleanup looks tempting. + +### Comments and documentation +- Add doc blocks or docstrings above public or exported functions, class definitions, interface/type definitions, and non-trivial private helpers whose contract is not obvious from the signature alone. +- Those doc blocks should explain purpose, inputs/outputs, invariants, side effects, failure behavior, or architectural constraints. Do not restate the code line by line. +- Leave inline comments only where a reader truly needs missing intent: non-obvious constraints, boundary rules, tricky algorithms, or why a surprising choice exists. +- If a comment is compensating for confusing code, improve the code first. + +### Imports and dependencies +- Keep imports at the top of the file. +- Defer or conditionalize imports only when there is a real reason such as measurable startup/performance impact, cycle breaking, optional dependencies, or exception-aware loading. If you do this, make the reason obvious in code. +- Remove unused imports, dead helpers, stale branches, and compatibility paths that the touched code no longer needs. + +### Errors, state, and tests +- Fail explicitly. Do not hide problems behind broad catches, silent fallbacks, vague exceptions, or mixed success/error return shapes. +- Make mutation and state transitions obvious. Avoid hidden writes and temporal coupling. +- Tests should verify behavior that matters to the success criteria, not implementation trivia. +- When Ralph-driven, preserve stable `Red`, `Green`, and `Post-Refactor Green` evidence. + +## TDD Execution Contract +Use `commands/workflows/references/tdd-evidence-contract.md` as the shared source of truth for contract resolution, Ralph evidence semantics, and report structure. Do not invent a lighter evidence format for convenience. + +### TDD Evidence +- Ralph is the default TDD execution path whenever the resolved contract selects Ralph-driven work. 
+- `Red` and `Green` prove behavior coverage. +- `Post-Refactor Green` proves cleanup safety. +- If no cleanup was needed, still rerun and say so. + +## Phase 1: Understand Before Building +Before writing any code, review the injected unit requirements, WHY context, architecture handoff, and project conventions carefully. + +**If anything is unclear, ambiguous, or could be interpreted multiple ways:** +- List your questions explicitly. +- State the assumptions you would make if forced to proceed. +- Ask for clarification before starting work. + +**If everything is clear:** +- State your interpretation of the requirements in 2-3 sentences. +- State how this unit serves the overall user story. +- List the assumptions you are making. +- Proceed to implementation. + +Do not skip this phase. A few minutes of clarification prevents hours of rework. + +## Phase 2: Implement +- Follow the resolved Ralph/default execution mode from the injected `## TDD Execution Contract`. +- Read referenced files and match existing patterns before introducing new structure. +- Keep changes minimal but complete. Build what the unit asks for, not adjacent wish-list items. +- If tests fail, analyze the failure, fix the issue, and retry. Stop after 3 total implementation attempts and report the failure clearly instead of thrashing. + +## Phase 3: Self-Review +Before reporting back, review your own work honestly. + +### Completeness +- [ ] Did I implement every success criterion? +- [ ] Did I preserve the stated scope fence? +- [ ] Did I handle implied edge cases without scope creep? + +### Purpose alignment +- [ ] Does the implementation deliver the stated user/story outcome? +- [ ] Does every meaningful code change trace back to the unit purpose or success criteria? + +### Code quality +- [ ] Are names explicit and honest? +- [ ] Did I reuse existing code where it already solved this cleanly? +- [ ] If I introduced a new abstraction, does it have a clear reason to exist? 
+- [ ] Did I keep imports at the top unless there was a real documented reason not to? +- [ ] Did I add doc blocks/docstrings where a future maintainer needs them? +- [ ] Did I leave only comments that add missing intent? +- [ ] Did the business logic stay in the declared feature home unless the handoff allowed extraction? +- [ ] Did I avoid dead code, speculative wrappers, and hidden side effects? +- [ ] Is error handling explicit and appropriate? + +### Discipline +- [ ] Did I avoid overbuilding? +- [ ] Did I avoid speculative abstractions and cleanup unrelated to the unit? + +### Testing and evidence +- [ ] Do tests prove actual behavior? +- [ ] Did I run the stated validation command? +- [ ] Can I show actual output, not just claims? +- [ ] If Ralph-driven, do I have stable `Red`, `Green`, and `Post-Refactor Green` evidence? + +If you find issues during self-review, fix them before reporting. + +## Report +Return a structured execution report in exactly this format: + +```markdown +## Execution Report: [Unit Title] + +### Interpretation +[Your 2-3 sentence interpretation of what was asked] + +### Purpose Served +[Which user story aspect / success criterion this unit delivers] + +### Assumptions Made +- [List each assumption] + +### What Was Implemented +[Describe what you built and how it works] + +### Files Changed +- `path/to/file` -- created/modified (brief description of change) + +### Test Results +- Command: `[test command]` +- Result: PASS/FAIL +- Attempts: [n] +- Output: +``` +[paste actual output here] +``` + +### TDD Evidence +- **Red** + - Command: `[red command]` + - Result: PASS/FAIL + - Evidence: [why this proves the missing behavior existed before the implementation] +- **Green** + - Command: `[green command]` + - Result: PASS/FAIL + - Evidence: [why this proves the requested behavior now passes] +- **Post-Refactor Green** + - Command: `[post-refactor command]` + - Result: PASS/FAIL + - Evidence: [why this proves cleanup/refactor work preserved 
behavior] + +[If no cleanup was needed, still rerun and say so.] + +### Problems Encountered +- **Error:** [exact error message] + - **Root cause:** [your analysis] + - **Fix:** [what you did] + +[If no problems: "None"] + +### Patterns Discovered +- [Naming conventions, architectural patterns, or gotchas that matter for future units] + +[If none: "None"] + +### Self-Review Findings +- [Issues found and fixed during self-review] + +[If none: "Self-review passed -- no issues found"] +``` + +## Guardrails +- Do not silently skip ambiguity, failures, or missing context. +- Do not add style-only churn unrelated to the unit. +- Do not weaken the TDD/evidence contract. +- Do not claim completion while known issues remain. diff --git a/portable/compound-engineering/commands/triage.md b/portable/compound-engineering/commands/triage.md index b919b07..8831b11 100644 --- a/portable/compound-engineering/commands/triage.md +++ b/portable/compound-engineering/commands/triage.md @@ -1,313 +1,321 @@ --- name: triage -description: Triage and categorize findings for the CLI todo system -argument-hint: '[findings list or source type]' +description: Triage review todos, resolve open decisions one-by-one, then orchestrate execution-agent runs with strict per-item validation +argument-hint: '[todo range or scope]' platforms: claude: disable-model-invocation: true --- -- First set the /model to Haiku -- Then read all pending todos in the todos/ directory +- Read all target todo files before asking decisions. +- Keep main context as orchestration + validation space; execution agents do implementation. -Present all findings, decisions, or issues here one by one for triage. The goal is to go through each item and decide whether to add it to the CLI todo system. +Use this command when you need to process review todos end-to-end: -**IMPORTANT: DO NOT CODE ANYTHING DURING TRIAGE!** +1. triage each todo, +2. resolve open questions with the user, +3. write decisions into todo files, +4. 
execute todos one at a time via execution-agent with full context packets, +5. validate each result independently, +6. close statuses cleanly. -This command is for: - -- Triaging code review findings -- Processing security audit results -- Reviewing performance analysis -- Handling any other categorized findings that need tracking - -## Workflow - -### Step 1: Present Each Finding +## Core Rule Set -For each finding, present in this format: +**IMPORTANT: During triage/decision phases, DO NOT implement code fixes.** -``` ---- -Issue #X: [Brief Title] - -Severity: 🔴 P1 (CRITICAL) / 🟡 P2 (IMPORTANT) / 🔵 P3 (NICE-TO-HAVE) - -Category: [Security/Performance/Architecture/Bug/Feature/etc.] +This command is for: -Description: -[Detailed explanation of the issue or improvement] +- Review-todo triage and decision closure +- Converting vague todos into executable units +- Sequential execution-agent dispatch with rich context +- Independent orchestration-side validation and status control -Location: [file_path:line_number] +## Workflow -Problem Scenario: -[Step by step what's wrong or could happen] +### Step 1: Bootstrap and Scope -Proposed Solution: -[How to fix it] +1. Load project context and memory first. +2. Identify target todos from user scope (range, priority, status, or "all open"). +3. Build a deterministic queue sorted by todo id. -Estimated Effort: [Small (< 2 hours) / Medium (2-8 hours) / Large (> 8 hours)] +Recommended checks: ---- -Do you want to add this to the todo list? -1. yes - create todo file -2. next - skip this item -3. custom - modify before creating +```bash +rg '^status:\s*(pending|in_progress|ready)' todos/*.md ``` -### Step 2: Handle User Decision - -**When user says "yes":** - -1. **Update existing todo file** (if it exists) or **Create new filename:** +### Step 2: Read and Triage All Target Todos - If todo already exists (from code review): +Read each todo fully before asking any question. 
Capture: - - Rename file from `{id}-pending-{priority}-{desc}.md` → `{id}-ready-{priority}-{desc}.md` - - Update YAML frontmatter: `status: pending` → `status: ready` - - Keep issue_id, priority, and description unchanged +- Problem statement quality +- Missing decisions or unresolved options +- Dependency and ordering concerns +- Validation expectations +- Coupled files/tests/docs likely needed for execution - If creating new todo: +Present triage output per todo in this format: - ``` - {next_id}-ready-{priority}-{brief-description}.md - ``` - - Priority mapping: +```markdown +--- +Todo #NNN: [Title] - - 🔴 P1 (CRITICAL) → `p1` - - 🟡 P2 (IMPORTANT) → `p2` - - 🔵 P3 (NICE-TO-HAVE) → `p3` +Status: [pending/in_progress/ready] +Priority: [p1/p2/p3] +Dependencies: [none/list] - Example: `042-ready-p1-transaction-boundaries.md` +Open Decisions: +1. [Decision question] +2. [Decision question] -2. **Update YAML frontmatter:** +Execution Risks: +- [Risk] - ```yaml - --- - status: ready # IMPORTANT: Change from "pending" to "ready" - priority: p1 # or p2, p3 based on severity - issue_id: "042" - tags: [category, relevant-tags] - dependencies: [] - --- - ``` +Initial Recommendation: +- [Suggested choice and why] +--- +``` -3. **Populate or update the file:** +### Step 3: Resolve Open Decisions (One Question at a Time) - ```yaml - # [Issue Title] +Ask only one question at a time. Do not batch decisions. - ## Problem Statement - [Description from finding] +Decision prompt format: - ## Findings - - [Key discoveries] - - Location: [file_path:line_number] - - [Scenario details] +```markdown +Decision for Todo #NNN: +[Clear question] - ## Proposed Solutions +Recommended: [option] +1. [option A] +2. [option B] +3. 
[option C] +``` - ### Option 1: [Primary solution] - - **Pros**: [Benefits] - - **Cons**: [Drawbacks if any] - - **Effort**: [Small/Medium/Large] - - **Risk**: [Low/Medium/High] +Rules: - ## Recommended Action - [Filled during triage - specific action plan] +- Wait for explicit answer before asking next question. +- If user gives freeform decision, normalize it and confirm in one sentence. +- No implementation starts until decisions for that todo are resolved. - ## Technical Details - - **Affected Files**: [List files] - - **Related Components**: [Components affected] - - **Database Changes**: [Yes/No - describe if yes] +### Step 4: Write Decisions Back into Todo Files - ## Resources - - Original finding: [Source of this issue] - - Related issues: [If any] +Update todo files immediately after each resolved decision. - ## Acceptance Criteria - - [ ] [Specific success criteria] - - [ ] Tests pass - - [ ] Code reviewed +Expected updates: - ## Work Log +1. Add/refresh `## Recommended Action` +2. Append `## Work Log` entry with decision outcome +3. Keep status accurate: + - stays `pending` after triage-only updates + - moves to `in_progress` when execution starts + - moves to `done` only after independent validation passes - ### {date} - Approved for Work - **By:** Claude Triage System - **Actions:** - - Issue approved during triage session - - Status changed from pending → ready - - Ready to be picked up and worked on +Work log template: - **Learnings:** - - [Context and insights] +```markdown +### YYYY-MM-DD - Triage decisions recorded - ## Notes - Source: Triage session on {date} - ``` +**By:** @user -4. 
**Confirm approval:** "✅ Approved: `{new_filename}` (Issue #{issue_id}) - Status: **ready** → Ready to work on" +**Actions:** +- [Decision 1 captured] +- [Decision 2 captured] -**When user says "next":** +**Learnings:** +- [Why this direction was chosen] +``` -- **Delete the todo file** - Remove it from todos/ directory since it's not relevant -- Skip to the next item -- Track skipped items for summary +### Step 5: Build an Execution Packet per Todo -**When user says "custom":** +Before launching execution-agent, prepare a full context packet. Do not send minimal prompts. Carry every section heading that execution-agent requires as delegated input (`## Your Unit`, `## Ticket-local context`, `## Why This Unit Exists`, `## Architectural Context`, `## Architecture Handoff`, `## Learnings from Previous Units`, `## Project Conventions`, `## TDD Execution Contract`), because execution-agent stops and reports a prompt-integrity problem when any of them is missing. -- Ask what to modify (priority, description, details) -- Update the information -- Present revised version -- Ask again: yes/next/custom +Every packet must include: -### Step 3: Continue Until All Processed +- Repository path and branch +- Exact todo file path and title +- Goal and acceptance criteria +- Resolved decisions (explicitly listed) +- Scope fence (what not to change) +- Likely files and tests +- Validation expectations +- Reporting contract (what execution-agent must return) -- Process all items one by one -- Track using TodoWrite for visibility -- Don't wait for approval between items - keep moving +Execution packet skeleton: -### Step 4: Final Summary +```markdown +AGENT_TEMPLATE loaded via local agent repository. Follow exactly. -After all items processed: +Repository: [path] +Branch: [branch] -````markdown -## Triage Complete +## Your Unit +Todo file: [path] +Title: [title] +Goal: [goal] -**Total Items:** [X] **Todos Approved (ready):** [Y] **Skipped:** [Z] +## Decisions (Final) +- [decision] +- [decision] -### Approved Todos (Ready for Work): +## Architecture Handoff +Acceptance criteria: +1. [...] +2. [...] -- `042-ready-p1-transaction-boundaries.md` - Transaction boundary issue -- `043-ready-p2-cache-optimization.md` - Cache performance improvement ... +Scope fence: +- [...] +- [...]
-### Skipped Items (Deleted): +## Likely Files +- [file] +- [file] -- Item #5: [reason] - Removed from todos/ -- Item #12: [reason] - Removed from todos/ +## Validation Contract +- run/update tests relevant to this todo +- report red/green/post-refactor evidence +- provide changed files and rationale +``` -### Summary of Changes Made: +### Step 6: Execute One Todo at a Time via execution-agent -During triage, the following status updates occurred: +For each todo in queue: -- **Pending → Ready:** Filenames and frontmatter updated to reflect approved status -- **Deleted:** Todo files for skipped findings removed from todos/ directory -- Each approved file now has `status: ready` in YAML frontmatter +1. Set todo status to `in_progress` +2. Dispatch execution-agent in sync mode with full packet +3. Review execution report +4. Validate independently from orchestration context +5. If validation fails, keep `in_progress`, refine packet, and re-run +6. On success, set status to `done` and log completion evidence in todo -### Next Steps: +Do not run parallel execution agents for this workflow unless user asks for parallelism. -1. View approved todos ready for work: - ```bash - ls todos/*-ready-*.md - ``` -```` +### Step 7: Orchestration-Side Validation (Mandatory) -2. Start work on approved items: +Never rely only on subagent self-report. Orchestrator validates. - ```bash - /resolve_todo_parallel # Work on multiple approved items efficiently - ``` +Validation checklist per todo: -3. Or pick individual items to work on +- targeted tests pass for changed area +- expected files actually changed +- scope fence was respected +- todo acceptance criteria now true +- todo status/frontmatter/log updated -4. 
As you work, update todo status: - - Ready → In Progress (in your local context as you work) - - In Progress → Complete (rename file: ready → complete, update frontmatter) +Completion log template: -``` +```markdown +### YYYY-MM-DD - Execution completed -## Example Response Format +**Actions:** +- [Implemented change summary] +**Validation:** +- `command 1` +- `command 2` ``` ---- +### Step 8: Final Sweep and Completion Report -Issue #5: Missing Transaction Boundaries for Multi-Step Operations +After all todos processed: -Severity: 🔴 P1 (CRITICAL) +1. Check no target todo remains `pending`/`in_progress` +2. Report done/incomplete counts +3. List any blocked items with exact blocker -Category: Data Integrity / Security +Final report format: -Description: The google_oauth2_connected callback in GoogleOauthCallbacks concern performs multiple database operations without transaction protection. If any step fails midway, the database is left in an inconsistent state. +```markdown +## Triage + Execution Complete -Location: app/Services/OAuthService.php:13-50 +**Total Targeted:** [X] +**Done:** [Y] +**Still Open:** [Z] -Problem Scenario: +### Done +- [todo-id] [title] -1. User.update succeeds (email changed) -2. Account.save! fails (validation error) -3. Result: User has changed email but no associated Account -4. Next login attempt fails completely +### Still Open / Blocked +- [todo-id] [reason] -Operations Without Transaction: +### Validation Run +- [key command] +- [key command] +``` -- User confirmation (line 13) -- Waitlist removal (line 14) -- User profile update (line 21-23) -- Account creation (line 28-37) -- Avatar attachment (line 39-45) -- Journey creation (line 47) +## Example Interaction Flow -Proposed Solution: Wrap all operations in ApplicationRecord.transaction do ... end block +```markdown +Todo #057 has one open decision: auth hardening now vs defer. +Recommendation: defer auth to next phase and document deployment constraint. 
-Estimated Effort: Small (30 minutes) +Which direction do you want? +1. Defer auth hardening to the next phase and document constraints (Recommended) +2. Implement auth hardening now +``` ---- +Then: -Do you want to add this to the todo list? +```markdown +Recorded decision in `todos/057-...md`. +Now dispatching execution-agent for todo 057 with full packet. +``` -1. yes - create todo file -2. next - skip this item -3. custom - modify before creating +Then: +```markdown +Todo 057 implemented and validated. +Status updated to done. +Proceeding to todo 058. ``` ## Important Implementation Details -### Status Transitions During Triage +### Status Discipline -**When "yes" is selected:** -1. Rename file: `{id}-pending-{priority}-{desc}.md` → `{id}-ready-{priority}-{desc}.md` -2. Update YAML frontmatter: `status: pending` → `status: ready` -3. Update Work Log with triage approval entry -4. Confirm: "✅ Approved: `{filename}` (Issue #{issue_id}) - Status: **ready**" +- `pending`: triaged but not executing +- `in_progress`: actively executing/iterating +- `done`: validated and closed +- `blocked`: cannot continue; include concrete blocker -**When "next" is selected:** -1. Delete the todo file from todos/ directory -2. Skip to next item -3. No file remains in the system +### Context Discipline -### Progress Tracking +- Main context owns orchestration, decisions, validation, and status integrity. +- execution-agent owns code edits for one scoped todo at a time. +- Do not duplicate the same implementation work in both contexts. -Every time you present a todo as a header, include: -- **Progress:** X/Y completed (e.g., "3/10 completed") -- **Estimated time remaining:** Based on how quickly you're progressing -- **Pacing:** Monitor time per finding and adjust estimate accordingly +### Decision Discipline -Example: -``` +- Ask one decision question at a time. +- Write answers into todo immediately. +- Never "assume defaults" if user decision is explicitly required.
-Progress: 3/10 completed | Estimated time: ~2 minutes remaining +### Execution Prompt Quality Bar -``` +Bad packet: +- "Implement todo 059" -### Do Not Code During Triage +Good packet: +- includes decisions, acceptance criteria, scope fence, likely files, and tests. -- ✅ Present findings -- ✅ Make yes/next/custom decisions -- ✅ Update todo files (rename, frontmatter, work log) -- ❌ Do NOT implement fixes or write code -- ❌ Do NOT add detailed implementation details -- ❌ That's for /resolve_todo_parallel phase -``` +### Do / Don't + +- ✅ Do triage all targeted todos before heavy execution when user asks for prep. +- ✅ Do capture recommendations before asking decisions. +- ✅ Do validate each completed todo independently. +- ✅ Do keep todo files as source of truth. +- ❌ Don't code during triage-only phase. +- ❌ Don't ask multiple decisions in one message. +- ❌ Don't mark done before orchestration-side validation. +- ❌ Don't drop decision outcomes from todo logs. + +## Done Options -When done give these options +When all targeted todos are processed, end with: ```markdown What would you like to do next? -1. run /resolve_todo_parallel to resolve the todos -2. commit the todos -3. nothing, go chill +1. commit and push current completed todo batch +2. stop here ``` diff --git a/portable/compound-engineering/commands/workflows/references/execution-agent-prompt.md b/portable/compound-engineering/commands/workflows/references/execution-agent-prompt.md index 07f0bf3..33333cc 100644 --- a/portable/compound-engineering/commands/workflows/references/execution-agent-prompt.md +++ b/portable/compound-engineering/commands/workflows/references/execution-agent-prompt.md @@ -9,23 +9,17 @@ platforms: # Execution Agent Prompt Template -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. 
The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. +This template is the **injected context packet** that `/workflows:work` passes into the named `execution-agent`. -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. +**Canonical execution rules live in `agents/workflow/execution-agent.md`.** `/workflows:work` must load that bundled agent template, then inject the fully populated context packet below when dispatching `Task(execution-agent, prompt=scoped_prompt)`. -**Template authority:** This file is the only valid source for execution-subagent prompts. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. - ---- - -You are an execution agent implementing a specific task from a work plan. Follow the 4-phase protocol below exactly. +**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator so the exact context scaffold ships with generated workflow bundles. -This template is used by the `workflows:work` orchestrator to construct prompts for execution subagents. The orchestrator fills in context blocks (marked with `{{PLACEHOLDER}}`) before passing the result to `Task(general-purpose, prompt=filled_template)`. - -**This is NOT an invocable agent.** It is a reference document consumed by the orchestrator. +**Scaffold authority:** This file is the only valid source for the injected execution context packet. If you receive a shortened paraphrase, a prompt missing the sections below, or a prompt that still contains unresolved `{{PLACEHOLDER}}` tokens, stop and report that the execution template is incomplete. Do not proceed on a reconstructed or partial prompt. 
--- -You are an execution agent implementing a specific execution unit from a work plan. Follow the 4-phase protocol below exactly. +The bundled `execution-agent` enforces clean-code, DRY, SOLID, feature-home boundary discipline, doc blocks above non-trivial functions/classes, imports at the top of files unless a real exception exists, explicit failure handling, and the structured execution report contract. Populate the scaffold below completely before dispatch. ## Your Unit @@ -90,189 +84,3 @@ Use `commands/workflows/references/tdd-evidence-contract.md` as the shared sourc - `Red` and `Green` prove behavior coverage. - `Post-Refactor Green` proves cleanup safety. - If no cleanup was needed, still rerun and say so. - ---- - -## Phase 1: Understand Before Building - -Before writing ANY code, review the unit requirements AND the "Why This Unit Exists" section carefully. - -Before proceeding, confirm the prompt still contains these sections: **Your Task**, **Why This Task Exists**, **Architectural Context**, **Learnings from Previous Tasks**, **Project Conventions**, and the four numbered phases below. If any section is missing or any placeholder is unresolved, stop and report the template integrity problem. - -**If anything is unclear, ambiguous, or could be interpreted multiple ways:** -- List your questions explicitly -- State the assumptions you would make if proceeding without answers -- Ask for clarification before starting work - -**If everything is clear:** -- State your interpretation of the requirements in 2-3 sentences -- State how this unit serves the overall user story (from the WHY context) -- List any assumptions you are making (even obvious ones) -- Proceed to Phase 2 - -Do NOT skip this phase. A few minutes of clarification prevents hours of rework. It is always better to ask than to guess. 
- -## Phase 2: Implement - -{{TDD_SECTION}} - -### While Implementing - -- If you encounter something unexpected or unclear, **STOP and ask** rather than guessing -- Follow existing codebase patterns -- do not invent new conventions -- Reuse before you create: before adding a new function, helper, class, interface, type, or utility, search the touched area for an existing abstraction you can reuse or extend safely -- Apply DRY by reason-to-change: extract shared code only when the behavior and future changes truly belong together; keep duplication local when forced abstractions would hide intent -- Apply SOLID deliberately: introduce new classes or interfaces only when they clarify responsibilities, dependency direction, or substitution boundaries; if a new abstraction does not clearly improve the design, do not add it -- Prefer direct, readable code over helper stacks, wrappers, manager classes, or indirection layers created "just in case" -- Variable, function, class, and interface names must be explicit and unambiguous. Avoid abbreviations like `cb`, `ctx`, `svc`, `obj`, or `tmp` when a clearer name fits the scope -- Keep changes minimal -- implement what is asked, nothing more (YAGNI) -- Do not add "nice to have" features not in the success criteria -- Commit after each logical unit of complete work using the project's commit convention - -### On Test Failure - -If tests fail after implementation: -1. Read the error message carefully -- understand what failed and why -2. Analyze whether the failure is in your implementation or in the test -3. Fix the issue -4. Re-run the test command -5. Repeat up to 3 total attempts -6. If still failing after 3 attempts, report the failure with full error output -- do not keep retrying blindly - -## Phase 3: Self-Review - -Before reporting back, review your own work with fresh eyes. Go through each checklist item honestly: - -**Completeness:** -- [ ] Did I implement EVERYTHING in the success criteria? 
-- [ ] Are there edge cases the criteria imply that I did not handle? -- [ ] Did I miss any requirements? - -**Purpose alignment:** -- [ ] Does my implementation actually deliver what the "Why This Unit Exists" section describes? -- [ ] Would a user achieve the stated outcome with this code? -- [ ] Did I build anything that doesn't trace back to the success criteria or user story? - -**Quality:** -- [ ] Do names accurately describe what things do (not how they work)? -- [ ] Did I reuse existing code where it already solved this problem cleanly? -- [ ] If I introduced a new abstraction, does it have a clear SOLID-based reason to exist? -- [ ] Did I avoid vague or abbreviated names in favor of explicit intent? -- [ ] Did the core business logic stay in the declared feature home unless the architecture handoff justified a shared/global extraction? -- [ ] Is the code clean and maintainable? -- [ ] Does it follow existing codebase patterns? -- [ ] Is error handling appropriate? - -**Discipline:** -- [ ] Did I avoid overbuilding (YAGNI)? -- [ ] Did I ONLY build what was requested? -- [ ] No "nice to have" additions? -- [ ] No unnecessary abstractions or premature optimization? - -**Testing:** -- [ ] Do tests verify actual behavior (not just mock behavior)? -- [ ] Are tests comprehensive against the success criteria? -- [ ] Did I run the test command and confirm it passes? - -**Evidence:** -- [ ] Can I show actual test output (not just "tests pass")? -- [ ] For UI changes, do I have a screenshot or visual evidence? -- [ ] For API changes, do I have actual request/response data? - -If you find issues during self-review, **fix them now** before reporting. Do not report known issues -- fix them first. 
- -## Phase 4: Report - -Return a structured execution report in exactly this format: - -```markdown -## Execution Report: [Unit Title] - -### Interpretation -[Your 2-3 sentence interpretation of what was asked] - -### Purpose Served -[Which user story aspect / success criterion this unit delivers, from the WHY context] - -### Assumptions Made -- [List each assumption, even if obvious] - -### What Was Implemented -[Describe what you built and how it works] - -### Files Changed -- `path/to/file` -- created/modified (brief description of change) - -### Test Results -- Command: `[test command]` -- Result: PASS/FAIL -- Attempts: [n] -- Output: -``` -[paste ACTUAL test output here] -``` - -### TDD Evidence -- **Red** - - Command: `[red command]` - - Result: PASS/FAIL - - Evidence: [why this proves the missing behavior existed before the implementation] -- **Green** - - Command: `[green command]` - - Result: PASS/FAIL - - Evidence: [why this proves the requested behavior now passes] -- **Post-Refactor Green** - - Command: `[post-refactor command]` - - Result: PASS/FAIL - - Evidence: [why this proves cleanup/refactor work preserved behavior] - -[If no cleanup was needed, still rerun and say so.] - -### Problems Encountered -[For each problem encountered during implementation:] -- **Error:** [exact error message] -- **Root cause:** [your analysis of why it happened] -- **Fix:** [what you did to resolve it] - -[If no problems: "None"] - -### Patterns Discovered -- [Naming conventions, architectural patterns, gotchas, or other learnings that would help future tasks] - -[If none: "None"] - -### Self-Review Findings -- [Issues found and fixed during self-review] - -[If none: "Self-review passed -- no issues found"] -``` - ---- - -## Standard Implementation Section - -_This section is included when TDD is not enabled._ - -1. Read referenced files and understand existing patterns -2. 
Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. Implement the task following project conventions -4. Write tests matching the success criteria -5. Run the test command: `{{TEST_COMMAND}}` -6. If tests fail: analyze failure, fix, and retry (up to 3 internal attempts) - ---- - -## TDD Implementation Section - -_This section is included when `tdd_enabled: true` is configured._ - -Follow the red-green-refactor cycle strictly: - -1. Read referenced files and understand existing patterns -2. Search for existing helpers, types, classes, and utilities that may already solve the needed behavior -3. **RED:** Write tests FIRST based on the success criteria. Run them. They MUST fail -- and they must fail for the RIGHT reason (the behavior is missing, not import errors or syntax problems) -4. **GREEN:** Write the MINIMAL production code needed to make the tests pass. No more than what is necessary. -5. Run tests. They MUST pass. -6. **REFACTOR:** Clean up if needed. Tests must still pass after refactoring. - -**Iron rule:** If at any point you find yourself writing production code before a failing test exists for that behavior, STOP. Write the test first. This is not a suggestion -- it is the process. 
diff --git a/portable/compound-engineering/commands/workflows/references/ticket-execution-contract.md b/portable/compound-engineering/commands/workflows/references/ticket-execution-contract.md index 4c5440d..187a50e 100644 --- a/portable/compound-engineering/commands/workflows/references/ticket-execution-contract.md +++ b/portable/compound-engineering/commands/workflows/references/ticket-execution-contract.md @@ -21,14 +21,32 @@ Required files: `index.md` must include: +- frontmatter with: + - `plan_ref` + - `architecture_ref` or an explicit handoff note + - `execution_shape` + - `ticket_set_status` -- `ready | in_progress | blocked | completed` + - `last_completed_batch` + - `total_batches` - plan path - architecture artifact path or explicit handoff note - execution shape - ticket order -- dependency view +- dependency graph +- execution batches +- file-overlap safety notes for every multi-ticket batch - blocker summary - review summary split into `Blocking gaps` and `Recommendations` +Recommended body shape: + +1. `# Ticket Set: ` +2. `## Dependency Graph` +3. `## Execution Batches` +4. `## Ticket Table` +5. `## Blockers` +6. 
`## Review Summary` + ## Ticket file naming - Keep zero-padded numeric prefixes in execution order: `01-...`, `02-...`, `03-...` @@ -103,6 +121,15 @@ Use these ticket statuses: ## Execution consumption rules +When `/workflows:work` receives `index.md`: + +- treat the index as the authoritative ticket queue for this ticket set +- read `last_completed_batch` to choose the next unfinished batch +- load only the tickets named in that next batch +- execute multiple tickets together only when the batch is explicitly declared as parallel-safe and the index records why the file sets do not conflict +- if the index or ticket files leave overlap safety ambiguous, collapse that batch to sequential execution instead of guessing +- update batch progress in `index.md`, and increment `last_completed_batch` only after every ticket in the batch reaches `completed` + When `/workflows:work` receives a ticket file: - treat that ticket as one pre-scoped execution unit @@ -117,4 +144,6 @@ When `/workflows:review` or `ticket-flow-auditor` consumes ticket artifacts, ver - the ticket still matches the parent plan and architecture - the implementation stayed inside the ticket scope fence unless the change was explicitly documented - dependency order and status changes stayed honest +- the dependency graph and batch partition still match the ticket files +- parallel-safe batches really stay file-disjoint and race-safe - evidence matches the ticket's stated acceptance criteria and test command diff --git a/portable/compound-engineering/commands/workflows/references/ticketization-contract.md b/portable/compound-engineering/commands/workflows/references/ticketization-contract.md index 38de883..d5042e5 100644 --- a/portable/compound-engineering/commands/workflows/references/ticketization-contract.md +++ b/portable/compound-engineering/commands/workflows/references/ticketization-contract.md @@ -38,7 +38,7 @@ docs/tickets/YYYY-MM-DD-/ Required outputs: -- `index.md` -- ticket-set summary, 
dependency view, and run guidance +- `index.md` -- ticket-set summary, dependency graph, execution batches, run guidance, and progress pointer - `NN-.md` -- one file per ticket in execution order Record the ticket set back into the plan as: @@ -63,6 +63,30 @@ The first version is **local-artifact first**. - Split a packet when one ticket would deliver more than one meaningful outcome, blur ownership, or bury the feature home. - Keep the first ticket a tracer bullet when the selected execution shape is `vertical-slices`. +## Dependency graph and execution batches + +Ticketization must build a conservative ticket dependency graph and then derive execution batches from it. + +- Start from each ticket's explicit `depends_on` edges. +- Then partition tickets into `Batch 1`, `Batch 2`, and so on, where every ticket in a batch depends only on earlier batches. +- A batch may contain multiple tickets only when their declared `files` sets do not overlap and there is no unresolved shared mutable state, migration risk, or boundary ambiguity between them. +- If two tickets might race, overwrite each other, or require the same shared adapter/config surface, keep them in separate sequential batches. +- **Default-to-sequential rule:** if batch safety is uncertain, emit singleton batches instead of guessing at parallelism. + +The index file becomes the authoritative execution queue for ticketized work. `/workflows:work` should be able to read `index.md`, find the next unfinished batch, and execute only that batch without recomputing the whole backlog. + +## Required index progress state + +`index.md` must record batch progress explicitly so ticketized execution can resume from the index alone. 
+ +Required fields: + +- `last_completed_batch` -- integer counter; `0` means no batches completed yet +- `total_batches` -- total number of execution batches in the ticket set +- batch-level status view showing which batches are pending, in progress, blocked, or completed + +`last_completed_batch` advances only after every ticket in that batch is complete. If any ticket in the batch is blocked or still running, do not advance the counter. + ## Required ticket-local context Every generated ticket must carry a compact execution packet that can stand on its own. @@ -116,6 +140,9 @@ That review must check: - feature-home clarity - shared/global boundary honesty - blocker and dependency correctness +- dependency graph correctness +- execution batch safety and parallelization honesty +- file-overlap conflicts between tickets claimed to be parallel-safe - context completeness - ticket size and coupling quality @@ -133,5 +160,7 @@ The ticketization contract is satisfied only when: - the priming skill and ticket schema contract are explicit - local-artifact-first behavior is explicit - `tickets_ref` recording is explicit +- the dependency graph and conservative batch partition are explicit +- the index progress pointer is explicit - each ticket's required local context is explicit - the final ticket-set review step and reviewer are explicit diff --git a/portable/compound-engineering/commands/workflows/to-issues.md b/portable/compound-engineering/commands/workflows/to-issues.md index 80bcec2..ba1bb7e 100644 --- a/portable/compound-engineering/commands/workflows/to-issues.md +++ b/portable/compound-engineering/commands/workflows/to-issues.md @@ -82,6 +82,8 @@ Rules: - size by coupling and boundary clarity, not by arbitrary task counts - keep tracer bullets first when the mode is `vertical-slices` - surface uncertainty instead of hiding it when ticketizing directly after `/workflows:plan` +- build a conservative dependency graph and execution batches while ticketizing +- 
default to sequential singleton batches whenever safe parallelism is unclear Each ticket must include the required ticket-local context defined in `ticketization-contract.md`, and each ticket file must follow the exact frontmatter/body shape in `ticket-execution-contract.md`. @@ -110,6 +112,15 @@ Required files: - `index.md` - one `NN-.md` file per ticket +`index.md` is not just a directory listing. It is the authoritative ticket-set graph and execution cursor. It must include the dependency graph, the conservative batch partition, file-overlap safety notes for every multi-ticket batch, and an updateable `last_completed_batch` counter that `/workflows:work` can use to resume from the next batch. + +When partitioning tickets into batches: + +- only group tickets together when all dependencies are satisfied by earlier batches +- only group tickets together when their declared `files` sets do not overlap +- treat shared mutable state, config churn, migrations, and boundary ambiguity as reasons to split the batch +- if unsure, split into sequential batches instead of inventing parallelism + Write every ticket using the exact schema from `ticket-execution-contract.md`, including its required frontmatter, section order, status lifecycle, and parent refs. Then record `tickets_ref` back into the plan frontmatter when possible. If frontmatter cannot be updated safely, add the ticket-set path under `## Related Artifacts`. 
@@ -133,6 +144,8 @@ Check for: - feature-home drift - shared/global drift - missing blockers or bad dependency ordering +- bad dependency graph layering or unsafe batch partitioning +- tickets grouped in parallel despite overlapping files or shared mutable surfaces - oversized tickets - tickets with weak WHY tracing - missing acceptance criteria or evidence commands @@ -152,6 +165,8 @@ A complete run must leave behind: - a local ticket set under `docs/tickets/` - `tickets_ref` or a labeled related-artifact link back into the plan - explicit blocker/dependency ordering +- a dependency graph plus conservative execution batches in `index.md` +- an updateable `last_completed_batch` progress pointer in `index.md` - compact ticket-local context packs - a final ticket-set review result from `ticket-flow-auditor` @@ -170,7 +185,7 @@ Execution readiness: - Recommendations: Recommended next step: -- Run `/workflows:work` on one ticket file once ticket-scoped execution is supported, or use the generated tickets as the scoped execution packet source for the next implementation pass. +- Run `/workflows:work` on the generated `index.md` so execution can pick the next safe batch automatically, or target one ticket file manually when you need to force a narrower run. ``` NEVER CODE! This phase shapes execution artifacts and context packets. It does not implement the feature itself. diff --git a/portable/compound-engineering/commands/workflows/work.md b/portable/compound-engineering/commands/workflows/work.md index 30b8e46..9532a4d 100644 --- a/portable/compound-engineering/commands/workflows/work.md +++ b/portable/compound-engineering/commands/workflows/work.md @@ -3,7 +3,7 @@ name: workflows:work description: >- Execute work plans while maintaining WHY tracing from problem narrative through user story to implementation. Grounds every subagent in purpose. 
-argument-hint: '[plan file, ticket file, specification, or todo file path] [--review-mode bulk|inline|both]' +argument-hint: '[plan file, ticket index, ticket file, specification, or todo file path] [--review-mode bulk|inline|both]' --- # Work Plan Execution Command @@ -14,9 +14,11 @@ Execute a work plan while maintaining WHY tracing from problem narrative through ## Introduction -This command takes a work document (plan, ticket, specification, or todo file) and executes it systematically using a **subagent orchestration model**. The orchestrator (this conversation) loads or adapts the source into execution units and delegates each unit to a focused subagent. `vertical-slices` is the default execution shape, but `infra-track` and `fix-batch` are also valid when declared by the plan. When the input is a ticket file, that ticket becomes the primary execution packet and the parent plan/architecture artifacts provide deeper context instead of re-expanding the whole backlog. Each subagent follows a standardized 4-phase protocol (understand, implement, self-review, report) defined in the execution agent prompt template. +This command takes a work document (plan, ticket index, ticket, specification, or todo file) and executes it systematically using a **subagent orchestration model**. The orchestrator (this conversation) loads or adapts the source into execution units and delegates each unit to a focused subagent. `vertical-slices` is the default execution shape, but `infra-track` and `fix-batch` are also valid when declared by the plan. When the input is a ticket index, that index becomes the authoritative execution queue and `/workflows:work` selects the next safe batch from it. When the input is a ticket file, that ticket becomes the primary execution packet and the parent plan/architecture artifacts provide deeper context instead of re-expanding the whole backlog. 
Every implementation unit, retry, and regression repair in this workflow is delegated through the named `execution-agent`, which follows a standardized 4-phase protocol (understand, implement, self-review, report). -**WHY-grounded execution:** Every subagent receives the source plan's WHY context -- the problem narrative, user story, architectural context, the architecture handoff contract, and which success criterion their specific unit serves. When execution starts from a ticket, the ticket's local context packet stays primary, while `plan_ref`, `tickets_ref`, and `architecture_ref` remain the deeper-dive path. This prevents implementation drift where technically correct code fails to deliver the user's actual need. The orchestrator is the guardian of WHY: it extracts purpose from the plan, threads it through every unit prompt, and validates that the combined output delivers the stated user story. +**WHY-grounded execution:** Every subagent receives the source plan's WHY context -- the problem narrative, user story, architectural context, the architecture handoff contract, and which success criterion their specific unit serves. When execution starts from a ticket index, the index decides the next batch while the selected tickets provide the local execution packets. When execution starts from a ticket file, the ticket's local context packet stays primary, while `plan_ref`, `tickets_ref`, and `architecture_ref` remain the deeper-dive path. This prevents implementation drift where technically correct code fails to deliver the user's actual need. The orchestrator is the guardian of WHY: it extracts purpose from the plan, threads it through every unit prompt, and validates that the combined output delivers the stated user story. + +**Execution delegation rule:** Ticket execution must always go through the bundled `execution-agent`. Do not route ticket implementation, ticket fix loops, or ticket regression repairs through `general-purpose` or any ad hoc worker prompt. 
### Review Mode @@ -41,13 +43,19 @@ If no `--review-mode` is specified, check `compound-engineering.local.md` for a 1. **Read the work document and extract WHY + guardrail context** - Read the work document completely - - If the input is a ticket file, first load `commands/workflows/references/ticket-execution-contract.md` and verify the ticket includes the required frontmatter and body sections. Stop and send the user back to `/workflows:to-issues` if the ticket contract is missing or malformed. + - If the input is a ticket index or ticket file, first load `commands/workflows/references/ticket-execution-contract.md` and verify the artifact includes the required index or ticket contract. Stop and send the user back to `/workflows:to-issues` if the contract is missing or malformed. + - If the input is a ticket index, extract: + - `plan_ref`, `architecture_ref`, `execution_shape`, `ticket_set_status`, `last_completed_batch`, and `total_batches` + - the dependency graph and execution-batch table + - the next unfinished batch and the ticket files it names + - the file-overlap safety notes proving whether the batch is truly parallel-safe + - If the input is a ticket index, use the index as the source of truth for ordering and batch selection. Load only the ticket files named in the next batch before continuing. - If the input is a ticket file, extract: - `plan_ref`, `tickets_ref`, `architecture_ref`, and `source_packet_ref` - `feature_home`, `depends_on`, `dependency_type`, `files`, `test_command`, and `status` - the compact packet in `## Local Context` - the parent trace in `## Parent Refs` and `## Deeper-Dive Refs` - - If the input is a ticket file, load the parent plan and architecture artifact from the recorded refs before continuing. The ticket is the primary execution unit; the parent artifacts provide WHY and boundary context. 
+ - If the input is a ticket index or ticket file, load the parent plan and architecture artifact from the recorded refs before continuing. The index chooses the batch; the ticket files remain the execution packets; the parent artifacts provide WHY and boundary context. - **Extract WHY artifacts** from the parent plan (these ground everything that follows): - **Problem Narrative** -- why this work exists, what pain it solves - **User Story** -- who benefits and what outcome they get @@ -62,11 +70,11 @@ If no `--review-mode` is specified, check `compound-engineering.local.md` for a - If the resolved contract weakens Ralph/unit+e2e without a justified exception in the plan, stop and ask for the plan contract to be corrected before execution - If `docs/constitution.md` exists, read it and extract the active constitution version, applicable principles, execution baselines, and approval rules. If the plan lists `constitution_waivers`, honor only those explicit exceptions. - If the parent plan has a `brainstorm_ref:` path, read that brainstorm document too for richer WHY context - - If the parent plan has an `architecture_ref:` path, the ticket has an `architecture_ref`, or `## Related Artifacts` points to `docs/architecture/`, read that artifact and extract feature homes, shared/global decisions, context tiers, drift checks, deletion tests, interfaces as test surfaces, seams, adapters, contracts, deepening candidates, and downstream work/review guidance + - If the parent plan has an `architecture_ref:` path, the selected ticket or index has an `architecture_ref`, or `## Related Artifacts` points to `docs/architecture/`, read that artifact and extract feature homes, shared/global decisions, context tiers, drift checks, deletion tests, interfaces as test surfaces, seams, adapters, contracts, deepening candidates, and downstream work/review guidance - If no architecture artifact is recorded, assemble an explicit architecture handoff contract from the parent plan's 
Architectural Context, Key Decisions, Constitution Alignment, brainstorm context, execution constraints, and `commands/workflows/references/vertical-slice-architecture.md`. Tell the user this is a fallback and recommend `/workflows:architecture` if boundaries are still unsettled. - Review any other references or links provided in the plan or ticket - If the constitution requires explicit approval for any part of the planned work (for example, risky writes, schema changes, auth changes, or scope expansions), surface that before execution starts - - If the document is not already in a declared execution shape and is not a valid ticket artifact, treat it as **legacy input**. Before spawning subagents, adapt it into execution units in STATE.md. If that adaptation materially changes scope or ordering, ask the user to approve the adapted unit backlog before proceeding. + - If the document is not already in a declared execution shape and is not a valid ticket index or ticket artifact, treat it as **legacy input**. Before spawning subagents, adapt it into execution units in STATE.md. If that adaptation materially changes scope or ordering, ask the user to approve the adapted unit backlog before proceeding. - If anything is unclear or ambiguous, ask clarifying questions now - Get user approval to proceed - **Do not skip this** - better to ask questions now than build the wrong thing @@ -117,6 +125,7 @@ If no `--review-mode` is specified, check `compound-engineering.local.md` for a 3. 
**Preview Unit Breakdown** - Mentally identify the major execution units from the source document + - If the input is a ticket index, preview the next batch from the index and whether it is sequential or parallel-safe - If the input is a ticket file, preview exactly one execution unit unless the user explicitly asks to re-split it - Note any questions about dependencies or scope - The formal unit decomposition happens in Phase 2 Step 4 (STATE.md), which is the persistent record of progress @@ -135,6 +144,7 @@ Before executing, validate four things: **structural readiness** (the selected e - **`vertical-slices`** -- slice type, serves, demo scenario, feature home, scope fence, files, success criteria, validation command, dependencies, dependency type - **`infra-track`** -- capability enabled, consumers / downstream work unlocked, scope, files, risk / rollback, success criteria, validation command, dependencies - **`fix-batch`** -- problem, repro / expected outcome, files, success criteria, validation command, dependencies +- **Ticket index input** -- valid `ticket-execution-contract.md` index contract, batch table, file-overlap safety notes, and progress pointer - **Ticket input** -- valid `ticket-execution-contract.md` frontmatter/body, parent refs, feature home, scope fence, acceptance criteria, test command, and compact local context - **Default rule** -- if `execution_shape` is missing, assume `vertical-slices` - **Anti-coercion rule** -- do not force infra or fix-batch work into slices if that would create fake verticality @@ -162,7 +172,7 @@ Before executing, validate four things: **structural readiness** (the selected e - **Success Criteria** -- present at plan level (not just unit level) - **Unit tracing** -- each execution unit has a purpose line connecting it to the user story or explicit enabling outcome -If the plan lacks structural details, the ticket lacks the ticket execution contract, or no architecture artifact / handoff contract can explain the 
boundaries, refuse to proceed and suggest running `/workflows:architecture` first if the boundaries are still fuzzy, then `/deepen-plan` or `/workflows:to-issues`, or manually repairing the execution packet. +If the plan lacks structural details, the ticket index or ticket lacks the ticket execution contract, or no architecture artifact / handoff contract can explain the boundaries, refuse to proceed and suggest running `/workflows:architecture` first if the boundaries are still fuzzy, then `/deepen-plan` or `/workflows:to-issues`, or manually repairing the execution packet. If the plan lacks the `tdd` block or `## TDD & Evidence Contract`, or if the resolved contract is ambiguous, refuse to proceed and suggest `/workflows:plan` or `/deepen-plan` to repair the execution contract before spawning subagents. @@ -174,7 +184,7 @@ If the plan lacks WHY artifacts, the orchestrator should **construct minimal WHY #### Step 2: Check for Resumable Session -Before creating a new session, check for existing incomplete sessions for the same plan or ticket: +Before creating a new session, check for existing incomplete sessions for the same plan, ticket index, or ticket: ```bash ls docs/execution-sessions/work-*/STATE.md 2>/dev/null @@ -201,8 +211,9 @@ Create a `STATE.md` file in the session directory: ```markdown --- -source_type: [plan | ticket | specification | todo] +source_type: [plan | ticket-index | ticket | specification | todo] plan_file: [path to plan] +ticket_index: [path to ticket index, if applicable] ticket_file: [path to ticket, if applicable] tickets_ref: [path to ticket index, if applicable] source_packet_ref: [plan packet ref or ticket packet ref] @@ -265,12 +276,13 @@ _No learnings yet._ The orchestrator parses the source artifact and creates a list of execution units. Each unit is a self-contained packet of work defined by the selected execution shape or the ticket contract. 
The orchestrator does the heavy lifting here: - **Prefer plan-defined units directly** -- if the plan already declares a coherent execution shape, execute those packets as written +- **Prefer index-defined batches directly** -- if the input is a valid ticket index, execute the next unfinished batch as written and do not re-batch it unless the index is missing safety evidence or the user explicitly approves a change - **Prefer ticket-defined unit directly** -- if the input is a valid ticket artifact, execute that one ticket as one unit and do not re-split it unless the user explicitly approves a change - **Adapt legacy phase/task plans into units before coding** -- do not execute raw task lists directly once the shape contract is available - **Break oversized units** into smaller units if needed (each unit should be completable in one subagent session) - **Preserve WHY tracing** -- when splitting a unit, each resulting unit inherits or refines the parent unit's purpose line. Never create an orphan unit with no connection to the user story. - **Identify file dependencies** between units -- **Determine parallelizable units** -- only units with non-overlapping file sets and compatible dependencies can run simultaneously +- **Determine parallelizable units** -- only units with non-overlapping file sets and compatible dependencies can run simultaneously. For ticket-index input, trust only the explicit batch partition recorded in `index.md`; if the index leaves overlap safety ambiguous, collapse the batch to sequential execution. - **Ensure each unit has clear success criteria** -- if the plan already defines them, use them directly; otherwise, the orchestrator must create them - **Map each unit to its purpose** -- record which success criterion or enabling outcome each unit delivers (this goes in STATE.md's "Serves / Unlocks" column) @@ -287,27 +299,33 @@ For each unit (or parallel batch of units), follow this cycle: ##### a. 
Build Scoped Prompt -For each unit, the orchestrator constructs a focused prompt by loading the **execution agent prompt template** from `commands/workflows/references/execution-agent-prompt.md` and filling in the context blocks. - -Apply the shared `Reference Template Loading` protocol from `commands/workflows/references/orchestration-protocol.md`, substituting `execution-agent-prompt.md`. - -- Quote the first non-empty line of the loaded template before continuing. -- Every execution, retry, fix, and regression-repair subagent in this workflow must start from a freshly loaded copy of that same template. -- Fill the placeholders from the loaded template only. Do not reconstruct the prompt from memory, paraphrase it into a shorter prompt, or drop mandatory sections. -- If the template cannot be loaded, quoted, or fully populated without unresolved `{{PLACEHOLDER}}` values, stop and resolve the missing context before spawning the subagent. - -- **{{UNIT_TITLE}}** and **{{UNIT_DESCRIPTION}}** -- from the plan packet or ticket artifact -- **{{UNIT_KIND}}** -- from the plan or ticket (`tracer-bullet`, `expansion`, `hardening`, `infra-packet`, `fix-item`, etc.) 
-- **{{OUTCOME_SCENARIO}}** -- the observable behavior or enabling outcome this unit proves -- **{{UNIT_SCOPE}}** -- what the unit owns and excludes -- **{{UNIT_SCOPE_FENCE}}** -- the boundary that keeps the unit thin -- **{{FILE_LIST}}** -- files to create/modify from the plan or ticket -- **{{SUCCESS_CRITERIA}}** -- checkboxes that define "done" -- **{{VALIDATION_COMMAND}}** -- how to verify the unit works -- **{{COMPLETED_DEPENDENCIES}}** -- list of already-completed units this depends on -- **{{PARENT_REFS}}** -- plan, ticket set, architecture, and source packet refs that anchor this unit -- **{{TICKET_LOCAL_CONTEXT}}** -- the ticket-local execution packet when the source is a ticket; otherwise a compact packet derived from the plan unit -- **{{WHY_CONTEXT}}** -- the purpose grounding block (constructed by orchestrator): +For each unit, the orchestrator constructs a focused prompt for the named `execution-agent`. + +Apply the shared `Named Agent Dispatch` protocol from `commands/workflows/references/orchestration-protocol.md`, substituting `execution-agent`. + +- Quote the first non-empty line of the loaded bundled agent template before continuing. +- Every execution, retry, fix, and regression-repair subagent in this workflow must start from a freshly loaded copy of that same agent template. +- Build `scoped_prompt` by injecting the full loaded `execution-agent` template plus the resolved context packet below. Do not summarize, abbreviate, or paraphrase the agent template. +- Apply the shared `Reference Template Loading` protocol from `commands/workflows/references/orchestration-protocol.md`, substituting `execution-agent-prompt.md`, and quote the first non-empty line of that scaffold before continuing. +- Use the loaded `commands/workflows/references/execution-agent-prompt.md` as the scaffold for the context packet so the injected headings stay stable across retries and follow-up fixes. +- Fill the scaffold completely. 
Do not continue if any required section is missing or any `{{PLACEHOLDER}}` value is unresolved. + +- **`## Your Unit`** + - **{{UNIT_TITLE}}** and **{{UNIT_DESCRIPTION}}** -- from the plan packet or ticket artifact + - **{{UNIT_KIND}}** -- from the plan or ticket (`tracer-bullet`, `expansion`, `hardening`, `infra-packet`, `fix-item`, etc.) + - **{{OUTCOME_SCENARIO}}** -- the observable behavior or enabling outcome this unit proves + - **{{FEATURE_HOME}}** -- the primary feature home or owning module + - **{{UNIT_SCOPE}}** -- what the unit owns and excludes + - **{{UNIT_SCOPE_FENCE}}** -- the boundary that keeps the unit thin + - **{{FILE_LIST}}** -- files to create/modify from the plan or ticket + - **{{SUCCESS_CRITERIA}}** -- checkboxes that define "done" + - **{{VALIDATION_COMMAND}}** -- how to verify the unit works + - **{{COMPLETED_DEPENDENCIES}}** -- list of already-completed units this depends on + - **{{PARENT_REFS}}** -- plan, ticket set, architecture, and source packet refs that anchor this unit +- **`## Ticket-local context`** + - **{{TICKET_LOCAL_CONTEXT}}** -- the ticket-local execution packet when the source is a ticket; otherwise a compact packet derived from the plan unit +- **`## Why This Unit Exists`** + - **{{WHY_CONTEXT}}** -- the purpose grounding block (constructed by orchestrator): ``` ## Why This Unit Exists **Problem:** [problem narrative from plan -- 1-2 sentences] @@ -316,14 +334,18 @@ Apply the shared `Reference Template Loading` protocol from `commands/workflows/ **Overall success criteria:** [plan-level success criteria list] **Guardrails:** [relevant constitution principles, approval rules, and approved waivers] ``` -- **{{ARCHITECTURAL_CONTEXT}}** -- from the plan's Architectural Context section, filtered to what's relevant for this unit's files and domain -- **{{ARCHITECTURE_HANDOFF}}** -- from the `docs/architecture/` artifact or explicit plan-derived handoff contract; include deletion-test decisions, interfaces as test surfaces, 
seams, adapters, contracts, and downstream review guidance relevant to this unit -- **{{LEARNINGS_BRIEF}}** -- from previous units, filtered by domain relevance -- **{{PROJECT_CONVENTIONS}}** -- from CLAUDE.md plus relevant constitution baselines -- **{{TDD_CONTRACT}}** -- the resolved execution contract: effective mode, Ralph/default loop, required unit/e2e evidence, and any explicit exceptions -- **{{TDD_SECTION}}** -- if the resolved effective mode is Ralph-driven, include the Ralph/TDD Implementation Section from the template; otherwise include the Standard Implementation Section. Do not treat Ralph as an adjacent side command when it is the resolved default. - -The execution agent template instructs each subagent to follow a 4-phase protocol: +- **`## Architectural Context`** + - **{{ARCHITECTURAL_CONTEXT}}** -- from the plan's Architectural Context section, filtered to what's relevant for this unit's files and domain +- **`## Architecture Handoff`** + - **{{ARCHITECTURE_HANDOFF}}** -- from the `docs/architecture/` artifact or explicit plan-derived handoff contract; include deletion-test decisions, interfaces as test surfaces, seams, adapters, contracts, and downstream review guidance relevant to this unit +- **`## Learnings from Previous Units`** + - **{{LEARNINGS_BRIEF}}** -- from previous units, filtered by domain relevance +- **`## Project Conventions`** + - **{{PROJECT_CONVENTIONS}}** -- from CLAUDE.md plus relevant constitution baselines +- **`## TDD Execution Contract`** + - **{{TDD_CONTRACT}}** -- the resolved execution contract: effective mode, Ralph/default loop, required unit/e2e evidence, any explicit exceptions, and any fix/regression context that the retried unit must address + +The loaded `execution-agent` template instructs each subagent to follow a 4-phase protocol: 1. **Understand** -- review requirements, surface ambiguities, state assumptions before coding 2. 
**Implement** -- follow the resolved Ralph/default execution mode, retry on failure (up to 3 attempts) 3. **Self-review** -- check completeness, quality, discipline, testing, and evidence @@ -334,10 +356,10 @@ The execution agent template instructs each subagent to follow a 4-phase protoco Delegate the unit to a focused subagent: ``` -Task(general-purpose, prompt=scoped_prompt) +Task(execution-agent, prompt=scoped_prompt) ``` -The subagent prompt is constructed from the loaded execution agent template (`commands/workflows/references/execution-agent-prompt.md`). The template already includes instructions for the 4-phase protocol (understand, implement, self-review, report). The orchestrator fills in the context blocks and passes the result. Do not substitute a custom summary prompt for any execution worker: +The subagent prompt is constructed from the loaded bundled `execution-agent` template plus the fully injected context packet scaffold from `commands/workflows/references/execution-agent-prompt.md`. Do not substitute a custom summary prompt for any execution worker, and do not dispatch ticket implementation through `general-purpose`: 1. Read referenced files and understand existing patterns 2. Follow the resolved Ralph/default execution contract @@ -353,7 +375,7 @@ The subagent prompt is constructed from the loaded execution agent template (`co - Final test results (pass/fail) - Attempt count -**For parallel units**: Spawn multiple subagents simultaneously only when their file sets do not overlap and their dependency types allow parallel work. Before parallelizing, verify file sets do not overlap and no unit claims shared mutable state without an explicit guard. +**For parallel units**: Spawn multiple subagents simultaneously only when their file sets do not overlap and their dependency types allow parallel work. For ticket-index input, parallelize only when the selected batch is explicitly marked safe by the index's file-overlap notes. 
Before parallelizing, verify file sets do not overlap and no unit claims shared mutable state without an explicit guard. If there is doubt, execute sequentially. **Example scoped prompt:** @@ -483,7 +505,7 @@ If the `--review-mode` argument is `inline` or `both`, perform a two-stage inlin The spec reviewer should check not just checkbox compliance but whether the implementation actually delivers on the recorded purpose. A unit can pass all checkboxes but miss the intent. - If **PASS**: proceed to Stage 2 - - If **FAIL**: reload `execution-agent-prompt.md`, rebuild the scoped execution prompt with the review findings added as fix context, then spawn a new execution subagent. Re-run the spec reviewer afterward (max 2 fix-review cycles). If still failing after 2 cycles, log the issues and ask the user how to proceed. + - If **FAIL**: reload the bundled `execution-agent` template plus `execution-agent-prompt.md`, rebuild the scoped execution prompt with the review findings added as fix context, then spawn a new `execution-agent` run. Re-run the spec reviewer afterward (max 2 fix-review cycles). If still failing after 2 cycles, log the issues and ask the user how to proceed. 
**Stage 2: Code Quality Review** (only after spec compliance passes) @@ -497,7 +519,7 @@ If the `--review-mode` argument is `inline` or `both`, perform a two-stage inlin ``` - If **PASS**: proceed to next steps - - If **FAIL** with Critical issues: reload `execution-agent-prompt.md`, rebuild the scoped execution prompt with the quality findings added as fix context, spawn a fix subagent, then re-review (max 2 cycles) + - If **FAIL** with Critical issues: reload the bundled `execution-agent` template plus `execution-agent-prompt.md`, rebuild the scoped execution prompt with the quality findings added as fix context, spawn an `execution-agent` fix run, then re-review (max 2 cycles) - If **FAIL** with only Important/Minor issues: log them for the orchestrator's attention but proceed to next task (these will also be caught by `/workflows:review` if run later) **Note:** Inline review is a lightweight per-unit check. It does NOT replace the comprehensive `/workflows:review` multi-agent review. When `--review-mode both` is active, inline review runs per-unit AND `/workflows:review` runs after all units complete. @@ -508,12 +530,14 @@ If the `--review-mode` argument is `inline` or `both`, perform a two-stage inlin **5. Update source work artifact** -- keep the execution source honest: - plan input: check off completed items (`[ ]` to `[x]`) in the original plan document + - ticket-index input: update the selected batch status in `index.md`, keep the ticket table honest, and increment `last_completed_batch` only after every ticket in that batch is `completed` - ticket input: update the ticket `status` field (`ready` -> `completed` or `blocked`) and preserve parent refs + - if both an index and ticket files can be updated safely, update both without inventing new status fields - if both a ticket and plan backlog can be updated safely, update both without inventing new status fields **6. Regression guard** -- run test commands from ALL previously completed tasks. 
If any regress: - Log the regression in the current task's session file - - Reload `execution-agent-prompt.md`, rebuild the scoped execution prompt with context about what broke and why, and spawn a fix subagent + - Reload the bundled `execution-agent` template plus `execution-agent-prompt.md`, rebuild the scoped execution prompt with context about what broke and why, and spawn an `execution-agent` fix run - Do not proceed to the next task until the regression is fixed **7. Incremental commit** if appropriate (logical unit complete, tests pass): diff --git a/portable/compound-engineering/plugin.yaml b/portable/compound-engineering/plugin.yaml index 4c40f79..caaaf04 100644 --- a/portable/compound-engineering/plugin.yaml +++ b/portable/compound-engineering/plugin.yaml @@ -1,5 +1,5 @@ name: compound-engineering -version: 4.11.0 +version: 4.13.0 description: lead: OpenCode-first AI-powered development tools. suffix: spanning code review, research, design, and workflow automation, with generated Copilot support and Claude Code compatibility outputs. diff --git a/portable/compound-engineering/skills/grill-me/SKILL.md b/portable/compound-engineering/skills/grill-me/SKILL.md index 5ca5126..4d8d1b5 100644 --- a/portable/compound-engineering/skills/grill-me/SKILL.md +++ b/portable/compound-engineering/skills/grill-me/SKILL.md @@ -1,6 +1,6 @@ --- name: grill-with-docs -description: Grilling session that challenges your plan against the existing domain model, sharpens terminology, and updates documentation (CONTEXT.md, ADRs) inline as decisions crystallise. Use when user wants to stress-test a plan against their project's language and documented decisions. +description: Grilling session that challenges your plan against the existing domain model, sharpens terminology, and updates documentation (CONTEXT.md, brainstorm docs, plan docs, ADRs) inline as decisions crystallise. Use when user wants to stress-test a plan against their project's language and documented decisions. 
--- @@ -17,26 +17,39 @@ If a question can be answered by exploring the codebase, explore the codebase in ## Domain awareness -During codebase exploration, also look for existing documentation: +During codebase exploration, also look for existing documentation, especially the active feature artifact for the current discussion. ### File structure -Most repos have a single CONSTITUTION.md spec driven driver, a CONTENT.md file for cementing shared language and some architecture docs per feature implemented: +Most repos have a repo-wide constitution, a glossary-oriented `CONTEXT.md`, and feature documents under `docs/`: ``` / ├── CONSTITUTION.md ├── CONTEXT.md ├── docs/ +│ ├── brainstorms/ +│ │ └── 2026-04-30-checkout-race-brainstorm.md +│ ├── plans/ +│ │ └── 2026-05-01-fix-checkout-race-plan.md │ └── architecture/ │ ├── 2026-04-30-nucleus-stage-1-architecture.md └── src/ ``` -Create files lazily — only when you have something to write.If no CONTEXT.md exists, create one when the first term is resolved. If no `CONSTITUTION.md` exists, advise the user to create one using the workflows-constitution command using the context from this session. +Create files lazily -- only when you have something to write. If no `CONTEXT.md` exists, create one when the first term is resolved. If no `CONSTITUTION.md` exists, advise the user to create one using the workflows-constitution command using the context from this session. ## During the session +### Choose the right documentation sink + +Before grilling, decide where concrete decisions belong: + +1. If a plan file exists for the current feature, or the session is clearly continuing plan work, the plan file is the implementation-decision sink. +2. Otherwise, if a brainstorm document exists for the current feature, or the session is clearly continuing brainstorm work, the brainstorm document is the implementation-decision sink. +3. `CONTEXT.md` is only for canonical domain language. 
ADRs remain for cross-feature decisions that deserve a durable architectural record. +4. If neither a plan nor a brainstorm artifact exists, do not invent one just for this skill unless the user explicitly asks for it. + ### Challenge against the glossary When the user uses a term that conflicts with the existing language in `CONTEXT.md`, call it out immediately. "Your glossary defines 'cancellation' as X, but you seem to mean Y — which is it?" @@ -57,6 +70,16 @@ When the user states how something works, check whether the code agrees. If you When a term is resolved, update `CONTEXT.md` right there. Don't batch these up — capture them as they happen. Use the format in [CONTEXT-FORMAT.md](./CONTEXT-FORMAT.md). +### Update the active feature doc inline + +After each question is answered with concrete implementation, architecture, data-shape, API, dependency, boundary, rollout, or operational detail, immediately write it into the active feature doc. Do not wait until the end of the session, and do not leave the decision only in chat history. + +Prefer updating the most specific existing section over inventing a catch-all notes bucket: + +- **Brainstorm doc:** update `## Chosen Approach`, `## Key Decisions`, `## Architectural Context`, and move answered items into `## Resolved Questions`. +- **Plan doc:** update `## Implementation` or `## Overview`, `## Technical Considerations`, `## Architectural Context`, `## Success Criteria`, and the relevant execution slice, acceptance criteria, or file list when the answer changes execution shape. +- If a new answer supersedes earlier wording, edit the earlier section in place so the document stays coherent. + `CONTEXT.md` should be totally devoid of implementation details. Do not treat `CONTEXT.md` as a spec, a scratch pad, or a repository for implementation decisions. It is a glossary and nothing else. 
diff --git a/tests/brownfield-maintenance-command.test.ts b/tests/brownfield-maintenance-command.test.ts index 6f8a43c..aeb4e83 100644 --- a/tests/brownfield-maintenance-command.test.ts +++ b/tests/brownfield-maintenance-command.test.ts @@ -40,7 +40,7 @@ describe("brownfield maintenance command", () => { expect(rootReadme).toContain("/brownfield-maintenance") expect(pluginReadme).toContain("/brownfield-maintenance") - expect(pluginReadme).toContain("Includes 33 specialized agents, 28 commands, and 26 skills.") + expect(pluginReadme).toContain("Includes 34 specialized agents, 28 commands, and 26 skills.") expect(changelog).toContain("**`/brownfield-maintenance` command**") }) }) diff --git a/tests/grill-with-docs-skill.test.ts b/tests/grill-with-docs-skill.test.ts new file mode 100644 index 0000000..f89b1fb --- /dev/null +++ b/tests/grill-with-docs-skill.test.ts @@ -0,0 +1,19 @@ +import { describe, expect, test } from "bun:test" +import { promises as fs } from "fs" +import path from "path" + +const repoRoot = path.join(import.meta.dir, "..") +const skillPath = path.join(repoRoot, "portable", "compound-engineering", "skills", "grill-me", "SKILL.md") + +describe("grill-with-docs skill", () => { + test("routes implementation details into the active feature artifact inline", async () => { + const content = await fs.readFile(skillPath, "utf8") + + expect(content).toContain("the plan file is the implementation-decision sink") + expect(content).toContain("the brainstorm document is the implementation-decision sink") + expect(content).toContain("immediately write it into the active feature doc") + expect(content).toContain("Do not wait until the end of the session") + expect(content).toContain("CONTEXT.md` is only for canonical domain language") + expect(content).toContain("`CONTEXT.md` should be totally devoid of implementation details") + }) +}) diff --git a/tests/tdd-contract.test.ts b/tests/tdd-contract.test.ts index 905b5c8..ed20941 100644 --- 
a/tests/tdd-contract.test.ts +++ b/tests/tdd-contract.test.ts @@ -92,6 +92,23 @@ describe("TDD contract surfaces", () => { expect(executionPrompt).toContain("If no cleanup was needed, still rerun and say so") }) + test("execution agent carries explicit clean-code implementation guardrails", async () => { + const executionAgent = await readRepoFile( + "portable", + "compound-engineering", + "agents", + "workflow", + "execution-agent.md", + ) + + expect(executionAgent).toContain("## Clean-code operating rules") + expect(executionAgent).toContain("Apply DRY by reason to change") + expect(executionAgent).toContain("Apply SOLID deliberately") + expect(executionAgent).toContain("Add doc blocks or docstrings above public or exported functions") + expect(executionAgent).toContain("Keep imports at the top of the file") + expect(executionAgent).toContain("Fail explicitly") + }) + test("review prompts reject weak evidence and separate behavior coverage from cleanup quality", async () => { const reviewPrompt = await readRepoFile( "portable", diff --git a/tests/ticket-flow-review-contract.test.ts b/tests/ticket-flow-review-contract.test.ts index d333b20..88bc172 100644 --- a/tests/ticket-flow-review-contract.test.ts +++ b/tests/ticket-flow-review-contract.test.ts @@ -34,6 +34,8 @@ describe("ticket flow review contract", () => { expect(agent).toContain("## Workflow") expect(agent).toContain("ticket-set audit") expect(agent).toContain("implementation audit") + expect(agent).toContain("execution-batch partitioning") + expect(agent).toContain("Batch safety notes") expect(reviewPrompt).toContain("docs/tickets/*/index.md") expect(reviewPrompt).toContain("`docs/tickets/**/*.md`") expect(reviewPrompt).toContain("Ticket Set:") diff --git a/tests/ticket-scoped-work-execution.test.ts b/tests/ticket-scoped-work-execution.test.ts index af0f66b..5d51567 100644 --- a/tests/ticket-scoped-work-execution.test.ts +++ b/tests/ticket-scoped-work-execution.test.ts @@ -9,7 +9,7 @@ async function 
readRepoFile(...segments: string[]): Promise<string> { } describe("ticket-scoped work execution", () => { - test("work can execute a ticket artifact without reloading the whole backlog", async () => { + test("work can execute a ticket index or ticket artifact without reloading the whole backlog", async () => { const workPrompt = await readRepoFile( "portable", "compound-engineering", @@ -25,18 +25,37 @@ describe("ticket-scoped work execution", () => { "references", "execution-agent-prompt.md", ) + const executionAgent = await readRepoFile( + "portable", + "compound-engineering", + "agents", + "workflow", + "execution-agent.md", + ) - expect(workPrompt).toContain("[plan file, ticket file, specification, or todo file path]") + expect(workPrompt).toContain("[plan file, ticket index, ticket file, specification, or todo file path]") expect(workPrompt).toContain("ticket-execution-contract.md") - expect(workPrompt).toContain("The ticket is the primary execution unit") - expect(workPrompt).toContain("source_type: [plan | ticket | specification | todo]") + expect(workPrompt).toContain("When the input is a ticket index, that index becomes the authoritative execution queue") + expect(workPrompt).toContain("that ticket becomes the primary execution packet") + expect(workPrompt).toContain("Ticket execution must always go through the bundled `execution-agent`") + expect(workPrompt).toContain("source_type: [plan | ticket-index | ticket | specification | todo]") + expect(workPrompt).toContain("ticket_index: [path to ticket index, if applicable]") expect(workPrompt).toContain("ticket_file: [path to ticket, if applicable]") + expect(workPrompt).toContain("last_completed_batch") + expect(workPrompt).toContain("execute the next unresolved batch as written") expect(workPrompt).toContain("Prefer ticket-defined unit directly") + expect(workPrompt).toContain("Named Agent Dispatch") + expect(workPrompt).toContain("Task(execution-agent, prompt=scoped_prompt)") 
expect(workPrompt).toContain("{{PARENT_REFS}}") expect(workPrompt).toContain("{{TICKET_LOCAL_CONTEXT}}") + expect(workPrompt).toContain("ticket-index input: update the selected batch status in `index.md`") expect(workPrompt).toContain("update the ticket `status` field") expect(executionPrompt).toContain("**Parent refs:** {{PARENT_REFS}}") expect(executionPrompt).toContain("## Ticket-local context") expect(executionPrompt).toContain("{{TICKET_LOCAL_CONTEXT}}") + expect(executionPrompt).toContain("named `execution-agent`") + expect(executionAgent).toContain("name: execution-agent") + expect(executionAgent).toContain("doc blocks or docstrings above public or exported functions") + expect(executionAgent).toContain("Keep imports at the top of the file") }) }) diff --git a/tests/ticketization-workflow-contract.test.ts b/tests/ticketization-workflow-contract.test.ts index b8ff516..818a546 100644 --- a/tests/ticketization-workflow-contract.test.ts +++ b/tests/ticketization-workflow-contract.test.ts @@ -42,6 +42,9 @@ describe("ticketization workflow contract", () => { expect(contract).toContain("docs/tickets/YYYY-MM-DD-/") expect(contract).toContain("tickets_ref:") expect(contract).toContain("local-artifact first") + expect(contract).toContain("## Dependency graph and execution batches") + expect(contract).toContain("Default-to-sequential rule") + expect(contract).toContain("last_completed_batch") expect(contract).toContain("## Required ticket-local context") expect(contract).toContain("## Final ticket-set review") expect(contract).toContain("ticket-flow-auditor") @@ -49,6 +52,8 @@ describe("ticketization workflow contract", () => { expect(executionContract).toContain("ticket_id:") expect(executionContract).toContain("## Required ticket body") expect(executionContract).toContain("status: ready") + expect(executionContract).toContain("last_completed_batch") + expect(executionContract).toContain("## Execution Batches") expect(primingSkill).toContain("# Focused Ticket 
Priming") expect(primingSkill).toContain("ticket-execution-contract.md") expect(primingSkill).toContain("Use when converting plans into local") @@ -77,6 +82,8 @@ describe("ticketization workflow contract", () => { expect(command).toContain("docs/tickets/YYYY-MM-DD-/") expect(command).toContain("ticket-flow-auditor") expect(command).toContain("Run the final ticket-set review sweep") + expect(command).toContain("dependency graph") + expect(command).toContain("last_completed_batch") }) test("documents the new workflow step and shipped command surface", async () => { @@ -94,7 +101,7 @@ describe("ticketization workflow contract", () => { expect(rootReadme).toContain("plan -> architecture -> deepen-plan -> to-issues -> work") expect(rootReadme).toContain("`/workflows:to-issues`") expect(rootReadme).toContain("docs/tickets/") - expect(pluginReadme).toContain("Includes 33 specialized agents, 28 commands, and 26 skills.") + expect(pluginReadme).toContain("Includes 34 specialized agents, 28 commands, and 26 skills.") expect(pluginReadme).toContain("| Commands | 28 |") expect(pluginReadme).toContain("`/workflows:to-issues`") expect(planPrompt).toContain("Run `/workflows:to-issues`")