From 5fd526bb46e67be86203a9148e091a18fd412a2c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 21 Apr 2026 04:23:03 +0000 Subject: [PATCH 1/2] feat(issue): resolve #2 (01) --- .ai/prompts/harden.md | 84 +++ .ai/scafld/OPERATORS.md | 131 +++++ .ai/scafld/README.md | 72 +++ .ai/scafld/config.yaml | 316 +++++++++++ .ai/scafld/manifest.json | 49 ++ .ai/scafld/prompts/exec.md | 307 +++++++++++ .ai/scafld/prompts/harden.md | 84 +++ .ai/scafld/prompts/plan.md | 203 +++++++ .ai/scafld/prompts/review.md | 169 ++++++ .ai/scafld/schemas/spec.json | 514 ++++++++++++++++++ .ai/scafld/specs/README.md | 99 ++++ .../specs/examples/add-error-codes.yaml | 365 +++++++++++++ .ai/specs/archive/2026-04/issue-2.yaml | 114 ++++ docs/flows.md | 5 +- 14 files changed, 2510 insertions(+), 2 deletions(-) create mode 100644 .ai/prompts/harden.md create mode 100644 .ai/scafld/OPERATORS.md create mode 100644 .ai/scafld/README.md create mode 100644 .ai/scafld/config.yaml create mode 100644 .ai/scafld/manifest.json create mode 100644 .ai/scafld/prompts/exec.md create mode 100644 .ai/scafld/prompts/harden.md create mode 100644 .ai/scafld/prompts/plan.md create mode 100644 .ai/scafld/prompts/review.md create mode 100644 .ai/scafld/schemas/spec.json create mode 100644 .ai/scafld/specs/README.md create mode 100644 .ai/scafld/specs/examples/add-error-codes.yaml create mode 100644 .ai/specs/archive/2026-04/issue-2.yaml diff --git a/.ai/prompts/harden.md b/.ai/prompts/harden.md new file mode 100644 index 0000000..ed6041c --- /dev/null +++ b/.ai/prompts/harden.md @@ -0,0 +1,84 @@ +# AI AGENT — HARDEN MODE + +**Status:** ACTIVE +**Mode:** HARDEN +**Output:** Append a round to `harden_rounds` in the spec; update `harden_status`. +**Do NOT:** Modify code outside the spec file while hardening. + +--- + +## Mission + +Interrogate the draft spec relentlessly until the operator and agent reach shared understanding. 
Walk down each branch of the design tree, resolving dependencies between decisions one-by-one — upstream choices first, so downstream questions are not wasted on premises that may shift. Stop when the operator says so, or when you run out of grounded questions. + +Harden is OPTIONAL and operator-driven. `scafld approve` does NOT gate on harden status. The operator runs `scafld harden <task-id>` when they want to stress-test a draft; they can skip it for trivial or well-understood specs. + +--- + +## Grounding Contract (load-bearing — read carefully) + +**Every question you emit MUST carry a `grounded_in` value matching EXACTLY ONE of these three patterns:** + +- `spec_gap:<field>` — a TODO, `?`, empty array, vague clause, or internal contradiction at the named spec field. Example: `spec_gap:task.context.files_impacted`. +- `code:<file>:<line>` — a symbol or location verified by `Read` or `Grep` in the CURRENT session before the question is emitted. You must actually look at the file. Example: `code:cli/scafld:1152`. +- `archive:<task_id>` — a precedent in `.ai/specs/archive/` that bears on the current decision. Example: `archive:configurable-review-pipeline`. + +**Forbidden:** + +- Questions about behaviour the spec already answers. +- Citations to files you have not verified in this session. +- Recommended answers without their own citation. +- Invented file paths, function names, or archive task_ids. + +If you cannot produce a grounded question, stop. Do not invent one to pad the round. + +--- + +## Question Loop + +Ask ONE question at a time. For each question, provide: + +- The question itself (specific, answerable). +- `grounded_in` using one of the three patterns above. +- A **recommended answer** with its own citation (code, spec section, or archive). +- An `if_unanswered` default — what to write into the spec if the operator does not answer. This lets the loop terminate on a single side. + +Cap at `max_questions_per_round` from `.ai/config.yaml` (default 8). 
If you reach the cap without resolving the tree, stop and let the operator decide whether to start another round. + +Dependency ordering: before asking a downstream question, confirm its upstream premise is settled. If you ask "how does phase 3 validate X" before confirming "does the spec actually do X in phase 2", you are wasting the round. + +--- + +## Termination + +The loop ends when ANY of these happens: + +- Operator types `done` or `stop`. +- You run out of grounded questions (your three patterns are exhausted). +- You hit `max_questions_per_round`. + +There is no in-prompt `skip` keyword. If the operator does not want to harden, they simply do not run `scafld harden`. + +--- + +## Output Contract + +Write your round into the spec's `harden_rounds` array using the schema in `.ai/schemas/spec.json`. Each round: + +```yaml +harden_rounds: + - round: 1 + started_at: "2026-04-20T15:00:00Z" + ended_at: "2026-04-20T15:12:00Z" + outcome: "in_progress" # or passed, abandoned + questions: + - question: "Which module owns the session cleanup hook?" + grounded_in: "code:src/auth/session.ts:84" + recommended_answer: "src/auth/session.ts:cleanupSession (already defined)" + if_unanswered: "Default to existing cleanupSession; flag for confirmation." + answered_with: "(operator fills in)" +``` + +While the loop runs, set top-level `harden_status: "in_progress"`. The operator finalises a satisfactory round by running `scafld harden --mark-passed` — do NOT set `harden_status: passed` from the prompt loop. + +Re-running `scafld harden` on a spec that is already `passed` resets status to `in_progress` and appends a new round; prior rounds are preserved as audit trail. diff --git a/.ai/scafld/OPERATORS.md b/.ai/scafld/OPERATORS.md new file mode 100644 index 0000000..5a09f54 --- /dev/null +++ b/.ai/scafld/OPERATORS.md @@ -0,0 +1,131 @@ +# scafld — Operator Cheat Sheet + +A short, human-friendly guide for working with scafld task specs. 
+For full details, see `.ai/README.md` and `.ai/specs/README.md`. + +--- + +## 1. Tiny Change (Micro/Small, Low Risk) + +Use this for trivial, low-risk edits (comments, copy tweaks, tiny refactors). + +- In the spec: + - `task.size: "micro"` or `"small"` + - `task.risk_level: "low"` + - Optionally set `task.acceptance.validation_profile: "light"` +- Workflow: + - Plan: generate/update spec under `.ai/specs/drafts/` + - Approve: move to `.ai/specs/approved/` and set `status: "approved"` + - Execute: move to `.ai/specs/active/` and set `status: "in_progress"` + - Complete: move to `.ai/specs/archive/YYYY-MM/` and set `status: "completed"` + +--- + +## 2. Normal Task (Small/Medium, Medium Risk) + +Use this for typical feature work and non-trivial refactors. + +- In the spec: + - `task.size: "small"` or `"medium"` + - `task.risk_level: "medium"` + - Usually `task.acceptance.validation_profile: "standard"` +- Workflow: + - Plan: ensure `task.acceptance.definition_of_done` and `phases[*].acceptance_criteria` tell the same story. + - Approve: move to approved folder + - Execute: run all `acceptance_criteria` plus per-phase validation + - Complete: run full standard profile validation before archiving + +--- + +## 3. Big Change (Medium/Large, High Risk) + +Use this for high-impact work (auth, persistence, complex refactors). + +- In the spec: + - `task.size: "medium"` or `"large"` + - `task.risk_level: "high"` + - Usually `task.acceptance.validation_profile: "strict"` +- Workflow: + - Plan: + - Explicitly call out invariants and risks + - Use multiple phases with narrow scopes and strong acceptance criteria + - Approve: move to approved folder + - Execute: run all per-phase checks plus full `strict` profile + - Complete: thorough validation before archiving + +--- + +## 4. 
Quick Commands Reference + +```bash +scafld new my-task -t "My feature" -s small -r low # scaffold spec +scafld list # show all specs +scafld list active # filter by status +scafld status my-task # show details + phase progress +scafld validate my-task # check against schema +scafld harden my-task # optional: interrogate draft one grounded question at a time +scafld harden my-task --mark-passed # close the latest hardening round +scafld approve my-task # drafts/ -> approved/ (does not require harden) +scafld start my-task # approved/ -> active/ +scafld exec my-task # run acceptance criteria, record results +scafld exec my-task -p phase1 # run criteria for one phase only +scafld audit my-task # compare spec files vs git diff +scafld audit my-task -b main # audit against specific base ref +scafld diff my-task # show git history for spec +scafld review my-task # run configured automated passes + scaffold Review Artifact v3 +scafld complete my-task # read review, record verdict, archive (requires review) +scafld complete my-task --human-reviewed --reason "manual audit" # exceptional audited override when the review gate is blocked +scafld fail my-task # active/ -> archive/ (failed) +scafld cancel my-task # active/ -> archive/ (cancelled) +scafld report # aggregate stats across all specs +``` + +--- + +## 5. Validation Profiles + +| Profile | When to Use | What Runs | +|---------|-------------|-----------| +| `light` | micro/small, low risk | compile, acceptance items, perf eval | +| `standard` | small/medium, medium risk | compile, tests, lint, typecheck, security, perf eval | +| `strict` | medium/large, high risk | all standard checks + broader coverage | + +--- + +## 6. Status Lifecycle + +``` +draft → under_review → approved → in_progress → review → completed + ↓ ↓ + (blocked) failed + ↓ ↑ + (resume) fix + re-review +``` + +--- + +## 7. 
Review & Completion Workflow + +After execution, before completing: + +```bash +scafld review my-task # runs automated passes, scaffolds adversarial review + # reviewer fills in findings + Review Artifact v3 metadata in .ai/reviews/my-task.md +scafld complete my-task # reads review, records verdict, archives + # refuses if the latest review round is missing, malformed, incomplete, or failed +scafld complete my-task --human-reviewed --reason "manual audit" + # exceptional audited override; requires interactive confirmation +``` + +Review rounds accumulate — each `scafld review` appends a numbered Review Artifact v3 section with per-pass `pass_results`. The default five-layer pipeline is `spec_compliance`, `scope_drift`, `regression_hunt`, `convention_check`, and `dark_patterns`, ordered by explicit `order` fields in `.ai/config.yaml`. Prior rounds provide context for subsequent reviewers and make review provenance visible. + +--- + +## 8. Tips + +- **Always read the spec before executing** — understand what you're building +- **Keep phases small** — easier to validate and rollback +- **Run `scafld review` before completing** — the adversarial review catches what acceptance criteria miss +- **Review in a fresh session when possible** — avoids confirmation bias from the execution session +- **Self-eval honestly** — the 7/10 threshold keeps quality high; 10/10 requires justification +- **Archive completed specs** — they're your project history diff --git a/.ai/scafld/README.md b/.ai/scafld/README.md new file mode 100644 index 0000000..f29b1b5 --- /dev/null +++ b/.ai/scafld/README.md @@ -0,0 +1,72 @@ +# scafld - Planning & Execution Framework + +**Version:** 1.0 + +scafld is a spec-driven framework for AI agent task planning and execution. Every task becomes a machine-readable YAML specification that flows through a defined lifecycle: plan, approve, execute, archive. + +--- + +## How It Works + +1. 
**Plan:** AI generates a task spec in `.ai/specs/drafts/` via conversational ReAct loop +2. **Harden (optional):** `scafld harden <task-id>` interrogates the draft one grounded question at a time. Every question and recommended answer cites a spec gap, a verified code location, or an archived precedent. Run on high-risk or ambiguous specs; skip on trivial ones. +3. **Approve:** Developer reviews and moves spec to `.ai/specs/approved/`. Approve does NOT consult harden status. +4. **Execute:** AI picks up the approved spec, executes phases, validates at each checkpoint +5. **Review:** Adversarial review finds what execution missed — `scafld review` runs the configured `spec_compliance` and `scope_drift` checks, scaffolds Review Artifact v3, and prepares the adversarial `regression_hunt`, `convention_check`, and `dark_patterns` passes in the latest round +6. **Archive:** Completed specs move to `.ai/specs/archive/YYYY-MM/` with truthful review results recorded, or a human-reviewed override audited explicitly when the gate is blocked + +The approval gate is the human oversight boundary. The review gate is the quality boundary. During execution, the agent operates autonomously through all phases, pausing only when blocked or deviating from the spec. A normal completion path still stays agent-driven; the human-reviewed override is an exceptional audited escape hatch, not the default workflow. + +The default review topology lives in `config.yaml` and uses five ordered built-in passes: `spec_compliance`, `scope_drift`, `regression_hunt`, `convention_check`, and `dark_patterns`. Review Artifact v3 stores per-pass `pass_results`, reviewer provenance, and round status for that configured topology. 
+ +--- + +## Directory Structure + +``` +.ai/ +├── README.md # This file +├── config.yaml # Global configuration (invariants, validation, rubric) +├── prompts/ +│ ├── plan.md # Planning mode instructions +│ ├── exec.md # Execution mode instructions +│ └── review.md # Adversarial review mode instructions +├── reviews/ # Review findings per spec (gitignored) +├── schemas/ +│ └── spec.json # JSON schema for task specifications +├── specs/ # Task specs organized by lifecycle status +│ ├── README.md # Spec workflow and naming conventions +│ ├── drafts/ # status: draft | under_review +│ ├── approved/ # status: approved +│ ├── active/ # status: in_progress +│ └── archive/YYYY-MM/ # status: completed | failed | cancelled +├── playbooks/ # Reusable workflow templates (optional) +└── logs/ # Execution logs (optional, supplementary) +``` + +--- + +## Key Files + +| File | Purpose | +|------|---------| +| `config.yaml` | Invariants, validation profiles, rubric weights, safety rules | +| `prompts/plan.md` | System prompt for planning mode agents | +| `prompts/exec.md` | System prompt for execution mode agents | +| `prompts/review.md` | System prompt for adversarial review mode | +| `schemas/spec.json` | JSON schema for spec validation | +| `specs/README.md` | Spec directory structure, naming, and workflow | + +--- + +## Related Docs + +- [AGENTS.md](../AGENTS.md) - High-level AI agent policies +- [OPERATORS.md](OPERATORS.md) - Human-facing cheat sheet for working with specs +- [CONVENTIONS.md](../CONVENTIONS.md) - Coding standards and patterns + +--- + +## License + +MIT License - Free to use, modify, and distribute. 
diff --git a/.ai/scafld/config.yaml b/.ai/scafld/config.yaml new file mode 100644 index 0000000..e4124b1 --- /dev/null +++ b/.ai/scafld/config.yaml @@ -0,0 +1,316 @@ +# scafld Configuration +# Version: 1.1 +# Purpose: Machine-readable control file for AI coding agents + +version: "1.0" + +# Status lifecycle: See specs/README.md for canonical state machine and transitions + +# ============================================================================= +# INVARIANTS (immutable during session) +# ============================================================================= +invariants: + # CUSTOMIZE: Replace these with your project's architectural invariants. + # These names are referenced in specs (context.invariants) and enforced during execution. + canonical: + - domain_boundaries # Respect layer separation + - error_envelope # Consistent error format + - no_legacy_code # No dual-reads, dual-writes, or runtime fallbacks + - no_test_logic_in_production # Keep test-only code in test files + - public_api_stable # Public APIs require approval to change + - config_from_env # Configuration from environment, never hardcoded + + # Code quality policies + no_legacy_code: true + no_test_logic_in_production: true + + # Change control + public_api_changes: require_approval # schemas, migrations, HTTP/event shapes + + # See also: ../CONVENTIONS.md for detailed coding standards + +# ============================================================================= +# MODES (planning vs execution) +# ============================================================================= +modes: + planning: + # Output: generate .ai/specs/{task-id}.yaml + output_format: spec_file + + # Requirements for a valid plan + require_task_outline: true + require_touchpoints: true + require_acceptance_checklist: true + + # Quality gate + self_eval_threshold: 7 + + # ReAct behavior + exploration_depth: thorough + show_reasoning: true + + execution: + # Input: load approved .ai/specs/{task-id}.yaml + 
input_format: spec_file + require_approval: true + + # Checkpoint frequency + checkpoint_frequency: per_phase + + # Quality controls + self_review: mandatory + rollback_on_fail: true + strict_spec_adherence: true + + # Output style + progress_format: concise + show_reasoning: true + +# ============================================================================= +# HARDEN +# ============================================================================= +# Optional pre-approval interrogation phase. Operator-driven: `scafld approve` +# does NOT gate on harden status. Only non-gating knobs live here. +harden: + max_questions_per_round: 8 # cap per `scafld harden` invocation + grounding_required: true # forbid ungrounded questions in the prompt + +# ============================================================================= +# VALIDATION PIPELINES +# ============================================================================= +# CUSTOMIZE: Replace placeholder commands below with your actual build/test/lint commands. 
+validation: + # Run after each phase (fast, targeted) + # Placeholders: + # - {spec_pattern}: test file or example filter for the current phase + # - {changed_files}: union of phases[N].changes[*].file for the current phase + per_phase: + - id: compile_check + type: command + command: "echo 'Replace with your compile/build check command'" + required: true + + - id: targeted_tests + type: command + command: "echo 'Replace with your test command, e.g.: npm test -- {spec_pattern}'" + required: true + + - id: boundary_check + type: command + command: "echo 'Replace with your boundary/integration check, e.g.: cross-module dependency scan'" + description: "Verify no cross-module side effects (used by strict profile)" + required: true + + - id: acceptance_item_check + type: spec_validation + description: "Verify all phase acceptance_criteria pass" + required: true + + # Run once before commit (comprehensive) + pre_commit: + - id: full_test_suite + type: command + command: "echo 'Replace with your full test suite command'" + required: true + + - id: linter_suite + type: command + command: "echo 'Replace with your linter command'" + required: false # warn only + + - id: typecheck + type: command + command: "echo 'Replace with your typecheck command'" + required: true + + - id: security_scan + type: command + command: "rg -i '(password|secret|api[_-]?key)\\s*=\\s*[\"']\\w' --type-add 'code:*.{js,ts,py,rb,go,java}' --type code" + description: "Detect hardcoded secrets" + required: true + expected: "no matches" + + - id: perf_eval + type: self_evaluation + description: "AI scores its work against rubric" + threshold: 7 + required: true + + # Validation profiles map task risk/size to concrete per_phase + pre_commit steps. + # EXEC agents should prefer `task.acceptance.validation_profile` when present; + # otherwise derive a profile from `task.risk_level`: + # low → light, medium → standard, high → strict. 
+ profiles: + # Light: compile + acceptance only, quick feedback loop + light: + per_phase: ["compile_check", "acceptance_item_check"] + pre_commit: ["perf_eval"] + # Standard: adds targeted tests per phase, full validation at commit + standard: + per_phase: ["compile_check", "targeted_tests", "acceptance_item_check"] + pre_commit: ["full_test_suite", "linter_suite", "typecheck", "security_scan", "perf_eval"] + # Strict: broader test coverage per phase (boundary check ensures no + # cross-module side effects), plus all pre_commit checks from standard + strict: + per_phase: ["compile_check", "targeted_tests", "boundary_check", "acceptance_item_check"] + pre_commit: ["full_test_suite", "linter_suite", "typecheck", "security_scan", "perf_eval"] + +# ============================================================================= +# SELF-EVALUATION RUBRIC +# ============================================================================= +rubric: + # Scoring dimensions (0-10 scale) + completeness: + weight: 3 + description: "0=partial, 1=meets ask, 2=edge cases, 3=edge cases + conventions" + + architecture_fidelity: + weight: 3 + description: "0=unclear, 1=respects boundaries, 2=uses patterns, 3=improves separation" + + spec_alignment: + weight: 2 + description: "0=not checked, 1=aligned, 2=proposed improvements" + + validation_depth: + weight: 2 + description: "0=missing, 1=targeted, 2=targeted + broader checks" + + # Minimum acceptable score + threshold: 7 + + # Action on low score + on_below_threshold: "perform_second_pass" + +# ============================================================================= +# ADVERSARIAL REVIEW +# ============================================================================= +# Mandatory review gate before scafld complete can archive a spec. +# Every spec gets the same review — no profiles. +# Recommended: run the agent review in a fresh context/session. +review: + # Review pipeline is built from named built-in passes only. 
+ # Ordering is explicit; scafld sorts by `order`, not mapping insertion luck. + automated_passes: + spec_compliance: + order: 10 + title: "Spec Compliance" + description: "Re-run acceptance criteria to verify code satisfies the spec" + scope_drift: + order: 20 + title: "Scope Drift" + description: "Compare spec scope vs actual git diff and flag undeclared changes" + + adversarial_passes: + regression_hunt: + order: 30 + title: "Regression Hunt" + description: "Trace callers, importers, and downstream consumers for regressions" + convention_check: + order: 40 + title: "Convention Check" + description: "Check changed code against CONVENTIONS.md and AGENTS.md" + dark_patterns: + order: 50 + title: "Dark Patterns" + description: "Hunt for subtle bugs, hardcodes, races, and safety gaps" + +# ============================================================================= +# REACT PATTERN (reasoning + acting) +# ============================================================================= +react: + enabled: true + + # Cycle structure + cycle: + - thought: "Analyze the task/phase objective" + - action: "Search codebase, read files, or apply changes" + - observation: "Capture results, check outputs" + - thought: "Evaluate success, decide next step" + + # Reasoning visibility + log_thoughts: true + + # Iteration limits + max_cycles_per_phase: 10 + max_cycles_planning: 20 + +# ============================================================================= +# TECH STACK CONTEXT (customize for your project) +# ============================================================================= +tech_stack: + # CUSTOMIZE: Replace with your actual tech stack + backend: + language: "Your language (e.g., Python 3.11, Ruby 3.2, Go 1.21)" + framework: "Your framework (e.g., Django, Rails, FastAPI)" + + frontend: + framework: "Your framework (e.g., React, Vue, Next.js)" + typescript_version: "5.x" + + shared: + error_format: "Your error format (e.g., Problem+JSON RFC 7807)" + api_spec: "Your 
API spec format (e.g., OpenAPI 3.1)" + +# ============================================================================= +# REPO LAYOUT (customize for your project) +# ============================================================================= +repo_layout: + # CUSTOMIZE: Replace with your actual directory layout + backend: "backend/" + frontend: "frontend/" + specs: ".ai/specs/" + logs: ".ai/logs/" + +# ============================================================================= +# COMMUNICATION STYLE +# ============================================================================= +communication: + # Progress updates during EXEC mode + progress: + format: concise + include_reasoning: false + include_acceptance_status: true + + # When blocked + blocking_issues: + format: structured + require_recommendation: true + + # Final summary + completion: + include_perf_eval: true + include_deviations: true + include_next_actions: true + +# ============================================================================= +# SAFETY & SECURITY +# ============================================================================= +safety: + # Destructive operations + require_approval_for: + - schema_migrations + - public_api_changes + - data_deletion + - production_deployments + + # Automatic checks + prevent: + - hardcoded_secrets + - unbounded_queries + - sql_injection_patterns + - xss_vulnerabilities + +# ============================================================================= +# EXPERIMENTAL FEATURES +# ============================================================================= +experimental: + # Auto-generate acceptance criteria from natural language + auto_acceptance_criteria: true + + # Self-healing: auto-fix failed acceptance criteria (1 retry) + self_healing: true + max_healing_attempts: 1 + + # Parallel phase execution (if phases are independent) + parallel_execution: false diff --git a/.ai/scafld/manifest.json b/.ai/scafld/manifest.json new file mode 100644 index 
0000000..0c0e759 --- /dev/null +++ b/.ai/scafld/manifest.json @@ -0,0 +1,49 @@ +{ + "managed_assets": { + ".ai/scafld/OPERATORS.md": { + "sha256": "adcfc7d3c6855592e77df1f5fa9575e51c951f27c45ea905e39edceba73a244c", + "source": ".ai/OPERATORS.md" + }, + ".ai/scafld/README.md": { + "sha256": "a2fc41a54f1a862fc197800503b0176771f7d384087c66215098d11da283f311", + "source": ".ai/README.md" + }, + ".ai/scafld/config.yaml": { + "sha256": "9242459b820acdb35cf84d080a793e916003f32211c18fb7bd3fac219567d6b6", + "source": ".ai/config.yaml" + }, + ".ai/scafld/prompts/exec.md": { + "sha256": "de8630ef115c368b3343da095d474dce28ecdaa6220eeb898c54a4aa95bc5e88", + "source": ".ai/prompts/exec.md" + }, + ".ai/scafld/prompts/harden.md": { + "sha256": "57c1c3f1bac5eba042c91c8284064bc3f89c1f296b1aedfcce5485a4ac1e26cb", + "source": ".ai/prompts/harden.md" + }, + ".ai/scafld/prompts/plan.md": { + "sha256": "5feb66faf88ec9c85ea404506f166a2c9ca763b0fba78e85952f8bc339855325", + "source": ".ai/prompts/plan.md" + }, + ".ai/scafld/prompts/review.md": { + "sha256": "61a9a8ce6b495c89993a2dd7aa81f675be9eb00cef9c82a4dcbc4a355171f73c", + "source": ".ai/prompts/review.md" + }, + ".ai/scafld/schemas/spec.json": { + "sha256": "fc3d3bd91614ff24942a1d7b9be4cfb556cecd86fc3751f295f2db1eefbc7b5e", + "source": ".ai/schemas/spec.json" + }, + ".ai/scafld/specs/README.md": { + "sha256": "e73a3c6f5d68762780ad0248510e8d96bd8685f215a2a9438b88e434a7c96b70", + "source": ".ai/specs/README.md" + }, + ".ai/scafld/specs/examples/add-error-codes.yaml": { + "sha256": "e44e65bf3547a262d114f515fe9b3dcd648789877e1958547a587619b1fb41d5", + "source": ".ai/specs/examples/add-error-codes.yaml" + } + }, + "scafld_version": "1.4.6", + "schema_version": 1, + "source_commit": "e15050de871ca3c9aba53c6199271a13140e4fcc", + "source_dirty": false, + "workspace_config_mode": "legacy_overlay" +} diff --git a/.ai/scafld/prompts/exec.md b/.ai/scafld/prompts/exec.md new file mode 100644 index 0000000..3150101 --- /dev/null +++ 
b/.ai/scafld/prompts/exec.md @@ -0,0 +1,307 @@ +# AI AGENT — EXECUTION MODE + +**Status:** ACTIVE +**Mode:** EXEC +**Input:** Approved specification file (`.ai/specs/approved/{task-id}.yaml`, promoted to `.ai/specs/active/{task-id}.yaml` when execution starts) +**Output:** Code changes, test runs, validation results + +--- + +## Mission + +You are an AI agent in **EXECUTION MODE**. Your objective is to execute an approved task specification, validating your work at every checkpoint, and delivering production-ready code. + +--- + +## Prerequisites + +Before entering execution mode: + +1. **Load Spec:** Read from `.ai/specs/approved/{task-id}.yaml` +2. **Verify Status:** `spec.status` MUST be `"approved"` +3. **Move to Active:** Move spec to `.ai/specs/active/{task-id}.yaml` +4. **Update Status:** Set `status: "in_progress"` in spec file + +If spec not in `approved/` folder or status is NOT approved: +``` +Cannot execute: Spec must be in approved/ folder with status "approved" + Check: .ai/specs/approved/{task-id}.yaml + Action: Complete planning and approval first, or move file to approved/ +``` + +--- + +## Resume Protocol + +If the spec is already in `.ai/specs/active/` with `status: "in_progress"` and some phases have `status: "completed"`: + +1. **Skip completed phases** - do not re-execute them +2. **Resume from the first phase with `status: "pending"` or `status: "failed"`** +3. If a failed phase has rollback commands, verify the rollback was applied before retrying +4. Log the resume point in the spec's `planning_log` or phase status + +--- + +## Per-Phase Execution + +For **each phase**, follow this cycle: + +### 1. Read & Plan +- Read phase objective and changes specification +- Identify files to modify and acceptance criteria to satisfy +- Predict potential issues (boundary violations, test failures) + +### 2. 
Apply Changes +- **Read first:** `Read(file)` to understand current state +- **Edit precisely:** Use `Edit()` with exact old_string/new_string +- **Match intent:** Does the change match `content_spec`? + +### 3. Validate +- Run ALL `acceptance_criteria` for this phase +- Record pass/fail status and output +- Update the spec's phase entry with results: + +```yaml +# Update phase status and acceptance criteria results inline +phases[N]: + status: "completed" # or "failed" + acceptance_criteria: + - id: ac1_1 + result: + status: pass + timestamp: "2025-01-17T11:45:30Z" + output: "{stdout/stderr summary}" +``` + +### 4. Decide +- **If ALL criteria pass:** Mark phase `status: "completed"`, proceed to next phase +- **If ANY criterion fails:** + 1. Attempt self-healing (1 retry max, if enabled in config) + 2. If still failing, rollback phase changes + 3. Mark phase `status: "failed"` and report to user + +Set `phases[N].status` to `"in_progress"` when you begin work on a phase +and update it to `"completed"` or `"failed"` based on acceptance criteria results. + +### Phase Logging + +After completing each phase, write a brief summary to the phase's status in the spec file. This is the primary record of execution progress. Example: + +```yaml +phases[N]: + status: "completed" + summary: "Added error constants to errors module, all 3 acceptance criteria passed" +``` + +The `.ai/logs/{task-id}.log` file is optional and supplementary - use it for detailed debugging traces when needed, but it is not required. 
+ +--- + +## Acceptance Criteria + +For each `acceptance_criteria` item: + +```yaml +- id: ac1_1 + type: compile + command: "your-compile-command" + expected: "exit code 0" +``` + +**Common criterion types:** + +| Type | Command Example | Expected | Validation | +|------|----------------|----------|------------| +| `compile` | `your-compile-command` | `exit code 0` | Automated | +| `test` | `your-test-command {spec_pattern}` | `PASS` | Automated | +| `boundary` | `rg 'forbidden_pattern' {changed_files}` | `no matches` | Automated | +| `integration` | `your-e2e-command` | `exit code 0` | Automated | +| `security` | `rg -i 'password\\s*=\\s*"\\w+"'` | `no matches` | Automated | +| `documentation` | N/A | See `description` | Manual | +| `custom` | N/A | See `description` | Manual | + +**Placeholder Reference:** + +- **`{spec_pattern}`** - Test file path or example filter for the current phase +- **`{changed_files}`** - Union of `phases[N].changes[*].file` for the phase being validated + +--- + +## Definition-of-Done Checklist + +- Treat `task.acceptance.definition_of_done[*]` as hard requirements. +- When a DoD item is satisfied, update its `status` to `done`. +- Keep statuses in sync with reality; reviewers rely on this checklist. 
+ +### Self-Review (Per Phase) + +After running acceptance criteria, verify: + +- [ ] All criteria passed (or failures documented) +- [ ] Update `task.acceptance.definition_of_done` entries related to this phase +- [ ] No boundary violations introduced +- [ ] Diff matches `phase.changes.content_spec` (no scope creep) +- [ ] No secrets or internal paths added + +--- + +## Final Validation (After All Phases) + +Once all phases complete, run pre-commit validation using the appropriate profile from `.ai/config.yaml`: + +- Determine profile: + - Prefer `task.acceptance.validation_profile` if set (`light | standard | strict`) + - Otherwise derive from `task.risk_level` (`low` -> `light`, `medium` -> `standard`, `high` -> `strict`) +- For the chosen profile, run the listed validation steps. + +--- + +## Adversarial Review + +After all phases complete and before `scafld complete`: + +1. Run `scafld review {task-id}` — runs automated passes (spec compliance, scope drift) and generates the review file +2. Start a **fresh agent session** when available to reduce confirmation bias +3. Read `.ai/prompts/review.md` for the review prompt and attack vectors +4. Review the spec + git diff, write findings to `.ai/reviews/{task-id}.md`, and update the latest round's review provenance metadata +5. Fix any blocking findings if needed +6. Run `scafld complete {task-id}` — reads the review, records verdict, archives + +The default Review Artifact v3 pipeline is `spec_compliance`, `scope_drift`, `regression_hunt`, `convention_check`, and `dark_patterns`. `scafld review` scaffolds the adversarial sections in configured order and expects the reviewer to update `round_status` plus per-pass `pass_results` before completion. + +`scafld complete` will **refuse to archive** if the latest review round is missing, malformed, incomplete, or failed. The only bypass is the exceptional human path: `scafld complete {task-id} --human-reviewed --reason "{reason}"`, which requires interactive confirmation and records an audited override. 
+ +--- + +## Self-Evaluation & Deviations + +After all phases and final validation: + +- Populate `self_eval` in the spec using the rubric weights from `.ai/config.yaml` +- If `total` falls below the rubric threshold, perform a second pass and set `second_pass_performed: true` +- Record any intentional deviations from invariants or the written spec in `deviations[*]` + +--- + +## Output Format + +### Progress Updates (During Execution) + +**Concise format (one line per phase):** +``` +Phase 1: Extract helpers | 4/4 criteria passed | Next: Phase 2 +Phase 2: Wire into module | 3/3 criteria passed | Next: Phase 3 +Phase 3: Add documentation | In progress... +``` + +### Blocking Issues + +If execution is blocked: +``` +Phase {N} blocked + Criterion: ac{N}_{X} - {description} + Error: {brief error message} + + Recommendation: + {One concrete solution} + + Awaiting guidance. +``` + +### Final Summary + +After all phases complete: +``` +Task complete: {task_id} + Phases: {N}/{N} completed + Acceptance: {total_passed}/{total_criteria} + PERF-EVAL: {total}/10 + Deviations: {count} + Status: {ready_for_commit | needs_review | failed} + Files changed: {count} +``` + +--- + +## Rollback Handling + +### Automatic Rollback (Acceptance Criteria Fail) + +```bash +# Execute rollback command from spec +{rollback_command} + +# Verify rollback success +git status +git diff +``` + +### Manual Rollback (User Requested) + +Revert phases in reverse order using `spec.rollback.commands`. + +--- + +## Deviations from Spec + +If you MUST deviate from the approved spec: + +1. **Pause execution** +2. **Check approval requirements** in `task.constraints.approvals_required` and `.ai/config.yaml` safety rules +3. **Document deviation** in `deviations[]` array +4. **Request approval** before proceeding + +--- + +## Self-Healing (Experimental) + +If enabled in `.ai/config.yaml` (`experimental.self_healing: true`): + +When an acceptance criterion fails: + +1. 
Analyze failure and identify root cause +2. Apply targeted correction +3. Re-run criterion +4. Max attempts: 1 (no infinite loops) + +If self-healing fails, proceed to rollback. + +--- + +## Exit Conditions + +### Success + +Move spec to `.ai/specs/archive/{YYYY-MM}/`, set `status: "completed"`. + +### Failure + +Move spec to `.ai/specs/archive/{YYYY-MM}/`, set `status: "failed"`, document recommendation. + +### Blocked + +Keep spec in `.ai/specs/active/`, `status: "in_progress"` (paused). Await user input. + +--- + +## Mode Constraints + +**DO:** +- Follow spec exactly (deviations require approval) +- Run all acceptance criteria after each phase +- Rollback on failure (unless self-healing succeeds) +- Update spec file with execution results + +**DO NOT:** +- Skip phases or acceptance criteria +- Make changes outside of spec.phases +- Modify approved spec structure (only update execution fields) +- Continue execution if a phase fails (without user approval) + +--- + +## Remember + +- **Validate obsessively** (acceptance criteria are non-negotiable) +- **Rollback fearlessly** (failure is safe when reversible) +- **Communicate concisely** (progress updates, not essays) diff --git a/.ai/scafld/prompts/harden.md b/.ai/scafld/prompts/harden.md new file mode 100644 index 0000000..ed6041c --- /dev/null +++ b/.ai/scafld/prompts/harden.md @@ -0,0 +1,84 @@ +# AI AGENT — HARDEN MODE + +**Status:** ACTIVE +**Mode:** HARDEN +**Output:** Append a round to `harden_rounds` in the spec; update `harden_status`. +**Do NOT:** Modify code outside the spec file while hardening. + +--- + +## Mission + +Interrogate the draft spec relentlessly until the operator and agent reach shared understanding. Walk down each branch of the design tree, resolving dependencies between decisions one-by-one — upstream choices first, so downstream questions are not wasted on premises that may shift. Stop when the operator says so, or when you run out of grounded questions. 
+ +Harden is OPTIONAL and operator-driven. `scafld approve` does NOT gate on harden status. The operator runs `scafld harden {task-id}` when they want to stress-test a draft; they can skip it for trivial or well-understood specs. + +--- + +## Grounding Contract (load-bearing — read carefully) + +**Every question you emit MUST carry a `grounded_in` value matching EXACTLY ONE of these three patterns:** + +- `spec_gap:{field}` — a TODO, `?`, empty array, vague clause, or internal contradiction at the named spec field. Example: `spec_gap:task.context.files_impacted`. +- `code:{file}:{line}` — a symbol or location verified by `Read` or `Grep` in the CURRENT session before the question is emitted. You must actually look at the file. Example: `code:cli/scafld:1152`. +- `archive:{task_id}` — a precedent in `.ai/specs/archive/` that bears on the current decision. Example: `archive:configurable-review-pipeline`. + +**Forbidden:** + +- Questions about behaviour the spec already answers. +- Citations to files you have not verified in this session. +- Recommended answers without their own citation. +- Invented file paths, function names, or archive task_ids. + +If you cannot produce a grounded question, stop. Do not invent one to pad the round. + +--- + +## Question Loop + +Ask ONE question at a time. For each question, provide: + +- The question itself (specific, answerable). +- `grounded_in` using one of the three patterns above. +- A **recommended answer** with its own citation (code, spec section, or archive). +- An `if_unanswered` default — what to write into the spec if the operator does not answer. This lets the loop terminate on a single side. + +Cap at `max_questions_per_round` from `.ai/config.yaml` (default 8). If you reach the cap without resolving the tree, stop and let the operator decide whether to start another round. + +Dependency ordering: before asking a downstream question, confirm its upstream premise is settled. 
If you ask "how does phase 3 validate X" before confirming "does the spec actually do X in phase 2", you are wasting the round. + +--- + +## Termination + +The loop ends when ANY of these happens: + +- Operator types `done` or `stop`. +- You run out of grounded questions (your three patterns are exhausted). +- You hit `max_questions_per_round`. + +There is no in-prompt `skip` keyword. If the operator does not want to harden, they simply do not run `scafld harden`. + +--- + +## Output Contract + +Write your round into the spec's `harden_rounds` array using the schema in `.ai/schemas/spec.json`. Each round: + +```yaml +harden_rounds: + - round: 1 + started_at: "2026-04-20T15:00:00Z" + ended_at: "2026-04-20T15:12:00Z" + outcome: "in_progress" # or passed, abandoned + questions: + - question: "Which module owns the session cleanup hook?" + grounded_in: "code:src/auth/session.ts:84" + recommended_answer: "src/auth/session.ts:cleanupSession (already defined)" + if_unanswered: "Default to existing cleanupSession; flag for confirmation." + answered_with: "(operator fills in)" +``` + +While the loop runs, set top-level `harden_status: "in_progress"`. The operator finalises a satisfactory round by running `scafld harden --mark-passed` — do NOT set `harden_status: passed` from the prompt loop. + +Re-running `scafld harden` on a spec that is already `passed` resets status to `in_progress` and appends a new round; prior rounds are preserved as audit trail. diff --git a/.ai/scafld/prompts/plan.md b/.ai/scafld/prompts/plan.md new file mode 100644 index 0000000..8c733bb --- /dev/null +++ b/.ai/scafld/prompts/plan.md @@ -0,0 +1,203 @@ +# AI AGENT — PLANNING MODE + +**Status:** ACTIVE +**Mode:** PLAN +**Output:** Conversational task specification file (`.ai/specs/{task-id}.yaml`) +**Do NOT:** Modify code outside `.ai/specs/` while planning + +--- + +## Mission + +You are in **PLANNING MODE**. 
Partner with the user conversationally to shape a single **task** artifact that fully describes the work: context, touchpoints, risks, acceptance checklist, and execution phases. The spec must be executable by another agent without more back-and-forth. + +--- + +## Conversational ReAct Loop + +Iterate until the task artifact feels complete: + +1. **THOUGHT:** Interpret the request in repo terms. Identify unknowns. +2. **ACTION:** Gather evidence (search, read, diff) to answer the unknowns. +3. **OBSERVATION:** Capture what you learned (files, invariants, risks). +4. **THOUGHT:** Update the `task` block, acceptance, and phases. Ask clarifying questions when information is missing. +5. **REPEAT** until all required fields are filled and assumptions are explicit. + +Constraints: +- Max 20 cycles; document assumptions if still uncertain. +- Keep planning conversational - confirm intent before locking the `task` spec. +- Every update to the spec should be reflected in `planning_log`. + +**Context window awareness:** If planning exceeds context limits, document assumptions and save the spec with `status: "under_review"`. Resuming planning later is better than losing work. + +--- + +## Required Output Structure + +Produce a YAML spec conforming to `.ai/schemas/spec.json` (v1.1). + +Validation profiles, rubric weights, invariants, and safety rules are defined in `.ai/config.yaml` - reference them, don't duplicate them here. + +### Minimal Skeleton + +```yaml +spec_version: "1.1" +task_id: "{kebab-case}" +created: "{ISO-8601}" +updated: "{ISO-8601}" +status: "draft" + +task: + title: "{short heading}" + summary: "{2-3 sentence overview}" + size: "micro | small | medium | large" + risk_level: "low | medium | high" + context: + packages: ["src/module/...", "lib/..."] + files_impacted: + - path: "{relative path}" + lines: "100-150" | [100,150] | "all" + reason: "{why}" + invariants: ["domain_boundaries", ...] 
+ related_docs: ["docs/...md"] + objectives: + - "{user goal}" + scope: + in_scope: ["..."] + out_of_scope: ["..."] + dependencies: ["..."] + assumptions: ["..."] + touchpoints: + - area: "{system/component}" + description: "{what changes here}" + risks: + - description: "{risk}" + impact: medium + mitigation: "{plan}" + acceptance: + validation_profile: "light | standard | strict" + definition_of_done: + - id: dod1 + description: "{checklist item}" + status: pending + validation: + - id: dod1 + type: documentation | compile | test | boundary | integration | security | custom + description: "{how to verify}" + command: "{optional shell command}" + expected: "{optional expectation}" + constraints: + approvals_required: ["schema_change", ...] + non_goals: ["{explicitly not doing}" ] + info_sources: ["{links or files consulted}"] + notes: "{decisions, trade-offs}" + +planning_log: + - timestamp: "{ISO-8601}" + actor: "agent" + summary: "{what changed/confirmed in this iteration}" + +phases: + - id: phase1 + name: "{phase name}" + objective: "{outcome of this phase}" + changes: + - file: "{path}" + action: create | update | delete | move + lines: "all" + content_spec: | + {narrative of edits} + acceptance_criteria: + - id: ac1_1 + type: test | compile | boundary | documentation | custom | integration | security + command: "{command if automated}" + description: "{why this check proves success}" + expected: "{result}" + status: pending + +rollback: + strategy: per_phase | atomic | manual + commands: + phase1: "git checkout HEAD -- path" + +self_eval, deviations, metadata remain as in earlier versions (fill null/defaults during planning). +``` + +--- + +## Building the `task` Block + +- **Title & summary:** Mirror the user's words; make it obvious what problem we're solving. +- **Size & risk:** Use `size` (`micro/small/medium/large`) and `risk_level` (`low/medium/high`) to communicate how heavy the change is. 
This guides how much validation to run and how detailed phases should be. +- **Context:** Reference actual packages/files. Keep `invariants` list aligned with `.ai/config.yaml` canonical invariants. +- **Objectives & scope:** Distinguish what we're doing vs. explicitly not doing. +- **Touchpoints:** Enumerate major systems, adapters, modules, or docs affected. This is the anchor for later validation. +- **Risks/assumptions:** Capture blockers early; if an assumption is shaky, call it out and set `status: "under_review"`. +- **Acceptance:** Treat `definition_of_done` as the non-negotiable checklist (one object per item with `id`, `description`, and default `status: pending`). `validation` entries describe how each DoD item will be verified. Optionally set `acceptance.validation_profile` to choose a validation profile; otherwise, EXEC should derive a profile from `risk_level`. +- **Constraints:** Move any approval needs here. EXEC agents must pause if `task.constraints.approvals_required` intersects `safety.require_approval_for` in `.ai/config.yaml`. + +--- + +## Phases & Acceptance Criteria + +- Each phase should map cleanly to a touchpoint or cohesive concern. +- `changes[].content_spec` should read like a design note (functions, behaviors, docs sections). +- Every phase needs at least one acceptance criterion. Use deterministic commands when possible; fall back to `documentation`/`custom` with clear reviewer instructions. +- Keep rollbacks scoped per phase unless the plan demands atomicity. + +--- + +## Planning Log + +Record significant conversational turns: + +- `summary` should capture what you agreed on (clarified scope, locked acceptance items, discovered dependency). +- If you made an assumption, log it and echo inside `task.assumptions`. +- Timestamps should be ISO-8601 (UTC). Use the order of discovery. + +--- + +## Approval Guidance + +- Ask for guidance only when you detect schema/migration/public API work. 
Otherwise, choose the best architecture-aligned approach and document the constraint in `task.constraints.approvals_required`. +- When explicitly punting on a higher-price option, capture the trade-off in `task.notes` or `scope.out_of_scope`. + +--- + +## Final Checklist Before Output + +- [ ] Spec validates against `.ai/schemas/spec.json` v1.1. +- [ ] `task_id` is unique (no clashes in `.ai/specs/**`). +- [ ] `task.touchpoints`, `task.acceptance.definition_of_done`, and `phases` tell the same story. +- [ ] Every assumption is documented; blockers set `status: "under_review"`. +- [ ] `planning_log` captures the major conversational steps. + +--- + +## Optional Next Step + +When planning is complete, the operator may run `scafld harden {task-id}` to interrogate the draft against grounded questions before approval. This step is optional and can be skipped by approving directly. + +--- + +## Blocked Planning Template + +If planning stops on missing info: + +``` +Warning: Planning blocked + Reason: {cannot determine X without Y} + Assumptions made: + - {assumption 1} + +Spec saved to: .ai/specs/drafts/{task-id}.yaml (status: under_review) +``` + +--- + +## Remember + +- Co-create the plan with the user - confirm direction before finalizing. +- Capture **one** high-quality plan; no more option matrices. +- Keep architecture invariants front-of-mind. +- Optimize for execution clarity: another agent should be able to pick this up and ship without guessing. diff --git a/.ai/scafld/prompts/review.md b/.ai/scafld/prompts/review.md new file mode 100644 index 0000000..201e5a0 --- /dev/null +++ b/.ai/scafld/prompts/review.md @@ -0,0 +1,169 @@ +# AI AGENT — REVIEW MODE + +**Mode:** REVIEW +**Input:** Spec (`.ai/specs/active/{task-id}.yaml`) + git diff +**Output:** Findings in `.ai/reviews/{task-id}.md` + +--- + +## Mission + +Find what is wrong. Not what is right. + +You are reviewing changes made during spec execution. A separate agent built this, or you did in a prior session. 
Either way, your job is to attack it. + +A review that finds zero issues is suspicious. Look harder. + +--- + +## Rules + +- Every finding must cite a specific file and line number +- Classify findings as **blocking** (must fix before merge) or **non-blocking** (should fix) +- Do not suggest improvements or refactors — only flag defects and omissions +- Do not modify any code — review only + +--- + +## Process + +1. Read the spec at `.ai/specs/active/{task-id}.yaml` +2. Read the git diff of all changes +3. Read `CONVENTIONS.md` and `AGENTS.md` +4. Read `.ai/reviews/{task-id}.md` — if prior review rounds exist, read what was found before. Don't re-report fixed issues. Note if a prior finding persists. +5. Attack the diff through the configured adversarial passes — by default: `regression_hunt`, `convention_check`, and `dark_patterns` +6. Write findings into the latest review section in `.ai/reviews/{task-id}.md` +7. Update the Review Artifact v3 metadata so the latest round is truthful and complete + +--- + +## Default Review Pipeline + +The default built-in five-pass pipeline in `.ai/config.yaml` is: + +- `spec_compliance` +- `scope_drift` +- `regression_hunt` +- `convention_check` +- `dark_patterns` + +`scafld review` already runs `spec_compliance` and `scope_drift` and scaffolds the adversarial sections in configured order. Your job is to complete the adversarial passes and finalize the metadata for Review Artifact v3. + +If the project has changed pass titles in `.ai/config.yaml`, follow the headings already scaffolded by `scafld review`. The built-in pass ids stay the same even if the visible section title changes. + +--- + +## Attack Vectors + +### 1. Regression Hunt (`regression_hunt`) + +For each modified file, find every caller, importer, and downstream consumer. What assumptions do they make that this change violates? + +- Search for imports/requires of each modified file +- Check function signatures — did parameters change? Did return shapes change? 
+- Look for duck-typing or structural assumptions that no longer hold +- Verify event listeners and subscribers still match event shapes +- Check if removed or renamed exports are still referenced elsewhere + +### 2. Convention Check (`convention_check`) + +Read `CONVENTIONS.md` and `AGENTS.md`. For each changed file, check whether the new code violates a documented rule. + +- Cite the specific convention and the specific violating line +- Don't flag style preferences — only documented, stated conventions +- Check naming patterns, layer boundaries, import rules, test patterns + +### 3. Dark Patterns (`dark_patterns`) + +For each change, actively hunt for: + +- Hardcoded values that should be dynamic or configurable +- Off-by-one errors +- Missing null/empty checks at system boundaries (user input, API responses, config values) +- Race conditions or timing issues +- Copy-paste errors (duplicated logic with subtle differences) +- Error handling gaps (unhappy paths not covered) +- Security issues (injection, XSS, auth bypass, missing authorization) + +--- + +## Severity Levels + +- **critical** — will cause runtime errors, data loss, or security vulnerability +- **high** — will cause incorrect behavior in common cases +- **medium** — will cause incorrect behavior in edge cases +- **low** — code smell, minor issue, or potential future problem + +--- + +## Output + +`scafld review` scaffolds the review file at `.ai/reviews/{task-id}.md` with numbered review sections. 
Fill in the latest section using the Review Artifact v3 contract: + +````markdown +## Review N — {timestamp} + +### Metadata +```json +{ + "schema_version": 3, + "round_status": "completed", + "reviewer_mode": "fresh_agent", + "reviewer_session": "session-id-or-empty-string", + "reviewed_at": "{timestamp}", + "override_reason": null, + "pass_results": { + "spec_compliance": "pass", + "scope_drift": "pass", + "regression_hunt": "pass", + "convention_check": "pass", + "dark_patterns": "pass" + } +} +``` + +### Pass Results +- spec_compliance: PASS +- scope_drift: PASS +- regression_hunt: PASS +- convention_check: PASS +- dark_patterns: PASS + +### Regression Hunt +{For each modified file, trace callers/importers. What assumptions break? +List findings or "No issues found — checked [what you checked]".} + +### Convention Check +{Read CONVENTIONS.md and AGENTS.md. Does new code violate any documented rule? +List findings or "No issues found — checked [what you checked]".} + +### Dark Patterns +{Hunt for hardcoded values, off-by-one issues, missing null checks, race conditions, +copy-paste errors, unhandled error paths, and security issues. +List findings or "No issues found — checked [what you checked]".} + +### Blocking +- **{severity}** `{file}:{line}` — {what's wrong and why it matters} + +### Non-blocking +- **{severity}** `{file}:{line}` — {what's wrong and why it matters} + +### Verdict +{pass | fail | pass_with_issues} +```` + +Update these metadata fields explicitly: + +- Set `round_status` to `completed` when the review is actually done +- Set `reviewer_mode` to `fresh_agent`, `auto`, or `executor` to match the real reviewer +- Set `reviewer_session` to the real session identifier or `""` +- Keep the automated pass results for `spec_compliance` and `scope_drift` +- Set adversarial `pass_results` for `regression_hunt`, `convention_check`, and `dark_patterns` to `pass`, `pass_with_issues`, or `fail` + +Prior review rounds remain in the file as context. 
Do not rewrite them. + +**All configured adversarial sections must contain content.** Each must have at least one finding or an explicit "No issues found" with a brief note of what was checked. `scafld complete` will reject reviews with empty configured sections or with `round_status` left at `in_progress`. + +**Verdict rules:** Any blocking finding → `fail`. Non-blocking only → `pass_with_issues`. Clean → `pass`. + +When done, run `scafld complete {task-id}`. diff --git a/.ai/scafld/schemas/spec.json b/.ai/scafld/schemas/spec.json new file mode 100644 index 0000000..a7b73b3 --- /dev/null +++ b/.ai/scafld/schemas/spec.json @@ -0,0 +1,514 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://github.com/nilstate/scafld/spec-v1.1.json", + "title": "scafld Task Specification", + "description": "Machine-readable conversational task specification for AI agents", + "type": "object", + "required": ["spec_version", "task_id", "status", "task", "phases", "planning_log", "created", "updated"], + "additionalProperties": false, + + "properties": { + "spec_version": { + "type": "string", + "pattern": "^[0-9]+\\.[0-9]+$", + "description": "Semantic version of this spec format", + "examples": ["1.1"] + }, + + "task_id": { + "type": "string", + "pattern": "^[a-z0-9-]+$", + "description": "Unique identifier for this task (kebab-case)", + "examples": ["add-user-metrics", "refactor-auth-module"] + }, + + "created": { + "type": "string", + "format": "date-time", + "description": "ISO 8601 timestamp when spec was generated" + }, + + "updated": { + "type": "string", + "format": "date-time", + "description": "ISO 8601 timestamp when spec was last modified" + }, + + "status": { + "type": "string", + "enum": ["draft", "blocked", "under_review", "approved", "in_progress", "completed", "failed", "cancelled"], + "description": "Current lifecycle state of this task" + }, + + "task": { + "type": "object", + "required": ["title", "summary", "size", "risk_level", 
"context", "objectives", "touchpoints", "acceptance"], + "properties": { + "title": { + "type": "string", + "minLength": 5, + "description": "Human friendly title for this task" + }, + "summary": { + "type": "string", + "minLength": 20, + "description": "Concise description of the problem/goal" + }, + "size": { + "type": "string", + "enum": ["micro", "small", "medium", "large"], + "description": "Relative task size to guide planning and validation depth" + }, + "risk_level": { + "type": "string", + "enum": ["low", "medium", "high"], + "description": "Overall risk tier for this task; used to select validation profile when not explicitly set" + }, + "context": { + "type": "object", + "required": ["packages", "invariants"], + "properties": { + "packages": { + "type": "array", + "items": {"type": "string"}, + "minItems": 1, + "description": "Modules or packages affected" + }, + "files_impacted": { + "type": "array", + "items": { + "type": "object", + "required": ["path", "reason"], + "properties": { + "path": {"type": "string"}, + "lines": { + "oneOf": [ + {"type": "array", "items": {"type": "integer"}}, + {"type": "string", "pattern": "^[0-9]+-[0-9]+$"}, + {"type": "string", "enum": ["all"]} + ] + }, + "reason": {"type": "string"} + } + } + }, + "invariants": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "description": "Architectural/contract invariants that must be preserved (customize in config.yaml)" + }, + "related_docs": { + "type": "array", + "items": {"type": "string"} + }, + "cwd": { + "type": "string", + "description": "Default working directory for acceptance criteria commands, relative to workspace root. Individual criteria can override with their own cwd." 
+ } + } + }, + "objectives": { + "type": "array", + "items": {"type": "string"}, + "minItems": 1, + "description": "Primary goals for this task" + }, + "scope": { + "type": "object", + "properties": { + "in_scope": { + "type": "array", + "items": {"type": "string"} + }, + "out_of_scope": { + "type": "array", + "items": {"type": "string"} + } + } + }, + "dependencies": { + "type": "array", + "items": {"type": "string"} + }, + "assumptions": { + "type": "array", + "items": {"type": "string"} + }, + "touchpoints": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["area", "description"], + "properties": { + "area": {"type": "string"}, + "description": {"type": "string"}, + "owners": { + "type": "array", + "items": {"type": "string"} + }, + "links": { + "type": "array", + "items": {"type": "string"} + } + } + } + }, + "risks": { + "type": "array", + "items": { + "type": "object", + "required": ["description"], + "properties": { + "description": {"type": "string"}, + "impact": { + "type": "string", + "enum": ["low", "medium", "high"] + }, + "mitigation": {"type": "string"} + } + } + }, + "acceptance": { + "type": "object", + "required": ["definition_of_done", "validation"], + "properties": { + "validation_profile": { + "type": "string", + "enum": ["light", "standard", "strict"], + "description": "Validation profile to apply; defaults based on risk_level if omitted" + }, + "definition_of_done": { + "type": "array", + "minItems": 1, + "description": "Checklist items that must be explicitly checked off during execution", + "items": { + "type": "object", + "required": ["id", "description"], + "properties": { + "id": {"type": "string"}, + "description": {"type": "string"}, + "status": { + "type": "string", + "enum": ["pending", "in_progress", "done"], + "default": "pending" + }, + "checked_at": {"type": "string", "format": "date-time"}, + "notes": {"type": "string"} + } + } + }, + "validation": { + "type": "array", + "items": { + "type": 
"object", + "required": ["id", "type", "description"], + "properties": { + "id": {"type": "string"}, + "type": { + "type": "string", + "enum": [ + "compile", + "test", + "boundary", + "integration", + "security", + "documentation", + "custom" + ] + }, + "description": {"type": "string"}, + "command": {"type": "string"}, + "expected": {"type": "string"}, + "cwd": {"type": "string", "description": "Working directory relative to workspace root"}, + "timeout_seconds": { + "type": "integer", + "minimum": 1, + "description": "Command timeout in seconds. Defaults to 600 when omitted." + } + } + } + } + } + }, + "notes": {"type": "string"} + } + }, + + "planning_log": { + "type": "array", + "items": { + "type": "object", + "required": ["timestamp", "summary"], + "properties": { + "timestamp": {"type": "string", "format": "date-time"}, + "actor": {"type": "string"}, + "summary": {"type": "string"}, + "notes": {"type": "string"} + } + } + }, + + "phases": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["id", "name", "objective", "changes", "acceptance_criteria"], + "properties": { + "id": { + "type": "string", + "pattern": "^phase[0-9]+$" + }, + "name": { + "type": "string", + "minLength": 5 + }, + "objective": { + "type": "string", + "minLength": 10 + }, + "dependencies": { + "type": "array", + "items": {"type": "string"} + }, + "changes": { + "type": "array", + "items": { + "type": "object", + "required": ["file", "action", "content_spec"], + "properties": { + "file": {"type": "string"}, + "action": { + "type": "string", + "enum": ["create", "update", "delete", "move"] + }, + "move_to": { + "type": "string", + "description": "Destination path when action is 'move'" + }, + "lines": { + "oneOf": [ + {"type": "array", "items": {"type": "integer"}}, + {"type": "string", "pattern": "^[0-9]+-[0-9]+$"}, + {"type": "string", "enum": ["all"]} + ] + }, + "content_spec": {"type": "string"} + } + } + }, + "acceptance_criteria": { + "type": 
"array", + "minItems": 1, + "items": { + "type": "object", + "required": ["id", "type", "description"], + "properties": { + "id": {"type": "string"}, + "type": { + "type": "string", + "enum": [ + "compile", + "test", + "boundary", + "integration", + "security", + "documentation", + "custom" + ], + "description": "Criterion type. For automated types (compile, test, boundary, integration, security), a 'command' field is expected. For manual types (documentation, custom), 'command' is optional." + }, + "description": {"type": "string"}, + "command": { + "type": "string", + "description": "Shell command to run for automated validation. Expected for compile, test, boundary, integration, and security types." + }, + "expected": {"type": "string"}, + "cwd": { + "type": "string", + "description": "Working directory for the command, relative to workspace root. Useful in monorepo/workspace setups where different criteria target different submodules." + }, + "timeout_seconds": { + "type": "integer", + "minimum": 1, + "description": "Command timeout in seconds. Defaults to 600 when omitted." 
+ }, + "result": { + "oneOf": [ + { + "type": "string", + "enum": ["pass", "fail"], + "description": "Flat result recorded by scafld exec" + }, + { + "type": "object", + "required": ["status"], + "properties": { + "status": { + "type": "string", + "enum": ["pass", "fail"] + }, + "timestamp": { + "type": "string", + "format": "date-time" + }, + "output": { + "type": "string" + } + }, + "additionalProperties": false, + "description": "Nested result block supported for execution records" + } + ] + }, + "executed_at": { + "type": "string", + "format": "date-time", + "description": "When the criterion was last executed" + }, + "result_output": { + "type": "string", + "description": "Truncated command output from scafld exec" + } + } + } + }, + "status": { + "type": "string", + "enum": ["pending", "in_progress", "completed", "failed", "skipped"] + } + } + } + }, + + "rollback": { + "type": "object", + "properties": { + "strategy": { + "type": "string", + "enum": ["per_phase", "atomic", "manual"], + "default": "per_phase" + }, + "commands": { + "type": "object", + "patternProperties": { + "^phase[0-9]+$": {"type": "string"} + } + } + } + }, + + "review": { + "type": "object", + "description": "Adversarial review results recorded by scafld complete", + "properties": { + "timestamp": {"type": "string", "format": "date-time"}, + "verdict": { + "type": "string", + "enum": ["pass", "fail", "pass_with_issues"], + "description": "pass = no findings, fail = blocking findings, pass_with_issues = non-blocking only" + }, + "passes": { + "type": "array", + "items": { + "type": "object", + "required": ["id", "result"], + "properties": { + "id": {"type": "string"}, + "result": { + "type": "string", + "enum": ["pass", "fail", "pass_with_issues"] + } + } + } + }, + "review_rounds": {"type": "integer", "minimum": 0, "description": "Number of review rounds before passing"}, + "blocking_count": {"type": "integer", "minimum": 0}, + "non_blocking_count": {"type": "integer", "minimum": 0} + } 
+ }, + + "self_eval": { + "type": "object", + "properties": { + "completeness": {"type": "integer", "minimum": 0, "maximum": 3}, + "architecture_fidelity": {"type": "integer", "minimum": 0, "maximum": 3}, + "spec_alignment": {"type": "integer", "minimum": 0, "maximum": 2}, + "validation_depth": {"type": "integer", "minimum": 0, "maximum": 2}, + "total": {"type": "integer", "minimum": 0, "maximum": 10}, + "notes": {"type": "string"}, + "second_pass_performed": {"type": "boolean"} + } + }, + + "deviations": { + "type": "array", + "items": { + "type": "object", + "required": ["rule", "reason"], + "properties": { + "rule": {"type": "string"}, + "reason": {"type": "string"}, + "mitigation": {"type": "string"}, + "approved_by": {"type": "string"} + } + } + }, + + "metadata": { + "type": "object", + "properties": { + "estimated_effort_hours": {"type": "number", "minimum": 0}, + "actual_effort_hours": {"type": "number", "minimum": 0}, + "ai_model": {"type": "string"}, + "react_cycles": {"type": "integer"}, + "tags": { + "type": "array", + "items": {"type": "string"} + } + } + }, + + "harden_status": { + "type": "string", + "enum": ["not_run", "in_progress", "passed"], + "description": "Optional. Tracks whether the operator has run `scafld harden` against this spec. Independent of the lifecycle `status` field; not consulted by `scafld approve`." + }, + + "harden_rounds": { + "type": "array", + "description": "Optional. 
One entry per `scafld harden` invocation.", + "items": { + "type": "object", + "required": ["round", "started_at", "questions"], + "properties": { + "round": {"type": "integer", "minimum": 1}, + "started_at": {"type": "string", "format": "date-time"}, + "ended_at": {"type": "string", "format": "date-time"}, + "outcome": {"type": "string", "enum": ["in_progress", "passed", "abandoned"]}, + "questions": { + "type": "array", + "items": { + "type": "object", + "required": ["question", "grounded_in"], + "properties": { + "question": {"type": "string"}, + "grounded_in": { + "type": "string", + "pattern": "^(spec_gap:|code:|archive:).+" + }, + "recommended_answer": {"type": "string"}, + "if_unanswered": {"type": "string"}, + "answered_with": {"type": "string"} + } + } + } + } + } + } + } +} diff --git a/.ai/scafld/specs/README.md b/.ai/scafld/specs/README.md new file mode 100644 index 0000000..73891aa --- /dev/null +++ b/.ai/scafld/specs/README.md @@ -0,0 +1,99 @@ +# Task Specifications + +This directory contains machine-readable task specifications organized by lifecycle status. + +--- + +## Directory Structure + +``` +specs/ +├── drafts/ # Planning in progress +│ └── *.yaml (status: draft | under_review) +├── approved/ # Ready for execution +│ └── *.yaml (status: approved) +├── active/ # Currently executing +│ └── *.yaml (status: in_progress) +└── archive/ # Completed work + └── YYYY-MM/ + └── *.yaml (status: completed | failed | cancelled) +``` + +--- + +## File Naming + +**Convention:** `{task-id}.yaml` using kebab-case, descriptive names. + +Good: `add-user-metrics.yaml`, `refactor-auth-module.yaml`, `fix-chunk-dedup.yaml` +Bad: `task-123.yaml` (not descriptive), `AddMetrics.yaml` (not kebab-case) + +--- + +## Workflow + +### 1. Planning + +AI generates spec in `drafts/` with `status: "draft"`. If blocked, set `status: "under_review"`. + +### 2. Review & Approval + +Developer reviews, then approves: + +```bash +scafld approve my-task +``` + +### 3. 
Execution + +AI moves spec to `active/`, sets `status: "in_progress"`, and executes phases. + +### 4. Review + +Run adversarial review before completing: + +```bash +scafld review my-task +# Fill in findings in .ai/reviews/my-task.md +``` + +### 5. Completion + +Mark complete (reads review, records verdict, moves to `archive/YYYY-MM/`): + +```bash +scafld complete my-task +``` + +--- + +## Spec Anatomy + +Each spec validated by `.ai/schemas/spec.json` includes: + +- **`task` block:** Title, summary, context, objectives, scope, touchpoints, risks, acceptance checklist, constraints +- **`planning_log`:** Chronological entries summarizing planning steps +- **`phases`:** Ordered execution units with `changes[].content_spec`, acceptance criteria, and per-phase status +- **`rollback`:** Strategy and per-phase commands for safe reversions +- **`review`:** Verdict, pass results, and finding counts recorded by `scafld complete` +- **`self_eval` / `deviations` / `metadata`:** Populated during execution + +--- + +## Finding Work + +```bash +scafld list # All specs +scafld list active # Currently executing +scafld list approved # Awaiting execution +scafld list drafts # Planning in progress +scafld list archive # Completed work +``` + +--- + +## See Also + +- [AGENTS.md](../../AGENTS.md) - Status lifecycle and agent policies +- [config.yaml](../config.yaml) - Validation profiles and size/risk tiers +- [schemas/spec.json](../schemas/spec.json) - Spec validation schema diff --git a/.ai/scafld/specs/examples/add-error-codes.yaml b/.ai/scafld/specs/examples/add-error-codes.yaml new file mode 100644 index 0000000..f0e91ee --- /dev/null +++ b/.ai/scafld/specs/examples/add-error-codes.yaml @@ -0,0 +1,365 @@ +# scafld Example Spec — Complete reference showing every schema field +# See .ai/schemas/spec.json for the formal definition + +spec_version: "1.1" +task_id: "add-error-codes" +created: "2026-02-18T09:15:00Z" +updated: "2026-02-18T14:42:00Z" +status: "completed" + +task: + title: 
"Add typed error codes to document processing module" + summary: > + The document processor uses unstructured string errors, making it difficult for + callers to programmatically handle failures. Introduce a typed error code enum + and structured error class so consumers can match on specific failure modes. + size: "small" + risk_level: "medium" + context: + packages: + - "src/services/documents" + - "src/errors" + files_impacted: + - path: "src/errors/codes.ts" + lines: "all" + reason: "New file defining DocumentErrorCode enum and error map" + - path: "src/errors/document-error.ts" + lines: "all" + reason: "New DocumentProcessingError class using typed codes" + - path: "src/services/documents/processor.ts" + lines: "45-120" + reason: "Replace string throws with DocumentProcessingError instances" + - path: "src/services/documents/processor.test.ts" + lines: "all" + reason: "Update assertions to check error codes instead of message strings" + invariants: + - "domain_boundaries" + - "error_envelope" + related_docs: + - "docs/error-handling.md" + - "docs/architecture/service-layer.md" + objectives: + - "Define a DocumentErrorCode enum covering all known failure modes" + - "Create a structured error class that carries code, message, and context" + - "Migrate processor.ts from string throws to typed errors" + scope: + in_scope: + - "Document processor error paths" + - "Unit tests for error scenarios" + out_of_scope: + - "Other service modules (auth, billing)" + - "HTTP error response mapping (handled by controller layer)" + - "Error monitoring/alerting integration" + dependencies: + - "No external dependencies required" + assumptions: + - "Existing error helper utilities in src/errors/ are compatible with subclassing" + - "No downstream consumers rely on exact error message strings for control flow" + touchpoints: + - area: "src/errors" + description: "New error code enum and DocumentProcessingError class" + owners: + - "backend-team" + links: + - 
"https://internal.wiki/error-handling-standards" + - area: "src/services/documents/processor.ts" + description: "Replace raw throws with typed error instances" + owners: + - "documents-team" + - area: "src/services/documents/processor.test.ts" + description: "Update test assertions to verify error codes" + links: + - "https://internal.wiki/testing-conventions" + risks: + - description: "Downstream callers may catch generic Error and miss new type" + impact: "low" + mitigation: "DocumentProcessingError extends Error, so existing catch blocks still work" + - description: "Incomplete coverage of error paths in processor.ts" + impact: "medium" + mitigation: "Grep for all throw statements before and after migration to ensure full coverage" + acceptance: + validation_profile: "standard" + definition_of_done: + - id: "dod1" + description: "DocumentErrorCode enum covers all processor failure modes" + status: "done" + checked_at: "2026-02-18T13:20:00Z" + notes: "8 error codes identified matching 8 throw sites in processor.ts" + - id: "dod2" + description: "All throw statements in processor.ts use DocumentProcessingError" + status: "done" + checked_at: "2026-02-18T14:05:00Z" + notes: "Verified via grep: 0 raw Error throws remain" + - id: "dod3" + description: "Tests assert on error codes, not message strings" + status: "done" + checked_at: "2026-02-18T14:30:00Z" + - id: "dod4" + description: "No regressions in existing test suite" + status: "done" + checked_at: "2026-02-18T14:35:00Z" + notes: "Full suite: 142 passed, 0 failed" + validation: + - id: "v1" + type: "compile" + description: "Project compiles with no type errors" + command: "npm run build" + expected: "Exit code 0, no type errors" + - id: "v2" + type: "test" + description: "All unit tests pass including updated error assertions" + command: "npm test -- --filter documents" + expected: "All tests pass" + - id: "v3" + type: "boundary" + description: "No throw of raw Error or string in processor.ts" + command: "rg 
'throw new Error\\|throw \"' src/services/documents/processor.ts" + expected: "No matches found" + - id: "v4" + type: "security" + description: "No hardcoded secrets in changed files" + command: "rg -i '(password|secret|api[_-]?key)\\s*=\\s*[\"'']\\w' src/errors/ src/services/documents/" + expected: "No matches found" + constraints: + approvals_required: + - "error_envelope" + non_goals: + - "Refactoring the processor's happy path logic" + - "Adding error codes to other modules" + info_sources: + - "docs/error-handling.md" + - "https://internal.wiki/error-handling-standards" + - "src/errors/base-error.ts (existing base class)" + notes: > + Chose a flat enum over a class hierarchy to keep things simple. The error code + enum can be extended later when other modules adopt the same pattern. Considered + using numeric codes but string enums are more readable in logs and debuggers. + +planning_log: + - timestamp: "2026-02-18T09:15:00Z" + actor: "agent" + summary: "Identified processor.ts as primary target. Found 8 throw statements using raw strings." + notes: "Searched with: rg 'throw new Error' src/services/documents/" + - timestamp: "2026-02-18T09:40:00Z" + actor: "agent" + summary: "Confirmed src/errors/ has base helpers. Proposed enum + error class approach." + notes: "BaseError class exists at src/errors/base-error.ts with code property pattern" + - timestamp: "2026-02-18T10:05:00Z" + actor: "user" + summary: "User confirmed no schema changes needed. No downstream string matching on error messages." + - timestamp: "2026-02-18T10:30:00Z" + actor: "agent" + summary: "Locked three-phase plan: define codes, migrate processor, update tests. Spec ready for review." 
+ notes: "Moved from two-phase to three-phase after realizing test updates are substantial enough to warrant isolation" + +phases: + - id: "phase1" + name: "Define error codes and error class" + objective: "Create the DocumentErrorCode enum and DocumentProcessingError class in src/errors/" + changes: + - file: "src/errors/codes.ts" + action: "create" + lines: "all" + content_spec: | + Export a DocumentErrorCode string enum with values: + INVALID_FORMAT, PARSE_FAILED, SIZE_EXCEEDED, ENCODING_UNSUPPORTED, + PERMISSION_DENIED, STORAGE_UNAVAILABLE, TEMPLATE_MISSING, TIMEOUT. + Each value should be a SCREAMING_SNAKE string matching the enum key. + - file: "src/errors/document-error.ts" + action: "create" + lines: "all" + content_spec: | + Export DocumentProcessingError extending Error. + Constructor accepts (code: DocumentErrorCode, message: string, context?: Record). + Exposes readonly code, context properties. Sets name to 'DocumentProcessingError'. + Re-export DocumentErrorCode for convenience. 
+ acceptance_criteria: + - id: "ac1_1" + type: "compile" + description: "New files compile without errors" + command: "npx tsc --noEmit src/errors/codes.ts src/errors/document-error.ts" + expected: "Exit code 0" + validation: "automated" + result: + status: "pass" + timestamp: "2026-02-18T11:45:00Z" + output: "tsc completed with exit code 0" + notes: "Clean compile, no warnings" + - id: "ac1_2" + type: "test" + description: "Error class instantiation works correctly" + command: "npm test -- --filter document-error" + expected: "Error instances carry correct code and extend Error" + validation: "automated" + result: + status: "pass" + timestamp: "2026-02-18T11:50:00Z" + output: "2 tests passed" + - id: "ac1_3" + type: "documentation" + description: "Error codes are documented in docs/error-handling.md" + validation: "manual" + result: + status: "pass" + timestamp: "2026-02-18T12:00:00Z" + notes: "Added table of error codes with descriptions to error-handling.md" + status: "completed" + + - id: "phase2" + name: "Migrate processor error paths" + objective: "Replace all raw throws in processor.ts with DocumentProcessingError using appropriate codes" + dependencies: + - "phase1" + changes: + - file: "src/services/documents/processor.ts" + action: "update" + lines: "45-120" + content_spec: | + Import DocumentProcessingError and DocumentErrorCode from src/errors. + Replace each `throw new Error("...")` with the appropriate + `throw new DocumentProcessingError(DocumentErrorCode.X, message, { context })`. 
+ Map each existing error string to the matching enum value: + - "Invalid document format" -> INVALID_FORMAT + - "Failed to parse document" -> PARSE_FAILED + - "Document exceeds size limit" -> SIZE_EXCEEDED + - "Unsupported encoding" -> ENCODING_UNSUPPORTED + - "Permission denied" -> PERMISSION_DENIED + - "Storage service unavailable" -> STORAGE_UNAVAILABLE + - "Template not found" -> TEMPLATE_MISSING + - "Processing timeout" -> TIMEOUT + - file: "src/errors/index.ts" + action: "update" + lines: "1-10" + content_spec: | + Add re-exports for DocumentErrorCode and DocumentProcessingError + so they can be imported from 'src/errors' directly. + acceptance_criteria: + - id: "ac2_1" + type: "boundary" + description: "No raw Error throws remain in processor.ts" + command: "rg -c 'throw new Error' src/services/documents/processor.ts" + expected: "No matches (exit code 1)" + validation: "automated" + result: + status: "pass" + timestamp: "2026-02-18T13:15:00Z" + output: "exit code 1 - no matches" + notes: "All 8 throw sites migrated" + - id: "ac2_2" + type: "compile" + description: "Processor compiles with new error imports" + command: "npx tsc --noEmit src/services/documents/processor.ts" + expected: "Exit code 0" + validation: "automated" + result: + status: "pass" + timestamp: "2026-02-18T13:18:00Z" + output: "tsc completed with exit code 0" + - id: "ac2_3" + type: "security" + description: "No hardcoded secrets introduced" + command: "rg -i '(password|secret|api[_-]?key)\\s*=\\s*[\"'']\\w' src/services/documents/processor.ts" + expected: "No matches" + validation: "automated" + result: + status: "pass" + timestamp: "2026-02-18T13:20:00Z" + output: "No matches found" + status: "completed" + + - id: "phase3" + name: "Update tests to assert on error codes" + objective: "Migrate test assertions from message matching to code matching and add coverage for each error code" + dependencies: + - "phase2" + changes: + - file: "src/services/documents/processor.test.ts" + action: 
"update" + lines: "all" + content_spec: | + Import DocumentErrorCode and DocumentProcessingError. + For each error-path test: + - Replace `.toThrow("message")` with a catch block that asserts + `error instanceof DocumentProcessingError` and + `error.code === DocumentErrorCode.X`. + - Verify error.context contains expected metadata where applicable. + - Add one new test per error code to confirm the correct code is thrown + for each failure scenario. + - file: "src/errors/document-error.test.ts" + action: "update" + lines: "all" + content_spec: | + Add tests for edge cases: missing context, serialization, + instanceof checks, and name property. + acceptance_criteria: + - id: "ac3_1" + type: "test" + description: "All processor tests pass with code-based assertions" + command: "npm test -- --filter documents" + expected: "All tests pass, 0 failures" + validation: "automated" + result: + status: "pass" + timestamp: "2026-02-18T14:28:00Z" + output: "18 tests passed, 0 failed" + notes: "Added 8 new tests (one per error code), updated 6 existing tests" + - id: "ac3_2" + type: "test" + description: "Full test suite passes with no regressions" + command: "npm test" + expected: "Exit code 0" + validation: "automated" + result: + status: "pass" + timestamp: "2026-02-18T14:35:00Z" + output: "142 tests passed, 0 failed, 0 skipped" + - id: "ac3_3" + type: "integration" + description: "Document upload endpoint returns structured error on invalid input" + command: "npm run test:integration -- --filter document-upload" + expected: "Exit code 0" + validation: "automated" + result: + status: "pass" + timestamp: "2026-02-18T14:38:00Z" + output: "3 integration tests passed" + - id: "ac3_4" + type: "custom" + description: "Error code coverage matches throw site count" + validation: "manual" + result: + status: "pass" + timestamp: "2026-02-18T14:40:00Z" + notes: "8 error codes defined, 8 throw sites migrated, 8 dedicated test cases added - 1:1:1 coverage" + status: "completed" + 
+rollback: + strategy: "per_phase" + commands: + phase1: "git checkout HEAD -- src/errors/codes.ts src/errors/document-error.ts" + phase2: "git checkout HEAD -- src/services/documents/processor.ts src/errors/index.ts" + phase3: "git checkout HEAD -- src/services/documents/processor.test.ts src/errors/document-error.test.ts" + +self_eval: + completeness: 3 + architecture_fidelity: 3 + spec_alignment: 2 + validation_depth: 2 + total: 10 + notes: | + All 8 error paths migrated with 1:1 enum coverage. Error class follows existing + BaseError pattern in the codebase. Tests cover every error code individually plus + integration test for the upload endpoint. No deviations from spec. + second_pass_performed: false + +deviations: [] + +metadata: + estimated_effort_hours: 2.5 + actual_effort_hours: 3.0 + ai_model: "claude-opus-4-6" + react_cycles: 12 + tags: + - "error-handling" + - "typescript" + - "refactor" diff --git a/.ai/specs/archive/2026-04/issue-2.yaml b/.ai/specs/archive/2026-04/issue-2.yaml new file mode 100644 index 0000000..6dd16d9 --- /dev/null +++ b/.ai/specs/archive/2026-04/issue-2.yaml @@ -0,0 +1,114 @@ +spec_version: "1.1" +task_id: issue-2 +created: "2026-04-21T00:00:00Z" +updated: "2026-04-21T04:22:50Z" +status: "completed" +task: + title: "The issue asks for a docs-only clarification that PR triage comments can run against live PRs, including draft PRs opened by automaton lanes." + summary: >- + Update the public automaton flow docs to explicitly state that PR triage + comments can run against live PRs, including draft PRs opened by automaton + lanes, while keeping the lane docs-only and leaving workflow logic and + secret handling unchanged. 
+ size: micro + risk_level: low + context: + source: github_issue + source_id: "2" + source_url: "https://github.com/nilstate/aster/issues/2" + target_repo: "nilstate/aster" + repo_context: "target_repo=nilstate/aster ; git=detached@a817b040f322 ; paths=README.md, package.json, .ai/config.yaml ; root_scripts=check, docs:build, docs:ci, shakeout:local, site:build, site:check, site:ci" + files_impacted: + - docs/flows.md + - .ai/specs/active/issue-2.yaml + invariants: + - "Keep the remediation docs-only in repo code terms: do not modify workflow logic, secret handling, or unrelated files." + - "The clarification must be explicit about live PRs and draft PRs opened by automaton lanes." + - "Use the existing docs build validation from package.json: npm run docs:build." + assumptions: + - "docs/flows.md is the primary target because the issue body identifies it as a likely relevant public page for automaton flow behavior." + - "If implementation evidence shows the clarification belongs in a different docs file, revise the spec before mutation instead of widening scope ad hoc." + objectives: + - "Add one clear public-docs statement covering PR triage comments on live PRs, including draft PRs opened by automaton lanes." + - "Keep the implementation docs-only apart from the lifecycle-managed active scafld spec." + - "Preserve a passing docs build with npm run docs:build." + touchpoints: + - path: docs/flows.md + description: "Public automaton flow documentation that will receive the behavior clarification." + - path: .ai/specs/active/issue-2.yaml + description: "Lifecycle-managed scafld spec recorded after start for audit and archival." + acceptance: + definition_of_done: + - "docs/flows.md explicitly states that PR triage comments can run against live PRs, including draft PRs opened by automaton lanes." + - "The lane remains docs-only and does not modify workflow logic or secret-handling surfaces." + - "`npm run docs:build` passes after the docs update." 
+ validation: + - command: 'test -n "$(git diff --name-only -- docs/flows.md)"' + purpose: "Confirm the targeted docs file is modified in the working tree." + - command: 'grep -F "PR triage comments can run against live PRs, including draft PRs opened by automaton lanes." docs/flows.md' + purpose: "Confirm the exact clarification text is present in the targeted docs file." + - command: 'test -z "$(git diff --name-only -- .github)"' + purpose: "Confirm no workflow files were modified." + - command: 'test -z "$(git diff --name-only -- .ai/config.yaml)"' + purpose: "Confirm the notable automation config path was not modified." + - command: 'npm run docs:build' + purpose: "Confirm the docs build still succeeds." +planning_log: + - timestamp: "2026-04-21T00:00:00Z" + actor: "builder" + summary: "Grounded the task in github issue #2 and kept scope to a docs-only clarification plus the lifecycle-managed active spec." + - timestamp: "2026-04-21T00:00:00Z" + actor: "builder" + summary: "Selected docs/flows.md as the primary target from the issue's listed likely pages and used the existing docs:build script from package.json as the validation hook." + - timestamp: "2026-04-21T04:20:58Z" + actor: "cli" + summary: "Spec approved" + - timestamp: "2026-04-21T04:20:58Z" + actor: "cli" + summary: "Execution started" + - timestamp: "2026-04-21T04:22:50Z" + actor: "cli" + summary: "Spec completed" +phases: + - id: phase1 + name: "Clarify PR triage coverage in public docs" + objective: "Document that PR triage comments can operate on live PRs, including draft PRs opened by automaton lanes, without changing workflow behavior." + changes: + - file: docs/flows.md + summary: "Add one explicit clarification sentence about PR triage comment coverage for live and draft PRs." + - file: .ai/specs/active/issue-2.yaml + summary: "Persist the approved scafld spec that governs this bounded docs-only lane." 
+ acceptance_criteria: + - 'docs/flows.md contains the exact sentence "PR triage comments can run against live PRs, including draft PRs opened by automaton lanes."' + - "No files under .github are modified by the remediation." + - "`npm run docs:build` succeeds after the docs update." + status: planned +rollback: + strategy: "If the clarification is incorrect, misplaced, or the docs build fails, restore the targeted docs file and remove the lifecycle-managed active spec before abandoning or re-authoring the lane." + commands: + - "git restore --worktree --staged docs/flows.md" + - "rm -f .ai/specs/active/issue-2.yaml" + +review: + timestamp: "2026-04-21T04:21:45Z" + verdict: "pass" + review_rounds: 1 + reviewer_mode: "fresh_agent" + reviewer_session: "rx_a8725685eb69443bb7ef1a8774ea5b2f:reviewer-boundary" + round_status: "completed" + override_applied: false + override_reason: null + override_confirmed_at: null + passes: + - id: spec_compliance + result: "pass" + - id: scope_drift + result: "pass" + - id: regression_hunt + result: "pass" + - id: convention_check + result: "pass" + - id: dark_patterns + result: "pass" + blocking_count: 0 + non_blocking_count: 0 diff --git a/docs/flows.md b/docs/flows.md index 62005b6..336dc03 100644 --- a/docs/flows.md +++ b/docs/flows.md @@ -24,8 +24,9 @@ This lane has two entry modes: collaboration/thread-teaching issues are recognized as approval records and skipped before objective triage begins 2. PR mode builds a live PR snapshot, runs it through `github-triage`, and - posts a maintainer comment back to the PR. Public-value and replay gates - block low-signal or duplicate comments for the same head SHA. Generated + posts a maintainer comment back to the PR. + PR triage comments can run against live PRs, including draft PRs opened by automaton lanes. + Public-value and replay gates block low-signal or duplicate comments for the same head SHA. 
Generated derived-state refresh PRs are blocked before model work because they are review surfaces, not new triage subjects From c976f308a577932c2f200b6d94ec270f6673f0cf Mon Sep 17 00:00:00 2001 From: kam Date: Tue, 21 Apr 2026 14:27:23 +1000 Subject: [PATCH 2/2] chore(issue): drop scafld runtime bundle --- .ai/prompts/harden.md | 84 --- .ai/scafld/OPERATORS.md | 131 ----- .ai/scafld/README.md | 72 --- .ai/scafld/config.yaml | 316 ----------- .ai/scafld/manifest.json | 49 -- .ai/scafld/prompts/exec.md | 307 ----------- .ai/scafld/prompts/harden.md | 84 --- .ai/scafld/prompts/plan.md | 203 ------- .ai/scafld/prompts/review.md | 169 ------ .ai/scafld/schemas/spec.json | 514 ------------------ .ai/scafld/specs/README.md | 99 ---- .../specs/examples/add-error-codes.yaml | 365 ------------- 12 files changed, 2393 deletions(-) delete mode 100644 .ai/prompts/harden.md delete mode 100644 .ai/scafld/OPERATORS.md delete mode 100644 .ai/scafld/README.md delete mode 100644 .ai/scafld/config.yaml delete mode 100644 .ai/scafld/manifest.json delete mode 100644 .ai/scafld/prompts/exec.md delete mode 100644 .ai/scafld/prompts/harden.md delete mode 100644 .ai/scafld/prompts/plan.md delete mode 100644 .ai/scafld/prompts/review.md delete mode 100644 .ai/scafld/schemas/spec.json delete mode 100644 .ai/scafld/specs/README.md delete mode 100644 .ai/scafld/specs/examples/add-error-codes.yaml diff --git a/.ai/prompts/harden.md b/.ai/prompts/harden.md deleted file mode 100644 index ed6041c..0000000 --- a/.ai/prompts/harden.md +++ /dev/null @@ -1,84 +0,0 @@ -# AI AGENT — HARDEN MODE - -**Status:** ACTIVE -**Mode:** HARDEN -**Output:** Append a round to `harden_rounds` in the spec; update `harden_status`. -**Do NOT:** Modify code outside the spec file while hardening. - ---- - -## Mission - -Interrogate the draft spec relentlessly until the operator and agent reach shared understanding. 
Walk down each branch of the design tree, resolving dependencies between decisions one-by-one — upstream choices first, so downstream questions are not wasted on premises that may shift. Stop when the operator says so, or when you run out of grounded questions. - -Harden is OPTIONAL and operator-driven. `scafld approve` does NOT gate on harden status. The operator runs `scafld harden ` when they want to stress-test a draft; they can skip it for trivial or well-understood specs. - ---- - -## Grounding Contract (load-bearing — read carefully) - -**Every question you emit MUST carry a `grounded_in` value matching EXACTLY ONE of these three patterns:** - -- `spec_gap:` — a TODO, `?`, empty array, vague clause, or internal contradiction at the named spec field. Example: `spec_gap:task.context.files_impacted`. -- `code::` — a symbol or location verified by `Read` or `Grep` in the CURRENT session before the question is emitted. You must actually look at the file. Example: `code:cli/scafld:1152`. -- `archive:` — a precedent in `.ai/specs/archive/` that bears on the current decision. Example: `archive:configurable-review-pipeline`. - -**Forbidden:** - -- Questions about behaviour the spec already answers. -- Citations to files you have not verified in this session. -- Recommended answers without their own citation. -- Invented file paths, function names, or archive task_ids. - -If you cannot produce a grounded question, stop. Do not invent one to pad the round. - ---- - -## Question Loop - -Ask ONE question at a time. For each question, provide: - -- The question itself (specific, answerable). -- `grounded_in` using one of the three patterns above. -- A **recommended answer** with its own citation (code, spec section, or archive). -- An `if_unanswered` default — what to write into the spec if the operator does not answer. This lets the loop terminate on a single side. - -Cap at `max_questions_per_round` from `.ai/config.yaml` (default 8). 
If you reach the cap without resolving the tree, stop and let the operator decide whether to start another round. - -Dependency ordering: before asking a downstream question, confirm its upstream premise is settled. If you ask "how does phase 3 validate X" before confirming "does the spec actually do X in phase 2", you are wasting the round. - ---- - -## Termination - -The loop ends when ANY of these happens: - -- Operator types `done` or `stop`. -- You run out of grounded questions (your three patterns are exhausted). -- You hit `max_questions_per_round`. - -There is no in-prompt `skip` keyword. If the operator does not want to harden, they simply do not run `scafld harden`. - ---- - -## Output Contract - -Write your round into the spec's `harden_rounds` array using the schema in `.ai/schemas/spec.json`. Each round: - -```yaml -harden_rounds: - - round: 1 - started_at: "2026-04-20T15:00:00Z" - ended_at: "2026-04-20T15:12:00Z" - outcome: "in_progress" # or passed, abandoned - questions: - - question: "Which module owns the session cleanup hook?" - grounded_in: "code:src/auth/session.ts:84" - recommended_answer: "src/auth/session.ts:cleanupSession (already defined)" - if_unanswered: "Default to existing cleanupSession; flag for confirmation." - answered_with: "(operator fills in)" -``` - -While the loop runs, set top-level `harden_status: "in_progress"`. The operator finalises a satisfactory round by running `scafld harden --mark-passed` — do NOT set `harden_status: passed` from the prompt loop. - -Re-running `scafld harden` on a spec that is already `passed` resets status to `in_progress` and appends a new round; prior rounds are preserved as audit trail. diff --git a/.ai/scafld/OPERATORS.md b/.ai/scafld/OPERATORS.md deleted file mode 100644 index 5a09f54..0000000 --- a/.ai/scafld/OPERATORS.md +++ /dev/null @@ -1,131 +0,0 @@ -# scafld — Operator Cheat Sheet - -A short, human-friendly guide for working with scafld task specs. 
-For full details, see `.ai/README.md` and `.ai/specs/README.md`. - ---- - -## 1. Tiny Change (Micro/Small, Low Risk) - -Use this for trivial, low-risk edits (comments, copy tweaks, tiny refactors). - -- In the spec: - - `task.size: "micro"` or `"small"` - - `task.risk_level: "low"` - - Optionally set `task.acceptance.validation_profile: "light"` -- Workflow: - - Plan: generate/update spec under `.ai/specs/drafts/` - - Approve: move to `.ai/specs/approved/` and set `status: "approved"` - - Execute: move to `.ai/specs/active/` and set `status: "in_progress"` - - Complete: move to `.ai/specs/archive/YYYY-MM/` and set `status: "completed"` - ---- - -## 2. Normal Task (Small/Medium, Medium Risk) - -Use this for typical feature work and non-trivial refactors. - -- In the spec: - - `task.size: "small"` or `"medium"` - - `task.risk_level: "medium"` - - Usually `task.acceptance.validation_profile: "standard"` -- Workflow: - - Plan: ensure `task.acceptance.definition_of_done` and `phases[*].acceptance_criteria` tell the same story. - - Approve: move to approved folder - - Execute: run all `acceptance_criteria` plus per-phase validation - - Complete: run full standard profile validation before archiving - ---- - -## 3. Big Change (Medium/Large, High Risk) - -Use this for high-impact work (auth, persistence, complex refactors). - -- In the spec: - - `task.size: "medium"` or `"large"` - - `task.risk_level: "high"` - - Usually `task.acceptance.validation_profile: "strict"` -- Workflow: - - Plan: - - Explicitly call out invariants and risks - - Use multiple phases with narrow scopes and strong acceptance criteria - - Approve: move to approved folder - - Execute: run all per-phase checks plus full `strict` profile - - Complete: thorough validation before archiving - ---- - -## 4. 
Quick Commands Reference - -```bash -scafld new my-task -t "My feature" -s small -r low # scaffold spec -scafld list # show all specs -scafld list active # filter by status -scafld status my-task # show details + phase progress -scafld validate my-task # check against schema -scafld harden my-task # optional: interrogate draft one grounded question at a time -scafld harden my-task --mark-passed # close the latest hardening round -scafld approve my-task # drafts/ -> approved/ (does not require harden) -scafld start my-task # approved/ -> active/ -scafld exec my-task # run acceptance criteria, record results -scafld exec my-task -p phase1 # run criteria for one phase only -scafld audit my-task # compare spec files vs git diff -scafld audit my-task -b main # audit against specific base ref -scafld diff my-task # show git history for spec -scafld review my-task # run configured automated passes + scaffold Review Artifact v3 -scafld complete my-task # read review, record verdict, archive (requires review) -scafld complete my-task --human-reviewed --reason "manual audit" # exceptional audited override when the review gate is blocked -scafld fail my-task # active/ -> archive/ (failed) -scafld cancel my-task # active/ -> archive/ (cancelled) -scafld report # aggregate stats across all specs -``` - ---- - -## 5. Validation Profiles - -| Profile | When to Use | What Runs | -|---------|-------------|-----------| -| `light` | micro/small, low risk | compile, acceptance items, perf eval | -| `standard` | small/medium, medium risk | compile, tests, lint, typecheck, security, perf eval | -| `strict` | medium/large, high risk | all standard checks + broader coverage | - ---- - -## 6. Status Lifecycle - -``` -draft → under_review → approved → in_progress → review → completed - ↓ ↓ - (blocked) failed - ↓ ↑ - (resume) fix + re-review -``` - ---- - -## 7. 
Review & Completion Workflow - -After execution, before completing: - -```bash -scafld review my-task # runs automated passes, scaffolds adversarial review - # reviewer fills in findings + Review Artifact v3 metadata in .ai/reviews/my-task.md -scafld complete my-task # reads review, records verdict, archives - # refuses if the latest review round is missing, malformed, incomplete, or failed -scafld complete my-task --human-reviewed --reason "manual audit" - # exceptional audited override; requires interactive confirmation -``` - -Review rounds accumulate — each `scafld review` appends a numbered Review Artifact v3 section with per-pass `pass_results`. The default five-layer pipeline is `spec_compliance`, `scope_drift`, `regression_hunt`, `convention_check`, and `dark_patterns`, ordered by explicit `order` fields in `.ai/config.yaml`. Prior rounds provide context for subsequent reviewers and make review provenance visible. - ---- - -## 8. Tips - -- **Always read the spec before executing** — understand what you're building -- **Keep phases small** — easier to validate and rollback -- **Run `scafld review` before completing** — the adversarial review catches what acceptance criteria miss -- **Review in a fresh session when possible** — avoids confirmation bias from the execution session -- **Self-eval honestly** — the 7/10 threshold keeps quality high; 10/10 requires justification -- **Archive completed specs** — they're your project history diff --git a/.ai/scafld/README.md b/.ai/scafld/README.md deleted file mode 100644 index f29b1b5..0000000 --- a/.ai/scafld/README.md +++ /dev/null @@ -1,72 +0,0 @@ -# scafld - Planning & Execution Framework - -**Version:** 1.0 - -scafld is a spec-driven framework for AI agent task planning and execution. Every task becomes a machine-readable YAML specification that flows through a defined lifecycle: plan, approve, execute, archive. - ---- - -## How It Works - -1. 
**Plan:** AI generates a task spec in `.ai/specs/drafts/` via conversational ReAct loop -2. **Harden (optional):** `scafld harden <task-id>` interrogates the draft one grounded question at a time. Every question and recommended answer cites a spec gap, a verified code location, or an archived precedent. Run on high-risk or ambiguous specs; skip on trivial ones. -3. **Approve:** Developer reviews and moves spec to `.ai/specs/approved/`. Approve does NOT consult harden status. -4. **Execute:** AI picks up the approved spec, executes phases, validates at each checkpoint -5. **Review:** Adversarial review finds what execution missed — `scafld review` runs the configured `spec_compliance` and `scope_drift` checks, scaffolds Review Artifact v3, and prepares the adversarial `regression_hunt`, `convention_check`, and `dark_patterns` passes in the latest round -6. **Archive:** Completed specs move to `.ai/specs/archive/YYYY-MM/` with truthful review results recorded, or a human-reviewed override audited explicitly when the gate is blocked - -The approval gate is the human oversight boundary. The review gate is the quality boundary. During execution, the agent operates autonomously through all phases, pausing only when blocked or deviating from the spec. A normal completion path still stays agent-driven; the human-reviewed override is an exceptional audited escape hatch, not the default workflow. - -The default review topology lives in `config.yaml` and uses five ordered built-in passes: `spec_compliance`, `scope_drift`, `regression_hunt`, `convention_check`, and `dark_patterns`. Review Artifact v3 stores per-pass `pass_results`, reviewer provenance, and round status for that configured topology.
- ---- - -## Directory Structure - -``` -.ai/ -├── README.md # This file -├── config.yaml # Global configuration (invariants, validation, rubric) -├── prompts/ -│ ├── plan.md # Planning mode instructions -│ ├── exec.md # Execution mode instructions -│ └── review.md # Adversarial review mode instructions -├── reviews/ # Review findings per spec (gitignored) -├── schemas/ -│ └── spec.json # JSON schema for task specifications -├── specs/ # Task specs organized by lifecycle status -│ ├── README.md # Spec workflow and naming conventions -│ ├── drafts/ # status: draft | under_review -│ ├── approved/ # status: approved -│ ├── active/ # status: in_progress -│ └── archive/YYYY-MM/ # status: completed | failed | cancelled -├── playbooks/ # Reusable workflow templates (optional) -└── logs/ # Execution logs (optional, supplementary) -``` - ---- - -## Key Files - -| File | Purpose | -|------|---------| -| `config.yaml` | Invariants, validation profiles, rubric weights, safety rules | -| `prompts/plan.md` | System prompt for planning mode agents | -| `prompts/exec.md` | System prompt for execution mode agents | -| `prompts/review.md` | System prompt for adversarial review mode | -| `schemas/spec.json` | JSON schema for spec validation | -| `specs/README.md` | Spec directory structure, naming, and workflow | - ---- - -## Related Docs - -- [AGENTS.md](../AGENTS.md) - High-level AI agent policies -- [OPERATORS.md](OPERATORS.md) - Human-facing cheat sheet for working with specs -- [CONVENTIONS.md](../CONVENTIONS.md) - Coding standards and patterns - ---- - -## License - -MIT License - Free to use, modify, and distribute. 
diff --git a/.ai/scafld/config.yaml b/.ai/scafld/config.yaml deleted file mode 100644 index e4124b1..0000000 --- a/.ai/scafld/config.yaml +++ /dev/null @@ -1,316 +0,0 @@ -# scafld Configuration -# Version: 1.1 -# Purpose: Machine-readable control file for AI coding agents - -version: "1.0" - -# Status lifecycle: See specs/README.md for canonical state machine and transitions - -# ============================================================================= -# INVARIANTS (immutable during session) -# ============================================================================= -invariants: - # CUSTOMIZE: Replace these with your project's architectural invariants. - # These names are referenced in specs (context.invariants) and enforced during execution. - canonical: - - domain_boundaries # Respect layer separation - - error_envelope # Consistent error format - - no_legacy_code # No dual-reads, dual-writes, or runtime fallbacks - - no_test_logic_in_production # Keep test-only code in test files - - public_api_stable # Public APIs require approval to change - - config_from_env # Configuration from environment, never hardcoded - - # Code quality policies - no_legacy_code: true - no_test_logic_in_production: true - - # Change control - public_api_changes: require_approval # schemas, migrations, HTTP/event shapes - - # See also: ../CONVENTIONS.md for detailed coding standards - -# ============================================================================= -# MODES (planning vs execution) -# ============================================================================= -modes: - planning: - # Output: generate .ai/specs/{task-id}.yaml - output_format: spec_file - - # Requirements for a valid plan - require_task_outline: true - require_touchpoints: true - require_acceptance_checklist: true - - # Quality gate - self_eval_threshold: 7 - - # ReAct behavior - exploration_depth: thorough - show_reasoning: true - - execution: - # Input: load approved .ai/specs/{task-id}.yaml - 
input_format: spec_file - require_approval: true - - # Checkpoint frequency - checkpoint_frequency: per_phase - - # Quality controls - self_review: mandatory - rollback_on_fail: true - strict_spec_adherence: true - - # Output style - progress_format: concise - show_reasoning: true - -# ============================================================================= -# HARDEN -# ============================================================================= -# Optional pre-approval interrogation phase. Operator-driven: `scafld approve` -# does NOT gate on harden status. Only non-gating knobs live here. -harden: - max_questions_per_round: 8 # cap per `scafld harden` invocation - grounding_required: true # forbid ungrounded questions in the prompt - -# ============================================================================= -# VALIDATION PIPELINES -# ============================================================================= -# CUSTOMIZE: Replace placeholder commands below with your actual build/test/lint commands. 
-validation: - # Run after each phase (fast, targeted) - # Placeholders: - # - {spec_pattern}: test file or example filter for the current phase - # - {changed_files}: union of phases[N].changes[*].file for the current phase - per_phase: - - id: compile_check - type: command - command: "echo 'Replace with your compile/build check command'" - required: true - - - id: targeted_tests - type: command - command: "echo 'Replace with your test command, e.g.: npm test -- {spec_pattern}'" - required: true - - - id: boundary_check - type: command - command: "echo 'Replace with your boundary/integration check, e.g.: cross-module dependency scan'" - description: "Verify no cross-module side effects (used by strict profile)" - required: true - - - id: acceptance_item_check - type: spec_validation - description: "Verify all phase acceptance_criteria pass" - required: true - - # Run once before commit (comprehensive) - pre_commit: - - id: full_test_suite - type: command - command: "echo 'Replace with your full test suite command'" - required: true - - - id: linter_suite - type: command - command: "echo 'Replace with your linter command'" - required: false # warn only - - - id: typecheck - type: command - command: "echo 'Replace with your typecheck command'" - required: true - - - id: security_scan - type: command - command: "rg -i '(password|secret|api[_-]?key)\\s*=\\s*[\"']\\w' --type-add 'code:*.{js,ts,py,rb,go,java}' --type code" - description: "Detect hardcoded secrets" - required: true - expected: "no matches" - - - id: perf_eval - type: self_evaluation - description: "AI scores its work against rubric" - threshold: 7 - required: true - - # Validation profiles map task risk/size to concrete per_phase + pre_commit steps. - # EXEC agents should prefer `task.acceptance.validation_profile` when present; - # otherwise derive a profile from `task.risk_level`: - # low → light, medium → standard, high → strict. 
- profiles: - # Light: compile + acceptance only, quick feedback loop - light: - per_phase: ["compile_check", "acceptance_item_check"] - pre_commit: ["perf_eval"] - # Standard: adds targeted tests per phase, full validation at commit - standard: - per_phase: ["compile_check", "targeted_tests", "acceptance_item_check"] - pre_commit: ["full_test_suite", "linter_suite", "typecheck", "security_scan", "perf_eval"] - # Strict: broader test coverage per phase (boundary check ensures no - # cross-module side effects), plus all pre_commit checks from standard - strict: - per_phase: ["compile_check", "targeted_tests", "boundary_check", "acceptance_item_check"] - pre_commit: ["full_test_suite", "linter_suite", "typecheck", "security_scan", "perf_eval"] - -# ============================================================================= -# SELF-EVALUATION RUBRIC -# ============================================================================= -rubric: - # Scoring dimensions (0-10 scale) - completeness: - weight: 3 - description: "0=partial, 1=meets ask, 2=edge cases, 3=edge cases + conventions" - - architecture_fidelity: - weight: 3 - description: "0=unclear, 1=respects boundaries, 2=uses patterns, 3=improves separation" - - spec_alignment: - weight: 2 - description: "0=not checked, 1=aligned, 2=proposed improvements" - - validation_depth: - weight: 2 - description: "0=missing, 1=targeted, 2=targeted + broader checks" - - # Minimum acceptable score - threshold: 7 - - # Action on low score - on_below_threshold: "perform_second_pass" - -# ============================================================================= -# ADVERSARIAL REVIEW -# ============================================================================= -# Mandatory review gate before scafld complete can archive a spec. -# Every spec gets the same review — no profiles. -# Recommended: run the agent review in a fresh context/session. -review: - # Review pipeline is built from named built-in passes only. 
- # Ordering is explicit; scafld sorts by `order`, not mapping insertion luck. - automated_passes: - spec_compliance: - order: 10 - title: "Spec Compliance" - description: "Re-run acceptance criteria to verify code satisfies the spec" - scope_drift: - order: 20 - title: "Scope Drift" - description: "Compare spec scope vs actual git diff and flag undeclared changes" - - adversarial_passes: - regression_hunt: - order: 30 - title: "Regression Hunt" - description: "Trace callers, importers, and downstream consumers for regressions" - convention_check: - order: 40 - title: "Convention Check" - description: "Check changed code against CONVENTIONS.md and AGENTS.md" - dark_patterns: - order: 50 - title: "Dark Patterns" - description: "Hunt for subtle bugs, hardcodes, races, and safety gaps" - -# ============================================================================= -# REACT PATTERN (reasoning + acting) -# ============================================================================= -react: - enabled: true - - # Cycle structure - cycle: - - thought: "Analyze the task/phase objective" - - action: "Search codebase, read files, or apply changes" - - observation: "Capture results, check outputs" - - thought: "Evaluate success, decide next step" - - # Reasoning visibility - log_thoughts: true - - # Iteration limits - max_cycles_per_phase: 10 - max_cycles_planning: 20 - -# ============================================================================= -# TECH STACK CONTEXT (customize for your project) -# ============================================================================= -tech_stack: - # CUSTOMIZE: Replace with your actual tech stack - backend: - language: "Your language (e.g., Python 3.11, Ruby 3.2, Go 1.21)" - framework: "Your framework (e.g., Django, Rails, FastAPI)" - - frontend: - framework: "Your framework (e.g., React, Vue, Next.js)" - typescript_version: "5.x" - - shared: - error_format: "Your error format (e.g., Problem+JSON RFC 7807)" - api_spec: "Your 
API spec format (e.g., OpenAPI 3.1)" - -# ============================================================================= -# REPO LAYOUT (customize for your project) -# ============================================================================= -repo_layout: - # CUSTOMIZE: Replace with your actual directory layout - backend: "backend/" - frontend: "frontend/" - specs: ".ai/specs/" - logs: ".ai/logs/" - -# ============================================================================= -# COMMUNICATION STYLE -# ============================================================================= -communication: - # Progress updates during EXEC mode - progress: - format: concise - include_reasoning: false - include_acceptance_status: true - - # When blocked - blocking_issues: - format: structured - require_recommendation: true - - # Final summary - completion: - include_perf_eval: true - include_deviations: true - include_next_actions: true - -# ============================================================================= -# SAFETY & SECURITY -# ============================================================================= -safety: - # Destructive operations - require_approval_for: - - schema_migrations - - public_api_changes - - data_deletion - - production_deployments - - # Automatic checks - prevent: - - hardcoded_secrets - - unbounded_queries - - sql_injection_patterns - - xss_vulnerabilities - -# ============================================================================= -# EXPERIMENTAL FEATURES -# ============================================================================= -experimental: - # Auto-generate acceptance criteria from natural language - auto_acceptance_criteria: true - - # Self-healing: auto-fix failed acceptance criteria (1 retry) - self_healing: true - max_healing_attempts: 1 - - # Parallel phase execution (if phases are independent) - parallel_execution: false diff --git a/.ai/scafld/manifest.json b/.ai/scafld/manifest.json deleted file mode 100644 index 
0c0e759..0000000 --- a/.ai/scafld/manifest.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "managed_assets": { - ".ai/scafld/OPERATORS.md": { - "sha256": "adcfc7d3c6855592e77df1f5fa9575e51c951f27c45ea905e39edceba73a244c", - "source": ".ai/OPERATORS.md" - }, - ".ai/scafld/README.md": { - "sha256": "a2fc41a54f1a862fc197800503b0176771f7d384087c66215098d11da283f311", - "source": ".ai/README.md" - }, - ".ai/scafld/config.yaml": { - "sha256": "9242459b820acdb35cf84d080a793e916003f32211c18fb7bd3fac219567d6b6", - "source": ".ai/config.yaml" - }, - ".ai/scafld/prompts/exec.md": { - "sha256": "de8630ef115c368b3343da095d474dce28ecdaa6220eeb898c54a4aa95bc5e88", - "source": ".ai/prompts/exec.md" - }, - ".ai/scafld/prompts/harden.md": { - "sha256": "57c1c3f1bac5eba042c91c8284064bc3f89c1f296b1aedfcce5485a4ac1e26cb", - "source": ".ai/prompts/harden.md" - }, - ".ai/scafld/prompts/plan.md": { - "sha256": "5feb66faf88ec9c85ea404506f166a2c9ca763b0fba78e85952f8bc339855325", - "source": ".ai/prompts/plan.md" - }, - ".ai/scafld/prompts/review.md": { - "sha256": "61a9a8ce6b495c89993a2dd7aa81f675be9eb00cef9c82a4dcbc4a355171f73c", - "source": ".ai/prompts/review.md" - }, - ".ai/scafld/schemas/spec.json": { - "sha256": "fc3d3bd91614ff24942a1d7b9be4cfb556cecd86fc3751f295f2db1eefbc7b5e", - "source": ".ai/schemas/spec.json" - }, - ".ai/scafld/specs/README.md": { - "sha256": "e73a3c6f5d68762780ad0248510e8d96bd8685f215a2a9438b88e434a7c96b70", - "source": ".ai/specs/README.md" - }, - ".ai/scafld/specs/examples/add-error-codes.yaml": { - "sha256": "e44e65bf3547a262d114f515fe9b3dcd648789877e1958547a587619b1fb41d5", - "source": ".ai/specs/examples/add-error-codes.yaml" - } - }, - "scafld_version": "1.4.6", - "schema_version": 1, - "source_commit": "e15050de871ca3c9aba53c6199271a13140e4fcc", - "source_dirty": false, - "workspace_config_mode": "legacy_overlay" -} diff --git a/.ai/scafld/prompts/exec.md b/.ai/scafld/prompts/exec.md deleted file mode 100644 index 3150101..0000000 --- 
a/.ai/scafld/prompts/exec.md +++ /dev/null @@ -1,307 +0,0 @@ -# AI AGENT — EXECUTION MODE - -**Status:** ACTIVE -**Mode:** EXEC -**Input:** Approved specification file (`.ai/specs/approved/{task-id}.yaml`, promoted to `.ai/specs/active/{task-id}.yaml` when execution starts) -**Output:** Code changes, test runs, validation results - ---- - -## Mission - -You are an AI agent in **EXECUTION MODE**. Your objective is to execute an approved task specification, validating your work at every checkpoint, and delivering production-ready code. - ---- - -## Prerequisites - -Before entering execution mode: - -1. **Load Spec:** Read from `.ai/specs/approved/{task-id}.yaml` -2. **Verify Status:** `spec.status` MUST be `"approved"` -3. **Move to Active:** Move spec to `.ai/specs/active/{task-id}.yaml` -4. **Update Status:** Set `status: "in_progress"` in spec file - -If spec not in `approved/` folder or status is NOT approved: -``` -Cannot execute: Spec must be in approved/ folder with status "approved" - Check: .ai/specs/approved/{task-id}.yaml - Action: Complete planning and approval first, or move file to approved/ -``` - ---- - -## Resume Protocol - -If the spec is already in `.ai/specs/active/` with `status: "in_progress"` and some phases have `status: "completed"`: - -1. **Skip completed phases** - do not re-execute them -2. **Resume from the first phase with `status: "pending"` or `status: "failed"`** -3. If a failed phase has rollback commands, verify the rollback was applied before retrying -4. Log the resume point in the spec's `planning_log` or phase status - ---- - -## Per-Phase Execution - -For **each phase**, follow this cycle: - -### 1. Read & Plan -- Read phase objective and changes specification -- Identify files to modify and acceptance criteria to satisfy -- Predict potential issues (boundary violations, test failures) - -### 2. 
Apply Changes -- **Read first:** `Read(file)` to understand current state -- **Edit precisely:** Use `Edit()` with exact old_string/new_string -- **Match intent:** Does the change match `content_spec`? - -### 3. Validate -- Run ALL `acceptance_criteria` for this phase -- Record pass/fail status and output -- Update the spec's phase entry with results: - -```yaml -# Update phase status and acceptance criteria results inline -phases[N]: - status: "completed" # or "failed" - acceptance_criteria: - - id: ac1_1 - result: - status: pass - timestamp: "2025-01-17T11:45:30Z" - output: "{stdout/stderr summary}" -``` - -### 4. Decide -- **If ALL criteria pass:** Mark phase `status: "completed"`, proceed to next phase -- **If ANY criterion fails:** - 1. Attempt self-healing (1 retry max, if enabled in config) - 2. If still failing, rollback phase changes - 3. Mark phase `status: "failed"` and report to user - -Set `phases[N].status` to `"in_progress"` when you begin work on a phase -and update it to `"completed"` or `"failed"` based on acceptance criteria results. - -### Phase Logging - -After completing each phase, write a brief summary to the phase's status in the spec file. This is the primary record of execution progress. Example: - -```yaml -phases[N]: - status: "completed" - summary: "Added error constants to errors module, all 3 acceptance criteria passed" -``` - -The `.ai/logs/{task-id}.log` file is optional and supplementary - use it for detailed debugging traces when needed, but it is not required. 
- ---- - -## Acceptance Criteria - -For each `acceptance_criteria` item: - -```yaml -- id: ac1_1 - type: compile - command: "your-compile-command" - expected: "exit code 0" -``` - -**Common criterion types:** - -| Type | Command Example | Expected | Validation | -|------|----------------|----------|------------| -| `compile` | `your-compile-command` | `exit code 0` | Automated | -| `test` | `your-test-command {spec_pattern}` | `PASS` | Automated | -| `boundary` | `rg 'forbidden_pattern' {changed_files}` | `no matches` | Automated | -| `integration` | `your-e2e-command` | `exit code 0` | Automated | -| `security` | `rg -i 'password\\s*=\\s*"\\w+"'` | `no matches` | Automated | -| `documentation` | N/A | See `description` | Manual | -| `custom` | N/A | See `description` | Manual | - -**Placeholder Reference:** - -- **`{spec_pattern}`** - Test file path or example filter for the current phase -- **`{changed_files}`** - Union of `phases[N].changes[*].file` for the phase being validated - ---- - -## Definition-of-Done Checklist - -- Treat `task.acceptance.definition_of_done[*]` as hard requirements. -- When a DoD item is satisfied, update its `status` to `done`. -- Keep statuses in sync with reality; reviewers rely on this checklist. 
- -### Self-Review (Per Phase) - -After running acceptance criteria, verify: - -- [ ] All criteria passed (or failures documented) -- [ ] Update `task.acceptance.definition_of_done` entries related to this phase -- [ ] No boundary violations introduced -- [ ] Diff matches `phase.changes.content_spec` (no scope creep) -- [ ] No secrets or internal paths added - ---- - -## Final Validation (After All Phases) - -Once all phases complete, run pre-commit validation using the appropriate profile from `.ai/config.yaml`: - -- Determine profile: - - Prefer `task.acceptance.validation_profile` if set (`light | standard | strict`) - - Otherwise derive from `task.risk_level` (`low` -> `light`, `medium` -> `standard`, `high` -> `strict`) -- For the chosen profile, run the listed validation steps. - ---- - -## Adversarial Review - -After all phases complete and before `scafld complete`: - -1. Run `scafld review <task-id>` — runs automated passes (spec compliance, scope drift) and generates the review file -2. Start a **fresh agent session** when available to reduce confirmation bias -3. Read `.ai/prompts/review.md` for the review prompt and attack vectors -4. Review the spec + git diff, write findings to `.ai/reviews/{task-id}.md`, and update the latest round's review provenance metadata -5. Fix any blocking findings if needed -6. Run `scafld complete <task-id>` — reads the review, records verdict, archives - -The default Review Artifact v3 pipeline is `spec_compliance`, `scope_drift`, `regression_hunt`, `convention_check`, and `dark_patterns`. `scafld review` scaffolds the adversarial sections in configured order and expects the reviewer to update `round_status` plus per-pass `pass_results` before completion. - -`scafld complete` will **refuse to archive** if the latest review round is missing, malformed, incomplete, or failed. The only bypass is the exceptional human path: `scafld complete <task-id> --human-reviewed --reason "<reason>"`, which requires interactive confirmation and records an audited override.
- ---- - -## Self-Evaluation & Deviations - -After all phases and final validation: - -- Populate `self_eval` in the spec using the rubric weights from `.ai/config.yaml` -- If `total` falls below the rubric threshold, perform a second pass and set `second_pass_performed: true` -- Record any intentional deviations from invariants or the written spec in `deviations[*]` - ---- - -## Output Format - -### Progress Updates (During Execution) - -**Concise format (one line per phase):** -``` -Phase 1: Extract helpers | 4/4 criteria passed | Next: Phase 2 -Phase 2: Wire into module | 3/3 criteria passed | Next: Phase 3 -Phase 3: Add documentation | In progress... -``` - -### Blocking Issues - -If execution is blocked: -``` -Phase {N} blocked - Criterion: ac{N}_{X} - {description} - Error: {brief error message} - - Recommendation: - {One concrete solution} - - Awaiting guidance. -``` - -### Final Summary - -After all phases complete: -``` -Task complete: {task_id} - Phases: {N}/{N} completed - Acceptance: {total_passed}/{total_criteria} - PERF-EVAL: {total}/10 - Deviations: {count} - Status: {ready_for_commit | needs_review | failed} - Files changed: {count} -``` - ---- - -## Rollback Handling - -### Automatic Rollback (Acceptance Criteria Fail) - -```bash -# Execute rollback command from spec -{rollback_command} - -# Verify rollback success -git status -git diff -``` - -### Manual Rollback (User Requested) - -Revert phases in reverse order using `spec.rollback.commands`. - ---- - -## Deviations from Spec - -If you MUST deviate from the approved spec: - -1. **Pause execution** -2. **Check approval requirements** in `task.constraints.approvals_required` and `.ai/config.yaml` safety rules -3. **Document deviation** in `deviations[]` array -4. **Request approval** before proceeding - ---- - -## Self-Healing (Experimental) - -If enabled in `.ai/config.yaml` (`experimental.self_healing: true`): - -When an acceptance criterion fails: - -1. 
Analyze failure and identify root cause -2. Apply targeted correction -3. Re-run criterion -4. Max attempts: 1 (no infinite loops) - -If self-healing fails, proceed to rollback. - ---- - -## Exit Conditions - -### Success - -Move spec to `.ai/specs/archive/{YYYY-MM}/`, set `status: "completed"`. - -### Failure - -Move spec to `.ai/specs/archive/{YYYY-MM}/`, set `status: "failed"`, document recommendation. - -### Blocked - -Keep spec in `.ai/specs/active/`, `status: "in_progress"` (paused). Await user input. - ---- - -## Mode Constraints - -**DO:** -- Follow spec exactly (deviations require approval) -- Run all acceptance criteria after each phase -- Rollback on failure (unless self-healing succeeds) -- Update spec file with execution results - -**DO NOT:** -- Skip phases or acceptance criteria -- Make changes outside of spec.phases -- Modify approved spec structure (only update execution fields) -- Continue execution if a phase fails (without user approval) - ---- - -## Remember - -- **Validate obsessively** (acceptance criteria are non-negotiable) -- **Rollback fearlessly** (failure is safe when reversible) -- **Communicate concisely** (progress updates, not essays) diff --git a/.ai/scafld/prompts/harden.md b/.ai/scafld/prompts/harden.md deleted file mode 100644 index ed6041c..0000000 --- a/.ai/scafld/prompts/harden.md +++ /dev/null @@ -1,84 +0,0 @@ -# AI AGENT — HARDEN MODE - -**Status:** ACTIVE -**Mode:** HARDEN -**Output:** Append a round to `harden_rounds` in the spec; update `harden_status`. -**Do NOT:** Modify code outside the spec file while hardening. - ---- - -## Mission - -Interrogate the draft spec relentlessly until the operator and agent reach shared understanding. Walk down each branch of the design tree, resolving dependencies between decisions one-by-one — upstream choices first, so downstream questions are not wasted on premises that may shift. Stop when the operator says so, or when you run out of grounded questions. 
- -Harden is OPTIONAL and operator-driven. `scafld approve` does NOT gate on harden status. The operator runs `scafld harden <task-id>` when they want to stress-test a draft; they can skip it for trivial or well-understood specs. - ---- - -## Grounding Contract (load-bearing — read carefully) - -**Every question you emit MUST carry a `grounded_in` value matching EXACTLY ONE of these three patterns:** - -- `spec_gap:<field>` — a TODO, `?`, empty array, vague clause, or internal contradiction at the named spec field. Example: `spec_gap:task.context.files_impacted`. -- `code:<file>:<line>` — a symbol or location verified by `Read` or `Grep` in the CURRENT session before the question is emitted. You must actually look at the file. Example: `code:cli/scafld:1152`. -- `archive:<task_id>` — a precedent in `.ai/specs/archive/` that bears on the current decision. Example: `archive:configurable-review-pipeline`. - -**Forbidden:** - -- Questions about behaviour the spec already answers. -- Citations to files you have not verified in this session. -- Recommended answers without their own citation. -- Invented file paths, function names, or archive task_ids. - -If you cannot produce a grounded question, stop. Do not invent one to pad the round. - ---- - -## Question Loop - -Ask ONE question at a time. For each question, provide: - -- The question itself (specific, answerable). -- `grounded_in` using one of the three patterns above. -- A **recommended answer** with its own citation (code, spec section, or archive). -- An `if_unanswered` default — what to write into the spec if the operator does not answer. This lets the loop terminate on a single side. - -Cap at `max_questions_per_round` from `.ai/config.yaml` (default 8). If you reach the cap without resolving the tree, stop and let the operator decide whether to start another round. - -Dependency ordering: before asking a downstream question, confirm its upstream premise is settled.
If you ask "how does phase 3 validate X" before confirming "does the spec actually do X in phase 2", you are wasting the round. - ---- - -## Termination - -The loop ends when ANY of these happens: - -- Operator types `done` or `stop`. -- You run out of grounded questions (your three patterns are exhausted). -- You hit `max_questions_per_round`. - -There is no in-prompt `skip` keyword. If the operator does not want to harden, they simply do not run `scafld harden`. - ---- - -## Output Contract - -Write your round into the spec's `harden_rounds` array using the schema in `.ai/schemas/spec.json`. Each round: - -```yaml -harden_rounds: - - round: 1 - started_at: "2026-04-20T15:00:00Z" - ended_at: "2026-04-20T15:12:00Z" - outcome: "in_progress" # or passed, abandoned - questions: - - question: "Which module owns the session cleanup hook?" - grounded_in: "code:src/auth/session.ts:84" - recommended_answer: "src/auth/session.ts:cleanupSession (already defined)" - if_unanswered: "Default to existing cleanupSession; flag for confirmation." - answered_with: "(operator fills in)" -``` - -While the loop runs, set top-level `harden_status: "in_progress"`. The operator finalises a satisfactory round by running `scafld harden --mark-passed` — do NOT set `harden_status: passed` from the prompt loop. - -Re-running `scafld harden` on a spec that is already `passed` resets status to `in_progress` and appends a new round; prior rounds are preserved as audit trail. diff --git a/.ai/scafld/prompts/plan.md b/.ai/scafld/prompts/plan.md deleted file mode 100644 index 8c733bb..0000000 --- a/.ai/scafld/prompts/plan.md +++ /dev/null @@ -1,203 +0,0 @@ -# AI AGENT — PLANNING MODE - -**Status:** ACTIVE -**Mode:** PLAN -**Output:** Conversational task specification file (`.ai/specs/{task-id}.yaml`) -**Do NOT:** Modify code outside `.ai/specs/` while planning - ---- - -## Mission - -You are in **PLANNING MODE**. 
Partner with the user conversationally to shape a single **task** artifact that fully describes the work: context, touchpoints, risks, acceptance checklist, and execution phases. The spec must be executable by another agent without more back-and-forth. - ---- - -## Conversational ReAct Loop - -Iterate until the task artifact feels complete: - -1. **THOUGHT:** Interpret the request in repo terms. Identify unknowns. -2. **ACTION:** Gather evidence (search, read, diff) to answer the unknowns. -3. **OBSERVATION:** Capture what you learned (files, invariants, risks). -4. **THOUGHT:** Update the `task` block, acceptance, and phases. Ask clarifying questions when information is missing. -5. **REPEAT** until all required fields are filled and assumptions are explicit. - -Constraints: -- Max 20 cycles; document assumptions if still uncertain. -- Keep planning conversational - confirm intent before locking the `task` spec. -- Every update to the spec should be reflected in `planning_log`. - -**Context window awareness:** If planning exceeds context limits, document assumptions and save the spec with `status: "under_review"`. Resuming planning later is better than losing work. - ---- - -## Required Output Structure - -Produce a YAML spec conforming to `.ai/schemas/spec.json` (v1.1). - -Validation profiles, rubric weights, invariants, and safety rules are defined in `.ai/config.yaml` - reference them, don't duplicate them here. - -### Minimal Skeleton - -```yaml -spec_version: "1.1" -task_id: "{kebab-case}" -created: "{ISO-8601}" -updated: "{ISO-8601}" -status: "draft" - -task: - title: "{short heading}" - summary: "{2-3 sentence overview}" - size: "micro | small | medium | large" - risk_level: "low | medium | high" - context: - packages: ["src/module/...", "lib/..."] - files_impacted: - - path: "{relative path}" - lines: "100-150" | [100,150] | "all" - reason: "{why}" - invariants: ["domain_boundaries", ...] 
- related_docs: ["docs/...md"] - objectives: - - "{user goal}" - scope: - in_scope: ["..."] - out_of_scope: ["..."] - dependencies: ["..."] - assumptions: ["..."] - touchpoints: - - area: "{system/component}" - description: "{what changes here}" - risks: - - description: "{risk}" - impact: medium - mitigation: "{plan}" - acceptance: - validation_profile: "light | standard | strict" - definition_of_done: - - id: dod1 - description: "{checklist item}" - status: pending - validation: - - id: dod1 - type: documentation | compile | test | boundary | integration | security | custom - description: "{how to verify}" - command: "{optional shell command}" - expected: "{optional expectation}" - constraints: - approvals_required: ["schema_change", ...] - non_goals: ["{explicitly not doing}" ] - info_sources: ["{links or files consulted}"] - notes: "{decisions, trade-offs}" - -planning_log: - - timestamp: "{ISO-8601}" - actor: "agent" - summary: "{what changed/confirmed in this iteration}" - -phases: - - id: phase1 - name: "{phase name}" - objective: "{outcome of this phase}" - changes: - - file: "{path}" - action: create | update | delete | move - lines: "all" - content_spec: | - {narrative of edits} - acceptance_criteria: - - id: ac1_1 - type: test | compile | boundary | documentation | custom | integration | security - command: "{command if automated}" - description: "{why this check proves success}" - expected: "{result}" - status: pending - -rollback: - strategy: per_phase | atomic | manual - commands: - phase1: "git checkout HEAD -- path" - -self_eval, deviations, metadata remain as in earlier versions (fill null/defaults during planning). -``` - ---- - -## Building the `task` Block - -- **Title & summary:** Mirror the user's words; make it obvious what problem we're solving. -- **Size & risk:** Use `size` (`micro/small/medium/large`) and `risk_level` (`low/medium/high`) to communicate how heavy the change is. 
This guides how much validation to run and how detailed phases should be. -- **Context:** Reference actual packages/files. Keep `invariants` list aligned with `.ai/config.yaml` canonical invariants. -- **Objectives & scope:** Distinguish what we're doing vs. explicitly not doing. -- **Touchpoints:** Enumerate major systems, adapters, modules, or docs affected. This is the anchor for later validation. -- **Risks/assumptions:** Capture blockers early; if an assumption is shaky, call it out and set `status: "under_review"`. -- **Acceptance:** Treat `definition_of_done` as the non-negotiable checklist (one object per item with `id`, `description`, and default `status: pending`). `validation` entries describe how each DoD item will be verified. Optionally set `acceptance.validation_profile` to choose a validation profile; otherwise, EXEC should derive a profile from `risk_level`. -- **Constraints:** Move any approval needs here. EXEC agents must pause if `task.constraints.approvals_required` intersects `safety.require_approval_for` in `.ai/config.yaml`. - ---- - -## Phases & Acceptance Criteria - -- Each phase should map cleanly to a touchpoint or cohesive concern. -- `changes[].content_spec` should read like a design note (functions, behaviors, docs sections). -- Every phase needs at least one acceptance criterion. Use deterministic commands when possible; fall back to `documentation`/`custom` with clear reviewer instructions. -- Keep rollbacks scoped per phase unless the plan demands atomicity. - ---- - -## Planning Log - -Record significant conversational turns: - -- `summary` should capture what you agreed on (clarified scope, locked acceptance items, discovered dependency). -- If you made an assumption, log it and echo inside `task.assumptions`. -- Timestamps should be ISO-8601 (UTC). Use the order of discovery. - ---- - -## Approval Guidance - -- Ask for guidance only when you detect schema/migration/public API work. 
Otherwise, choose the best architecture-aligned approach and document the constraint in `task.constraints.approvals_required`. -- When explicitly punting on a higher-price option, capture the trade-off in `task.notes` or `scope.out_of_scope`. - ---- - -## Final Checklist Before Output - -- [ ] Spec validates against `.ai/schemas/spec.json` v1.1. -- [ ] `task_id` is unique (no clashes in `.ai/specs/**`). -- [ ] `task.touchpoints`, `task.acceptance.definition_of_done`, and `phases` tell the same story. -- [ ] Every assumption is documented; blockers set `status: "under_review"`. -- [ ] `planning_log` captures the major conversational steps. - ---- - -## Optional Next Step - -When planning is complete, the operator may run `scafld harden <task-id>` to interrogate the draft against grounded questions before approval. This step is optional and can be skipped by approving directly. - ---- - -## Blocked Planning Template - -If planning stops on missing info: - -``` -Warning: Planning blocked - Reason: {cannot determine X without Y} - Assumptions made: - - {assumption 1} - -Spec saved to: .ai/specs/drafts/{task-id}.yaml (status: under_review) -``` - ---- - -## Remember - -- Co-create the plan with the user - confirm direction before finalizing. -- Capture **one** high-quality plan; no more option matrices. -- Keep architecture invariants front-of-mind. -- Optimize for execution clarity: another agent should be able to pick this up and ship without guessing. diff --git a/.ai/scafld/prompts/review.md b/.ai/scafld/prompts/review.md deleted file mode 100644 index 201e5a0..0000000 --- a/.ai/scafld/prompts/review.md +++ /dev/null @@ -1,169 +0,0 @@ -# AI AGENT — REVIEW MODE - -**Mode:** REVIEW -**Input:** Spec (`.ai/specs/active/{task-id}.yaml`) + git diff -**Output:** Findings in `.ai/reviews/{task-id}.md` - ---- - -## Mission - -Find what is wrong. Not what is right. - -You are reviewing changes made during spec execution. A separate agent built this, or you did in a prior session. 
Either way, your job is to attack it. - -A review that finds zero issues is suspicious. Look harder. - ---- - -## Rules - -- Every finding must cite a specific file and line number -- Classify findings as **blocking** (must fix before merge) or **non-blocking** (should fix) -- Do not suggest improvements or refactors — only flag defects and omissions -- Do not modify any code — review only - ---- - -## Process - -1. Read the spec at `.ai/specs/active/{task-id}.yaml` -2. Read the git diff of all changes -3. Read `CONVENTIONS.md` and `AGENTS.md` -4. Read `.ai/reviews/{task-id}.md` — if prior review rounds exist, read what was found before. Don't re-report fixed issues. Note if a prior finding persists. -5. Attack the diff through the configured adversarial passes — by default: `regression_hunt`, `convention_check`, and `dark_patterns` -6. Write findings into the latest review section in `.ai/reviews/{task-id}.md` -7. Update the Review Artifact v3 metadata so the latest round is truthful and complete - ---- - -## Default Review Pipeline - -The default built-in five-pass pipeline in `.ai/config.yaml` is: - -- `spec_compliance` -- `scope_drift` -- `regression_hunt` -- `convention_check` -- `dark_patterns` - -`scafld review` already runs `spec_compliance` and `scope_drift` and scaffolds the adversarial sections in configured order. Your job is to complete the adversarial passes and finalize the metadata for Review Artifact v3. - -If the project has changed pass titles in `.ai/config.yaml`, follow the headings already scaffolded by `scafld review`. The built-in pass ids stay the same even if the visible section title changes. - ---- - -## Attack Vectors - -### 1. Regression Hunt (`regression_hunt`) - -For each modified file, find every caller, importer, and downstream consumer. What assumptions do they make that this change violates? - -- Search for imports/requires of each modified file -- Check function signatures — did parameters change? Did return shapes change? 
-- Look for duck-typing or structural assumptions that no longer hold -- Verify event listeners and subscribers still match event shapes -- Check if removed or renamed exports are still referenced elsewhere - -### 2. Convention Check (`convention_check`) - -Read `CONVENTIONS.md` and `AGENTS.md`. For each changed file, check whether the new code violates a documented rule. - -- Cite the specific convention and the specific violating line -- Don't flag style preferences — only documented, stated conventions -- Check naming patterns, layer boundaries, import rules, test patterns - -### 3. Dark Patterns (`dark_patterns`) - -For each change, actively hunt for: - -- Hardcoded values that should be dynamic or configurable -- Off-by-one errors -- Missing null/empty checks at system boundaries (user input, API responses, config values) -- Race conditions or timing issues -- Copy-paste errors (duplicated logic with subtle differences) -- Error handling gaps (unhappy paths not covered) -- Security issues (injection, XSS, auth bypass, missing authorization) - ---- - -## Severity Levels - -- **critical** — will cause runtime errors, data loss, or security vulnerability -- **high** — will cause incorrect behavior in common cases -- **medium** — will cause incorrect behavior in edge cases -- **low** — code smell, minor issue, or potential future problem - ---- - -## Output - -`scafld review` scaffolds the review file at `.ai/reviews/{task-id}.md` with numbered review sections. 
Fill in the latest section using the Review Artifact v3 contract: - -````markdown -## Review N — {timestamp} - -### Metadata -```json -{ - "schema_version": 3, - "round_status": "completed", - "reviewer_mode": "fresh_agent", - "reviewer_session": "session-id-or-empty-string", - "reviewed_at": "{timestamp}", - "override_reason": null, - "pass_results": { - "spec_compliance": "pass", - "scope_drift": "pass", - "regression_hunt": "pass", - "convention_check": "pass", - "dark_patterns": "pass" - } -} -``` - -### Pass Results -- spec_compliance: PASS -- scope_drift: PASS -- regression_hunt: PASS -- convention_check: PASS -- dark_patterns: PASS - -### Regression Hunt -{For each modified file, trace callers/importers. What assumptions break? -List findings or "No issues found — checked [what you checked]".} - -### Convention Check -{Read CONVENTIONS.md and AGENTS.md. Does new code violate any documented rule? -List findings or "No issues found — checked [what you checked]".} - -### Dark Patterns -{Hunt for hardcoded values, off-by-one issues, missing null checks, race conditions, -copy-paste errors, unhandled error paths, and security issues. -List findings or "No issues found — checked [what you checked]".} - -### Blocking -- **{severity}** `{file}:{line}` — {what's wrong and why it matters} - -### Non-blocking -- **{severity}** `{file}:{line}` — {what's wrong and why it matters} - -### Verdict -{pass | fail | pass_with_issues} -```` - -Update these metadata fields explicitly: - -- Set `round_status` to `completed` when the review is actually done -- Set `reviewer_mode` to `fresh_agent`, `auto`, or `executor` to match the real reviewer -- Set `reviewer_session` to the real session identifier or `""` -- Keep the automated pass results for `spec_compliance` and `scope_drift` -- Set adversarial `pass_results` for `regression_hunt`, `convention_check`, and `dark_patterns` to `pass`, `pass_with_issues`, or `fail` - -Prior review rounds remain in the file as context. 
Do not rewrite them. - -**All configured adversarial sections must contain content.** Each must have at least one finding or an explicit "No issues found" with a brief note of what was checked. `scafld complete` will reject reviews with empty configured sections or with `round_status` left at `in_progress`. - -**Verdict rules:** Any blocking finding → `fail`. Non-blocking only → `pass_with_issues`. Clean → `pass`. - -When done, run `scafld complete {task-id}`. diff --git a/.ai/scafld/schemas/spec.json b/.ai/scafld/schemas/spec.json deleted file mode 100644 index a7b73b3..0000000 --- a/.ai/scafld/schemas/spec.json +++ /dev/null @@ -1,514 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://github.com/nilstate/scafld/spec-v1.1.json", - "title": "scafld Task Specification", - "description": "Machine-readable conversational task specification for AI agents", - "type": "object", - "required": ["spec_version", "task_id", "status", "task", "phases", "planning_log", "created", "updated"], - "additionalProperties": false, - - "properties": { - "spec_version": { - "type": "string", - "pattern": "^[0-9]+\\.[0-9]+$", - "description": "Semantic version of this spec format", - "examples": ["1.1"] - }, - - "task_id": { - "type": "string", - "pattern": "^[a-z0-9-]+$", - "description": "Unique identifier for this task (kebab-case)", - "examples": ["add-user-metrics", "refactor-auth-module"] - }, - - "created": { - "type": "string", - "format": "date-time", - "description": "ISO 8601 timestamp when spec was generated" - }, - - "updated": { - "type": "string", - "format": "date-time", - "description": "ISO 8601 timestamp when spec was last modified" - }, - - "status": { - "type": "string", - "enum": ["draft", "blocked", "under_review", "approved", "in_progress", "completed", "failed", "cancelled"], - "description": "Current lifecycle state of this task" - }, - - "task": { - "type": "object", - "required": ["title", "summary", "size", "risk_level", 
"context", "objectives", "touchpoints", "acceptance"], - "properties": { - "title": { - "type": "string", - "minLength": 5, - "description": "Human friendly title for this task" - }, - "summary": { - "type": "string", - "minLength": 20, - "description": "Concise description of the problem/goal" - }, - "size": { - "type": "string", - "enum": ["micro", "small", "medium", "large"], - "description": "Relative task size to guide planning and validation depth" - }, - "risk_level": { - "type": "string", - "enum": ["low", "medium", "high"], - "description": "Overall risk tier for this task; used to select validation profile when not explicitly set" - }, - "context": { - "type": "object", - "required": ["packages", "invariants"], - "properties": { - "packages": { - "type": "array", - "items": {"type": "string"}, - "minItems": 1, - "description": "Modules or packages affected" - }, - "files_impacted": { - "type": "array", - "items": { - "type": "object", - "required": ["path", "reason"], - "properties": { - "path": {"type": "string"}, - "lines": { - "oneOf": [ - {"type": "array", "items": {"type": "integer"}}, - {"type": "string", "pattern": "^[0-9]+-[0-9]+$"}, - {"type": "string", "enum": ["all"]} - ] - }, - "reason": {"type": "string"} - } - } - }, - "invariants": { - "type": "array", - "items": { - "type": "string" - }, - "minItems": 1, - "description": "Architectural/contract invariants that must be preserved (customize in config.yaml)" - }, - "related_docs": { - "type": "array", - "items": {"type": "string"} - }, - "cwd": { - "type": "string", - "description": "Default working directory for acceptance criteria commands, relative to workspace root. Individual criteria can override with their own cwd." 
- } - } - }, - "objectives": { - "type": "array", - "items": {"type": "string"}, - "minItems": 1, - "description": "Primary goals for this task" - }, - "scope": { - "type": "object", - "properties": { - "in_scope": { - "type": "array", - "items": {"type": "string"} - }, - "out_of_scope": { - "type": "array", - "items": {"type": "string"} - } - } - }, - "dependencies": { - "type": "array", - "items": {"type": "string"} - }, - "assumptions": { - "type": "array", - "items": {"type": "string"} - }, - "touchpoints": { - "type": "array", - "minItems": 1, - "items": { - "type": "object", - "required": ["area", "description"], - "properties": { - "area": {"type": "string"}, - "description": {"type": "string"}, - "owners": { - "type": "array", - "items": {"type": "string"} - }, - "links": { - "type": "array", - "items": {"type": "string"} - } - } - } - }, - "risks": { - "type": "array", - "items": { - "type": "object", - "required": ["description"], - "properties": { - "description": {"type": "string"}, - "impact": { - "type": "string", - "enum": ["low", "medium", "high"] - }, - "mitigation": {"type": "string"} - } - } - }, - "acceptance": { - "type": "object", - "required": ["definition_of_done", "validation"], - "properties": { - "validation_profile": { - "type": "string", - "enum": ["light", "standard", "strict"], - "description": "Validation profile to apply; defaults based on risk_level if omitted" - }, - "definition_of_done": { - "type": "array", - "minItems": 1, - "description": "Checklist items that must be explicitly checked off during execution", - "items": { - "type": "object", - "required": ["id", "description"], - "properties": { - "id": {"type": "string"}, - "description": {"type": "string"}, - "status": { - "type": "string", - "enum": ["pending", "in_progress", "done"], - "default": "pending" - }, - "checked_at": {"type": "string", "format": "date-time"}, - "notes": {"type": "string"} - } - } - }, - "validation": { - "type": "array", - "items": { - "type": 
"object", - "required": ["id", "type", "description"], - "properties": { - "id": {"type": "string"}, - "type": { - "type": "string", - "enum": [ - "compile", - "test", - "boundary", - "integration", - "security", - "documentation", - "custom" - ] - }, - "description": {"type": "string"}, - "command": {"type": "string"}, - "expected": {"type": "string"}, - "cwd": {"type": "string", "description": "Working directory relative to workspace root"}, - "timeout_seconds": { - "type": "integer", - "minimum": 1, - "description": "Command timeout in seconds. Defaults to 600 when omitted." - } - } - } - } - } - }, - "notes": {"type": "string"} - } - }, - - "planning_log": { - "type": "array", - "items": { - "type": "object", - "required": ["timestamp", "summary"], - "properties": { - "timestamp": {"type": "string", "format": "date-time"}, - "actor": {"type": "string"}, - "summary": {"type": "string"}, - "notes": {"type": "string"} - } - } - }, - - "phases": { - "type": "array", - "minItems": 1, - "items": { - "type": "object", - "required": ["id", "name", "objective", "changes", "acceptance_criteria"], - "properties": { - "id": { - "type": "string", - "pattern": "^phase[0-9]+$" - }, - "name": { - "type": "string", - "minLength": 5 - }, - "objective": { - "type": "string", - "minLength": 10 - }, - "dependencies": { - "type": "array", - "items": {"type": "string"} - }, - "changes": { - "type": "array", - "items": { - "type": "object", - "required": ["file", "action", "content_spec"], - "properties": { - "file": {"type": "string"}, - "action": { - "type": "string", - "enum": ["create", "update", "delete", "move"] - }, - "move_to": { - "type": "string", - "description": "Destination path when action is 'move'" - }, - "lines": { - "oneOf": [ - {"type": "array", "items": {"type": "integer"}}, - {"type": "string", "pattern": "^[0-9]+-[0-9]+$"}, - {"type": "string", "enum": ["all"]} - ] - }, - "content_spec": {"type": "string"} - } - } - }, - "acceptance_criteria": { - "type": 
"array", - "minItems": 1, - "items": { - "type": "object", - "required": ["id", "type", "description"], - "properties": { - "id": {"type": "string"}, - "type": { - "type": "string", - "enum": [ - "compile", - "test", - "boundary", - "integration", - "security", - "documentation", - "custom" - ], - "description": "Criterion type. For automated types (compile, test, boundary, integration, security), a 'command' field is expected. For manual types (documentation, custom), 'command' is optional." - }, - "description": {"type": "string"}, - "command": { - "type": "string", - "description": "Shell command to run for automated validation. Expected for compile, test, boundary, integration, and security types." - }, - "expected": {"type": "string"}, - "cwd": { - "type": "string", - "description": "Working directory for the command, relative to workspace root. Useful in monorepo/workspace setups where different criteria target different submodules." - }, - "timeout_seconds": { - "type": "integer", - "minimum": 1, - "description": "Command timeout in seconds. Defaults to 600 when omitted." 
- }, - "result": { - "oneOf": [ - { - "type": "string", - "enum": ["pass", "fail"], - "description": "Flat result recorded by scafld exec" - }, - { - "type": "object", - "required": ["status"], - "properties": { - "status": { - "type": "string", - "enum": ["pass", "fail"] - }, - "timestamp": { - "type": "string", - "format": "date-time" - }, - "output": { - "type": "string" - } - }, - "additionalProperties": false, - "description": "Nested result block supported for execution records" - } - ] - }, - "executed_at": { - "type": "string", - "format": "date-time", - "description": "When the criterion was last executed" - }, - "result_output": { - "type": "string", - "description": "Truncated command output from scafld exec" - } - } - } - }, - "status": { - "type": "string", - "enum": ["pending", "in_progress", "completed", "failed", "skipped"] - } - } - } - }, - - "rollback": { - "type": "object", - "properties": { - "strategy": { - "type": "string", - "enum": ["per_phase", "atomic", "manual"], - "default": "per_phase" - }, - "commands": { - "type": "object", - "patternProperties": { - "^phase[0-9]+$": {"type": "string"} - } - } - } - }, - - "review": { - "type": "object", - "description": "Adversarial review results recorded by scafld complete", - "properties": { - "timestamp": {"type": "string", "format": "date-time"}, - "verdict": { - "type": "string", - "enum": ["pass", "fail", "pass_with_issues"], - "description": "pass = no findings, fail = blocking findings, pass_with_issues = non-blocking only" - }, - "passes": { - "type": "array", - "items": { - "type": "object", - "required": ["id", "result"], - "properties": { - "id": {"type": "string"}, - "result": { - "type": "string", - "enum": ["pass", "fail", "pass_with_issues"] - } - } - } - }, - "review_rounds": {"type": "integer", "minimum": 0, "description": "Number of review rounds before passing"}, - "blocking_count": {"type": "integer", "minimum": 0}, - "non_blocking_count": {"type": "integer", "minimum": 0} - } 
- }, - - "self_eval": { - "type": "object", - "properties": { - "completeness": {"type": "integer", "minimum": 0, "maximum": 3}, - "architecture_fidelity": {"type": "integer", "minimum": 0, "maximum": 3}, - "spec_alignment": {"type": "integer", "minimum": 0, "maximum": 2}, - "validation_depth": {"type": "integer", "minimum": 0, "maximum": 2}, - "total": {"type": "integer", "minimum": 0, "maximum": 10}, - "notes": {"type": "string"}, - "second_pass_performed": {"type": "boolean"} - } - }, - - "deviations": { - "type": "array", - "items": { - "type": "object", - "required": ["rule", "reason"], - "properties": { - "rule": {"type": "string"}, - "reason": {"type": "string"}, - "mitigation": {"type": "string"}, - "approved_by": {"type": "string"} - } - } - }, - - "metadata": { - "type": "object", - "properties": { - "estimated_effort_hours": {"type": "number", "minimum": 0}, - "actual_effort_hours": {"type": "number", "minimum": 0}, - "ai_model": {"type": "string"}, - "react_cycles": {"type": "integer"}, - "tags": { - "type": "array", - "items": {"type": "string"} - } - } - }, - - "harden_status": { - "type": "string", - "enum": ["not_run", "in_progress", "passed"], - "description": "Optional. Tracks whether the operator has run `scafld harden` against this spec. Independent of the lifecycle `status` field; not consulted by `scafld approve`." - }, - - "harden_rounds": { - "type": "array", - "description": "Optional. 
One entry per `scafld harden` invocation.", - "items": { - "type": "object", - "required": ["round", "started_at", "questions"], - "properties": { - "round": {"type": "integer", "minimum": 1}, - "started_at": {"type": "string", "format": "date-time"}, - "ended_at": {"type": "string", "format": "date-time"}, - "outcome": {"type": "string", "enum": ["in_progress", "passed", "abandoned"]}, - "questions": { - "type": "array", - "items": { - "type": "object", - "required": ["question", "grounded_in"], - "properties": { - "question": {"type": "string"}, - "grounded_in": { - "type": "string", - "pattern": "^(spec_gap:|code:|archive:).+" - }, - "recommended_answer": {"type": "string"}, - "if_unanswered": {"type": "string"}, - "answered_with": {"type": "string"} - } - } - } - } - } - } - } -} diff --git a/.ai/scafld/specs/README.md b/.ai/scafld/specs/README.md deleted file mode 100644 index 73891aa..0000000 --- a/.ai/scafld/specs/README.md +++ /dev/null @@ -1,99 +0,0 @@ -# Task Specifications - -This directory contains machine-readable task specifications organized by lifecycle status. - ---- - -## Directory Structure - -``` -specs/ -├── drafts/ # Planning in progress -│ └── *.yaml (status: draft | under_review) -├── approved/ # Ready for execution -│ └── *.yaml (status: approved) -├── active/ # Currently executing -│ └── *.yaml (status: in_progress) -└── archive/ # Completed work - └── YYYY-MM/ - └── *.yaml (status: completed | failed | cancelled) -``` - ---- - -## File Naming - -**Convention:** `{task-id}.yaml` using kebab-case, descriptive names. - -Good: `add-user-metrics.yaml`, `refactor-auth-module.yaml`, `fix-chunk-dedup.yaml` -Bad: `task-123.yaml` (not descriptive), `AddMetrics.yaml` (not kebab-case) - ---- - -## Workflow - -### 1. Planning - -AI generates spec in `drafts/` with `status: "draft"`. If blocked, set `status: "under_review"`. - -### 2. Review & Approval - -Developer reviews, then approves: - -```bash -scafld approve my-task -``` - -### 3. 
Execution - -AI moves spec to `active/`, sets `status: "in_progress"`, and executes phases. - -### 4. Review - -Run adversarial review before completing: - -```bash -scafld review my-task -# Fill in findings in .ai/reviews/my-task.md -``` - -### 5. Completion - -Mark complete (reads review, records verdict, moves to `archive/YYYY-MM/`): - -```bash -scafld complete my-task -``` - ---- - -## Spec Anatomy - -Each spec validated by `.ai/schemas/spec.json` includes: - -- **`task` block:** Title, summary, context, objectives, scope, touchpoints, risks, acceptance checklist, constraints -- **`planning_log`:** Chronological entries summarizing planning steps -- **`phases`:** Ordered execution units with `changes[].content_spec`, acceptance criteria, and per-phase status -- **`rollback`:** Strategy and per-phase commands for safe reversions -- **`review`:** Verdict, pass results, and finding counts recorded by `scafld complete` -- **`self_eval` / `deviations` / `metadata`:** Populated during execution - ---- - -## Finding Work - -```bash -scafld list # All specs -scafld list active # Currently executing -scafld list approved # Awaiting execution -scafld list drafts # Planning in progress -scafld list archive # Completed work -``` - ---- - -## See Also - -- [AGENTS.md](../../AGENTS.md) - Status lifecycle and agent policies -- [config.yaml](../config.yaml) - Validation profiles and size/risk tiers -- [schemas/spec.json](../schemas/spec.json) - Spec validation schema diff --git a/.ai/scafld/specs/examples/add-error-codes.yaml b/.ai/scafld/specs/examples/add-error-codes.yaml deleted file mode 100644 index f0e91ee..0000000 --- a/.ai/scafld/specs/examples/add-error-codes.yaml +++ /dev/null @@ -1,365 +0,0 @@ -# scafld Example Spec — Complete reference showing every schema field -# See .ai/schemas/spec.json for the formal definition - -spec_version: "1.1" -task_id: "add-error-codes" -created: "2026-02-18T09:15:00Z" -updated: "2026-02-18T14:42:00Z" -status: "completed" - -task: - 
title: "Add typed error codes to document processing module" - summary: > - The document processor uses unstructured string errors, making it difficult for - callers to programmatically handle failures. Introduce a typed error code enum - and structured error class so consumers can match on specific failure modes. - size: "small" - risk_level: "medium" - context: - packages: - - "src/services/documents" - - "src/errors" - files_impacted: - - path: "src/errors/codes.ts" - lines: "all" - reason: "New file defining DocumentErrorCode enum and error map" - - path: "src/errors/document-error.ts" - lines: "all" - reason: "New DocumentProcessingError class using typed codes" - - path: "src/services/documents/processor.ts" - lines: "45-120" - reason: "Replace string throws with DocumentProcessingError instances" - - path: "src/services/documents/processor.test.ts" - lines: "all" - reason: "Update assertions to check error codes instead of message strings" - invariants: - - "domain_boundaries" - - "error_envelope" - related_docs: - - "docs/error-handling.md" - - "docs/architecture/service-layer.md" - objectives: - - "Define a DocumentErrorCode enum covering all known failure modes" - - "Create a structured error class that carries code, message, and context" - - "Migrate processor.ts from string throws to typed errors" - scope: - in_scope: - - "Document processor error paths" - - "Unit tests for error scenarios" - out_of_scope: - - "Other service modules (auth, billing)" - - "HTTP error response mapping (handled by controller layer)" - - "Error monitoring/alerting integration" - dependencies: - - "No external dependencies required" - assumptions: - - "Existing error helper utilities in src/errors/ are compatible with subclassing" - - "No downstream consumers rely on exact error message strings for control flow" - touchpoints: - - area: "src/errors" - description: "New error code enum and DocumentProcessingError class" - owners: - - "backend-team" - links: - - 
"https://internal.wiki/error-handling-standards" - - area: "src/services/documents/processor.ts" - description: "Replace raw throws with typed error instances" - owners: - - "documents-team" - - area: "src/services/documents/processor.test.ts" - description: "Update test assertions to verify error codes" - links: - - "https://internal.wiki/testing-conventions" - risks: - - description: "Downstream callers may catch generic Error and miss new type" - impact: "low" - mitigation: "DocumentProcessingError extends Error, so existing catch blocks still work" - - description: "Incomplete coverage of error paths in processor.ts" - impact: "medium" - mitigation: "Grep for all throw statements before and after migration to ensure full coverage" - acceptance: - validation_profile: "standard" - definition_of_done: - - id: "dod1" - description: "DocumentErrorCode enum covers all processor failure modes" - status: "done" - checked_at: "2026-02-18T13:20:00Z" - notes: "8 error codes identified matching 8 throw sites in processor.ts" - - id: "dod2" - description: "All throw statements in processor.ts use DocumentProcessingError" - status: "done" - checked_at: "2026-02-18T14:05:00Z" - notes: "Verified via grep: 0 raw Error throws remain" - - id: "dod3" - description: "Tests assert on error codes, not message strings" - status: "done" - checked_at: "2026-02-18T14:30:00Z" - - id: "dod4" - description: "No regressions in existing test suite" - status: "done" - checked_at: "2026-02-18T14:35:00Z" - notes: "Full suite: 142 passed, 0 failed" - validation: - - id: "v1" - type: "compile" - description: "Project compiles with no type errors" - command: "npm run build" - expected: "Exit code 0, no type errors" - - id: "v2" - type: "test" - description: "All unit tests pass including updated error assertions" - command: "npm test -- --filter documents" - expected: "All tests pass" - - id: "v3" - type: "boundary" - description: "No throw of raw Error or string in processor.ts" - command: "rg 
'throw new Error\\|throw \"' src/services/documents/processor.ts" - expected: "No matches found" - - id: "v4" - type: "security" - description: "No hardcoded secrets in changed files" - command: "rg -i '(password|secret|api[_-]?key)\\s*=\\s*[\"'']\\w' src/errors/ src/services/documents/" - expected: "No matches found" - constraints: - approvals_required: - - "error_envelope" - non_goals: - - "Refactoring the processor's happy path logic" - - "Adding error codes to other modules" - info_sources: - - "docs/error-handling.md" - - "https://internal.wiki/error-handling-standards" - - "src/errors/base-error.ts (existing base class)" - notes: > - Chose a flat enum over a class hierarchy to keep things simple. The error code - enum can be extended later when other modules adopt the same pattern. Considered - using numeric codes but string enums are more readable in logs and debuggers. - -planning_log: - - timestamp: "2026-02-18T09:15:00Z" - actor: "agent" - summary: "Identified processor.ts as primary target. Found 8 throw statements using raw strings." - notes: "Searched with: rg 'throw new Error' src/services/documents/" - - timestamp: "2026-02-18T09:40:00Z" - actor: "agent" - summary: "Confirmed src/errors/ has base helpers. Proposed enum + error class approach." - notes: "BaseError class exists at src/errors/base-error.ts with code property pattern" - - timestamp: "2026-02-18T10:05:00Z" - actor: "user" - summary: "User confirmed no schema changes needed. No downstream string matching on error messages." - - timestamp: "2026-02-18T10:30:00Z" - actor: "agent" - summary: "Locked three-phase plan: define codes, migrate processor, update tests. Spec ready for review." 
- notes: "Moved from two-phase to three-phase after realizing test updates are substantial enough to warrant isolation" - -phases: - - id: "phase1" - name: "Define error codes and error class" - objective: "Create the DocumentErrorCode enum and DocumentProcessingError class in src/errors/" - changes: - - file: "src/errors/codes.ts" - action: "create" - lines: "all" - content_spec: | - Export a DocumentErrorCode string enum with values: - INVALID_FORMAT, PARSE_FAILED, SIZE_EXCEEDED, ENCODING_UNSUPPORTED, - PERMISSION_DENIED, STORAGE_UNAVAILABLE, TEMPLATE_MISSING, TIMEOUT. - Each value should be a SCREAMING_SNAKE string matching the enum key. - - file: "src/errors/document-error.ts" - action: "create" - lines: "all" - content_spec: | - Export DocumentProcessingError extending Error. - Constructor accepts (code: DocumentErrorCode, message: string, context?: Record). - Exposes readonly code, context properties. Sets name to 'DocumentProcessingError'. - Re-export DocumentErrorCode for convenience. 
- acceptance_criteria: - - id: "ac1_1" - type: "compile" - description: "New files compile without errors" - command: "npx tsc --noEmit src/errors/codes.ts src/errors/document-error.ts" - expected: "Exit code 0" - validation: "automated" - result: - status: "pass" - timestamp: "2026-02-18T11:45:00Z" - output: "tsc completed with exit code 0" - notes: "Clean compile, no warnings" - - id: "ac1_2" - type: "test" - description: "Error class instantiation works correctly" - command: "npm test -- --filter document-error" - expected: "Error instances carry correct code and extend Error" - validation: "automated" - result: - status: "pass" - timestamp: "2026-02-18T11:50:00Z" - output: "2 tests passed" - - id: "ac1_3" - type: "documentation" - description: "Error codes are documented in docs/error-handling.md" - validation: "manual" - result: - status: "pass" - timestamp: "2026-02-18T12:00:00Z" - notes: "Added table of error codes with descriptions to error-handling.md" - status: "completed" - - - id: "phase2" - name: "Migrate processor error paths" - objective: "Replace all raw throws in processor.ts with DocumentProcessingError using appropriate codes" - dependencies: - - "phase1" - changes: - - file: "src/services/documents/processor.ts" - action: "update" - lines: "45-120" - content_spec: | - Import DocumentProcessingError and DocumentErrorCode from src/errors. - Replace each `throw new Error("...")` with the appropriate - `throw new DocumentProcessingError(DocumentErrorCode.X, message, { context })`. 
- Map each existing error string to the matching enum value: - - "Invalid document format" -> INVALID_FORMAT - - "Failed to parse document" -> PARSE_FAILED - - "Document exceeds size limit" -> SIZE_EXCEEDED - - "Unsupported encoding" -> ENCODING_UNSUPPORTED - - "Permission denied" -> PERMISSION_DENIED - - "Storage service unavailable" -> STORAGE_UNAVAILABLE - - "Template not found" -> TEMPLATE_MISSING - - "Processing timeout" -> TIMEOUT - - file: "src/errors/index.ts" - action: "update" - lines: "1-10" - content_spec: | - Add re-exports for DocumentErrorCode and DocumentProcessingError - so they can be imported from 'src/errors' directly. - acceptance_criteria: - - id: "ac2_1" - type: "boundary" - description: "No raw Error throws remain in processor.ts" - command: "rg -c 'throw new Error' src/services/documents/processor.ts" - expected: "No matches (exit code 1)" - validation: "automated" - result: - status: "pass" - timestamp: "2026-02-18T13:15:00Z" - output: "exit code 1 - no matches" - notes: "All 8 throw sites migrated" - - id: "ac2_2" - type: "compile" - description: "Processor compiles with new error imports" - command: "npx tsc --noEmit src/services/documents/processor.ts" - expected: "Exit code 0" - validation: "automated" - result: - status: "pass" - timestamp: "2026-02-18T13:18:00Z" - output: "tsc completed with exit code 0" - - id: "ac2_3" - type: "security" - description: "No hardcoded secrets introduced" - command: "rg -i '(password|secret|api[_-]?key)\\s*=\\s*[\"'']\\w' src/services/documents/processor.ts" - expected: "No matches" - validation: "automated" - result: - status: "pass" - timestamp: "2026-02-18T13:20:00Z" - output: "No matches found" - status: "completed" - - - id: "phase3" - name: "Update tests to assert on error codes" - objective: "Migrate test assertions from message matching to code matching and add coverage for each error code" - dependencies: - - "phase2" - changes: - - file: "src/services/documents/processor.test.ts" - action: 
"update" - lines: "all" - content_spec: | - Import DocumentErrorCode and DocumentProcessingError. - For each error-path test: - - Replace `.toThrow("message")` with a catch block that asserts - `error instanceof DocumentProcessingError` and - `error.code === DocumentErrorCode.X`. - - Verify error.context contains expected metadata where applicable. - - Add one new test per error code to confirm the correct code is thrown - for each failure scenario. - - file: "src/errors/document-error.test.ts" - action: "update" - lines: "all" - content_spec: | - Add tests for edge cases: missing context, serialization, - instanceof checks, and name property. - acceptance_criteria: - - id: "ac3_1" - type: "test" - description: "All processor tests pass with code-based assertions" - command: "npm test -- --filter documents" - expected: "All tests pass, 0 failures" - validation: "automated" - result: - status: "pass" - timestamp: "2026-02-18T14:28:00Z" - output: "18 tests passed, 0 failed" - notes: "Added 8 new tests (one per error code), updated 6 existing tests" - - id: "ac3_2" - type: "test" - description: "Full test suite passes with no regressions" - command: "npm test" - expected: "Exit code 0" - validation: "automated" - result: - status: "pass" - timestamp: "2026-02-18T14:35:00Z" - output: "142 tests passed, 0 failed, 0 skipped" - - id: "ac3_3" - type: "integration" - description: "Document upload endpoint returns structured error on invalid input" - command: "npm run test:integration -- --filter document-upload" - expected: "Exit code 0" - validation: "automated" - result: - status: "pass" - timestamp: "2026-02-18T14:38:00Z" - output: "3 integration tests passed" - - id: "ac3_4" - type: "custom" - description: "Error code coverage matches throw site count" - validation: "manual" - result: - status: "pass" - timestamp: "2026-02-18T14:40:00Z" - notes: "8 error codes defined, 8 throw sites migrated, 8 dedicated test cases added - 1:1:1 coverage" - status: "completed" - 
-rollback: - strategy: "per_phase" - commands: - phase1: "git checkout HEAD -- src/errors/codes.ts src/errors/document-error.ts" - phase2: "git checkout HEAD -- src/services/documents/processor.ts src/errors/index.ts" - phase3: "git checkout HEAD -- src/services/documents/processor.test.ts src/errors/document-error.test.ts" - -self_eval: - completeness: 3 - architecture_fidelity: 3 - spec_alignment: 2 - validation_depth: 2 - total: 10 - notes: | - All 8 error paths migrated with 1:1 enum coverage. Error class follows existing - BaseError pattern in the codebase. Tests cover every error code individually plus - integration test for the upload endpoint. No deviations from spec. - second_pass_performed: false - -deviations: [] - -metadata: - estimated_effort_hours: 2.5 - actual_effort_hours: 3.0 - ai_model: "claude-opus-4-6" - react_cycles: 12 - tags: - - "error-handling" - - "typescript" - - "refactor"