diff --git a/.github/workflows/workflow-token-count.yml b/.github/workflows/workflow-token-count.yml new file mode 100644 index 00000000..5eb0a1fe --- /dev/null +++ b/.github/workflows/workflow-token-count.yml @@ -0,0 +1,38 @@ +name: Workflow token count + +on: + push: + branches: + - main + pull_request: + branches: + - main + +permissions: + contents: read + +jobs: + token-count: + name: Run workflow token count + runs-on: ubuntu-latest + timeout-minutes: 20 + steps: + - name: Check out repository + uses: actions/checkout@v6 + + - name: Install Nix + uses: cachix/install-nix-action@v31 + + - name: Run workflow token-count app + run: nix run .#token-count-workflows + + - name: Upload workflow token-count artifacts + uses: actions/upload-artifact@v4 + with: + name: workflow-token-footprint + path: | + context/tmp/token-footprint/workflow-token-count-latest.json + context/tmp/token-footprint/workflow-token-count-latest.md + context/tmp/token-footprint/workflow-token-count-*.json + context/tmp/token-footprint/workflow-token-count-*.md + if-no-files-found: error diff --git a/.opencode/agent/Shared Context Plan.md b/.opencode/agent/Shared Context Plan.md index 3dee7a0e..4499d961 100644 --- a/.opencode/agent/Shared Context Plan.md +++ b/.opencode/agent/Shared Context Plan.md @@ -71,6 +71,8 @@ Procedure Important behaviors - Keep context optimized for future AI sessions, not prose-heavy narration. - Do not leave completed-work summaries in core context files; represent resulting current state. +- Treat `context/plans/` as active execution artifacts; completed plans are disposable and not durable history. +- Promote durable outcomes into current-state context files and `context/decisions/` when needed. - Long-term quality is measured by code quality and context accuracy. 
Natural nudges to use diff --git a/.opencode/command/change-to-plan.md b/.opencode/command/change-to-plan.md index 1c9509d0..0486ac66 100644 --- a/.opencode/command/change-to-plan.md +++ b/.opencode/command/change-to-plan.md @@ -10,6 +10,7 @@ Input change request: Behavior: - Keep this command as thin orchestration; delegate clarification/ambiguity handling and plan-shape contracts to `sce-plan-authoring`. +- Ensure plan output follows one-task/one-atomic-commit slicing through `sce-plan-authoring` task-shape rules. - Write/update `context/plans/{plan_name}.md`. - Confirm plan creation with `{plan_name}` and exact path. - Return the full ordered task list. diff --git a/.opencode/command/commit.md b/.opencode/command/commit.md index 005017fa..17f15dfd 100644 --- a/.opencode/command/commit.md +++ b/.opencode/command/commit.md @@ -18,8 +18,8 @@ Behavior: Confirm once staging is complete." - After confirmation: - 1. Inspect staged changes. - 2. Delegate commit-message grammar, atomic split decisions, and split guidance to `sce-atomic-commit`. + - Classify staged diff scope (`context/`-only vs mixed `context/` + non-`context/`) and apply the context-guidance gate from `sce-atomic-commit`. + - Delegate commit-message grammar, atomic split decisions, and split guidance to `sce-atomic-commit`. - Do not create commits automatically. - Output only proposed commit message(s) and split guidance when needed. diff --git a/.opencode/command/next-task.md b/.opencode/command/next-task.md index b6c486dd..667fb3da 100644 --- a/.opencode/command/next-task.md +++ b/.opencode/command/next-task.md @@ -14,12 +14,11 @@ Expected arguments: Behavior: - Run `sce-plan-review` first to resolve plan target/task and readiness. 
-- Confirmation gate before execution: - - if plan + task ID are provided and plan review reports no blockers/ambiguity/missing acceptance criteria, auto-pass readiness - - otherwise, resolve open points and ask the user to confirm the task is ready before continuing -- After readiness passes, run `sce-task-execution` and enforce its mandatory implementation stop before any edits. -- After user confirms that implementation stop, continue `sce-task-execution` for scoped implementation, checks/lints/build (as applicable), and plan task status updates. -- Run `sce-context-sync` as a required done gate; keep `context/` aligned with code truth, including required shared-file verification and feature discoverability links. +- Apply readiness confirmation gate from `sce-plan-review`: + - auto-pass only when both plan + task ID are provided and review reports no blockers/ambiguity/missing acceptance criteria + - otherwise resolve open points and ask user confirmation before execution +- Run `sce-task-execution`; keep mandatory implementation stop, scoped implementation, checks/lints/build, and plan status updates skill-owned. +- Run `sce-context-sync` as the required done gate. - Wait for user feedback; if in-scope fixes are requested, apply fixes, rerun light checks (and a light/fast build when applicable), then run `sce-context-sync` again. - If this is the final plan task, run `sce-validation`. - If more tasks remain, prompt a new session with `/next-task {plan_name} T0X`. diff --git a/.opencode/skills/sce-atomic-commit/SKILL.md b/.opencode/skills/sce-atomic-commit/SKILL.md index 8de08ce6..ad227989 100644 --- a/.opencode/skills/sce-atomic-commit/SKILL.md +++ b/.opencode/skills/sce-atomic-commit/SKILL.md @@ -72,6 +72,12 @@ Default split order: 3. tests 4. docs +## Context-file guidance gating + +- Check staged diff scope before proposing commit messaging guidance. +- If staged changes are context-only (`context/**`), context-file-focused guidance is allowed. 
+- If staged changes are mixed (`context/**` + non-`context/**`), avoid default context-file commit reminders and prioritize guidance that reflects the full staged scope. + ## Anti-patterns - vague subjects ("cleanup", "updates") diff --git a/.opencode/skills/sce-context-sync/SKILL.md b/.opencode/skills/sce-context-sync/SKILL.md index 2cdda4cd..2df90ea2 100644 --- a/.opencode/skills/sce-context-sync/SKILL.md +++ b/.opencode/skills/sce-context-sync/SKILL.md @@ -8,15 +8,15 @@ compatibility: opencode - Context is durable AI memory and must reflect current-state truth. - If context and code diverge, code is source of truth. -## Mandatory sync pass (always check all) -For every completed implementation task, explicitly review and update these files when relevant: +## Mandatory sync pass (important-change gated) +For every completed implementation task, run a sync pass over these shared files: - `context/overview.md` - `context/architecture.md` - `context/glossary.md` - `context/patterns.md` - `context/context-map.md` -Do not skip `overview`, `architecture`, or `glossary` by default. If no edit is needed, verify they still match current code behavior. +Do not default to editing root context files on every task. First classify whether the task is an important change; then edit or verify accordingly. ## Root context significance gating - Treat root context edits as required when a task introduces or changes cross-cutting behavior, repository-wide policy/contracts, architecture boundaries, or canonical terminology. diff --git a/.opencode/skills/sce-plan-authoring/SKILL.md b/.opencode/skills/sce-plan-authoring/SKILL.md index c2441d46..4cb2b670 100644 --- a/.opencode/skills/sce-plan-authoring/SKILL.md +++ b/.opencode/skills/sce-plan-authoring/SKILL.md @@ -44,6 +44,25 @@ For each task include: - Done when - Verification notes (commands or checks) +## Atomic task slicing contract (required) +- Author each executable task as one atomic commit unit by default. 
+- Every task must be scoped so one contributor can complete it and land it as one coherent commit without bundling unrelated changes. +- If a candidate task would require multiple independent commits (for example: refactor + behavior change + docs), split it into separate sequential tasks before finalizing the plan. +- Keep broad wrappers (`polish`, `finalize`, `misc updates`) out of executable tasks; convert them into specific outcomes with concrete acceptance checks. + +Use this quick atomicity check before accepting each task: +- `single_intent`: task delivers one primary outcome +- `single_area`: task touch scope is narrow and related +- `single_verification`: done checks validate one coherent change set + +Example compliant skeleton: +- [ ] T0X: `[single intent title]` (status:todo) + - Task ID: T0X + - Goal: `[one outcome]` + - Boundaries (in/out of scope): `[tight scope]` + - Done when: `[clear acceptance for one coherent change]` + - Verification notes (commands or checks): `[targeted checks for this change]` + Use checkbox lines for machine-friendly progress tracking: - `- [ ] T01: ... (status:todo)` diff --git a/.opencode/skills/sce-plan-review/SKILL.md b/.opencode/skills/sce-plan-review/SKILL.md index df0b88bd..9803f2fa 100644 --- a/.opencode/skills/sce-plan-review/SKILL.md +++ b/.opencode/skills/sce-plan-review/SKILL.md @@ -28,6 +28,8 @@ compatibility: opencode ## Rules - Do not auto-mark tasks complete during review. - Keep continuation state in the plan markdown itself. +- Treat `context/plans/` as active execution artifacts; completed plans are disposable and not a durable context source. +- If durable history is needed, record it in current-state context files and/or `context/decisions/` instead of completed plan files. - Keep implementation blocked until decision alignment on unclear points. - If plan context is stale or partial, continue with code truth and flag context updates. 
diff --git a/.opencode/skills/sce-task-execution/SKILL.md b/.opencode/skills/sce-task-execution/SKILL.md index 03099eba..eb6c4927 100644 --- a/.opencode/skills/sce-task-execution/SKILL.md +++ b/.opencode/skills/sce-task-execution/SKILL.md @@ -25,8 +25,9 @@ compatibility: opencode 3) Stop and ask: "Continue with implementation now?" (yes/no). 4) Implement minimal in-scope changes. 5) Run light task-level tests/checks and lints first, and run a build when the build is light/fast (targeted over full-suite unless requested), then capture evidence. -6) Keep session-only scraps in `context/tmp/`. -7) Update task status in `context/plans/{plan_id}.md`. +6) Record whether the implementation is an important change for context sync (root-edit required) or verify-only (no root edits expected). +7) Keep session-only scraps in `context/tmp/`. +8) Update task status in `context/plans/{plan_id}.md`. ## Scope expansion rule - If out-of-scope edits are needed, stop and ask for approval. diff --git a/README.md b/README.md index 7c560d54..f0d909bd 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,29 @@ boundaries. Built by [CroCoder](https://www.crocoder.dev/) +## Workflow token counting + +Static workflow token-footprint reports are produced by the T06 script at +`evals/token-count-workflows.ts` using the canonical manifest +`context/sce/workflow-token-footprint-manifest.json`. + +```bash +cd evals +bun run token-count-workflows +``` + +Optional inputs: + +```bash +bun run token-count-workflows --run-id local-test +bun run token-count-workflows --baseline ../context/tmp/token-footprint/workflow-token-count-latest.json +``` + +Outputs are written to `context/tmp/token-footprint/` as: +- `workflow-token-count-latest.json` +- `workflow-token-count-latest.md` +- `workflow-token-count-<run-id>.json` (when `--run-id` is provided) + ## Dev shell agnix tooling This repository exposes `agnix` and `agnix-lsp` through `nix develop` using a Nix-first shell with Rust toolchain support. 
diff --git a/config/.claude/agents/shared-context-plan.md b/config/.claude/agents/shared-context-plan.md index 07c50ee2..c81866e7 100644 --- a/config/.claude/agents/shared-context-plan.md +++ b/config/.claude/agents/shared-context-plan.md @@ -50,6 +50,8 @@ Procedure Important behaviors - Keep context optimized for future AI sessions, not prose-heavy narration. - Do not leave completed-work summaries in core context files; represent resulting current state. +- Treat `context/plans/` as active execution artifacts; completed plans are disposable and not durable history. +- Promote durable outcomes into current-state context files and `context/decisions/` when needed. - Long-term quality is measured by code quality and context accuracy. Natural nudges to use diff --git a/config/.claude/commands/change-to-plan.md b/config/.claude/commands/change-to-plan.md index 0ec0c1c2..b743f425 100644 --- a/config/.claude/commands/change-to-plan.md +++ b/config/.claude/commands/change-to-plan.md @@ -10,6 +10,7 @@ Input change request: Behavior: - Keep this command as thin orchestration; delegate clarification/ambiguity handling and plan-shape contracts to `sce-plan-authoring`. +- Ensure plan output follows one-task/one-atomic-commit slicing through `sce-plan-authoring` task-shape rules. - Write/update `context/plans/{plan_name}.md`. - Confirm plan creation with `{plan_name}` and exact path. - Return the full ordered task list. diff --git a/config/.claude/commands/commit.md b/config/.claude/commands/commit.md index 62e777ff..ad99ebf3 100644 --- a/config/.claude/commands/commit.md +++ b/config/.claude/commands/commit.md @@ -18,8 +18,8 @@ Behavior: Confirm once staging is complete." - After confirmation: - 1. Inspect staged changes. - 2. Delegate commit-message grammar, atomic split decisions, and split guidance to `sce-atomic-commit`. + - Classify staged diff scope (`context/`-only vs mixed `context/` + non-`context/`) and apply the context-guidance gate from `sce-atomic-commit`. 
+ - Delegate commit-message grammar, atomic split decisions, and split guidance to `sce-atomic-commit`. - Do not create commits automatically. - Output only proposed commit message(s) and split guidance when needed. diff --git a/config/.claude/commands/next-task.md b/config/.claude/commands/next-task.md index d6469caf..9ae8fe27 100644 --- a/config/.claude/commands/next-task.md +++ b/config/.claude/commands/next-task.md @@ -14,12 +14,11 @@ Expected arguments: Behavior: - Run `sce-plan-review` first to resolve plan target/task and readiness. -- Confirmation gate before execution: - - if plan + task ID are provided and plan review reports no blockers/ambiguity/missing acceptance criteria, auto-pass readiness - - otherwise, resolve open points and ask the user to confirm the task is ready before continuing -- After readiness passes, run `sce-task-execution` and enforce its mandatory implementation stop before any edits. -- After user confirms that implementation stop, continue `sce-task-execution` for scoped implementation, checks/lints/build (as applicable), and plan task status updates. -- Run `sce-context-sync` as a required done gate; keep `context/` aligned with code truth, including required shared-file verification and feature discoverability links. +- Apply readiness confirmation gate from `sce-plan-review`: + - auto-pass only when both plan + task ID are provided and review reports no blockers/ambiguity/missing acceptance criteria + - otherwise resolve open points and ask user confirmation before execution +- Run `sce-task-execution`; keep mandatory implementation stop, scoped implementation, checks/lints/build, and plan status updates skill-owned. +- Run `sce-context-sync` as the required done gate. - Wait for user feedback; if in-scope fixes are requested, apply fixes, rerun light checks (and a light/fast build when applicable), then run `sce-context-sync` again. - If this is the final plan task, run `sce-validation`. 
- If more tasks remain, prompt a new session with `/next-task {plan_name} T0X`. diff --git a/config/.claude/skills/sce-atomic-commit/SKILL.md b/config/.claude/skills/sce-atomic-commit/SKILL.md index d94bc63c..ba878631 100644 --- a/config/.claude/skills/sce-atomic-commit/SKILL.md +++ b/config/.claude/skills/sce-atomic-commit/SKILL.md @@ -72,6 +72,12 @@ Default split order: 3. tests 4. docs +## Context-file guidance gating + +- Check staged diff scope before proposing commit messaging guidance. +- If staged changes are context-only (`context/**`), context-file-focused guidance is allowed. +- If staged changes are mixed (`context/**` + non-`context/**`), avoid default context-file commit reminders and prioritize guidance that reflects the full staged scope. + ## Anti-patterns - vague subjects ("cleanup", "updates") diff --git a/config/.claude/skills/sce-context-sync/SKILL.md b/config/.claude/skills/sce-context-sync/SKILL.md index d100ef3b..294bdc3e 100644 --- a/config/.claude/skills/sce-context-sync/SKILL.md +++ b/config/.claude/skills/sce-context-sync/SKILL.md @@ -8,15 +8,15 @@ compatibility: claude - Context is durable AI memory and must reflect current-state truth. - If context and code diverge, code is source of truth. -## Mandatory sync pass (always check all) -For every completed implementation task, explicitly review and update these files when relevant: +## Mandatory sync pass (important-change gated) +For every completed implementation task, run a sync pass over these shared files: - `context/overview.md` - `context/architecture.md` - `context/glossary.md` - `context/patterns.md` - `context/context-map.md` -Do not skip `overview`, `architecture`, or `glossary` by default. If no edit is needed, verify they still match current code behavior. +Do not default to editing root context files on every task. First classify whether the task is an important change; then edit or verify accordingly. 
## Root context significance gating - Treat root context edits as required when a task introduces or changes cross-cutting behavior, repository-wide policy/contracts, architecture boundaries, or canonical terminology. diff --git a/config/.claude/skills/sce-plan-authoring/SKILL.md b/config/.claude/skills/sce-plan-authoring/SKILL.md index d1d6c838..21cfe05a 100644 --- a/config/.claude/skills/sce-plan-authoring/SKILL.md +++ b/config/.claude/skills/sce-plan-authoring/SKILL.md @@ -44,6 +44,25 @@ For each task include: - Done when - Verification notes (commands or checks) +## Atomic task slicing contract (required) +- Author each executable task as one atomic commit unit by default. +- Every task must be scoped so one contributor can complete it and land it as one coherent commit without bundling unrelated changes. +- If a candidate task would require multiple independent commits (for example: refactor + behavior change + docs), split it into separate sequential tasks before finalizing the plan. +- Keep broad wrappers (`polish`, `finalize`, `misc updates`) out of executable tasks; convert them into specific outcomes with concrete acceptance checks. + +Use this quick atomicity check before accepting each task: +- `single_intent`: task delivers one primary outcome +- `single_area`: task touch scope is narrow and related +- `single_verification`: done checks validate one coherent change set + +Example compliant skeleton: +- [ ] T0X: `[single intent title]` (status:todo) + - Task ID: T0X + - Goal: `[one outcome]` + - Boundaries (in/out of scope): `[tight scope]` + - Done when: `[clear acceptance for one coherent change]` + - Verification notes (commands or checks): `[targeted checks for this change]` + Use checkbox lines for machine-friendly progress tracking: - `- [ ] T01: ... 
(status:todo)` diff --git a/config/.claude/skills/sce-plan-review/SKILL.md b/config/.claude/skills/sce-plan-review/SKILL.md index bf17d1f8..4a25fd50 100644 --- a/config/.claude/skills/sce-plan-review/SKILL.md +++ b/config/.claude/skills/sce-plan-review/SKILL.md @@ -28,6 +28,8 @@ compatibility: claude ## Rules - Do not auto-mark tasks complete during review. - Keep continuation state in the plan markdown itself. +- Treat `context/plans/` as active execution artifacts; completed plans are disposable and not a durable context source. +- If durable history is needed, record it in current-state context files and/or `context/decisions/` instead of completed plan files. - Keep implementation blocked until decision alignment on unclear points. - If plan context is stale or partial, continue with code truth and flag context updates. diff --git a/config/.claude/skills/sce-task-execution/SKILL.md b/config/.claude/skills/sce-task-execution/SKILL.md index af1a019f..d948d66d 100644 --- a/config/.claude/skills/sce-task-execution/SKILL.md +++ b/config/.claude/skills/sce-task-execution/SKILL.md @@ -25,8 +25,9 @@ compatibility: claude 3) Stop and ask: "Continue with implementation now?" (yes/no). 4) Implement minimal in-scope changes. 5) Run light task-level tests/checks and lints first, and run a build when the build is light/fast (targeted over full-suite unless requested), then capture evidence. -6) Keep session-only scraps in `context/tmp/`. -7) Update task status in `context/plans/{plan_id}.md`. +6) Record whether the implementation is an important change for context sync (root-edit required) or verify-only (no root edits expected). +7) Keep session-only scraps in `context/tmp/`. +8) Update task status in `context/plans/{plan_id}.md`. ## Scope expansion rule - If out-of-scope edits are needed, stop and ask for approval. 
diff --git a/config/.opencode/agent/Shared Context Plan.md b/config/.opencode/agent/Shared Context Plan.md index 3dee7a0e..4499d961 100644 --- a/config/.opencode/agent/Shared Context Plan.md +++ b/config/.opencode/agent/Shared Context Plan.md @@ -71,6 +71,8 @@ Procedure Important behaviors - Keep context optimized for future AI sessions, not prose-heavy narration. - Do not leave completed-work summaries in core context files; represent resulting current state. +- Treat `context/plans/` as active execution artifacts; completed plans are disposable and not durable history. +- Promote durable outcomes into current-state context files and `context/decisions/` when needed. - Long-term quality is measured by code quality and context accuracy. Natural nudges to use diff --git a/config/.opencode/command/change-to-plan.md b/config/.opencode/command/change-to-plan.md index 1c9509d0..0486ac66 100644 --- a/config/.opencode/command/change-to-plan.md +++ b/config/.opencode/command/change-to-plan.md @@ -10,6 +10,7 @@ Input change request: Behavior: - Keep this command as thin orchestration; delegate clarification/ambiguity handling and plan-shape contracts to `sce-plan-authoring`. +- Ensure plan output follows one-task/one-atomic-commit slicing through `sce-plan-authoring` task-shape rules. - Write/update `context/plans/{plan_name}.md`. - Confirm plan creation with `{plan_name}` and exact path. - Return the full ordered task list. diff --git a/config/.opencode/command/commit.md b/config/.opencode/command/commit.md index 005017fa..17f15dfd 100644 --- a/config/.opencode/command/commit.md +++ b/config/.opencode/command/commit.md @@ -18,8 +18,8 @@ Behavior: Confirm once staging is complete." - After confirmation: - 1. Inspect staged changes. - 2. Delegate commit-message grammar, atomic split decisions, and split guidance to `sce-atomic-commit`. 
+ - Classify staged diff scope (`context/`-only vs mixed `context/` + non-`context/`) and apply the context-guidance gate from `sce-atomic-commit`. + - Delegate commit-message grammar, atomic split decisions, and split guidance to `sce-atomic-commit`. - Do not create commits automatically. - Output only proposed commit message(s) and split guidance when needed. diff --git a/config/.opencode/command/next-task.md b/config/.opencode/command/next-task.md index b6c486dd..667fb3da 100644 --- a/config/.opencode/command/next-task.md +++ b/config/.opencode/command/next-task.md @@ -14,12 +14,11 @@ Expected arguments: Behavior: - Run `sce-plan-review` first to resolve plan target/task and readiness. -- Confirmation gate before execution: - - if plan + task ID are provided and plan review reports no blockers/ambiguity/missing acceptance criteria, auto-pass readiness - - otherwise, resolve open points and ask the user to confirm the task is ready before continuing -- After readiness passes, run `sce-task-execution` and enforce its mandatory implementation stop before any edits. -- After user confirms that implementation stop, continue `sce-task-execution` for scoped implementation, checks/lints/build (as applicable), and plan task status updates. -- Run `sce-context-sync` as a required done gate; keep `context/` aligned with code truth, including required shared-file verification and feature discoverability links. +- Apply readiness confirmation gate from `sce-plan-review`: + - auto-pass only when both plan + task ID are provided and review reports no blockers/ambiguity/missing acceptance criteria + - otherwise resolve open points and ask user confirmation before execution +- Run `sce-task-execution`; keep mandatory implementation stop, scoped implementation, checks/lints/build, and plan status updates skill-owned. +- Run `sce-context-sync` as the required done gate. 
- Wait for user feedback; if in-scope fixes are requested, apply fixes, rerun light checks (and a light/fast build when applicable), then run `sce-context-sync` again. - If this is the final plan task, run `sce-validation`. - If more tasks remain, prompt a new session with `/next-task {plan_name} T0X`. diff --git a/config/.opencode/skills/sce-atomic-commit/SKILL.md b/config/.opencode/skills/sce-atomic-commit/SKILL.md index 8de08ce6..ad227989 100644 --- a/config/.opencode/skills/sce-atomic-commit/SKILL.md +++ b/config/.opencode/skills/sce-atomic-commit/SKILL.md @@ -72,6 +72,12 @@ Default split order: 3. tests 4. docs +## Context-file guidance gating + +- Check staged diff scope before proposing commit messaging guidance. +- If staged changes are context-only (`context/**`), context-file-focused guidance is allowed. +- If staged changes are mixed (`context/**` + non-`context/**`), avoid default context-file commit reminders and prioritize guidance that reflects the full staged scope. + ## Anti-patterns - vague subjects ("cleanup", "updates") diff --git a/config/.opencode/skills/sce-context-sync/SKILL.md b/config/.opencode/skills/sce-context-sync/SKILL.md index 2cdda4cd..2df90ea2 100644 --- a/config/.opencode/skills/sce-context-sync/SKILL.md +++ b/config/.opencode/skills/sce-context-sync/SKILL.md @@ -8,15 +8,15 @@ compatibility: opencode - Context is durable AI memory and must reflect current-state truth. - If context and code diverge, code is source of truth. -## Mandatory sync pass (always check all) -For every completed implementation task, explicitly review and update these files when relevant: +## Mandatory sync pass (important-change gated) +For every completed implementation task, run a sync pass over these shared files: - `context/overview.md` - `context/architecture.md` - `context/glossary.md` - `context/patterns.md` - `context/context-map.md` -Do not skip `overview`, `architecture`, or `glossary` by default. 
If no edit is needed, verify they still match current code behavior. +Do not default to editing root context files on every task. First classify whether the task is an important change; then edit or verify accordingly. ## Root context significance gating - Treat root context edits as required when a task introduces or changes cross-cutting behavior, repository-wide policy/contracts, architecture boundaries, or canonical terminology. diff --git a/config/.opencode/skills/sce-plan-authoring/SKILL.md b/config/.opencode/skills/sce-plan-authoring/SKILL.md index c2441d46..4cb2b670 100644 --- a/config/.opencode/skills/sce-plan-authoring/SKILL.md +++ b/config/.opencode/skills/sce-plan-authoring/SKILL.md @@ -44,6 +44,25 @@ For each task include: - Done when - Verification notes (commands or checks) +## Atomic task slicing contract (required) +- Author each executable task as one atomic commit unit by default. +- Every task must be scoped so one contributor can complete it and land it as one coherent commit without bundling unrelated changes. +- If a candidate task would require multiple independent commits (for example: refactor + behavior change + docs), split it into separate sequential tasks before finalizing the plan. +- Keep broad wrappers (`polish`, `finalize`, `misc updates`) out of executable tasks; convert them into specific outcomes with concrete acceptance checks. 
+ +Use this quick atomicity check before accepting each task: +- `single_intent`: task delivers one primary outcome +- `single_area`: task touch scope is narrow and related +- `single_verification`: done checks validate one coherent change set + +Example compliant skeleton: +- [ ] T0X: `[single intent title]` (status:todo) + - Task ID: T0X + - Goal: `[one outcome]` + - Boundaries (in/out of scope): `[tight scope]` + - Done when: `[clear acceptance for one coherent change]` + - Verification notes (commands or checks): `[targeted checks for this change]` + Use checkbox lines for machine-friendly progress tracking: - `- [ ] T01: ... (status:todo)` diff --git a/config/.opencode/skills/sce-plan-review/SKILL.md b/config/.opencode/skills/sce-plan-review/SKILL.md index df0b88bd..9803f2fa 100644 --- a/config/.opencode/skills/sce-plan-review/SKILL.md +++ b/config/.opencode/skills/sce-plan-review/SKILL.md @@ -28,6 +28,8 @@ compatibility: opencode ## Rules - Do not auto-mark tasks complete during review. - Keep continuation state in the plan markdown itself. +- Treat `context/plans/` as active execution artifacts; completed plans are disposable and not a durable context source. +- If durable history is needed, record it in current-state context files and/or `context/decisions/` instead of completed plan files. - Keep implementation blocked until decision alignment on unclear points. - If plan context is stale or partial, continue with code truth and flag context updates. diff --git a/config/.opencode/skills/sce-task-execution/SKILL.md b/config/.opencode/skills/sce-task-execution/SKILL.md index 03099eba..eb6c4927 100644 --- a/config/.opencode/skills/sce-task-execution/SKILL.md +++ b/config/.opencode/skills/sce-task-execution/SKILL.md @@ -25,8 +25,9 @@ compatibility: opencode 3) Stop and ask: "Continue with implementation now?" (yes/no). 4) Implement minimal in-scope changes. 
5) Run light task-level tests/checks and lints first, and run a build when the build is light/fast (targeted over full-suite unless requested), then capture evidence. -6) Keep session-only scraps in `context/tmp/`. -7) Update task status in `context/plans/{plan_id}.md`. +6) Record whether the implementation is an important change for context sync (root-edit required) or verify-only (no root edits expected). +7) Keep session-only scraps in `context/tmp/`. +8) Update task status in `context/plans/{plan_id}.md`. ## Scope expansion rule - If out-of-scope edits are needed, stop and ask for approval. diff --git a/config/pkl/base/shared-content.pkl b/config/pkl/base/shared-content.pkl index 2036ad25..da7c5b5a 100644 --- a/config/pkl/base/shared-content.pkl +++ b/config/pkl/base/shared-content.pkl @@ -32,6 +32,11 @@ local sharedSceLongTermQualityBullet = """ - Long-term quality is measured by code quality and context accuracy. """ +local sharedSceDisposablePlanLifecycleBullet = """ +- Treat `context/plans/` as active execution artifacts; completed plans are disposable and not durable history. +- Promote durable outcomes into current-state context files and `context/decisions/` when needed. +""" + agents { ["shared-context-plan"] = new ContentUnit { id = "agent.shared-context-plan" @@ -75,6 +80,7 @@ Procedure Important behaviors \(sharedSceQualityPosturePrefixBullets) +\(sharedSceDisposablePlanLifecycleBullet) \(sharedSceLongTermQualityBullet) Natural nudges to use @@ -181,12 +187,11 @@ Expected arguments: Behavior: - Run `sce-plan-review` first to resolve plan target/task and readiness. -- Confirmation gate before execution: - - if plan + task ID are provided and plan review reports no blockers/ambiguity/missing acceptance criteria, auto-pass readiness - - otherwise, resolve open points and ask the user to confirm the task is ready before continuing -- After readiness passes, run `sce-task-execution` and enforce its mandatory implementation stop before any edits. 
-- After user confirms that implementation stop, continue `sce-task-execution` for scoped implementation, checks/lints/build (as applicable), and plan task status updates. -- Run `sce-context-sync` as a required done gate; keep `context/` aligned with code truth, including required shared-file verification and feature discoverability links. +- Apply readiness confirmation gate from `sce-plan-review`: + - auto-pass only when both plan + task ID are provided and review reports no blockers/ambiguity/missing acceptance criteria + - otherwise resolve open points and ask user confirmation before execution +- Run `sce-task-execution`; keep mandatory implementation stop, scoped implementation, checks/lints/build, and plan status updates skill-owned. +- Run `sce-context-sync` as the required done gate. - Wait for user feedback; if in-scope fixes are requested, apply fixes, rerun light checks (and a light/fast build when applicable), then run `sce-context-sync` again. - If this is the final plan task, run `sce-validation`. - If more tasks remain, prompt a new session with `/next-task {plan_name} T0X`. @@ -205,6 +210,7 @@ Input change request: Behavior: - Keep this command as thin orchestration; delegate clarification/ambiguity handling and plan-shape contracts to `sce-plan-authoring`. +- Ensure plan output follows one-task/one-atomic-commit slicing through `sce-plan-authoring` task-shape rules. - Write/update `context/plans/{plan_name}.md`. - Confirm plan creation with `{plan_name}` and exact path. - Return the full ordered task list. @@ -287,8 +293,8 @@ Behavior: Confirm once staging is complete." - After confirmation: - 1. Inspect staged changes. - 2. Delegate commit-message grammar, atomic split decisions, and split guidance to `sce-atomic-commit`. + - Classify staged diff scope (`context/`-only vs mixed `context/` + non-`context/`) and apply the context-guidance gate from `sce-atomic-commit`. 
+ - Delegate commit-message grammar, atomic split decisions, and split guidance to `sce-atomic-commit`. - Do not create commits automatically. - Output only proposed commit message(s) and split guidance when needed. @@ -362,15 +368,15 @@ Create these paths: - Context is durable AI memory and must reflect current-state truth. - If context and code diverge, code is source of truth. -## Mandatory sync pass (always check all) -For every completed implementation task, explicitly review and update these files when relevant: +## Mandatory sync pass (important-change gated) +For every completed implementation task, run a sync pass over these shared files: - `context/overview.md` - `context/architecture.md` - `context/glossary.md` - `context/patterns.md` - `context/context-map.md` -Do not skip `overview`, `architecture`, or `glossary` by default. If no edit is needed, verify they still match current code behavior. +Do not default to editing root context files on every task. First classify whether the task is an important change; then edit or verify accordingly. ## Root context significance gating - Treat root context edits as required when a task introduces or changes cross-cutting behavior, repository-wide policy/contracts, architecture boundaries, or canonical terminology. @@ -556,6 +562,25 @@ For each task include: - Done when - Verification notes (commands or checks) +## Atomic task slicing contract (required) +- Author each executable task as one atomic commit unit by default. +- Every task must be scoped so one contributor can complete it and land it as one coherent commit without bundling unrelated changes. +- If a candidate task would require multiple independent commits (for example: refactor + behavior change + docs), split it into separate sequential tasks before finalizing the plan. +- Keep broad wrappers (`polish`, `finalize`, `misc updates`) out of executable tasks; convert them into specific outcomes with concrete acceptance checks. 
+ +Use this quick atomicity check before accepting each task: +- `single_intent`: task delivers one primary outcome +- `single_area`: task touch scope is narrow and related +- `single_verification`: done checks validate one coherent change set + +Example compliant skeleton: +- [ ] T0X: `[single intent title]` (status:todo) + - Task ID: T0X + - Goal: `[one outcome]` + - Boundaries (in/out of scope): `[tight scope]` + - Done when: `[clear acceptance for one coherent change]` + - Verification notes (commands or checks): `[targeted checks for this change]` + Use checkbox lines for machine-friendly progress tracking: - `- [ ] T01: ... (status:todo)` @@ -601,6 +626,8 @@ Use checkbox lines for machine-friendly progress tracking: ## Rules - Do not auto-mark tasks complete during review. - Keep continuation state in the plan markdown itself. +- Treat `context/plans/` as active execution artifacts; completed plans are disposable and not a durable context source. +- If durable history is needed, record it in current-state context files and/or `context/decisions/` instead of completed plan files. - Keep implementation blocked until decision alignment on unclear points. - If plan context is stale or partial, continue with code truth and flag context updates. @@ -639,8 +666,9 @@ Use checkbox lines for machine-friendly progress tracking: 3) Stop and ask: "Continue with implementation now?" (yes/no). 4) Implement minimal in-scope changes. 5) Run light task-level tests/checks and lints first, and run a build when the build is light/fast (targeted over full-suite unless requested), then capture evidence. -6) Keep session-only scraps in `context/tmp/`. -7) Update task status in `context/plans/{plan_id}.md`. +6) Record whether the implementation is an important change for context sync (root-edit required) or verify-only (no root edits expected). +7) Keep session-only scraps in `context/tmp/`. +8) Update task status in `context/plans/{plan_id}.md`. 
## Scope expansion rule - If out-of-scope edits are needed, stop and ask for approval. @@ -720,6 +748,12 @@ Default split order: 3. tests 4. docs +## Context-file guidance gating + +- Check staged diff scope before proposing commit messaging guidance. +- If staged changes are context-only (`context/**`), context-file-focused guidance is allowed. +- If staged changes are mixed (`context/**` + non-`context/**`), avoid default context-file commit reminders and prioritize guidance that reflects the full staged scope. + ## Anti-patterns - vague subjects ("cleanup", "updates") diff --git a/context/architecture.md b/context/architecture.md index c1fe62b5..740cca5b 100644 --- a/context/architecture.md +++ b/context/architecture.md @@ -24,9 +24,11 @@ Current target renderer helper modules: - `config/pkl/generate.pkl` (single multi-file generation entrypoint) - `config/pkl/check-generated.sh` (dev-shell integration stale-output detection against committed generated files) - `nix run .#sync-opencode-config` (flake app entrypoint for config regeneration and sync workflow) +- `nix run .#token-count-workflows` (flake app entrypoint for static workflow token-count execution via `evals/token-count-workflows.ts`) - `nix flake check` / `checks.<system>.cli-setup-command-surface` (flake check derivation that runs targeted CLI setup command-surface verification from `cli/`) - `.github/workflows/pkl-generated-parity.yml` (CI wrapper that runs the parity check for pushes to `main` and pull requests targeting `main`) - `.github/workflows/agnix-config-validate-report.yml` (CI wrapper that runs `agnix validate` from `config/`, writes `context/tmp/ci-reports/agnix-validate-report.txt`, uploads it when non-info findings are present, and fails on any non-info finding) +- `.github/workflows/workflow-token-count.yml` (CI wrapper that runs `nix run .#token-count-workflows` for pushes/pull requests targeting `main` and uploads token-footprint artifacts from `context/tmp/token-footprint/`) The scaffold
provides stable canonical content-unit identifiers and reusable target-agnostic text primitives for all planned authored generated classes (agents, commands, skills, shared library file). @@ -105,5 +107,5 @@ Shared Context Plan and Shared Context Code remain separate architectural roles. - Reuse is handled through shared canonical guidance blocks and skill-owned phase contracts, not by collapsing both roles into one agent. - Shared baseline doctrine for both agents is centralized in reusable constants in `config/pkl/base/shared-content.pkl` and interpolated into each role body at generation time. - `/next-task` is a thin orchestration wrapper: it owns gate sequencing, while phase-detail contracts stay canonical in `sce-plan-review`, `sce-task-execution`, and `sce-context-sync`. -- `/change-to-plan` is a thin orchestration wrapper: it delegates clarification and plan-shape ownership to `sce-plan-authoring` while retaining wrapper-level plan creation confirmation and `/next-task` handoff obligations. +- `/change-to-plan` is a thin orchestration wrapper: it delegates clarification and plan-shape ownership to `sce-plan-authoring` (including one-task/one-atomic-commit task slicing) while retaining wrapper-level plan creation confirmation and `/next-task` handoff obligations. - `/commit` is a thin orchestration wrapper: it retains staged-changes confirmation and no-auto-commit constraints, while commit grammar and atomic split logic stay canonical in `sce-atomic-commit`. 
diff --git a/context/context-map.md b/context/context-map.md index fb0993d9..17cce365 100644 --- a/context/context-map.md +++ b/context/context-map.md @@ -9,13 +9,16 @@ Primary context files: Feature/domain context: - `context/cli/placeholder-foundation.md` (CLI command surface, setup install flow, shared-runtime sync smoke gate, nested flake release package/app installability, and Cargo local install + crates.io readiness policy) - `context/sce/shared-context-code-workflow.md` -- `context/sce/shared-context-plan-workflow.md` (canonical `/change-to-plan` workflow and clarification/readiness gate contract) +- `context/sce/shared-context-plan-workflow.md` (canonical `/change-to-plan` workflow, clarification/readiness gate contract, and one-task/one-atomic-commit task-slicing policy) - `context/sce/plan-code-overlap-map.md` (T01 overlap matrix for Shared Context Plan/Code, related commands, and core skill ownership/dedup targets) - `context/sce/dedup-ownership-table.md` (current-state canonical owner-vs-consumer matrix for shared SCE behavior domains and thin-command ownership boundaries) +- `context/sce/workflow-token-footprint-inventory.md` (canonical Plan/Execute workflow participant inventory, T02 ranked token-hotspot table, T03 static token-accounting method, and T06 implemented token-count script behavior/usage contract) +- `context/sce/workflow-token-footprint-manifest.json` (T05 canonical machine-readable surface manifest for workflow token counting, including scope extraction rules and conditional flags) +- `context/sce/workflow-token-count-workflow.md` (root flake app contract for workflow token counting and its runtime wiring to evals script execution) - `context/sce/atomic-commit-workflow.md` (canonical `/commit` command + `sce-atomic-commit` skill contract and naming decision) Working areas: -- `context/plans/` +- `context/plans/` (active plan execution artifacts, not durable history) - `context/handovers/` - `context/decisions/` - `context/tmp/` diff 
--git a/context/glossary.md b/context/glossary.md index d69085a6..97943ff4 100644 --- a/context/glossary.md +++ b/context/glossary.md @@ -1,12 +1,16 @@ # Glossary - `sync-opencode-config`: Flake app command exposed as `nix run .#sync-opencode-config`; canonical operator entrypoint for staged regeneration/replacement of `config/` and replacement of repository-root `.opencode/` from regenerated `config/.opencode/`. +- `token-count-workflows`: Flake app command exposed as `nix run .#token-count-workflows`; canonical repository-root entrypoint that runs `evals/token-count-workflows.ts` through `nix develop` and writes token-count artifacts to `context/tmp/token-footprint/`. - `pkl-check-generated`: Flake app command exposed as `nix run .#pkl-check-generated`; canonical lightweight parity test entrypoint that runs the generated-output drift check inside the Nix dev shell. - lightweight post-task verification baseline: Required quick checks after each completed task in this repo: `nix run .#pkl-check-generated` and `nix flake check`. -- important change (context sync): A completed task change that affects cross-cutting behavior, repository-wide policy/contracts, architecture boundaries, or canonical terminology; these changes require root context edits in `context/overview.md`, `context/architecture.md`, and/or `context/glossary.md` rather than verify-only. +- disposable plan lifecycle: Policy where `context/plans/` holds active execution artifacts only; completed plans are disposable and durable outcomes must be reflected in current-state context files and/or `context/decisions/`. +- important change (context sync): A completed task change that affects cross-cutting behavior, repository-wide policy/contracts, architecture boundaries, or canonical terminology; these changes require root context edits in `context/overview.md`, `context/architecture.md`, and/or `context/glossary.md` instead of verify-only handling. 
+- verify-only root context pass: Context-sync mode for localized tasks where root-level behavior, architecture, and terminology are unchanged; root shared files are checked against code truth but are not edited by default. - generated-owned outputs: Files materialized by `config/pkl/generate.pkl` under `config/.opencode/**` and `config/.claude/**`. - `agnix-config-validate-report`: GitHub Actions workflow at `.github/workflows/agnix-config-validate-report.yml` that runs `nix develop -c agnix validate .` from `config/` on push/PR to `main`. - `agnix validation report artifact`: Failure-investigation artifact named `agnix-validate-report`, uploaded from deterministic path `context/tmp/ci-reports/agnix-validate-report.txt` when non-info (`warning:`/`error:`/`fatal:`) findings are detected. +- `workflow-token-count` CI workflow: GitHub Actions workflow at `.github/workflows/workflow-token-count.yml` that runs `nix run .#token-count-workflows` on push/PR to `main` and uploads token-count outputs from `context/tmp/token-footprint/` as the `workflow-token-footprint` artifact. - `cli-setup-command-surface` flake check: `checks.<system>.cli-setup-command-surface` in `flake.nix`; runs `cargo fmt --check` and focused setup command-surface tests from `cli/` during `nix flake check`. - `cli rust overlay toolchain`: Toolchain contract in `cli/flake.nix` that applies `rust-overlay.overlays.default`, selects `rust-bin.stable.latest.default` with `rustfmt`, and builds the CLI Rust platform via `makeRustPlatform`. - `cli flake release package`: Nested flake package output in `cli/flake.nix` exposed as `packages.sce` with `packages.default = packages.sce`, producing the release-build `sce` binary via `nix build ./cli#default`. @@ -34,11 +38,19 @@ - `sce CLI onboarding guide`: Crate-local documentation at `cli/README.md` that defines runnable placeholder commands, non-goals/safety limits, and roadmap mapping to service modules.
- `plan/code overlap map`: Context artifact at `context/sce/plan-code-overlap-map.md` that classifies Shared Context Plan/Code, `/change-to-plan`, `/next-task`, `/commit`, and core skills into role-specific vs shared-reusable instruction blocks with explicit dedup targets. - `SCE dedup ownership table`: Context artifact at `context/sce/dedup-ownership-table.md` that assigns one canonical owner per shared behavior domain, lists reference-only consumers, and labels each overlap as `intentional/keep` or `dedup/remove`. +- `workflow token-footprint inventory`: Context artifact at `context/sce/workflow-token-footprint-inventory.md` that inventories Plan (`/change-to-plan`) and Execute (`/next-task`) workflow participants (agents/commands/skills/context artifacts), ownership boundaries, and invocation order for static token-footprint analysis. +- `workflow token hotspot classification` (T02): Ranked hotspot table section in `context/sce/workflow-token-footprint-inventory.md` that maps token-heavy prompt surfaces to source locations, token-cost reasons, safety-critical keep markers, and keep-vs-reduce guidance. +- `static token accounting method` (T03): Deterministic static-count procedure in `context/sce/workflow-token-footprint-inventory.md` defining exact counted prompt surfaces, tokenizer assumptions, repeatable counting steps, required report fields, and an evidence template for per-surface totals and deltas. +- `workflow token-count manifest` (T05): Canonical machine-readable surface manifest at `context/sce/workflow-token-footprint-manifest.json` used by token-count tooling to resolve counted surfaces, scope rules (`entire-file` vs `canonical-body-subsection`), and conditional inclusion flags. +- `token-footprint artifact path contract` (T05): Deterministic report output location under `context/tmp/token-footprint/`, with required latest artifacts `workflow-token-count-latest.json` and `workflow-token-count-latest.md`. 
- `shared context plan workflow`: Canonical workflow document at `context/sce/shared-context-plan-workflow.md` defining `/change-to-plan` planning flow, clarification gate semantics, readiness contract, and `/next-task` handoff format. - `commit` command (SCE): Canonical generated command slug that runs commit proposal workflow from staged changes; implemented as `commands["commit"]` in `config/pkl/base/shared-content.pkl` and generated to both OpenCode and Claude command trees. - `sce-atomic-commit`: Canonical generated skill slug for atomic commit planning/message authoring; `atomic-commits` is treated as legacy wording and not the canonical generated skill name. - `SCE Plan/Code role separation`: Architecture decision recorded in `context/decisions/2026-03-03-plan-code-agent-separation.md` that keeps Shared Context Plan (`/change-to-plan`) and Shared Context Code (`/next-task`) as separate roles; dedup is handled through shared canonical guidance and skill-owned contracts rather than agent merge. - `shared SCE baseline snippets`: Reusable canonical blocks in `config/pkl/base/shared-content.pkl` (`sharedSceCorePrinciplesSection`, `sharedSceContextAuthoritySection`, `sharedSceQualityPosturePrefixBullets`, `sharedSceLongTermQualityBullet`) composed into both Shared Context Plan and Shared Context Code agent bodies to remove duplicated baseline doctrine text while preserving role-specific sections. - `next-task thin orchestration contract`: `/next-task` command-body pattern where the command keeps sequencing/readiness gates and delegates detailed behavior ownership to `sce-plan-review`, `sce-task-execution`, and `sce-context-sync`. -- `change-to-plan thin orchestration contract`: `/change-to-plan` command-body pattern where the command stays wrapper-level and delegates clarification/ambiguity handling plus plan-shape contracts to `sce-plan-authoring`, while keeping plan creation confirmation and `/next-task` handoff explicit. 
+- `change-to-plan thin orchestration contract`: `/change-to-plan` command-body pattern where the command stays wrapper-level and delegates clarification/ambiguity handling plus plan-shape contracts (including one-task/one-atomic-commit task slicing) to `sce-plan-authoring`, while keeping plan creation confirmation and `/next-task` handoff explicit. +- `one-task/one-atomic-commit planning contract`: `sce-plan-authoring` requirement that each executable plan task represents one coherent commit unit; broad multi-commit tasks must be split into sequential atomic tasks before execution handoff. - `commit thin orchestration contract`: `/commit` command-body pattern where the command keeps staged-confirmation and proposal-only constraints, while `sce-atomic-commit` owns commit grammar and atomic split guidance. +- `workflow token-count script` (T06): TypeScript implementation at `evals/token-count-workflows.ts` that reads `context/sce/workflow-token-footprint-manifest.json`, applies `entire-file`/`canonical-body-subsection` extraction rules, counts tokens with `o200k_base` fallback `cl100k_base`, and emits deterministic report artifacts. +- `workflow token-count command` (T06): Bun script entry `token-count-workflows` in `evals/package.json`; canonical invocation is from `evals/` via `bun run token-count-workflows` with optional `--run-id`, `--baseline`, `--manifest`, and `--tokenizer` flags. diff --git a/context/overview.md b/context/overview.md index 3da4af43..63127579 100644 --- a/context/overview.md +++ b/context/overview.md @@ -19,7 +19,8 @@ The repository-root flake now keeps nested CLI flake input wiring coherent by pa Shared Context Plan and Shared Context Code remain separate agent roles by design; planning (`/change-to-plan`) and implementation (`/next-task`) stay split while shared baseline guidance is deduplicated via canonical skill-owned contracts. 
Their shared baseline doctrine (core principles, `context/` authority, and quality posture) is defined once as canonical snippets in `config/pkl/base/shared-content.pkl` and composed into both agent bodies during generation. The `/next-task` command body is intentionally thin orchestration: readiness gating + phase sequencing are command-owned, while detailed implementation/context-sync contracts are skill-owned (`sce-plan-review`, `sce-task-execution`, `sce-context-sync`). -The `/change-to-plan` command body is also intentionally thin orchestration: it delegates clarification and plan-shape contracts to `sce-plan-authoring` while keeping wrapper-level plan output and handoff obligations explicit. +Context sync now uses an important-change gate: cross-cutting/policy/architecture/terminology changes require root shared-file edits, while localized tasks run verify-only root checks without default churn. +The `/change-to-plan` command body is also intentionally thin orchestration: it delegates clarification and plan-shape contracts to `sce-plan-authoring` (including one-task/one-atomic-commit task slicing) while keeping wrapper-level plan output and handoff obligations explicit. The `/commit` command body is intentionally thin orchestration: it retains staged-confirmation and proposal-only constraints while delegating commit grammar and atomic split guidance to `sce-atomic-commit`. ## Repository model @@ -40,6 +41,7 @@ The `/commit` command body is intentionally thin orchestration: it retains stage - Regenerate outputs in place: `nix develop -c pkl eval -m . 
config/pkl/generate.pkl` - Verify generated outputs are current: `nix run .#pkl-check-generated` - Run staged destructive sync for `config/` and root `.opencode/`: `nix run .#sync-opencode-config` +- Run workflow token counting from repo root: `nix run .#token-count-workflows` - Run repository flake checks (includes CLI setup command-surface checks): `nix flake check` Lightweight post-task verification baseline (required after each completed task): run `nix run .#pkl-check-generated` and `nix flake check`. @@ -48,6 +50,7 @@ Lightweight post-task verification baseline (required after each completed task) - `.github/workflows/pkl-generated-parity.yml` runs parity checks on pushes to `main` and pull requests targeting `main`. - `.github/workflows/agnix-config-validate-report.yml` runs `agnix validate` from `config/`, fails on non-info findings, and uploads a deterministic report artifact when findings are present. +- `.github/workflows/workflow-token-count.yml` runs `nix run .#token-count-workflows` on pushes to `main` and pull requests targeting `main`, then uploads token-footprint artifacts from `context/tmp/token-footprint/`. ## Cross-target parity @@ -59,9 +62,13 @@ Lightweight post-task verification baseline (required after each completed task) - Use `context/architecture.md` for component boundaries and current-state contracts. - Use `context/patterns.md` for implementation and operational conventions. - Use `context/decisions/` for explicit architecture decisions. -- Use `context/plans/` for task history and verification evidence. +- Use `context/plans/` for active plan execution state and task handoff continuity. - Use `context/cli/placeholder-foundation.md` for current command-surface, local Turso adapter behavior, and module-boundary details of the `sce` placeholder crate. - Use `context/sce/shared-context-plan-workflow.md` for the canonical planning-session workflow (`/change-to-plan`) including clarification gating and `/next-task` handoff contract. 
- Use `context/sce/plan-code-overlap-map.md` for the current overlap/dedup inventory across Shared Context Plan/Code agents, related commands, and core skills. - Use `context/sce/dedup-ownership-table.md` for canonical owner-vs-consumer boundaries and keep-vs-dedup labels used by the dedup implementation plan. +- Use `context/sce/workflow-token-footprint-inventory.md` for the canonical participant inventory of `/change-to-plan` and `/next-task` workflows, T02 ranked token-hotspot classification, and the T03 static token-accounting method/report template used by token-footprint analysis tasks. +- Use `context/sce/workflow-token-footprint-manifest.json` for the canonical machine-readable T05 manifest consumed by workflow token-count tooling (`surface_id`, workflow class, extraction scope rules, and conditional flags). +- Use `context/sce/workflow-token-count-workflow.md` for the root flake app contract (`nix run .#token-count-workflows`) and runtime wiring to the evals token-count script. +- Use `evals/token-count-workflows.ts` (run via `nix run .#token-count-workflows` from repo root, or `bun run token-count-workflows` from `evals/`) for T06 static workflow token counting that emits deterministic reports to `context/tmp/token-footprint/`. - Use `context/sce/atomic-commit-workflow.md` for canonical `/commit` behavior, `sce-atomic-commit` naming, and proposal-only commit planning constraints. diff --git a/context/patterns.md b/context/patterns.md index dd524653..6227b48c 100644 --- a/context/patterns.md +++ b/context/patterns.md @@ -8,7 +8,9 @@ ## Flake app entrypoints - Expose operational workflows as flake apps so commands are stable and system-mapped across supported `flake-utils` default systems. -- Current repo command contract: `nix run .#sync-opencode-config` is the canonical entrypoint for staged regeneration/replacement of `config/` and replacement of repository-root `.opencode/` from regenerated `config/.opencode/`. 
+- Current repo command contracts: + - `nix run .#sync-opencode-config` is the canonical entrypoint for staged regeneration/replacement of `config/` and replacement of repository-root `.opencode/` from regenerated `config/.opencode/`. + - `nix run .#token-count-workflows` is the canonical root entrypoint for static workflow token counting (wrapping `bun run token-count-workflows` from `evals/` through `nix develop`). - For flake app outputs, include `meta.description` so `nix flake check` app validation stays warning-free. - For destructive config replacement flows, regenerate into a temporary staged `config/` first, validate required generated directories exist, and only then swap live `config/`. - For destructive root `.opencode/` replacement flows, keep exclusions explicit (for example `node_modules`), use backup-and-restore around swap, and run a source/target tree parity check with the same exclusions. @@ -38,7 +40,7 @@ - Keep SCE command bodies thin when phase skills already define detailed contracts. - For `/next-task`, retain only sequencing and confirmation gates in the command body and delegate phase details to `sce-plan-review`, `sce-task-execution`, and `sce-context-sync`. -- For `/change-to-plan`, retain wrapper-level plan output/handoff obligations in the command body and delegate clarification and plan-shape contracts to `sce-plan-authoring`. +- For `/change-to-plan`, retain wrapper-level plan output/handoff obligations in the command body and delegate clarification and plan-shape contracts (including one-task/one-atomic-commit task slicing) to `sce-plan-authoring`. - For `/commit`, retain staging-confirmation and proposal-only gates in the command body and delegate commit grammar plus atomic split guidance to `sce-atomic-commit`. - Preserve mandatory gates (readiness confirmation, implementation stop, final-task validation trigger) while removing duplicated procedural prose from command text. 
@@ -49,6 +51,7 @@ - Run multi-file generation with `nix develop -c pkl eval -m . config/pkl/generate.pkl` to emit to repository-root mapped paths. - Run stale-output detection through the flake app entrypoint `nix run .#pkl-check-generated`; it wraps `nix develop -c ./config/pkl/check-generated.sh`, regenerates into a temporary directory, and fails if generated-owned paths differ from committed outputs. - Keep CI parity enforcement aligned with local workflow by running the same command in `.github/workflows/pkl-generated-parity.yml` for pushes to `main` and pull requests targeting `main`. +- Keep token-count CI aligned with the flake app contract by running `nix run .#token-count-workflows` in `.github/workflows/workflow-token-count.yml` on pushes/pull requests targeting `main`, and upload artifacts from `context/tmp/token-footprint/`. - Treat `nix run .#pkl-check-generated` and `nix flake check` as the lightweight post-task verification baseline and run both after each completed task. - Keep agnix config validation on the same trigger contract (`push`/`pull_request` to `main`) in `.github/workflows/agnix-config-validate-report.yml` with job defaults pinned to `working-directory: config`. - In the agnix CI workflow, capture command output to `context/tmp/ci-reports/agnix-validate-report.txt`, treat `warning:`/`error:`/`fatal:` findings as non-info gate failures, and upload the captured report as a GitHub artifact (`agnix-validate-report`) only when non-info findings are present. diff --git a/context/plans/sce-workflow-token-count-nix-ci.md b/context/plans/sce-workflow-token-count-nix-ci.md new file mode 100644 index 00000000..52e81a83 --- /dev/null +++ b/context/plans/sce-workflow-token-count-nix-ci.md @@ -0,0 +1,94 @@ +# Plan: sce-workflow-token-count-nix-ci + +## 1) Change summary +Add a first-class Nix app entrypoint for the workflow token-count script and add a CI workflow that runs the token-count command and uploads the generated artifacts. 
+ +## 2) Success criteria +- A root flake app exists to run workflow token counting similarly to `nix run .#sync-opencode-config`. +- The app executes the existing T06 script and writes artifacts under `context/tmp/token-footprint/`. +- A GitHub Actions workflow runs the Nix app on push/PR (target branch policy aligned with repo conventions) and uploads token-count artifacts. +- CI artifact includes the script outputs generated by the run (`workflow-token-count-latest.json` and `workflow-token-count-latest.md`, plus optional run-id output if configured). +- Context docs remain aligned with the new app/workflow discoverability. + +## 3) Constraints and non-goals +- In scope: flake app wiring, CI workflow wiring, artifact upload configuration, and minimal docs/context sync. +- Out of scope: changing token-counting methodology, manifest schema, or prompt content surfaces. +- Out of scope: runtime token telemetry or external dashboard/reporting. + +## 4) Task stack (T01..T03) +- [x] T01: Add root flake app for workflow token counting (status:done) + - Task ID: T01 + - Goal: expose a root flake app (for example `token-count-workflows`) that runs the token-count script through the existing evals runtime. + - Boundaries (in/out of scope): + - In: `flake.nix` app wiring and command contract. + - Out: script behavior changes beyond wiring needs. + - Done when: + - `nix run .#token-count-workflows` executes successfully from repo root. + - App has deterministic command invocation and metadata description. + - Verification notes (commands or checks): + - `nix run .#token-count-workflows` + - Evidence: + - Added `apps.token-count-workflows` in `flake.nix` with deterministic wrapper program `token-count-workflows` and metadata description, wired to execute `bun run token-count-workflows` from `evals/` via `nix develop`. + - Ran `nix run .#token-count-workflows -- --help` (exit 0) to verify deterministic usage output. 
+ - Ran `nix run .#token-count-workflows` from repository root (exit 0); run completed and wrote artifacts to `context/tmp/token-footprint/workflow-token-count-latest.json` and `context/tmp/token-footprint/workflow-token-count-latest.md`. + - Ran `nix flake check --no-build` (exit 0) to confirm app output evaluation; `apps.x86_64-linux.token-count-workflows` validated successfully with only expected incompatible-system warnings. + - Synced context discoverability for the new root app contract: updated `context/overview.md`, `context/architecture.md`, `context/patterns.md`, `context/glossary.md`, and `context/context-map.md`, and added focused domain documentation at `context/sce/workflow-token-count-workflow.md`. + +- [x] T02: Add CI workflow and artifact upload for token-count outputs (status:done) + - Task ID: T02 + - Goal: add a GitHub Actions workflow that runs the token-count Nix app and uploads generated artifacts. + - Boundaries (in/out of scope): + - In: workflow trigger config, execution command, artifact upload path and naming. + - Out: unrelated CI refactors. + - Done when: + - Workflow file exists under `.github/workflows/` and runs `nix run .#token-count-workflows`. + - Artifact upload includes `context/tmp/token-footprint/` outputs from the run. + - Verification notes (commands or checks): + - Local syntax/contract review of workflow YAML. + - Optional dry-run command parity check via local `nix run .#token-count-workflows`. + - Evidence: + - Added `.github/workflows/workflow-token-count.yml` with trigger policy aligned to repo CI conventions (`push`/`pull_request` on `main`), Nix install via `cachix/install-nix-action@v31`, execution command `nix run .#token-count-workflows`, and artifact upload via `actions/upload-artifact@v4`. 
+ - Artifact upload contract is explicit and deterministic: artifact name `workflow-token-footprint`, required paths include `context/tmp/token-footprint/workflow-token-count-latest.json` and `context/tmp/token-footprint/workflow-token-count-latest.md` (plus wildcard run-id outputs when present), with `if-no-files-found: error`. + - Ran `nix run .#token-count-workflows` locally from repository root (exit 0); run wrote `context/tmp/token-footprint/workflow-token-count-latest.json` and `context/tmp/token-footprint/workflow-token-count-latest.md`. + - Synced context discoverability for the CI contract in `context/overview.md`, `context/architecture.md`, `context/patterns.md`, `context/glossary.md`, and `context/sce/workflow-token-count-workflow.md`. + +- [x] T03: Validation and cleanup (status:done) + - Task ID: T03 + - Goal: validate end-to-end wiring and sync context discoverability. + - Boundaries (in/out of scope): + - In: required checks, context sync for new app/workflow discovery. + - Out: additional feature expansion. + - Done when: + - Verification evidence includes exact command outputs and artifact paths. + - Context reflects the new Nix/CI contract and links are discoverable. + - Verification notes (commands or checks): + - `nix run .#pkl-check-generated` + - `nix flake check` + - Evidence: + - Ran `nix run .#token-count-workflows` from repository root (exit 0); run executed `bun ./token-count-workflows.ts` and wrote artifacts to `context/tmp/token-footprint/workflow-token-count-latest.json` and `context/tmp/token-footprint/workflow-token-count-latest.md`. + - Ran `nix run .#pkl-check-generated` from repository root (exit 0); output reported `Generated outputs are up to date.` + - Ran `nix flake check` from repository root (exit 0); validated `apps.x86_64-linux.token-count-workflows` and completed with only expected incompatible-system warnings for non-local systems. 
+ - Completed required context sync verification pass for shared files (`context/overview.md`, `context/architecture.md`, `context/glossary.md`, `context/patterns.md`, `context/context-map.md`) and confirmed existing token-count workflow discoverability links remain accurate with no drift edits required. + +## 5) Open questions +- None. + +## 6) Final validation report +- Commands run: + - `nix run .#token-count-workflows` (exit 0) + - `nix run .#pkl-check-generated` (exit 0) + - `nix flake check` (exit 0) +- Key outputs: + - Token-count app run wrote `context/tmp/token-footprint/workflow-token-count-latest.json` and `context/tmp/token-footprint/workflow-token-count-latest.md`. + - Generated output parity check reported `Generated outputs are up to date.` + - Flake checks validated `apps.x86_64-linux.token-count-workflows`; only expected incompatible-system warnings were emitted for non-local systems. +- Failed checks and follow-ups: + - None. +- Success-criteria verification summary: + - Root flake app entrypoint exists and runs from repo root (`nix run .#token-count-workflows`). + - App executes the T06 script and writes required artifacts under `context/tmp/token-footprint/`. + - CI workflow `.github/workflows/workflow-token-count.yml` runs the app on push/PR to `main` and uploads token-footprint artifacts. + - CI artifact contract includes required latest outputs (`workflow-token-count-latest.json`, `workflow-token-count-latest.md`) with wildcard run-id support. + - Context discoverability remains aligned and linked (`context/overview.md`, `context/architecture.md`, `context/patterns.md`, `context/glossary.md`, `context/context-map.md`, `context/sce/workflow-token-count-workflow.md`). +- Residual risks: + - `nix flake check` reports expected incompatible-system omissions unless run with `--all-systems`; current local validation covers the active system contract. 
diff --git a/context/plans/sce-workflow-token-footprint-analysis.md b/context/plans/sce-workflow-token-footprint-analysis.md new file mode 100644 index 00000000..ff100715 --- /dev/null +++ b/context/plans/sce-workflow-token-footprint-analysis.md @@ -0,0 +1,168 @@ +# Plan: sce-workflow-token-footprint-analysis + +## 1) Change summary +Analyze the SCE Plan (`/change-to-plan`) and Execute (`/next-task`) workflows end-to-end, inventory every participating agent/command/skill in each workflow, define a static token-footprint analysis method plus token-reduction options, and add implementation work for a deterministic TypeScript token-count script that operationalizes the T03 counting schema without changing workflow gates or prompt text. + +## 2) Success criteria +- A complete, workflow-scoped inventory exists for Plan and Execute that lists all participating agents, commands, skills, and key context artifacts with ownership boundaries. +- A ranked token-reduction proposal set exists with impact/risk notes and explicit keep-vs-reduce guidance for each major prompt surface. +- A static token accounting method is documented and repeatable for this repo, including measurement inputs, counting procedure, and evidence format. +- A TypeScript static token-count script exists (for example `scripts/token-count-workflows.ts`) that reads the canonical T03 surface manifest (markdown-backed or checked-in JSON manifest), scopes text by rule (`entire-file` vs `canonicalBody subsection`), counts tokens with `o200k_base` (fallback `cl100k_base`), and emits deterministic report output. +- Report output includes required per-surface fields and run-level summary fields from T03, plus Plan/Execute totals, combined total, optional baseline deltas, and run metadata (timestamp, git SHA, tokenizer). +- Report artifacts are written to `context/tmp/`, usage is documented (`context/` or `README`), and verification evidence records exact command(s) run and artifact path(s). 
+- The plan does not require a hard numeric reduction target yet. + +## 3) Constraints and non-goals +- In scope: workflow analysis and planning artifacts under `context/` for Shared Context Plan/Code, `/change-to-plan`, `/next-task`, and linked skills. +- In scope: static token estimation design, documentation, and deterministic local script implementation for T03 schema accounting. +- Out of scope: implementing workflow text edits, command/skill rewrites, or canonical Pkl changes in this planning session. +- Out of scope: runtime/observed token telemetry estimation; this remains a later phase. +- Out of scope: changing mandatory workflow gates, role boundaries, or prompt rewrites. +- Non-goal: setting a mandatory reduction threshold (for example 20%) at this stage. + +## 4) Task stack (T01..T07) +- [x] T01: Build canonical workflow inventory for Plan and Execute (status:done) + - Task ID: T01 + - Goal: Produce a complete matrix of agents, commands, skills, and context docs used by `/change-to-plan` and `/next-task`, including ownership and invocation boundaries. + - Boundaries (in/out of scope): + - In: `context/sce/*.md` workflow docs, canonical command/skill ownership guidance, generated-surface references needed for coverage. + - Out: changing any workflow behavior while inventorying. + - Done when: + - Plan workflow and Execute workflow each have explicit component lists (agents/commands/skills) with role descriptions. + - Cross-workflow shared vs role-specific components are labeled. + - Verification notes (commands or checks): + - Manual cross-check against `context/sce/shared-context-plan-workflow.md`, `context/sce/shared-context-code-workflow.md`, and dedup ownership artifacts. + - Implementation evidence: + - Canonical inventory artifact created at `context/sce/workflow-token-footprint-inventory.md` with workflow-scoped participant matrix, ownership boundaries, and shared-vs-role-specific labels. 
+ +- [x] T02: Map token-heavy prompt surfaces and duplication hotspots (status:done) + - Task ID: T02 + - Goal: Identify where token overhead accumulates across agents/commands/skills and classify each hotspot as intentional guardrail text vs reducible duplication. + - Boundaries (in/out of scope): + - In: structural analysis of repeated instruction blocks, orchestration-vs-skill duplication, and cross-target parity duplication risks. + - Out: editing source prompts during analysis. + - Done when: + - A hotspot table exists with source location, reason for token cost, and keep/reduce recommendation. + - Safety-critical text that must remain verbose is explicitly marked. + - Verification notes (commands or checks): + - Manual consistency review against `context/sce/plan-code-overlap-map.md` and `context/sce/dedup-ownership-table.md`. + - Implementation evidence: + - Added ranked hotspot table and keep-vs-reduce classification (including explicit safety-critical markings) in `context/sce/workflow-token-footprint-inventory.md` under section `T02: Token-heavy prompt surfaces and duplication hotspots`. + +- [x] T03: Define static token accounting method and evidence template (status:done) + - Task ID: T03 + - Goal: Specify a deterministic static-token estimation workflow for SCE prompt artifacts, including counting scope, tokenizer choice assumptions, and report schema. + - Boundaries (in/out of scope): + - In: static estimation method, reproducibility notes, and evidence capture format in context docs/plan artifacts. + - Out: runtime token observation and production telemetry collection. + - Done when: + - The method states exact inputs (which files/sections are counted), counting steps, and output fields (per-surface tokens, totals, and deltas). + - Assumptions and known limitations of static estimates are documented. + - Verification notes (commands or checks): + - Manual dry-run review confirms the method can be repeated by a future session without ambiguity. 
+ - Implementation evidence: + - Added `T03: Static token accounting method and evidence template` to `context/sce/workflow-token-footprint-inventory.md`, including exact counted surface manifest, tokenizer assumptions, deterministic procedure, required report schema fields, evidence template, and known limitations. + +- [x] T04: Propose reduction strategies with trade-offs and rollout order (status:done) + - Task ID: T04 + - Goal: Produce a prioritized set of token-reduction strategies that preserve SCE safety and role separation while reducing unnecessary prompt footprint. + - Boundaries (in/out of scope): + - In: strategy design (for example thin-wrapper tightening, canonical snippet reuse, redundancy pruning, and context-map narrowing rules). + - Out: executing text reductions in canonical files. + - Done when: + - Proposal list is ranked by expected impact and implementation risk. + - Each proposal includes affected artifacts, rationale, and regression risk notes. + - Verification notes (commands or checks): + - Manual review confirms proposals preserve mandatory gates (clarification, readiness, implementation stop, context sync). + - Implementation evidence: + - Added `T04: Token-reduction strategy set, trade-offs, and rollout order` to `context/sce/workflow-token-footprint-inventory.md` with a prioritized proposal matrix (impact/risk/affected artifacts), explicit rollout sequencing, and preserve-as-is guardrail constraints. + +- [x] T05: Define script inputs/outputs and manifest extraction contract (status:done) + - Task ID: T05 + - Goal: Lock the deterministic interface for the workflow token-count script so implementation has unambiguous schema and extraction rules. 
+ - Boundaries (in/out of scope): + - In: selecting manifest source strategy (`context/sce/workflow-token-footprint-inventory.md` parsing and/or checked-in JSON manifest), exact field mapping to T03 schema, deterministic report file naming/path under `context/tmp/`, baseline-delta input behavior. + - Out: implementing prompt/gate text changes, altering workflow ownership boundaries, or introducing runtime telemetry collection. + - Done when: + - Script contract is explicit for all T03 surfaces, including `scope_rule`, `conditional`, and tokenizer fallback behavior. + - Report schema mapping to T03 required fields is fully specified for per-surface rows and run-level totals. + - Artifact path contract under `context/tmp/` is deterministic and implementation-ready. + - Verification notes (commands or checks): + - Manual traceability check from T03 schema table in `context/sce/workflow-token-footprint-inventory.md` to implementation contract fields. + - Implementation evidence: + - Added `T05: Script input/output and manifest extraction contract` section to `context/sce/workflow-token-footprint-inventory.md`, including canonical manifest strategy, extraction rules, tokenizer/baseline behavior, report schema mapping, and deterministic `context/tmp/token-footprint/` artifact contract. + - Added canonical machine-readable manifest `context/sce/workflow-token-footprint-manifest.json` covering all T03 surfaces with explicit `scope_rule` and `conditional` fields. + - Synced discoverability references in `context/context-map.md`, `context/overview.md`, and `context/glossary.md`. + +- [x] T06: Implement TypeScript static token-count workflow script (status:done) + - Task ID: T06 + - Goal: Build and wire a deterministic TypeScript script that computes per-surface and aggregate token counts for every workflow surface in the T03 manifest. 
+ - Boundaries (in/out of scope): + - In: add `scripts/token-count-workflows.ts` (or repo-conventional equivalent), load manifest from canonical T03 inventory (or checked-in JSON), extract counted text (`entire-file` vs `canonicalBody subsection`), count with `o200k_base` fallback `cl100k_base`, emit deterministic report artifacts to `context/tmp/`, and add package script entry/document usage. + - Out: changing workflow prompt content, changing mandatory workflow gates, or rewriting command/skill contracts. + - Done when: + - Script runs locally and emits all T03 required fields (`surface_id`, `workflow`, `artifact_class`, `path`, `scope_rule`, `tokenizer`, `tokens`, nullable baseline/delta fields, `conditional`, and run summary totals/metadata). + - Report includes Plan subtotal, Execute subtotal, combined total, optional baseline deltas, timestamp, git SHA, tokenizer, and run ID. + - At least one deterministic artifact is created under `context/tmp/` and usage instructions are documented in `context/` or `README`. + - Verification notes capture exact run command(s) and produced artifact path(s). + - Verification notes (commands or checks): + - Execute the script through the documented command (`npm`/`bun` script or direct TS runtime invocation) and confirm report schema completeness. + - Validate deterministic output contract by re-running without source changes and confirming stable structure/field set. + - Implementation evidence: + - Added TypeScript implementation at `evals/token-count-workflows.ts` with manifest-driven extraction (`entire-file`, `canonical-body-subsection`), tokenizer resolution (`o200k_base` fallback `cl100k_base`), optional baseline delta handling, and deterministic report emission. + - Added Bun script entry `token-count-workflows` in `evals/package.json` and installed `js-tiktoken` dependency (`evals/bun.lock`). 
+ - Generated required artifacts at `context/tmp/token-footprint/workflow-token-count-latest.json`, `context/tmp/token-footprint/workflow-token-count-latest.md`, and archival JSON `context/tmp/token-footprint/workflow-token-count-t06-initial.json`. + - Verification commands run: + - `bun run token-count-workflows --run-id t06-initial` (from `evals/`) + - `bun run token-count-workflows` (from `evals/`) + - `bunx tsc --noEmit` (from `evals/`) + - `bun run token-count-workflows --baseline context/tmp/token-footprint/workflow-token-count-t06-initial.json --run-id t06-baseline-check` (from `evals/`) + - `nix run .#pkl-check-generated` + - `nix flake check` + +- [x] T07: Validation and cleanup (status:done) + - Task ID: T07 + - Goal: Validate that analysis artifacts plus script implementation outputs are coherent, complete, and ready for downstream reduction work. + - Boundaries (in/out of scope): + - In: completeness checks across T01-T06 outputs, verification evidence hygiene, final plan cleanup, and context-sync requirements for execution sessions. + - Out: further optimization or prompt-surface rewrites beyond approved implementation scope. + - Done when: + - All success criteria map to completed task artifacts, including T06 script evidence and `context/tmp/` report artifacts. + - Final notes include exact verification commands, artifact paths, and residual limitations/assumptions (if any). + - Execution-phase baseline and context-sync expectations are explicit for follow-on tasks. + - Verification notes (commands or checks): + - Manual traceability check from success criteria to T01-T06 outputs. + - Execution-phase validation baseline: `nix run .#pkl-check-generated` and `nix flake check` plus required `sce-context-sync` completion. 
 + - Implementation evidence: + - Completed manual traceability mapping from section `## 2) Success criteria` to task artifacts recorded under T01-T06 implementation-evidence blocks in this plan, including T06 script outputs and command evidence. + - Confirmed required token-footprint artifacts exist at `context/tmp/token-footprint/workflow-token-count-latest.json`, `context/tmp/token-footprint/workflow-token-count-latest.md`, `context/tmp/token-footprint/workflow-token-count-t06-initial.json`, `context/tmp/token-footprint/workflow-token-count-t06-baseline-check.json`, and `context/tmp/token-footprint/workflow-token-count-t06-final.json`. + - Completed required shared-file context verification for `context/overview.md`, `context/architecture.md`, `context/glossary.md`, `context/patterns.md`, and `context/context-map.md`; no root-context edits were needed because T07 introduced no new behavior, architecture boundary, or terminology. + - Verified feature discoverability links are already present and current for token-footprint artifacts (`context/sce/workflow-token-footprint-inventory.md` and `context/sce/workflow-token-footprint-manifest.json`) via `context/context-map.md`, with matching usage references in `context/overview.md` and terms in `context/glossary.md`. + - Re-ran execution-phase validation baseline and captured passing evidence: + - `nix run .#pkl-check-generated` + - `nix flake check` + - Residual limitations/assumptions remain unchanged from T03/T05/T06 contracts: estimates are static (not runtime telemetry), tokenizer fallback behavior (`o200k_base` falling back to `cl100k_base`) remains in effect, and no hard reduction threshold is imposed by this plan. + +## 5) Open questions +- None. Scope is confirmed as SCE workflow artifacts, static estimation only, and no hard reduction target at this stage. 
+ +## 6) Final validation report (T07) + +- Commands run: + - `nix run .#pkl-check-generated` (exit: 0; key output: `Generated outputs are up to date.`) + - `nix flake check` (exit: 0; key output: check derivation evaluated and ran successfully; system-compatibility warning only) +- Failed checks and follow-ups: + - None. +- Temporary scaffolding cleanup: + - No temporary implementation scaffolding was introduced in T07; no cleanup action required. +- Context final-state verification: + - Verified `context/overview.md`, `context/architecture.md`, `context/glossary.md`, `context/patterns.md`, and `context/context-map.md` remain aligned to current repository behavior. + - Verified feature discoverability coverage for token-footprint artifacts remains present in shared context references. +- Success-criteria verification summary: + - Inventory and hotspot/method/strategy artifacts are present via T01-T05 evidence in this plan and `context/sce/workflow-token-footprint-inventory.md`. + - Static token-count implementation and usage evidence are present via T06 entries (`evals/token-count-workflows.ts`, script command evidence, and token-footprint artifacts under `context/tmp/token-footprint/`). + - Validation-and-cleanup evidence (traceability mapping, baseline checks, context-sync verification, residual assumptions) is recorded under T07 implementation evidence. +- Residual risks: + - Static token estimates are not runtime telemetry. + - Tokenizer fallback behavior (`o200k_base` fallback to `cl100k_base`) remains an implementation assumption. + - No mandatory numeric reduction threshold is enforced by this plan. 
diff --git a/context/plans/sce-workflow-token-reduction-and-straightforwardness.md b/context/plans/sce-workflow-token-reduction-and-straightforwardness.md new file mode 100644 index 00000000..e368ac17 --- /dev/null +++ b/context/plans/sce-workflow-token-reduction-and-straightforwardness.md @@ -0,0 +1,142 @@ +# Plan: sce-workflow-token-reduction-and-straightforwardness + +## 1) Change summary +Reduce unnecessary workflow prompt/context tokens and make SCE planning/execution flows more straightforward by tightening plan lifecycle policy, limiting context-sync edits to important changes, constraining commit guidance to staged-diff reality, and enforcing one-task/one-atomic-commit plan slicing. + +## 2) Success criteria +- Completed implementation plans are treated as disposable execution artifacts and are not referenced as durable history from core context files. +- `/commit` guidance proposes context-file-only commit messaging only when staged changes are exclusively under `context/`; mixed code+context diffs do not trigger that guidance. +- Task execution no longer defaults to editing `context/overview.md`, `context/patterns.md`, and `context/architecture.md` on every task; updates are required only for important cross-cutting changes. +- Plan tasks are authored so each executable task maps to one atomic commit boundary. +- Shared Context Plan/Code command+skill contracts remain thin, deterministic, and non-duplicative after updates. + +## 3) Constraints and non-goals +- In scope: SCE command/skill contract text, canonical Pkl source updates, generated parity, and context policy docs needed to codify new lifecycle/sync behavior. +- In scope: reducing recurring token-heavy instructions that do not affect safety or correctness. +- Out of scope: changing command names, collapsing Plan/Code into one role, or introducing auto-commit behavior. +- Out of scope: application/runtime feature work outside SCE workflow authoring surfaces. 
+- Non-goal: removing all repetition; safety-critical reminders may remain when they protect correctness. + +## Assumptions +- Completed plans are deleted from `context/plans/` and are not treated as durable references from root context navigation. +- `/commit` context-only messaging is conditional on staged diff scope (context-only). +- Root context files (`overview.md`, `patterns.md`, `architecture.md`) are updated only for important cross-cutting changes. +- Each implementation task in a plan is scoped to a single atomic commit. + +## 4) Task stack (T01..T06) +- [x] T01: Define disposable plan lifecycle and durable-context boundaries (status:done) + - Task ID: T01 + - Goal: Establish canonical policy for when plans are kept, deleted, and referenced so durable context does not depend on completed plan files. + - Boundaries (in/out of scope): + - In: policy updates in `context/sce/` workflow docs plus any required root-context wording alignment. + - Out: implementing code-task execution behavior changes. + - Done when: + - A single canonical policy states completed plans are disposable and not a durable context source. + - Root context navigation no longer frames `context/plans/` as task-history storage. + - Verification notes (commands or checks): + - Manual trace from `context/context-map.md`, `context/overview.md`, and `context/sce/shared-context-plan-workflow.md` to confirm policy consistency. + - Canonical prompt-surface alignment in `config/pkl/base/shared-content.pkl` and regenerated outputs for Shared Context Plan + `sce-plan-review` across OpenCode/Claude. + - `nix develop -c pkl eval -m . config/pkl/generate.pkl` + - `nix run .#pkl-check-generated` + - `nix flake check` + +- [x] T02: Enforce "important change only" context-sync updates (status:done) + - Task ID: T02 + - Goal: Update plan/execution skill contracts so root context files are edited only when task impact is cross-cutting and important. 
+ - Boundaries (in/out of scope): + - In: `sce-task-execution`, `sce-context-sync`, related workflow/glossary definitions. + - Out: removing required context sync entirely. + - Done when: + - Skill language makes root-context edits conditional instead of default. + - Verification-only tasks can close without forced root-context churn when no important change occurred. + - Verification notes (commands or checks): + - Manual contract review of canonical skill bodies and generated skill outputs for conditional sync wording. + +- [x] T03: Constrain `/commit` context-file guidance to context-only staged diffs (status:done) + - Task ID: T03 + - Goal: Remove noisy context-commit reminders from mixed-change commit proposals while preserving context-only commit support. + - Boundaries (in/out of scope): + - In: `/commit` command wrapper and `sce-atomic-commit` guidance contract. + - Out: automatic git operations or commit creation behavior. + - Done when: + - Commit guidance explicitly gates context-only reminders by staged diff scope. + - Mixed staged diffs no longer include default "commit context files" recommendations. + - Verification notes (commands or checks): + - Scenario-based contract walkthrough for (a) context-only staged diff and (b) mixed code+context staged diff. + +- [x] T04: Enforce one-task/one-atomic-commit planning contract (status:done) + - Task ID: T04 + - Goal: Make plan-authoring contracts require atomic executable task slicing and reject broad multi-commit tasks. + - Boundaries (in/out of scope): + - In: `sce-plan-authoring` task-shape requirements and related `/change-to-plan` wording. + - Out: changing repository git safety policy. + - Done when: + - Plan task format includes explicit atomic-commit boundary expectations. + - New plans default to executable units that can each land as one coherent commit. 
+ - Verification notes (commands or checks): + - Manual review of updated planning instructions and one sample plan skeleton for atomicity compliance. + +- [x] T05: Regenerate outputs and align context map/glossary discoverability (status:done) + - Task ID: T05 + - Goal: Regenerate generated command/skill artifacts and ensure discoverability docs reflect the new low-noise workflow contracts. + - Boundaries (in/out of scope): + - In: deterministic regeneration outputs and focused context docs impacted by T01-T04. + - Out: unrelated context refactors. + - Done when: + - Generated OpenCode/Claude outputs are updated to match canonical source changes. + - `context/context-map.md` and `context/glossary.md` accurately describe the revised plan lifecycle and sync/commit behavior. + - Verification notes (commands or checks): + - `nix develop -c pkl eval -m . config/pkl/generate.pkl` + - Manual read-through of affected generated command/skill files and context docs. + +- [x] T06: Validation and cleanup (status:done) + - Task ID: T06 + - Goal: Run final quality gates, confirm success criteria evidence, and leave plan state implementation-ready/traceable. + - Boundaries (in/out of scope): + - In: required repo validation checks and final context-sync verification. + - Out: follow-on enhancements beyond this request. + - Done when: + - Validation commands pass and outputs are deterministic. + - Success criteria have explicit evidence across updated contracts. + - Plan checklist/status reflects final state clearly. + - Verification notes (commands or checks): + - `nix run .#pkl-check-generated` + - `nix flake check` + - Final manual context-sync review against updated workflow policy. + +## 5) Open questions +- None. 
+ +## 6) Final validation report (T06) + +### Commands run +- `nix run .#pkl-check-generated` + - Exit code: 0 + - Key output: `Generated outputs are up to date.` +- `nix flake check` + - Exit code: 0 + - Key output: evaluated checks/apps/devShells; built `checks.x86_64-linux.cli-setup-command-surface`; no failures. + - Note: warning reports omitted incompatible systems (`aarch64-darwin`, `aarch64-linux`, `x86_64-darwin`) unless `--all-systems` is used. + +### Context-sync verification +- Classified as verify-only root context pass for this final validation task (no new cross-cutting behavior introduced). +- Verified current-state alignment without root edits for: + - `context/overview.md` + - `context/architecture.md` + - `context/glossary.md` + - `context/patterns.md` + - `context/context-map.md` +- Feature discoverability links remain present in `context/context-map.md` and canonical terms remain covered in `context/glossary.md`. + +### Success-criteria evidence summary +- Disposable plan lifecycle is documented as active-artifact-only (not durable history) in root context navigation and glossary. +- `/commit` guidance is scoped by staged diff type (`context-only` vs mixed) in canonical source and generated command/skill outputs. +- Context sync default behavior is important-change-gated with verify-only path for localized tasks in canonical and generated skill contracts. +- One-task/one-atomic-commit planning requirement is present in planning contracts (`/change-to-plan` + `sce-plan-authoring`). +- Thin command orchestration boundaries for `/next-task`, `/change-to-plan`, and `/commit` remain documented in root shared context files. + +### Failed checks and follow-ups +- None. + +### Residual risks +- `nix flake check` currently validates the host-mapped check set; cross-system parity still depends on running with `--all-systems` in an environment that supports those targets. 
diff --git a/context/sce/atomic-commit-workflow.md b/context/sce/atomic-commit-workflow.md index df56083a..e8db48bd 100644 --- a/context/sce/atomic-commit-workflow.md +++ b/context/sce/atomic-commit-workflow.md @@ -19,6 +19,8 @@ Naming decision: - Empty command arguments are supported; the command infers intent from staged changes. - Before any proposal, the command must prompt for explicit staging confirmation (`git add ` guidance). +- After staging confirmation, commit guidance must classify staged diff scope (`context/`-only vs mixed `context/` + non-`context/`). +- Context-file-focused commit reminders are allowed only for `context/`-only staged diffs; mixed staged diffs must not receive default context-file reminders. - Command text stays thin and gate-focused; commit grammar and atomic split logic are skill-owned in `sce-atomic-commit`. - Output is proposal-only: commit message(s) and split guidance. - The workflow never creates commits automatically. diff --git a/context/sce/shared-context-code-workflow.md b/context/sce/shared-context-code-workflow.md index 7f76ec94..98733464 100644 --- a/context/sce/shared-context-code-workflow.md +++ b/context/sce/shared-context-code-workflow.md @@ -29,17 +29,14 @@ Examples: 2. Apply the plan-review confirmation gate. - Auto-pass only when both plan and task ID are provided and review reports no blockers, ambiguity, or missing acceptance criteria. - Otherwise, resolve open points and require explicit user confirmation. -3. Enforce the mandatory implementation stop before any code edits. - - Explain: task goal, in/out-of-scope boundaries, done checks, expected files/components, and approach/trade-offs/risks. - - Ask: `Continue with implementation now? (yes/no)`. - - Do not edit files, generate code, or apply patches until the user confirms. -4. Run `sce-task-execution` for minimal in-scope implementation. -5. Run light task-level checks/lints and a build when light/fast; capture evidence. -6. 
Update task status in the plan file. -7. Run `sce-context-sync` as a mandatory done gate. -8. Wait for feedback; if in-scope fixes are needed, apply fixes, rerun light checks/build-if-fast, and sync context again. -9. If this is the final plan task, run `sce-validation`. -10. If more tasks remain, prompt the next-session command for the next task. +3. Run `sce-task-execution`. + - Mandatory implementation stop is enforced by the skill before edits. + - Scoped implementation, light checks/build-if-fast, and plan status updates are skill-owned. +4. Run `sce-context-sync` as a mandatory done gate. + - Context significance classification and root verify-vs-edit behavior are skill-owned. +5. Wait for feedback; if in-scope fixes are needed, apply fixes, rerun light checks/build-if-fast, and sync context again. +6. If this is the final plan task, run `sce-validation`. +7. If more tasks remain, prompt the next-session command for the next task. ## Mermaid diagram diff --git a/context/sce/shared-context-plan-workflow.md b/context/sce/shared-context-plan-workflow.md index e5f90c54..c53753ea 100644 --- a/context/sce/shared-context-plan-workflow.md +++ b/context/sce/shared-context-plan-workflow.md @@ -29,6 +29,8 @@ Examples: - Capture goal, constraints, non-goals, and success criteria in current-state language. 3. Break work into atomic tasks. - Define each task with goal, boundaries, done checks, and verification notes. + - Enforce one-task/one-atomic-commit slicing: each executable task should land as one coherent commit. + - If a task would require multiple independent commits, split it into sequential tasks before approval. 4. Run clarification gate before plan approval. - If blockers, ambiguity, or missing acceptance criteria exist, stop and ask focused questions. - Do not mark a task ready for implementation until unresolved points are closed. @@ -38,6 +40,13 @@ Examples: - Store continuation state in the plan markdown checkboxes/status only. 
- Do not mutate code/runtime files during plan-authoring work. +## Plan lifecycle policy + +- Plans in `context/plans/` are execution artifacts for active implementation work. +- Completed plans are disposable and are not a durable context source. +- Do not use completed plan files as long-term history references from core context navigation. +- Promote durable outcomes into current-state context files (`context/overview.md`, `context/architecture.md`, `context/glossary.md`, and focused workflow docs) or decision records under `context/decisions/`. + ## Output contract - Plan target resolved (`plan_name` and path). diff --git a/context/sce/workflow-token-count-workflow.md b/context/sce/workflow-token-count-workflow.md new file mode 100644 index 00000000..9685e5aa --- /dev/null +++ b/context/sce/workflow-token-count-workflow.md @@ -0,0 +1,49 @@ +# Workflow Token Count Flake App + +## Purpose + +Define the repository-root execution contract for static workflow token counting. + +## Command contract + +- Canonical entrypoint: `nix run .#token-count-workflows` +- Help entrypoint: `nix run .#token-count-workflows -- --help` +- Runtime wrapper behavior: + - Resolve repository root via `git rev-parse --show-toplevel` with `pwd` fallback. + - Require `evals/` to exist under repository root. + - Execute `bun run token-count-workflows` from `evals/` inside `nix develop`. + +## Implementation anchors + +- Flake app definition: `flake.nix` (`apps.token-count-workflows`) +- App program implementation: `flake.nix` (`tokenCountWorkflowsApp`) +- Script implementation: `evals/token-count-workflows.ts` +- Evals script command alias: `evals/package.json` (`scripts.token-count-workflows`) + +## Output contract + +- Output directory: `context/tmp/token-footprint/` +- Deterministic latest outputs written each run: + - `workflow-token-count-latest.json` + - `workflow-token-count-latest.md` +- Optional archival JSON output when `--run-id` is supplied to the script. 
+ +## CI contract + +- Workflow: `.github/workflows/workflow-token-count.yml` +- Trigger policy: `push` and `pull_request` events targeting `main` +- CI execution command: `nix run .#token-count-workflows` +- Uploaded artifact name: `workflow-token-footprint` +- Uploaded paths: + - `context/tmp/token-footprint/workflow-token-count-latest.json` + - `context/tmp/token-footprint/workflow-token-count-latest.md` + - `context/tmp/token-footprint/workflow-token-count-*.json` + - `context/tmp/token-footprint/workflow-token-count-*.md` + +## Related context + +- `context/sce/workflow-token-footprint-inventory.md` +- `context/sce/workflow-token-footprint-manifest.json` +- `context/overview.md` +- `context/patterns.md` +- `.github/workflows/workflow-token-count.yml` diff --git a/context/sce/workflow-token-footprint-inventory.md b/context/sce/workflow-token-footprint-inventory.md new file mode 100644 index 00000000..1082b170 --- /dev/null +++ b/context/sce/workflow-token-footprint-inventory.md @@ -0,0 +1,318 @@ +# SCE Workflow Inventory for Token-Footprint Analysis (T01) + +## Purpose + +Provide a canonical inventory of participants in the SCE Plan (`/change-to-plan`) and Execute (`/next-task`) workflows, including ownership and invocation boundaries used for later token-footprint analysis. + +## Workflow inventory matrix + +| Workflow | Component type | Component | Role in workflow | Ownership boundary | Shared vs role-specific | +| --- | --- | --- | --- | --- | --- | +| Plan (`/change-to-plan`) | Agent | `Shared Context Plan` | Planning-only role that converts scoped requests into atomic plan tasks and emits `/next-task` handoff. | Canonical role contract is owned in `config/pkl/base/shared-content.pkl` (`agents["shared-context-plan"].canonicalBody`); generated consumers in `.opencode` and `.claude`. | role-specific | +| Plan (`/change-to-plan`) | Command | `/change-to-plan` | Thin orchestration entrypoint for plan creation/update. 
| Command wrapper owns only orchestration text; clarification and plan-shape contracts are skill-owned. | shared orchestration pattern | +| Plan (`/change-to-plan`) | Skill | `sce-plan-authoring` | Owns clarification gate, plan-shape contract, and readiness output semantics for planning sessions. | Skill is canonical owner of detailed plan-session behavior used by command wrapper. | role-specific | +| Plan (`/change-to-plan`) | Context artifact | `context/plans/{plan_name}.md` | Durable plan state (task IDs, status, boundaries, done checks, verification notes). | Plan markdown is the continuity source for planning/execution handoff. | shared artifact | +| Plan (`/change-to-plan`) | Context artifact | `context/sce/shared-context-plan-workflow.md` | Canonical workflow definition for Plan role and handoff contract to `/next-task`. | Context-owned reference doc; reflects current-state behavior. | role-specific | +| Execute (`/next-task`) | Agent | `Shared Context Code` | Execution role that runs one approved task, validates behavior, and enforces context sync. | Canonical role contract is owned in `config/pkl/base/shared-content.pkl` (`agents["shared-context-code"].canonicalBody`); generated consumers in `.opencode` and `.claude`. | role-specific | +| Execute (`/next-task`) | Command | `/next-task` | Thin orchestration entrypoint sequencing review, execution, and context sync phases. | Command wrapper owns sequencing/gates; detailed phase behavior is skill-owned. | shared orchestration pattern | +| Execute (`/next-task`) | Skill | `sce-plan-review` | Resolves plan target + task, checks readiness, and enforces clarification before execution when needed. | Skill is canonical owner of review/readiness phase contract. | shared phase in execute workflow | +| Execute (`/next-task`) | Skill | `sce-task-execution` | Enforces implementation stop, scoped edits, task-level verification, and plan status update. | Skill is canonical owner of implementation-phase contract. 
| shared phase in execute workflow | +| Execute (`/next-task`) | Skill | `sce-context-sync` | Required done gate that synchronizes `context/` to code truth after task implementation. | Skill is canonical owner of sync-phase contract. | shared phase in execute workflow | +| Execute (`/next-task`) | Skill (conditional) | `sce-validation` | Final plan-task-only validation/cleanup phase. | Skill invoked only when current task is final task in the plan. | shared conditional phase | +| Execute (`/next-task`) | Context artifact | `context/sce/shared-context-code-workflow.md` | Canonical workflow definition for Execute role including mandatory gates. | Context-owned reference doc; reflects current-state behavior. | role-specific | + +## Cross-workflow shared components + +| Component domain | Canonical owner | Consumers | +| --- | --- | --- | +| Shared SCE baseline doctrine (core principles, `context/` authority, quality posture) | Shared snippet constants in `config/pkl/base/shared-content.pkl` | Shared Context Plan agent, Shared Context Code agent, generated target-specific agent files | +| Thin-command orchestration model | `commands["change-to-plan"].canonicalBody`, `commands["next-task"].canonicalBody` in `config/pkl/base/shared-content.pkl` | Generated command files for OpenCode/Claude | +| Skill-owned detailed contracts | `skills["sce-plan-authoring"]`, `skills["sce-plan-review"]`, `skills["sce-task-execution"]`, `skills["sce-context-sync"]`, `skills["sce-validation"]` in `config/pkl/base/shared-content.pkl` | Plan/Execute command wrappers and role agents | + +## Invocation boundaries + +### Plan workflow + +1. `/change-to-plan` invokes `Shared Context Plan`. +2. Plan agent loads `sce-plan-authoring` to perform clarification + plan shaping. +3. Plan output emits an execution handoff command: `/next-task {plan_name} {T0X}`. + +### Execute workflow + +1. `/next-task` invokes `Shared Context Code`. +2. Execute flow runs `sce-plan-review` first. +3. 
After readiness and explicit implementation-stop confirmation, execute flow runs `sce-task-execution`. +4. Done gate always runs `sce-context-sync`; `sce-validation` runs only on final plan task. + +## Sources used + +- `context/sce/shared-context-plan-workflow.md` +- `context/sce/shared-context-code-workflow.md` +- `context/sce/plan-code-overlap-map.md` +- `context/sce/dedup-ownership-table.md` +- `.opencode/command/change-to-plan.md` +- `.opencode/command/next-task.md` +- `.opencode/agent/Shared Context Plan.md` +- `.opencode/agent/Shared Context Code.md` + +## T02: Token-heavy prompt surfaces and duplication hotspots + +| Rank | Prompt surface | Source location(s) | Why token cost accumulates | Safety-critical verbosity | Keep vs reduce recommendation | +| --- | --- | --- | --- | --- | --- | +| 1 | Execute workflow orchestration + phase-contract restatement | `.opencode/command/next-task.md`; `.opencode/skills/sce-plan-review/SKILL.md`; `.opencode/skills/sce-task-execution/SKILL.md`; `.opencode/skills/sce-context-sync/SKILL.md`; `.opencode/agent/Shared Context Code.md` | Same execution gates are repeated across command wrapper, role agent, and three phase skills to preserve tool-agnostic reliability. | yes (readiness gate, mandatory implementation stop, required context-sync gate) | Keep gate semantics and stop conditions verbatim; reduce surrounding explanatory duplication by keeping command wrapper thin and linking to skill-owned contracts. | +| 2 | Shared baseline doctrine duplicated across Plan and Code roles | `config/pkl/base/shared-content.pkl` shared snippets rendered into Plan/Code generated agents for OpenCode + Claude | Cross-role and cross-target projection repeats core principles and `context/` authority text in multiple generated surfaces. | yes (human-decision authority, code-truth precedence, context durability) | Keep canonical shared snippets as single owner; reduce drift risk by avoiding role-local rewrites and preserving snippet reuse only. 
| +| 3 | Plan workflow clarification/readiness language appearing in both command and skill layers | `.opencode/command/change-to-plan.md`; `.opencode/skills/sce-plan-authoring/SKILL.md`; `context/sce/shared-context-plan-workflow.md` | Clarification and readiness semantics are intentionally restated in wrapper and skill docs for discoverability and enforcement. | yes (clarification gate and explicit readiness contract) | Keep clarification gate requirements; reduce command prose to invocation contract and defer full decision logic to skill-owned sections. | +| 4 | `/commit` workflow guidance split across command and atomic-commit skill docs | `.opencode/command/commit.md`; `.opencode/skills/sce-atomic-commit/SKILL.md`; related glossary/overview entries | Message grammar and atomic split policy can be repeated in wrapper text plus skill-level details. | medium (staged-only and no-auto-commit guardrails are safety-relevant) | Keep wrapper-level staged confirmation and proposal-only constraints; reduce by treating skill as sole owner of detailed commit grammar and split heuristics. | +| 5 | Context navigation/redundant cross-link lists across overview/workflow/glossary files | `context/overview.md`; `context/context-map.md`; `context/glossary.md`; `context/sce/*.md` | Repeated "where to look" sections aid discoverability but add persistent token overhead in background context loading. | no (mostly discoverability, not execution safety) | Reduce long repeated lists by maintaining one canonical map entry per artifact and using shorter pointers elsewhere. | +| 6 | Generated cross-target parity text duplication (OpenCode and Claude outputs) | `config/.opencode/**`; `config/.claude/**` from same canonical Pkl content | Same canonical instructions are emitted twice by design for target parity; static footprint appears doubled even when authored once. 
| medium (parity guarantees and capability differences must remain explicit) | Keep parity model; reduce analysis noise by counting canonical owner text once and reporting generated parity copies as derived overhead. | + +### Hotspot classification notes + +- Intentional guardrail hotspots (keep verbose): readiness/clarification gates, implementation-stop contract, context-sync required done gate, and authority/safety doctrine. +- Reducible duplication hotspots: wrapper-level explanatory prose beyond gating contract, repeated long discoverability lists, and command-level detail that should remain skill-owned. +- Parity-only duplication should be treated as expected derived overhead, not primary authoring duplication, when prioritizing reduction work. + +## T03: Static token accounting method and evidence template + +### Counting scope (exact inputs) + +Use these workflow prompt surfaces as the canonical static-count manifest for this plan: + +| Surface ID | Workflow | Artifact class | File path | Count scope | +| --- | --- | --- | --- | --- | +| `plan-agent-canonical` | Plan | Agent (canonical owner) | `config/pkl/base/shared-content.pkl` | Count only the rendered source block for `agents["shared-context-plan"].canonicalBody`. | +| `code-agent-canonical` | Execute | Agent (canonical owner) | `config/pkl/base/shared-content.pkl` | Count only the rendered source block for `agents["shared-context-code"].canonicalBody`. | +| `change-to-plan-command` | Plan | Command wrapper | `.opencode/command/change-to-plan.md` | Count entire file. | +| `next-task-command` | Execute | Command wrapper | `.opencode/command/next-task.md` | Count entire file. | +| `plan-authoring-skill` | Plan | Skill contract | `.opencode/skills/sce-plan-authoring/SKILL.md` | Count entire file. | +| `plan-review-skill` | Execute | Skill contract | `.opencode/skills/sce-plan-review/SKILL.md` | Count entire file. 
| +| `task-execution-skill` | Execute | Skill contract | `.opencode/skills/sce-task-execution/SKILL.md` | Count entire file. | +| `context-sync-skill` | Execute | Skill contract | `.opencode/skills/sce-context-sync/SKILL.md` | Count entire file. | +| `validation-skill` | Execute | Skill contract (conditional) | `.opencode/skills/sce-validation/SKILL.md` | Count entire file; include but tag as conditional. | +| `shared-plan-workflow-doc` | Plan | Context artifact | `context/sce/shared-context-plan-workflow.md` | Count entire file. | +| `shared-code-workflow-doc` | Execute | Context artifact | `context/sce/shared-context-code-workflow.md` | Count entire file. | + +Optional derived-overhead pass (reported separately, not merged into canonical authoring total): +- matching generated Claude surfaces under `config/.claude/**` for the same command/agent/skill slugs; +- matching generated OpenCode surfaces under `config/.opencode/**` when they are not already the counted canonical execution surfaces. + +### Tokenizer assumptions + +- Primary tokenizer assumption: `o200k_base` (closest available static approximation for current OpenAI-family models). +- Fallback tokenizer assumption when `o200k_base` is unavailable: `cl100k_base`. +- Every report must include the tokenizer name used; cross-tokenizer totals are not directly comparable. + +### Deterministic counting procedure + +1. Capture run metadata: date/time (UTC), current git commit SHA, plan name, task ID, operator. +2. Materialize the exact surface manifest from the table above (same surface IDs and file paths). +3. Read files as UTF-8 text and normalize newlines to `\n` before counting. +4. For each surface, apply scope rule (`entire file` or `canonicalBody subsection`) and produce the exact counted text payload. +5. Count tokens for each payload with one tokenizer for the whole run; do not mix tokenizers within a run. +6. 
Record per-surface token counts, then compute workflow subtotals (`Plan`, `Execute`) and combined total. +7. If a previous baseline exists, compute deltas per surface and for each subtotal/total. +8. Store evidence in a dated artifact under `context/tmp/` and summarize key totals in the active plan task evidence notes. + +### Report schema (required fields) + +Per-surface row fields: +- `surface_id` +- `workflow` (`plan` or `execute`) +- `artifact_class` (`agent`, `command`, `skill`, `context_artifact`) +- `path` +- `scope_rule` +- `tokenizer` +- `tokens` +- `baseline_tokens` (nullable) +- `delta_tokens` (nullable) +- `conditional` (`true` for validation-skill, else `false`) + +Run-level summary fields: +- `run_id` +- `timestamp_utc` +- `git_sha` +- `plan_name` +- `task_id` +- `tokenizer` +- `plan_total_tokens` +- `execute_total_tokens` +- `combined_total_tokens` +- `combined_delta_tokens` (nullable) +- `notes` + +### Evidence template (copy/paste) + +```markdown +# Static token accounting run: + +- timestamp_utc: +- git_sha: +- plan_name: sce-workflow-token-footprint-analysis +- task_id: T03 +- tokenizer: + +| surface_id | workflow | artifact_class | path | scope_rule | tokens | baseline_tokens | delta_tokens | conditional | +| --- | --- | --- | --- | --- | ---: | ---: | ---: | --- | +| ... | ... | ... | ... | ... | ... | ... | ... | ... | + +## Totals + +- plan_total_tokens: +- execute_total_tokens: +- combined_total_tokens: +- combined_delta_tokens: +- notes: +``` + +### Known limitations + +- Static counts do not include runtime/system-level hidden prompt frames, tool IO payload sizes, or conversation-history growth. +- Subsection extraction for canonical Pkl agent bodies depends on stable key names; renamed keys require manifest update. +- Derived parity copies can make footprint appear inflated; keep canonical-owner totals and derived-overhead totals separate. +- Token totals are tokenizer-dependent estimates, not billing-accurate usage measurements. 
+ +## T04: Token-reduction strategy set, trade-offs, and rollout order + +The strategy set below is ordered to reduce prompt footprint without weakening mandatory gates (`clarification`, `readiness`, `implementation stop`, `context sync`) or collapsing Plan/Code role boundaries. + +| Priority | Strategy | Expected impact | Implementation risk | Affected artifacts | Rationale | Guardrail risk + mitigation | +| --- | --- | --- | --- | --- | --- | --- | +| P1 | Tighten thin-wrapper command prose to gate-only contracts | high | low | `config/pkl/base/shared-content.pkl` (`commands["change-to-plan"].canonicalBody`, `commands["next-task"].canonicalBody`, `commands["commit"].canonicalBody`); generated command outputs under `config/.opencode/**` and `config/.claude/**` | Wrapper text currently repeats detailed phase logic that is already skill-owned. Retaining only sequencing + mandatory confirmations removes repeat tokens at every invocation surface. | Risk: wrappers become too terse and hide requirements. Mitigation: keep explicit gate bullets and direct references to canonical skill owners in each wrapper. | +| P2 | Enforce single-owner detailed behavior (skill owns, command references) | high | medium | Skill bodies in `config/pkl/base/shared-content.pkl` for `sce-plan-authoring`, `sce-plan-review`, `sce-task-execution`, `sce-context-sync`, `sce-validation`; corresponding generated skills | Removes duplicated procedural detail split across command, agent, and skill by making skill text the only detailed contract source. | Risk: accidental behavior drift if wrappers and skills diverge semantically. Mitigation: add a parity checklist in plan tasks requiring gate-name exact-match verification after edits. 
| +| P3 | Convert repeated long workflow prose into canonical snippet constants | medium-high | medium | `config/pkl/base/shared-content.pkl` shared constants and interpolation sites in agent/command/skill canonical bodies | Centralized snippets cut repeated baseline doctrine and standard gate phrasing while preserving wording consistency across targets. | Risk: over-shared snippets can force awkward context-specific text. Mitigation: keep snippets scoped to stable doctrine/gates only; leave role-specific intent local. | +| P4 | Narrow root-context navigation repetition to one canonical index + short pointers | medium | low | `context/context-map.md` (canonical list), plus concise pointer edits in `context/overview.md`, `context/glossary.md`, and workflow docs under `context/sce/*.md` | Repeated "where to look" blocks inflate persistent context payload with low safety value; one canonical map plus short pointers keeps discoverability with fewer tokens. | Risk: reduced discoverability if pointers are too sparse. Mitigation: require every workflow doc to keep one explicit pointer to `context/context-map.md` and its nearest domain file. | +| P5 | Distinguish canonical-owner totals from generated parity copies in reporting defaults | medium | low | `context/sce/workflow-token-footprint-inventory.md` method/reporting sections; future token-report artifacts in `context/tmp/` | Prevents optimization work from targeting unavoidable cross-target duplication by default and keeps reduction efforts focused on editable canonical text. | Risk: teams ignore derived overhead entirely. Mitigation: make the derived-overhead pass (optional in T03) mandatory for visibility, but keep it separate from the canonical reduction KPI. 
| +| P6 | Add guardrail-preservation acceptance checks to every reduction task | medium | low-medium | Future plan tasks in `context/plans/*.md`; relevant workflow context docs in `context/sce/*.md` | Makes "safe reduction" operational by requiring explicit checks that mandatory gates still exist and role boundaries remain unchanged after edits. | Risk: checklist quality varies by operator. Mitigation: standardize a minimal acceptance template (gate-presence + ownership-boundary verification) reused across tasks. | + +### Rollout order + +1. Apply P1 first to remove highest-volume wrapper duplication with minimal behavior risk. +2. Apply P2 next so detailed contracts are clearly skill-owned before broader refactors. +3. Apply P3 after ownership boundaries are stable to safely deduplicate shared wording. +4. Apply P4 to reduce background context-load overhead while preserving discoverability. +5. Apply P5 and P6 in parallel as measurement/governance controls for all reduction phases. + +### Preserve-as-is constraints during rollout + +- Keep explicit confirmation gates in workflow entrypoints (`readiness`, `implementation stop`) even when surrounding prose is reduced. +- Keep `sce-context-sync` as a required done gate and keep final-task `sce-validation` trigger conditions explicit. +- Keep Plan vs Code role separation and command split (`/change-to-plan`, `/next-task`) unchanged. +- Keep human decision authority and code-truth precedence language explicit where currently required. + +## T05: Script input/output and manifest extraction contract + +This section locks the deterministic contract for the T06 TypeScript implementation. + +### Canonical manifest source (selected strategy) + +- Canonical machine-readable source: `context/sce/workflow-token-footprint-manifest.json`. +- Human-readable mirror: the T03 counting-scope table in this document. 
+- Contract precedence: if markdown and JSON differ, the JSON manifest is implementation truth and this document must be synced. +- Rationale: checked-in JSON avoids brittle markdown parsing and keeps extraction rules explicit and testable. + +### Manifest schema (required fields) + +Top-level object fields: +- `manifest_version`: string, currently `"1"`. +- `plan_name`: string, currently `"sce-workflow-token-footprint-analysis"`. +- `task_id`: string, currently `"T05"` for this contract definition. +- `surfaces`: array of surface entries (non-empty). + +Per-surface entry fields: +- `surface_id`: stable slug (matches T03 report rows). +- `workflow`: `"plan"` or `"execute"`. +- `artifact_class`: `"agent" | "command" | "skill" | "context_artifact"`. +- `path`: repo-relative path. +- `scope_rule`: object with `type` and rule-specific fields: + - `{"type":"entire-file"}` + - `{"type":"canonical-body-subsection","owner_path":"agents[\"shared-context-plan\"].canonicalBody"}` (example) +- `conditional`: boolean (`true` only for conditional surfaces such as `validation-skill`). + +### Extraction rules + +General extraction rules: +1. Read target file as UTF-8. +2. Normalize line endings to `\n`. +3. Apply `scope_rule` exactly. + +`entire-file` extraction: +- Count normalized full file text with no additional trimming. + +`canonical-body-subsection` extraction (for `config/pkl/base/shared-content.pkl`): +- Locate the exact owner path key declared in `scope_rule.owner_path`. +- Extract only the assigned `canonicalBody` string payload. +- Preserve interior text exactly after newline normalization. +- If key lookup is ambiguous or missing, fail the run with a deterministic error that includes `surface_id` and `owner_path`. + +### Tokenizer contract + +- Preferred tokenizer: `o200k_base`. +- Fallback tokenizer: `cl100k_base`. +- Single-tokenizer-per-run rule: all surfaces in one run must use the same tokenizer. 
+- Report both `requested_tokenizer` and `resolved_tokenizer`; when fallback occurs, set a run note explaining why. + +### Baseline/delta input contract + +- Optional input: `--baseline <path>`. +- If omitted: `baseline_tokens` and `delta_tokens` are `null` for all surfaces; summary delta fields are `null`. +- If provided: + - baseline report must include per-surface rows keyed by `surface_id`. + - tokenizer must match `resolved_tokenizer`, else fail with deterministic mismatch error. + - surfaces absent in baseline produce `baseline_tokens = null` and `delta_tokens = null`. + +### Report output contract + +Output directory and deterministic naming: +- Directory: `context/tmp/token-footprint/` (create if missing). +- Deterministic latest artifacts (always overwritten): + - `context/tmp/token-footprint/workflow-token-count-latest.json` + - `context/tmp/token-footprint/workflow-token-count-latest.md` +- Optional archival artifact (when `--run-id` provided): + - `context/tmp/token-footprint/workflow-token-count-<run_id>.json` + +Required per-surface row fields: +- `surface_id`, `workflow`, `artifact_class`, `path`, `scope_rule`, `tokenizer`, `tokens`, `baseline_tokens`, `delta_tokens`, `conditional`. + +Required run-level fields: +- `run_id`, `timestamp_utc`, `git_sha`, `plan_name`, `task_id`, `tokenizer`, `plan_total_tokens`, `execute_total_tokens`, `combined_total_tokens`, `combined_delta_tokens`, `notes`. 
+ +Additional required run-level fields for deterministic diagnostics: +- `requested_tokenizer` +- `resolved_tokenizer` +- `manifest_path` +- `baseline_path` (nullable) + +### Field mapping to T03 schema + +| T03 field | Manifest source | Report source | +| --- | --- | --- | +| `surface_id` | `surfaces[].surface_id` | per-surface row | +| `workflow` | `surfaces[].workflow` | per-surface row | +| `artifact_class` | `surfaces[].artifact_class` | per-surface row | +| `path` | `surfaces[].path` | per-surface row | +| `scope_rule` | `surfaces[].scope_rule` | per-surface row | +| `conditional` | `surfaces[].conditional` | per-surface row | +| `tokenizer` | run tokenizer contract | per-surface row + run summary | +| `tokens` | computed | per-surface row | +| `baseline_tokens` | baseline lookup by `surface_id` | per-surface row | +| `delta_tokens` | `tokens - baseline_tokens` when baseline exists | per-surface row | +| `plan_total_tokens` | computed where `workflow=plan` | run summary | +| `execute_total_tokens` | computed where `workflow=execute` | run summary | +| `combined_total_tokens` | computed | run summary | +| `combined_delta_tokens` | computed when baseline totals exist | run summary | + +### Discoverability links + +- Canonical workflow context: `context/sce/shared-context-plan-workflow.md`, `context/sce/shared-context-code-workflow.md`. +- Plan execution state: `context/plans/sce-workflow-token-footprint-analysis.md`. +- Temporary artifacts location contract: `context/tmp/token-footprint/`. + +## T06: Implemented static token-count script (current state) + +- Implementation path: `evals/token-count-workflows.ts`. +- Canonical input source: `context/sce/workflow-token-footprint-manifest.json`. +- Supported scope rules: `entire-file` and `canonical-body-subsection` (`agents["..."].canonicalBody` owner path contract in `config/pkl/base/shared-content.pkl`). 
+- Tokenizer behavior: requests `o200k_base`, falls back to `cl100k_base` only when required, and records both requested/resolved tokenizer fields plus fallback notes. +- Baseline behavior: optional `--baseline <path>`; tokenizer mismatch fails deterministically; missing baseline surface rows remain nullable (`baseline_tokens`, `delta_tokens`). +- Runtime command: from `evals/`, run `bun run token-count-workflows` (optional `--run-id`, `--baseline`, `--manifest`, `--tokenizer`). +- Output contract: always writes `workflow-token-count-latest.json` and `workflow-token-count-latest.md` to `context/tmp/token-footprint/`; writes `workflow-token-count-<run-id>.json` when `--run-id` is provided. diff --git a/context/sce/workflow-token-footprint-manifest.json b/context/sce/workflow-token-footprint-manifest.json new file mode 100644 index 00000000..22b155fd --- /dev/null +++ b/context/sce/workflow-token-footprint-manifest.json @@ -0,0 +1,119 @@ +{ + "manifest_version": "1", + "plan_name": "sce-workflow-token-footprint-analysis", + "task_id": "T05", + "surfaces": [ + { + "surface_id": "plan-agent-canonical", + "workflow": "plan", + "artifact_class": "agent", + "path": "config/pkl/base/shared-content.pkl", + "scope_rule": { + "type": "canonical-body-subsection", + "owner_path": "agents[\"shared-context-plan\"].canonicalBody" + }, + "conditional": false + }, + { + "surface_id": "code-agent-canonical", + "workflow": "execute", + "artifact_class": "agent", + "path": "config/pkl/base/shared-content.pkl", + "scope_rule": { + "type": "canonical-body-subsection", + "owner_path": "agents[\"shared-context-code\"].canonicalBody" + }, + "conditional": false + }, + { + "surface_id": "change-to-plan-command", + "workflow": "plan", + "artifact_class": "command", + "path": ".opencode/command/change-to-plan.md", + "scope_rule": { + "type": "entire-file" + }, + "conditional": false + }, + { + "surface_id": "next-task-command", + "workflow": "execute", + "artifact_class": "command", + "path": 
".opencode/command/next-task.md", + "scope_rule": { + "type": "entire-file" + }, + "conditional": false + }, + { + "surface_id": "plan-authoring-skill", + "workflow": "plan", + "artifact_class": "skill", + "path": ".opencode/skills/sce-plan-authoring/SKILL.md", + "scope_rule": { + "type": "entire-file" + }, + "conditional": false + }, + { + "surface_id": "plan-review-skill", + "workflow": "execute", + "artifact_class": "skill", + "path": ".opencode/skills/sce-plan-review/SKILL.md", + "scope_rule": { + "type": "entire-file" + }, + "conditional": false + }, + { + "surface_id": "task-execution-skill", + "workflow": "execute", + "artifact_class": "skill", + "path": ".opencode/skills/sce-task-execution/SKILL.md", + "scope_rule": { + "type": "entire-file" + }, + "conditional": false + }, + { + "surface_id": "context-sync-skill", + "workflow": "execute", + "artifact_class": "skill", + "path": ".opencode/skills/sce-context-sync/SKILL.md", + "scope_rule": { + "type": "entire-file" + }, + "conditional": false + }, + { + "surface_id": "validation-skill", + "workflow": "execute", + "artifact_class": "skill", + "path": ".opencode/skills/sce-validation/SKILL.md", + "scope_rule": { + "type": "entire-file" + }, + "conditional": true + }, + { + "surface_id": "shared-plan-workflow-doc", + "workflow": "plan", + "artifact_class": "context_artifact", + "path": "context/sce/shared-context-plan-workflow.md", + "scope_rule": { + "type": "entire-file" + }, + "conditional": false + }, + { + "surface_id": "shared-code-workflow-doc", + "workflow": "execute", + "artifact_class": "context_artifact", + "path": "context/sce/shared-context-code-workflow.md", + "scope_rule": { + "type": "entire-file" + }, + "conditional": false + } + ] +} diff --git a/evals/README.md b/evals/README.md new file mode 100644 index 00000000..298d08aa --- /dev/null +++ b/evals/README.md @@ -0,0 +1,25 @@ +# Evals + +## Workflow token counting + +Run from the `evals/` directory: + +```bash +bun install +bun run 
token-count-workflows +``` + +Common options: + +```bash +bun run token-count-workflows --run-id local-test +bun run token-count-workflows --baseline context/tmp/token-footprint/workflow-token-count-latest.json +bun run token-count-workflows --tokenizer cl100k_base +bun run token-count-workflows --manifest context/sce/workflow-token-footprint-manifest.json +``` + +Output artifacts are written to `context/tmp/token-footprint/`: + +- `workflow-token-count-latest.json` +- `workflow-token-count-latest.md` +- `workflow-token-count-.json` (when `--run-id` is provided) diff --git a/evals/bun.lock b/evals/bun.lock index 772dc21c..ef5b4a56 100644 --- a/evals/bun.lock +++ b/evals/bun.lock @@ -6,6 +6,7 @@ "name": "shared-context-engineering", "dependencies": { "@opencode-ai/sdk": "^1.2.6", + "js-tiktoken": "^1.0.21", "opencode-ai": "^1.2.6", }, "devDependencies": { @@ -23,8 +24,12 @@ "@types/node": ["@types/node@25.2.3", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-m0jEgYlYz+mDJZ2+F4v8D1AyQb+QzsNqRuI7xg1VQX/KlKS0qT9r1Mo16yo5F/MtifXFgaofIFsdFMox2SxIbQ=="], + "base64-js": ["base64-js@1.5.1", "", {}, "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA=="], + "bun-types": ["bun-types@1.3.9", "", { "dependencies": { "@types/node": "*" } }, "sha512-+UBWWOakIP4Tswh0Bt0QD0alpTY8cb5hvgiYeWCMet9YukHbzuruIEeXC2D7nMJPB12kbh8C7XJykSexEqGKJg=="], + "js-tiktoken": ["js-tiktoken@1.0.21", "", { "dependencies": { "base64-js": "^1.5.1" } }, "sha512-biOj/6M5qdgx5TKjDnFT1ymSpM5tbd3ylwDtrQvFQSu0Z7bBYko2dF+W/aUkXUPuk6IVpRxk/3Q2sHOzGlS36g=="], + "opencode-ai": ["opencode-ai@1.2.6", "", { "optionalDependencies": { "opencode-darwin-arm64": "1.2.6", "opencode-darwin-x64": "1.2.6", "opencode-darwin-x64-baseline": "1.2.6", "opencode-linux-arm64": "1.2.6", "opencode-linux-arm64-musl": "1.2.6", "opencode-linux-x64": "1.2.6", "opencode-linux-x64-baseline": "1.2.6", "opencode-linux-x64-baseline-musl": "1.2.6", "opencode-linux-x64-musl": "1.2.6", 
"opencode-windows-x64": "1.2.6", "opencode-windows-x64-baseline": "1.2.6" }, "bin": { "opencode": "bin/opencode" } }, "sha512-50leGrfFGrL8hr/iCs/nDr9kHNtNV7X3NMi3EedTj4R6l8yLvB2KZUQHQFOvR0tjfEBm6OqkLu5rw5JSAt8YZg=="], "opencode-darwin-arm64": ["opencode-darwin-arm64@1.2.6", "", { "os": "darwin", "cpu": "arm64" }, "sha512-v5/Qs2vHAb1J/cMwQ13jTMiDPsEnf1bH+AvQYNlsLNFQbDtWc2gc9FJcXqva0/lhI+aVo2X/Z45E6ZfGE3ffDA=="], diff --git a/evals/package.json b/evals/package.json index 6b38f440..a77ae97a 100644 --- a/evals/package.json +++ b/evals/package.json @@ -4,7 +4,8 @@ "type": "module", "private": true, "scripts": { - "test": "bun test ./evals.test.ts" + "test": "bun test ./evals.test.ts", + "token-count-workflows": "bun ./token-count-workflows.ts" }, "devDependencies": { "@types/bun": "latest" @@ -13,6 +14,7 @@ "typescript": "^5" }, "dependencies": { + "js-tiktoken": "^1.0.21", "@opencode-ai/sdk": "^1.2.6", "opencode-ai": "^1.2.6" } diff --git a/evals/token-count-workflows.ts b/evals/token-count-workflows.ts new file mode 100644 index 00000000..028136ab --- /dev/null +++ b/evals/token-count-workflows.ts @@ -0,0 +1,442 @@ +import { mkdir, readFile, writeFile } from "node:fs/promises"; +import { dirname, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; +import { getEncoding, type TiktokenEncoding } from "js-tiktoken"; +import { execFileSync } from "node:child_process"; + +type Workflow = "plan" | "execute"; +type ArtifactClass = "agent" | "command" | "skill" | "context_artifact"; +type ScopeRule = + | { type: "entire-file" } + | { type: "canonical-body-subsection"; owner_path: string }; + +type Surface = { + surface_id: string; + workflow: Workflow; + artifact_class: ArtifactClass; + path: string; + scope_rule: ScopeRule; + conditional: boolean; +}; + +type Manifest = { + manifest_version: string; + plan_name: string; + task_id: string; + surfaces: Surface[]; +}; + +type SurfaceRow = { + surface_id: string; + workflow: Workflow; + artifact_class: 
ArtifactClass; + path: string; + scope_rule: ScopeRule; + tokenizer: string; + tokens: number; + baseline_tokens: number | null; + delta_tokens: number | null; + conditional: boolean; +}; + +type Summary = { + run_id: string; + timestamp_utc: string; + git_sha: string; + plan_name: string; + task_id: string; + tokenizer: string; + requested_tokenizer: string; + resolved_tokenizer: string; + manifest_path: string; + baseline_path: string | null; + plan_total_tokens: number; + execute_total_tokens: number; + combined_total_tokens: number; + combined_delta_tokens: number | null; + notes: string[]; +}; + +type Report = { + summary: Summary; + surfaces: SurfaceRow[]; +}; + +const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url)); +const REPO_ROOT = resolve(SCRIPT_DIR, ".."); +const DEFAULT_MANIFEST_PATH = "context/sce/workflow-token-footprint-manifest.json"; +const OUTPUT_DIR = "context/tmp/token-footprint"; + +function fail(message: string): never { + throw new Error(message); +} + +function normalizeNewlines(text: string): string { + return text.replace(/\r\n?/g, "\n"); +} + +function parseArgs(argv: string[]) { + let manifestPath = DEFAULT_MANIFEST_PATH; + let baselinePath: string | null = null; + let runId: string | null = null; + let requestedTokenizer: TiktokenEncoding = "o200k_base"; + + for (let index = 0; index < argv.length; index += 1) { + const token = argv[index]; + if (!token) { + continue; + } + if (token === "--manifest") { + manifestPath = argv[index + 1] ?? fail("Missing value for --manifest"); + index += 1; + continue; + } + if (token === "--baseline") { + baselinePath = argv[index + 1] ?? fail("Missing value for --baseline"); + index += 1; + continue; + } + if (token === "--run-id") { + runId = argv[index + 1] ?? fail("Missing value for --run-id"); + index += 1; + continue; + } + if (token === "--tokenizer") { + const value = argv[index + 1] ?? 
fail("Missing value for --tokenizer"); + if (value !== "o200k_base" && value !== "cl100k_base") { + fail(`Unsupported tokenizer '${value}'. Expected o200k_base or cl100k_base.`); + } + requestedTokenizer = value; + index += 1; + continue; + } + fail(`Unknown argument '${token}'.`); + } + + return { + manifestPath, + baselinePath, + runId, + requestedTokenizer, + }; +} + +async function readJsonFile(absolutePath: string): Promise { + const raw = normalizeNewlines(await readFile(absolutePath, "utf8")); + return JSON.parse(raw) as T; +} + +function parseOwnerPath(ownerPath: string, surfaceId: string): string { + const match = ownerPath.match(/^agents\["([^"]+)"\]\.canonicalBody$/); + const agentSlug = match?.[1]; + if (!agentSlug) { + fail( + `surface_id=${surfaceId}: unsupported owner_path '${ownerPath}' for canonical-body-subsection extraction`, + ); + } + return agentSlug; +} + +function escapeForRegex(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +function extractCanonicalBodyPkl( + normalizedText: string, + ownerPath: string, + surfaceId: string, +): string { + const agentSlug = parseOwnerPath(ownerPath, surfaceId); + const ownerPattern = new RegExp( + String.raw`\["${escapeForRegex(agentSlug)}"\]\s*=\s*new\s+ContentUnit\s*\{`, + "m", + ); + const ownerMatch = ownerPattern.exec(normalizedText); + if (!ownerMatch || ownerMatch.index < 0) { + fail(`surface_id=${surfaceId}: owner_path '${ownerPath}' not found in source file`); + } + + const ownerStart = ownerMatch.index; + const canonicalBodyPattern = /canonicalBody\s*=\s*"""/m; + const ownerScopedText = normalizedText.slice(ownerStart); + const canonicalBodyMatch = canonicalBodyPattern.exec(ownerScopedText); + if (!canonicalBodyMatch || canonicalBodyMatch.index < 0) { + fail(`surface_id=${surfaceId}: owner_path '${ownerPath}' missing canonicalBody assignment`); + } + + const openDelimiterAbsolute = ownerStart + canonicalBodyMatch.index + canonicalBodyMatch[0].length; + const 
payloadStart = + normalizedText.charAt(openDelimiterAbsolute) === "\n" + ? openDelimiterAbsolute + 1 + : openDelimiterAbsolute; + + const payloadAndTail = normalizedText.slice(payloadStart); + const closingPattern = /^\s*"""/m; + const closingMatch = closingPattern.exec(payloadAndTail); + if (!closingMatch || closingMatch.index < 0) { + fail(`surface_id=${surfaceId}: owner_path '${ownerPath}' missing canonicalBody closing delimiter`); + } + + return payloadAndTail.slice(0, closingMatch.index); +} + +function extractByScopeRule(sourceText: string, surface: Surface): string { + if (surface.scope_rule.type === "entire-file") { + return sourceText; + } + + if (surface.scope_rule.type === "canonical-body-subsection") { + return extractCanonicalBodyPkl(sourceText, surface.scope_rule.owner_path, surface.surface_id); + } + + fail(`surface_id=${surface.surface_id}: unsupported scope_rule type`); +} + +function resolveTokenizer(requestedTokenizer: TiktokenEncoding): { + encodingName: TiktokenEncoding; + notes: string[]; +} { + try { + getEncoding(requestedTokenizer); + return { encodingName: requestedTokenizer, notes: [] }; + } catch (error) { + if (requestedTokenizer !== "o200k_base") { + throw error; + } + + try { + getEncoding("cl100k_base"); + } catch { + fail("Tokenizer resolution failed: neither o200k_base nor cl100k_base is available"); + } + + return { + encodingName: "cl100k_base", + notes: [ + "Requested tokenizer o200k_base was unavailable; fallback cl100k_base was used.", + ], + }; + } +} + +function stableStringify(value: unknown): string { + return JSON.stringify(value, null, 2); +} + +function getGitSha(): string { + try { + return execFileSync("git", ["rev-parse", "HEAD"], { + cwd: REPO_ROOT, + encoding: "utf8", + stdio: ["ignore", "pipe", "ignore"], + }).trim(); + } catch { + return "unknown"; + } +} + +function requireManifest(manifest: Manifest): void { + if (!manifest.manifest_version) { + fail("Manifest missing required field: manifest_version"); + } + 
if (!manifest.plan_name) { + fail("Manifest missing required field: plan_name"); + } + if (!manifest.task_id) { + fail("Manifest missing required field: task_id"); + } + if (!Array.isArray(manifest.surfaces) || manifest.surfaces.length === 0) { + fail("Manifest must include a non-empty surfaces array"); + } +} + +function readBaselineRows(baseline: Report): Map { + const map = new Map(); + for (const row of baseline.surfaces) { + map.set(row.surface_id, row.tokens); + } + return map; +} + +function buildMarkdown(report: Report): string { + const { summary, surfaces } = report; + const lines: string[] = [ + `# Static token accounting run: ${summary.run_id}`, + "", + "## Run metadata", + "", + `- timestamp_utc: ${summary.timestamp_utc}`, + `- git_sha: ${summary.git_sha}`, + `- plan_name: ${summary.plan_name}`, + `- task_id: ${summary.task_id}`, + `- tokenizer: ${summary.tokenizer}`, + `- requested_tokenizer: ${summary.requested_tokenizer}`, + `- resolved_tokenizer: ${summary.resolved_tokenizer}`, + `- manifest_path: ${summary.manifest_path}`, + `- baseline_path: ${summary.baseline_path ?? "null"}`, + "", + "## Surface counts", + "", + "| surface_id | workflow | artifact_class | path | scope_rule | tokenizer | tokens | baseline_tokens | delta_tokens | conditional |", + "| --- | --- | --- | --- | --- | --- | ---: | ---: | ---: | --- |", + ]; + + for (const row of surfaces) { + const scopeRule = row.scope_rule.type; + lines.push( + `| ${row.surface_id} | ${row.workflow} | ${row.artifact_class} | ${row.path} | ${scopeRule} | ${row.tokenizer} | ${row.tokens} | ${row.baseline_tokens ?? "null"} | ${row.delta_tokens ?? "null"} | ${row.conditional} |`, + ); + } + + lines.push( + "", + "## Totals", + "", + `- plan_total_tokens: ${summary.plan_total_tokens}`, + `- execute_total_tokens: ${summary.execute_total_tokens}`, + `- combined_total_tokens: ${summary.combined_total_tokens}`, + `- combined_delta_tokens: ${summary.combined_delta_tokens ?? 
"null"}`, + "", + "## Notes", + "", + ); + + if (summary.notes.length === 0) { + lines.push("- none"); + } else { + for (const note of summary.notes) { + lines.push(`- ${note}`); + } + } + + lines.push(""); + return lines.join("\n"); +} + +async function main(): Promise { + const { manifestPath, baselinePath, runId, requestedTokenizer } = parseArgs( + Bun.argv.slice(2), + ); + + const manifestAbsolutePath = resolve(REPO_ROOT, manifestPath); + const manifest = await readJsonFile(manifestAbsolutePath); + requireManifest(manifest); + + const { encodingName, notes: tokenizerNotes } = resolveTokenizer(requestedTokenizer); + const encoding = getEncoding(encodingName); + + const timestampUtc = new Date().toISOString(); + const resolvedRunId = runId ?? "latest"; + const gitSha = getGitSha(); + + let baselineRows = new Map(); + let baselineSummaryTokenizer: string | null = null; + let baselineAbsolutePath: string | null = null; + + if (baselinePath) { + baselineAbsolutePath = resolve(REPO_ROOT, baselinePath); + const baseline = await readJsonFile(baselineAbsolutePath); + baselineRows = readBaselineRows(baseline); + baselineSummaryTokenizer = + baseline.summary.resolved_tokenizer ?? baseline.summary.tokenizer ?? 
null; + } + + if (baselineSummaryTokenizer && baselineSummaryTokenizer !== encodingName) { + fail( + `Baseline tokenizer mismatch: baseline=${baselineSummaryTokenizer} current=${encodingName}`, + ); + } + + const rows: SurfaceRow[] = []; + let planTotal = 0; + let executeTotal = 0; + let baselineTotal = 0; + let canComputeCombinedDelta = Boolean(baselinePath); + + for (const surface of manifest.surfaces) { + const surfaceAbsolutePath = resolve(REPO_ROOT, surface.path); + const sourceRaw = await readFile(surfaceAbsolutePath, "utf8"); + const sourceNormalized = normalizeNewlines(sourceRaw); + const extractedPayload = extractByScopeRule(sourceNormalized, surface); + const tokens = encoding.encode(extractedPayload).length; + + const baselineTokens = baselineRows.has(surface.surface_id) + ? baselineRows.get(surface.surface_id) ?? null + : null; + const deltaTokens = baselineTokens === null ? null : tokens - baselineTokens; + + if (surface.workflow === "plan") { + planTotal += tokens; + } + if (surface.workflow === "execute") { + executeTotal += tokens; + } + + if (baselineTokens === null) { + canComputeCombinedDelta = false; + } else { + baselineTotal += baselineTokens; + } + + rows.push({ + surface_id: surface.surface_id, + workflow: surface.workflow, + artifact_class: surface.artifact_class, + path: surface.path, + scope_rule: surface.scope_rule, + tokenizer: encodingName, + tokens, + baseline_tokens: baselineTokens, + delta_tokens: deltaTokens, + conditional: surface.conditional, + }); + } + + const combinedTotal = planTotal + executeTotal; + const combinedDelta = canComputeCombinedDelta ? 
combinedTotal - baselineTotal : null; + + const summary: Summary = { + run_id: resolvedRunId, + timestamp_utc: timestampUtc, + git_sha: gitSha, + plan_name: manifest.plan_name, + task_id: manifest.task_id, + tokenizer: encodingName, + requested_tokenizer: requestedTokenizer, + resolved_tokenizer: encodingName, + manifest_path: manifestPath, + baseline_path: baselinePath, + plan_total_tokens: planTotal, + execute_total_tokens: executeTotal, + combined_total_tokens: combinedTotal, + combined_delta_tokens: combinedDelta, + notes: tokenizerNotes, + }; + + const report: Report = { + summary, + surfaces: rows, + }; + + const outputDirectoryPath = resolve(REPO_ROOT, OUTPUT_DIR); + await mkdir(outputDirectoryPath, { recursive: true }); + + const latestJsonPath = resolve(outputDirectoryPath, "workflow-token-count-latest.json"); + const latestMarkdownPath = resolve(outputDirectoryPath, "workflow-token-count-latest.md"); + + await writeFile(latestJsonPath, `${stableStringify(report)}\n`, "utf8"); + await writeFile(latestMarkdownPath, buildMarkdown(report), "utf8"); + + if (runId) { + const archiveJsonPath = resolve(outputDirectoryPath, `workflow-token-count-${runId}.json`); + await writeFile(archiveJsonPath, `${stableStringify(report)}\n`, "utf8"); + } + + console.log(`Wrote ${latestJsonPath}`); + console.log(`Wrote ${latestMarkdownPath}`); + if (runId) { + console.log(`Wrote ${resolve(outputDirectoryPath, `workflow-token-count-${runId}.json`)}`); + } +} + +await main(); diff --git a/flake.nix b/flake.nix index 8fded815..15bbe31e 100644 --- a/flake.nix +++ b/flake.nix @@ -204,6 +204,50 @@ ''; }; + tokenCountWorkflowsApp = pkgs.writeShellApplication { + name = "token-count-workflows"; + runtimeInputs = [ + pkgs.git + pkgs.nix + ]; + text = '' + set -euo pipefail + + usage() { + cat <<'EOF' + Usage: nix run .#token-count-workflows [-- --help] + + Deterministic flake entrypoint for workflow token counting. 
+ Runs evals/token-count-workflows.ts through the existing evals Bun runtime. + EOF + } + + case "''${1:-}" in + -h|--help) + usage + exit 0 + ;; + esac + + repo_root="$(git rev-parse --show-toplevel 2>/dev/null || true)" + if [ -z "''${repo_root}" ]; then + repo_root="$(pwd)" + fi + + evals_dir="''${repo_root}/evals" + if [ ! -d "''${evals_dir}" ]; then + cat >&2 <