diff --git a/.github/workflows/daily-triage.yml b/.github/workflows/daily-triage.yml index d0430f7..aecebc1 100644 --- a/.github/workflows/daily-triage.yml +++ b/.github/workflows/daily-triage.yml @@ -121,14 +121,29 @@ jobs: node scripts/append-run-log.mjs "$ENTRY" echo "outcome=${OUTCOME}" >> "$GITHUB_OUTPUT" + # These run the SAME gates a PR-triggered workflow would. Their outcomes + # (not a hardcoded value) drive the commit statuses posted below, so the + # loop cannot mark its own change green unless the real gates passed. + # continue-on-error lets us record a failing outcome and post an honest + # "failure" status instead of aborting before the status is set. - name: Run validate gates (for PR status) id: validate_gates + continue-on-error: true run: bash scripts/ci-validate-gates.sh - name: Run audit gates (for PR status) id: audit_gates + continue-on-error: true run: bash scripts/ci-audit-gates.sh + - name: Fail the run if either gate failed + if: steps.validate_gates.outcome == 'failure' || steps.audit_gates.outcome == 'failure' + run: | + echo "validate gates: ${{ steps.validate_gates.outcome }}" + echo "audit gates: ${{ steps.audit_gates.outcome }}" + echo "One or more dogfood gates failed — not opening/merging an automated PR." + exit 1 + - name: Open PR for STATE.md + loop-run-log if changed id: pr env: @@ -186,16 +201,27 @@ jobs: core.setFailed('Missing head_sha for commit statuses'); return; } + // Map each gate's real step outcome to the commit status it backs. + // 'success' is only posted when the corresponding gate actually passed. + const toState = (outcome) => (outcome === 'success' ? 
'success' : 'failure'); const checks = [ - { context: 'validate', description: 'Pattern/registry gates (daily-triage inline)' }, - { context: 'audit', description: 'Loop readiness gates (daily-triage inline)' }, + { + context: 'validate', + description: 'Pattern/registry gates (daily-triage inline)', + state: toState('${{ steps.validate_gates.outcome }}'), + }, + { + context: 'audit', + description: 'Loop readiness gates (daily-triage inline)', + state: toState('${{ steps.audit_gates.outcome }}'), + }, ]; for (const check of checks) { await github.rest.repos.createCommitStatus({ owner: context.repo.owner, repo: context.repo.repo, sha, - state: 'success', + state: check.state, context: check.context, description: check.description, target_url: `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`, diff --git a/tools/loop-audit/dist/auditor.js b/tools/loop-audit/dist/auditor.js index 3cb57b6..fa80599 100644 --- a/tools/loop-audit/dist/auditor.js +++ b/tools/loop-audit/dist/auditor.js @@ -23,6 +23,43 @@ const LOOP_SKILL_NAMES = [ 'draft-release-notes', 'issue-triage', ]; +/** + * Score contribution for each readiness signal, out of 100. + * + * These weights are intentionally centralized (rather than inlined in + * `computeScore`) so the rubric is auditable in one place and the + * level thresholds below stay meaningful when weights change. + * `base` is the floor every project starts from. With every signal present + * the weights total 130 (`skillsOne` excluded); `computeScore` clamps to 100.
+ */ +const SCORE_WEIGHTS = { + base: 10, + stateFile: 18, + triage: 14, + loopConfig: 9, + agentsMd: 9, + skillsTwoPlus: 14, + skillsOne: 7, + verifier: 14, + safetyLoopMd: 4, + safetyDoc: 4, + github: 6, + githubWorkflows: 4, + mcp: 3, + worktree: 3, + registry: 2, + budgetDoc: 3, + runLog: 3, + loopMdBudget: 2, + budgetSkill: 2, + loopActivity: 6, +}; +/** Minimum total score required to reach each readiness level (gated by additional signal requirements in `computeScore`). */ +const LEVEL_THRESHOLDS = { + L1: 38, + L2: 58, + L3: 78, +}; const SAFETY_FILES = ['safety.md', 'docs/safety.md', 'SECURITY.md']; const MCP_FILES = ['.mcp.json', 'mcp.json', '.mcp/config.json']; const WORKTREE_HINTS = ['worktree', 'worktrees', 'git worktree']; @@ -122,7 +159,7 @@ async function detectLoopActivity(root) { timeout: 1500, }); const lower = log.toLowerCase(); - if (/state\.md|loop| t riage |changelog-drafter|post-merge|daily triage|audit/i.test(lower)) { + if (/state\.md|loop|triage|changelog-drafter|post-merge|daily triage|audit/i.test(lower)) { const firstMatch = log.trim().split('\n')[0] || ''; evidence.push(`git:${firstMatch.slice(0, 60)}`); } @@ -143,45 +180,46 @@ async function detectLoopActivity(root) { return { present: evidence.length > 0, evidence: Array.from(new Set(evidence)).slice(0, 4) }; } export function computeScore(signals) { - let score = 10; + const w = SCORE_WEIGHTS; + let score = w.base; if (signals.stateFile.present) - score += 18; + score += w.stateFile; if (signals.triage.present) - score += 14; + score += w.triage; if (signals.loopConfig.present) - score += 9; + score += w.loopConfig; if (signals.agentsMd.present) - score += 9; + score += w.agentsMd; if (signals.skills.count >= 2) - score += 14; + score += w.skillsTwoPlus; else if (signals.skills.count === 1) - score += 7; + score += w.skillsOne; if (signals.verifier.present) - score += 14; + score += w.verifier; if (signals.safety.loopMdMentionsSafety) - score += 4; + score += w.safetyLoopMd; if 
(signals.safety.safetyDocPresent) - score += 4; + score += w.safetyDoc; if (signals.github.present) - score += 6; + score += w.github; if (signals.github.workflows) - score += 4; + score += w.githubWorkflows; if (signals.mcp.present) - score += 3; + score += w.mcp; if (signals.worktreeEvidence.present) - score += 3; + score += w.worktree; if (signals.registry.present) - score += 2; + score += w.registry; if (signals.cost.budgetDoc) - score += 3; + score += w.budgetDoc; if (signals.cost.runLog) - score += 3; + score += w.runLog; if (signals.cost.loopMdBudget) - score += 2; + score += w.loopMdBudget; if (signals.cost.budgetSkill) - score += 2; + score += w.budgetSkill; if (signals.loopActivity.present) - score += 6; + score += w.loopActivity; score = Math.min(100, Math.max(0, score)); const costReady = signals.cost.budgetDoc && signals.cost.runLog && @@ -189,11 +227,11 @@ export function computeScore(signals) { const hasRealActivity = signals.loopActivity.present; const l3Ready = costReady && hasRealActivity; let level = 'L0'; - if (score >= 78 && signals.verifier.present && signals.stateFile.present && l3Ready) + if (score >= LEVEL_THRESHOLDS.L3 && signals.verifier.present && signals.stateFile.present && l3Ready) level = 'L3'; - else if (score >= 58 && signals.triage.present) + else if (score >= LEVEL_THRESHOLDS.L2 && signals.triage.present) level = 'L2'; - else if (score >= 38 && signals.stateFile.present) + else if (score >= LEVEL_THRESHOLDS.L1 && signals.stateFile.present) level = 'L1'; else level = 'L0'; @@ -409,13 +447,13 @@ export async function auditProject(target) { const costReady = signals.cost.budgetDoc && signals.cost.runLog && signals.cost.loopMdBudget; - if (score >= 78 && signals.verifier.present && signals.stateFile.present && !costReady) { + if (score >= LEVEL_THRESHOLDS.L3 && signals.verifier.present && signals.stateFile.present && !costReady) { findings.push({ level: 'warn', message: 'Score qualifies for L3 but cost observability is incomplete — 
capped at L2 until budget + run log + LOOP.md budget exist.', }); } - if (score >= 78 && signals.verifier.present && signals.stateFile.present && costReady && !signals.loopActivity.present) { + if (score >= LEVEL_THRESHOLDS.L3 && signals.verifier.present && signals.stateFile.present && costReady && !signals.loopActivity.present) { findings.push({ level: 'warn', message: 'Score qualifies for L3 but no proven loop activity yet — capped at L2 until you run and commit at least one loop cycle.', diff --git a/tools/loop-audit/src/auditor.ts b/tools/loop-audit/src/auditor.ts index 52f178d..20ba0cd 100644 --- a/tools/loop-audit/src/auditor.ts +++ b/tools/loop-audit/src/auditor.ts @@ -64,6 +64,45 @@ const LOOP_SKILL_NAMES = [ 'issue-triage', ]; +/** + * Score contribution for each readiness signal, out of 100. + * + * These weights are intentionally centralized (rather than inlined in + * `computeScore`) so the rubric is auditable in one place and the + * level thresholds below stay meaningful when weights change. + * `base` is the floor every project starts from. With every signal present + * the weights total 130 (`skillsOne` excluded); `computeScore` clamps to 100. + */ +const SCORE_WEIGHTS = { + base: 10, + stateFile: 18, + triage: 14, + loopConfig: 9, + agentsMd: 9, + skillsTwoPlus: 14, + skillsOne: 7, + verifier: 14, + safetyLoopMd: 4, + safetyDoc: 4, + github: 6, + githubWorkflows: 4, + mcp: 3, + worktree: 3, + registry: 2, + budgetDoc: 3, + runLog: 3, + loopMdBudget: 2, + budgetSkill: 2, + loopActivity: 6, +} as const; + +/** Minimum total score required to reach each readiness level (gated by additional signal requirements in `computeScore`).
*/ +const LEVEL_THRESHOLDS = { + L1: 38, + L2: 58, + L3: 78, +} as const; + const SAFETY_FILES = ['safety.md', 'docs/safety.md', 'SECURITY.md']; const MCP_FILES = ['.mcp.json', 'mcp.json', '.mcp/config.json']; const WORKTREE_HINTS = ['worktree', 'worktrees', 'git worktree']; @@ -163,7 +202,7 @@ async function detectLoopActivity(root: string): Promise<{ present: boolean; evi timeout: 1500, }); const lower = log.toLowerCase(); - if (/state\.md|loop| t riage |changelog-drafter|post-merge|daily triage|audit/i.test(lower)) { + if (/state\.md|loop|triage|changelog-drafter|post-merge|daily triage|audit/i.test(lower)) { const firstMatch = log.trim().split('\n')[0] || ''; evidence.push(`git:${firstMatch.slice(0, 60)}`); } @@ -184,27 +223,28 @@ async function detectLoopActivity(root: string): Promise<{ present: boolean; evi } export function computeScore(signals: LoopSignals): { score: number; level: 'L0' | 'L1' | 'L2' | 'L3'; assessment: string } { - let score = 10; - - if (signals.stateFile.present) score += 18; - if (signals.triage.present) score += 14; - if (signals.loopConfig.present) score += 9; - if (signals.agentsMd.present) score += 9; - if (signals.skills.count >= 2) score += 14; - else if (signals.skills.count === 1) score += 7; - if (signals.verifier.present) score += 14; - if (signals.safety.loopMdMentionsSafety) score += 4; - if (signals.safety.safetyDocPresent) score += 4; - if (signals.github.present) score += 6; - if (signals.github.workflows) score += 4; - if (signals.mcp.present) score += 3; - if (signals.worktreeEvidence.present) score += 3; - if (signals.registry.present) score += 2; - if (signals.cost.budgetDoc) score += 3; - if (signals.cost.runLog) score += 3; - if (signals.cost.loopMdBudget) score += 2; - if (signals.cost.budgetSkill) score += 2; - if (signals.loopActivity.present) score += 6; + const w = SCORE_WEIGHTS; + let score: number = w.base; + + if (signals.stateFile.present) score += w.stateFile; + if (signals.triage.present) score += 
w.triage; + if (signals.loopConfig.present) score += w.loopConfig; + if (signals.agentsMd.present) score += w.agentsMd; + if (signals.skills.count >= 2) score += w.skillsTwoPlus; + else if (signals.skills.count === 1) score += w.skillsOne; + if (signals.verifier.present) score += w.verifier; + if (signals.safety.loopMdMentionsSafety) score += w.safetyLoopMd; + if (signals.safety.safetyDocPresent) score += w.safetyDoc; + if (signals.github.present) score += w.github; + if (signals.github.workflows) score += w.githubWorkflows; + if (signals.mcp.present) score += w.mcp; + if (signals.worktreeEvidence.present) score += w.worktree; + if (signals.registry.present) score += w.registry; + if (signals.cost.budgetDoc) score += w.budgetDoc; + if (signals.cost.runLog) score += w.runLog; + if (signals.cost.loopMdBudget) score += w.loopMdBudget; + if (signals.cost.budgetSkill) score += w.budgetSkill; + if (signals.loopActivity.present) score += w.loopActivity; score = Math.min(100, Math.max(0, score)); @@ -216,9 +256,9 @@ export function computeScore(signals: LoopSignals): { score: number; level: 'L0' const l3Ready = costReady && hasRealActivity; let level: 'L0' | 'L1' | 'L2' | 'L3' = 'L0'; - if (score >= 78 && signals.verifier.present && signals.stateFile.present && l3Ready) level = 'L3'; - else if (score >= 58 && signals.triage.present) level = 'L2'; - else if (score >= 38 && signals.stateFile.present) level = 'L1'; + if (score >= LEVEL_THRESHOLDS.L3 && signals.verifier.present && signals.stateFile.present && l3Ready) level = 'L3'; + else if (score >= LEVEL_THRESHOLDS.L2 && signals.triage.present) level = 'L2'; + else if (score >= LEVEL_THRESHOLDS.L1 && signals.stateFile.present) level = 'L1'; else level = 'L0'; const assessment = @@ -452,14 +492,14 @@ export async function auditProject(target: string): Promise { signals.cost.runLog && signals.cost.loopMdBudget; - if (score >= 78 && signals.verifier.present && signals.stateFile.present && !costReady) { + if (score >= 
LEVEL_THRESHOLDS.L3 && signals.verifier.present && signals.stateFile.present && !costReady) { findings.push({ level: 'warn', message: 'Score qualifies for L3 but cost observability is incomplete — capped at L2 until budget + run log + LOOP.md budget exist.', }); } - if (score >= 78 && signals.verifier.present && signals.stateFile.present && costReady && !signals.loopActivity.present) { + if (score >= LEVEL_THRESHOLDS.L3 && signals.verifier.present && signals.stateFile.present && costReady && !signals.loopActivity.present) { findings.push({ level: 'warn', message: 'Score qualifies for L3 but no proven loop activity yet — capped at L2 until you run and commit at least one loop cycle.', diff --git a/tools/loop-audit/test/auditor.test.mjs b/tools/loop-audit/test/auditor.test.mjs index c20e5f7..b46edfa 100644 --- a/tools/loop-audit/test/auditor.test.mjs +++ b/tools/loop-audit/test/auditor.test.mjs @@ -1,10 +1,26 @@ import { test } from 'node:test'; import assert from 'node:assert/strict'; import { mkdtemp, mkdir, writeFile, rm } from 'node:fs/promises'; +import { execFileSync } from 'node:child_process'; import { tmpdir } from 'node:os'; import path from 'node:path'; import { auditProject, computeScore } from '../dist/auditor.js'; +/** Initialize a throwaway git repo with one commit. Returns false if git is unavailable. 
*/ +function initGitRepo(dir, commitMessage) { + try { + const opts = { cwd: dir, stdio: 'ignore' }; + execFileSync('git', ['init', '-q'], opts); + execFileSync('git', ['config', 'user.email', 'test@example.com'], opts); + execFileSync('git', ['config', 'user.name', 'Test'], opts); + execFileSync('git', ['add', '-A'], opts); + execFileSync('git', ['commit', '-q', '-m', commitMessage], opts); + return true; + } catch { + return false; + } +} + function emptySignals() { return { stateFile: { present: false, paths: [] }, @@ -152,4 +168,29 @@ test('auditProject: L2 with verifier skill', async () => { } finally { await rm(dir, { recursive: true, force: true }); } +}); + +test('detectLoopActivity: git commit mentioning "triage" is recognized as activity', async () => { + const dir = await mkdtemp(path.join(tmpdir(), 'loop-audit-git-')); + try { + // Only signal is the commit subject — the word "triage" with no other + // git-detectable token. Regresses the prior `t riage` (spaced) bug, which + // could never match a real commit message. 
+ await writeFile(path.join(dir, 'notes.txt'), 'work\n'); + if (!initGitRepo(dir, 'chore: triage the inbox')) { + // git not available in this environment — skip without failing + return; + } + const result = await auditProject(dir); + assert.ok( + result.signals.loopActivity.present, + 'expected loop activity to be detected from a triage commit', + ); + assert.ok( + result.signals.loopActivity.evidence.some((e) => e.startsWith('git:')), + `expected git evidence, got: ${JSON.stringify(result.signals.loopActivity.evidence)}`, + ); + } finally { + await rm(dir, { recursive: true, force: true }); + } }); \ No newline at end of file diff --git a/tools/loop-cost/dist/cli.js b/tools/loop-cost/dist/cli.js index a999ff9..c9965ed 100644 --- a/tools/loop-cost/dist/cli.js +++ b/tools/loop-cost/dist/cli.js @@ -75,6 +75,11 @@ Examples: `); process.exit(0); } + const validLevels = ['L1', 'L2', 'L3']; + if (!validLevels.includes(args.level)) { + console.error(`Unknown level: ${args.level}. Valid levels: ${validLevels.join(', ')}`); + process.exit(1); + } const registry = await loadRegistry(); if (args.list) { for (const p of registry.patterns) { diff --git a/tools/loop-cost/dist/estimator.js b/tools/loop-cost/dist/estimator.js index 0437465..ee42cec 100644 --- a/tools/loop-cost/dist/estimator.js +++ b/tools/loop-cost/dist/estimator.js @@ -61,7 +61,11 @@ function formatTokens(n) { return `${Math.round(n / 1_000)}k`; return String(n); } +const VALID_LEVELS = ['L1', 'L2', 'L3']; export function estimateCost(input) { + if (!VALID_LEVELS.includes(input.level)) { + throw new Error(`Invalid readiness level: ${input.level}. Expected one of ${VALID_LEVELS.join(', ')}.`); + } const cadence = input.cadence ?? 
input.pattern.cadence; const runsPerDay = cadenceToRunsPerDay(cadence, input.conservative); const { cost, token_cost: tokenCostTier } = input.pattern; diff --git a/tools/loop-cost/src/cli.ts b/tools/loop-cost/src/cli.ts index 9a61c60..dcca685 100644 --- a/tools/loop-cost/src/cli.ts +++ b/tools/loop-cost/src/cli.ts @@ -80,6 +80,12 @@ Examples: process.exit(0); } + const validLevels: ReadinessLevel[] = ['L1', 'L2', 'L3']; + if (!validLevels.includes(args.level)) { + console.error(`Unknown level: ${args.level}. Valid levels: ${validLevels.join(', ')}`); + process.exit(1); + } + const registry = await loadRegistry(); if (args.list) { diff --git a/tools/loop-cost/src/estimator.ts b/tools/loop-cost/src/estimator.ts index d0b30cd..99a2fe1 100644 --- a/tools/loop-cost/src/estimator.ts +++ b/tools/loop-cost/src/estimator.ts @@ -116,7 +116,12 @@ function formatTokens(n: number): string { return String(n); } +const VALID_LEVELS: readonly ReadinessLevel[] = ['L1', 'L2', 'L3']; + export function estimateCost(input: EstimateInput): EstimateResult { + if (!VALID_LEVELS.includes(input.level)) { + throw new Error(`Invalid readiness level: ${input.level}. Expected one of ${VALID_LEVELS.join(', ')}.`); + } const cadence = input.cadence ?? 
input.pattern.cadence; const runsPerDay = cadenceToRunsPerDay(cadence, input.conservative); const { cost, token_cost: tokenCostTier } = input.pattern; diff --git a/tools/loop-cost/test/estimator.test.mjs b/tools/loop-cost/test/estimator.test.mjs index c481b57..4ce0460 100644 --- a/tools/loop-cost/test/estimator.test.mjs +++ b/tools/loop-cost/test/estimator.test.mjs @@ -48,6 +48,13 @@ test('estimateCost: ci-sweeper 15m L2 warns on high spend', () => { assert.ok(r.scenarios.realistic.tokensPerDay < r.scenarios.action.tokensPerDay); }); +test('estimateCost: rejects an invalid readiness level', () => { + assert.throws( + () => estimateCost({ pattern: CI_SWEEPER, level: 'L9' }), + /Invalid readiness level/, + ); +}); + test('estimateCost: daily-triage 1d L1 is cheap', () => { const r = estimateCost({ pattern: { diff --git a/tools/loop-init/dist/cli.js b/tools/loop-init/dist/cli.js index e42d1f5..f4d145a 100644 --- a/tools/loop-init/dist/cli.js +++ b/tools/loop-init/dist/cli.js @@ -276,6 +276,16 @@ Examples: process.exit(0); } const { pattern, tool, target, dryRun } = args; + const validPatterns = Object.keys(PATTERN_STARTERS); + if (!validPatterns.includes(pattern)) { + console.error(`Unknown pattern: ${pattern}\nValid patterns: ${validPatterns.join(', ')}`); + process.exit(1); + } + const validTools = Object.keys(TOOL_SUFFIX); + if (!validTools.includes(tool)) { + console.error(`Unknown tool: ${tool}\nValid tools: ${validTools.join(', ')}`); + process.exit(1); + } const targetDir = path.resolve(target); const baseStarter = PATTERN_STARTERS[pattern]; const suffix = TOOL_SUFFIX[tool]; diff --git a/tools/loop-init/src/cli.ts b/tools/loop-init/src/cli.ts index 9bd614c..ef3cce2 100644 --- a/tools/loop-init/src/cli.ts +++ b/tools/loop-init/src/cli.ts @@ -330,6 +330,19 @@ Examples: } const { pattern, tool, target, dryRun } = args; + + const validPatterns = Object.keys(PATTERN_STARTERS) as Pattern[]; + if (!validPatterns.includes(pattern)) { + console.error(`Unknown pattern: 
${pattern}\nValid patterns: ${validPatterns.join(', ')}`); + process.exit(1); + } + + const validTools = Object.keys(TOOL_SUFFIX) as Tool[]; + if (!validTools.includes(tool)) { + console.error(`Unknown tool: ${tool}\nValid tools: ${validTools.join(', ')}`); + process.exit(1); + } + const targetDir = path.resolve(target); const baseStarter = PATTERN_STARTERS[pattern]; const suffix = TOOL_SUFFIX[tool]; diff --git a/tools/loop-init/test/cli.test.mjs b/tools/loop-init/test/cli.test.mjs index 31defe4..88d83c7 100644 --- a/tools/loop-init/test/cli.test.mjs +++ b/tools/loop-init/test/cli.test.mjs @@ -14,6 +14,30 @@ test('loop-init --help exits 0', async () => { assert.match(stdout, /changelog-drafter/); }); +test('loop-init rejects an unknown pattern with a helpful message', async () => { + await assert.rejects( + exec('node', [CLI, '.', '--pattern', 'not-a-pattern', '--dry-run']), + (err) => { + assert.equal(err.code, 1); + assert.match(err.stderr, /Unknown pattern: not-a-pattern/); + assert.match(err.stderr, /Valid patterns:/); + return true; + }, + ); +}); + +test('loop-init rejects an unknown tool with a helpful message', async () => { + await assert.rejects( + exec('node', [CLI, '.', '--pattern', 'daily-triage', '--tool', 'emacs', '--dry-run']), + (err) => { + assert.equal(err.code, 1); + assert.match(err.stderr, /Unknown tool: emacs/); + assert.match(err.stderr, /Valid tools:/); + return true; + }, + ); +}); + test('loop-init dry-run scaffolds daily-triage', async () => { const dir = await mkdtemp(path.join(tmpdir(), 'loop-init-')); try {