Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 29 additions & 3 deletions .github/workflows/daily-triage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,14 +121,29 @@ jobs:
node scripts/append-run-log.mjs "$ENTRY"
echo "outcome=${OUTCOME}" >> "$GITHUB_OUTPUT"

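# These run the SAME gates a PR-triggered workflow would. Their outcomes
# (not a hardcoded value) drive the commit statuses posted below, so the
# loop cannot mark its own change green unless the real gates passed.
# continue-on-error keeps the job going after a gate fails, so the step's
# `outcome` ("failure") is still available to later steps and an honest
# "failure" status can be posted instead of aborting before it is set.
# NOTE(review): the "Fail the run if either gate failed" step below exits 1
# before the PR/status steps. Steps after a failed step are skipped unless
# they opt in (e.g. `if: always()`), so confirm the status-posting step does
# so; otherwise a failing gate never gets its "failure" status posted.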
- name: Run validate gates (for PR status)
id: validate_gates
continue-on-error: true
run: bash scripts/ci-validate-gates.sh

- name: Run audit gates (for PR status)
id: audit_gates
continue-on-error: true
run: bash scripts/ci-audit-gates.sh

- name: Fail the run if either gate failed
if: steps.validate_gates.outcome == 'failure' || steps.audit_gates.outcome == 'failure'
run: |
echo "validate gates: ${{ steps.validate_gates.outcome }}"
echo "audit gates: ${{ steps.audit_gates.outcome }}"
echo "One or more dogfood gates failed — not opening/merging an automated PR."
exit 1

- name: Open PR for STATE.md + loop-run-log if changed
id: pr
env:
Expand Down Expand Up @@ -186,16 +201,27 @@ jobs:
core.setFailed('Missing head_sha for commit statuses');
return;
}
// Map each gate's real step outcome to the commit status it backs.
// 'success' is only posted when the corresponding gate actually passed.
const toState = (outcome) => (outcome === 'success' ? 'success' : 'failure');
const checks = [
{ context: 'validate', description: 'Pattern/registry gates (daily-triage inline)' },
{ context: 'audit', description: 'Loop readiness gates (daily-triage inline)' },
{
context: 'validate',
description: 'Pattern/registry gates (daily-triage inline)',
state: toState('${{ steps.validate_gates.outcome }}'),
},
{
context: 'audit',
description: 'Loop readiness gates (daily-triage inline)',
state: toState('${{ steps.audit_gates.outcome }}'),
},
];
for (const check of checks) {
await github.rest.repos.createCommitStatus({
owner: context.repo.owner,
repo: context.repo.repo,
sha,
state: 'success',
state: check.state,
context: check.context,
description: check.description,
target_url: `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`,
Expand Down
90 changes: 64 additions & 26 deletions tools/loop-audit/dist/auditor.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,43 @@ const LOOP_SKILL_NAMES = [
'draft-release-notes',
'issue-triage',
];
/**
* Score contribution for each readiness signal.
*
* These weights are intentionally centralized (rather than inlined in
* `computeScore`) so the rubric is auditable in one place and the
* level thresholds below stay meaningful when weights change.
* `base` is the floor every project starts from.
*
* NOTE: the weights do NOT sum to 100. `skillsTwoPlus` and `skillsOne` are
* mutually exclusive in `computeScore` (two-or-more skills vs. exactly one),
* so the highest raw total, with every signal present, is 130.
* `computeScore` clamps the final score to the 0-100 range, which means
* several different signal combinations can all report a score of 100.
*/
const SCORE_WEIGHTS = {
base: 10,
stateFile: 18,
triage: 14,
loopConfig: 9,
agentsMd: 9,
skillsTwoPlus: 14,
skillsOne: 7,
verifier: 14,
safetyLoopMd: 4,
safetyDoc: 4,
github: 6,
githubWorkflows: 4,
mcp: 3,
worktree: 3,
registry: 2,
budgetDoc: 3,
runLog: 3,
loopMdBudget: 2,
budgetSkill: 2,
loopActivity: 6,
};
/** Minimum total score required to reach each readiness level (gated by additional signal requirements in `computeScore`). */
const LEVEL_THRESHOLDS = {
L1: 38,
L2: 58,
L3: 78,
};
const SAFETY_FILES = ['safety.md', 'docs/safety.md', 'SECURITY.md'];
const MCP_FILES = ['.mcp.json', 'mcp.json', '.mcp/config.json'];
const WORKTREE_HINTS = ['worktree', 'worktrees', 'git worktree'];
Expand Down Expand Up @@ -122,7 +159,7 @@ async function detectLoopActivity(root) {
timeout: 1500,
});
const lower = log.toLowerCase();
if (/state\.md|loop| t riage |changelog-drafter|post-merge|daily triage|audit/i.test(lower)) {
if (/state\.md|loop|triage|changelog-drafter|post-merge|daily triage|audit/i.test(lower)) {
const firstMatch = log.trim().split('\n')[0] || '';
evidence.push(`git:${firstMatch.slice(0, 60)}`);
}
Expand All @@ -143,57 +180,58 @@ async function detectLoopActivity(root) {
return { present: evidence.length > 0, evidence: Array.from(new Set(evidence)).slice(0, 4) };
}
export function computeScore(signals) {
let score = 10;
const w = SCORE_WEIGHTS;
let score = w.base;
if (signals.stateFile.present)
score += 18;
score += w.stateFile;
if (signals.triage.present)
score += 14;
score += w.triage;
if (signals.loopConfig.present)
score += 9;
score += w.loopConfig;
if (signals.agentsMd.present)
score += 9;
score += w.agentsMd;
if (signals.skills.count >= 2)
score += 14;
score += w.skillsTwoPlus;
else if (signals.skills.count === 1)
score += 7;
score += w.skillsOne;
if (signals.verifier.present)
score += 14;
score += w.verifier;
if (signals.safety.loopMdMentionsSafety)
score += 4;
score += w.safetyLoopMd;
if (signals.safety.safetyDocPresent)
score += 4;
score += w.safetyDoc;
if (signals.github.present)
score += 6;
score += w.github;
if (signals.github.workflows)
score += 4;
score += w.githubWorkflows;
if (signals.mcp.present)
score += 3;
score += w.mcp;
if (signals.worktreeEvidence.present)
score += 3;
score += w.worktree;
if (signals.registry.present)
score += 2;
score += w.registry;
if (signals.cost.budgetDoc)
score += 3;
score += w.budgetDoc;
if (signals.cost.runLog)
score += 3;
score += w.runLog;
if (signals.cost.loopMdBudget)
score += 2;
score += w.loopMdBudget;
if (signals.cost.budgetSkill)
score += 2;
score += w.budgetSkill;
if (signals.loopActivity.present)
score += 6;
score += w.loopActivity;
score = Math.min(100, Math.max(0, score));
const costReady = signals.cost.budgetDoc &&
signals.cost.runLog &&
signals.cost.loopMdBudget;
const hasRealActivity = signals.loopActivity.present;
const l3Ready = costReady && hasRealActivity;
let level = 'L0';
if (score >= 78 && signals.verifier.present && signals.stateFile.present && l3Ready)
if (score >= LEVEL_THRESHOLDS.L3 && signals.verifier.present && signals.stateFile.present && l3Ready)
level = 'L3';
else if (score >= 58 && signals.triage.present)
else if (score >= LEVEL_THRESHOLDS.L2 && signals.triage.present)
level = 'L2';
else if (score >= 38 && signals.stateFile.present)
else if (score >= LEVEL_THRESHOLDS.L1 && signals.stateFile.present)
level = 'L1';
else
level = 'L0';
Expand Down Expand Up @@ -409,13 +447,13 @@ export async function auditProject(target) {
const costReady = signals.cost.budgetDoc &&
signals.cost.runLog &&
signals.cost.loopMdBudget;
if (score >= 78 && signals.verifier.present && signals.stateFile.present && !costReady) {
if (score >= LEVEL_THRESHOLDS.L3 && signals.verifier.present && signals.stateFile.present && !costReady) {
findings.push({
level: 'warn',
message: 'Score qualifies for L3 but cost observability is incomplete — capped at L2 until budget + run log + LOOP.md budget exist.',
});
}
if (score >= 78 && signals.verifier.present && signals.stateFile.present && costReady && !signals.loopActivity.present) {
if (score >= LEVEL_THRESHOLDS.L3 && signals.verifier.present && signals.stateFile.present && costReady && !signals.loopActivity.present) {
findings.push({
level: 'warn',
message: 'Score qualifies for L3 but no proven loop activity yet — capped at L2 until you run and commit at least one loop cycle.',
Expand Down
94 changes: 67 additions & 27 deletions tools/loop-audit/src/auditor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,45 @@ const LOOP_SKILL_NAMES = [
'issue-triage',
];

/**
* Score contribution for each readiness signal.
*
* These weights are intentionally centralized (rather than inlined in
* `computeScore`) so the rubric is auditable in one place and the
* level thresholds below stay meaningful when weights change.
* `base` is the floor every project starts from.
*
* NOTE: the weights do NOT sum to 100. `skillsTwoPlus` and `skillsOne` are
* mutually exclusive in `computeScore` (two-or-more skills vs. exactly one),
* so the highest raw total, with every signal present, is 130.
* `computeScore` clamps the final score to the 0-100 range, which means
* several different signal combinations can all report a score of 100.
*/
const SCORE_WEIGHTS = {
base: 10,
stateFile: 18,
triage: 14,
loopConfig: 9,
agentsMd: 9,
skillsTwoPlus: 14,
skillsOne: 7,
verifier: 14,
safetyLoopMd: 4,
safetyDoc: 4,
github: 6,
githubWorkflows: 4,
mcp: 3,
worktree: 3,
registry: 2,
budgetDoc: 3,
runLog: 3,
loopMdBudget: 2,
budgetSkill: 2,
loopActivity: 6,
} as const;

/**
* Minimum total score required to reach each readiness level.
*
* Meeting a threshold is necessary but not sufficient: `computeScore` also
* gates each level on specific signals and checks the levels from highest to
* lowest. L3 additionally requires the verifier, the state file, cost
* readiness (budget doc + run log + LOOP.md budget) and proven loop activity;
* L2 additionally requires triage; L1 additionally requires the state file.
* Anything that satisfies none of these is L0.
*/
const LEVEL_THRESHOLDS = {
L1: 38,
L2: 58,
L3: 78,
} as const;

const SAFETY_FILES = ['safety.md', 'docs/safety.md', 'SECURITY.md'];
const MCP_FILES = ['.mcp.json', 'mcp.json', '.mcp/config.json'];
const WORKTREE_HINTS = ['worktree', 'worktrees', 'git worktree'];
Expand Down Expand Up @@ -163,7 +202,7 @@ async function detectLoopActivity(root: string): Promise<{ present: boolean; evi
timeout: 1500,
});
const lower = log.toLowerCase();
if (/state\.md|loop| t riage |changelog-drafter|post-merge|daily triage|audit/i.test(lower)) {
if (/state\.md|loop|triage|changelog-drafter|post-merge|daily triage|audit/i.test(lower)) {
const firstMatch = log.trim().split('\n')[0] || '';
evidence.push(`git:${firstMatch.slice(0, 60)}`);
}
Expand All @@ -184,27 +223,28 @@ async function detectLoopActivity(root: string): Promise<{ present: boolean; evi
}

export function computeScore(signals: LoopSignals): { score: number; level: 'L0' | 'L1' | 'L2' | 'L3'; assessment: string } {
let score = 10;

if (signals.stateFile.present) score += 18;
if (signals.triage.present) score += 14;
if (signals.loopConfig.present) score += 9;
if (signals.agentsMd.present) score += 9;
if (signals.skills.count >= 2) score += 14;
else if (signals.skills.count === 1) score += 7;
if (signals.verifier.present) score += 14;
if (signals.safety.loopMdMentionsSafety) score += 4;
if (signals.safety.safetyDocPresent) score += 4;
if (signals.github.present) score += 6;
if (signals.github.workflows) score += 4;
if (signals.mcp.present) score += 3;
if (signals.worktreeEvidence.present) score += 3;
if (signals.registry.present) score += 2;
if (signals.cost.budgetDoc) score += 3;
if (signals.cost.runLog) score += 3;
if (signals.cost.loopMdBudget) score += 2;
if (signals.cost.budgetSkill) score += 2;
if (signals.loopActivity.present) score += 6;
const w = SCORE_WEIGHTS;
let score: number = w.base;

if (signals.stateFile.present) score += w.stateFile;
if (signals.triage.present) score += w.triage;
if (signals.loopConfig.present) score += w.loopConfig;
if (signals.agentsMd.present) score += w.agentsMd;
if (signals.skills.count >= 2) score += w.skillsTwoPlus;
else if (signals.skills.count === 1) score += w.skillsOne;
if (signals.verifier.present) score += w.verifier;
if (signals.safety.loopMdMentionsSafety) score += w.safetyLoopMd;
if (signals.safety.safetyDocPresent) score += w.safetyDoc;
if (signals.github.present) score += w.github;
if (signals.github.workflows) score += w.githubWorkflows;
if (signals.mcp.present) score += w.mcp;
if (signals.worktreeEvidence.present) score += w.worktree;
if (signals.registry.present) score += w.registry;
if (signals.cost.budgetDoc) score += w.budgetDoc;
if (signals.cost.runLog) score += w.runLog;
if (signals.cost.loopMdBudget) score += w.loopMdBudget;
if (signals.cost.budgetSkill) score += w.budgetSkill;
if (signals.loopActivity.present) score += w.loopActivity;

score = Math.min(100, Math.max(0, score));

Expand All @@ -216,9 +256,9 @@ export function computeScore(signals: LoopSignals): { score: number; level: 'L0'
const l3Ready = costReady && hasRealActivity;

let level: 'L0' | 'L1' | 'L2' | 'L3' = 'L0';
if (score >= 78 && signals.verifier.present && signals.stateFile.present && l3Ready) level = 'L3';
else if (score >= 58 && signals.triage.present) level = 'L2';
else if (score >= 38 && signals.stateFile.present) level = 'L1';
if (score >= LEVEL_THRESHOLDS.L3 && signals.verifier.present && signals.stateFile.present && l3Ready) level = 'L3';
else if (score >= LEVEL_THRESHOLDS.L2 && signals.triage.present) level = 'L2';
else if (score >= LEVEL_THRESHOLDS.L1 && signals.stateFile.present) level = 'L1';
else level = 'L0';

const assessment =
Expand Down Expand Up @@ -452,14 +492,14 @@ export async function auditProject(target: string): Promise<AuditResult> {
signals.cost.runLog &&
signals.cost.loopMdBudget;

if (score >= 78 && signals.verifier.present && signals.stateFile.present && !costReady) {
if (score >= LEVEL_THRESHOLDS.L3 && signals.verifier.present && signals.stateFile.present && !costReady) {
findings.push({
level: 'warn',
message: 'Score qualifies for L3 but cost observability is incomplete — capped at L2 until budget + run log + LOOP.md budget exist.',
});
}

if (score >= 78 && signals.verifier.present && signals.stateFile.present && costReady && !signals.loopActivity.present) {
if (score >= LEVEL_THRESHOLDS.L3 && signals.verifier.present && signals.stateFile.present && costReady && !signals.loopActivity.present) {
findings.push({
level: 'warn',
message: 'Score qualifies for L3 but no proven loop activity yet — capped at L2 until you run and commit at least one loop cycle.',
Expand Down
Loading