Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/validate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,5 @@ jobs:
run: node scripts/validate-skill.mjs
- name: Test installer behavior
run: node scripts/test-install-adapters.mjs
- name: Test approved-plan schema examples
run: node scripts/test-approved-plan-schema.mjs
2 changes: 2 additions & 0 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ Run from the repository root:
```bash
node scripts/validate-skill.mjs
node scripts/test-install-adapters.mjs
node scripts/test-approved-plan-schema.mjs
```

The installer smoke test uses temporary Codex and Claude homes plus a temporary target project for Cursor, Gemini, and generic adapters. It verifies identical-directory skip, stale-file pruning, exact skill sync, `--all` preflight failure before writes, and multi-platform installs on the current OS. CI runs the same checks on Linux, Windows, and macOS.
The approved-plan schema test verifies that the checked-in sample payload passes and that missing `decision_trace`, Deep Risk plans without evidence anchors, unresolved confirmations, extra fields, and unsupported high-risk categories fail.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -212,10 +212,12 @@ Quick smoke run before publishing:
```bash
node scripts/validate-skill.mjs
node scripts/test-install-adapters.mjs
node scripts/test-approved-plan-schema.mjs
node scripts/install-adapters.mjs --platform codex --force
```

The installer smoke test uses temporary Codex/Claude homes and a temporary target project to verify exact skill sync, stale-file pruning, identical-directory skip, `--all` preflight behavior, and multi-platform adapter installs.
The approved-plan schema test validates the sample payload and rejects missing `decision_trace`, Deep Risk plans without evidence anchors, unresolved confirmations, extra fields, and unsupported high-risk categories.

## Influences and Attribution

Expand Down
186 changes: 186 additions & 0 deletions scripts/test-approved-plan-schema.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
import assert from "node:assert/strict";
import fs from "node:fs";
import path from "node:path";
import { fileURLToPath } from "node:url";

// Locate the repository from this script's own path: the script lives in scripts/, one level below the root.
const scriptDir = path.dirname(fileURLToPath(import.meta.url));
const repoRoot = path.resolve(scriptDir, "..");
const coderRoot = path.join(repoRoot, "skills", "crossframe-coder");

/** Read and parse a UTF-8 JSON file addressed by path segments below the crossframe-coder skill directory. */
const readCoderJson = (...segments) => JSON.parse(fs.readFileSync(path.join(coderRoot, ...segments), "utf8"));

// The JSON schema that drives the checks below, and the checked-in sample payload expected to pass them.
const schema = readCoderJson("schemas", "approved-plan-payload.schema.json");
const validPayload = readCoderJson("examples", "approved-plan-payload.valid.json");

/**
 * Validate an approved-plan payload and collect every violation found.
 *
 * Field names, the required-field list and enum values are read from the loaded
 * JSON schema (`schema`); the remaining structural rules are hand-written here
 * and in the helper validators this function calls.
 *
 * @param {unknown} payload - Parsed JSON value to check.
 * @returns {string[]} Error messages in check order; empty when the payload is accepted.
 */
function validatePayload(payload) {
  const errors = [];
  if (!isPlainObject(payload)) return ["payload must be an object"];

  // Reject any top-level key the schema does not declare.
  const allowedFields = new Set(Object.keys(schema.properties));
  for (const field of Object.keys(payload)) {
    if (!allowedFields.has(field)) errors.push(`unknown field: ${field}`);
  }

  // Own-property test on purpose: the `in` operator also matches members inherited
  // from Object.prototype (e.g. "toString"), which would hide a missing field.
  for (const field of schema.required) {
    if (!Object.prototype.hasOwnProperty.call(payload, field)) errors.push(`missing required field: ${field}`);
  }

  if (payload.approved !== true) errors.push("approved must be true");
  if (!schema.properties.plan_mode.enum.includes(payload.plan_mode)) errors.push(`unsupported plan_mode: ${payload.plan_mode}`);

  requireStringArray(payload, "exact_files_allowed", errors, { unique: true });
  requireStringArray(payload, "behavior_to_preserve", errors);
  requireStringArray(payload, "non_goals", errors);
  requireStringArray(payload, "high_risk_categories", errors, {
    unique: true,
    allowed: schema.properties.high_risk_categories.items.enum,
  });

  // Each verification entry needs a command to run and the reason for running it.
  if (!Array.isArray(payload.required_verification) || payload.required_verification.length === 0) {
    errors.push("required_verification must be a non-empty array");
  } else {
    payload.required_verification.forEach((entry, index) => {
      if (!isPlainObject(entry)) {
        errors.push(`required_verification[${index}] must be an object`);
        return;
      }
      for (const field of ["command", "purpose"]) {
        if (typeof entry[field] !== "string" || entry[field].trim() === "") {
          errors.push(`required_verification[${index}].${field} must be a non-empty string`);
        }
      }
    });
  }

  if (typeof payload.first_safe_slice !== "string" || payload.first_safe_slice.trim() === "") {
    errors.push("first_safe_slice must be a non-empty string");
  }

  // The list must be present but empty: any remaining confirmation rejects the plan.
  if (!Array.isArray(payload.unresolved_required_confirmations)) {
    errors.push("unresolved_required_confirmations must be an array");
  } else if (payload.unresolved_required_confirmations.length > 0) {
    errors.push("unresolved_required_confirmations must be empty");
  }

  if (!Array.isArray(payload.evidence_anchors)) {
    errors.push("evidence_anchors must be an array");
  } else {
    payload.evidence_anchors.forEach((anchor, index) => validateEvidenceAnchor(anchor, index, errors));
  }

  // Conditional rule: an empty anchor list is only acceptable outside Deep Risk plans.
  if (payload.plan_mode === "Deep Risk Patch Plan" && (!Array.isArray(payload.evidence_anchors) || payload.evidence_anchors.length === 0)) {
    errors.push("Deep Risk Patch Plan requires at least one evidence anchor");
  }

  validateEnvironment(payload.environment, errors);
  validateDecisionTrace(payload.decision_trace, errors);

  return errors;
}

/**
 * Check that `payload[field]` is a non-empty array whose items are all non-blank strings.
 *
 * Violations are appended to `errors`; nothing is returned. One message is pushed
 * per offending item, so the same message can appear more than once.
 *
 * @param {object} payload - Object holding the field.
 * @param {string} field - Name of the array property to inspect.
 * @param {string[]} errors - Accumulator that receives error messages.
 * @param {{unique?: boolean, allowed?: string[]}} [options] - `unique` flags repeated
 *   items; `allowed` restricts items to the given values.
 */
function requireStringArray(payload, field, errors, options = {}) {
  const items = payload[field];
  const isPopulatedArray = Array.isArray(items) && items.length > 0;
  if (!isPopulatedArray) {
    errors.push(`${field} must be a non-empty array`);
    return;
  }

  const encountered = new Set();
  for (let position = 0; position < items.length; position += 1) {
    const item = items[position];
    const isUsableString = typeof item === "string" && item.trim() !== "";
    if (!isUsableString) {
      // Invalid items are reported and excluded from the uniqueness/allow-list checks.
      errors.push(`${field} must contain only non-empty strings`);
      continue;
    }

    if (options.unique && encountered.has(item)) {
      errors.push(`${field} must be unique`);
    }
    encountered.add(item);

    if (options.allowed && !options.allowed.includes(item)) {
      // Singular label for the message: special-cased for categories, otherwise drop the last character.
      let label;
      if (field === "high_risk_categories") {
        label = "high_risk_category";
      } else {
        label = field.slice(0, -1);
      }
      errors.push(`unsupported ${label}: ${item}`);
    }
  }
}

/**
 * Validate one entry of `evidence_anchors`, appending any violations to `errors`.
 *
 * @param {unknown} anchor - The entry to check.
 * @param {number} index - Position of the entry, used in error messages.
 * @param {string[]} errors - Accumulator that receives error messages.
 */
function validateEvidenceAnchor(anchor, index, errors) {
  if (!isPlainObject(anchor)) {
    errors.push(`evidence_anchors[${index}] must be an object`);
    return;
  }

  // Every descriptive field must be a non-blank string.
  const textFields = ["severity", "file", "line_function_symbol", "observed_behavior", "why_this_is_risky"];
  const isFilled = (value) => typeof value === "string" && value.trim() !== "";
  textFields
    .filter((field) => !isFilled(anchor[field]))
    .forEach((field) => {
      errors.push(`evidence_anchors[${index}].${field} must be a non-empty string`);
    });

  // Severity is additionally restricted to a fixed set, so a missing severity yields two messages.
  const supportedSeverities = ["P0", "P1", "P2", "P3"];
  if (supportedSeverities.includes(anchor.severity) === false) {
    errors.push(`evidence_anchors[${index}].severity is unsupported`);
  }
}

/**
 * Validate the `environment` object of a payload, appending violations to `errors`.
 *
 * The required-field list and the enum values for `agent_platform` and `os_family`
 * are read from the loaded schema.
 *
 * @param {unknown} environment - Value of `payload.environment`.
 * @param {string[]} errors - Accumulator that receives error messages.
 */
function validateEnvironment(environment, errors) {
  if (!isPlainObject(environment)) {
    errors.push("environment must be an object");
    return;
  }

  const { properties: fieldSchemas, required: requiredFields } = schema.properties.environment;

  // Every schema-required environment field must be a non-blank string.
  for (const name of requiredFields) {
    const value = environment[name];
    const isFilled = typeof value === "string" && value.trim() !== "";
    if (!isFilled) {
      errors.push(`environment.${name} must be a non-empty string`);
    }
  }

  // These two fields must also match one of the schema's enumerated values.
  for (const name of ["agent_platform", "os_family"]) {
    const permitted = fieldSchemas[name].enum;
    if (!permitted.includes(environment[name])) {
      errors.push(`environment.${name} is unsupported`);
    }
  }
}

/**
 * Validate the `decision_trace` array of a payload, appending violations to `errors`.
 *
 * The minimum length comes from the schema's `decision_trace.minItems`; note that the
 * error message says "three" regardless of the configured minimum.
 *
 * @param {unknown} decisionTrace - Value of `payload.decision_trace`.
 * @param {string[]} errors - Accumulator that receives error messages.
 */
function validateDecisionTrace(decisionTrace, errors) {
  const tooShortMessage = "decision_trace must contain at least three steps";
  if (!Array.isArray(decisionTrace)) {
    errors.push(tooShortMessage);
    return;
  }
  if (decisionTrace.length < schema.properties.decision_trace.minItems) {
    errors.push(tooShortMessage);
    return;
  }

  // Each step must be an object carrying three non-blank text fields.
  const textFields = ["step", "decision", "evidence"];
  decisionTrace.forEach((entry, index) => {
    if (isPlainObject(entry)) {
      textFields.forEach((field) => {
        const value = entry[field];
        const isFilled = typeof value === "string" && value.trim() !== "";
        if (!isFilled) {
          errors.push(`decision_trace[${index}].${field} must be a non-empty string`);
        }
      });
    } else {
      errors.push(`decision_trace[${index}] must be an object`);
    }
  });
}

/**
 * Report whether `value` is a non-null, non-array object.
 *
 * @param {unknown} value - Value to classify.
 * @returns {boolean} True for objects other than arrays; false for null, primitives and arrays.
 */
function isPlainObject(value) {
  if (!value) return false;
  if (typeof value !== "object") return false;
  return !Array.isArray(value);
}

/**
 * Deep-copy a JSON-compatible value by serialising it and parsing the result.
 *
 * @param {unknown} value - JSON-serialisable value to copy.
 * @returns {unknown} An independent copy; properties JSON cannot represent (e.g. `undefined`) are dropped.
 */
function clone(value) {
  const serialized = JSON.stringify(value);
  return JSON.parse(serialized);
}

/**
 * Assert that a mutated copy of the valid fixture is rejected with a given message.
 *
 * @param {string} name - Case label used in the assertion failure text.
 * @param {(payload: object) => void} mutate - Edits the fresh copy in place to make it invalid.
 * @param {string} expectedMessage - Substring that at least one reported error must contain.
 */
function expectInvalid(name, mutate, expectedMessage) {
  // Work on a copy so each case starts from the untouched fixture.
  const candidate = clone(validPayload);
  mutate(candidate);

  const reported = validatePayload(candidate);
  const matched = reported.some((message) => message.includes(expectedMessage));
  assert.ok(matched, `${name} should fail with ${expectedMessage}; got ${reported.join("; ")}`);
}

// The untouched fixture has to validate cleanly before the negative cases mean anything.
assert.deepEqual(validatePayload(validPayload), []);

// Each case: [label, mutation applied to a fresh copy of the fixture, substring expected in some error].
const invalidCases = [
  [
    "missing decision trace",
    (payload) => {
      delete payload.decision_trace;
    },
    "missing required field: decision_trace",
  ],
  [
    // Only fails if the fixture's plan_mode is "Deep Risk Patch Plan".
    "deep risk without evidence anchors",
    (payload) => {
      payload.evidence_anchors = [];
    },
    "Deep Risk Patch Plan requires at least one evidence anchor",
  ],
  [
    "unknown category",
    (payload) => {
      payload.high_risk_categories = ["billing", "unknown"];
    },
    "unsupported high_risk_category: unknown",
  ],
  [
    "extra field",
    (payload) => {
      payload.extra = true;
    },
    "unknown field: extra",
  ],
  [
    "unresolved confirmations",
    (payload) => {
      payload.unresolved_required_confirmations = ["confirm downtime window"];
    },
    "unresolved_required_confirmations must be empty",
  ],
];

for (const [label, mutate, expectedMessage] of invalidCases) {
  expectInvalid(label, mutate, expectedMessage);
}

console.log("approved-plan schema sample tests passed");
38 changes: 38 additions & 0 deletions scripts/validate-skill.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,10 @@ check(skillText.includes("Keep direct implementation in `crossframe-code` only w
check(skillText.includes("evals/dual-core-routing-conflict-tests.md"), "crossframe-code Trial Materials lists dual-core routing conflict eval");
check(skillText.includes("../crossframe-coder/evals/golden-implementation-reports.md"), "crossframe-code Trial Materials lists coder golden reports");
check(skillText.includes("../crossframe-coder/evals/platform-trigger-routing-tests.md"), "crossframe-code Trial Materials lists platform trigger eval");
check(skillText.includes("../crossframe-coder/examples/approved-plan-payload.valid.json"), "crossframe-code Trial Materials lists approved-plan valid fixture");
check(skillText.includes("../crossframe-coder/schemas/approved-plan-payload.schema.json"), "crossframe-code Trial Materials lists approved-plan schema");
check(skillText.includes("../../scripts/test-install-adapters.mjs"), "crossframe-code Trial Materials lists installer smoke test");
check(skillText.includes("../../scripts/test-approved-plan-schema.mjs"), "crossframe-code Trial Materials lists approved-plan schema test");
check(skillText.includes("Fixing suspicious legacy output"), "SKILL.md rejects premature suspicious-output fixes");

const problemRouter = exists(path.join("references", "problem-router.md"), skillRoot)
Expand Down Expand Up @@ -811,9 +813,11 @@ check(coderText.includes("first safe slice"), "crossframe-coder approved plan ex
check(coderText.includes("references/approved-plan-payload-schema.md"), "crossframe-coder references approved-plan schema reference");
check(coderText.includes("schemas/approved-plan-payload.schema.json"), "crossframe-coder references approved-plan JSON schema");
check(coderText.includes("decision trace"), "crossframe-coder requires approved-plan decision trace");
check(coderText.includes("examples/approved-plan-payload.valid.json"), "crossframe-coder Trial Materials lists approved-plan valid fixture");
check(coderText.includes("evals/platform-trigger-routing-tests.md"), "crossframe-coder Trial Materials lists platform trigger eval");
check(coderText.includes("evals/golden-implementation-reports.md"), "crossframe-coder Trial Materials lists golden implementation reports");
check(coderText.includes("../../scripts/test-install-adapters.mjs"), "crossframe-coder Trial Materials lists installer smoke test");
check(coderText.includes("../../scripts/test-approved-plan-schema.mjs"), "crossframe-coder Trial Materials lists approved-plan schema test");

let approvedPlanSchema;
try {
Expand Down Expand Up @@ -851,6 +855,23 @@ if (approvedPlanSchema) {
check(environmentRequired.includes(field), `approved-plan schema environment requires ${field}`);
}
check((approvedPlanSchema.properties?.decision_trace?.minItems || 0) >= 3, "approved-plan schema requires multi-step decision trace");
const deepRiskRule = JSON.stringify(approvedPlanSchema.allOf || []);
check(deepRiskRule.includes("Deep Risk Patch Plan") && deepRiskRule.includes("evidence_anchors") && deepRiskRule.includes("minItems"), "approved-plan schema requires Deep Risk evidence anchors");
}

// Parse the checked-in approved-plan sample fixture. If reading or JSON parsing throws,
// the failure is reported via fail() with the error message and approvedPlanSample stays undefined.
// NOTE(review): read/pass/fail/check are defined outside this hunk — their exact semantics are presumed from usage here.
let approvedPlanSample;
try {
approvedPlanSample = JSON.parse(read(path.join("examples", "approved-plan-payload.valid.json"), coderRoot));
pass("approved-plan valid fixture parses");
} catch (error) {
fail("approved-plan valid fixture parses", error.message);
}

// Shape checks on the fixture; skipped entirely when parsing failed above.
if (approvedPlanSample) {
check(approvedPlanSample.plan_mode === "Deep Risk Patch Plan", "approved-plan valid fixture uses Deep Risk plan");
check(Array.isArray(approvedPlanSample.evidence_anchors) && approvedPlanSample.evidence_anchors.length > 0, "approved-plan valid fixture includes evidence anchors");
check(Array.isArray(approvedPlanSample.decision_trace) && approvedPlanSample.decision_trace.length >= 3, "approved-plan valid fixture includes decision trace");
// The fixture is expected to list "idempotency" among its high-risk categories.
check(Array.isArray(approvedPlanSample.high_risk_categories) && approvedPlanSample.high_risk_categories.includes("idempotency"), "approved-plan valid fixture includes high-risk categories");
}

const coderReferenced = new Set(
Expand Down Expand Up @@ -903,9 +924,11 @@ const approvedPlanReference = exists(path.join("references", "approved-plan-payl
: "";
check(approvedPlanReference.includes("schemas/approved-plan-payload.schema.json"), "approved-plan reference points to JSON schema");
check(approvedPlanReference.includes("Required Payload Fields"), "approved-plan reference lists required fields");
check(approvedPlanReference.includes("Deep Risk Conditional Rule"), "approved-plan reference documents Deep Risk evidence anchors");
check(approvedPlanReference.includes("Environment Marker"), "approved-plan reference includes environment marker");
check(approvedPlanReference.includes("Decision Trace Line"), "approved-plan reference includes decision trace line");
check(approvedPlanReference.includes("High-Risk Coverage Check"), "approved-plan reference includes high-risk coverage check");
check(approvedPlanReference.includes("test-approved-plan-schema.mjs"), "approved-plan reference points to schema test script");

const coderSourceDriven = exists(path.join("references", "source-driven-api-check.md"), coderRoot)
? read(path.join("references", "source-driven-api-check.md"), coderRoot)
Expand Down Expand Up @@ -1016,7 +1039,9 @@ check(readme.includes("Platform Adapters"), "README includes platform adapters s
check(readme.includes("Claude Code") && readme.includes("Cursor") && readme.includes("Gemini CLI"), "README names supported non-Codex platforms");
check(readme.includes("install-adapters.mjs"), "README documents install adapter script");
check(readme.includes("node scripts/test-install-adapters.mjs"), "README documents installer smoke test command");
check(readme.includes("node scripts/test-approved-plan-schema.mjs"), "README documents approved-plan schema test command");
check(readme.includes("multi-platform adapter installs"), "README documents multi-platform installer smoke coverage");
check(readme.includes("Deep Risk plans without evidence anchors"), "README documents approved-plan invalid cases");
check(readme.includes("Influences and Attribution"), "README includes influences and attribution section");
check(readme.includes("felipereisdev/code-review-skill"), "README attributes felipereisdev code-review-skill");
check(readme.includes("review scope selection") && readme.includes("stack detection") && readme.includes("project convention-first"), "README names borrowed review-scope stack-convention ideas");
Expand All @@ -1032,6 +1057,8 @@ check(installDoc.includes("stale destination files"), "INSTALL.md documents stal
check(installDoc.includes("exact post-sync verification"), "INSTALL.md documents exact sync verification");
check(installDoc.includes("EBUSY") && installDoc.includes("EPERM"), "INSTALL.md documents file lock retry");
check(installDoc.includes("Linux") && installDoc.includes("Windows") && installDoc.includes("macOS"), "INSTALL.md documents cross-platform CI smoke");
check(installDoc.includes("node scripts/test-approved-plan-schema.mjs"), "INSTALL.md documents approved-plan schema test command");
check(installDoc.includes("Deep Risk plans without evidence anchors"), "INSTALL.md documents approved-plan invalid cases");
for (const phrase of ["Codex", "Claude Code", "Cursor", "Gemini CLI", "Generic Agents", "--dry-run", "--force"]) {
check(installDoc.includes(phrase), `INSTALL.md documents ${phrase}`);
}
Expand Down Expand Up @@ -1098,12 +1125,23 @@ check(installTest.includes("assertDirectoryExact"), "install smoke test verifies
check(installTest.includes("crossframe-install-platform-target"), "install smoke test uses multi-platform temp target");
check(installTest.includes(".cursor") && installTest.includes(".gemini") && installTest.includes(".agent-skills"), "install smoke test covers platform adapter destinations");

// Load the source text of the schema test script, falling back to "" when the file is absent
// so the substring checks below evaluate to false instead of throwing.
// NOTE(review): exists/read/check are defined outside this hunk — semantics presumed from usage.
const approvedPlanSchemaTest = exists(path.join("scripts", "test-approved-plan-schema.mjs"), repoRoot)
? read(path.join("scripts", "test-approved-plan-schema.mjs"), repoRoot)
: "";
check(Boolean(approvedPlanSchemaTest), "approved-plan schema executable test exists");
// The remaining checks are plain substring matches against the script's text, so these
// literals must keep appearing verbatim in scripts/test-approved-plan-schema.mjs.
check(approvedPlanSchemaTest.includes("approved-plan-payload.valid.json"), "approved-plan schema test reads valid fixture");
check(approvedPlanSchemaTest.includes("Deep Risk Patch Plan requires at least one evidence anchor"), "approved-plan schema test rejects Deep Risk without evidence anchors");
check(approvedPlanSchemaTest.includes("missing required field: decision_trace"), "approved-plan schema test rejects missing decision trace");
check(approvedPlanSchemaTest.includes("unknown field: extra"), "approved-plan schema test rejects extra fields");
check(approvedPlanSchemaTest.includes("unsupported high_risk_category"), "approved-plan schema test rejects unsupported high-risk categories");

// Load the GitHub Actions workflow text, falling back to "" when the file is absent
// so the substring checks below evaluate to false instead of throwing.
// NOTE(review): exists/read/check are defined outside this hunk — semantics presumed from usage.
const validateWorkflow = exists(path.join(".github", "workflows", "validate.yml"), repoRoot)
? read(path.join(".github", "workflows", "validate.yml"), repoRoot)
: "";
check(Boolean(validateWorkflow), "GitHub Actions validation workflow exists");
// Substring matches only: these confirm the commands and runner names appear in the file,
// not that the workflow actually executes them.
check(validateWorkflow.includes("node scripts/validate-skill.mjs"), "workflow runs validator");
check(validateWorkflow.includes("node scripts/test-install-adapters.mjs"), "workflow runs installer smoke test");
check(validateWorkflow.includes("node scripts/test-approved-plan-schema.mjs"), "workflow runs approved-plan schema test");
check(validateWorkflow.includes("ubuntu-latest") && validateWorkflow.includes("windows-latest") && validateWorkflow.includes("macos-latest"), "workflow runs validation on Linux Windows macOS");

const installedPath = path.join(os.homedir(), ".codex", "skills", "crossframe-code");
Expand Down
2 changes: 2 additions & 0 deletions skills/crossframe-code/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,8 @@ Treat these as blockers:
- `evals/dual-core-routing-conflict-tests.md`: conflict checks for code/coder routing, approved-plan implementation, review-before-fix, and security shortcut handling.
- `../crossframe-coder/evals/golden-implementation-reports.md`: companion implementation, blocked verification, high-risk handoff, and approved-plan first-safe-slice reports.
- `../crossframe-coder/evals/platform-trigger-routing-tests.md`: Codex, Claude, Cursor, Gemini, and generic adapter route checks.
- `../crossframe-coder/examples/approved-plan-payload.valid.json`: machine-validated approved-plan payload fixture.
- `../crossframe-coder/schemas/approved-plan-payload.schema.json`: machine-readable approved-plan payload fields for high-risk implementation.
- `../../scripts/test-install-adapters.mjs`: repository installer smoke test for exact sync, stale pruning, identical skip, preflight, and multi-platform temp installs.
- `../../scripts/test-approved-plan-schema.mjs`: repository schema test for valid payloads and invalid missing-field/Deep Risk cases.
- `evals/golden-patch-plans.md`: sample passing outputs for local, architecture, and post-implementation modes.
Loading
Loading