diff --git a/.codex-plugin/plugin.json b/.codex-plugin/plugin.json index e36fa7f..a5e1b48 100644 --- a/.codex-plugin/plugin.json +++ b/.codex-plugin/plugin.json @@ -41,7 +41,8 @@ "defaultPrompt": [ "Use $crossframe-code to diagnose risky code and produce a minimal replacement plan.", "Use $crossframe-code to review this diff for structural failure modes before implementation.", - "Use $crossframe-coder to implement a clear code change in small verified slices." + "Use $crossframe-coder to implement a clear code change in small verified slices.", + "For high-risk implementation, require the Approved High-Risk Plan Exception payload: exact files, behavior to preserve, non-goals, verification, first safe slice, resolved confirmations, environment marker, and decision_trace." ], "brandColor": "#4F46E5" } diff --git a/.cursor/rules/crossframe-code.mdc b/.cursor/rules/crossframe-code.mdc index d402614..7db548e 100644 --- a/.cursor/rules/crossframe-code.mdc +++ b/.cursor/rules/crossframe-code.mdc @@ -26,3 +26,5 @@ Operational rules: - Do not implement without a verification command or manual check. - Use `crossframe-coder` only when the user explicitly asks to implement, build, add, fix, generate, modify, patch, or directly change code. - Hand off from `crossframe-coder` to `crossframe-code` when mechanism, behavior preservation, auth, tenant, money, migration, concurrency, durable state, webhook, outbox, or security risk is unclear. +- Approved high-risk implementation may stay in `crossframe-coder` only when the approved-plan payload names exact files, behavior to preserve, non-goals, verification, first safe slice, resolved confirmations, high-risk categories, environment marker, and `decision_trace`; otherwise hand off and log missing fields. +- Decision trace shape: `decision_trace: environment=; risk=; schema=pass|fail; route=implement|handoff; files=; verification=; first_safe_slice=; reason=`. 
diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index 12f8f0a..3e81bd9 100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -8,7 +8,15 @@ on: jobs: validate: - runs-on: ubuntu-latest + name: validate (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - ubuntu-latest + - windows-latest + - macos-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-node@v4 diff --git a/AGENTS.md b/AGENTS.md index 7705694..f47da8e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -12,3 +12,5 @@ When working on this repo: - Do not weaken the default no-edit contract, evidence-anchor requirements, or verification requirements. - Keep platform adapters concise: they should point agents to `SKILL.md` and name only the triggering rules each platform needs. - Keep the split clear: `crossframe-code` thinks through risk and plans; `crossframe-coder` implements clear approved changes in verified slices. +- Approved high-risk implementation requires the approved-plan payload fields from `skills/crossframe-coder/references/approved-plan-payload-schema.md`; otherwise hand off to `crossframe-code` and log missing fields. +- Platform adapters must preserve the `decision_trace` line for high-risk handoff or implementation decisions. diff --git a/CLAUDE.md b/CLAUDE.md index 3b6f995..43feebc 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,6 +6,8 @@ Trigger this workflow when the user asks for `crossframe-code`, code diagnosis, Use `crossframe-coder` when the user explicitly asks to implement, build, add, fix, generate, modify, patch, or directly change code. Hand off back to `crossframe-code` when mechanism, behavior preservation, auth, tenant, money, migration, concurrency, durable state, webhook, outbox, or security risk is unclear. 
+Approved high-risk implementation may stay in `crossframe-coder` only when the approved-plan payload names exact files, behavior to preserve, non-goals, verification, first safe slice, resolved confirmations, high-risk categories, environment marker, and `decision_trace`. Otherwise hand off to `crossframe-code` and log the missing payload fields. + Core behavior: - Default output is read-only diagnosis or a patch plan; do not edit files unless the user explicitly asks for implementation. @@ -16,3 +18,4 @@ Core behavior: - Every P0/P1 must include an evidence anchor with file, line/function/symbol, observed behavior, and why this is risky. - Do not rank files high risk by line count alone. - Do not implement without a verification path. +- Handoff/implement decisions for approved high-risk plans must include `decision_trace: environment=; risk=; schema=pass|fail; route=implement|handoff; files=; verification=; first_safe_slice=; reason=`. diff --git a/GEMINI.md b/GEMINI.md index 893a3e1..c6d9e11 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -25,3 +25,7 @@ Non-negotiable rules: - Every implementation plan must include verification. Use `crossframe-coder` when the user explicitly asks to implement, build, add, fix, generate, modify, patch, or directly change code. Hand off to `crossframe-code` when mechanism, behavior preservation, auth, tenant, money, migration, concurrency, durable state, webhook, outbox, or security risk is unclear. + +Approved high-risk implementation may stay in `crossframe-coder` only when the approved-plan payload names exact files, behavior to preserve, non-goals, verification, first safe slice, resolved confirmations, high-risk categories, environment marker, and `decision_trace`. Otherwise hand off to `crossframe-code` and log the missing payload fields. + +Decision trace shape: `decision_trace: environment=; risk=; schema=pass|fail; route=implement|handoff; files=; verification=; first_safe_slice=; reason=`. 
diff --git a/INSTALL.md b/INSTALL.md index 8b4be59..d8f4e92 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -81,5 +81,17 @@ This copies `AGENTS.md` and both skill folders to `.agent-skills/` inside the ta - Existing files and directories are not overwritten unless `--force` is provided. - For Codex self-installs, the installer copies `crossframe-coder` before `crossframe-code` so the suite is less likely to be left half-installed if the host reloads the active diagnosis skill. - Identical skill directories are reported as `SKIP ... (identical)` even with `--force`; this avoids touching active skill directories unnecessarily. -- Non-identical skill directories are synchronized before use; stale destination files are pruned by the installer. +- Non-identical skill directories are synchronized before use; stale destination files are pruned by the installer and exact post-sync verification reports missing, stale, or changed files if sync fails. +- File copy and stale-file removal retry transient Windows-style file locks (`EBUSY` / `EPERM`) before failing. - Platform adapters are intentionally thin. They point agents to `skills/crossframe-code/SKILL.md` instead of duplicating the full skill instructions. + +## Smoke Test + +Run from the repository root: + +```bash +node scripts/validate-skill.mjs +node scripts/test-install-adapters.mjs +``` + +The installer smoke test uses temporary Codex and Claude homes plus a temporary target project for Cursor, Gemini, and generic adapters. It verifies identical-directory skip, stale-file pruning, exact skill sync, `--all` preflight failure before writes, and multi-platform installs on the current OS. CI runs the same checks on Linux, Windows, and macOS. 
diff --git a/README.md b/README.md index c15c825..4009ffb 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,8 @@ Use `crossframe-code` when the user asks "what is risky?", "can this PR merge?", Use `crossframe-coder` when the user explicitly asks to implement, build, add, fix, generate, modify, patch, or directly change code and the change is clear enough to verify. +Approved high-risk implementation is a narrow exception. It requires an approved-plan payload with exact files, behavior to preserve, non-goals, verification, first safe slice, resolved confirmations, high-risk categories, environment marker, and `decision_trace`; otherwise `crossframe-coder` hands the work back to `crossframe-code`. + ## What It Does - Diagnoses the exact code object under review: function, component, module, interface, test, runtime path, or architecture boundary. @@ -157,12 +159,14 @@ skills/ crossframe-coder/ SKILL.md agents/ + schemas/ references/ templates/ examples/ evals/ scripts/ install-adapters.mjs + test-install-adapters.mjs validate-skill.mjs ``` @@ -203,6 +207,16 @@ node scripts/test-install-adapters.mjs The validator checks plugin metadata, skill frontmatter, referenced files, architecture lenses, templates, eval coverage, Golden Master rules, local-project risk scan rules, review scope and stack-convention rules, settlement-consistency constraints, and installation isolation. +Quick smoke run before publishing: + +```bash +node scripts/validate-skill.mjs +node scripts/test-install-adapters.mjs +node scripts/install-adapters.mjs --platform codex --force +``` + +The installer smoke test uses temporary Codex/Claude homes and a temporary target project to verify exact skill sync, stale-file pruning, identical-directory skip, `--all` preflight behavior, and multi-platform adapter installs. + ## Influences and Attribution This repository studies adjacent public skill repositories and adapts compatible ideas into the CrossFrame Code workflow. 
Borrowed ideas are rewritten as local instructions and validator-backed structures rather than copied wholesale. diff --git a/scripts/install-adapters.mjs b/scripts/install-adapters.mjs index c7f22e5..9252fc5 100644 --- a/scripts/install-adapters.mjs +++ b/scripts/install-adapters.mjs @@ -7,6 +7,8 @@ const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), ".." // Copy the companion implementation skill before the active diagnosis skill so // Codex self-updates cannot leave the suite half-installed if the host reloads. const skillNames = ["crossframe-coder", "crossframe-code"]; +const fileLockRetryCodes = new Set(["EBUSY", "EPERM"]); +const fileLockRetryLimit = 3; const args = process.argv.slice(2); const platforms = new Set(); @@ -120,7 +122,7 @@ function copyFile(source, destination) { logWrite("file", source, destination); if (dryRun) return; fs.mkdirSync(path.dirname(destination), { recursive: true }); - fs.copyFileSync(source, destination); + withFileLockRetry(() => fs.copyFileSync(source, destination), `copy file ${destination}`); } function copyDir(source, destination) { @@ -134,8 +136,9 @@ function copyDir(source, destination) { fs.mkdirSync(destination, { recursive: true }); copyDirectoryContents(source, destination); pruneStaleDestination(source, destination); - if (!directoriesMatch(source, destination)) { - throw new Error(`Directory sync did not produce an exact match: ${destination}`); + const comparison = compareDirectories(source, destination); + if (!comparison.matches) { + throw new Error(`Directory sync did not produce an exact match: ${destination}. 
${formatDirectoryMismatch(comparison)}`); } } @@ -149,7 +152,7 @@ function copyDirectoryContents(source, destination) { copyDirectoryContents(sourcePath, destinationPath); } else if (entry.isFile()) { fs.mkdirSync(path.dirname(destinationPath), { recursive: true }); - fs.copyFileSync(sourcePath, destinationPath); + withFileLockRetry(() => fs.copyFileSync(sourcePath, destinationPath), `copy file ${destinationPath}`); } } } @@ -160,7 +163,7 @@ function pruneStaleDestination(source, destination) { for (const relativePath of destinationFiles) { if (!sourceFiles.has(relativePath)) { const stalePath = path.join(destination, relativePath); - fs.rmSync(stalePath, { force: true }); + withFileLockRetry(() => fs.rmSync(stalePath, { force: true }), `remove stale file ${stalePath}`); logSkip("file", path.join(source, relativePath), stalePath, "removed stale destination file"); } } @@ -177,31 +180,62 @@ function pruneEmptyDirs(root, relativeRoot = "") { } } if (relativeRoot && fs.readdirSync(absoluteRoot).length === 0) { - fs.rmdirSync(absoluteRoot); + withFileLockRetry(() => fs.rmdirSync(absoluteRoot), `remove empty directory ${absoluteRoot}`); } } function directoriesMatch(source, destination) { + return compareDirectories(source, destination).matches; +} + +function compareDirectories(source, destination) { try { const sourceFiles = collectFiles(source); - const destinationFiles = collectFiles(destination); - if (sourceFiles.length !== destinationFiles.length) return false; - for (let index = 0; index < sourceFiles.length; index += 1) { - const relativePath = sourceFiles[index]; - if (relativePath !== destinationFiles[index]) return false; - const sourceFile = path.join(source, relativePath); - const destinationFile = path.join(destination, relativePath); - const sourceStat = fs.statSync(sourceFile); - const destinationStat = fs.statSync(destinationFile); - if (sourceStat.size !== destinationStat.size) return false; - if 
(!fs.readFileSync(sourceFile).equals(fs.readFileSync(destinationFile))) return false; + const destinationFiles = fs.existsSync(destination) ? collectFiles(destination) : []; + const sourceSet = new Set(sourceFiles); + const destinationSet = new Set(destinationFiles); + const missingFiles = sourceFiles.filter((relativePath) => !destinationSet.has(relativePath)); + const staleFiles = destinationFiles.filter((relativePath) => !sourceSet.has(relativePath)); + const changedFiles = []; + for (const relativePath of sourceFiles) { + if (destinationSet.has(relativePath) && !filesEqual(path.join(source, relativePath), path.join(destination, relativePath))) { + changedFiles.push(relativePath); + } } - return true; - } catch { - return false; + return { + matches: missingFiles.length === 0 && staleFiles.length === 0 && changedFiles.length === 0, + missingFiles, + staleFiles, + changedFiles, + error: "", + }; + } catch (error) { + return { + matches: false, + missingFiles: [], + staleFiles: [], + changedFiles: [], + error: error.message, + }; } } +function filesEqual(sourceFile, destinationFile) { + const sourceStat = fs.statSync(sourceFile); + const destinationStat = fs.statSync(destinationFile); + if (sourceStat.size !== destinationStat.size) return false; + return fs.readFileSync(sourceFile).equals(fs.readFileSync(destinationFile)); +} + +function formatDirectoryMismatch(comparison) { + const parts = []; + if (comparison.missingFiles.length > 0) parts.push(`missing=${comparison.missingFiles.join(",")}`); + if (comparison.staleFiles.length > 0) parts.push(`stale=${comparison.staleFiles.join(",")}`); + if (comparison.changedFiles.length > 0) parts.push(`changed=${comparison.changedFiles.join(",")}`); + if (comparison.error) parts.push(`error=${comparison.error}`); + return parts.length > 0 ? 
parts.join("; ") : "unknown mismatch"; +} + function collectFiles(root, relativeRoot = "") { const entries = fs.readdirSync(path.join(root, relativeRoot), { withFileTypes: true }); const files = []; @@ -239,6 +273,27 @@ function logSkip(kind, source, destination, reason) { console.log(`SKIP ${kind}: ${source} -> ${destination} (${reason})`); } +function withFileLockRetry(operation, label) { + let lastError; + for (let attempt = 1; attempt <= fileLockRetryLimit; attempt += 1) { + try { + return operation(); + } catch (error) { + lastError = error; + if (!fileLockRetryCodes.has(error.code) || attempt === fileLockRetryLimit) { + throw error; + } + console.warn(`RETRY file lock: ${label} (${error.code}, attempt ${attempt + 1}/${fileLockRetryLimit})`); + sleep(50 * attempt); + } + } + throw lastError; +} + +function sleep(milliseconds) { + Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, milliseconds); +} + function printHelp() { console.log(`Usage: node scripts/install-adapters.mjs --platform codex diff --git a/scripts/test-install-adapters.mjs b/scripts/test-install-adapters.mjs index 5beb393..cf271cf 100644 --- a/scripts/test-install-adapters.mjs +++ b/scripts/test-install-adapters.mjs @@ -23,6 +23,38 @@ function assertSuccess(result) { assert.equal(result.status, 0, result.stderr || result.stdout); } +function collectFiles(root, relativeRoot = "") { + const entries = fs.readdirSync(path.join(root, relativeRoot), { withFileTypes: true }); + const files = []; + for (const entry of entries) { + const relativePath = path.join(relativeRoot, entry.name); + if (entry.isDirectory()) { + files.push(...collectFiles(root, relativePath)); + } else if (entry.isFile()) { + files.push(relativePath); + } + } + return files.sort(); +} + +function assertDirectoryExact(source, destination) { + const sourceFiles = collectFiles(source); + const destinationFiles = collectFiles(destination); + assert.deepEqual(destinationFiles, sourceFiles, `${destination} should have exact 
file list`); + for (const relativePath of sourceFiles) { + const sourceFile = path.join(source, relativePath); + const destinationFile = path.join(destination, relativePath); + assert.equal(fs.statSync(destinationFile).size, fs.statSync(sourceFile).size, `${relativePath} size should match`); + assert.equal(fs.readFileSync(destinationFile).equals(fs.readFileSync(sourceFile)), true, `${relativePath} content should match`); + } +} + +function assertSkillInstallExact(destinationRoot) { + for (const skillName of ["crossframe-coder", "crossframe-code"]) { + assertDirectoryExact(path.join(repoRoot, "skills", skillName), path.join(destinationRoot, skillName)); + } +} + const tempHome = fs.mkdtempSync(path.join(os.tmpdir(), "crossframe-install-")); try { @@ -31,6 +63,7 @@ try { assert.ok(fs.existsSync(path.join(tempHome, "skills", "crossframe-coder", "SKILL.md"))); assert.ok(fs.existsSync(path.join(tempHome, "skills", "crossframe-code", "SKILL.md"))); + assertSkillInstallExact(path.join(tempHome, "skills")); const stale = path.join(tempHome, "skills", "crossframe-code", "obsolete-reference.md"); fs.writeFileSync(stale, "old file"); @@ -39,6 +72,7 @@ try { assertSuccess(result); assert.equal(fs.existsSync(stale), false, "stale file should be pruned"); assert.match(result.stdout, /removed stale destination file/); + assertSkillInstallExact(path.join(tempHome, "skills")); result = runInstall(tempHome); assertSuccess(result); @@ -54,6 +88,34 @@ try { } finally { fs.rmSync(allHome, { recursive: true, force: true }); } + + const platformHome = fs.mkdtempSync(path.join(os.tmpdir(), "crossframe-install-platform-home-")); + const platformTarget = fs.mkdtempSync(path.join(os.tmpdir(), "crossframe-install-platform-target-")); + try { + result = runInstall(platformHome, ["--all", "--target", platformTarget, "--force"]); + assertSuccess(result); + + assertSkillInstallExact(path.join(platformHome, "skills")); + assertSkillInstallExact(path.join(platformHome, ".claude", "skills")); + 
assert.ok(fs.existsSync(path.join(platformHome, ".claude", "CLAUDE.md"))); + + assertSkillInstallExact(path.join(platformTarget, ".cursor", "skills")); + assert.ok(fs.existsSync(path.join(platformTarget, ".cursor", "rules", "crossframe-code.mdc"))); + + assertSkillInstallExact(path.join(platformTarget, ".gemini", "skills")); + assert.ok(fs.existsSync(path.join(platformTarget, "GEMINI.md"))); + + assertSkillInstallExact(path.join(platformTarget, ".agent-skills")); + assert.ok(fs.existsSync(path.join(platformTarget, "AGENTS.md"))); + + result = runInstall(platformHome, ["--all", "--target", platformTarget, "--force"]); + assertSuccess(result); + assert.match(result.stdout, /SKIP dir: .*crossframe-coder.*identical/); + assert.match(result.stdout, /SKIP dir: .*crossframe-code.*identical/); + } finally { + fs.rmSync(platformHome, { recursive: true, force: true }); + fs.rmSync(platformTarget, { recursive: true, force: true }); + } } finally { fs.rmSync(tempHome, { recursive: true, force: true }); } diff --git a/scripts/validate-skill.mjs b/scripts/validate-skill.mjs index 33cdfbd..f0e6dad 100644 --- a/scripts/validate-skill.mjs +++ b/scripts/validate-skill.mjs @@ -56,6 +56,11 @@ if (plugin) { plugin.interface.defaultPrompt.some((prompt) => prompt.includes("$crossframe-coder")), "plugin default prompt mentions $crossframe-coder" ); + check( + Array.isArray(plugin.interface?.defaultPrompt) && + plugin.interface.defaultPrompt.some((prompt) => prompt.includes("Approved High-Risk Plan Exception") && prompt.includes("decision_trace")), + "plugin default prompt documents approved-plan payload" + ); } let claudePlugin; @@ -114,6 +119,7 @@ const openaiYaml = exists(path.join("skills", "crossframe-code", "agents", "open ? 
read(path.join("skills", "crossframe-code", "agents", "openai.yaml")) : ""; check(openaiYaml.includes("$crossframe-code"), "openai.yaml default prompt mentions $crossframe-code"); +check(openaiYaml.includes("Approved High-Risk Plan Exception") && openaiYaml.includes("decision_trace"), "openai.yaml documents approved high-risk routing"); const classicCases = exists(path.join("examples", "classic-case-library.md"), skillRoot) ? read(path.join("examples", "classic-case-library.md"), skillRoot) @@ -273,6 +279,10 @@ check(skillText.includes("templates/source-driven-output.md"), "SKILL.md referen check(skillText.includes("In dual-core use, prefer handing clear implementation requests to `crossframe-coder`"), "SKILL.md prefers coder handoff for clear implementation"); check(skillText.includes("Keep direct implementation in `crossframe-code` only when no coder skill is available"), "SKILL.md keeps crossframe-code implementation as fallback"); check(skillText.includes("evals/dual-core-routing-conflict-tests.md"), "crossframe-code Trial Materials lists dual-core routing conflict eval"); +check(skillText.includes("../crossframe-coder/evals/golden-implementation-reports.md"), "crossframe-code Trial Materials lists coder golden reports"); +check(skillText.includes("../crossframe-coder/evals/platform-trigger-routing-tests.md"), "crossframe-code Trial Materials lists platform trigger eval"); +check(skillText.includes("../crossframe-coder/schemas/approved-plan-payload.schema.json"), "crossframe-code Trial Materials lists approved-plan schema"); +check(skillText.includes("../../scripts/test-install-adapters.mjs"), "crossframe-code Trial Materials lists installer smoke test"); check(skillText.includes("Fixing suspicious legacy output"), "SKILL.md rejects premature suspicious-output fixes"); const problemRouter = exists(path.join("references", "problem-router.md"), skillRoot) @@ -754,6 +764,11 @@ for (const phrase of [ check(dualCoreConflictEval.includes("Review first"), "dual-core 
routing conflict eval requires review before fix"); check(dualCoreConflictEval.includes("No new files unless the plan names them"), "dual-core routing conflict eval forbids unplanned new files"); check(dualCoreConflictEval.includes("Run exactly the listed verification first"), "dual-core routing conflict eval constrains approved plan verification"); +check(dualCoreConflictEval.includes("missing approved-plan payload fields"), "dual-core routing conflict eval requires missing field log"); +check(dualCoreConflictEval.includes("High-risk categories: billing, payment, webhook, tenant, idempotency, durable_state"), "dual-core routing conflict eval covers high-risk categories"); +check(dualCoreConflictEval.includes("Environment: agent_platform=codex"), "dual-core routing conflict eval includes environment marker"); +check(dualCoreConflictEval.includes("approved-plan payload schema"), "dual-core routing conflict eval requires schema validation"); +check(dualCoreConflictEval.includes("decision_trace: environment="), "dual-core routing conflict eval includes trace line shape"); check( occurrenceCount(dualCoreConflictEval, "crossframe-coder may implement only named files and planned verification.") === 0, "dual-core routing conflict eval removes duplicate approved-plan line" @@ -793,7 +808,50 @@ check(coderText.includes("verification blocked"), "crossframe-coder uses blocked check(coderText.includes("Approved High-Risk Plan Exception"), "crossframe-coder defines approved high-risk plan exception"); check(coderText.includes("no unresolved required confirmations"), "crossframe-coder approved plan exception requires confirmations to be resolved"); check(coderText.includes("first safe slice"), "crossframe-coder approved plan exception limits implementation to first safe slice"); +check(coderText.includes("references/approved-plan-payload-schema.md"), "crossframe-coder references approved-plan schema reference"); 
+check(coderText.includes("schemas/approved-plan-payload.schema.json"), "crossframe-coder references approved-plan JSON schema"); +check(coderText.includes("decision trace"), "crossframe-coder requires approved-plan decision trace"); +check(coderText.includes("evals/platform-trigger-routing-tests.md"), "crossframe-coder Trial Materials lists platform trigger eval"); check(coderText.includes("evals/golden-implementation-reports.md"), "crossframe-coder Trial Materials lists golden implementation reports"); +check(coderText.includes("../../scripts/test-install-adapters.mjs"), "crossframe-coder Trial Materials lists installer smoke test"); + +let approvedPlanSchema; +try { + approvedPlanSchema = JSON.parse(read(path.join("schemas", "approved-plan-payload.schema.json"), coderRoot)); + pass("approved-plan payload schema parses"); +} catch (error) { + fail("approved-plan payload schema parses", error.message); +} + +if (approvedPlanSchema) { + const requiredFields = [ + "approved", + "plan_mode", + "exact_files_allowed", + "behavior_to_preserve", + "non_goals", + "required_verification", + "first_safe_slice", + "unresolved_required_confirmations", + "evidence_anchors", + "high_risk_categories", + "environment", + "decision_trace", + ]; + for (const field of requiredFields) { + check(Array.isArray(approvedPlanSchema.required) && approvedPlanSchema.required.includes(field), `approved-plan schema requires ${field}`); + } + check(approvedPlanSchema.additionalProperties === false, "approved-plan schema rejects unknown top-level fields"); + const categoryEnum = approvedPlanSchema.properties?.high_risk_categories?.items?.enum || []; + for (const category of ["billing", "payment", "webhook", "tenant", "idempotency", "durable_state"]) { + check(categoryEnum.includes(category), `approved-plan schema includes high-risk category ${category}`); + } + const environmentRequired = approvedPlanSchema.properties?.environment?.required || []; + for (const field of ["agent_platform", 
"os_family", "repo_root", "dependency_context"]) { + check(environmentRequired.includes(field), `approved-plan schema environment requires ${field}`); + } + check((approvedPlanSchema.properties?.decision_trace?.minItems || 0) >= 3, "approved-plan schema requires multi-step decision trace"); +} const coderReferenced = new Set( Array.from(coderText.matchAll(/`([^`]+\.(?:md|yaml|json))`/g), (match) => match[1]).filter((relativePath) => @@ -812,11 +870,13 @@ check(coderOpenai.includes("$crossframe-code"), "crossframe-coder openai.yaml me check(coderOpenai.includes("interface:"), "crossframe-coder openai.yaml uses interface schema"); check(coderOpenai.includes("display_name: \"CrossFrame Coder\""), "crossframe-coder openai.yaml has display name"); check(coderOpenai.includes("default_prompt:"), "crossframe-coder openai.yaml has default prompt"); +check(coderOpenai.includes("Approved High-Risk Plan Exception") && coderOpenai.includes("decision_trace"), "crossframe-coder openai.yaml documents approved-plan exception"); for (const referenceName of [ "intent-and-scope.md", "repo-instructions-and-conventions.md", "source-driven-api-check.md", + "approved-plan-payload-schema.md", "edit-loop.md", "test-first-loop.md", "verification-matrix.md", @@ -833,6 +893,19 @@ check(coderHandoff.includes("auth") && coderHandoff.includes("tenant") && coderH check(coderHandoff.includes("Verification cannot be defined"), "crossframe-coder handoff covers missing verification"); check(coderHandoff.includes("unless the Approved High-Risk Plan Exception is satisfied"), "handoff reference allows approved high-risk plan exception"); check(coderHandoff.includes("Approved Plan Exception Check"), "handoff reference includes approved plan exception checklist"); +check(coderHandoff.includes("schema path checked"), "handoff reference checks schema path"); +check(coderHandoff.includes("environment marker"), "handoff reference requires environment marker"); +check(coderHandoff.includes("decision_trace:"), 
"handoff reference includes decision trace line"); +check(coderHandoff.includes("missing schema field"), "handoff reference logs missing schema fields"); + +const approvedPlanReference = exists(path.join("references", "approved-plan-payload-schema.md"), coderRoot) + ? read(path.join("references", "approved-plan-payload-schema.md"), coderRoot) + : ""; +check(approvedPlanReference.includes("schemas/approved-plan-payload.schema.json"), "approved-plan reference points to JSON schema"); +check(approvedPlanReference.includes("Required Payload Fields"), "approved-plan reference lists required fields"); +check(approvedPlanReference.includes("Environment Marker"), "approved-plan reference includes environment marker"); +check(approvedPlanReference.includes("Decision Trace Line"), "approved-plan reference includes decision trace line"); +check(approvedPlanReference.includes("High-Risk Coverage Check"), "approved-plan reference includes high-risk coverage check"); const coderSourceDriven = exists(path.join("references", "source-driven-api-check.md"), coderRoot) ? 
read(path.join("references", "source-driven-api-check.md"), coderRoot) @@ -875,6 +948,7 @@ for (const evalName of [ "codewriter-boundary-tests.md", "verification-failure-tests.md", "handoff-to-crossframe-code-tests.md", + "platform-trigger-routing-tests.md", "golden-implementation-reports.md", ]) { check(exists(path.join("evals", evalName), coderRoot), `crossframe-coder eval exists: ${evalName}`); @@ -895,6 +969,22 @@ for (const phrase of [ } check(coderGolden.includes("Source evidence:"), "crossframe-coder golden report covers source-driven implementation"); check(coderGolden.includes("Failure evidence preserved:"), "crossframe-coder golden report preserves failing evidence"); +check(coderGolden.includes("Golden 6: Approved High-Risk First Safe Slice"), "crossframe-coder golden report covers approved high-risk implementation"); +check(coderGolden.includes("Handoff reason:"), "crossframe-coder golden report includes handoff reason field"); +check(coderGolden.includes("Verification status: not run - handed off"), "crossframe-coder golden handoff report includes verification status"); +check(coderGolden.includes("decision_trace: environment=codex/windows"), "crossframe-coder golden report includes decision trace"); +for (const category of ["billing", "payment", "webhook", "tenant", "idempotency", "durable_state"]) { + check(coderGolden.includes(category), `crossframe-coder golden report covers high-risk category ${category}`); +} + +const platformTriggerEval = exists(path.join("evals", "platform-trigger-routing-tests.md"), coderRoot) + ? 
read(path.join("evals", "platform-trigger-routing-tests.md"), coderRoot) + : ""; +check(platformTriggerEval.includes("Codex, Claude Code, Cursor, Gemini CLI, and generic"), "platform trigger eval names supported platforms"); +check(platformTriggerEval.includes("Clear Implementation Request"), "platform trigger eval covers clear implementation"); +check(platformTriggerEval.includes("Claimed Approved Plan Missing Payload"), "platform trigger eval covers missing payload handoff"); +check(platformTriggerEval.includes("Complete Approved Plan Payload"), "platform trigger eval covers complete approved payload"); +check(platformTriggerEval.includes("decision_trace"), "platform trigger eval requires decision trace"); const coderBoundaryEval = exists(path.join("evals", "codewriter-boundary-tests.md"), coderRoot) ? read(path.join("evals", "codewriter-boundary-tests.md"), coderRoot) @@ -919,12 +1009,14 @@ check(readme.includes("v0.3 multi-tenant SaaS key completeness"), "README explai check(readme.includes("Dual-Core Model"), "README explains dual-core model"); check(readme.includes("crossframe-coder"), "README mentions crossframe-coder"); check(readme.includes("implement clear code changes"), "README explains coder implementation role"); +check(readme.includes("Approved high-risk implementation") && readme.includes("decision_trace"), "README documents approved high-risk payload"); check(readme.includes("early dual-core coding skill suite"), "README status reflects dual-core suite"); check(readme.includes("not automatically installed unless the user runs `scripts/install-adapters.mjs`"), "README status documents explicit install requirement"); check(readme.includes("Platform Adapters"), "README includes platform adapters section"); check(readme.includes("Claude Code") && readme.includes("Cursor") && readme.includes("Gemini CLI"), "README names supported non-Codex platforms"); check(readme.includes("install-adapters.mjs"), "README documents install adapter script"); 
check(readme.includes("node scripts/test-install-adapters.mjs"), "README documents installer smoke test command"); +check(readme.includes("multi-platform adapter installs"), "README documents multi-platform installer smoke coverage"); check(readme.includes("Influences and Attribution"), "README includes influences and attribution section"); check(readme.includes("felipereisdev/code-review-skill"), "README attributes felipereisdev code-review-skill"); check(readme.includes("review scope selection") && readme.includes("stack detection") && readme.includes("project convention-first"), "README names borrowed review-scope stack-convention ideas"); @@ -937,6 +1029,9 @@ check(installDoc.includes("both skills"), "INSTALL.md says installer copies both check(installDoc.includes("copies `crossframe-coder` before `crossframe-code`"), "INSTALL.md documents Codex self-install order"); check(installDoc.includes("SKIP") && installDoc.includes("identical"), "INSTALL.md documents identical skip behavior"); check(installDoc.includes("stale destination files"), "INSTALL.md documents stale file pruning"); +check(installDoc.includes("exact post-sync verification"), "INSTALL.md documents exact sync verification"); +check(installDoc.includes("EBUSY") && installDoc.includes("EPERM"), "INSTALL.md documents file lock retry"); +check(installDoc.includes("Linux") && installDoc.includes("Windows") && installDoc.includes("macOS"), "INSTALL.md documents cross-platform CI smoke"); for (const phrase of ["Codex", "Claude Code", "Cursor", "Gemini CLI", "Generic Agents", "--dry-run", "--force"]) { check(installDoc.includes(phrase), `INSTALL.md documents ${phrase}`); } @@ -946,18 +1041,21 @@ check(claudeMd.includes("skills/crossframe-code/SKILL.md"), "CLAUDE.md points to check(claudeMd.includes("skills/crossframe-coder/SKILL.md"), "CLAUDE.md points to coder entrypoint"); check(claudeMd.includes("Key Completeness Audit"), "CLAUDE.md includes key completeness trigger"); check(claudeMd.includes("P0/P1"), 
"CLAUDE.md includes evidence-anchor severity rule"); +check(claudeMd.includes("Approved high-risk implementation") && claudeMd.includes("decision_trace"), "CLAUDE.md includes approved-plan trace rule"); const geminiMd = exists("GEMINI.md", repoRoot) ? read("GEMINI.md", repoRoot) : ""; check(geminiMd.includes("skills/crossframe-code/SKILL.md"), "GEMINI.md points to skill entrypoint"); check(geminiMd.includes("skills/crossframe-coder/SKILL.md"), "GEMINI.md points to coder entrypoint"); check(geminiMd.includes("Key Completeness Audit"), "GEMINI.md includes key completeness trigger"); check(geminiMd.includes("P0/P1"), "GEMINI.md includes evidence-anchor severity rule"); +check(geminiMd.includes("Approved high-risk implementation") && geminiMd.includes("decision_trace"), "GEMINI.md includes approved-plan trace rule"); const agentsMd = exists("AGENTS.md", repoRoot) ? read("AGENTS.md", repoRoot) : ""; check(agentsMd.includes("skills/crossframe-code/SKILL.md"), "AGENTS.md points to skill entrypoint"); check(agentsMd.includes("skills/crossframe-coder/SKILL.md"), "AGENTS.md points to coder entrypoint"); check(agentsMd.includes("validate-skill.mjs"), "AGENTS.md requires validator"); check(agentsMd.includes("Platform adapters"), "AGENTS.md keeps adapters thin"); +check(agentsMd.includes("approved-plan payload") && agentsMd.includes("decision_trace"), "AGENTS.md includes approved-plan trace rule"); const cursorRule = exists(path.join(".cursor", "rules", "crossframe-code.mdc"), repoRoot) ? 
read(path.join(".cursor", "rules", "crossframe-code.mdc"), repoRoot) @@ -966,6 +1064,7 @@ check(cursorRule.includes("skills/crossframe-code/SKILL.md"), "Cursor rule point check(cursorRule.includes("skills/crossframe-coder/SKILL.md"), "Cursor rule points to coder entrypoint"); check(cursorRule.includes("Key Completeness Audit"), "Cursor rule includes key completeness trigger"); check(cursorRule.includes("P0/P1"), "Cursor rule includes evidence-anchor severity rule"); +check(cursorRule.includes("Approved high-risk implementation") && cursorRule.includes("decision_trace"), "Cursor rule includes approved-plan trace rule"); const installScript = exists(path.join("scripts", "install-adapters.mjs"), repoRoot) ? read(path.join("scripts", "install-adapters.mjs"), repoRoot) @@ -983,6 +1082,9 @@ check(installScript.includes("copyDirectoryContents") && installScript.includes( check(installScript.includes("pruneStaleDestination"), "install script prunes stale destination files"); check(installScript.includes("Directory sync did not produce an exact match"), "install script verifies post-copy exact sync"); check(installScript.includes("removed stale destination file"), "install script logs stale destination pruning"); +check(installScript.includes("withFileLockRetry") && installScript.includes("EBUSY") && installScript.includes("EPERM"), "install script retries transient file locks"); +check(installScript.includes("compareDirectories") && installScript.includes("formatDirectoryMismatch"), "install script reports directory mismatch details"); +check(installScript.includes("missingFiles") && installScript.includes("staleFiles") && installScript.includes("changedFiles"), "install script records missing stale changed files"); const installTest = exists(path.join("scripts", "test-install-adapters.mjs"), repoRoot) ? 
read(path.join("scripts", "test-install-adapters.mjs"), repoRoot) @@ -992,6 +1094,9 @@ check(installTest.includes("CODEX_HOME"), "install smoke test uses isolated CODE check(installTest.includes("obsolete-reference.md"), "install smoke test covers stale file pruning"); check(installTest.includes("SKIP dir"), "install smoke test covers identical skip"); check(installTest.includes("--all") && installTest.includes("--target is required"), "install smoke test covers --all target preflight"); +check(installTest.includes("assertDirectoryExact"), "install smoke test verifies exact directory sync"); +check(installTest.includes("crossframe-install-platform-target"), "install smoke test uses multi-platform temp target"); +check(installTest.includes(".cursor") && installTest.includes(".gemini") && installTest.includes(".agent-skills"), "install smoke test covers platform adapter destinations"); const validateWorkflow = exists(path.join(".github", "workflows", "validate.yml"), repoRoot) ? read(path.join(".github", "workflows", "validate.yml"), repoRoot) @@ -999,6 +1104,7 @@ const validateWorkflow = exists(path.join(".github", "workflows", "validate.yml" check(Boolean(validateWorkflow), "GitHub Actions validation workflow exists"); check(validateWorkflow.includes("node scripts/validate-skill.mjs"), "workflow runs validator"); check(validateWorkflow.includes("node scripts/test-install-adapters.mjs"), "workflow runs installer smoke test"); +check(validateWorkflow.includes("ubuntu-latest") && validateWorkflow.includes("windows-latest") && validateWorkflow.includes("macos-latest"), "workflow runs validation on Linux Windows macOS"); const installedPath = path.join(os.homedir(), ".codex", "skills", "crossframe-code"); info("user .codex skill install status", fs.existsSync(installedPath) ? 
`installed at ${installedPath}` : `not installed at ${installedPath}`); diff --git a/skills/crossframe-code/SKILL.md b/skills/crossframe-code/SKILL.md index 5a33e2a..7947976 100644 --- a/skills/crossframe-code/SKILL.md +++ b/skills/crossframe-code/SKILL.md @@ -146,4 +146,8 @@ Treat these as blockers: - `evals/local-project-risk-scan-smoke-tests.md`: smoke prompts for risky modules, legacy hotspots, AI patch regression surfaces, and safe refactoring candidates. - `evals/problem-router-smoke-tests.md`: smoke prompts for request routing across debugging, implementation, review, source-driven, high-risk, and toolchain cases. - `evals/dual-core-routing-conflict-tests.md`: conflict checks for code/coder routing, approved-plan implementation, review-before-fix, and security shortcut handling. +- `../crossframe-coder/evals/golden-implementation-reports.md`: companion implementation, blocked verification, high-risk handoff, and approved-plan first-safe-slice reports. +- `../crossframe-coder/evals/platform-trigger-routing-tests.md`: Codex, Claude, Cursor, Gemini, and generic adapter route checks. +- `../crossframe-coder/schemas/approved-plan-payload.schema.json`: machine-readable approved-plan payload fields for high-risk implementation. +- `../../scripts/test-install-adapters.mjs`: repository installer smoke test for exact sync, stale pruning, identical skip, preflight, and multi-platform temp installs. - `evals/golden-patch-plans.md`: sample passing outputs for local, architecture, and post-implementation modes. 
diff --git a/skills/crossframe-code/agents/openai.yaml b/skills/crossframe-code/agents/openai.yaml index 9c2a4da..ced23e2 100644 --- a/skills/crossframe-code/agents/openai.yaml +++ b/skills/crossframe-code/agents/openai.yaml @@ -1,4 +1,4 @@ interface: display_name: "CrossFrame Code" short_description: "代码结构诊断、替换计划与验证闭环" - default_prompt: "Use $crossframe-code to diagnose this code, compare replacement options, and produce a minimal patch plan before implementation." + default_prompt: "Use $crossframe-code to diagnose this code, compare replacement options, and produce a minimal patch plan before implementation. Route clear implementation to $crossframe-coder; high-risk implementation needs an Approved High-Risk Plan Exception payload with exact files, non-goals, verification, first safe slice, environment marker, and decision_trace." diff --git a/skills/crossframe-code/evals/dual-core-routing-conflict-tests.md b/skills/crossframe-code/evals/dual-core-routing-conflict-tests.md index d2c8614..330ba3d 100644 --- a/skills/crossframe-code/evals/dual-core-routing-conflict-tests.md +++ b/skills/crossframe-code/evals/dual-core-routing-conflict-tests.md @@ -44,6 +44,7 @@ Expected behavior: - If the plan text, named files, confirmations, or verification are missing, do not implement. - Use `crossframe-coder` handoff report or return to `crossframe-code`. +- Handoff log must name missing approved-plan payload fields, including files, behavior, non-goals, verification, first safe slice, environment marker, and decision trace. Failure mode: @@ -64,6 +65,9 @@ Approved Deep Risk Patch Plan: - Required verification: npm test -- webhook.test.ts. - First safe slice: add duplicate refund replay guard only. - P0/P1 evidence anchors: complete in the plan. +- High-risk categories: billing, payment, webhook, tenant, idempotency, durable_state. +- Environment: agent_platform=codex; os_family=windows; repo_root=repo; dependency_context=package-lock and local test command. 
+- Decision trace: environment checked -> schema pass -> route implement. ``` Expected route: @@ -74,9 +78,15 @@ problem-router -> high-risk implementation -> approved plan check -> crossframe- Expected behavior: +- Validate the plan payload against `crossframe-coder`'s approved-plan payload schema. - `crossframe-coder` may implement only named files. - No new files unless the plan names them. - Run exactly the listed verification first. +- The implementation report must include this trace line shape: + +```text +decision_trace: environment=; risk=billing,payment,webhook,tenant,idempotency,durable_state; schema=pass; route=implement; files=src/billing/webhook.ts,tests/billing/webhook.test.ts; verification=npm test -- webhook.test.ts; first_safe_slice=add duplicate refund replay guard only; reason=approved first safe slice +``` Failure mode: diff --git a/skills/crossframe-coder/SKILL.md b/skills/crossframe-coder/SKILL.md index 302fc32..631bd44 100644 --- a/skills/crossframe-coder/SKILL.md +++ b/skills/crossframe-coder/SKILL.md @@ -47,6 +47,8 @@ High-risk implementation may stay in `crossframe-coder` only when all entry cond - The plan includes runnable verification or a concrete manual replay/check. - P0/P1 findings in the plan already include complete evidence anchors. - The requested work is the first safe slice, not a broader redesign. +- The plan payload satisfies `references/approved-plan-payload-schema.md` and `schemas/approved-plan-payload.schema.json`. +- The implementation report records a decision trace with environment, risk categories, schema result, route, allowed files, verification, first safe slice, and reason. If any condition is missing, use `templates/handoff-report.md` and return to `crossframe-code`. @@ -69,6 +71,7 @@ If any condition is missing, use `templates/handoff-report.md` and return to `cr 3. **Plan** - Use `templates/implementation-plan.md` for non-trivial edits. - Keep the plan to one slice that can compile or run independently. 
+ - For approved high-risk plans, read `references/approved-plan-payload-schema.md` before editing and restrict the slice to the schema payload. - For bugfixes, preserve the failure evidence before changing code. - For refactors, define behavior to preserve before editing. - Use `references/test-first-loop.md` when tests should come first. @@ -120,8 +123,12 @@ Use `references/handoff-to-crossframe-code.md` and `templates/handoff-report.md` - `examples/python-bugfix-slice.md`: focused Python bugfix slice. - `examples/source-driven-framework-change.md`: dependency-version-aware framework change. - `examples/toolchain-blocked-example.md`: blocked verification report example. +- `references/approved-plan-payload-schema.md`: required schema, environment marker, and decision trace for approved high-risk implementation. - `evals/crossframe-coder-smoke-tests.md`: basic implementation workflow checks. - `evals/codewriter-boundary-tests.md`: checks that high-risk work is handed off. - `evals/verification-failure-tests.md`: checks verification failure and blocked reporting. - `evals/handoff-to-crossframe-code-tests.md`: checks handoff boundaries. +- `evals/platform-trigger-routing-tests.md`: cross-platform Codex, Claude, Cursor, Gemini, and generic routing checks. - `evals/golden-implementation-reports.md`: sample passing implementation, blocked verification, source-driven, and high-risk handoff reports. +- `schemas/approved-plan-payload.schema.json`: machine-readable approved-plan payload shape. +- `../../scripts/test-install-adapters.mjs`: repository installer smoke test for identical skip, stale pruning, exact sync, and multi-platform temp installs. 
diff --git a/skills/crossframe-coder/agents/openai.yaml b/skills/crossframe-coder/agents/openai.yaml index 06aed63..f1482ae 100644 --- a/skills/crossframe-coder/agents/openai.yaml +++ b/skills/crossframe-coder/agents/openai.yaml @@ -1,4 +1,4 @@ interface: display_name: "CrossFrame Coder" short_description: "Small verified code implementation slices" - default_prompt: "Use $crossframe-coder when the user explicitly asks to implement, build, add, fix, generate, modify, patch, or directly change code. Hand off high-risk or unclear changes to $crossframe-code." + default_prompt: "Use $crossframe-coder when the user explicitly asks to implement, build, add, fix, generate, modify, patch, or directly change code. Hand off high-risk or unclear changes to $crossframe-code unless the Approved High-Risk Plan Exception has a complete approved-plan payload with exact files, behavior to preserve, non-goals, verification, first safe slice, resolved confirmations, environment marker, and decision_trace." diff --git a/skills/crossframe-coder/evals/golden-implementation-reports.md b/skills/crossframe-coder/evals/golden-implementation-reports.md index 0e2dbf9..060a0e7 100644 --- a/skills/crossframe-coder/evals/golden-implementation-reports.md +++ b/skills/crossframe-coder/evals/golden-implementation-reports.md @@ -19,6 +19,9 @@ Target behavior: Implementation status: implemented +Handoff reason: +- None. This is an ordinary local feature slice with no high-risk boundary. + Files changed: - src/components/UserList.tsx - src/components/UserList.test.tsx @@ -58,6 +61,9 @@ Target behavior: Implementation status: implemented +Handoff reason: +- None. The failing parser behavior is localized and covered by a focused regression test. + Failure evidence preserved: - Before edit: pytest tests/test_parser.py::test_empty_metadata_line failed with ValueError. @@ -95,6 +101,9 @@ Target behavior: Implementation status: implemented +Handoff reason: +- None. 
The change is source-driven but stays inside the installed SDK wrapper and focused test. + Source evidence: - Installed version: package-lock.json shows stripe 17.x. - Local types: node_modules/stripe/types expose stripe.refunds.create(params). @@ -134,6 +143,9 @@ Target behavior: Implementation status: blocked +Handoff reason: +- None. Verification is blocked before application behavior is reached; no architecture handoff is needed yet. + Files changed: - None. @@ -177,9 +189,62 @@ Handoff reason: - Payment webhook and duplicate refund behavior affect durable money state and idempotency. - The prompt references an approved plan, but the plan text, named files, and verification commands were not available in the current context. +Verification: +- Command: not run +- Verification status: not run - handed off +- Result summary: no code was edited because the approved plan payload was incomplete. + Required handoff payload: - Approved Deep Risk Patch Plan. - Named files allowed for edit. - Idempotency and replay verification. - Exact focused test command or manual replay fixture. ``` + +## Golden 6: Approved High-Risk First Safe Slice + +Prompt: + +```text +Implement this approved Deep Risk Patch Plan for duplicate refund webhooks. The approved plan payload names src/billing/webhook.ts and tests/billing/webhook.test.ts, preserves signature verification and tenant binding, forbids schema migration and new ledger tables, requires npm test -- webhook.test.ts, and limits this pass to the duplicate refund replay guard. +``` + +Expected report: + +```text +Target behavior: +- Add the approved duplicate refund replay guard only. +- Preserve existing signature verification, tenant binding, ledger write shape, and webhook amount matching. + +Implementation status: implemented + +Handoff reason: +- None. Approved-plan payload schema passed, unresolved confirmations were empty, and the requested edit stayed inside the first safe slice. 
+ +Decision trace: +- decision_trace: environment=codex/windows; risk=billing,payment,webhook,tenant,idempotency,durable_state; schema=pass; route=implement; files=src/billing/webhook.ts,tests/billing/webhook.test.ts; verification=npm test -- webhook.test.ts; first_safe_slice=duplicate refund replay guard only; reason=approved first safe slice + +Approved plan payload checked: +- exact_files_allowed: src/billing/webhook.ts, tests/billing/webhook.test.ts +- behavior_to_preserve: signature verification, tenant binding, ledger write shape, webhook amount matching +- non_goals: no schema migration, no new ledger table, no refund state machine rewrite +- high_risk_categories: billing, payment, webhook, tenant, idempotency, durable_state +- unresolved_required_confirmations: none + +Files changed: +- src/billing/webhook.ts +- tests/billing/webhook.test.ts + +Unsupported assumptions rejected: +- Did not add files not named by the approved plan payload. +- Did not change tenant lookup, signature verification, invoice uniqueness, or ledger schema. +- Did not broaden the slice into refund state-machine redesign. + +Verification: +- Command: npm test -- webhook.test.ts +- Verification status: passed +- Result summary: duplicate refund replay guard and existing signature/tenant binding tests passed. + +Remaining risk: +- Broader billing replay and revenue-recognition flows remain outside this first safe slice. +``` diff --git a/skills/crossframe-coder/evals/platform-trigger-routing-tests.md b/skills/crossframe-coder/evals/platform-trigger-routing-tests.md new file mode 100644 index 0000000..2fc4edf --- /dev/null +++ b/skills/crossframe-coder/evals/platform-trigger-routing-tests.md @@ -0,0 +1,90 @@ +# Platform Trigger Routing Tests + +These tests guard thin adapter behavior across Codex, Claude Code, Cursor, Gemini CLI, and generic agent repositories. Platform adapters may be brief, but they must route implementation and high-risk approved plans the same way. 
+ +## Test 1: Clear Implementation Request + +Prompt: + +```text +Add the disabled state to this Button component and update its focused test. +``` + +Expected platform route: + +```text +Codex/Claude/Cursor/Gemini/generic -> crossframe-coder -> small implementation slice +``` + +Expected behavior: + +- Route to `crossframe-coder`. +- Name target behavior, files likely touched, non-goals, and verification seam. +- Do not escalate to `crossframe-code` without a concrete high-risk signal. + +Failure mode: + +- Adapter defaults every implementation request to read-only diagnosis. + +## Test 2: Claimed Approved Plan Missing Payload + +Prompt: + +```text +Implement the approved billing webhook Deep Risk plan. +``` + +Expected platform route: + +```text +Codex/Claude/Cursor/Gemini/generic -> crossframe-coder -> approved-plan schema failure -> handoff to crossframe-code +``` + +Expected behavior: + +- Do not edit code. +- Handoff log includes missing exact files, behavior to preserve, non-goals, verification, first safe slice, high-risk categories, environment marker, and decision trace. + +Failure mode: + +- Adapter treats the words "approved plan" as sufficient approval. + +## Test 3: Complete Approved Plan Payload + +Prompt: + +```text +Implement this approved Deep Risk Patch Plan. 
+ +Approved plan payload: +- approved: true +- plan_mode: Deep Risk Patch Plan +- exact_files_allowed: src/billing/webhook.ts, tests/billing/webhook.test.ts +- behavior_to_preserve: signature verification, tenant binding +- non_goals: no schema migration, no new ledger table +- required_verification: npm test -- webhook.test.ts +- first_safe_slice: duplicate refund replay guard only +- unresolved_required_confirmations: none +- evidence_anchors: P1 src/billing/webhook.ts handleRefundWebhook - duplicate refund events can reach the ledger write path; money-state side effects may be replayed +- high_risk_categories: billing, payment, webhook, tenant, idempotency, durable_state +- environment: agent_platform=; os_family=; repo_root=; dependency_context=package-lock and focused test available +- decision_trace: environment checked -> schema pass -> route implement +``` + +Expected platform route: + +```text +Codex/Claude/Cursor/Gemini/generic -> crossframe-coder -> approved-plan schema pass -> implement first safe slice only +``` + +Expected behavior: + +- Implement only files in `exact_files_allowed`. +- Do not add files unless the payload names them. +- Run exactly the listed verification first. +- Report `decision_trace` with environment, risk, schema, route, files, verification, first safe slice, and reason. + +Failure mode: + +- Adapter expands the plan, changes unlisted files, or omits decision trace. + diff --git a/skills/crossframe-coder/references/approved-plan-payload-schema.md b/skills/crossframe-coder/references/approved-plan-payload-schema.md new file mode 100644 index 0000000..5008865 --- /dev/null +++ b/skills/crossframe-coder/references/approved-plan-payload-schema.md @@ -0,0 +1,106 @@ +# Approved Plan Payload Schema + +Use this reference before `crossframe-coder` implements any approved high-risk plan. The machine-readable schema lives at `schemas/approved-plan-payload.schema.json`. 
+ +## Required Payload Fields + +The approved plan payload must include: + +- `approved`: true. +- `plan_mode`: `Compact Patch Plan` or `Deep Risk Patch Plan`. +- `exact_files_allowed`: non-empty list of files the coder may edit. 
+- `behavior_to_preserve`: required behavior that must not change. +- `non_goals`: explicit out-of-scope changes. +- `required_verification`: command or manual replay/check, with purpose. +- `first_safe_slice`: the only high-risk slice allowed in this implementation pass. +- `unresolved_required_confirmations`: empty list. +- `evidence_anchors`: P0/P1 anchors already captured by `crossframe-code` when applicable. +- `high_risk_categories`: auth, authorization, tenant, billing, payment, coupon, invoice, ledger, migration, queue, lock, retry, idempotency, webhook, outbox, revenue, durable_state, security, or legacy. +- `environment`: `agent_platform`, `os_family`, `repo_root`, and `dependency_context`. +- `decision_trace`: at least three steps showing why the route is implement or handoff. + +If any required field is missing, empty, or contradicted by the repo state, do not edit. Hand off to `crossframe-code`. + +## Environment Marker + +Record the execution environment before deciding route: + +```text +environment: agent_platform=codex|claude|cursor|gemini|generic|other; os_family=windows|linux|macos|unknown; repo_root=; dependency_context= +``` + +This marker prevents a plan approved in one platform from silently skipping local constraints in another platform. + +## Decision Trace Line + +For every approved high-risk slice, report one compact trace line: + +```text +decision_trace: environment=; risk=; schema=pass|fail; route=implement|handoff; files=; verification=; first_safe_slice=; reason= +``` + +Use `route=handoff` when: + +- the plan payload is absent or not schema-shaped; +- the user asks for files outside `exact_files_allowed`; +- `unresolved_required_confirmations` is not empty; +- the first safe slice is broader than the named slice; +- verification is not runnable or the manual replay is not concrete; +- local stack, generated client, dependency, or platform evidence contradicts the plan. 
+ +## High-Risk Coverage Check + +When a plan touches billing, payment, webhook, tenant, idempotency, or durable state, the payload must name those categories in `high_risk_categories` and preserve the existing trust, tenant, state-transition, and replay behavior explicitly. Missing categories are a schema failure for route selection even if the user says the plan is approved. + +## Minimal Payload Example + +```json +{ + "approved": true, + "plan_mode": "Deep Risk Patch Plan", + "exact_files_allowed": ["src/billing/webhook.ts", "tests/billing/webhook.test.ts"], + "behavior_to_preserve": ["existing signature verification", "tenant binding"], + "non_goals": ["no schema migration", "no new ledger table"], + "required_verification": [ + { + "command": "npm test -- webhook.test.ts", + "purpose": "duplicate replay guard and existing signature behavior" + } + ], + "first_safe_slice": "add duplicate refund replay guard only", + "unresolved_required_confirmations": [], + "evidence_anchors": [ + { + "severity": "P1", + "file": "src/billing/webhook.ts", + "line_function_symbol": "handleRefundWebhook", + "observed_behavior": "duplicate refund events can reach the ledger write path", + "why_this_is_risky": "money-state side effects may be replayed" + } + ], + "high_risk_categories": ["billing", "payment", "webhook", "tenant", "idempotency", "durable_state"], + "environment": { + "agent_platform": "codex", + "os_family": "windows", + "repo_root": "E:/repo", + "dependency_context": "package-lock.json and local test command available" + }, + "decision_trace": [ + { + "step": "environment", + "decision": "codex windows local repo", + "evidence": "repo root and package-lock present" + }, + { + "step": "schema", + "decision": "pass", + "evidence": "required files, non-goals, verification, and first safe slice present" + }, + { + "step": "route", + "decision": "implement", + "evidence": "requested edit stays inside exact_files_allowed" + } + ] +} +``` diff --git 
a/skills/crossframe-coder/references/handoff-to-crossframe-code.md b/skills/crossframe-coder/references/handoff-to-crossframe-code.md index 12be58a..bb5de16 100644 --- a/skills/crossframe-coder/references/handoff-to-crossframe-code.md +++ b/skills/crossframe-coder/references/handoff-to-crossframe-code.md @@ -14,19 +14,31 @@ Use this instead of editing when implementation risk exceeds a small verified sl ## Approved Plan Exception Check +Read `approved-plan-payload-schema.md` before applying the exception. + Before implementing an approved high-risk plan, verify: - plan present: - explicit approval: -- named files: +- schema path checked: +- exact files allowed: - behavior to preserve: - non-goals: - unresolved confirmations: - verification command/manual check: - first safe slice: +- high-risk categories: +- environment marker: +- decision trace: If any field is missing, hand off. +Decision trace format: + +```text +decision_trace: environment=; risk=; schema=pass|fail; route=implement|handoff; files=; verification=; first_safe_slice=; reason= +``` + ## Handoff Report Fields - Requested implementation: @@ -36,9 +48,12 @@ If any field is missing, hand off. - Recommended `crossframe-code` route: - Verification seam to design: - Smallest safe next step: +- Environment marker: +- Decision trace: ## Rules - Do not hand off ordinary local edits merely to avoid implementation. - Do not proceed with high-risk changes just because the user asked for speed. - When handing off, preserve the user's implementation goal and name what evidence is missing. +- When handing off a claimed approved plan, log every missing schema field rather than saying only "high risk". 
diff --git a/skills/crossframe-coder/schemas/approved-plan-payload.schema.json b/skills/crossframe-coder/schemas/approved-plan-payload.schema.json new file mode 100644 index 0000000..00bfe93 --- /dev/null +++ b/skills/crossframe-coder/schemas/approved-plan-payload.schema.json @@ -0,0 +1,182 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://github.com/xi-kari/crossframe-code-skill/schemas/approved-plan-payload.schema.json", + "title": "CrossFrame Coder Approved Plan Payload", + "type": "object", + "additionalProperties": false, + "required": [ + "approved", + "plan_mode", + "exact_files_allowed", + "behavior_to_preserve", + "non_goals", + "required_verification", + "first_safe_slice", + "unresolved_required_confirmations", + "evidence_anchors", + "high_risk_categories", + "environment", + "decision_trace" + ], + "properties": { + "approved": { + "const": true + }, + "plan_mode": { + "enum": ["Compact Patch Plan", "Deep Risk Patch Plan"] + }, + "exact_files_allowed": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1, + "uniqueItems": true + }, + "behavior_to_preserve": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1 + }, + "non_goals": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1 + }, + "required_verification": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "required": ["command", "purpose"], + "properties": { + "command": { + "type": "string", + "minLength": 1 + }, + "purpose": { + "type": "string", + "minLength": 1 + } + } + }, + "minItems": 1 + }, + "first_safe_slice": { + "type": "string", + "minLength": 1 + }, + "unresolved_required_confirmations": { + "type": "array", + "maxItems": 0 + }, + "evidence_anchors": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "required": ["severity", "file", "line_function_symbol", 
"observed_behavior", "why_this_is_risky"], + "properties": { + "severity": { + "enum": ["P0", "P1", "P2", "P3"] + }, + "file": { + "type": "string", + "minLength": 1 + }, + "line_function_symbol": { + "type": "string", + "minLength": 1 + }, + "observed_behavior": { + "type": "string", + "minLength": 1 + }, + "why_this_is_risky": { + "type": "string", + "minLength": 1 + } + } + } + }, + "high_risk_categories": { + "type": "array", + "items": { + "enum": [ + "auth", + "authorization", + "tenant", + "billing", + "payment", + "coupon", + "invoice", + "ledger", + "migration", + "queue", + "lock", + "retry", + "idempotency", + "webhook", + "outbox", + "revenue", + "durable_state", + "security", + "legacy" + ] + }, + "minItems": 1, + "uniqueItems": true + }, + "environment": { + "type": "object", + "additionalProperties": false, + "required": ["agent_platform", "os_family", "repo_root", "dependency_context"], + "properties": { + "agent_platform": { + "enum": ["codex", "claude", "cursor", "gemini", "generic", "other"] + }, + "os_family": { + "enum": ["windows", "linux", "macos", "unknown"] + }, + "repo_root": { + "type": "string", + "minLength": 1 + }, + "dependency_context": { + "type": "string", + "minLength": 1 + } + } + }, + "decision_trace": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "required": ["step", "decision", "evidence"], + "properties": { + "step": { + "type": "string", + "minLength": 1 + }, + "decision": { + "type": "string", + "minLength": 1 + }, + "evidence": { + "type": "string", + "minLength": 1 + } + } + }, + "minItems": 3 + } + } +}