From c06dcc5ed57e2c9451a37aa824b65c77555393ea Mon Sep 17 00:00:00 2001 From: qiankunli Date: Sat, 11 Apr 2026 18:22:41 +0800 Subject: [PATCH 01/15] feat: add strict /codex:test workflow --- README.md | 36 ++ plugins/codex/commands/test.md | 60 ++++ plugins/codex/prompts/write-tests.md | 60 ++++ plugins/codex/scripts/codex-companion.mjs | 169 +++++++-- plugins/codex/scripts/lib/test-context.mjs | 394 +++++++++++++++++++++ tests/commands.test.mjs | 40 ++- tests/runtime.test.mjs | 214 +++++++++++ 7 files changed, 950 insertions(+), 23 deletions(-) create mode 100644 plugins/codex/commands/test.md create mode 100644 plugins/codex/prompts/write-tests.md create mode 100644 plugins/codex/scripts/lib/test-context.mjs diff --git a/README.md b/README.md index 458c39fb..401ce9b0 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ they already have. - `/codex:review` for a normal read-only Codex review - `/codex:adversarial-review` for a steerable challenge review +- `/codex:test` to delegate matching test updates for the current diff - `/codex:rescue`, `/codex:status`, `/codex:result`, and `/codex:cancel` to delegate work and manage background jobs ## Requirements @@ -123,6 +124,35 @@ Examples: This command is read-only. It does not fix code. +### `/codex:test` + +Runs a strict test-writing workflow for the current code changes. + +Use it when you want: + +- Codex writes the matching tests for the current diff +- Claude to keep the implementation work while Codex writes the matching tests +- Codex to inspect the current diff, infer the repository test layout, and add the smallest sufficient test coverage +- Codex to avoid modifying production code by default + +This command fails closed if it cannot collect the required repository context first. It looks for project guidance such as `CLAUDE.md`, `AGENTS.md`, or `README.md`, inspects the current diff, infers likely test targets, and then asks Codex to write the tests. + +Examples: + +```bash +/codex:test +/codex:test --base main +/codex:test --background +/codex:test --model gpt-5.4-mini --effort high +``` + +By default, Codex should: + +- explain the author's apparent purpose for the change before editing +- summarize the touched production files and detected test locations +- update or create the matching tests +- avoid modifying production code by default + ### `/codex:rescue` Hands a task to Codex through the `codex:codex-rescue` subagent. @@ -229,6 +259,12 @@ When the review gate is enabled, the plugin uses a `Stop` hook to run a targeted /codex:review ``` +### Let Codex Add Tests + +```bash +/codex:test +``` + ### Hand A Problem To Codex ```bash diff --git a/plugins/codex/commands/test.md b/plugins/codex/commands/test.md new file mode 100644 index 00000000..5905f853 --- /dev/null +++ b/plugins/codex/commands/test.md @@ -0,0 +1,60 @@ +--- +description: Delegate test writing for the current code changes to Codex with a strict test-only workflow +argument-hint: '[--wait|--background] [--base ] [--scope auto|working-tree|branch] [--model ] [--effort ]' +disable-model-invocation: true +allowed-tools: Read, Glob, Grep, Bash(node:*), Bash(git:*), AskUserQuestion +--- + +Run Codex test writing through the shared plugin runtime. + +Raw slash-command arguments: +`$ARGUMENTS` + +Core constraints: +- This command is test-only. +- Do not modify production code by default. +- Fail closed if the runtime cannot collect the required repository context. +- Your only job is to run the command and return Codex's output verbatim to the user. + +Execution mode rules: +- If the raw arguments include `--wait`, do not ask. Run in the foreground. +- If the raw arguments include `--background`, do not ask. Run in a Claude background task. +- Otherwise, estimate the change size before asking: + - For working-tree mode, start with `git status --short --untracked-files=all`. + - For working-tree mode, also inspect both `git diff --shortstat --cached` and `git diff --shortstat`. + - For base-branch mode, use `git diff --shortstat ...HEAD`. + - Treat untracked files or directories as real work even when `git diff --shortstat` is empty. + - Recommend waiting only when the change is clearly tiny, roughly 1-2 files total and no sign of broader test work. + - In every other case, including unclear size, recommend background. + - When in doubt, run the command instead of claiming there is no test work to do. +- Then use `AskUserQuestion` exactly once with two options, putting the recommended option first and suffixing its label with `(Recommended)`: + - `Wait for results` + - `Run in background` + +Argument handling: +- Preserve the user's arguments exactly. +- Do not strip `--wait` or `--background` yourself. +- The companion script parses `--wait` and `--background`, but Claude Code's `Bash(..., run_in_background: true)` is what actually detaches the run. +- This command accepts `--base ` and `--scope auto|working-tree|branch`. +- This command accepts `--model` and `--effort` and forwards them to the companion runtime. +- Do not add extra instructions or rewrite the user's intent. + +Foreground flow: +- Run: +```bash +node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" test "$ARGUMENTS" +``` +- Return the command stdout verbatim, exactly as-is. +- Do not paraphrase, summarize, or add commentary before or after it. + +Background flow: +- Launch the command with `Bash` in the background: +```typescript +Bash({ + command: `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" test "$ARGUMENTS"`, + description: "Codex test writing", + run_in_background: true +}) +``` +- Do not call `BashOutput` or wait for completion in this turn. +- After launching the command, tell the user: "Codex test writing started in the background. Check `/codex:status` for progress." diff --git a/plugins/codex/prompts/write-tests.md b/plugins/codex/prompts/write-tests.md new file mode 100644 index 00000000..50d0a288 --- /dev/null +++ b/plugins/codex/prompts/write-tests.md @@ -0,0 +1,60 @@ + +You are Codex writing tests for an existing code change. +Your job is to understand the author's intent, map the impacted code to the repository's testing layout, and then make the smallest sufficient test-only edits. + + + +Write or update the tests for {{TARGET_LABEL}}. + + + +Use the provided project guidance and diff context as the starting point for your understanding of the change. +Before you edit anything, infer and state the author's purpose for this change in one short section titled exactly `Author purpose:`. + + + +- Default to test-only changes. +- Do not modify production code by default. +- Do not delete existing tests unless the tested behavior is explicitly removed by this diff or the test is being replaced by an updated equivalent covering the same intent. +- If you believe a production code change is required, stop and explain why instead of editing it. +- Follow the repository's existing test conventions, naming patterns, and directory layout. +- Prefer the smallest sufficient regression coverage for the changed behavior. +- Reuse existing fixtures, helpers, and snapshots when they already fit. + + + +Before editing any files, print a concise plan that includes these headings exactly: +- `Author purpose:` +- `Touched production files:` +- `Detected test locations:` +- `Planned test file changes:` + +Under `Planned test file changes:`, list which files you expect to create, update, or remove. +Mention the relevant test functions or scenarios you expect to add or update when you can infer them from the context. + + + +Prefer the repository-specific test commands listed below when they fit the changed tests. +After editing, run the most relevant repository test command you can identify from the available context. +If the repository does not expose a clear test command, run the narrowest command that verifies the changed tests. + + + +{{SUGGESTED_TEST_COMMANDS}} + + + +{{PROJECT_GUIDANCE}} + + + +{{DIFF_CONTEXT}} + + + +{{TEST_LAYOUT}} + + + +{{TEST_PLAN}} + diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 35222fd5..032928e9 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -24,6 +24,7 @@ import { readStdinIfPiped } from "./lib/fs.mjs"; import { collectReviewContext, ensureGitRepository, resolveReviewTarget } from "./lib/git.mjs"; import { binaryAvailable, terminateProcessTree } from "./lib/process.mjs"; import { loadPromptTemplate, interpolateTemplate } from "./lib/prompts.mjs"; +import { collectTestCommandContext } from "./lib/test-context.mjs"; import { generateJobId, getConfig, @@ -77,6 +78,7 @@ function printUsage() { " node scripts/codex-companion.mjs setup [--enable-review-gate|--disable-review-gate] [--json]", " node scripts/codex-companion.mjs review [--wait|--background] [--base ] [--scope ]", " node scripts/codex-companion.mjs adversarial-review [--wait|--background] [--base ] [--scope ] [focus text]", + " node scripts/codex-companion.mjs test [--wait|--background] [--base ] [--scope ] [--model ] [--effort ]", " node scripts/codex-companion.mjs task [--background] [--write] [--resume-last|--resume|--fresh] [--model ] [--effort ] [prompt]", " node scripts/codex-companion.mjs status [job-id] [--all] [--json]", " node scripts/codex-companion.mjs result [job-id] [--json]", @@ -246,6 +248,18 @@ function buildAdversarialReviewPrompt(context, focusText) { }); } +function buildWriteTestsPrompt(context) { + const template = loadPromptTemplate(ROOT_DIR, "write-tests"); + return interpolateTemplate(template, { + TARGET_LABEL: context.target.label, + PROJECT_GUIDANCE: context.renderedGuidance, + DIFF_CONTEXT: context.reviewContext.content, + TEST_LAYOUT: context.renderedTestLayout, + TEST_PLAN: context.renderedPlan, + SUGGESTED_TEST_COMMANDS: context.renderedSuggestedCommands + }); +} + function ensureCodexAvailable(cwd) { const availability = getCodexAvailability(cwd); if (!availability.available) { @@ -526,6 +540,61 @@ async function executeTaskRun(request) { }; } +async function executeTestRun(request) { + ensureCodexAvailable(request.cwd); + ensureGitRepository(request.cwd); + + const context = collectTestCommandContext(request.cwd, { + base: request.base, + scope: request.scope, + target: request.target + }); + const prompt = buildWriteTestsPrompt(context); + const result = await runAppServerTurn(context.repoRoot, { + prompt, + model: request.model, + effort: request.effort, + sandbox: "workspace-write", + onProgress: request.onProgress, + persistThread: true, + threadName: buildPersistentTaskThreadName(`Write tests for ${context.target.label}`) + }); + + const rawOutput = typeof result.finalMessage === "string" ? result.finalMessage : ""; + const failureMessage = result.error?.message ?? result.stderr ?? ""; + const rendered = renderTaskResult( + { + rawOutput, + failureMessage, + reasoningSummary: result.reasoningSummary + }, + { + title: "Codex Test", + jobId: request.jobId ?? null, + write: true + } + ); + const payload = { + status: result.status, + threadId: result.threadId, + rawOutput, + touchedFiles: result.touchedFiles, + reasoningSummary: result.reasoningSummary + }; + + return { + exitStatus: result.status, + threadId: result.threadId, + turnId: result.turnId, + payload, + rendered, + summary: firstMeaningfulLine(rawOutput, firstMeaningfulLine(failureMessage, "Codex Test finished.")), + jobTitle: "Codex Test", + jobClass: "task", + write: true + }; +} + function buildReviewJobMetadata(reviewName, target) { return { kind: reviewName === "Adversarial Review" ? "adversarial-review" : "review", @@ -558,6 +627,9 @@ function getJobKindLabel(kind, jobClass) { if (kind === "adversarial-review") { return "adversarial-review"; } + if (kind === "test") { + return "test"; + } return jobClass === "review" ? "review" : "rescue"; } @@ -586,10 +658,10 @@ function createTrackedProgress(job, options = {}) { }; } -function buildTaskJob(workspaceRoot, taskMetadata, write) { +function buildTaskJob(workspaceRoot, taskMetadata, write, options = {}) { return createCompanionJob({ - prefix: "task", - kind: "task", + prefix: options.prefix ?? "task", + kind: options.kind ?? "task", title: taskMetadata.title, workspaceRoot, jobClass: "task", @@ -598,18 +670,6 @@ function buildTaskJob(workspaceRoot, taskMetadata, write) { }); } -function buildTaskRequest({ cwd, model, effort, prompt, write, resumeLast, jobId }) { - return { - cwd, - model, - effort, - prompt, - write, - resumeLast, - jobId - }; -} - function readTaskPrompt(cwd, options, positionals) { if (options["prompt-file"]) { return fs.readFileSync(path.resolve(cwd, options["prompt-file"]), "utf8"); @@ -760,7 +820,7 @@ async function handleTask(argv) { requireTaskRequest(prompt, resumeLast); const job = buildTaskJob(workspaceRoot, taskMetadata, write); - const request = buildTaskRequest({ + const request = { cwd, model, effort, @@ -768,7 +828,7 @@ async function handleTask(argv) { write, resumeLast, jobId: job.id - }); + }; const { payload } = enqueueBackgroundTask(cwd, job, request); outputCommandResult(payload, renderQueuedTaskLaunch(payload), options.json); return; @@ -792,6 +852,70 @@ async function handleTask(argv) { ); } +async function handleTest(argv) { + const { options, positionals } = parseCommandInput(argv, { + valueOptions: ["base", "scope", "model", "effort", "cwd"], + booleanOptions: ["json", "background", "wait"], + aliasMap: { + m: "model" + } + }); + + if (positionals.length > 0) { + throw new Error("/codex:test does not accept extra focus text. Put repository-specific test guidance in project docs instead."); + } + + const cwd = resolveCommandCwd(options); + const workspaceRoot = resolveCommandWorkspace(options); + const model = normalizeRequestedModel(options.model); + const effort = normalizeReasoningEffort(options.effort); + const target = resolveReviewTarget(cwd, { + base: options.base, + scope: options.scope + }); + const taskMetadata = { + title: "Codex Test", + summary: `Write tests for ${target.label}` + }; + const job = buildTaskJob(workspaceRoot, taskMetadata, true, { + prefix: "test", + kind: "test" + }); + + if (options.background) { + ensureCodexAvailable(cwd); + const request = { + kind: "test", + cwd, + base: options.base, + scope: options.scope, + target, + model, + effort, + jobId: job.id + }; + const { payload } = enqueueBackgroundTask(cwd, job, request); + outputCommandResult(payload, renderQueuedTaskLaunch(payload), options.json); + return; + } + + await runForegroundCommand( + job, + (progress) => + executeTestRun({ + cwd, + base: options.base, + scope: options.scope, + target, + model, + effort, + jobId: job.id, + onProgress: progress + }), + { json: options.json } + ); +} + async function handleTaskWorker(argv) { const { options } = parseCommandInput(argv, { valueOptions: ["cwd", "job-id"] @@ -828,11 +952,13 @@ async function handleTaskWorker(argv) { workspaceRoot, logFile }, - () => - executeTaskRun({ + () => { + const runner = request.kind === "test" ? executeTestRun : executeTaskRun; + return runner({ ...request, onProgress: progress - }), + }); + }, { logFile } ); } @@ -997,6 +1123,9 @@ async function main() { reviewName: "Adversarial Review" }); break; + case "test": + await handleTest(argv); + break; case "task": await handleTask(argv); break; diff --git a/plugins/codex/scripts/lib/test-context.mjs b/plugins/codex/scripts/lib/test-context.mjs new file mode 100644 index 00000000..4072bdf6 --- /dev/null +++ b/plugins/codex/scripts/lib/test-context.mjs @@ -0,0 +1,394 @@ +import fs from "node:fs"; +import path from "node:path"; + +import { collectReviewContext, getRepoRoot, resolveReviewTarget } from "./git.mjs"; + +const GUIDANCE_PREFERENCE = ["claude.md", "agents.md", "readme.md"]; +const GUIDANCE_LIMIT_BYTES = 24 * 1024; +const MAX_GUIDANCE_DEPTH = 3; +const TEST_COMMAND_LIMIT = 6; +const WALK_SKIP_DIRS = new Set([ + ".git", + ".hg", + ".svn", + "node_modules", + ".venv", + "venv", + "dist", + "build", + "coverage" +]); +const TEST_DIR_NAMES = new Set(["test", "tests", "__tests__"]); +const SOURCE_ROOT_NAMES = new Set(["src", "lib", "app", "server", "backend", "frontend", "cmd", "internal"]); +const TEST_COMMAND_PATTERNS = [ + /\b(?:make|just)\s+(?:test|test-ci|test-local|check|verify)\b/gi, + /\b(?:npm|pnpm|yarn)\s+test\b/gi, + /\b(?:uv\s+run\s+)?pytest(?:\s+[^\n`]+)?/gi, + /\bnode\s+--test(?:\s+[^\n`]+)?/gi, + /\bgo\s+test(?:\s+[^\n`]+)?/gi +]; + +function walkRepoFiles(rootDir, options = {}) { + const maxDepth = Number.isInteger(options.maxDepth) ? options.maxDepth : Number.POSITIVE_INFINITY; + const results = []; + const queue = [{ absoluteDir: rootDir, relativeDir: "", depth: 0 }]; + const visitedDirectories = new Set(); + + try { + visitedDirectories.add(fs.realpathSync.native(rootDir)); + } catch { + visitedDirectories.add(rootDir); + } + + while (queue.length > 0) { + const current = queue.shift(); + const entries = fs.readdirSync(current.absoluteDir, { withFileTypes: true }); + for (const entry of entries) { + const absolutePath = path.join(current.absoluteDir, entry.name); + const relativePath = current.relativeDir ? path.posix.join(current.relativeDir, entry.name) : entry.name; + const normalizedName = entry.name.toLowerCase(); + if (entry.isDirectory() || entry.isSymbolicLink()) { + if (WALK_SKIP_DIRS.has(normalizedName) || current.depth >= maxDepth) { + continue; + } + let stat; + try { + stat = fs.statSync(absolutePath); + } catch { + continue; + } + if (stat.isDirectory()) { + let directoryKey; + try { + directoryKey = fs.realpathSync.native(absolutePath); + } catch { + directoryKey = absolutePath; + } + if (visitedDirectories.has(directoryKey)) { + continue; + } + visitedDirectories.add(directoryKey); + queue.push({ + absoluteDir: absolutePath, + relativeDir: relativePath, + depth: current.depth + 1 + }); + continue; + } + if (entry.isSymbolicLink() && stat.isFile()) { + results.push(relativePath); + } + continue; + } + if (entry.isFile()) { + results.push(relativePath); + } + } + } + + return results.sort(); +} + +function readTrimmedFile(repoRoot, relativePath, maxBytes = GUIDANCE_LIMIT_BYTES) { + const absolutePath = path.join(repoRoot, relativePath); + const buffer = fs.readFileSync(absolutePath); + if (buffer.length <= maxBytes) { + return buffer.toString("utf8").trim(); + } + return `${buffer.subarray(0, maxBytes).toString("utf8").trim()}\n...[truncated]`; +} + +function guidanceSortKey(relativePath) { + const baseName = path.basename(relativePath).toLowerCase(); + const preferenceIndex = GUIDANCE_PREFERENCE.indexOf(baseName); + return [preferenceIndex === -1 ? GUIDANCE_PREFERENCE.length : preferenceIndex, relativePath]; +} + +function collectGuidanceFiles(repoRoot) { + const repoFiles = walkRepoFiles(repoRoot, { maxDepth: MAX_GUIDANCE_DEPTH }); + const matches = repoFiles + .filter((relativePath) => GUIDANCE_PREFERENCE.includes(path.basename(relativePath).toLowerCase())) + .sort((left, right) => { + const [leftIndex, leftPath] = guidanceSortKey(left); + const [rightIndex, rightPath] = guidanceSortKey(right); + return leftIndex - rightIndex || leftPath.localeCompare(rightPath); + }); + + return matches.map((relativePath) => ({ + path: relativePath, + content: readTrimmedFile(repoRoot, relativePath) + })); +} + +function isTestFile(relativePath) { + const normalized = relativePath.replace(/\\/g, "/"); + const baseName = path.basename(normalized).toLowerCase(); + const parts = normalized.split("/"); + if (parts.some((part) => TEST_DIR_NAMES.has(part.toLowerCase()))) { + return true; + } + return ( + baseName.includes(".test.") || + baseName.includes(".spec.") || + baseName.startsWith("test_") || + baseName.endsWith("_test.go") + ); +} + +function uniqueSorted(values) { + return [...new Set(values.filter(Boolean))].sort(); +} + +function detectPrimaryTestLocations(testFiles) { + return uniqueSorted( + testFiles.map((relativePath) => { + const parts = relativePath.replace(/\\/g, "/").split("/"); + const testDirIndex = parts.findIndex((part) => TEST_DIR_NAMES.has(part.toLowerCase())); + if (testDirIndex >= 0) { + return parts.slice(0, testDirIndex + 1).join("/"); + } + return path.posix.dirname(relativePath); + }) + ); +} + +function extnamePreservingDeclaration(relativePath) { + const normalized = relativePath.replace(/\\/g, "/"); + if (normalized.endsWith(".d.ts")) { + return ".ts"; + } + return path.extname(normalized); +} + +function fileStem(relativePath) { + const normalized = relativePath.replace(/\\/g, "/"); + const baseName = path.basename(normalized); + if (baseName.endsWith(".d.ts")) { + return baseName.slice(0, -5); + } + const extension = path.extname(baseName); + return extension ? baseName.slice(0, -extension.length) : baseName; +} + +function inferPreferredJavascriptTestExtension(testFiles) { + const counts = new Map(); + for (const file of testFiles) { + const baseName = path.basename(file).toLowerCase(); + if (!baseName.includes(".test.") && !baseName.includes(".spec.")) { + continue; + } + const extension = path.extname(baseName) || ".js"; + counts.set(extension, (counts.get(extension) ?? 0) + 1); + } + const ranked = [...counts.entries()].sort((left, right) => right[1] - left[1] || left[0].localeCompare(right[0])); + return ranked[0]?.[0] ?? ".js"; +} + +function buildTestFileCandidates(relativePath, testFiles, primaryLocations) { + const normalized = relativePath.replace(/\\/g, "/"); + const baseName = path.basename(normalized).toLowerCase(); + const stem = fileStem(normalized); + const extension = extnamePreservingDeclaration(normalized).toLowerCase(); + const dirName = path.posix.dirname(normalized); + + const directMatches = testFiles.filter((candidate) => { + const candidateBaseName = path.basename(candidate).toLowerCase(); + if (candidateBaseName === baseName) { + return true; + } + if (candidateBaseName.includes(`${stem.toLowerCase()}.test.`) || candidateBaseName.includes(`${stem.toLowerCase()}.spec.`)) { + return true; + } + if (candidateBaseName === `test_${stem.toLowerCase()}.py`) { + return true; + } + if (candidateBaseName === `${stem.toLowerCase()}_test.go`) { + return true; + } + return false; + }); + if (directMatches.length > 0) { + return uniqueSorted(directMatches).map((candidate) => ({ path: candidate, action: "update" })); + } + + if (extension === ".go") { + return [{ path: path.posix.join(dirName, `${stem}_test.go`), action: "create" }]; + } + + if (extension === ".py") { + const preferredRoot = primaryLocations.find((location) => TEST_DIR_NAMES.has(path.posix.basename(location).toLowerCase())); + if (!preferredRoot) { + return []; + } + return [{ path: path.posix.join(preferredRoot, `test_${stem}.py`), action: "create" }]; + } + + if ([".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"].includes(extension)) { + const preferredRoot = + primaryLocations.find((location) => TEST_DIR_NAMES.has(path.posix.basename(location).toLowerCase())) ?? + primaryLocations[0]; + if (!preferredRoot) { + return []; + } + const preferredExtension = inferPreferredJavascriptTestExtension(testFiles) || extension || ".js"; + const relativeParts = normalized.split("/"); + const withoutFile = relativeParts.slice(0, -1); + const strippedParts = SOURCE_ROOT_NAMES.has(withoutFile[0]?.toLowerCase()) ? withoutFile.slice(1) : withoutFile; + const candidateDir = path.posix.join(preferredRoot, ...strippedParts); + return [{ path: path.posix.join(candidateDir, `${stem}.test${preferredExtension}`), action: "create" }]; + } + + return []; +} + +function inferTestPlan(changedFiles, testFiles, primaryLocations) { + const entries = []; + for (const relativePath of changedFiles) { + if (isTestFile(relativePath)) { + entries.push({ + sourcePath: relativePath, + targets: [{ path: relativePath, action: "update" }] + }); + continue; + } + const targets = buildTestFileCandidates(relativePath, testFiles, primaryLocations); + if (targets.length === 0) { + continue; + } + entries.push({ + sourcePath: relativePath, + targets + }); + } + + return entries; +} + +function formatGuidanceSection(guidanceFiles) { + return guidanceFiles + .map( + (file) => [ + `### ${file.path}`, + "```md", + file.content || "(empty)", + "```" + ].join("\n") + ) + .join("\n\n"); +} + +function formatTestLayout(testFiles, primaryLocations) { + const lines = [ + `Primary test locations: ${primaryLocations.join(", ")}`, + "", + "Known test files:" + ]; + for (const file of testFiles.slice(0, 40)) { + lines.push(`- ${file}`); + } + if (testFiles.length > 40) { + lines.push(`- ... ${testFiles.length - 40} more`); + } + return lines.join("\n"); +} + +function formatPlannedTestChanges(entries) { + return entries + .map((entry) => { + const lines = [`- ${entry.sourcePath}`]; + for (const target of entry.targets) { + lines.push(` - ${target.action}: ${target.path}`); + } + return lines.join("\n"); + }) + .join("\n"); +} + +function extractTestCommandsFromText(text) { + const matches = []; + for (const pattern of TEST_COMMAND_PATTERNS) { + for (const match of text.matchAll(pattern)) { + const candidate = match[0].trim().replace(/[.,;:]+$/, ""); + if (candidate) { + matches.push(candidate); + } + } + } + return matches; +} + +function collectSuggestedTestCommands(repoRoot, guidanceFiles) { + const candidates = []; + for (const guidance of guidanceFiles) { + candidates.push(...extractTestCommandsFromText(guidance.content)); + } + + const makefilePath = path.join(repoRoot, "Makefile"); + if (fs.existsSync(makefilePath)) { + const makefile = fs.readFileSync(makefilePath, "utf8"); + candidates.push(...extractTestCommandsFromText(makefile)); + if (/^test-ci:/m.test(makefile)) { + candidates.push("make test-ci"); + } + if (/^test-local:/m.test(makefile)) { + candidates.push("make test-local"); + } + if (/^test:/m.test(makefile)) { + candidates.push("make test"); + } + } + + return uniqueSorted(candidates).slice(0, TEST_COMMAND_LIMIT); +} + +function formatSuggestedTestCommands(commands) { + if (commands.length === 0) { + return "No repository-specific test command could be inferred from the available guidance."; + } + return commands.map((command) => `- ${command}`).join("\n"); +} + +export function collectTestCommandContext(cwd, options = {}) { + const repoRoot = getRepoRoot(cwd); + const target = + options.target ?? + resolveReviewTarget(cwd, { + base: options.base, + scope: options.scope + }); + const reviewContext = collectReviewContext(repoRoot, target, options); + const guidanceFiles = collectGuidanceFiles(repoRoot); + if (guidanceFiles.length === 0) { + throw new Error("No project guidance found: expected at least one of CLAUDE.md, AGENTS.md, README.md."); + } + + const repoFiles = walkRepoFiles(repoRoot); + const testFiles = repoFiles.filter((relativePath) => isTestFile(relativePath)); + const primaryLocations = detectPrimaryTestLocations(testFiles); + if (primaryLocations.length === 0) { + throw new Error("No test layout detected for this repository."); + } + + const productionFiles = reviewContext.changedFiles.filter((relativePath) => !isTestFile(relativePath)); + const testPlanEntries = inferTestPlan(productionFiles, testFiles, primaryLocations); + if (testPlanEntries.length === 0) { + throw new Error("Unable to infer test targets from changed files."); + } + const suggestedTestCommands = collectSuggestedTestCommands(repoRoot, guidanceFiles); + + return { + repoRoot, + target, + reviewContext, + guidanceFiles, + productionFiles, + testFiles, + primaryLocations, + testPlanEntries, + suggestedTestCommands, + renderedGuidance: formatGuidanceSection(guidanceFiles), + renderedTestLayout: formatTestLayout(testFiles, primaryLocations), + renderedPlan: formatPlannedTestChanges(testPlanEntries), + renderedSuggestedCommands: formatSuggestedTestCommands(suggestedTestCommands) + }; +} diff --git a/tests/commands.test.mjs b/tests/commands.test.mjs index ef5adb09..2c4e03b0 100644 --- a/tests/commands.test.mjs +++ b/tests/commands.test.mjs @@ -70,6 +70,39 @@ test("adversarial review command uses AskUserQuestion and background Bash while assert.match(source, /can still take extra focus text after the flags/i); }); +test("test command uses AskUserQuestion and background Bash while enforcing a test-only workflow", () => { + const source = read("commands/test.md"); + const readme = fs.readFileSync(path.join(ROOT, "README.md"), "utf8"); + + assert.match(source, /AskUserQuestion/); + assert.match(source, /\bBash\(/); + assert.match(source, /return Codex's output verbatim to the user/i); + assert.match(source, /test-only/i); + assert.match(source, /Do not modify production code by default/i); + assert.match(source, /Fail closed if the runtime cannot collect the required repository context/i); + const testPrompt = read("prompts/write-tests.md"); + assert.match(testPrompt, /Do not delete existing tests unless the tested behavior is explicitly removed by this diff/i); + assert.match(testPrompt, /updated equivalent covering the same intent/i); + assert.match(source, /```bash/); + assert.match(source, /```typescript/); + assert.match(source, /test "\$ARGUMENTS"/); + assert.match(source, /\[--scope auto\|working-tree\|branch\]/); + assert.match(source, /--model /); + assert.match(source, /--effort /); + assert.match(source, /run_in_background:\s*true/); + assert.match(source, /command:\s*`node "\$\{CLAUDE_PLUGIN_ROOT\}\/scripts\/codex-companion\.mjs" test "\$ARGUMENTS"`/); + assert.match(source, /description:\s*"Codex test writing"/); + assert.match(source, /git status --short --untracked-files=all/); + assert.match(source, /git diff --shortstat/); + assert.match(source, /When in doubt, run the command instead of claiming there is no test work to do/i); + assert.match(source, /Claude Code's `Bash\(..., run_in_background: true\)` is what actually detaches the run/i); + assert.match(source, /\(Recommended\)/); + + assert.match(readme, /### `\/codex:test`/); + assert.match(readme, /Codex writes the matching tests for the current diff/i); + assert.match(readme, /avoid modifying production code by default/i); +}); + test("continue is not exposed as a user-facing command", () => { const commandFiles = fs.readdirSync(path.join(PLUGIN_ROOT, "commands")).sort(); assert.deepEqual(commandFiles, [ @@ -79,7 +112,8 @@ test("continue is not exposed as a user-facing command", () => { "result.md", "review.md", "setup.md", - "status.md" + "status.md", + "test.md" ]); }); @@ -165,9 +199,9 @@ test("result and cancel commands are exposed as deterministic runtime entrypoint const resultHandling = read("skills/codex-result-handling/SKILL.md"); assert.match(result, /disable-model-invocation:\s*true/); - assert.match(result, /codex-companion\.mjs" result \$ARGUMENTS/); + assert.match(result, /codex-companion\.mjs" result "\$ARGUMENTS"/); assert.match(cancel, /disable-model-invocation:\s*true/); - assert.match(cancel, /codex-companion\.mjs" cancel \$ARGUMENTS/); + assert.match(cancel, /codex-companion\.mjs" cancel "\$ARGUMENTS"/); assert.match(resultHandling, /do not turn a failed or incomplete Codex run into a Claude-side implementation attempt/i); assert.match(resultHandling, /if Codex was never successfully invoked, do not generate a substitute answer at all/i); }); diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index 90408372..0c6ea051 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -211,6 +211,116 @@ test("task reports the actual Codex auth error when the run is rejected", () => assert.match(result.stderr, /authentication expired; run codex login/); }); +test("test command fails closed when no project guidance files are available", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.mkdirSync(path.join(repo, "src")); + fs.mkdirSync(path.join(repo, "tests")); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = 1;\n"); + fs.writeFileSync(path.join(repo, "tests", "app.test.mjs"), "export {};\n"); + run("git", ["add", "src/app.js", "tests/app.test.mjs"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = 2;\n"); + + const result = run("node", [SCRIPT, "test"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 1); + assert.match(result.stderr, /No project guidance found/i); + assert.match(result.stderr, /CLAUDE\.md, AGENTS\.md, README\.md/i); +}); + +test("test command fails closed when no test layout is detectable", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.mkdirSync(path.join(repo, "src")); + fs.writeFileSync(path.join(repo, "README.md"), "# Sample project\n"); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = 1;\n"); + run("git", ["add", "README.md", "src/app.js"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = 2;\n"); + + const result = run("node", [SCRIPT, "test"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 1); + assert.match(result.stderr, /No test layout detected/i); +}); + +test("test command builds a strict prompt from project guidance, diff context, and inferred test targets", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + const statePath = path.join(binDir, "fake-codex-state.json"); + installFakeCodex(binDir); + initGitRepo(repo); + fs.mkdirSync(path.join(repo, "src")); + fs.mkdirSync(path.join(repo, "tests")); + fs.writeFileSync(path.join(repo, "README.md"), "# Demo\n\nThis project demonstrates the plugin.\n"); + fs.writeFileSync(path.join(repo, "CLAUDE.md"), "Always prefer focused regression tests.\n"); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export function getValue() { return 1; }\n"); + fs.writeFileSync(path.join(repo, "tests", "app.test.mjs"), "import test from \"node:test\";\n"); + run("git", ["add", "README.md", "CLAUDE.md", "src/app.js", "tests/app.test.mjs"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export function getValue(items) { return items[0].id; }\n"); + + const result = run("node", [SCRIPT, "test"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + assert.match(result.stdout, /Handled the requested task/); + + const state = JSON.parse(fs.readFileSync(statePath, "utf8")); + assert.match(state.lastTurnStart.prompt, /Author purpose:/); + assert.match(state.lastTurnStart.prompt, /Touched production files:/); + assert.match(state.lastTurnStart.prompt, /Detected test locations:/); + assert.match(state.lastTurnStart.prompt, /Planned test file changes:/); + assert.match(state.lastTurnStart.prompt, /README\.md/); + assert.match(state.lastTurnStart.prompt, /CLAUDE\.md/); + assert.match(state.lastTurnStart.prompt, /src\/app\.js/); + assert.match(state.lastTurnStart.prompt, /tests\/app\.test\.mjs/); + assert.match(state.lastTurnStart.prompt, /Do not modify production code by default/i); + assert.match(state.lastTurnStart.prompt, /If you believe a production code change is required, stop and explain why instead of editing it/i); +}); + +test("test command surfaces repository test commands in the prompt when it can infer them", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + const statePath = path.join(binDir, "fake-codex-state.json"); + installFakeCodex(binDir); + initGitRepo(repo); + fs.mkdirSync(path.join(repo, "src")); + fs.mkdirSync(path.join(repo, "tests")); + fs.writeFileSync(path.join(repo, "README.md"), "# Demo\n\nRun `npm test` for the fast suite.\n"); + fs.writeFileSync(path.join(repo, "CLAUDE.md"), "Prefer `make test-ci` before any full suite.\n"); + fs.writeFileSync(path.join(repo, "Makefile"), "test:\n\tnpm test\n\ntest-ci:\n\tnode --test tests/*.test.mjs\n"); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export function getValue() { return 1; }\n"); + fs.writeFileSync(path.join(repo, "tests", "app.test.mjs"), "import test from \"node:test\";\n"); + run("git", ["add", "README.md", "CLAUDE.md", "Makefile", "src/app.js", "tests/app.test.mjs"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export function getValue(items) { return items[0].id; }\n"); + + const result = run("node", [SCRIPT, "test"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + const state = JSON.parse(fs.readFileSync(statePath, "utf8")); + assert.match(state.lastTurnStart.prompt, //); + assert.match(state.lastTurnStart.prompt, /make test-ci/); + assert.match(state.lastTurnStart.prompt, /npm test/); +}); + test("review accepts the quoted raw argument style for built-in base-branch review", () => { const repo = makeTempDir(); const binDir = makeTempDir(); @@ -833,6 +943,110 @@ test("task --background enqueues a detached worker and exposes per-job status", assert.match(resultPayload.storedJob.rendered, /Handled the requested task/); }); +test("test --background enqueues a detached worker and exposes per-job status", async () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "slow-task"); + initGitRepo(repo); + fs.mkdirSync(path.join(repo, "src")); + fs.mkdirSync(path.join(repo, "tests")); + fs.writeFileSync(path.join(repo, "README.md"), "# Demo\n"); + fs.writeFileSync(path.join(repo, "CLAUDE.md"), "Prefer focused regression tests.\n"); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export function getValue() { return 1; }\n"); + fs.writeFileSync(path.join(repo, "tests", "app.test.mjs"), "import test from \"node:test\";\n"); + run("git", ["add", "README.md", "CLAUDE.md", "src/app.js", "tests/app.test.mjs"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export function getValue(items) { return items[0].id; }\n"); + + const launched = run("node", [SCRIPT, "test", "--background", "--json"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(launched.status, 0, launched.stderr); + const launchPayload = JSON.parse(launched.stdout); + assert.equal(launchPayload.status, "queued"); + assert.match(launchPayload.jobId, /^test-/); + + const waitedStatus = run( + "node", + [SCRIPT, "status", launchPayload.jobId, "--wait", "--timeout-ms", "15000", "--json"], + { + cwd: repo, + env: buildEnv(binDir) + } + ); + + assert.equal(waitedStatus.status, 0, waitedStatus.stderr); + const waitedPayload = JSON.parse(waitedStatus.stdout); + assert.equal(waitedPayload.job.id, launchPayload.jobId); + assert.equal(waitedPayload.job.status, "completed"); + + const resultPayload = await waitFor(() => { + const result = run("node", [SCRIPT, "result", launchPayload.jobId, "--json"], { + cwd: repo, + env: buildEnv(binDir) + }); + if (result.status !== 0) { + return null; + } + return JSON.parse(result.stdout); + }); + + assert.equal(resultPayload.job.id, launchPayload.jobId); + assert.equal(resultPayload.job.status, "completed"); + assert.match(resultPayload.storedJob.rendered, /Handled the requested task/); +}); + +test("task-worker routes test jobs to executeTestRun", () => { + const workspace = makeTempDir(); + const repo = path.join(workspace, "repo"); + const binDir = makeTempDir(); + const jobsDir = path.join(resolveStateDir(workspace), "jobs"); + installFakeCodex(binDir); + fs.mkdirSync(repo, { recursive: true }); + fs.mkdirSync(jobsDir, { recursive: true }); + initGitRepo(repo); + fs.mkdirSync(path.join(repo, "src")); + fs.mkdirSync(path.join(repo, "tests")); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = 1;\n"); + fs.writeFileSync(path.join(repo, "tests", "app.test.mjs"), "import test from \"node:test\";\n"); + run("git", ["add", "src/app.js", "tests/app.test.mjs"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = 2;\n"); + + const jobId = "test-worker-job"; + fs.writeFileSync( + path.join(jobsDir, `${jobId}.json`), + `${JSON.stringify( + { + id: jobId, + kind: "test", + jobClass: "task", + title: "Codex Test", + workspaceRoot: workspace, + request: { + kind: "test", + cwd: repo, + scope: "working-tree", + jobId + } + }, + null, + 2 + )}\n`, + "utf8" + ); + + const result = run("node", [SCRIPT, "task-worker", "--cwd", workspace, "--job-id", jobId], { + cwd: workspace, + env: buildEnv(binDir) + }); + + assert.notEqual(result.status, 0); + assert.match(result.stderr, /No project guidance found/i); +}); + test("review rejects focus text because it is native-review only", () => { const repo = makeTempDir(); const binDir = makeTempDir(); From 9140ee6c3bbc685d317087cc00ec198b32a2c19a Mon Sep 17 00:00:00 2001 From: qiankunli Date: Sat, 11 Apr 2026 18:38:30 +0800 Subject: [PATCH 02/15] fix: skip symlinked directories during test context walk --- plugins/codex/scripts/lib/test-context.mjs | 43 ++++++++++++---------- tests/git.test.mjs | 18 +++++++++ 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/plugins/codex/scripts/lib/test-context.mjs b/plugins/codex/scripts/lib/test-context.mjs index 4072bdf6..e3a4f34f 100644 --- a/plugins/codex/scripts/lib/test-context.mjs +++ b/plugins/codex/scripts/lib/test-context.mjs @@ -47,37 +47,40 @@ function walkRepoFiles(rootDir, options = {}) { const absolutePath = path.join(current.absoluteDir, entry.name); const relativePath = current.relativeDir ? path.posix.join(current.relativeDir, entry.name) : entry.name; const normalizedName = entry.name.toLowerCase(); - if (entry.isDirectory() || entry.isSymbolicLink()) { + if (entry.isDirectory()) { if (WALK_SKIP_DIRS.has(normalizedName) || current.depth >= maxDepth) { continue; } - let stat; + let directoryKey; try { - stat = fs.statSync(absolutePath); + directoryKey = fs.realpathSync.native(absolutePath); } catch { + directoryKey = absolutePath; + } + if (visitedDirectories.has(directoryKey)) { continue; } - if (stat.isDirectory()) { - let directoryKey; - try { - directoryKey = fs.realpathSync.native(absolutePath); - } catch { - directoryKey = absolutePath; - } - if (visitedDirectories.has(directoryKey)) { - continue; - } - visitedDirectories.add(directoryKey); - queue.push({ - absoluteDir: absolutePath, - relativeDir: relativePath, - depth: current.depth + 1 - }); + visitedDirectories.add(directoryKey); + queue.push({ + absoluteDir: absolutePath, + relativeDir: relativePath, + depth: current.depth + 1 + }); + continue; + } + if (entry.isSymbolicLink()) { + let stat; + try { + stat = fs.statSync(absolutePath); + } catch { continue; } - if (entry.isSymbolicLink() && stat.isFile()) { + if (stat.isFile()) { results.push(relativePath); } + // Skip symlinked directories for now: following them can escape repoRoot + // and pull unrelated files into /codex:test. If we need this later, + // constrain traversal to realpaths that still stay under repoRoot. continue; } if (entry.isFile()) { diff --git a/tests/git.test.mjs b/tests/git.test.mjs index 14ff2576..ac408f6e 100644 --- a/tests/git.test.mjs +++ b/tests/git.test.mjs @@ -4,6 +4,7 @@ import test from "node:test"; import assert from "node:assert/strict"; import { collectReviewContext, resolveReviewTarget } from "../plugins/codex/scripts/lib/git.mjs"; +import { collectTestCommandContext } from "../plugins/codex/scripts/lib/test-context.mjs"; import { initGitRepo, makeTempDir, run } from "./helpers.mjs"; test("resolveReviewTarget prefers working tree when repo is dirty", () => { @@ -181,3 +182,20 @@ test("collectReviewContext keeps untracked file content in lightweight working t assert.match(context.content, /## Untracked Files/); assert.match(context.content, /UNTRACKED_RISK_MARKER/); }); + +test("collectTestCommandContext ignores symlinked test directories outside the repo", () => { + const cwd = makeTempDir(); + const externalTests = makeTempDir(); + initGitRepo(cwd); + fs.mkdirSync(path.join(cwd, "src")); + fs.writeFileSync(path.join(cwd, "README.md"), "# Sample project\n"); + fs.writeFileSync(path.join(cwd, "src", "app.js"), "export const value = 1;\n"); + fs.mkdirSync(path.join(externalTests, "nested")); + fs.writeFileSync(path.join(externalTests, "nested", "app.test.mjs"), "import test from 'node:test';\n"); + fs.symlinkSync(externalTests, path.join(cwd, "tests")); + run("git", ["add", "README.md", "src/app.js", "tests"], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "src", "app.js"), "export const value = 2;\n"); + + assert.throws(() => collectTestCommandContext(cwd), /No test layout detected/i); +}); From 263e07752183a68f27f474a8a344091ddbdca9c8 Mon Sep 17 00:00:00 2001 From: qiankunli Date: Sat, 11 Apr 2026 18:49:05 +0800 Subject: [PATCH 03/15] fix: include self-collect guidance in test prompt --- plugins/codex/prompts/write-tests.md | 4 +++ plugins/codex/scripts/codex-companion.mjs | 1 + tests/runtime.test.mjs | 33 +++++++++++++++++++++++ 3 files changed, 38 insertions(+) diff --git a/plugins/codex/prompts/write-tests.md b/plugins/codex/prompts/write-tests.md index 50d0a288..379c64fa 100644 --- a/plugins/codex/prompts/write-tests.md +++ b/plugins/codex/prompts/write-tests.md @@ -47,6 +47,10 @@ If the repository does not expose a clear test command, run the narrowest comman {{PROJECT_GUIDANCE}} + +{{DIFF_COLLECTION_GUIDANCE}} + + {{DIFF_CONTEXT}} diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 032928e9..7431ab19 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -253,6 +253,7 @@ function buildWriteTestsPrompt(context) { return interpolateTemplate(template, { TARGET_LABEL: context.target.label, PROJECT_GUIDANCE: context.renderedGuidance, + DIFF_COLLECTION_GUIDANCE: context.reviewContext.collectionGuidance, DIFF_CONTEXT: context.reviewContext.content, TEST_LAYOUT: context.renderedTestLayout, TEST_PLAN: context.renderedPlan, diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index 0c6ea051..19707cb1 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -321,6 +321,39 @@ test("test command surfaces repository test commands in the prompt when it can i assert.match(state.lastTurnStart.prompt, /npm test/); }); +test("test command includes self-collect guidance when diff context is downgraded", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + const statePath = path.join(binDir, "fake-codex-state.json"); + installFakeCodex(binDir); + initGitRepo(repo); + fs.mkdirSync(path.join(repo, "src")); + fs.mkdirSync(path.join(repo, "tests")); + for (const name of ["a.js", "b.js", "c.js"]) { + fs.writeFileSync(path.join(repo, "src", name), `export const value = "${name}-v1";\n`); + fs.writeFileSync(path.join(repo, "tests", `${name.replace(".js", ".test.mjs")}`), "import test from \"node:test\";\n"); + } + fs.writeFileSync(path.join(repo, "README.md"), "# Demo\n"); + run("git", ["add", "README.md", "src/a.js", "src/b.js", "src/c.js", "tests/a.test.mjs", "tests/b.test.mjs", "tests/c.test.mjs"], { + cwd: repo + }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "src", "a.js"), 'export const value = "PROMPT_SELF_COLLECT_A";\n'); + fs.writeFileSync(path.join(repo, "src", "b.js"), 'export const value = "PROMPT_SELF_COLLECT_B";\n'); + fs.writeFileSync(path.join(repo, "src", "c.js"), 'export const value = "PROMPT_SELF_COLLECT_C";\n'); + + const result = run("node", [SCRIPT, "test"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + const state = JSON.parse(fs.readFileSync(statePath, "utf8")); + assert.match(state.lastTurnStart.prompt, /lightweight summary/i); + assert.match(state.lastTurnStart.prompt, /read-only git commands/i); + assert.doesNotMatch(state.lastTurnStart.prompt, /PROMPT_SELF_COLLECT_[ABC]/); +}); + test("review accepts the quoted raw argument style for built-in base-branch review", () => { const repo = makeTempDir(); const binDir = makeTempDir(); From b53a036a9cb5190fd448225a469bc70309f0b4c6 Mon Sep 17 00:00:00 2001 From: qiankunli Date: Sat, 11 Apr 2026 18:58:22 +0800 Subject: [PATCH 04/15] fix: harden test context repo walk --- plugins/codex/scripts/lib/test-context.mjs | 28 ++++++++++++--- tests/git.test.mjs | 42 ++++++++++++++++++++++ 2 files changed, 65 insertions(+), 5 deletions(-) diff --git a/plugins/codex/scripts/lib/test-context.mjs b/plugins/codex/scripts/lib/test-context.mjs index e3a4f34f..cee3dcc9 100644 --- a/plugins/codex/scripts/lib/test-context.mjs +++ b/plugins/codex/scripts/lib/test-context.mjs @@ -28,17 +28,24 @@ const TEST_COMMAND_PATTERNS = [ /\bgo\s+test(?:\s+[^\n`]+)?/gi ]; +function isPathWithinRoot(rootPath, targetPath) { + const relative = path.relative(rootPath, targetPath); + return relative === "" || (!relative.startsWith("..") && !path.isAbsolute(relative)); +} + function walkRepoFiles(rootDir, options = {}) { const maxDepth = Number.isInteger(options.maxDepth) ? options.maxDepth : Number.POSITIVE_INFINITY; const results = []; const queue = [{ absoluteDir: rootDir, relativeDir: "", depth: 0 }]; const visitedDirectories = new Set(); + let rootRealPath; try { - visitedDirectories.add(fs.realpathSync.native(rootDir)); + rootRealPath = fs.realpathSync.native(rootDir); } catch { - visitedDirectories.add(rootDir); + rootRealPath = rootDir; } + visitedDirectories.add(rootRealPath); while (queue.length > 0) { const current = queue.shift(); @@ -69,12 +76,17 @@ function walkRepoFiles(rootDir, options = {}) { continue; } if (entry.isSymbolicLink()) { + let realPath; let stat; try { + realPath = fs.realpathSync.native(absolutePath); stat = fs.statSync(absolutePath); } catch { continue; } + if (!isPathWithinRoot(rootRealPath, realPath)) { + continue; + } if (stat.isFile()) { results.push(relativePath); } @@ -173,6 +185,10 @@ function fileStem(relativePath) { return extension ? baseName.slice(0, -extension.length) : baseName; } +function escapeRegExp(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + function inferPreferredJavascriptTestExtension(testFiles) { const counts = new Map(); for (const file of testFiles) { @@ -191,21 +207,23 @@ function buildTestFileCandidates(relativePath, testFiles, primaryLocations) { const normalized = relativePath.replace(/\\/g, "/"); const baseName = path.basename(normalized).toLowerCase(); const stem = fileStem(normalized); + const loweredStem = stem.toLowerCase(); const extension = extnamePreservingDeclaration(normalized).toLowerCase(); const dirName = path.posix.dirname(normalized); + const exactJavascriptTestPattern = new RegExp(`^${escapeRegExp(loweredStem)}\\.(?:test|spec)\\.`); const directMatches = testFiles.filter((candidate) => { const candidateBaseName = path.basename(candidate).toLowerCase(); if (candidateBaseName === baseName) { return true; } - if (candidateBaseName.includes(`${stem.toLowerCase()}.test.`) || candidateBaseName.includes(`${stem.toLowerCase()}.spec.`)) { + if (exactJavascriptTestPattern.test(candidateBaseName)) { return true; } - if (candidateBaseName === `test_${stem.toLowerCase()}.py`) { + if (candidateBaseName === `test_${loweredStem}.py`) { return true; } - if (candidateBaseName === `${stem.toLowerCase()}_test.go`) { + if (candidateBaseName === `${loweredStem}_test.go`) { return true; } return false; diff --git a/tests/git.test.mjs b/tests/git.test.mjs index ac408f6e..b7b8cc57 100644 --- a/tests/git.test.mjs +++ b/tests/git.test.mjs @@ -199,3 +199,45 @@ test("collectTestCommandContext ignores symlinked test directories outside the r assert.throws(() => collectTestCommandContext(cwd), /No test layout detected/i); }); + +test("collectTestCommandContext ignores symlinked guidance files outside the repo", () => { + const cwd = makeTempDir(); + const externalDir = makeTempDir(); + initGitRepo(cwd); + fs.mkdirSync(path.join(cwd, "src")); + fs.mkdirSync(path.join(cwd, "tests")); + fs.writeFileSync(path.join(cwd, "src", "app.js"), "export const value = 1;\n"); + fs.writeFileSync(path.join(cwd, "tests", "app.test.mjs"), "import test from 'node:test';\n"); + fs.writeFileSync(path.join(externalDir, "README.md"), "# external guidance\n"); + fs.symlinkSync(path.join(externalDir, "README.md"), path.join(cwd, "README.md")); + run("git", ["add", "README.md", "src/app.js", "tests/app.test.mjs"], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "src", "app.js"), "export const value = 2;\n"); + + assert.throws( + () => collectTestCommandContext(cwd), + /No project guidance found: expected at least one of CLAUDE\.md, AGENTS\.md, README\.md\./i + ); +}); + +test("collectTestCommandContext matches javascript test stems by boundary", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.mkdirSync(path.join(cwd, "src")); + fs.mkdirSync(path.join(cwd, "tests")); + fs.writeFileSync(path.join(cwd, "README.md"), "# Sample project\n"); + fs.writeFileSync(path.join(cwd, "src", "id.js"), "export const id = 1;\n"); + fs.writeFileSync(path.join(cwd, "tests", "userid.test.js"), "test('userid', () => {});\n"); + run("git", ["add", "README.md", "src/id.js", "tests/userid.test.js"], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "src", "id.js"), "export const id = 2;\n"); + + const context = collectTestCommandContext(cwd); + + assert.deepEqual(context.testPlanEntries, [ + { + sourcePath: "src/id.js", + targets: [{ path: "tests/id.test.js", action: "create" }] + } + ]); +}); From 4993510a4e2c72f724e5a6c59e7e5ff5e9d0c360 Mon Sep 17 00:00:00 2001 From: qiankunli Date: Sat, 11 Apr 2026 19:19:22 +0800 Subject: [PATCH 05/15] fix: scope test targets within package roots --- plugins/codex/scripts/lib/test-context.mjs | 91 +++++++++++++++++++--- tests/git.test.mjs | 50 ++++++++++++ 2 files changed, 132 insertions(+), 9 deletions(-) diff --git a/plugins/codex/scripts/lib/test-context.mjs b/plugins/codex/scripts/lib/test-context.mjs index cee3dcc9..39df90eb 100644 --- a/plugins/codex/scripts/lib/test-context.mjs +++ b/plugins/codex/scripts/lib/test-context.mjs @@ -203,14 +203,85 @@ function inferPreferredJavascriptTestExtension(testFiles) { return ranked[0]?.[0] ?? ".js"; } +function longestSharedPrefixLength(left, right) { + const length = Math.min(left.length, right.length); + let index = 0; + while (index < length && left[index] === right[index]) { + index += 1; + } + return index; +} + +function getPrimaryLocationScopeParts(location) { + const parts = location.replace(/\\/g, "/").split("/"); + const testDirIndex = parts.findIndex((part) => TEST_DIR_NAMES.has(part.toLowerCase())); + return testDirIndex >= 0 ? parts.slice(0, testDirIndex) : parts; +} + +function rankPrimaryLocationsForSource(relativePath, primaryLocations) { + const normalized = relativePath.replace(/\\/g, "/"); + const sourceDir = path.posix.dirname(normalized); + const sourceDirParts = sourceDir === "." ? [] : sourceDir.split("/"); + const ranked = primaryLocations + .map((location) => { + const scopeParts = getPrimaryLocationScopeParts(location); + const sharedPrefixLength = longestSharedPrefixLength(scopeParts, sourceDirParts); + const scopeMatchesSource = + scopeParts.length <= sourceDirParts.length && scopeParts.every((part, index) => sourceDirParts[index] === part); + return { + location, + scopeParts, + sharedPrefixLength, + scopeMatchesSource + }; + }) + .sort((left, right) => { + return ( + Number(right.scopeMatchesSource) - Number(left.scopeMatchesSource) || + right.sharedPrefixLength - left.sharedPrefixLength || + right.scopeParts.length - left.scopeParts.length || + left.location.localeCompare(right.location) + ); + }); + if (ranked.length === 0) { + return []; + } + const best = ranked[0]; + return ranked + .filter( + (location) => + location.scopeMatchesSource === best.scopeMatchesSource && + location.sharedPrefixLength === best.sharedPrefixLength && + location.scopeParts.length === best.scopeParts.length + ) + .map((location) => location.location); +} + +function relativeDirUnderSourceRoot(relativePath) { + const normalized = relativePath.replace(/\\/g, "/"); + const dirName = path.posix.dirname(normalized); + if (dirName === ".") { + return []; + } + const dirParts = dirName.split("/"); + const sourceRootIndex = dirParts.findIndex((part) => SOURCE_ROOT_NAMES.has(part.toLowerCase())); + if (sourceRootIndex >= 0) { + return dirParts.slice(sourceRootIndex + 1); + } + return dirParts; +} + function buildTestFileCandidates(relativePath, testFiles, primaryLocations) { const normalized = relativePath.replace(/\\/g, "/"); const baseName = path.basename(normalized).toLowerCase(); const stem = fileStem(normalized); const loweredStem = stem.toLowerCase(); const extension = extnamePreservingDeclaration(normalized).toLowerCase(); - const dirName = path.posix.dirname(normalized); const exactJavascriptTestPattern = new RegExp(`^${escapeRegExp(loweredStem)}\\.(?:test|spec)\\.`); + // In monorepos, keep both direct matches and new test targets inside the + // nearest package-local test root so /codex:test does not spill across packages. + const rankedPrimaryLocations = rankPrimaryLocationsForSource(normalized, primaryLocations); + const preferredRoots = new Set(rankedPrimaryLocations); const directMatches = testFiles.filter((candidate) => { const candidateBaseName = path.basename(candidate).toLowerCase(); @@ -229,15 +300,21 @@ function buildTestFileCandidates(relativePath, testFiles, primaryLocations) { return false; }); if (directMatches.length > 0) { - return uniqueSorted(directMatches).map((candidate) => ({ path: candidate, action: "update" })); + const scopedMatches = + preferredRoots.size === 0 + ? directMatches + : directMatches.filter((candidate) => [...preferredRoots].some((root) => candidate === root || candidate.startsWith(`${root}/`))); + const effectiveMatches = scopedMatches.length > 0 ? scopedMatches : directMatches; + return uniqueSorted(effectiveMatches).map((candidate) => ({ path: candidate, action: "update" })); } if (extension === ".go") { + const dirName = path.posix.dirname(normalized); return [{ path: path.posix.join(dirName, `${stem}_test.go`), action: "create" }]; } if (extension === ".py") { - const preferredRoot = primaryLocations.find((location) => TEST_DIR_NAMES.has(path.posix.basename(location).toLowerCase())); + const preferredRoot = rankedPrimaryLocations[0]; if (!preferredRoot) { return []; } @@ -245,16 +322,12 @@ function buildTestFileCandidates(relativePath, testFiles, primaryLocations) { } if ([".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"].includes(extension)) { - const preferredRoot = - primaryLocations.find((location) => TEST_DIR_NAMES.has(path.posix.basename(location).toLowerCase())) ?? - primaryLocations[0]; + const preferredRoot = rankedPrimaryLocations[0]; if (!preferredRoot) { return []; } const preferredExtension = inferPreferredJavascriptTestExtension(testFiles) || extension || ".js"; - const relativeParts = normalized.split("/"); - const withoutFile = relativeParts.slice(0, -1); - const strippedParts = SOURCE_ROOT_NAMES.has(withoutFile[0]?.toLowerCase()) ? withoutFile.slice(1) : withoutFile; + const strippedParts = relativeDirUnderSourceRoot(normalized); const candidateDir = path.posix.join(preferredRoot, ...strippedParts); return [{ path: path.posix.join(candidateDir, `${stem}.test${preferredExtension}`), action: "create" }]; } diff --git a/tests/git.test.mjs b/tests/git.test.mjs index b7b8cc57..940c3193 100644 --- a/tests/git.test.mjs +++ b/tests/git.test.mjs @@ -241,3 +241,53 @@ test("collectTestCommandContext matches javascript test stems by boundary", () = } ]); }); + +test("collectTestCommandContext creates JS tests under the nearest package-local test root", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.mkdirSync(path.join(cwd, "packages", "a", "tests"), { recursive: true }); + fs.mkdirSync(path.join(cwd, "packages", "b", "src"), { recursive: true }); + fs.mkdirSync(path.join(cwd, "packages", "b", "tests"), { recursive: true }); + fs.writeFileSync(path.join(cwd, "README.md"), "# monorepo\n"); + fs.writeFileSync(path.join(cwd, "packages", "a", "tests", "shared.test.js"), "test('a', () => {});\n"); + fs.writeFileSync(path.join(cwd, "packages", "b", "tests", "existing.test.js"), "test('b', () => {});\n"); + fs.writeFileSync(path.join(cwd, "packages", "b", "src", "new.js"), "export const value = 1;\n"); + run("git", ["add", "."], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "packages", "b", "src", "new.js"), "export const value = 2;\n"); + + const context = collectTestCommandContext(cwd); + + assert.deepEqual(context.testPlanEntries, [ + { + sourcePath: "packages/b/src/new.js", + targets: [{ path: "packages/b/tests/new.test.js", action: "create" }] + } + ]); +}); + +test("collectTestCommandContext scopes direct JS test matches to the nearest package", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.mkdirSync(path.join(cwd, "packages", "a", "src"), { recursive: true }); + fs.mkdirSync(path.join(cwd, "packages", "a", "tests"), { recursive: true }); + fs.mkdirSync(path.join(cwd, "packages", "b", "src"), { recursive: true }); + fs.mkdirSync(path.join(cwd, "packages", "b", "tests"), { recursive: true }); + fs.writeFileSync(path.join(cwd, "README.md"), "# monorepo\n"); + fs.writeFileSync(path.join(cwd, "packages", "a", "src", "id.js"), "export const id = 'a';\n"); + fs.writeFileSync(path.join(cwd, "packages", "a", "tests", "id.test.js"), "test('a', () => {});\n"); + fs.writeFileSync(path.join(cwd, "packages", "b", "src", "id.js"), "export const id = 'b';\n"); + fs.writeFileSync(path.join(cwd, "packages", "b", "tests", "id.test.js"), "test('b', () => {});\n"); + run("git", ["add", "."], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "packages", "b", "src", "id.js"), "export const id = 'b2';\n"); + + const context = collectTestCommandContext(cwd); + + assert.deepEqual(context.testPlanEntries, [ + { + sourcePath: "packages/b/src/id.js", + targets: [{ path: "packages/b/tests/id.test.js", action: "update" }] + } + ]); +}); From 25d556d8041cfeab4eca99ac5ea9b5e0bb79cc8b Mon Sep 17 00:00:00 2001 From: qiankunli Date: Sat, 11 Apr 2026 20:23:01 +0800 Subject: [PATCH 06/15] fix: ignore deleted sources in test planning --- plugins/codex/scripts/lib/test-context.mjs | 10 ++++-- tests/git.test.mjs | 37 ++++++++++++++++++++++ 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/plugins/codex/scripts/lib/test-context.mjs b/plugins/codex/scripts/lib/test-context.mjs index 39df90eb..aa113caa 100644 --- a/plugins/codex/scripts/lib/test-context.mjs +++ b/plugins/codex/scripts/lib/test-context.mjs @@ -318,7 +318,8 @@ function buildTestFileCandidates(relativePath, testFiles, primaryLocations) { if (!preferredRoot) { return []; } - return [{ path: path.posix.join(preferredRoot, `test_${stem}.py`), action: "create" }]; + const strippedParts = relativeDirUnderSourceRoot(normalized); + return [{ path: path.posix.join(preferredRoot, ...strippedParts, `test_${stem}.py`), action: "create" }]; } if ([".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"].includes(extension)) { @@ -335,9 +336,12 @@ function buildTestFileCandidates(relativePath, testFiles, primaryLocations) { return []; } -function inferTestPlan(changedFiles, testFiles, primaryLocations) { +function inferTestPlan(repoRoot, changedFiles, testFiles, primaryLocations) { const entries = []; for (const relativePath of changedFiles) { + if (!fs.existsSync(path.join(repoRoot, relativePath))) { + continue; + } if (isTestFile(relativePath)) { entries.push({ sourcePath: relativePath, @@ -464,7 +468,7 @@ export function collectTestCommandContext(cwd, options = {}) { } const productionFiles = reviewContext.changedFiles.filter((relativePath) => !isTestFile(relativePath)); - const testPlanEntries = inferTestPlan(productionFiles, testFiles, primaryLocations); + const testPlanEntries = inferTestPlan(repoRoot, productionFiles, testFiles, primaryLocations); if (testPlanEntries.length === 0) { throw new Error("Unable to infer test targets from changed files."); } diff --git a/tests/git.test.mjs b/tests/git.test.mjs index 940c3193..5aa84360 100644 --- a/tests/git.test.mjs +++ b/tests/git.test.mjs @@ -291,3 +291,40 @@ test("collectTestCommandContext scopes direct JS test matches to the nearest pac } ]); }); + +test("collectTestCommandContext skips deleted source files when inferring targets", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.mkdirSync(path.join(cwd, "src")); + fs.mkdirSync(path.join(cwd, "tests")); + fs.writeFileSync(path.join(cwd, "README.md"), "# Sample project\n"); + fs.writeFileSync(path.join(cwd, "src", "old.js"), "export const value = 1;\n"); + fs.writeFileSync(path.join(cwd, "tests", "old.test.js"), "test('old', () => {});\n"); + run("git", ["add", "."], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.rmSync(path.join(cwd, "src", "old.js")); + + assert.throws(() => collectTestCommandContext(cwd), /Unable to infer test targets from changed files/i); +}); + +test("collectTestCommandContext preserves source subdirectories for new Python tests", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.mkdirSync(path.join(cwd, "src", "pkg"), { recursive: true }); + fs.mkdirSync(path.join(cwd, "tests", "pkg"), { recursive: true }); + fs.writeFileSync(path.join(cwd, "README.md"), "# python project\n"); + fs.writeFileSync(path.join(cwd, "tests", "pkg", "test_existing.py"), "def test_existing():\n assert True\n"); + fs.writeFileSync(path.join(cwd, "src", "pkg", "foo.py"), "VALUE = 1\n"); + run("git", ["add", "."], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "src", "pkg", "foo.py"), "VALUE = 2\n"); + + const context = collectTestCommandContext(cwd); + + assert.deepEqual(context.testPlanEntries, [ + { + sourcePath: "src/pkg/foo.py", + targets: [{ path: "tests/pkg/test_foo.py", action: "create" }] + } + ]); +}); From a7365f42094fe553585210c9aee943536a415a18 Mon Sep 17 00:00:00 2001 From: qiankunli Date: Sat, 11 Apr 2026 20:47:26 +0800 Subject: [PATCH 07/15] fix: cap codex test guidance context --- README.md | 3 + docs/codex-test-design.md | 83 ++++++++++++++++++++++ plugins/codex/scripts/lib/test-context.mjs | 33 ++++++--- tests/git.test.mjs | 28 ++++++++ 4 files changed, 139 insertions(+), 8 deletions(-) create mode 100644 docs/codex-test-design.md diff --git a/README.md b/README.md index 401ce9b0..6dce9c48 100644 --- a/README.md +++ b/README.md @@ -153,6 +153,9 @@ By default, Codex should: - update or create the matching tests - avoid modifying production code by default +Implementation and maintenance notes for `/codex:test` live in +[`docs/codex-test-design.md`](./docs/codex-test-design.md). + ### `/codex:rescue` Hands a task to Codex through the `codex:codex-rescue` subagent. diff --git a/docs/codex-test-design.md b/docs/codex-test-design.md new file mode 100644 index 00000000..11d103a5 --- /dev/null +++ b/docs/codex-test-design.md @@ -0,0 +1,83 @@ +# `/codex:test` Design Notes + +This note summarizes the stable design constraints behind `/codex:test`. +It is intended for maintainers who need to evolve the test-planning pipeline +without re-learning the failure modes from past review cycles. + +## Core Principles + +1. Fail closed when required context is missing. + +`/codex:test` should stop rather than guess when it cannot gather enough +repository context. Missing project guidance, missing test layout, or missing +test targets should be treated as hard failures instead of soft fallbacks. + +2. Keep repository context inside the repository boundary. + +Repo walking must not escape `repoRoot`. Symlinked directories are skipped, +and symlinked files are only eligible when their realpath still stays under +the repository root. This prevents unrelated files or host secrets from being +pulled into the prompt. + +3. Bound the prompt budget globally, not only per file. + +Project guidance is useful, but unbounded guidance collection makes `/codex:test` +fragile in monorepos. Guidance files are prioritized and then capped by both a +small file-count limit and a total byte budget, with shallow high-priority files +winning over deep package-local READMEs. + +4. Treat self-collected diff context as a first-class mode. + +When the diff is too large to inline, the prompt must still tell Codex how to +collect the missing patch context with read-only git commands. Large changes +should degrade to a lighter summary, not to silent loss of guidance. + +5. Only infer tests from live source files. + +Changed-path lists can include deleted files. Deletion-only changes should not +cause `/codex:test` to propose creating brand-new tests for removed code, so +planning must ignore source paths that no longer exist in the working tree. + +## Test Target Selection + +1. Prefer the nearest package-local test root. + +In monorepos, test planning should stay inside the package or module that owns +the changed source file. When a direct match is missing, new test targets should +be created under the nearest compatible test root instead of the first `tests/` +directory discovered anywhere in the repository. + +2. Scope direct matches by locality, not basename alone. + +Two packages can legitimately contain the same test basename such as +`id.test.js`. Basename matches are only safe after they have been narrowed to +the nearest package-local test root. Otherwise `/codex:test` may edit tests in +an unrelated package. + +3. Preserve source subdirectories in created test paths. + +When a new test file is created, the path should preserve the source structure +after the language-specific source root. For example: + +- `src/pkg/foo.py -> tests/pkg/test_foo.py` +- `packages/b/src/new.js -> packages/b/tests/new.test.js` + +Flattening nested paths causes collisions across modules with the same stem and +makes the planned test target drift away from the changed code. + +4. Match existing tests conservatively. + +Substring-based matching is too loose. `id` should not match `userid.test.js`, +and similarly named files in sibling packages should not be pulled into the same +plan. Matching should optimize for "smallest safe target set", even if that +means falling back to creating a new test file more often. + +## Maintenance Notes + +- If you loosen repo-walk or symlink behavior, add tests that prove prompt + inputs still stay under `repoRoot`. +- If you change guidance selection, keep both a file-count cap and a total-byte + cap unless there is a stronger replacement. +- If you change path inference, add monorepo fixtures that cover both direct + matches and create-path planning. +- If you change diff collection, verify both inline-diff and self-collect modes. diff --git a/plugins/codex/scripts/lib/test-context.mjs b/plugins/codex/scripts/lib/test-context.mjs index aa113caa..0987813e 100644 --- a/plugins/codex/scripts/lib/test-context.mjs +++ b/plugins/codex/scripts/lib/test-context.mjs @@ -5,6 +5,8 @@ import { collectReviewContext, getRepoRoot, resolveReviewTarget } from "./git.mj const GUIDANCE_PREFERENCE = ["claude.md", "agents.md", "readme.md"]; const GUIDANCE_LIMIT_BYTES = 24 * 1024; +const GUIDANCE_TOTAL_LIMIT_BYTES = 48 * 1024; +const GUIDANCE_MAX_FILES = 6; const MAX_GUIDANCE_DEPTH = 3; const TEST_COMMAND_LIMIT = 6; const WALK_SKIP_DIRS = new Set([ @@ -116,7 +118,8 @@ function readTrimmedFile(repoRoot, relativePath, maxBytes = GUIDANCE_LIMIT_BYTES function guidanceSortKey(relativePath) { const baseName = path.basename(relativePath).toLowerCase(); const preferenceIndex = GUIDANCE_PREFERENCE.indexOf(baseName); - return [preferenceIndex === -1 ? GUIDANCE_PREFERENCE.length : preferenceIndex, relativePath]; + const depth = relativePath.split("/").length - 1; + return [preferenceIndex === -1 ? GUIDANCE_PREFERENCE.length : preferenceIndex, depth, relativePath]; } function collectGuidanceFiles(repoRoot) { @@ -124,15 +127,29 @@ function collectGuidanceFiles(repoRoot) { const matches = repoFiles .filter((relativePath) => GUIDANCE_PREFERENCE.includes(path.basename(relativePath).toLowerCase())) .sort((left, right) => { - const [leftIndex, leftPath] = guidanceSortKey(left); - const [rightIndex, rightPath] = guidanceSortKey(right); - return leftIndex - rightIndex || leftPath.localeCompare(rightPath); + const [leftIndex, leftDepth, leftPath] = guidanceSortKey(left); + const [rightIndex, rightDepth, rightPath] = guidanceSortKey(right); + return leftIndex - rightIndex || leftDepth - rightDepth || leftPath.localeCompare(rightPath); }); + const guidanceFiles = []; + let totalBytes = 0; + for (const relativePath of matches) { + if (guidanceFiles.length >= GUIDANCE_MAX_FILES) { + break; + } + const content = readTrimmedFile(repoRoot, relativePath); + const contentBytes = Buffer.byteLength(content, "utf8"); + if (guidanceFiles.length > 0 && totalBytes + contentBytes > GUIDANCE_TOTAL_LIMIT_BYTES) { + continue; + } + guidanceFiles.push({ + path: relativePath, + content + }); + totalBytes += contentBytes; + } - return matches.map((relativePath) => ({ - path: relativePath, - content: readTrimmedFile(repoRoot, relativePath) - })); + return guidanceFiles; } function isTestFile(relativePath) { diff --git a/tests/git.test.mjs b/tests/git.test.mjs index 5aa84360..12c628cb 100644 --- a/tests/git.test.mjs +++ b/tests/git.test.mjs @@ -328,3 +328,31 @@ test("collectTestCommandContext preserves source subdirectories for new Python t } ]); }); + +test("collectTestCommandContext caps guidance files in large monorepos", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.mkdirSync(path.join(cwd, "src")); + fs.mkdirSync(path.join(cwd, "tests")); + fs.writeFileSync(path.join(cwd, "AGENTS.md"), "# root agents\n"); + fs.writeFileSync(path.join(cwd, "README.md"), "# root readme\n"); + fs.writeFileSync(path.join(cwd, "src", "app.js"), "export const value = 1;\n"); + fs.writeFileSync(path.join(cwd, "tests", "app.test.js"), "test('app', () => {});\n"); + for (let index = 0; index < 10; index += 1) { + const packageDir = path.join(cwd, "packages", `pkg-${index}`); + fs.mkdirSync(packageDir, { recursive: true }); + fs.writeFileSync(path.join(packageDir, "README.md"), `# package ${index}\n${"docs\n".repeat(1024)}`); + } + run("git", ["add", "."], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "src", "app.js"), "export const value = 2;\n"); + + const context = collectTestCommandContext(cwd); + + assert.ok(context.guidanceFiles.length < 12); + assert.deepEqual( + context.guidanceFiles.slice(0, 2).map((file) => file.path), + ["AGENTS.md", "README.md"] + ); + assert.ok(!context.guidanceFiles.some((file) => file.path === "packages/pkg-9/README.md")); +}); From 58f78aa0e539f05d66688c33b6565aef2929d642 Mon Sep 17 00:00:00 2001 From: qiankunli Date: Sun, 12 Apr 2026 00:23:56 +0800 Subject: [PATCH 08/15] fix: fail closed for ambiguous test roots --- docs/codex-test-design.md | 4 +++- plugins/codex/scripts/lib/test-context.mjs | 3 +++ tests/git.test.mjs | 17 +++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/docs/codex-test-design.md b/docs/codex-test-design.md index 11d103a5..737bec4e 100644 --- a/docs/codex-test-design.md +++ b/docs/codex-test-design.md @@ -45,7 +45,9 @@ planning must ignore source paths that no longer exist in the working tree. In monorepos, test planning should stay inside the package or module that owns the changed source file. When a direct match is missing, new test targets should be created under the nearest compatible test root instead of the first `tests/` -directory discovered anywhere in the repository. +directory discovered anywhere in the repository. If no detected test root +actually belongs to the changed source's package, `/codex:test` should fail +closed instead of selecting the closest-looking package by shared path prefix. 2. Scope direct matches by locality, not basename alone. diff --git a/plugins/codex/scripts/lib/test-context.mjs b/plugins/codex/scripts/lib/test-context.mjs index 0987813e..ec01674c 100644 --- a/plugins/codex/scripts/lib/test-context.mjs +++ b/plugins/codex/scripts/lib/test-context.mjs @@ -264,6 +264,9 @@ function rankPrimaryLocationsForSource(relativePath, primaryLocations) { return []; } const best = ranked[0]; + if (!best.scopeMatchesSource) { + return []; + } return ranked .filter( (location) => diff --git a/tests/git.test.mjs b/tests/git.test.mjs index 12c628cb..854bcf3d 100644 --- a/tests/git.test.mjs +++ b/tests/git.test.mjs @@ -356,3 +356,20 @@ test("collectTestCommandContext caps guidance files in large monorepos", () => { ); assert.ok(!context.guidanceFiles.some((file) => file.path === "packages/pkg-9/README.md")); }); + +test("collectTestCommandContext fails closed when no package-local test root matches the source", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.mkdirSync(path.join(cwd, "tools"), { recursive: true }); + fs.mkdirSync(path.join(cwd, "packages", "a", "tests"), { recursive: true }); + fs.mkdirSync(path.join(cwd, "packages", "b", "tests"), { recursive: true }); + fs.writeFileSync(path.join(cwd, "README.md"), "# monorepo\n"); + fs.writeFileSync(path.join(cwd, "tools", "gen.js"), "export const generate = () => 1;\n"); + fs.writeFileSync(path.join(cwd, "packages", "a", "tests", "a.test.js"), "test('a', () => {});\n"); + fs.writeFileSync(path.join(cwd, "packages", "b", "tests", "b.test.js"), "test('b', () => {});\n"); + run("git", ["add", "."], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "tools", "gen.js"), "export const generate = () => 2;\n"); + + assert.throws(() => collectTestCommandContext(cwd), /Unable to infer test targets from changed files/i); +}); From fd4ca74c55ff259dff465eb800a0fdffa7be05f7 Mon Sep 17 00:00:00 2001 From: qiankunli Date: Sun, 12 Apr 2026 00:36:05 +0800 Subject: [PATCH 09/15] fix: scope JS test extensions by package root --- plugins/codex/scripts/lib/test-context.mjs | 11 ++++++--- tests/git.test.mjs | 26 ++++++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/plugins/codex/scripts/lib/test-context.mjs b/plugins/codex/scripts/lib/test-context.mjs index ec01674c..27317fa5 100644 --- a/plugins/codex/scripts/lib/test-context.mjs +++ b/plugins/codex/scripts/lib/test-context.mjs @@ -206,9 +206,14 @@ function escapeRegExp(value) { return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } -function inferPreferredJavascriptTestExtension(testFiles) { +function inferPreferredJavascriptTestExtension(testFiles, options = {}) { + const scopeRoots = Array.isArray(options.scopeRoots) ? options.scopeRoots.filter(Boolean) : []; + const scopedFiles = + scopeRoots.length === 0 + ? testFiles + : testFiles.filter((file) => scopeRoots.some((root) => file === root || file.startsWith(`${root}/`))); const counts = new Map(); - for (const file of testFiles) { + for (const file of scopedFiles) { const baseName = path.basename(file).toLowerCase(); if (!baseName.includes(".test.") && !baseName.includes(".spec.")) { continue; @@ -347,7 +352,7 @@ function buildTestFileCandidates(relativePath, testFiles, primaryLocations) { if (!preferredRoot) { return []; } - const preferredExtension = inferPreferredJavascriptTestExtension(testFiles) || extension || ".js"; + const preferredExtension = inferPreferredJavascriptTestExtension(testFiles, { scopeRoots: [preferredRoot] }) || extension || ".js"; const strippedParts = relativeDirUnderSourceRoot(normalized); const candidateDir = path.posix.join(preferredRoot, ...strippedParts); return [{ path: path.posix.join(candidateDir, `${stem}.test${preferredExtension}`), action: "create" }]; diff --git a/tests/git.test.mjs b/tests/git.test.mjs index 854bcf3d..5c219125 100644 --- a/tests/git.test.mjs +++ b/tests/git.test.mjs @@ -373,3 +373,29 @@ test("collectTestCommandContext fails closed when no package-local test root mat assert.throws(() => collectTestCommandContext(cwd), /Unable to infer test targets from changed files/i); }); + +test("collectTestCommandContext infers JS test extensions from the selected package root", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.mkdirSync(path.join(cwd, "packages", "a", "src"), { recursive: true }); + fs.mkdirSync(path.join(cwd, "packages", "a", "tests"), { recursive: true }); + fs.mkdirSync(path.join(cwd, "packages", "b", "src"), { recursive: true }); + fs.mkdirSync(path.join(cwd, "packages", "b", "tests"), { recursive: true }); + fs.writeFileSync(path.join(cwd, "README.md"), "# mixed conventions\n"); + fs.writeFileSync(path.join(cwd, "packages", "a", "tests", "alpha.test.ts"), "test('alpha', () => {});\n"); + fs.writeFileSync(path.join(cwd, "packages", "a", "tests", "beta.test.ts"), "test('beta', () => {});\n"); + fs.writeFileSync(path.join(cwd, "packages", "b", "tests", "existing.test.js"), "test('existing', () => {});\n"); + fs.writeFileSync(path.join(cwd, "packages", "b", "src", "new.js"), "export const value = 1;\n"); + run("git", ["add", "."], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "packages", "b", "src", "new.js"), "export const value = 2;\n"); + + const context = collectTestCommandContext(cwd); + + assert.deepEqual(context.testPlanEntries, [ + { + sourcePath: "packages/b/src/new.js", + targets: [{ path: "packages/b/tests/new.test.js", action: "create" }] + } + ]); +}); From fb1574e50100b3325100f0eb4ff0aa78b1d17440 Mon Sep 17 00:00:00 2001 From: qiankunli Date: Sun, 12 Apr 2026 11:41:07 +0800 Subject: [PATCH 10/15] fix: build test paths from selected roots --- plugins/codex/scripts/lib/test-context.mjs | 15 ++++---- tests/git.test.mjs | 42 ++++++++++++++++++++++ 2 files changed, 51 insertions(+), 6 deletions(-) diff --git a/plugins/codex/scripts/lib/test-context.mjs b/plugins/codex/scripts/lib/test-context.mjs index 27317fa5..875c3595 100644 --- a/plugins/codex/scripts/lib/test-context.mjs +++ b/plugins/codex/scripts/lib/test-context.mjs @@ -282,18 +282,21 @@ function rankPrimaryLocationsForSource(relativePath, primaryLocations) { .map((location) => location.location); } -function relativeDirUnderSourceRoot(relativePath) { +function relativeDirWithinSelectedTestRoot(relativePath, preferredRoot) { const normalized = relativePath.replace(/\\/g, "/"); const dirName = path.posix.dirname(normalized); if (dirName === ".") { return []; } const dirParts = dirName.split("/"); - const sourceRootIndex = dirParts.findIndex((part) => SOURCE_ROOT_NAMES.has(part.toLowerCase())); + const scopeParts = preferredRoot ? getPrimaryLocationScopeParts(preferredRoot) : []; + const relativeToScope = + scopeParts.length > 0 && scopeParts.every((part, index) => dirParts[index] === part) ? dirParts.slice(scopeParts.length) : dirParts; + const sourceRootIndex = relativeToScope.findIndex((part) => SOURCE_ROOT_NAMES.has(part.toLowerCase())); if (sourceRootIndex >= 0) { - return dirParts.slice(sourceRootIndex + 1); + return relativeToScope.slice(sourceRootIndex + 1); } - return dirParts; + return relativeToScope; } function buildTestFileCandidates(relativePath, testFiles, primaryLocations) { @@ -343,7 +346,7 @@ function buildTestFileCandidates(relativePath, testFiles, primaryLocations) { if (!preferredRoot) { return []; } - const strippedParts = relativeDirUnderSourceRoot(normalized); + const strippedParts = relativeDirWithinSelectedTestRoot(normalized, preferredRoot); return [{ path: path.posix.join(preferredRoot, ...strippedParts, `test_${stem}.py`), action: "create" }]; } @@ -353,7 +356,7 @@ function buildTestFileCandidates(relativePath, testFiles, primaryLocations) { return []; } const preferredExtension = inferPreferredJavascriptTestExtension(testFiles, { scopeRoots: [preferredRoot] }) || extension || ".js"; - const strippedParts = relativeDirUnderSourceRoot(normalized); + const strippedParts = relativeDirWithinSelectedTestRoot(normalized, preferredRoot); const candidateDir = path.posix.join(preferredRoot, ...strippedParts); return [{ path: path.posix.join(candidateDir, `${stem}.test${preferredExtension}`), action: "create" }]; } diff --git a/tests/git.test.mjs b/tests/git.test.mjs index 5c219125..3241b222 100644 --- a/tests/git.test.mjs +++ b/tests/git.test.mjs @@ -399,3 +399,45 @@ test("collectTestCommandContext infers JS test extensions from the selected pack } ]); }); + +test("collectTestCommandContext builds JS test paths relative to the selected test root", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.mkdirSync(path.join(cwd, "packages", "b", "tests"), { recursive: true }); + fs.writeFileSync(path.join(cwd, "README.md"), "# package layout without src\n"); + fs.writeFileSync(path.join(cwd, "packages", "b", "tests", "existing.test.js"), "test('existing', () => {});\n"); + fs.writeFileSync(path.join(cwd, "packages", "b", "new.js"), "export const value = 1;\n"); + run("git", ["add", "."], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "packages", "b", "new.js"), "export const value = 2;\n"); + + const context = collectTestCommandContext(cwd); + + assert.deepEqual(context.testPlanEntries, [ + { + sourcePath: "packages/b/new.js", + targets: [{ path: "packages/b/tests/new.test.js", action: "create" }] + } + ]); +}); + +test("collectTestCommandContext builds Python test paths relative to the selected test root", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.mkdirSync(path.join(cwd, "packages", "b", "tests"), { recursive: true }); + fs.writeFileSync(path.join(cwd, "README.md"), "# python package layout without src\n"); + fs.writeFileSync(path.join(cwd, "packages", "b", "tests", "test_existing.py"), "def test_existing():\n assert True\n"); + fs.writeFileSync(path.join(cwd, "packages", "b", "foo.py"), "VALUE = 1\n"); + run("git", ["add", "."], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "packages", "b", "foo.py"), "VALUE = 2\n"); + + const context = collectTestCommandContext(cwd); + + assert.deepEqual(context.testPlanEntries, [ + { + sourcePath: "packages/b/foo.py", + targets: [{ path: "packages/b/tests/test_foo.py", action: "create" }] + } + ]); +}); From 2f5995e9b76cafb28e46590d480a9d2799430ac4 Mon Sep 17 00:00:00 2001 From: qiankunli Date: Sun, 12 Apr 2026 17:27:00 +0800 Subject: [PATCH 11/15] fix: normalize nonstandard test root scopes --- plugins/codex/scripts/lib/test-context.mjs | 13 ++++++- tests/git.test.mjs | 43 ++++++++++++++++++++++ 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/plugins/codex/scripts/lib/test-context.mjs b/plugins/codex/scripts/lib/test-context.mjs index 875c3595..a33feb98 100644 --- a/plugins/codex/scripts/lib/test-context.mjs +++ b/plugins/codex/scripts/lib/test-context.mjs @@ -235,9 +235,18 @@ function longestSharedPrefixLength(left, right) { } function getPrimaryLocationScopeParts(location) { - const parts = location.replace(/\\/g, "/").split("/"); + const normalized = location.replace(/\\/g, "/"); + if (normalized === ".") { + return []; + } + const parts = normalized.split("/").filter((part) => part !== "."); const testDirIndex = parts.findIndex((part) => TEST_DIR_NAMES.has(part.toLowerCase())); - return testDirIndex >= 0 ? parts.slice(0, testDirIndex) : parts; + if (testDirIndex >= 0) { + return parts.slice(0, testDirIndex); + } + // For nonstandard test roots like "spec/" (or "packages/a/spec"), the + // location itself is the test root, so scope matching should use its parent. + return parts.slice(0, -1); } function rankPrimaryLocationsForSource(relativePath, primaryLocations) { diff --git a/tests/git.test.mjs b/tests/git.test.mjs index 3241b222..1655e965 100644 --- a/tests/git.test.mjs +++ b/tests/git.test.mjs @@ -441,3 +441,46 @@ test("collectTestCommandContext builds Python test paths relative to the selecte } ]); }); + +test("collectTestCommandContext accepts repo-wide spec directories as test roots", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.mkdirSync(path.join(cwd, "src"), { recursive: true }); + fs.mkdirSync(path.join(cwd, "spec"), { recursive: true }); + fs.writeFileSync(path.join(cwd, "README.md"), "# spec layout\n"); + fs.writeFileSync(path.join(cwd, "spec", "existing.test.js"), "test('existing', () => {});\n"); + fs.writeFileSync(path.join(cwd, "src", "foo.js"), "export const value = 1;\n"); + run("git", ["add", "."], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "src", "foo.js"), "export const value = 2;\n"); + + const context = collectTestCommandContext(cwd); + + assert.deepEqual(context.testPlanEntries, [ + { + sourcePath: "src/foo.js", + targets: [{ path: "spec/foo.test.js", action: "create" }] + } + ]); +}); + +test("collectTestCommandContext accepts repo-root test files as a valid layout", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.mkdirSync(path.join(cwd, "src"), { recursive: true }); + fs.writeFileSync(path.join(cwd, "README.md"), "# root tests\n"); + fs.writeFileSync(path.join(cwd, "foo.test.js"), "test('foo', () => {});\n"); + fs.writeFileSync(path.join(cwd, "src", "bar.js"), "export const value = 1;\n"); + run("git", ["add", "."], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "src", "bar.js"), "export const value = 2;\n"); + + const context = collectTestCommandContext(cwd); + + assert.deepEqual(context.testPlanEntries, [ + { + sourcePath: "src/bar.js", + targets: [{ path: "bar.test.js", action: "create" }] + } + ]); +}); From d6ec03c36b7a4a2eb7a573af916983901951a9eb Mon Sep 17 00:00:00 2001 From: qiankunli Date: Sun, 12 Apr 2026 19:05:09 +0800 Subject: [PATCH 12/15] fix: isolate codex test planning from sibling worktrees --- docs/codex-test-design.md | 6 +++ plugins/codex/scripts/codex-companion.mjs | 8 ++- plugins/codex/scripts/lib/test-context.mjs | 7 +++ tests/git.test.mjs | 20 ++++++++ tests/runtime.test.mjs | 58 ++++++++++++++++++++++ 5 files changed, 97 insertions(+), 2 deletions(-) diff --git a/docs/codex-test-design.md b/docs/codex-test-design.md index 737bec4e..50a1fdda 100644 --- a/docs/codex-test-design.md +++ b/docs/codex-test-design.md @@ -4,6 +4,10 @@ This note summarizes the stable design constraints behind `/codex:test`. It is intended for maintainers who need to evolve the test-planning pipeline without re-learning the failure modes from past review cycles. +A useful review question for this command is: "Could uncertainty here cause +`/codex:test` to write tests in the wrong place, or collect the wrong context?" +Most of the constraints below exist to keep the answer to that question "no." + ## Core Principles 1. Fail closed when required context is missing. @@ -11,6 +15,8 @@ without re-learning the failure modes from past review cycles. `/codex:test` should stop rather than guess when it cannot gather enough repository context. Missing project guidance, missing test layout, or missing test targets should be treated as hard failures instead of soft fallbacks. +When uncertainty would otherwise push the command toward the wrong context or +the wrong target file, failing closed is the intended behavior. 2. Keep repository context inside the repository boundary. diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 7431ab19..ac7d2b83 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -315,11 +315,15 @@ function filterJobsForCurrentClaudeSession(jobs) { return jobs.filter((job) => job.sessionId === sessionId); } +function isResumableRescueTask(job) { + return job.jobClass === "task" && job.kind !== "test"; +} + function findLatestResumableTaskJob(jobs) { return ( jobs.find( (job) => - job.jobClass === "task" && + isResumableRescueTask(job) && job.threadId && job.status !== "queued" && job.status !== "running" @@ -350,7 +354,7 @@ async function resolveLatestTrackedTaskThread(cwd, options = {}) { const sessionId = getCurrentClaudeSessionId(); const jobs = sortJobsNewestFirst(listJobs(workspaceRoot)).filter((job) => job.id !== options.excludeJobId); const visibleJobs = filterJobsForCurrentClaudeSession(jobs); - const activeTask = visibleJobs.find((job) => job.jobClass === "task" && (job.status === "queued" || job.status === "running")); + const activeTask = visibleJobs.find((job) => isResumableRescueTask(job) && (job.status === "queued" || job.status === "running")); if (activeTask) { throw new Error(`Task ${activeTask.id} is still running. Use /codex:status before continuing it.`); } diff --git a/plugins/codex/scripts/lib/test-context.mjs b/plugins/codex/scripts/lib/test-context.mjs index a33feb98..0427c802 100644 --- a/plugins/codex/scripts/lib/test-context.mjs +++ b/plugins/codex/scripts/lib/test-context.mjs @@ -35,6 +35,10 @@ function isPathWithinRoot(rootPath, targetPath) { return relative === "" || (!relative.startsWith("..") && !path.isAbsolute(relative)); } +function hasNestedGitCheckout(absoluteDir) { + return fs.existsSync(path.join(absoluteDir, ".git")); +} + function walkRepoFiles(rootDir, options = {}) { const maxDepth = Number.isInteger(options.maxDepth) ? options.maxDepth : Number.POSITIVE_INFINITY; const results = []; @@ -60,6 +64,9 @@ function walkRepoFiles(rootDir, options = {}) { if (WALK_SKIP_DIRS.has(normalizedName) || current.depth >= maxDepth) { continue; } + if (current.relativeDir && hasNestedGitCheckout(absolutePath)) { + continue; + } let directoryKey; try { directoryKey = fs.realpathSync.native(absolutePath); diff --git a/tests/git.test.mjs b/tests/git.test.mjs index 1655e965..7696f060 100644 --- a/tests/git.test.mjs +++ b/tests/git.test.mjs @@ -200,6 +200,26 @@ test("collectTestCommandContext ignores symlinked test directories outside the r assert.throws(() => collectTestCommandContext(cwd), /No test layout detected/i); }); +test("collectTestCommandContext ignores nested worktree directories inside the workspace", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.mkdirSync(path.join(cwd, "src"), { recursive: true }); + fs.writeFileSync(path.join(cwd, "README.md"), "# repo\n"); + fs.writeFileSync(path.join(cwd, "src", "app.js"), "export const value = 1;\n"); + + const nestedWorktreeDir = path.join(cwd, ".claude", "worktrees", "agent-test"); + fs.mkdirSync(nestedWorktreeDir, { recursive: true }); + initGitRepo(nestedWorktreeDir); + fs.mkdirSync(path.join(nestedWorktreeDir, "tests"), { recursive: true }); + fs.writeFileSync(path.join(nestedWorktreeDir, "tests", "app.test.js"), "test('nested', () => {});\n"); + + run("git", ["add", "README.md", "src/app.js"], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "src", "app.js"), "export const value = 2;\n"); + + assert.throws(() => collectTestCommandContext(cwd), /No test layout detected/i); +}); + test("collectTestCommandContext ignores symlinked guidance files outside the repo", () => { const cwd = makeTempDir(); const externalDir = makeTempDir(); diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index 19707cb1..adfff18b 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -579,6 +579,64 @@ test("task-resume-candidate returns the latest rescue thread from the current se assert.equal(payload.candidate.threadId, "thr_current"); }); +test("task-resume-candidate ignores completed /codex:test jobs", () => { + const workspace = makeTempDir(); + const stateDir = resolveStateDir(workspace); + const jobsDir = path.join(stateDir, "jobs"); + fs.mkdirSync(jobsDir, { recursive: true }); + + fs.writeFileSync( + path.join(stateDir, "state.json"), + `${JSON.stringify( + { + version: 1, + config: { stopReviewGate: false }, + jobs: [ + { + id: "test-current", + kind: "test", + status: "completed", + title: "Codex Test", + jobClass: "task", + sessionId: "sess-current", + threadId: "thr_test", + summary: "Write tests for working tree diff", + updatedAt: "2026-03-24T20:00:00.000Z" + }, + { + id: "task-current", + kind: "task", + status: "completed", + title: "Codex Task", + jobClass: "task", + sessionId: "sess-current", + threadId: "thr_task", + summary: "Investigate flaky rescue thread", + updatedAt: "2026-03-24T19:55:00.000Z" + } + ] + }, + null, + 2 + )}\n`, + "utf8" + ); + + const result = run("node", [SCRIPT, "task-resume-candidate", "--json"], { + cwd: workspace, + env: { + ...process.env, + CODEX_COMPANION_SESSION_ID: "sess-current" + } + }); + + assert.equal(result.status, 0, result.stderr); + const payload = JSON.parse(result.stdout); + assert.equal(payload.available, true); + assert.equal(payload.candidate.id, "task-current"); + assert.equal(payload.candidate.threadId, "thr_task"); +}); + test("task --resume-last does not resume a task from another Claude session", () => { const repo = makeTempDir(); const binDir = makeTempDir(); From 65d38a2541f1eba1c128fa9b19ead3a6da2eb0c5 Mon Sep 17 00:00:00 2001 From: qiankunli Date: Sun, 12 Apr 2026 22:56:05 +0800 Subject: [PATCH 13/15] fix: prefer explicit test roots on ties --- plugins/codex/scripts/lib/test-context.mjs | 13 +++++++++++- tests/git.test.mjs | 23 ++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/plugins/codex/scripts/lib/test-context.mjs b/plugins/codex/scripts/lib/test-context.mjs index 0427c802..fd1728ee 100644 --- a/plugins/codex/scripts/lib/test-context.mjs +++ b/plugins/codex/scripts/lib/test-context.mjs @@ -256,6 +256,14 @@ function getPrimaryLocationScopeParts(location) { return parts.slice(0, -1); } +function isExplicitTestDirectoryLocation(location) { + const normalized = location.replace(/\\/g, "/"); + if (normalized === ".") { + return false; + } + return normalized.split("/").some((part) => TEST_DIR_NAMES.has(part.toLowerCase())); +} + function rankPrimaryLocationsForSource(relativePath, primaryLocations) { const normalized = relativePath.replace(/\\/g, "/"); const sourceDir = path.posix.dirname(normalized); @@ -270,13 +278,15 @@ function rankPrimaryLocationsForSource(relativePath, primaryLocations) { location, scopeParts, sharedPrefixLength, - scopeMatchesSource + scopeMatchesSource, + explicitTestDirectory: isExplicitTestDirectoryLocation(location) }; }) .sort((left, right) => { return ( Number(right.scopeMatchesSource) - Number(left.scopeMatchesSource) || right.sharedPrefixLength - left.sharedPrefixLength || + Number(right.explicitTestDirectory) - Number(left.explicitTestDirectory) || right.scopeParts.length - left.scopeParts.length || left.location.localeCompare(right.location) ); @@ -293,6 +303,7 @@ function rankPrimaryLocationsForSource(relativePath, primaryLocations) { (location) => location.scopeMatchesSource === best.scopeMatchesSource && location.sharedPrefixLength === best.sharedPrefixLength && + location.explicitTestDirectory === best.explicitTestDirectory && location.scopeParts.length === best.scopeParts.length ) .map((location) => location.location); diff --git a/tests/git.test.mjs b/tests/git.test.mjs index 7696f060..2782449d 100644 --- a/tests/git.test.mjs +++ b/tests/git.test.mjs @@ -504,3 +504,26 @@ test("collectTestCommandContext accepts repo-root test files as a valid layout", } ]); }); + +test("collectTestCommandContext prefers explicit test directories over repo-root test files on ties", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.mkdirSync(path.join(cwd, "src"), { recursive: true }); + fs.mkdirSync(path.join(cwd, "tests"), { recursive: true }); + fs.writeFileSync(path.join(cwd, "README.md"), "# mixed root layout\n"); + fs.writeFileSync(path.join(cwd, "foo.test.js"), "test('foo', () => {});\n"); + fs.writeFileSync(path.join(cwd, "tests", "existing.test.js"), "test('existing', () => {});\n"); + fs.writeFileSync(path.join(cwd, "src", "new.js"), "export const value = 1;\n"); + run("git", ["add", "."], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "src", "new.js"), "export const value = 2;\n"); + + const context = collectTestCommandContext(cwd); + + assert.deepEqual(context.testPlanEntries, [ + { + sourcePath: "src/new.js", + targets: [{ path: "tests/new.test.js", action: "create" }] + } + ]); +}); From f9f64e17559f2fe37d1fb41dd5b455146c05be54 Mon Sep 17 00:00:00 2001 From: qiankunli Date: Mon, 13 Apr 2026 10:14:31 +0800 Subject: [PATCH 14/15] fix: ignore support files under test roots --- plugins/codex/scripts/lib/test-context.mjs | 12 +++++------ tests/git.test.mjs | 23 ++++++++++++++++++++++ 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/plugins/codex/scripts/lib/test-context.mjs b/plugins/codex/scripts/lib/test-context.mjs index fd1728ee..ddbceb6d 100644 --- a/plugins/codex/scripts/lib/test-context.mjs +++ b/plugins/codex/scripts/lib/test-context.mjs @@ -163,15 +163,15 @@ function isTestFile(relativePath) { const normalized = relativePath.replace(/\\/g, "/"); const baseName = path.basename(normalized).toLowerCase(); const parts = normalized.split("/"); - if (parts.some((part) => TEST_DIR_NAMES.has(part.toLowerCase()))) { - return true; - } - return ( + const matchesTestName = baseName.includes(".test.") || baseName.includes(".spec.") || baseName.startsWith("test_") || - baseName.endsWith("_test.go") - ); + baseName.endsWith("_test.go"); + if (matchesTestName) { + return true; + } + return parts.some((part) => TEST_DIR_NAMES.has(part.toLowerCase())) && matchesTestName; } function uniqueSorted(values) { diff --git a/tests/git.test.mjs b/tests/git.test.mjs index 2782449d..29907b0b 100644 --- a/tests/git.test.mjs +++ b/tests/git.test.mjs @@ -527,3 +527,26 @@ test("collectTestCommandContext prefers explicit test directories over repo-root } ]); }); + +test("collectTestCommandContext ignores support files inside test directories when matching test targets", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.mkdirSync(path.join(cwd, "src"), { recursive: true }); + fs.mkdirSync(path.join(cwd, "tests"), { recursive: true }); + fs.writeFileSync(path.join(cwd, "README.md"), "# support files in tests\n"); + fs.writeFileSync(path.join(cwd, "tests", "config.js"), "export const shared = true;\n"); + fs.writeFileSync(path.join(cwd, "tests", "existing.test.js"), "test('existing', () => {});\n"); + fs.writeFileSync(path.join(cwd, "src", "config.js"), "export const value = 1;\n"); + run("git", ["add", "."], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "src", "config.js"), "export const value = 2;\n"); + + const context = collectTestCommandContext(cwd); + + assert.deepEqual(context.testPlanEntries, [ + { + sourcePath: "src/config.js", + targets: [{ path: "tests/config.test.js", action: "create" }] + } + ]); +}); From 6eee0d0f7f338b93f55c5fd0bb0b1ff05c594bab Mon Sep 17 00:00:00 2001 From: qiankunli Date: Mon, 13 Apr 2026 15:42:36 +0800 Subject: [PATCH 15/15] fix: fail closed on nested and cross-scope tests --- plugins/codex/scripts/lib/test-context.mjs | 18 ++++++----- tests/git.test.mjs | 35 ++++++++++++++++++++++ 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/plugins/codex/scripts/lib/test-context.mjs b/plugins/codex/scripts/lib/test-context.mjs index ddbceb6d..a5f17095 100644 --- a/plugins/codex/scripts/lib/test-context.mjs +++ b/plugins/codex/scripts/lib/test-context.mjs @@ -64,7 +64,7 @@ function walkRepoFiles(rootDir, options = {}) { if (WALK_SKIP_DIRS.has(normalizedName) || current.depth >= maxDepth) { continue; } - if (current.relativeDir && hasNestedGitCheckout(absolutePath)) { + if (hasNestedGitCheckout(absolutePath)) { continue; } let directoryKey; @@ -355,12 +355,16 @@ function buildTestFileCandidates(relativePath, testFiles, primaryLocations) { return false; }); if (directMatches.length > 0) { - const scopedMatches = - preferredRoots.size === 0 - ? directMatches - : directMatches.filter((candidate) => [...preferredRoots].some((root) => candidate === root || candidate.startsWith(`${root}/`))); - const effectiveMatches = scopedMatches.length > 0 ? scopedMatches : directMatches; - return uniqueSorted(effectiveMatches).map((candidate) => ({ path: candidate, action: "update" })); + if (preferredRoots.size === 0) { + return []; + } + const scopedMatches = directMatches.filter((candidate) => + [...preferredRoots].some((root) => candidate === root || candidate.startsWith(`${root}/`)) + ); + if (scopedMatches.length === 0) { + return []; + } + return uniqueSorted(scopedMatches).map((candidate) => ({ path: candidate, action: "update" })); } if (extension === ".go") { diff --git a/tests/git.test.mjs b/tests/git.test.mjs index 29907b0b..ba87012b 100644 --- a/tests/git.test.mjs +++ b/tests/git.test.mjs @@ -220,6 +220,26 @@ test("collectTestCommandContext ignores nested worktree directories inside the w assert.throws(() => collectTestCommandContext(cwd), /No test layout detected/i); }); +test("collectTestCommandContext ignores root-level nested checkouts inside the workspace", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.mkdirSync(path.join(cwd, "src"), { recursive: true }); + fs.writeFileSync(path.join(cwd, "README.md"), "# repo\n"); + fs.writeFileSync(path.join(cwd, "src", "app.js"), "export const value = 1;\n"); + + const nestedRepoDir = path.join(cwd, "nestedrepo"); + fs.mkdirSync(nestedRepoDir, { recursive: true }); + initGitRepo(nestedRepoDir); + fs.mkdirSync(path.join(nestedRepoDir, "tests"), { recursive: true }); + fs.writeFileSync(path.join(nestedRepoDir, "tests", "app.test.js"), "test('nested', () => {});\n"); + + run("git", ["add", "README.md", "src/app.js"], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "src", "app.js"), "export const value = 2;\n"); + + assert.throws(() => collectTestCommandContext(cwd), /No test layout detected/i); +}); + test("collectTestCommandContext ignores symlinked guidance files outside the repo", () => { const cwd = makeTempDir(); const externalDir = makeTempDir(); @@ -394,6 +414,21 @@ test("collectTestCommandContext fails closed when no package-local test root mat assert.throws(() => collectTestCommandContext(cwd), /Unable to infer test targets from changed files/i); }); +test("collectTestCommandContext fails closed when direct test matches only exist outside the source scope", () => { + const cwd = makeTempDir(); + initGitRepo(cwd); + fs.mkdirSync(path.join(cwd, "tools"), { recursive: true }); + fs.mkdirSync(path.join(cwd, "packages", "a", "tests"), { recursive: true }); + fs.writeFileSync(path.join(cwd, "README.md"), "# monorepo\n"); + fs.writeFileSync(path.join(cwd, "tools", "gen.js"), "export const generate = () => 1;\n"); + fs.writeFileSync(path.join(cwd, "packages", "a", "tests", "gen.test.js"), "test('a', () => {});\n"); + run("git", ["add", "."], { cwd }); + run("git", ["commit", "-m", "init"], { cwd }); + fs.writeFileSync(path.join(cwd, "tools", "gen.js"), "export const generate = () => 2;\n"); + + assert.throws(() => collectTestCommandContext(cwd), /Unable to infer test targets from changed files/i); +}); + test("collectTestCommandContext infers JS test extensions from the selected package root", () => { const cwd = makeTempDir(); initGitRepo(cwd);