From 8162734da43a7e86c232589146d58c5ebf46e51f Mon Sep 17 00:00:00 2001 From: ubcent Date: Mon, 16 Mar 2026 11:35:35 +0100 Subject: [PATCH] feat(review): use Copilot CLI as reviewer --- src/commands/start.ts | 14 +- src/lib/copilot.ts | 238 ++++++++++++++++++++++++++++++++ src/lib/review-loop.ts | 10 +- src/types/index.ts | 2 + test/unit/copilot.test.ts | 94 +++++++++++++ test/unit/review-loop.test.ts | 34 ++--- tests/integration/start.test.ts | 16 ++- 7 files changed, 378 insertions(+), 30 deletions(-) create mode 100644 src/lib/copilot.ts create mode 100644 test/unit/copilot.test.ts diff --git a/src/commands/start.ts b/src/commands/start.ts index c99c173..250b1ef 100644 --- a/src/commands/start.ts +++ b/src/commands/start.ts @@ -3,6 +3,7 @@ import path from 'node:path'; import type { Command } from 'commander'; import { ClaudeClient } from '../lib/claude.js'; +import { checkCopilotAvailable, runCopilotReview } from '../lib/copilot.js'; import * as codex from '../lib/codex.js'; import { findProjectRoot, loadConfig } from '../lib/config.js'; import * as git from '../lib/git.js'; @@ -46,6 +47,7 @@ export async function runStart(taskFile: string, options: StartCommandOptions): if (!options.dryRun) { requireAnthropicApiKey(); await codex.checkCodexAvailable(); + await checkCopilotAvailable(); } let state = loadState(projectRoot); @@ -134,6 +136,7 @@ export async function runStart(taskFile: string, options: StartCommandOptions): claude, verbose: options.verbose, log: scopedLogger, + serviceRoot, }); await updateStep(projectRoot, task.id, step.service, { @@ -227,6 +230,7 @@ async function runCloudReviewLoop(opts: { claude: ClaudeClient; verbose?: boolean; log: logger.Logger; + serviceRoot: string; }): Promise<{ sessionId: string; finalIteration: number; lastReviewComments: ReviewComment[]; lastArbiterResult: ArbiterResult }> { let sessionId = opts.sessionId; let iteration = opts.stepState.iteration; @@ -258,11 +262,13 @@ async function runCloudReviewLoop(opts: { const diff = await codex.getDiff(sessionId); opts.log.info(`Running review (iteration ${String(iteration + 1)})...`); - const review = await opts.claude.runReviewer({ - spec: opts.spec, - diff, - model: opts.config.review.model, + + await codex.applyDiff(sessionId); + const comments = await runCopilotReview(opts.spec, { cwd: opts.serviceRoot }).finally(async () => { + await git.exec(['checkout', '.'], opts.serviceRoot); }); + + const review = { comments }; opts.log.reviewSummary(review.comments); opts.log.info(`Requesting arbiter decision (model: ${opts.config.review.model})`); diff --git a/src/lib/copilot.ts b/src/lib/copilot.ts new file mode 100644 index 0000000..77a4fb5 --- /dev/null +++ b/src/lib/copilot.ts @@ -0,0 +1,238 @@ +import { spawn } from 'node:child_process'; + +import type { CommentSeverity, CopilotErrorCode, ReviewComment } from '../types/index.js'; + +const COPILOT_TIMEOUT_MS = 120_000; + +interface CommandResult { + stdout: string; + stderr: string; + exitCode: number; +} + +interface ParsedComment { + message: string; + severity?: unknown; + file?: unknown; + line?: unknown; +} + +/** + * `copilot --output-format=json` emits JSONL (one JSON object per line). The schema appears to vary + * across versions, so this parser is intentionally defensive: it scans parsed object trees for + * message-like fields (`message`/`body`/`text`) plus optional location/severity metadata. + */ +export class CopilotReviewError extends Error { + code: CopilotErrorCode; + stdout: string; + stderr: string; + exitCode: number; + + constructor(code: CopilotErrorCode, message: string, details?: { stdout?: string; stderr?: string; exitCode?: number }) { + super(message); + this.name = 'CopilotReviewError'; + this.code = code; + this.stdout = details?.stdout ?? ''; + this.stderr = details?.stderr ?? ''; + this.exitCode = details?.exitCode ?? 1; + } +} + +export class CopilotNotFoundError extends CopilotReviewError { + constructor() { + super('not_found', 'copilot CLI not found. Install and authenticate GitHub Copilot CLI, then retry.'); + this.name = 'CopilotNotFoundError'; + } +} + +function runCopilotCommand(args: string[], opts?: { cwd?: string; timeoutMs?: number }): Promise { + return new Promise((resolve, reject) => { + const child = spawn('copilot', args, { + cwd: opts?.cwd, + stdio: ['ignore', 'pipe', 'pipe'], + }); + + let stdout = ''; + let stderr = ''; + + child.stdout.on('data', (chunk: Buffer | string) => { + stdout += chunk.toString(); + }); + + child.stderr.on('data', (chunk: Buffer | string) => { + stderr += chunk.toString(); + }); + + const timeout = setTimeout(() => { + child.kill('SIGTERM'); + reject(new Error(`copilot command timed out after ${String(opts?.timeoutMs ?? COPILOT_TIMEOUT_MS)}ms`)); + }, opts?.timeoutMs ?? COPILOT_TIMEOUT_MS); + + child.once('error', (error) => { + clearTimeout(timeout); + reject(error); + }); + + child.once('close', (code) => { + clearTimeout(timeout); + resolve({ + stdout: stdout.trim(), + stderr: stderr.trim(), + exitCode: code ?? 1, + }); + }); + }); +} + +function normalizeSeverity(raw: unknown): CommentSeverity { + const sev = typeof raw === 'string' ? raw.toLowerCase() : ''; + + if (sev === 'error' || sev === 'high' || sev === 'critical') { + return 'critical'; + } + + if (sev === 'warning' || sev === 'medium') { + return 'important'; + } + + if (sev === 'info' || sev === 'low') { + return 'minor'; + } + + return 'minor'; +} + +function maybeNumber(raw: unknown): number | undefined { + if (typeof raw === 'number' && Number.isFinite(raw)) { + return Math.trunc(raw); + } + + if (typeof raw === 'string') { + const n = Number.parseInt(raw, 10); + if (!Number.isNaN(n)) { + return n; + } + } + + return undefined; +} + +function collectParsedComments(value: unknown, comments: ParsedComment[]): void { + if (Array.isArray(value)) { + for (const item of value) { + collectParsedComments(item, comments); + } + return; + } + + if (!value || typeof value !== 'object') { + return; + } + + const record = value as Record; + + const message = [record.message, record.body, record.text].find((field) => typeof field === 'string'); + + if (typeof message === 'string' && message.trim()) { + comments.push({ + message: message.trim(), + severity: record.severity ?? record.priority, + file: record.file ?? record.path, + line: record.line, + }); + } + + for (const nested of Object.values(record)) { + collectParsedComments(nested, comments); + } +} + +function parseReviewComments(stdout: string): ReviewComment[] { + const parsedObjects: unknown[] = []; + + for (const line of stdout.split(/\r?\n/)) { + const trimmed = line.trim(); + if (!trimmed) { + continue; + } + + try { + parsedObjects.push(JSON.parse(trimmed)); + } catch { + // Ignore non-JSON lines. + } + } + + const parsed: ParsedComment[] = []; + for (const entry of parsedObjects) { + collectParsedComments(entry, parsed); + } + + const deduped = new Set(); + const output: ReviewComment[] = []; + + for (const item of parsed) { + const file = typeof item.file === 'string' && item.file.length > 0 ? item.file : undefined; + const line = maybeNumber(item.line); + const lineKey = line === undefined ? '' : String(line); + const key = `${item.message}::${file ?? ''}::${lineKey}`; + + if (deduped.has(key)) { + continue; + } + + deduped.add(key); + output.push({ + severity: normalizeSeverity(item.severity), + file, + line, + comment: item.message, + }); + } + + return output; +} + +export async function checkCopilotAvailable(): Promise { + try { + const result = await runCopilotCommand(['--version']); + if (result.exitCode !== 0) { + throw new CopilotNotFoundError(); + } + } catch { + throw new CopilotNotFoundError(); + } +} + +export async function runCopilotReview(spec: string, opts?: { cwd?: string }): Promise { + const prompt = `Review the staged changes against the following spec.\nReport bugs, missing requirements, security issues, and logic errors.\nIgnore style issues.\n\nSpec:\n${spec}`; + + let result: CommandResult; + + try { + result = await runCopilotCommand(['-p', prompt, '--silent', '--output-format=json'], { cwd: opts?.cwd }); + } catch (error: unknown) { + throw new CopilotReviewError( + 'review_failed', + `Failed to execute copilot review: ${error instanceof Error ? error.message : String(error)}`, + ); + } + + if (result.exitCode !== 0) { + throw new CopilotReviewError('review_failed', 'Copilot review command failed.', result); + } + + if (!result.stdout.trim()) { + throw new CopilotReviewError('review_failed', 'Copilot review produced no output.', result); + } + + try { + return parseReviewComments(result.stdout); + } catch (error: unknown) { + throw new CopilotReviewError( + 'parse_failed', + `Failed to parse Copilot review output: ${error instanceof Error ? error.message : String(error)}`, + result, + ); + } +} diff --git a/src/lib/review-loop.ts b/src/lib/review-loop.ts index 132fdf9..b4c93d0 100644 --- a/src/lib/review-loop.ts +++ b/src/lib/review-loop.ts @@ -2,6 +2,7 @@ import path from 'node:path'; import * as codex from './codex.js'; import type { ClaudeClient } from './claude.js'; +import { runCopilotReview } from './copilot.js'; import * as git from './git.js'; import * as logger from './logger.js'; import * as state from './state.js'; @@ -89,19 +90,14 @@ export async function runReviewLoop(opts: ReviewLoopOptions): Promise { + const comments = await runCopilotReview(opts.step.spec, { cwd: serviceRoot }).finally(() => { if (reviewerHeartbeat) { clearInterval(reviewerHeartbeat); } }); logger.info(`Reviewer response received in ${formatElapsed(reviewerStartedAt)}`); + const review = { comments }; logger.reviewSummary(review.comments); logger.info(`Requesting arbiter decision (model: ${opts.config.review.model})`); diff --git a/src/types/index.ts b/src/types/index.ts index 39d66c8..f6cd0cc 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -6,6 +6,8 @@ export type StepStatus = 'pending' | 'in_progress' | 'done' | 'failed' | 'escala export type TaskStatus = 'in_progress' | 'review' | 'done' | 'blocked' | 'escalated'; + +export type CopilotErrorCode = 'not_found' | 'review_failed' | 'parse_failed'; export type CodexErrorCode = | 'submit_failed' | 'resume_failed' diff --git a/test/unit/copilot.test.ts b/test/unit/copilot.test.ts new file mode 100644 index 0000000..7e96af0 --- /dev/null +++ b/test/unit/copilot.test.ts @@ -0,0 +1,94 @@ +import { EventEmitter } from 'node:events'; + +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const { spawnMock } = vi.hoisted(() => ({ + spawnMock: vi.fn(), +})); + +vi.mock('node:child_process', () => ({ + spawn: spawnMock, +})); + +import { checkCopilotAvailable, CopilotNotFoundError, CopilotReviewError, runCopilotReview } from '../../src/lib/copilot.js'; + +function mockSpawnRun(stdoutText: string, stderrText = '', exitCode = 0): void { + spawnMock.mockImplementation(() => { + const stdout = new EventEmitter(); + const stderr = new EventEmitter(); + const child = new EventEmitter() as EventEmitter & { + stdout: EventEmitter; + stderr: EventEmitter; + kill: () => void; + }; + child.stdout = stdout; + child.stderr = stderr; + child.kill = vi.fn(); + + setTimeout(() => { + stdout.emit('data', stdoutText); + stderr.emit('data', stderrText); + child.emit('close', exitCode); + }, 0); + + return child; + }); +} + +beforeEach(() => { + spawnMock.mockReset(); +}); + +describe('checkCopilotAvailable', () => { + it('resolves when copilot --version exits 0', async () => { + mockSpawnRun('1.2.3\n'); + await expect(checkCopilotAvailable()).resolves.toBeUndefined(); + }); + + it('throws CopilotNotFoundError when copilot cannot be started', async () => { + spawnMock.mockImplementation(() => { + const child = new EventEmitter() as EventEmitter & { + stdout: EventEmitter; + stderr: EventEmitter; + kill: () => void; + }; + child.stdout = new EventEmitter(); + child.stderr = new EventEmitter(); + child.kill = vi.fn(); + setTimeout(() => child.emit('error', new Error('ENOENT')), 0); + return child; + }); + + await expect(checkCopilotAvailable()).rejects.toBeInstanceOf(CopilotNotFoundError); + }); +}); + +describe('runCopilotReview', () => { + it('maps severity and location fields from JSONL output', async () => { + mockSpawnRun('{"message":"Null dereference","severity":"high","file":"src/a.ts","line":13}\n'); + + await expect(runCopilotReview('spec', { cwd: '/repo' })).resolves.toEqual([ + { + severity: 'critical', + file: 'src/a.ts', + line: 13, + comment: 'Null dereference', + }, + ]); + }); + + it('returns [] when output has no parseable comments and command succeeds', async () => { + mockSpawnRun('{"event":"done"}\nnot-json\n'); + await expect(runCopilotReview('spec')).resolves.toEqual([]); + }); + + it('throws CopilotReviewError when command fails', async () => { + mockSpawnRun('oops', 'broken', 1); + await expect(runCopilotReview('spec')).rejects.toBeInstanceOf(CopilotReviewError); + }); + + it('throws CopilotReviewError when stdout is empty', async () => { + mockSpawnRun(' \n'); + await expect(runCopilotReview('spec')).rejects.toBeInstanceOf(CopilotReviewError); + }); +}); diff --git a/test/unit/review-loop.test.ts b/test/unit/review-loop.test.ts index 4ab811e..653167f 100644 --- a/test/unit/review-loop.test.ts +++ b/test/unit/review-loop.test.ts @@ -1,12 +1,13 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'; -const { getDiffMock, codexExecMock, saveIterationLogMock, reviewSummaryMock, infoMock, iterationMock } = vi.hoisted(() => ({ +const { getDiffMock, codexExecMock, saveIterationLogMock, reviewSummaryMock, infoMock, iterationMock, runCopilotReviewMock } = vi.hoisted(() => ({ getDiffMock: vi.fn(), codexExecMock: vi.fn(), saveIterationLogMock: vi.fn(), reviewSummaryMock: vi.fn(), infoMock: vi.fn(), iterationMock: vi.fn(), + runCopilotReviewMock: vi.fn(), })); vi.mock('../../src/lib/git.js', () => ({ @@ -21,6 +22,10 @@ vi.mock('../../src/lib/state.js', () => ({ saveIterationLog: saveIterationLogMock, })); +vi.mock('../../src/lib/copilot.js', () => ({ + runCopilotReview: runCopilotReviewMock, +})); + vi.mock('../../src/lib/logger.js', () => ({ reviewSummary: reviewSummaryMock, info: infoMock, @@ -48,6 +53,7 @@ describe('runReviewLoop', () => { reviewSummaryMock.mockReset(); infoMock.mockReset(); iterationMock.mockReset(); + runCopilotReviewMock.mockReset(); stepState = { service: 'svc', status: 'in_progress', iteration: 0 }; }); @@ -56,7 +62,6 @@ describe('runReviewLoop', () => { getDiffMock.mockResolvedValue(''); const claude = { - runReviewer: vi.fn(), runArbiter: vi.fn(), }; @@ -71,16 +76,16 @@ describe('runReviewLoop', () => { }); expect(result.decision).toBe('submit'); - expect(claude.runReviewer).not.toHaveBeenCalled(); + expect(runCopilotReviewMock).not.toHaveBeenCalled(); expect(claude.runArbiter).not.toHaveBeenCalled(); }); it('submit decision returns correct ReviewLoopResult', async () => { getDiffMock.mockResolvedValue('diff'); const review: ReviewResult = { comments: [{ severity: 'minor', comment: 'small' }] }; + runCopilotReviewMock.mockResolvedValue(review.comments); const arbiter: ArbiterResult = { decision: 'submit', reasoning: 'ok', summary: 'ok' }; const claude = { - runReviewer: vi.fn().mockResolvedValue(review), runArbiter: vi.fn().mockResolvedValue(arbiter), }; @@ -106,9 +111,9 @@ describe('runReviewLoop', () => { it('escalate decision returns correct ReviewLoopResult', async () => { getDiffMock.mockResolvedValue('diff'); const review: ReviewResult = { comments: [{ severity: 'critical', comment: 'broken' }] }; + runCopilotReviewMock.mockResolvedValue(review.comments); const arbiter: ArbiterResult = { decision: 'escalate', reasoning: 'conflict', summary: 'escalate' }; const claude = { - runReviewer: vi.fn().mockResolvedValue(review), runArbiter: vi.fn().mockResolvedValue(arbiter), }; @@ -120,11 +125,8 @@ describe('runReviewLoop', () => { it('fix decision calls codex then loops', async () => { getDiffMock.mockResolvedValueOnce('diff-1').mockResolvedValueOnce('diff-2'); + runCopilotReviewMock.mockResolvedValueOnce([{ severity: 'important', comment: 'fix me' }]).mockResolvedValueOnce([]); const claude = { - runReviewer: vi - .fn() - .mockResolvedValueOnce({ comments: [{ severity: 'important', comment: 'fix me' }] }) - .mockResolvedValueOnce({ comments: [] }), runArbiter: vi .fn() .mockResolvedValueOnce({ decision: 'fix', reasoning: 'needs fix', summary: 'fix', feedback_for_codex: 'edit file' }) @@ -141,8 +143,8 @@ describe('runReviewLoop', () => { it('After max_iterations with fix: escalates', async () => { stepState.iteration = 2; getDiffMock.mockResolvedValue('diff'); + runCopilotReviewMock.mockResolvedValue([{ severity: 'important', comment: 'still bad' }]); const claude = { - runReviewer: vi.fn().mockResolvedValue({ comments: [{ severity: 'important', comment: 'still bad' }] }), runArbiter: vi.fn().mockResolvedValue({ decision: 'fix', reasoning: 'still broken', summary: 'fix again', feedback_for_codex: 'more' }), }; @@ -154,7 +156,6 @@ describe('runReviewLoop', () => { it('dryRun skips all external calls', async () => { const claude = { - runReviewer: vi.fn(), runArbiter: vi.fn(), }; @@ -172,18 +173,17 @@ describe('runReviewLoop', () => { expect(result.decision).toBe('submit'); expect(getDiffMock).not.toHaveBeenCalled(); expect(codexExecMock).not.toHaveBeenCalled(); - expect(claude.runReviewer).not.toHaveBeenCalled(); + expect(runCopilotReviewMock).not.toHaveBeenCalled(); expect(claude.runArbiter).not.toHaveBeenCalled(); }); it('Iteration logs are saved on each iteration', async () => { getDiffMock.mockResolvedValueOnce('d1').mockResolvedValueOnce('d2').mockResolvedValueOnce('d3'); + runCopilotReviewMock + .mockResolvedValueOnce([{ severity: 'important', comment: '1' }]) + .mockResolvedValueOnce([{ severity: 'important', comment: '2' }]) + .mockResolvedValueOnce([]); const claude = { - runReviewer: vi - .fn() - .mockResolvedValueOnce({ comments: [{ severity: 'important', comment: '1' }] }) - .mockResolvedValueOnce({ comments: [{ severity: 'important', comment: '2' }] }) - .mockResolvedValueOnce({ comments: [] }), runArbiter: vi .fn() .mockResolvedValueOnce({ decision: 'fix', reasoning: '1', summary: '1', feedback_for_codex: 'a' }) diff --git a/tests/integration/start.test.ts b/tests/integration/start.test.ts index ad12196..4e7dfbd 100644 --- a/tests/integration/start.test.ts +++ b/tests/integration/start.test.ts @@ -9,13 +9,17 @@ const mocks = vi.hoisted(() => ({ submitTask: vi.fn(), pollStatus: vi.fn(), getDiff: vi.fn(), + applyDiff: vi.fn(), resumeTask: vi.fn(), createBranch: vi.fn(), checkoutBranch: vi.fn(), + gitExec: vi.fn(), getBranchName: vi.fn((taskId: string, service: string) => `vexdo/${taskId}/${service}`), checkGhAvailable: vi.fn(), createPr: vi.fn(), ClaudeClient: vi.fn(), + checkCopilotAvailable: vi.fn(), + runCopilotReview: vi.fn(), })); vi.mock('../../src/lib/codex.js', () => ({ @@ -23,12 +27,18 @@ vi.mock('../../src/lib/codex.js', () => ({ submitTask: mocks.submitTask, pollStatus: mocks.pollStatus, getDiff: mocks.getDiff, + applyDiff: mocks.applyDiff, resumeTask: mocks.resumeTask, })); vi.mock('../../src/lib/git.js', () => ({ createBranch: mocks.createBranch, checkoutBranch: mocks.checkoutBranch, getBranchName: mocks.getBranchName, + exec: mocks.gitExec, +})); +vi.mock('../../src/lib/copilot.js', () => ({ + checkCopilotAvailable: mocks.checkCopilotAvailable, + runCopilotReview: mocks.runCopilotReview, })); vi.mock('../../src/lib/gh.js', () => ({ checkGhAvailable: mocks.checkGhAvailable, @@ -76,13 +86,16 @@ beforeEach(() => { } mocks.getBranchName.mockImplementation((taskId: string, service: string) => `vexdo/${taskId}/${service}`); + mocks.gitExec.mockResolvedValue(''); mocks.submitTask.mockResolvedValue('sess-1'); mocks.pollStatus.mockResolvedValue('completed'); mocks.getDiff.mockResolvedValue('diff --git a/x b/x'); + mocks.applyDiff.mockResolvedValue(undefined); mocks.resumeTask.mockResolvedValue('sess-2'); + mocks.checkCopilotAvailable.mockResolvedValue(undefined); + mocks.runCopilotReview.mockResolvedValue([]); mocks.ClaudeClient.mockImplementation(() => ({ - runReviewer: vi.fn().mockResolvedValue({comments: []}), runArbiter: vi.fn().mockResolvedValue({decision: 'submit', reasoning: 'ok', summary: 'ok'}), })); }); @@ -119,7 +132,6 @@ describe('start integration', () => { process.chdir(root); mocks.ClaudeClient.mockImplementation(() => ({ - runReviewer: vi.fn().mockResolvedValue({comments: []}), runArbiter: vi .fn() .mockResolvedValueOnce({decision: 'fix', reasoning: 'x', summary: 'x', feedback_for_codex: 'please fix'})