From dabcd92de8508b46bbc448edef24a3640ed051dd Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Thu, 2 Apr 2026 16:59:40 -0500 Subject: [PATCH 1/4] fix: detect service unavailability and fail fast with clear error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the Claude API returns persistent 500s, the SDK exhausts retries and returns a result with subtype 'success' but is_error: true. Our code only checked subtype, so it treated the error as success and proceeded with validation retries — burning ~9 minutes on 30 hopeless API calls before showing a raw JSON error. Now: - handleSDKMessage checks is_error on result messages - 500/server_error/internal_error classified as SERVICE_UNAVAILABLE - abortRetries flag skips validation retries on fatal SDK errors - CLI adapter shows "AI service temporarily unavailable" instead of raw JSON - Headless adapter emits service_unavailable error code --- src/lib/adapters/cli-adapter.ts | 11 ++- src/lib/adapters/headless-adapter.ts | 9 ++- src/lib/agent-interface.spec.ts | 108 ++++++++++++++++++++++++++- src/lib/agent-interface.ts | 43 +++++++++++ 4 files changed, 167 insertions(+), 4 deletions(-) diff --git a/src/lib/adapters/cli-adapter.ts b/src/lib/adapters/cli-adapter.ts index 9bed10f3..7d29b970 100644 --- a/src/lib/adapters/cli-adapter.ts +++ b/src/lib/adapters/cli-adapter.ts @@ -402,7 +402,16 @@ export class CLIAdapter implements InstallerAdapter { this.stopSpinner('Error'); this.stopAgentUpdates(); - clack.log.error(message); + // Rewrite raw API errors into user-friendly messages + const isServiceError = + /\b50[0-9]\b/.test(message) || + /server_error|internal_error|overloaded|service.unavailable/i.test(message); + if (isServiceError) { + clack.log.error('The AI service is temporarily unavailable.'); + clack.log.info('This is usually resolved within a few minutes. Please try again shortly.'); + } else { + clack.log.error(message); + } // Add actionable hints for common errors if (message.includes('authentication') || message.includes('auth')) { diff --git a/src/lib/adapters/headless-adapter.ts b/src/lib/adapters/headless-adapter.ts index b56d5b96..b359f085 100644 --- a/src/lib/adapters/headless-adapter.ts +++ b/src/lib/adapters/headless-adapter.ts @@ -336,7 +336,14 @@ export class HeadlessAdapter implements InstallerAdapter { }; private handleError = ({ message, stack }: InstallerEvents['error']): void => { - writeNDJSON({ type: 'error', code: 'installer_error', message }); + const isServiceError = + /\b50[0-9]\b/.test(message) || + /server_error|internal_error|overloaded|service.unavailable/i.test(message); + const code = isServiceError ? 'service_unavailable' : 'installer_error'; + const displayMessage = isServiceError + ? 'The AI service is temporarily unavailable. Please try again in a few minutes.' + : message; + writeNDJSON({ type: 'error', code, message: displayMessage }); this.debugLog(stack ?? ''); }; } diff --git a/src/lib/agent-interface.spec.ts b/src/lib/agent-interface.spec.ts index 2ac314b8..eab0531c 100644 --- a/src/lib/agent-interface.spec.ts +++ b/src/lib/agent-interface.spec.ts @@ -67,7 +67,7 @@ vi.mock('../utils/urls.js', () => ({ getLlmGatewayUrlFromHost: vi.fn(() => 'http://localhost:8000'), })); -import { runAgent } from './agent-interface.js'; +import { runAgent, AgentErrorType } from './agent-interface.js'; import { InstallerEventEmitter } from './events.js'; import type { InstallerOptions } from '../utils/types.js'; @@ -75,8 +75,15 @@ import type { InstallerOptions } from '../utils/types.js'; * Create a mock SDK response that consumes the prompt stream and yields * responses for each prompt message. This models the real SDK behavior: * the response generator stays alive as long as prompts keep coming. + * + * Turn options: + * - text: assistant text to yield + * - error: result subtype is 'error' with errors array + * - is_error: result has subtype 'success' but is_error: true (SDK exhausted retries) */ -function createMockSDKResponse(turns: Array<{ text?: string; error?: boolean }>) { +function createMockSDKResponse( + turns: Array<{ text?: string; error?: boolean; is_error?: boolean }>, +) { return function mockQueryImpl({ prompt }: { prompt: AsyncIterable; options: unknown }) { let turnIndex = 0; @@ -102,6 +109,7 @@ function createMockSDKResponse(turns: Array<{ text?: string; error?: boolean }>) yield { type: 'result', subtype: turn.error ? 'error' : 'success', + is_error: turn.is_error ?? false, result: turn.text ?? '', ...(turn.error ? { errors: ['Test error'] } : {}), }; @@ -265,3 +273,99 @@ describe('runAgent retry loop', () => { expect(validateAndFormat).toHaveBeenCalledTimes(1); }); }); + +describe('service unavailability handling', () => { + let emitter: InstallerEventEmitter; + let emittedEvents: Array<{ event: string; payload: unknown }>; + + beforeEach(() => { + mockQuery.mockReset(); + emitter = new InstallerEventEmitter(); + emittedEvents = []; + + const originalEmit = emitter.emit.bind(emitter); + emitter.emit = ((event: string, payload: unknown) => { + emittedEvents.push({ event, payload }); + return originalEmit(event, payload); + }) as typeof emitter.emit; + }); + + it('detects is_error result with API 500 as SERVICE_UNAVAILABLE', async () => { + const apiErrorText = + 'API Error: 500 {"error":{"type":"internal_error","message":"An unexpected error occurred"}}'; + mockQuery.mockImplementation( + createMockSDKResponse([{ text: apiErrorText, is_error: true }]), + ); + + const result = await runAgent( + makeAgentConfig(), + 'Test prompt', + makeOptions(), + undefined, + emitter, + ); + + expect(result.error).toBe(AgentErrorType.SERVICE_UNAVAILABLE); + expect(result.errorMessage).toMatch(/temporarily unavailable/); + }); + + it('detects is_error result with server_error as SERVICE_UNAVAILABLE', async () => { + mockQuery.mockImplementation( + createMockSDKResponse([{ text: 'server_error: service overloaded', is_error: true }]), + ); + + const result = await runAgent( + makeAgentConfig(), + 'Test prompt', + makeOptions(), + undefined, + emitter, + ); + + expect(result.error).toBe(AgentErrorType.SERVICE_UNAVAILABLE); + }); + + it('detects is_error result without service pattern as EXECUTION_ERROR', async () => { + mockQuery.mockImplementation( + createMockSDKResponse([{ text: 'Some other failure', is_error: true }]), + ); + + const result = await runAgent( + makeAgentConfig(), + 'Test prompt', + makeOptions(), + undefined, + emitter, + ); + + expect(result.error).toBe(AgentErrorType.EXECUTION_ERROR); + expect(result.errorMessage).toBe('Some other failure'); + }); + + it('skips validation retries when service is unavailable', async () => { + const apiErrorText = + 'API Error: 500 {"error":{"type":"internal_error","message":"An unexpected error occurred"}}'; + mockQuery.mockImplementation( + createMockSDKResponse([{ text: apiErrorText, is_error: true }]), + ); + + const validateAndFormat = vi.fn().mockResolvedValue('Still broken'); + + const result = await runAgent( + makeAgentConfig(), + 'Test prompt', + makeOptions(), + undefined, + emitter, + { maxRetries: 2, validateAndFormat }, + ); + + expect(result.error).toBe(AgentErrorType.SERVICE_UNAVAILABLE); + // validateAndFormat should never be called because retries are aborted + expect(validateAndFormat).not.toHaveBeenCalled(); + + // No retry events should be emitted + const retryEvents = emittedEvents.filter((e) => e.event === 'agent:retry'); + expect(retryEvents).toHaveLength(0); + }); +}); diff --git a/src/lib/agent-interface.ts b/src/lib/agent-interface.ts index b8108b34..5253c8f6 100644 --- a/src/lib/agent-interface.ts +++ b/src/lib/agent-interface.ts @@ -56,6 +56,9 @@ export const AgentSignals = { export type AgentSignal = (typeof AgentSignals)[keyof typeof AgentSignals]; +/** Internal prefix used to tag service-unavailability errors from handleSDKMessage */ +const SERVICE_UNAVAILABLE_PREFIX = '__SERVICE_UNAVAILABLE__'; + /** * Error types that can be returned from agent execution. * These correspond to the error signals that the agent emits. @@ -67,6 +70,8 @@ export enum AgentErrorType { RESOURCE_MISSING = 'INSTALLER_RESOURCE_MISSING', /** Agent execution failed (API error, auth error, etc.) */ EXECUTION_ERROR = 'INSTALLER_EXECUTION_ERROR', + /** AI service is unavailable (API 500, outage, etc.) */ + SERVICE_UNAVAILABLE = 'INSTALLER_SERVICE_UNAVAILABLE', } export type AgentConfig = { @@ -536,6 +541,11 @@ export async function runAgent( let resolveCurrentTurn!: () => void; let currentTurnDone!: Promise; + // Set by the message loop when a fatal SDK error is detected (e.g. service + // unavailability). The prompt stream checks this before yielding retry + // prompts so we fail fast instead of burning minutes on hopeless retries. + let abortRetries = false; + function resetTurnSignal() { currentTurnDone = new Promise((resolve) => { resolveCurrentTurn = resolve; @@ -555,6 +565,12 @@ export async function runAgent( while (retryCount < maxRetries) { await currentTurnDone; + // Don't send correction prompts when the service itself is down + if (abortRetries) { + logInfo('Skipping validation retries due to service error'); + break; + } + emitter?.emit('validation:retry:start', { attempt: retryCount + 1 }); let validationPrompt: string | null; @@ -628,6 +644,8 @@ export async function runAgent( const messageError = handleSDKMessage(message, options, collectedText, emitter); if (messageError) { sdkError = messageError; + // Signal the prompt stream to stop yielding retry prompts + abortRetries = true; } if (message.type === 'result') { resolveCurrentTurn(); @@ -645,6 +663,14 @@ export async function runAgent( // Check for SDK errors first (e.g., API errors, auth failures) // Return error type + message - caller decides whether to throw or emit events if (sdkError) { + if (sdkError.startsWith(SERVICE_UNAVAILABLE_PREFIX)) { + const detail = sdkError.slice(SERVICE_UNAVAILABLE_PREFIX.length); + logError('AI service unavailable:', detail); + return { + error: AgentErrorType.SERVICE_UNAVAILABLE, + errorMessage: 'The AI service is temporarily unavailable. Please try again in a few minutes.', + }; + } logError('Agent SDK error:', sdkError); return { error: AgentErrorType.EXECUTION_ERROR, errorMessage: sdkError }; } @@ -837,6 +863,23 @@ function handleSDKMessage( } case 'result': { + // The SDK may return subtype 'success' with is_error: true when API + // retries are exhausted (e.g., persistent 500s). Check is_error first. + const isResultError = + (message as Record).is_error === true; + + if (isResultError) { + const resultText = + typeof message.result === 'string' ? message.result : ''; + logError('Agent result marked as error:', resultText); + + // Detect service unavailability (API 500, upstream outage) + if (/\b50[0-9]\b/.test(resultText) || /server_error|internal_error|overloaded/.test(resultText)) { + return `${SERVICE_UNAVAILABLE_PREFIX}${resultText}`; + } + return resultText || 'Agent execution failed'; + } + if (message.subtype === 'success') { logInfo('Agent completed successfully'); if (typeof message.result === 'string') { From df972b163c7737c7796e5dad555f74b77609b269 Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Thu, 2 Apr 2026 17:02:56 -0500 Subject: [PATCH 2/4] chore: formatting --- src/lib/adapters/cli-adapter.ts | 3 +- src/lib/adapters/headless-adapter.ts | 3 +- src/lib/agent-interface.spec.ts | 62 +++++++--------------------- src/lib/agent-interface.ts | 6 +-- 4 files changed, 18 insertions(+), 56 deletions(-) diff --git a/src/lib/adapters/cli-adapter.ts b/src/lib/adapters/cli-adapter.ts index 7d29b970..e865da5c 100644 --- a/src/lib/adapters/cli-adapter.ts +++ b/src/lib/adapters/cli-adapter.ts @@ -404,8 +404,7 @@ export class CLIAdapter implements InstallerAdapter { // Rewrite raw API errors into user-friendly messages const isServiceError = - /\b50[0-9]\b/.test(message) || - /server_error|internal_error|overloaded|service.unavailable/i.test(message); + /\b50[0-9]\b/.test(message) || /server_error|internal_error|overloaded|service.unavailable/i.test(message); if (isServiceError) { clack.log.error('The AI service is temporarily unavailable.'); clack.log.info('This is usually resolved within a few minutes. Please try again shortly.'); diff --git a/src/lib/adapters/headless-adapter.ts b/src/lib/adapters/headless-adapter.ts index b359f085..e067a257 100644 --- a/src/lib/adapters/headless-adapter.ts +++ b/src/lib/adapters/headless-adapter.ts @@ -337,8 +337,7 @@ export class HeadlessAdapter implements InstallerAdapter { private handleError = ({ message, stack }: InstallerEvents['error']): void => { const isServiceError = - /\b50[0-9]\b/.test(message) || - /server_error|internal_error|overloaded|service.unavailable/i.test(message); + /\b50[0-9]\b/.test(message) || /server_error|internal_error|overloaded|service.unavailable/i.test(message); const code = isServiceError ? 'service_unavailable' : 'installer_error'; const displayMessage = isServiceError ? 'The AI service is temporarily unavailable. Please try again in a few minutes.' diff --git a/src/lib/agent-interface.spec.ts b/src/lib/agent-interface.spec.ts index eab0531c..2947b8ac 100644 --- a/src/lib/agent-interface.spec.ts +++ b/src/lib/agent-interface.spec.ts @@ -81,9 +81,7 @@ import type { InstallerOptions } from '../utils/types.js'; * - error: result subtype is 'error' with errors array * - is_error: result has subtype 'success' but is_error: true (SDK exhausted retries) */ -function createMockSDKResponse( - turns: Array<{ text?: string; error?: boolean; is_error?: boolean }>, -) { +function createMockSDKResponse(turns: Array<{ text?: string; error?: boolean; is_error?: boolean }>) { return function mockQueryImpl({ prompt }: { prompt: AsyncIterable; options: unknown }) { let turnIndex = 0; @@ -291,74 +289,42 @@ describe('service unavailability handling', () => { }); it('detects is_error result with API 500 as SERVICE_UNAVAILABLE', async () => { - const apiErrorText = - 'API Error: 500 {"error":{"type":"internal_error","message":"An unexpected error occurred"}}'; - mockQuery.mockImplementation( - createMockSDKResponse([{ text: apiErrorText, is_error: true }]), - ); + const apiErrorText = 'API Error: 500 {"error":{"type":"internal_error","message":"An unexpected error occurred"}}'; + mockQuery.mockImplementation(createMockSDKResponse([{ text: apiErrorText, is_error: true }])); - const result = await runAgent( - makeAgentConfig(), - 'Test prompt', - makeOptions(), - undefined, - emitter, - ); + const result = await runAgent(makeAgentConfig(), 'Test prompt', makeOptions(), undefined, emitter); expect(result.error).toBe(AgentErrorType.SERVICE_UNAVAILABLE); expect(result.errorMessage).toMatch(/temporarily unavailable/); }); it('detects is_error result with server_error as SERVICE_UNAVAILABLE', async () => { - mockQuery.mockImplementation( - createMockSDKResponse([{ text: 'server_error: service overloaded', is_error: true }]), - ); + mockQuery.mockImplementation(createMockSDKResponse([{ text: 'server_error: service overloaded', is_error: true }])); - const result = await runAgent( - makeAgentConfig(), - 'Test prompt', - makeOptions(), - undefined, - emitter, - ); + const result = await runAgent(makeAgentConfig(), 'Test prompt', makeOptions(), undefined, emitter); expect(result.error).toBe(AgentErrorType.SERVICE_UNAVAILABLE); }); it('detects is_error result without service pattern as EXECUTION_ERROR', async () => { - mockQuery.mockImplementation( - createMockSDKResponse([{ text: 'Some other failure', is_error: true }]), - ); + mockQuery.mockImplementation(createMockSDKResponse([{ text: 'Some other failure', is_error: true }])); - const result = await runAgent( - makeAgentConfig(), - 'Test prompt', - makeOptions(), - undefined, - emitter, - ); + const result = await runAgent(makeAgentConfig(), 'Test prompt', makeOptions(), undefined, emitter); expect(result.error).toBe(AgentErrorType.EXECUTION_ERROR); expect(result.errorMessage).toBe('Some other failure'); }); it('skips validation retries when service is unavailable', async () => { - const apiErrorText = - 'API Error: 500 {"error":{"type":"internal_error","message":"An unexpected error occurred"}}'; - mockQuery.mockImplementation( - createMockSDKResponse([{ text: apiErrorText, is_error: true }]), - ); + const apiErrorText = 'API Error: 500 {"error":{"type":"internal_error","message":"An unexpected error occurred"}}'; + mockQuery.mockImplementation(createMockSDKResponse([{ text: apiErrorText, is_error: true }])); const validateAndFormat = vi.fn().mockResolvedValue('Still broken'); - const result = await runAgent( - makeAgentConfig(), - 'Test prompt', - makeOptions(), - undefined, - emitter, - { maxRetries: 2, validateAndFormat }, - ); + const result = await runAgent(makeAgentConfig(), 'Test prompt', makeOptions(), undefined, emitter, { + maxRetries: 2, + validateAndFormat, + }); expect(result.error).toBe(AgentErrorType.SERVICE_UNAVAILABLE); // validateAndFormat should never be called because retries are aborted diff --git a/src/lib/agent-interface.ts b/src/lib/agent-interface.ts index 5253c8f6..95cbf3e9 100644 --- a/src/lib/agent-interface.ts +++ b/src/lib/agent-interface.ts @@ -865,12 +865,10 @@ function handleSDKMessage( case 'result': { // The SDK may return subtype 'success' with is_error: true when API // retries are exhausted (e.g., persistent 500s). Check is_error first. - const isResultError = - (message as Record).is_error === true; + const isResultError = (message as Record).is_error === true; if (isResultError) { - const resultText = - typeof message.result === 'string' ? message.result : ''; + const resultText = typeof message.result === 'string' ? message.result : ''; logError('Agent result marked as error:', resultText); // Detect service unavailability (API 500, upstream outage) From 5e49ac63782164a3d6ac2cc09c194ebb25392048 Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Thu, 2 Apr 2026 17:23:07 -0500 Subject: [PATCH 3/4] fix: handle rate limit, network, and process exit errors with clear messages Extend error classification to cover additional failure modes: - 429/rate limit: "AI service is currently rate-limited" - ECONNREFUSED/ETIMEDOUT/ENOTFOUND: "Could not connect to the AI service" - Process exit: "AI agent process exited unexpectedly" Rate limits also abort validation retries (same as 500s). --- src/lib/adapters/cli-adapter.ts | 15 ++++++++++++++- src/lib/adapters/headless-adapter.ts | 25 +++++++++++++++++++++---- src/lib/agent-interface.ts | 9 +++++++-- 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/src/lib/adapters/cli-adapter.ts b/src/lib/adapters/cli-adapter.ts index e865da5c..9bb6a810 100644 --- a/src/lib/adapters/cli-adapter.ts +++ b/src/lib/adapters/cli-adapter.ts @@ -402,12 +402,25 @@ export class CLIAdapter implements InstallerAdapter { this.stopSpinner('Error'); this.stopAgentUpdates(); - // Rewrite raw API errors into user-friendly messages + // Rewrite raw API/SDK errors into user-friendly messages const isServiceError = /\b50[0-9]\b/.test(message) || /server_error|internal_error|overloaded|service.unavailable/i.test(message); + const isRateLimit = /\b429\b/.test(message) || /rate.limit/i.test(message); + const isNetworkError = /ECONNREFUSED|ETIMEDOUT|ENOTFOUND|fetch failed/i.test(message); + const isProcessExit = /process exited with code/i.test(message); + if (isServiceError) { clack.log.error('The AI service is temporarily unavailable.'); clack.log.info('This is usually resolved within a few minutes. Please try again shortly.'); + } else if (isRateLimit) { + clack.log.error('The AI service is currently rate-limited.'); + clack.log.info('Please wait a minute and try again.'); + } else if (isNetworkError) { + clack.log.error('Could not connect to the AI service.'); + clack.log.info('Check your internet connection and try again.'); + } else if (isProcessExit) { + clack.log.error('The AI agent process exited unexpectedly.'); + clack.log.info('Try running again. If this persists, run with --debug for details.'); } else { clack.log.error(message); } diff --git a/src/lib/adapters/headless-adapter.ts b/src/lib/adapters/headless-adapter.ts index e067a257..d57ea343 100644 --- a/src/lib/adapters/headless-adapter.ts +++ b/src/lib/adapters/headless-adapter.ts @@ -338,10 +338,27 @@ export class HeadlessAdapter implements InstallerAdapter { private handleError = ({ message, stack }: InstallerEvents['error']): void => { const isServiceError = /\b50[0-9]\b/.test(message) || /server_error|internal_error|overloaded|service.unavailable/i.test(message); - const code = isServiceError ? 'service_unavailable' : 'installer_error'; - const displayMessage = isServiceError - ? 'The AI service is temporarily unavailable. Please try again in a few minutes.' - : message; + const isRateLimit = /\b429\b/.test(message) || /rate.limit/i.test(message); + const isNetworkError = /ECONNREFUSED|ETIMEDOUT|ENOTFOUND|fetch failed/i.test(message); + const isProcessExit = /process exited with code/i.test(message); + + let code = 'installer_error'; + let displayMessage = message; + + if (isServiceError) { + code = 'service_unavailable'; + displayMessage = 'The AI service is temporarily unavailable. Please try again in a few minutes.'; + } else if (isRateLimit) { + code = 'rate_limited'; + displayMessage = 'The AI service is currently rate-limited. Please wait a minute and try again.'; + } else if (isNetworkError) { + code = 'network_error'; + displayMessage = 'Could not connect to the AI service. Check your internet connection and try again.'; + } else if (isProcessExit) { + code = 'process_error'; + displayMessage = 'The AI agent process exited unexpectedly. Try running again with --debug for details.'; + } + writeNDJSON({ type: 'error', code, message: displayMessage }); this.debugLog(stack ?? ''); }; diff --git a/src/lib/agent-interface.ts b/src/lib/agent-interface.ts index 95cbf3e9..b731efd2 100644 --- a/src/lib/agent-interface.ts +++ b/src/lib/agent-interface.ts @@ -871,8 +871,13 @@ function handleSDKMessage( const resultText = typeof message.result === 'string' ? message.result : ''; logError('Agent result marked as error:', resultText); - // Detect service unavailability (API 500, upstream outage) - if (/\b50[0-9]\b/.test(resultText) || /server_error|internal_error|overloaded/.test(resultText)) { + // Detect service unavailability (API 500, upstream outage) or rate limiting (429) + if ( + /\b50[0-9]\b/.test(resultText) || + /server_error|internal_error|overloaded/.test(resultText) || + /\b429\b/.test(resultText) || + /rate.limit/i.test(resultText) + ) { return `${SERVICE_UNAVAILABLE_PREFIX}${resultText}`; } return resultText || 'Agent execution failed'; From d1ad4730ef4263f48eb60816d32569762ff5babd Mon Sep 17 00:00:00 2001 From: Nick Nisi Date: Thu, 2 Apr 2026 17:46:36 -0500 Subject: [PATCH 4/4] fix: correct service-error regex and separate rate-limit handling P1: The adapter regex /service.unavailable/ only matched a single char between "service" and "unavailable", so it missed our own friendly message "The AI service is temporarily unavailable". Fixed to /service.*unavailable/. Also removed the "Agent SDK error:" prefix from all framework integrations so user-friendly messages pass through cleanly. P2: 429 rate limits were folded into SERVICE_UNAVAILABLE_PREFIX, which rewrote them to "temporarily unavailable" before adapters could see the rate-limit signal. Now 429s get a separate RATE_LIMITED_PREFIX with distinct messaging ("currently rate-limited"), while still aborting validation retries. --- src/integrations/dotnet/index.ts | 2 +- src/integrations/elixir/index.ts | 2 +- src/integrations/go/index.ts | 2 +- src/integrations/ruby/index.ts | 2 +- src/lib/adapters/cli-adapter.ts | 2 +- src/lib/adapters/headless-adapter.ts | 2 +- src/lib/agent-interface.spec.ts | 29 ++++++++++++++++++++++++++++ src/lib/agent-interface.ts | 25 +++++++++++++++++------- src/lib/agent-runner.ts | 4 +++- 9 files changed, 56 insertions(+), 14 deletions(-) diff --git a/src/integrations/dotnet/index.ts b/src/integrations/dotnet/index.ts index bc7a744e..3d4f4eed 100644 --- a/src/integrations/dotnet/index.ts +++ b/src/integrations/dotnet/index.ts @@ -144,7 +144,7 @@ Begin integration now.`; if (agentResult.error) { await analytics.shutdown('error'); const message = agentResult.errorMessage || agentResult.error; - throw new Error(`Agent SDK error: ${message}`); + throw new Error(message); } // Post-installation validation diff --git a/src/integrations/elixir/index.ts b/src/integrations/elixir/index.ts index 34c97ea6..36dd17a5 100644 --- a/src/integrations/elixir/index.ts +++ b/src/integrations/elixir/index.ts @@ -124,7 +124,7 @@ export async function run(options: InstallerOptions): Promise { if (agentResult.error) { await analytics.shutdown('error'); const message = agentResult.errorMessage || agentResult.error; - throw new Error(`Agent SDK error: ${message}`); + throw new Error(message); } // Build summary diff --git a/src/integrations/go/index.ts b/src/integrations/go/index.ts index 7776946a..c2fdf082 100644 --- a/src/integrations/go/index.ts +++ b/src/integrations/go/index.ts @@ -213,7 +213,7 @@ Begin integration now.`; if (agentResult.error) { await analytics.shutdown('error'); const message = agentResult.errorMessage || agentResult.error; - throw new Error(`Agent SDK error: ${message}`); + throw new Error(message); } // Post-installation validation (gracefully skips — no rules file for Go) diff --git a/src/integrations/ruby/index.ts b/src/integrations/ruby/index.ts index a453ecd2..d259a6e6 100644 --- a/src/integrations/ruby/index.ts +++ b/src/integrations/ruby/index.ts @@ -143,7 +143,7 @@ Begin integration now.`; if (agentResult.error) { await analytics.shutdown('error'); const message = agentResult.errorMessage || agentResult.error; - throw new Error(`Agent SDK error: ${message}`); + throw new Error(message); } // Build completion summary diff --git a/src/lib/adapters/cli-adapter.ts b/src/lib/adapters/cli-adapter.ts index 9bb6a810..32cb856f 100644 --- a/src/lib/adapters/cli-adapter.ts +++ b/src/lib/adapters/cli-adapter.ts @@ -404,7 +404,7 @@ export class CLIAdapter implements InstallerAdapter { // Rewrite raw API/SDK errors into user-friendly messages const isServiceError = - /\b50[0-9]\b/.test(message) || /server_error|internal_error|overloaded|service.unavailable/i.test(message); + /\b50[0-9]\b/.test(message) || /server_error|internal_error|overloaded|service.*unavailable/i.test(message); const isRateLimit = /\b429\b/.test(message) || /rate.limit/i.test(message); const isNetworkError = /ECONNREFUSED|ETIMEDOUT|ENOTFOUND|fetch failed/i.test(message); const isProcessExit = /process exited with code/i.test(message); diff --git a/src/lib/adapters/headless-adapter.ts b/src/lib/adapters/headless-adapter.ts index d57ea343..2d414a92 100644 --- a/src/lib/adapters/headless-adapter.ts +++ b/src/lib/adapters/headless-adapter.ts @@ -337,7 +337,7 @@ export class HeadlessAdapter implements InstallerAdapter { private handleError = ({ message, stack }: InstallerEvents['error']): void => { const isServiceError = - /\b50[0-9]\b/.test(message) || /server_error|internal_error|overloaded|service.unavailable/i.test(message); + /\b50[0-9]\b/.test(message) || /server_error|internal_error|overloaded|service.*unavailable/i.test(message); const isRateLimit = /\b429\b/.test(message) || /rate.limit/i.test(message); const isNetworkError = /ECONNREFUSED|ETIMEDOUT|ENOTFOUND|fetch failed/i.test(message); const isProcessExit = /process exited with code/i.test(message); diff --git a/src/lib/agent-interface.spec.ts b/src/lib/agent-interface.spec.ts index 2947b8ac..c4f1486f 100644 --- a/src/lib/agent-interface.spec.ts +++ b/src/lib/agent-interface.spec.ts @@ -334,4 +334,33 @@ describe('service unavailability handling', () => { const retryEvents = emittedEvents.filter((e) => e.event === 'agent:retry'); expect(retryEvents).toHaveLength(0); }); + + it('detects 429 rate limit as distinct from service unavailability', async () => { + mockQuery.mockImplementation( + createMockSDKResponse([{ text: 'API Error: 429 rate_limit_exceeded', is_error: true }]), + ); + + const result = await runAgent(makeAgentConfig(), 'Test prompt', makeOptions(), undefined, emitter); + + expect(result.error).toBe(AgentErrorType.SERVICE_UNAVAILABLE); + expect(result.errorMessage).toMatch(/rate-limited/); + expect(result.errorMessage).not.toMatch(/temporarily unavailable/); + }); + + it('skips validation retries when rate-limited', async () => { + mockQuery.mockImplementation( + createMockSDKResponse([{ text: 'API Error: 429 rate_limit_exceeded', is_error: true }]), + ); + + const validateAndFormat = vi.fn().mockResolvedValue('Still broken'); + + const result = await runAgent(makeAgentConfig(), 'Test prompt', makeOptions(), undefined, emitter, { + maxRetries: 2, + validateAndFormat, + }); + + expect(result.error).toBe(AgentErrorType.SERVICE_UNAVAILABLE); + expect(result.errorMessage).toMatch(/rate-limited/); + expect(validateAndFormat).not.toHaveBeenCalled(); + }); }); diff --git a/src/lib/agent-interface.ts b/src/lib/agent-interface.ts index b731efd2..59dae0f1 100644 --- a/src/lib/agent-interface.ts +++ b/src/lib/agent-interface.ts @@ -59,6 +59,9 @@ export type AgentSignal = (typeof AgentSignals)[keyof typeof AgentSignals]; /** Internal prefix used to tag service-unavailability errors from handleSDKMessage */ const SERVICE_UNAVAILABLE_PREFIX = '__SERVICE_UNAVAILABLE__'; +/** Internal prefix used to tag rate-limit errors from handleSDKMessage */ +const RATE_LIMITED_PREFIX = '__RATE_LIMITED__'; + /** * Error types that can be returned from agent execution. * These correspond to the error signals that the agent emits. @@ -671,6 +674,14 @@ export async function runAgent( errorMessage: 'The AI service is temporarily unavailable. Please try again in a few minutes.', }; } + if (sdkError.startsWith(RATE_LIMITED_PREFIX)) { + const detail = sdkError.slice(RATE_LIMITED_PREFIX.length); + logError('AI service rate-limited:', detail); + return { + error: AgentErrorType.SERVICE_UNAVAILABLE, + errorMessage: 'The AI service is currently rate-limited. Please wait a minute and try again.', + }; + } logError('Agent SDK error:', sdkError); return { error: AgentErrorType.EXECUTION_ERROR, errorMessage: sdkError }; } @@ -871,13 +882,13 @@ function handleSDKMessage( const resultText = typeof message.result === 'string' ? message.result : ''; logError('Agent result marked as error:', resultText); - // Detect service unavailability (API 500, upstream outage) or rate limiting (429) - if ( - /\b50[0-9]\b/.test(resultText) || - /server_error|internal_error|overloaded/.test(resultText) || - /\b429\b/.test(resultText) || - /rate.limit/i.test(resultText) - ) { + // Detect rate limiting (429) — check before 5xx so it gets distinct messaging + if (/\b429\b/.test(resultText) || /rate.limit/i.test(resultText)) { + return `${RATE_LIMITED_PREFIX}${resultText}`; + } + + // Detect service unavailability (API 500, upstream outage) + if (/\b50[0-9]\b/.test(resultText) || /server_error|internal_error|overloaded/.test(resultText)) { return `${SERVICE_UNAVAILABLE_PREFIX}${resultText}`; } return resultText || 'Agent execution failed'; diff --git a/src/lib/agent-runner.ts b/src/lib/agent-runner.ts index 54d9f0be..41fef21e 100644 --- a/src/lib/agent-runner.ts +++ b/src/lib/agent-runner.ts @@ -139,7 +139,9 @@ export async function runAgentInstaller(config: FrameworkConfig, options: Instal if (agentResult.error) { await analytics.shutdown('error'); const message = agentResult.errorMessage || agentResult.error; - throw new Error(`Agent SDK error: ${message}`); + // Pass user-friendly messages through without wrapping them in + // "Agent SDK error:" — that prefix obscures the actionable text. + throw new Error(message); } // Track retry metrics