Surface friendly messages for AI-service failures (5xx, 429, network, process exit)

What this patch does
  * integrations/{dotnet,elixir,go,ruby}/index.ts and lib/agent-runner.ts:
    throw the agent error message as-is instead of prefixing it with
    "Agent SDK error:".
  * lib/adapters/cli-adapter.ts and lib/adapters/headless-adapter.ts:
    classify the error message with regexes and print (CLI) or emit as NDJSON
    (headless, with a specific `code`) a user-friendly message.
  * lib/agent-interface.ts: treat a `result` message with `is_error: true` as
    an SDK error, tag 429 and 5xx results with internal prefixes, stop
    validation retries once an SDK error has been seen, and return
    AgentErrorType.SERVICE_UNAVAILABLE with a friendly errorMessage.
  * lib/agent-interface.spec.ts: tests for the behaviour above.

Review changes folded into this revision
  * handleSDKMessage: `(message as Record)` restored to
    `Record<string, unknown>` (bare `Record` does not compile).
  * handleSDKMessage: service-unavailable pattern now matches the adapters
    (case-insensitive, includes `service.*unavailable`); new spec case added.
  * Both adapters: rate limiting is checked before 5xx, the same precedence
    handleSDKMessage uses, so one message is classified the same way in both
    layers.
  * AgentErrorType.SERVICE_UNAVAILABLE doc mentions rate limiting, which also
    returns this type.
  * Retry-skip comment/log no longer claim the cause is always a service
    error; `abortRetries` is set for any SDK message error.

Notes for whoever applies this
  * NOTE(review): line breaks and indentation were reconstructed from the hunk
    headers; leading whitespace is a best guess. Confirm against the tree or
    apply with --ignore-whitespace.
  * NOTE(review): some context lines look like they lost generic arguments
    (`Promise;`, `AsyncIterable;`, `): Promise {`). They are kept verbatim;
    confirm against the repository.
  * The `index` post-image hashes are those of the original revision and are
    stale for the files touched by the review changes.

diff --git a/src/integrations/dotnet/index.ts b/src/integrations/dotnet/index.ts
index bc7a744..3d4f4ee 100644
--- a/src/integrations/dotnet/index.ts
+++ b/src/integrations/dotnet/index.ts
@@ -144,7 +144,7 @@ Begin integration now.`;
   if (agentResult.error) {
     await analytics.shutdown('error');
     const message = agentResult.errorMessage || agentResult.error;
-    throw new Error(`Agent SDK error: ${message}`);
+    throw new Error(message);
   }
 
   // Post-installation validation
diff --git a/src/integrations/elixir/index.ts b/src/integrations/elixir/index.ts
index 34c97ea..36dd17a 100644
--- a/src/integrations/elixir/index.ts
+++ b/src/integrations/elixir/index.ts
@@ -124,7 +124,7 @@ export async function run(options: InstallerOptions): Promise {
   if (agentResult.error) {
     await analytics.shutdown('error');
     const message = agentResult.errorMessage || agentResult.error;
-    throw new Error(`Agent SDK error: ${message}`);
+    throw new Error(message);
   }
 
   // Build summary
diff --git a/src/integrations/go/index.ts b/src/integrations/go/index.ts
index 7776946..c2fdf08 100644
--- a/src/integrations/go/index.ts
+++ b/src/integrations/go/index.ts
@@ -213,7 +213,7 @@ Begin integration now.`;
   if (agentResult.error) {
     await analytics.shutdown('error');
     const message = agentResult.errorMessage || agentResult.error;
-    throw new Error(`Agent SDK error: ${message}`);
+    throw new Error(message);
   }
 
   // Post-installation validation (gracefully skips — no rules file for Go)
diff --git a/src/integrations/ruby/index.ts b/src/integrations/ruby/index.ts
index a453ecd..d259a6e 100644
--- a/src/integrations/ruby/index.ts
+++ b/src/integrations/ruby/index.ts
@@ -143,7 +143,7 @@ Begin integration now.`;
   if (agentResult.error) {
     await analytics.shutdown('error');
     const message = agentResult.errorMessage || agentResult.error;
-    throw new Error(`Agent SDK error: ${message}`);
+    throw new Error(message);
   }
 
   // Build completion summary
diff --git a/src/lib/adapters/cli-adapter.ts b/src/lib/adapters/cli-adapter.ts
index 9bed10f..32cb856 100644
--- a/src/lib/adapters/cli-adapter.ts
+++ b/src/lib/adapters/cli-adapter.ts
@@ -402,7 +402,29 @@ export class CLIAdapter implements InstallerAdapter {
     this.stopSpinner('Error');
     this.stopAgentUpdates();
 
-    clack.log.error(message);
+    // Rewrite raw API/SDK errors into user-friendly messages
+    const isServiceError =
+      /\b50[0-9]\b/.test(message) || /server_error|internal_error|overloaded|service.*unavailable/i.test(message);
+    const isRateLimit = /\b429\b/.test(message) || /rate.limit/i.test(message);
+    const isNetworkError = /ECONNREFUSED|ETIMEDOUT|ENOTFOUND|fetch failed/i.test(message);
+    const isProcessExit = /process exited with code/i.test(message);
+
+    // Check rate limiting before 5xx (same precedence as handleSDKMessage) so a 429 is never reported as an outage
+    if (isRateLimit) {
+      clack.log.error('The AI service is currently rate-limited.');
+      clack.log.info('Please wait a minute and try again.');
+    } else if (isServiceError) {
+      clack.log.error('The AI service is temporarily unavailable.');
+      clack.log.info('This is usually resolved within a few minutes. Please try again shortly.');
+    } else if (isNetworkError) {
+      clack.log.error('Could not connect to the AI service.');
+      clack.log.info('Check your internet connection and try again.');
+    } else if (isProcessExit) {
+      clack.log.error('The AI agent process exited unexpectedly.');
+      clack.log.info('Try running again. If this persists, run with --debug for details.');
+    } else {
+      clack.log.error(message);
+    }
 
     // Add actionable hints for common errors
     if (message.includes('authentication') || message.includes('auth')) {
diff --git a/src/lib/adapters/headless-adapter.ts b/src/lib/adapters/headless-adapter.ts
index b56d5b9..2d414a9 100644
--- a/src/lib/adapters/headless-adapter.ts
+++ b/src/lib/adapters/headless-adapter.ts
@@ -336,7 +336,31 @@ export class HeadlessAdapter implements InstallerAdapter {
   };
 
   private handleError = ({ message, stack }: InstallerEvents['error']): void => {
-    writeNDJSON({ type: 'error', code: 'installer_error', message });
+    const isServiceError =
+      /\b50[0-9]\b/.test(message) || /server_error|internal_error|overloaded|service.*unavailable/i.test(message);
+    const isRateLimit = /\b429\b/.test(message) || /rate.limit/i.test(message);
+    const isNetworkError = /ECONNREFUSED|ETIMEDOUT|ENOTFOUND|fetch failed/i.test(message);
+    const isProcessExit = /process exited with code/i.test(message);
+
+    let code = 'installer_error';
+    let displayMessage = message;
+
+    // Check rate limiting before 5xx (same precedence as handleSDKMessage) so a 429 is never reported as an outage
+    if (isRateLimit) {
+      code = 'rate_limited';
+      displayMessage = 'The AI service is currently rate-limited. Please wait a minute and try again.';
+    } else if (isServiceError) {
+      code = 'service_unavailable';
+      displayMessage = 'The AI service is temporarily unavailable. Please try again in a few minutes.';
+    } else if (isNetworkError) {
+      code = 'network_error';
+      displayMessage = 'Could not connect to the AI service. Check your internet connection and try again.';
+    } else if (isProcessExit) {
+      code = 'process_error';
+      displayMessage = 'The AI agent process exited unexpectedly. Try running again with --debug for details.';
+    }
+
+    writeNDJSON({ type: 'error', code, message: displayMessage });
     this.debugLog(stack ?? '');
   };
 }
diff --git a/src/lib/agent-interface.spec.ts b/src/lib/agent-interface.spec.ts
index 2ac314b..c4f1486 100644
--- a/src/lib/agent-interface.spec.ts
+++ b/src/lib/agent-interface.spec.ts
@@ -67,7 +67,7 @@ vi.mock('../utils/urls.js', () => ({
   getLlmGatewayUrlFromHost: vi.fn(() => 'http://localhost:8000'),
 }));
 
-import { runAgent } from './agent-interface.js';
+import { runAgent, AgentErrorType } from './agent-interface.js';
 import { InstallerEventEmitter } from './events.js';
 import type { InstallerOptions } from '../utils/types.js';
 
@@ -75,8 +75,13 @@ import type { InstallerOptions } from '../utils/types.js';
  * Create a mock SDK response that consumes the prompt stream and yields
  * responses for each prompt message. This models the real SDK behavior:
  * the response generator stays alive as long as prompts keep coming.
+ *
+ * Turn options:
+ * - text: assistant text to yield
+ * - error: result subtype is 'error' with errors array
+ * - is_error: result has subtype 'success' but is_error: true (SDK exhausted retries)
  */
-function createMockSDKResponse(turns: Array<{ text?: string; error?: boolean }>) {
+function createMockSDKResponse(turns: Array<{ text?: string; error?: boolean; is_error?: boolean }>) {
   return function mockQueryImpl({ prompt }: { prompt: AsyncIterable; options: unknown }) {
     let turnIndex = 0;
 
@@ -102,6 +107,7 @@ function createMockSDKResponse(turns: Array<{ text?: string; error?: boolean }>)
         yield {
           type: 'result',
           subtype: turn.error ? 'error' : 'success',
+          is_error: turn.is_error ?? false,
           result: turn.text ?? '',
           ...(turn.error ? { errors: ['Test error'] } : {}),
         };
@@ -265,3 +271,104 @@ describe('runAgent retry loop', () => {
     expect(validateAndFormat).toHaveBeenCalledTimes(1);
   });
 });
+
+describe('service unavailability handling', () => {
+  let emitter: InstallerEventEmitter;
+  let emittedEvents: Array<{ event: string; payload: unknown }>;
+
+  beforeEach(() => {
+    mockQuery.mockReset();
+    emitter = new InstallerEventEmitter();
+    emittedEvents = [];
+
+    const originalEmit = emitter.emit.bind(emitter);
+    emitter.emit = ((event: string, payload: unknown) => {
+      emittedEvents.push({ event, payload });
+      return originalEmit(event, payload);
+    }) as typeof emitter.emit;
+  });
+
+  it('detects is_error result with API 500 as SERVICE_UNAVAILABLE', async () => {
+    const apiErrorText = 'API Error: 500 {"error":{"type":"internal_error","message":"An unexpected error occurred"}}';
+    mockQuery.mockImplementation(createMockSDKResponse([{ text: apiErrorText, is_error: true }]));
+
+    const result = await runAgent(makeAgentConfig(), 'Test prompt', makeOptions(), undefined, emitter);
+
+    expect(result.error).toBe(AgentErrorType.SERVICE_UNAVAILABLE);
+    expect(result.errorMessage).toMatch(/temporarily unavailable/);
+  });
+
+  it('detects is_error result with server_error as SERVICE_UNAVAILABLE', async () => {
+    mockQuery.mockImplementation(createMockSDKResponse([{ text: 'server_error: service overloaded', is_error: true }]));
+
+    const result = await runAgent(makeAgentConfig(), 'Test prompt', makeOptions(), undefined, emitter);
+
+    expect(result.error).toBe(AgentErrorType.SERVICE_UNAVAILABLE);
+  });
+
+  it('detects "Service Unavailable" text case-insensitively as SERVICE_UNAVAILABLE', async () => {
+    mockQuery.mockImplementation(createMockSDKResponse([{ text: 'Service Unavailable', is_error: true }]));
+
+    const result = await runAgent(makeAgentConfig(), 'Test prompt', makeOptions(), undefined, emitter);
+
+    expect(result.error).toBe(AgentErrorType.SERVICE_UNAVAILABLE);
+  });
+
+  it('detects is_error result without service pattern as EXECUTION_ERROR', async () => {
+    mockQuery.mockImplementation(createMockSDKResponse([{ text: 'Some other failure', is_error: true }]));
+
+    const result = await runAgent(makeAgentConfig(), 'Test prompt', makeOptions(), undefined, emitter);
+
+    expect(result.error).toBe(AgentErrorType.EXECUTION_ERROR);
+    expect(result.errorMessage).toBe('Some other failure');
+  });
+
+  it('skips validation retries when service is unavailable', async () => {
+    const apiErrorText = 'API Error: 500 {"error":{"type":"internal_error","message":"An unexpected error occurred"}}';
+    mockQuery.mockImplementation(createMockSDKResponse([{ text: apiErrorText, is_error: true }]));
+
+    const validateAndFormat = vi.fn().mockResolvedValue('Still broken');
+
+    const result = await runAgent(makeAgentConfig(), 'Test prompt', makeOptions(), undefined, emitter, {
+      maxRetries: 2,
+      validateAndFormat,
+    });
+
+    expect(result.error).toBe(AgentErrorType.SERVICE_UNAVAILABLE);
+    // validateAndFormat should never be called because retries are aborted
+    expect(validateAndFormat).not.toHaveBeenCalled();
+
+    // No retry events should be emitted
+    const retryEvents = emittedEvents.filter((e) => e.event === 'agent:retry');
+    expect(retryEvents).toHaveLength(0);
+  });
+
+  it('detects 429 rate limit as distinct from service unavailability', async () => {
+    mockQuery.mockImplementation(
+      createMockSDKResponse([{ text: 'API Error: 429 rate_limit_exceeded', is_error: true }]),
+    );
+
+    const result = await runAgent(makeAgentConfig(), 'Test prompt', makeOptions(), undefined, emitter);
+
+    expect(result.error).toBe(AgentErrorType.SERVICE_UNAVAILABLE);
+    expect(result.errorMessage).toMatch(/rate-limited/);
+    expect(result.errorMessage).not.toMatch(/temporarily unavailable/);
+  });
+
+  it('skips validation retries when rate-limited', async () => {
+    mockQuery.mockImplementation(
+      createMockSDKResponse([{ text: 'API Error: 429 rate_limit_exceeded', is_error: true }]),
+    );
+
+    const validateAndFormat = vi.fn().mockResolvedValue('Still broken');
+
+    const result = await runAgent(makeAgentConfig(), 'Test prompt', makeOptions(), undefined, emitter, {
+      maxRetries: 2,
+      validateAndFormat,
+    });
+
+    expect(result.error).toBe(AgentErrorType.SERVICE_UNAVAILABLE);
+    expect(result.errorMessage).toMatch(/rate-limited/);
+    expect(validateAndFormat).not.toHaveBeenCalled();
+  });
+});
diff --git a/src/lib/agent-interface.ts b/src/lib/agent-interface.ts
index b8108b3..59dae0f 100644
--- a/src/lib/agent-interface.ts
+++ b/src/lib/agent-interface.ts
@@ -56,6 +56,12 @@ export const AgentSignals = {
 
 export type AgentSignal = (typeof AgentSignals)[keyof typeof AgentSignals];
 
+/** Internal prefix used to tag service-unavailability errors from handleSDKMessage */
+const SERVICE_UNAVAILABLE_PREFIX = '__SERVICE_UNAVAILABLE__';
+
+/** Internal prefix used to tag rate-limit errors from handleSDKMessage */
+const RATE_LIMITED_PREFIX = '__RATE_LIMITED__';
+
 /**
  * Error types that can be returned from agent execution.
  * These correspond to the error signals that the agent emits.
@@ -67,6 +73,8 @@ export enum AgentErrorType {
   RESOURCE_MISSING = 'INSTALLER_RESOURCE_MISSING',
   /** Agent execution failed (API error, auth error, etc.) */
   EXECUTION_ERROR = 'INSTALLER_EXECUTION_ERROR',
+  /** AI service is unavailable or rate-limited (API 5xx, 429, outage, etc.) */
+  SERVICE_UNAVAILABLE = 'INSTALLER_SERVICE_UNAVAILABLE',
 }
 
 export type AgentConfig = {
@@ -536,6 +544,11 @@ export async function runAgent(
   let resolveCurrentTurn!: () => void;
   let currentTurnDone!: Promise;
 
+  // Set by the message loop when a fatal SDK error is detected (e.g. service
+  // unavailability). The prompt stream checks this before yielding retry
+  // prompts so we fail fast instead of burning minutes on hopeless retries.
+  let abortRetries = false;
+
   function resetTurnSignal() {
     currentTurnDone = new Promise((resolve) => {
       resolveCurrentTurn = resolve;
@@ -555,6 +568,12 @@ export async function runAgent(
     while (retryCount < maxRetries) {
       await currentTurnDone;
 
+      // Don't send correction prompts once the SDK has reported an error (e.g. the service itself is down)
+      if (abortRetries) {
+        logInfo('Skipping validation retries due to SDK error');
+        break;
+      }
+
       emitter?.emit('validation:retry:start', { attempt: retryCount + 1 });
 
       let validationPrompt: string | null;
@@ -628,6 +647,8 @@ export async function runAgent(
       const messageError = handleSDKMessage(message, options, collectedText, emitter);
       if (messageError) {
         sdkError = messageError;
+        // Signal the prompt stream to stop yielding retry prompts
+        abortRetries = true;
       }
       if (message.type === 'result') {
         resolveCurrentTurn();
@@ -645,6 +666,22 @@ export async function runAgent(
     // Check for SDK errors first (e.g., API errors, auth failures)
     // Return error type + message - caller decides whether to throw or emit events
     if (sdkError) {
+      if (sdkError.startsWith(SERVICE_UNAVAILABLE_PREFIX)) {
+        const detail = sdkError.slice(SERVICE_UNAVAILABLE_PREFIX.length);
+        logError('AI service unavailable:', detail);
+        return {
+          error: AgentErrorType.SERVICE_UNAVAILABLE,
+          errorMessage: 'The AI service is temporarily unavailable. Please try again in a few minutes.',
+        };
+      }
+      if (sdkError.startsWith(RATE_LIMITED_PREFIX)) {
+        const detail = sdkError.slice(RATE_LIMITED_PREFIX.length);
+        logError('AI service rate-limited:', detail);
+        return {
+          error: AgentErrorType.SERVICE_UNAVAILABLE,
+          errorMessage: 'The AI service is currently rate-limited. Please wait a minute and try again.',
+        };
+      }
       logError('Agent SDK error:', sdkError);
       return { error: AgentErrorType.EXECUTION_ERROR, errorMessage: sdkError };
     }
@@ -837,6 +874,29 @@ function handleSDKMessage(
     }
 
     case 'result': {
+      // The SDK may return subtype 'success' with is_error: true when API
+      // retries are exhausted (e.g., persistent 500s). Check is_error first.
+      const isResultError = (message as Record<string, unknown>).is_error === true;
+
+      if (isResultError) {
+        const resultText = typeof message.result === 'string' ? message.result : '';
+        logError('Agent result marked as error:', resultText);
+
+        // Detect rate limiting (429) — check before 5xx so it gets distinct messaging
+        if (/\b429\b/.test(resultText) || /rate.limit/i.test(resultText)) {
+          return `${RATE_LIMITED_PREFIX}${resultText}`;
+        }
+
+        // Detect service unavailability (API 500, upstream outage); same pattern the CLI/headless adapters use
+        if (
+          /\b50[0-9]\b/.test(resultText) ||
+          /server_error|internal_error|overloaded|service.*unavailable/i.test(resultText)
+        ) {
+          return `${SERVICE_UNAVAILABLE_PREFIX}${resultText}`;
+        }
+        return resultText || 'Agent execution failed';
+      }
+
       if (message.subtype === 'success') {
         logInfo('Agent completed successfully');
         if (typeof message.result === 'string') {
diff --git a/src/lib/agent-runner.ts b/src/lib/agent-runner.ts
index 54d9f0b..41fef21 100644
--- a/src/lib/agent-runner.ts
+++ b/src/lib/agent-runner.ts
@@ -139,7 +139,9 @@ export async function runAgentInstaller(config: FrameworkConfig, options: Instal
   if (agentResult.error) {
     await analytics.shutdown('error');
     const message = agentResult.errorMessage || agentResult.error;
-    throw new Error(`Agent SDK error: ${message}`);
+    // Pass user-friendly messages through without wrapping them in
+    // "Agent SDK error:" — that prefix obscures the actionable text.
+    throw new Error(message);
   }
 
   // Track retry metrics