diff --git a/src/services/api/claude.ts b/src/services/api/claude.ts index 89a6e661..99db20e8 100644 --- a/src/services/api/claude.ts +++ b/src/services/api/claude.ts @@ -2281,13 +2281,15 @@ async function* queryModel( max_tokens: maxOutputTokens, output_tokens: usage.output_tokens, }) - // Reuse the max_output_tokens recovery path — from the model's - // perspective, both mean "response was cut off, continue from - // where you left off." + // Keep the public error shape aligned with max_output_tokens so + // existing clients keep the same UX, and stash the precise + // overflow kind in errorDetails for the query loop's recovery + // routing. yield createAssistantAPIErrorMessage({ content: `${API_ERROR_MESSAGE_PREFIX}: The model has reached its context window limit.`, apiError: 'max_output_tokens', error: 'max_output_tokens', + errorDetails: 'context_window_exceeded', }) } break diff --git a/src/utils/context.ts b/src/utils/context.ts index d9714de9..06b235ef 100644 --- a/src/utils/context.ts +++ b/src/utils/context.ts @@ -52,14 +52,11 @@ export function getContextWindowForModel( model: string, betas?: string[], ): number { - // Allow override via environment variable (ant-only) + // Allow override via environment variable. // This takes precedence over all other context window resolution, including 1M detection, // so users can cap the effective context window for local decisions (auto-compact, etc.) // while still using a 1M-capable endpoint. - if ( - process.env.USER_TYPE === 'ant' && - process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS - ) { + if (process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS) { const override = parseInt(process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS, 10) if (!isNaN(override) && override > 0) { return override