From 01b17eb4f3e42fc08986235a5e787dd00a790ab0 Mon Sep 17 00:00:00 2001 From: elliot Date: Thu, 16 Apr 2026 19:37:40 +0800 Subject: [PATCH 1/2] Allow context window env override for all builds --- src/utils/context.ts | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/utils/context.ts b/src/utils/context.ts index d9714de9..06b235ef 100644 --- a/src/utils/context.ts +++ b/src/utils/context.ts @@ -52,14 +52,11 @@ export function getContextWindowForModel( model: string, betas?: string[], ): number { - // Allow override via environment variable (ant-only) + // Allow override via environment variable. // This takes precedence over all other context window resolution, including 1M detection, // so users can cap the effective context window for local decisions (auto-compact, etc.) // while still using a 1M-capable endpoint. - if ( - process.env.USER_TYPE === 'ant' && - process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS - ) { + if (process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS) { const override = parseInt(process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS, 10) if (!isNaN(override) && override > 0) { return override From 6531db992d7758ec81cc61d1598393ad99e0ccce Mon Sep 17 00:00:00 2001 From: elliot Date: Thu, 16 Apr 2026 21:25:33 +0800 Subject: [PATCH 2/2] refactor: align error handling for context window limit Kept the public error shape aligned with the max_output_tokens error so existing clients keep the same user experience. Added errorDetails: 'context_window_exceeded' to record the precise overflow kind for the query loop's recovery routing. 
--- src/services/api/claude.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/services/api/claude.ts b/src/services/api/claude.ts index 89a6e661..99db20e8 100644 --- a/src/services/api/claude.ts +++ b/src/services/api/claude.ts @@ -2281,13 +2281,15 @@ async function* queryModel( max_tokens: maxOutputTokens, output_tokens: usage.output_tokens, }) - // Reuse the max_output_tokens recovery path — from the model's - // perspective, both mean "response was cut off, continue from - // where you left off." + // Keep the public error shape aligned with max_output_tokens so + // existing clients keep the same UX, and stash the precise + // overflow kind in errorDetails for the query loop's recovery + // routing. yield createAssistantAPIErrorMessage({ content: `${API_ERROR_MESSAGE_PREFIX}: The model has reached its context window limit.`, apiError: 'max_output_tokens', error: 'max_output_tokens', + errorDetails: 'context_window_exceeded', }) } break