From 01b17eb4f3e42fc08986235a5e787dd00a790ab0 Mon Sep 17 00:00:00 2001
From: elliot <elliothu.my@gmail.com>
Date: Thu, 16 Apr 2026 19:37:40 +0800
Subject: [PATCH 1/2] Allow context window env override for all builds

---
 src/utils/context.ts | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/utils/context.ts b/src/utils/context.ts
index d9714de9..06b235ef 100644
--- a/src/utils/context.ts
+++ b/src/utils/context.ts
@@ -52,14 +52,11 @@ export function getContextWindowForModel(
   model: string,
   betas?: string[],
 ): number {
-  // Allow override via environment variable (ant-only)
+  // Allow override via environment variable.
   // This takes precedence over all other context window resolution, including 1M detection,
   // so users can cap the effective context window for local decisions (auto-compact, etc.)
   // while still using a 1M-capable endpoint.
-  if (
-    process.env.USER_TYPE === 'ant' &&
-    process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS
-  ) {
+  if (process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS) {
     const override = parseInt(process.env.CLAUDE_CODE_MAX_CONTEXT_TOKENS, 10)
     if (!isNaN(override) && override > 0) {
       return override

From 6531db992d7758ec81cc61d1598393ad99e0ccce Mon Sep 17 00:00:00 2001
From: elliot <elliothu.my@gmail.com>
Date: Thu, 16 Apr 2026 21:25:33 +0800
Subject: [PATCH 2/2] refactor: align error handling for context window limit

Kept the public error shape for the context window limit aligned with the max_output_tokens error, so existing clients see the same user experience. Added the precise overflow kind (context_window_exceeded) to errorDetails so the query loop can use it for recovery routing.
---
 src/services/api/claude.ts | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/services/api/claude.ts b/src/services/api/claude.ts
index 89a6e661..99db20e8 100644
--- a/src/services/api/claude.ts
+++ b/src/services/api/claude.ts
@@ -2281,13 +2281,15 @@ async function* queryModel(
                 max_tokens: maxOutputTokens,
                 output_tokens: usage.output_tokens,
               })
-              // Reuse the max_output_tokens recovery path — from the model's
-              // perspective, both mean "response was cut off, continue from
-              // where you left off."
+              // Keep the public error shape aligned with max_output_tokens so
+              // existing clients keep the same UX, and stash the precise
+              // overflow kind in errorDetails for the query loop's recovery
+              // routing.
               yield createAssistantAPIErrorMessage({
                 content: `${API_ERROR_MESSAGE_PREFIX}: The model has reached its context window limit.`,
                 apiError: 'max_output_tokens',
                 error: 'max_output_tokens',
+                errorDetails: 'context_window_exceeded',
               })
             }
             break