From 4a186f9b34c2b040882155cb955c40f1dd0a7e71 Mon Sep 17 00:00:00 2001 From: Peter Dave Hello Date: Wed, 3 Jun 2026 03:22:47 +0800 Subject: [PATCH] feat: add Anthropic's Claude Opus 4.8 support Register Claude Opus 4.8 model IDs with the 1M context window across Anthropic, Vertex AI, and supported Bedrock aliases. Preserve temperature forwarding for the new aliases while keeping Opus 4.8 outside Claude extended thinking handling. Add focused coverage for max-token lookup, request parameters, and existing seed validation. --- pr_agent/algo/__init__.py | 9 +++ tests/unittest/test_get_max_tokens.py | 26 ++++++++ .../test_litellm_chat_completion_core.py | 65 +++++++++++++++++++ 3 files changed, 100 insertions(+) diff --git a/pr_agent/algo/__init__.py b/pr_agent/algo/__init__.py index b55b58a77a..268ad65915 100644 --- a/pr_agent/algo/__init__.py +++ b/pr_agent/algo/__init__.py @@ -84,6 +84,7 @@ 'vertex_ai/claude-opus-4-6@20260120': 200000, 'vertex_ai/claude-opus-4-6': 200000, 'vertex_ai/claude-opus-4-7': 1000000, + 'vertex_ai/claude-opus-4-8': 1000000, 'vertex_ai/claude-3-5-sonnet@20240620': 100000, 'vertex_ai/claude-3-5-sonnet-v2@20241022': 100000, 'vertex_ai/claude-3-7-sonnet@20250219': 200000, @@ -131,6 +132,7 @@ 'anthropic/claude-opus-4-6': 200000, 'anthropic/claude-opus-4-6-20260120': 200000, 'anthropic/claude-opus-4-7': 1000000, + 'anthropic/claude-opus-4-8': 1000000, 'anthropic/claude-3-5-sonnet-20240620': 100000, 'anthropic/claude-3-5-sonnet-20241022': 100000, 'anthropic/claude-3-7-sonnet-20250219': 200000, @@ -142,6 +144,7 @@ 'claude-opus-4-6': 200000, 'claude-opus-4-6-20260120': 200000, 'claude-opus-4-7': 1000000, + 'claude-opus-4-8': 1000000, 'claude-3-7-sonnet-20250219': 200000, 'claude-sonnet-4-6': 200000, 'anthropic/claude-3-5-haiku-20241022': 100000, @@ -156,6 +159,7 @@ 'bedrock/anthropic.claude-opus-4-6-20260120-v1:0': 200000, 'bedrock/anthropic.claude-opus-4-6-v1:0': 200000, 'bedrock/anthropic.claude-opus-4-7': 1000000, + 'bedrock/anthropic.claude-opus-4-8': 1000000, 'bedrock/anthropic.claude-3-haiku-20240307-v1:0': 100000, 'bedrock/anthropic.claude-3-5-haiku-20241022-v1:0': 100000, 'bedrock/anthropic.claude-haiku-4-5-20251001-v1:0': 200000, @@ -184,6 +188,11 @@ "bedrock/us.anthropic.claude-opus-4-6-v1:0": 200000, "bedrock/global.anthropic.claude-opus-4-7": 1000000, "bedrock/us.anthropic.claude-opus-4-7": 1000000, + "bedrock/global.anthropic.claude-opus-4-8": 1000000, + "bedrock/us.anthropic.claude-opus-4-8": 1000000, + "bedrock/eu.anthropic.claude-opus-4-8": 1000000, + "bedrock/au.anthropic.claude-opus-4-8": 1000000, + "bedrock/jp.anthropic.claude-opus-4-8": 1000000, "bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0": 100000, "bedrock/us.anthropic.claude-haiku-4-5-20251001-v1:0": 200000, "bedrock/eu.anthropic.claude-haiku-4-5-20251001-v1:0": 200000, diff --git a/tests/unittest/test_get_max_tokens.py b/tests/unittest/test_get_max_tokens.py index a7a4d898bb..64a21ae268 100644 --- a/tests/unittest/test_get_max_tokens.py +++ b/tests/unittest/test_get_max_tokens.py @@ -159,6 +159,32 @@ def test_gemini_3_and_3_1_pro_preview(self, monkeypatch, model): monkeypatch.setattr(utils, "get_settings", lambda: fake_settings) assert get_max_tokens(model) == 1048576 + @pytest.mark.parametrize( + "model", + [ + "anthropic/claude-opus-4-8", + "claude-opus-4-8", + "vertex_ai/claude-opus-4-8", + "bedrock/anthropic.claude-opus-4-8", + "bedrock/global.anthropic.claude-opus-4-8", + "bedrock/us.anthropic.claude-opus-4-8", + "bedrock/eu.anthropic.claude-opus-4-8", + "bedrock/au.anthropic.claude-opus-4-8", + "bedrock/jp.anthropic.claude-opus-4-8", + ], + ) + def test_claude_opus_4_8_model_max_tokens(self, monkeypatch, model): + fake_settings = type("", (), { + "config": type("", (), { + "custom_model_max_tokens": 0, + "max_model_tokens": 0 + })() + })() + + monkeypatch.setattr(utils, "get_settings", lambda: fake_settings) + + assert get_max_tokens(model) == 1000000 + @pytest.mark.parametrize( "model", [ diff --git a/tests/unittest/test_litellm_chat_completion_core.py b/tests/unittest/test_litellm_chat_completion_core.py index eb05952789..09a5154186 100644 --- a/tests/unittest/test_litellm_chat_completion_core.py +++ b/tests/unittest/test_litellm_chat_completion_core.py @@ -54,6 +54,71 @@ async def test_chat_completion_passes_seed_when_temperature_is_zero(monkeypatch) assert mock_call.call_args.kwargs["seed"] == 123 +@pytest.mark.asyncio +async def test_chat_completion_rejects_seed_for_claude_opus_4_8_default_temperature(monkeypatch): + class FakeAPIError(Exception): + pass + + monkeypatch.setattr(litellm_handler, "get_settings", lambda: FakeSettings(config_values={"seed": 123})) + monkeypatch.setattr(litellm_handler.openai, "APIError", FakeAPIError) + + with patch("pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion", new_callable=AsyncMock) as mock_call: + handler = litellm_handler.LiteLLMAIHandler() + + with pytest.raises(FakeAPIError) as exc_info: + await handler.chat_completion(model="claude-opus-4-8", system="sys", user="usr") + + assert isinstance(exc_info.value.__cause__, ValueError) + assert str(exc_info.value.__cause__) == "Seed (123) is not supported with temperature (0.2) > 0" + mock_call.assert_not_called() + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "model", + [ + "anthropic/claude-opus-4-8", + "claude-opus-4-8", + "vertex_ai/claude-opus-4-8", + "bedrock/anthropic.claude-opus-4-8", + "bedrock/global.anthropic.claude-opus-4-8", + "bedrock/us.anthropic.claude-opus-4-8", + "bedrock/eu.anthropic.claude-opus-4-8", + "bedrock/au.anthropic.claude-opus-4-8", + "bedrock/jp.anthropic.claude-opus-4-8", + ], +) +async def test_chat_completion_passes_temperature_for_claude_opus_4_8(monkeypatch, model): + monkeypatch.setattr(litellm_handler, "get_settings", FakeSettings) + + with patch("pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion", new_callable=AsyncMock) as mock_call: + mock_call.return_value = _mock_response() + handler = litellm_handler.LiteLLMAIHandler() + + await handler.chat_completion(model=model, system="sys", user="usr", temperature=0.2) + + assert mock_call.call_args.kwargs["temperature"] == 0.2 + + +@pytest.mark.asyncio +async def test_chat_completion_does_not_use_extended_thinking_for_claude_opus_4_8(monkeypatch): + monkeypatch.setattr( + litellm_handler, + "get_settings", + lambda: FakeSettings(config_values={"enable_claude_extended_thinking": True}), + ) + + with patch("pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion", new_callable=AsyncMock) as mock_call: + mock_call.return_value = _mock_response() + handler = litellm_handler.LiteLLMAIHandler() + + await handler.chat_completion(model="claude-opus-4-8", system="sys", user="usr", temperature=0.2) + + assert "thinking" not in mock_call.call_args.kwargs + assert "max_tokens" not in mock_call.call_args.kwargs + assert mock_call.call_args.kwargs["temperature"] == 0.2 + + @pytest.mark.asyncio async def test_chat_completion_combines_prompts_for_user_message_only_models(monkeypatch): monkeypatch.setattr(litellm_handler, "get_settings", FakeSettings)