Skip to content

Commit 82afffa

Browse files
Merge pull request #83 from askui/feat/introduce-claude-4
feat!: introduce Claude 4 Sonnet
2 parents 5657462 + 70b26c2 commit 82afffa

File tree

20 files changed

+265
-85
lines changed

20 files changed

+265
-85
lines changed

README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,8 @@ with VisionAgent() as agent:
128128

129129
agent.click("search field")
130130

131-
# Use Anthropic (Claude 3.5 Sonnet V2) as model
132-
agent.click("search field", model="anthropic-claude-3-5-sonnet-20241022")
131+
# Use Anthropic (Claude 4 Sonnet) as model
132+
agent.click("search field", model="claude-sonnet-4-20250514")
133133
```
134134

135135

@@ -201,7 +201,7 @@ with VisionAgent(model="askui-combo") as agent:
201201

202202
# Use different models for different tasks
203203
with VisionAgent(model={
204-
"act": "anthropic-claude-3-5-sonnet-20241022", # Use Claude for act()
204+
"act": "claude-sonnet-4-20250514", # Use Claude for act()
205205
"get": "askui", # Use AskUI for get()
206206
"locate": "askui-combo", # Use AskUI combo for locate() (and click(), mouse_move())
207207
}) as agent:
@@ -240,7 +240,7 @@ Supported commands are: `act()`, `click()`, `get()`, `locate()`, `mouse_move()`
240240
Supported commands are: `act()`, `get()`, `click()`, `locate()`, `mouse_move()`
241241
| Model Name | Info | Execution Speed | Security | Cost | Reliability |
242242
|-------------|--------------------|--------------|--------------|--------------|--------------|
243-
| `anthropic-claude-3-5-sonnet-20241022` | The [Computer Use](https://docs.anthropic.com/en/docs/agents-and-tools/computer-use) model from Antrophic is a Large Action Model (LAM), which can autonomously achieve goals. e.g. `"Book me a flight from Berlin to Rom"` | slow, >1s per step | Model hosting by Anthropic | High, up to 1,5$ per act | Not recommended for production usage |
243+
| `claude-sonnet-4-20250514` | The [Computer Use](https://docs.anthropic.com/en/docs/agents-and-tools/computer-use) model from Anthropic is a Large Action Model (LAM), which can autonomously achieve goals, e.g. `"Book me a flight from Berlin to Rome"` | slow, >1s per step | Model hosting by Anthropic | High, up to $1.50 per act | Not recommended for production usage |
244244
> **Note:** Configure your Anthropic Model Provider [here](#3a-authenticate-with-an-ai-model-provider)
245245
246246

@@ -409,7 +409,7 @@ custom_models: ModelRegistry = {
409409
"dynamic-model": lambda: create_custom_model("your-api-key"),
410410
"dynamic-model-cached": lambda: create_custom_model_cached("your-api-key"),
411411
"askui": lambda: create_custom_model_cached("your-api-key"), # overrides default model
412-
"anthropic-claude-3-5-sonnet-20241022": lambda: create_custom_model_cached("your-api-key"), # overrides model
412+
"claude-sonnet-4-20250514": lambda: create_custom_model_cached("your-api-key"), # overrides model
413413
}
414414

415415

@@ -735,7 +735,7 @@ with VisionAgent() as agent:
735735

736736
**AskUI Vision Agent** is a versatile AI-powered framework that enables you to automate computer tasks in Python.
737737

738-
It connects Agent OS with powerful computer use models like Anthropic's Claude Sonnet 3.5 v2 and the AskUI Prompt-to-Action series. It is your entry point for building complex automation scenarios with detailed instructions or let the agent explore new challenges on its own.
738+
It connects Agent OS with powerful computer use models like Anthropic's Claude Sonnet 4 and the AskUI Prompt-to-Action series. It is your entry point for building complex automation scenarios with detailed instructions or letting the agent explore new challenges on its own.
739739

740740

741741
![image](docs/assets/Architecture.svg)

src/askui/agent.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from askui.models.shared.computer_agent_cb_param import OnMessageCb
1212
from askui.models.shared.computer_agent_message_param import MessageParam
1313
from askui.models.shared.tools import ToolCollection
14-
from askui.tools.computer import Computer20241022Tool
14+
from askui.tools.computer import Computer20241022Tool, Computer20250124Tool
1515
from askui.tools.exception_tool import ExceptionTool
1616
from askui.utils.image_utils import ImageSource, Img
1717

@@ -81,10 +81,13 @@ def __init__(
8181
reporter=self._reporter,
8282
),
8383
)
84+
self._tool_collection = ToolCollection(
85+
tools=[
86+
ExceptionTool(),
87+
]
88+
)
8489
_models = initialize_default_model_registry(
85-
tool_collection=ToolCollection(
86-
tools=[Computer20241022Tool(self.tools.os), ExceptionTool()]
87-
),
90+
tool_collection=self._tool_collection,
8891
reporter=self._reporter,
8992
)
9093
_models.update(models or {})
@@ -628,6 +631,11 @@ def act(
628631
Returns:
629632
None
630633
634+
Raises:
635+
MaxTokensExceededError: If the model reaches the maximum token limit
636+
defined in the agent settings.
637+
ModelRefusalError: If the model refuses to process the request.
638+
631639
Example:
632640
```python
633641
from askui import VisionAgent
@@ -650,7 +658,19 @@ def act(
650658
messages: list[MessageParam] = (
651659
[MessageParam(role="user", content=goal)] if isinstance(goal, str) else goal
652660
)
653-
self._model_router.act(messages, model or self._model_choice["act"], on_message)
661+
_model = model or self._model_choice["act"]
662+
self._update_tool_collection(_model)
663+
self._model_router.act(messages, _model, on_message)
664+
665+
def _update_tool_collection(self, model: str) -> None:
666+
if model == ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022:
667+
self._tool_collection.append_tool(
668+
Computer20241022Tool(agent_os=self.tools.os)
669+
)
670+
if model == ModelName.CLAUDE__SONNET__4__20250514 or model == ModelName.ASKUI:
671+
self._tool_collection.append_tool(
672+
Computer20250124Tool(agent_os=self.tools.os)
673+
)
654674

655675
@telemetry.record_call()
656676
@validate_call

src/askui/models/anthropic/computer_agent.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,22 @@
11
from typing import TYPE_CHECKING, cast
22

3-
from anthropic import Anthropic
3+
from anthropic import NOT_GIVEN, Anthropic, NotGiven
4+
from anthropic.types import AnthropicBetaParam
45
from typing_extensions import override
56

67
from askui.models.anthropic.settings import ClaudeComputerAgentSettings
78
from askui.models.models import ANTHROPIC_MODEL_NAME_MAPPING, ModelName
8-
from askui.models.shared.computer_agent import ComputerAgent
9+
from askui.models.shared.computer_agent import (
10+
COMPUTER_USE_20241022_BETA_FLAG,
11+
COMPUTER_USE_20250124_BETA_FLAG,
12+
ComputerAgent,
13+
)
914
from askui.models.shared.computer_agent_message_param import MessageParam
1015
from askui.models.shared.tools import ToolCollection
1116
from askui.reporting import Reporter
1217

1318
if TYPE_CHECKING:
14-
from anthropic.types.beta import BetaMessageParam
19+
from anthropic.types.beta import BetaMessageParam, BetaThinkingConfigParam
1520

1621

1722
class ClaudeComputerAgent(ComputerAgent[ClaudeComputerAgentSettings]):
@@ -26,20 +31,31 @@ def __init__(
2631
api_key=self._settings.anthropic.api_key.get_secret_value()
2732
)
2833

34+
def _get_betas(self, model_choice: str) -> list[AnthropicBetaParam] | NotGiven:
35+
if model_choice == ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022:
36+
return self._settings.betas + [COMPUTER_USE_20241022_BETA_FLAG]
37+
if model_choice == ModelName.CLAUDE__SONNET__4__20250514:
38+
return self._settings.betas + [COMPUTER_USE_20250124_BETA_FLAG]
39+
return NOT_GIVEN
40+
2941
@override
3042
def _create_message(
3143
self, messages: list[MessageParam], model_choice: str
3244
) -> MessageParam:
3345
response = self._client.beta.messages.with_raw_response.create(
3446
max_tokens=self._settings.max_tokens,
3547
messages=[
36-
cast("BetaMessageParam", message.model_dump(mode="json"))
48+
cast("BetaMessageParam", message.model_dump(exclude={"stop_reason"}))
3749
for message in messages
3850
],
3951
model=ANTHROPIC_MODEL_NAME_MAPPING[ModelName(model_choice)],
4052
system=[self._system],
4153
tools=self._tool_collection.to_params(),
42-
betas=self._settings.betas,
54+
betas=self._get_betas(model_choice),
55+
thinking=cast(
56+
"BetaThinkingConfigParam", self._settings.thinking.model_dump()
57+
),
58+
tool_choice=self._settings.tool_choice,
4359
)
4460
parsed_response = response.parse()
4561
return MessageParam.model_validate(parsed_response.model_dump())

src/askui/models/anthropic/settings.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
1-
from pydantic import BaseModel, Field, SecretStr
1+
from pydantic import Field, SecretStr
22
from pydantic_settings import BaseSettings
33

44
from askui.models.shared.computer_agent import ComputerAgentSettingsBase
55
from askui.models.shared.settings import ChatCompletionsCreateSettings
66

7-
COMPUTER_USE_BETA_FLAG = "computer-use-2024-10-22"
8-
97

108
class AnthropicSettings(BaseSettings):
119
api_key: SecretStr = Field(
@@ -15,7 +13,7 @@ class AnthropicSettings(BaseSettings):
1513
)
1614

1715

18-
class ClaudeSettingsBase(BaseModel):
16+
class ClaudeSettingsBase(BaseSettings):
1917
anthropic: AnthropicSettings = Field(default_factory=lambda: AnthropicSettings())
2018

2119

src/askui/models/askui/computer_agent.py

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,34 @@
11
import httpx
2+
from anthropic.types.beta import (
3+
BetaTextBlockParam,
4+
BetaToolChoiceParam,
5+
BetaToolUnionParam,
6+
)
7+
from pydantic import BaseModel, ConfigDict
28
from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential
39
from typing_extensions import override
410

511
from askui.models.askui.settings import AskUiComputerAgentSettings
6-
from askui.models.shared.computer_agent import ComputerAgent
12+
from askui.models.shared.computer_agent import ComputerAgent, ThinkingConfigParam
713
from askui.models.shared.computer_agent_message_param import MessageParam
814
from askui.models.shared.tools import ToolCollection
915
from askui.reporting import Reporter
1016

1117
from ...logger import logger
1218

1319

20+
class RequestBody(BaseModel):
21+
model_config = ConfigDict(arbitrary_types_allowed=True)
22+
max_tokens: int
23+
messages: list[MessageParam]
24+
model: str
25+
tools: list[BetaToolUnionParam]
26+
betas: list[str]
27+
system: list[BetaTextBlockParam]
28+
thinking: ThinkingConfigParam
29+
tool_choice: BetaToolChoiceParam
30+
31+
1432
def is_retryable_error(exception: BaseException) -> bool:
1533
"""Check if the exception is a retryable error (status codes 429 or 529)."""
1634
if isinstance(exception, httpx.HTTPStatusError):
@@ -47,21 +65,31 @@ def _create_message(
4765
model_choice: str, # noqa: ARG002
4866
) -> MessageParam:
4967
try:
50-
request_body = {
51-
"max_tokens": self._settings.max_tokens,
52-
"messages": [msg.model_dump(mode="json") for msg in messages],
53-
"model": self._settings.model,
54-
"tools": self._tool_collection.to_params(),
55-
"betas": self._settings.betas,
56-
"system": [self._system],
57-
}
68+
request_body = RequestBody(
69+
max_tokens=self._settings.max_tokens,
70+
messages=messages,
71+
model=self._settings.model,
72+
tools=self._tool_collection.to_params(),
73+
betas=self._settings.betas,
74+
system=[self._system],
75+
tool_choice=self._settings.tool_choice,
76+
thinking=self._settings.thinking,
77+
)
5878
response = self._client.post(
59-
"/act/inference", json=request_body, timeout=300.0
79+
"/act/inference",
80+
json=request_body.model_dump(
81+
mode="json", exclude={"messages": {"stop_reason"}}
82+
),
83+
timeout=300.0,
6084
)
6185
response.raise_for_status()
62-
response_data = response.json()
63-
return MessageParam.model_validate(response_data)
86+
return MessageParam.model_validate_json(response.text)
6487
except Exception as e: # noqa: BLE001
6588
if is_retryable_error(e):
6689
logger.debug(e)
90+
if (
91+
isinstance(e, httpx.HTTPStatusError)
92+
and 400 <= e.response.status_code < 500
93+
):
94+
raise ValueError(e.response.json()) from e
6795
raise

src/askui/models/askui/settings.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,12 @@
66

77
from askui.models.models import ModelName
88
from askui.models.shared.base_agent import AgentSettingsBase
9-
from askui.models.shared.computer_agent import ComputerAgentSettingsBase
9+
from askui.models.shared.computer_agent import (
10+
COMPUTER_USE_20250124_BETA_FLAG,
11+
ComputerAgentSettingsBase,
12+
ThinkingConfigEnabledParam,
13+
ThinkingConfigParam,
14+
)
1015

1116

1217
class AskUiSettings(BaseSettings):
@@ -40,12 +45,16 @@ def base_url(self) -> str:
4045

4146

4247
class AskUiComputerAgentSettings(ComputerAgentSettingsBase):
43-
model: str = ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022
48+
model: str = ModelName.CLAUDE__SONNET__4__20250514
4449
askui: AskUiSettings = Field(default_factory=AskUiSettings)
50+
betas: list[str] = Field(default_factory=lambda: [COMPUTER_USE_20250124_BETA_FLAG])
51+
thinking: ThinkingConfigParam = Field(
52+
default_factory=lambda: ThinkingConfigEnabledParam(budget_tokens=2048)
53+
)
4554

4655

4756
class AskUiAndroidAgentSettings(AgentSettingsBase):
4857
"""Settings for AskUI Android agent."""
4958

50-
model: str = ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022
59+
model: str = ModelName.CLAUDE__SONNET__4__20250514
5160
askui: AskUiSettings = Field(default_factory=AskUiSettings)

src/askui/models/exceptions.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,36 @@ def __init__(
9999
message=f'Model "{model_choice}" is an instance of {actual_type.mro()}, '
100100
f"expected it to be an instance of {expected_type.mro()}",
101101
)
102+
103+
104+
class MaxTokensExceededError(AutomationError):
105+
"""Exception raised when the model stops due to reaching the maximum token limit.
106+
107+
Args:
108+
max_tokens (int): The maximum token limit that was exceeded.
109+
message (str, optional): Custom error message. If not provided, a default
110+
message will be generated.
111+
"""
112+
113+
def __init__(self, max_tokens: int, message: str | None = None):
114+
self.max_tokens = max_tokens
115+
error_msg = (
116+
f"Model stopped due to reaching maximum token limit of {max_tokens} tokens"
117+
if message is None
118+
else message
119+
)
120+
super().__init__(error_msg)
121+
122+
123+
class ModelRefusalError(AutomationError):
124+
"""Exception raised when the model refuses to process the request.
125+
126+
Args:
127+
message (str, optional): Custom error message. If not provided, a default
128+
message will be generated.
129+
"""
130+
131+
def __init__(self, message: str | None = None):
132+
super().__init__(
133+
"Model refused to process the request" if message is None else message
134+
)

src/askui/models/model_router.py

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,7 @@
55

66
from askui.locators.locators import Locator
77
from askui.locators.serializers import AskUiLocatorSerializer, VlmLocatorSerializer
8-
from askui.models.anthropic.settings import (
9-
AnthropicSettings,
10-
ClaudeComputerAgentSettings,
11-
ClaudeSettings,
12-
)
8+
from askui.models.anthropic.settings import ClaudeComputerAgentSettings, ClaudeSettings
139
from askui.models.askui.ai_element_utils import AiElementCollection
1410
from askui.models.askui.android_agent import AskUiAndroidAgent
1511
from askui.models.askui.computer_agent import AskUiComputerAgent
@@ -75,18 +71,13 @@ def vlm_locator_serializer() -> VlmLocatorSerializer:
7571

7672
@functools.cache
7773
def anthropic_facade() -> ModelFacade:
78-
settings = AnthropicSettings()
7974
computer_agent = ClaudeComputerAgent(
8075
tool_collection=tool_collection,
8176
reporter=reporter,
82-
settings=ClaudeComputerAgentSettings(
83-
anthropic=settings,
84-
),
77+
settings=ClaudeComputerAgentSettings(),
8578
)
8679
handler = ClaudeHandler(
87-
settings=ClaudeSettings(
88-
anthropic=settings,
89-
),
80+
settings=ClaudeSettings(),
9081
locator_serializer=vlm_locator_serializer(),
9182
)
9283
return ModelFacade(
@@ -117,12 +108,13 @@ def hf_spaces_handler() -> HFSpacesHandler:
117108
)
118109

119110
return {
111+
ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022: anthropic_facade,
120112
ModelName.ASKUI: askui_facade,
121113
ModelName.ASKUI__AI_ELEMENT: askui_model_router,
122114
ModelName.ASKUI__COMBO: askui_model_router,
123115
ModelName.ASKUI__OCR: askui_model_router,
124116
ModelName.ASKUI__PTA: askui_model_router,
125-
ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022: anthropic_facade,
117+
ModelName.CLAUDE__SONNET__4__20250514: anthropic_facade,
126118
ModelName.HF__SPACES__ASKUI__PTA_1: hf_spaces_handler,
127119
ModelName.HF__SPACES__QWEN__QWEN2_VL_2B_INSTRUCT: hf_spaces_handler,
128120
ModelName.HF__SPACES__QWEN__QWEN2_VL_7B_INSTRUCT: hf_spaces_handler,

0 commit comments

Comments
 (0)