Skip to content

Commit b41efe3

Browse files
committed
feat/web-support: add web agent
1 parent 5cc7b7d commit b41efe3

File tree

12 files changed

+1090
-263
lines changed

12 files changed

+1090
-263
lines changed

pdm.lock

Lines changed: 89 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ dependencies = [
2424
"httpx>=0.28.1",
2525
"fastmcp>=2.3.4",
2626
"pure-python-adb>=0.3.0.dev0",
27+
"playwright>=1.0.0",
2728
]
2829
requires-python = ">=3.10"
2930
readme = "README.md"
@@ -85,6 +86,7 @@ test = [
8586
"types-pyperclip>=1.8.2.20240311",
8687
"pytest-timeout>=2.4.0",
8788
"types-pynput>=1.8.1.20250318",
89+
"playwright>=1.41.0",
8890
]
8991

9092

@@ -195,6 +197,7 @@ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
195197
[tool.ruff.lint.per-file-ignores]
196198
"src/askui/agent.py" = ["E501"]
197199
"src/askui/android_agent.py" = ["E501"]
200+
"src/askui/web_agent.py" = ["E501"]
198201
"src/askui/models/shared/android_agent.py" = ["E501"]
199202
"src/askui/chat/*" = ["E501", "F401", "F403"]
200203
"src/askui/tools/askui/askui_workspaces/*" = ["ALL"]

src/askui/agent.py

Lines changed: 6 additions & 128 deletions
Original file line number | Diff line number | Diff line change
@@ -28,136 +28,12 @@
2828
from .tools import AgentToolbox, ModifierKey, PcKey
2929
from .tools.askui import AskUiControllerClient
3030

31-
_PC_KEY = [
32-
"backspace",
33-
"delete",
34-
"enter",
35-
"tab",
36-
"escape",
37-
"up",
38-
"down",
39-
"right",
40-
"left",
41-
"home",
42-
"end",
43-
"pageup",
44-
"pagedown",
45-
"f1",
46-
"f2",
47-
"f3",
48-
"f4",
49-
"f5",
50-
"f6",
51-
"f7",
52-
"f8",
53-
"f9",
54-
"f10",
55-
"f11",
56-
"f12",
57-
"space",
58-
"0",
59-
"1",
60-
"2",
61-
"3",
62-
"4",
63-
"5",
64-
"6",
65-
"7",
66-
"8",
67-
"9",
68-
"a",
69-
"b",
70-
"c",
71-
"d",
72-
"e",
73-
"f",
74-
"g",
75-
"h",
76-
"i",
77-
"j",
78-
"k",
79-
"l",
80-
"m",
81-
"n",
82-
"o",
83-
"p",
84-
"q",
85-
"r",
86-
"s",
87-
"t",
88-
"u",
89-
"v",
90-
"w",
91-
"x",
92-
"y",
93-
"z",
94-
"A",
95-
"B",
96-
"C",
97-
"D",
98-
"E",
99-
"F",
100-
"G",
101-
"H",
102-
"I",
103-
"J",
104-
"K",
105-
"L",
106-
"M",
107-
"N",
108-
"O",
109-
"P",
110-
"Q",
111-
"R",
112-
"S",
113-
"T",
114-
"U",
115-
"V",
116-
"W",
117-
"X",
118-
"Y",
119-
"Z",
120-
"!",
121-
'"',
122-
"#",
123-
"$",
124-
"%",
125-
"&",
126-
"'",
127-
"(",
128-
")",
129-
"*",
130-
"+",
131-
",",
132-
"-",
133-
".",
134-
"/",
135-
":",
136-
";",
137-
"<",
138-
"=",
139-
">",
140-
"?",
141-
"@",
142-
"[",
143-
"\\",
144-
"]",
145-
"^",
146-
"_",
147-
"`",
148-
"{",
149-
"|",
150-
"}",
151-
"~",
152-
]
153-
15431
_SYSTEM_PROMPT = f"""<SYSTEM_CAPABILITY>
15532
* You are utilising a {sys.platform} machine using {platform.machine()} architecture with internet access.
15633
* When asked to perform web tasks try to open the browser (firefox, chrome, safari, ...) if not already open. Often you can find the browser icons in the toolbars of the operating systems.
15734
* When viewing a page it can be helpful to zoom out so that you can see everything on the page. Either that, or make sure you scroll down to see everything before deciding something isn't available.
158-
* When using your computer function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request.
159-
* Valid keyboard keys available are {", ".join(_PC_KEY)}
160-
* The current date is {datetime.now(timezone.utc).strftime("%A, %B %d, %Y").replace(" 0", " ")}.
35+
* When using your function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request.
36+
* The current date and time is {datetime.now(timezone.utc).strftime("%A, %B %d, %Y %H:%M:%S %z")}.
16137
</SYSTEM_CAPABILITY>
16238
16339
<IMPORTANT>
@@ -211,7 +87,7 @@ class VisionAgent(AgentBase):
21187
```
21288
"""
21389

214-
@telemetry.record_call(exclude={"model_router", "reporters", "tools"})
90+
@telemetry.record_call(exclude={"model_router", "reporters", "tools", "act_tools"})
21591
@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
21692
def __init__(
21793
self,
@@ -222,6 +98,7 @@ def __init__(
22298
model: ModelChoice | ModelComposition | str | None = None,
22399
retry: Retry | None = None,
224100
models: ModelRegistry | None = None,
101+
act_tools: list[Tool] | None = None,
225102
) -> None:
226103
reporter = CompositeReporter(reporters=reporters)
227104
self.tools = tools or AgentToolbox(
@@ -238,7 +115,8 @@ def __init__(
238115
models=models,
239116
tools=[
240117
ExceptionTool(),
241-
],
118+
]
119+
+ (act_tools or []),
242120
agent_os=self.tools.os,
243121
)
244122

src/askui/tools/agent_os.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,15 @@
44
from PIL import Image
55
from pydantic import BaseModel
66

7-
ModifierKey = Literal["command", "alt", "control", "shift", "right_shift"]
7+
ModifierKey = Literal[
8+
"command",
9+
"alt",
10+
"control",
11+
"shift",
12+
"right_shift",
13+
]
814
"""Modifier keys for keyboard actions."""
915

10-
ModifierKeys: list[ModifierKey] = ["command", "alt", "control", "shift", "right_shift"]
11-
1216
PcKey = Literal[
1317
"backspace",
1418
"delete",
@@ -308,7 +312,6 @@ def keyboard_tap(
308312
"""
309313
raise NotImplementedError
310314

311-
@abstractmethod
312315
def set_display(self, display: int = 1) -> None:
313316
"""
314317
Sets the active display for screen interactions.
@@ -319,7 +322,6 @@ def set_display(self, display: int = 1) -> None:
319322
"""
320323
raise NotImplementedError
321324

322-
@abstractmethod
323325
def run_command(self, command: str, timeout_ms: int = 30000) -> None:
324326
"""
325327
Executes a shell command.

0 commit comments

Comments
 (0)