Skip to content

Commit 99cdd7b

Browse files
committed
feat/web-support: add web agent
1 parent 5cc7b7d commit 99cdd7b

File tree

12 files changed

+1091
-263
lines changed

12 files changed

+1091
-263
lines changed

pdm.lock

Lines changed: 89 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ dependencies = [
2424
"httpx>=0.28.1",
2525
"fastmcp>=2.3.4",
2626
"pure-python-adb>=0.3.0.dev0",
27+
"playwright>=1.0.0",
2728
]
2829
requires-python = ">=3.10"
2930
readme = "README.md"
@@ -85,6 +86,7 @@ test = [
8586
"types-pyperclip>=1.8.2.20240311",
8687
"pytest-timeout>=2.4.0",
8788
"types-pynput>=1.8.1.20250318",
89+
"playwright>=1.41.0",
8890
]
8991

9092

@@ -195,6 +197,7 @@ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
195197
[tool.ruff.lint.per-file-ignores]
196198
"src/askui/agent.py" = ["E501"]
197199
"src/askui/android_agent.py" = ["E501"]
200+
"src/askui/web_agent.py" = ["E501"]
198201
"src/askui/models/shared/android_agent.py" = ["E501"]
199202
"src/askui/chat/*" = ["E501", "F401", "F403"]
200203
"src/askui/tools/askui/askui_workspaces/*" = ["ALL"]

src/askui/agent.py

Lines changed: 7 additions & 128 deletions
Original file line number | Diff line number | Diff line change
@@ -28,136 +28,13 @@
2828
from .tools import AgentToolbox, ModifierKey, PcKey
2929
from .tools.askui import AskUiControllerClient
3030

31-
_PC_KEY = [
32-
"backspace",
33-
"delete",
34-
"enter",
35-
"tab",
36-
"escape",
37-
"up",
38-
"down",
39-
"right",
40-
"left",
41-
"home",
42-
"end",
43-
"pageup",
44-
"pagedown",
45-
"f1",
46-
"f2",
47-
"f3",
48-
"f4",
49-
"f5",
50-
"f6",
51-
"f7",
52-
"f8",
53-
"f9",
54-
"f10",
55-
"f11",
56-
"f12",
57-
"space",
58-
"0",
59-
"1",
60-
"2",
61-
"3",
62-
"4",
63-
"5",
64-
"6",
65-
"7",
66-
"8",
67-
"9",
68-
"a",
69-
"b",
70-
"c",
71-
"d",
72-
"e",
73-
"f",
74-
"g",
75-
"h",
76-
"i",
77-
"j",
78-
"k",
79-
"l",
80-
"m",
81-
"n",
82-
"o",
83-
"p",
84-
"q",
85-
"r",
86-
"s",
87-
"t",
88-
"u",
89-
"v",
90-
"w",
91-
"x",
92-
"y",
93-
"z",
94-
"A",
95-
"B",
96-
"C",
97-
"D",
98-
"E",
99-
"F",
100-
"G",
101-
"H",
102-
"I",
103-
"J",
104-
"K",
105-
"L",
106-
"M",
107-
"N",
108-
"O",
109-
"P",
110-
"Q",
111-
"R",
112-
"S",
113-
"T",
114-
"U",
115-
"V",
116-
"W",
117-
"X",
118-
"Y",
119-
"Z",
120-
"!",
121-
'"',
122-
"#",
123-
"$",
124-
"%",
125-
"&",
126-
"'",
127-
"(",
128-
")",
129-
"*",
130-
"+",
131-
",",
132-
"-",
133-
".",
134-
"/",
135-
":",
136-
";",
137-
"<",
138-
"=",
139-
">",
140-
"?",
141-
"@",
142-
"[",
143-
"\\",
144-
"]",
145-
"^",
146-
"_",
147-
"`",
148-
"{",
149-
"|",
150-
"}",
151-
"~",
152-
]
153-
15431
_SYSTEM_PROMPT = f"""<SYSTEM_CAPABILITY>
15532
* You are utilising a {sys.platform} machine using {platform.machine()} architecture with internet access.
15633
* When asked to perform web tasks try to open the browser (firefox, chrome, safari, ...) if not already open. Often you can find the browser icons in the toolbars of the operating systems.
15734
* When viewing a page it can be helpful to zoom out so that you can see everything on the page. Either that, or make sure you scroll down to see everything before deciding something isn't available.
158-
* When using your computer function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request.
159-
* Valid keyboard keys available are {", ".join(_PC_KEY)}
160-
* The current date is {datetime.now(timezone.utc).strftime("%A, %B %d, %Y").replace(" 0", " ")}.
35+
* When using your function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request.
36+
* The current date is {datetime.today().strftime("%A, %B %-d, %Y")}.
37+
* The current date and time is {datetime.now(timezone.utc).strftime("%A, %B %d, %Y %H:%M:%S %z")}.
16138
</SYSTEM_CAPABILITY>
16239
16340
<IMPORTANT>
@@ -211,7 +88,7 @@ class VisionAgent(AgentBase):
21188
```
21289
"""
21390

214-
@telemetry.record_call(exclude={"model_router", "reporters", "tools"})
91+
@telemetry.record_call(exclude={"model_router", "reporters", "tools", "act_tools"})
21592
@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
21693
def __init__(
21794
self,
@@ -222,6 +99,7 @@ def __init__(
22299
model: ModelChoice | ModelComposition | str | None = None,
223100
retry: Retry | None = None,
224101
models: ModelRegistry | None = None,
102+
act_tools: list[Tool] | None = None,
225103
) -> None:
226104
reporter = CompositeReporter(reporters=reporters)
227105
self.tools = tools or AgentToolbox(
@@ -238,7 +116,8 @@ def __init__(
238116
models=models,
239117
tools=[
240118
ExceptionTool(),
241-
],
119+
]
120+
+ (act_tools or []),
242121
agent_os=self.tools.os,
243122
)
244123

src/askui/tools/agent_os.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,15 @@
44
from PIL import Image
55
from pydantic import BaseModel
66

7-
ModifierKey = Literal["command", "alt", "control", "shift", "right_shift"]
7+
ModifierKey = Literal[
8+
"command",
9+
"alt",
10+
"control",
11+
"shift",
12+
"right_shift",
13+
]
814
"""Modifier keys for keyboard actions."""
915

10-
ModifierKeys: list[ModifierKey] = ["command", "alt", "control", "shift", "right_shift"]
11-
1216
PcKey = Literal[
1317
"backspace",
1418
"delete",
@@ -308,7 +312,6 @@ def keyboard_tap(
308312
"""
309313
raise NotImplementedError
310314

311-
@abstractmethod
312315
def set_display(self, display: int = 1) -> None:
313316
"""
314317
Sets the active display for screen interactions.
@@ -319,7 +322,6 @@ def set_display(self, display: int = 1) -> None:
319322
"""
320323
raise NotImplementedError
321324

322-
@abstractmethod
323325
def run_command(self, command: str, timeout_ms: int = 30000) -> None:
324326
"""
325327
Executes a shell command.

0 commit comments

Comments
 (0)