Skip to content

Commit 2afc4c0

Browse files
Merge pull request #241 from askui/feat/superagent
Add `MultiDeviceAgent`
2 parents 7db3839 + 7e838ff commit 2afc4c0

File tree

3 files changed

+190
-2
lines changed

3 files changed

+190
-2
lines changed

docs/02_using_agents.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Using Agents
22

3-
AskUI Vision Agent provides three predefined agent types for different automation targets. All agents share the same core API (`act()`, `get()`, `locate()`) but are optimized for their respective platforms. Each agent comes with its own system prompt tailored to its platform-specific tools and capabilities.
3+
AskUI Vision Agent provides four predefined agent types for different automation targets. All agents share the same core API (`act()`, `get()`, `locate()`) but are optimized for their respective platforms. Each agent comes with its own system prompt tailored to its platform-specific tools and capabilities.
44

55
## ComputerAgent
66

@@ -47,10 +47,30 @@ with WebVisionAgent() as agent:
4747

4848
**Default tools:** All `ComputerAgent` tools plus `goto`, `back`, `forward`, `get_page_title`, `get_page_url`
4949

50+
## MultiDeviceAgent
51+
52+
Use this agent when you need to control a desktop computer and an Android device within the same task. The agent has access to both the full set of computer tools (via AskUI Agent OS) and Android tools (via ADB), and can switch between devices seamlessly during execution.
53+
54+
This is useful for cross-device workflows, such as triggering an action on the desktop and verifying the result on a mobile device, or transferring data between devices.
55+
56+
```python
57+
from askui import MultiDeviceAgent
58+
59+
with MultiDeviceAgent(android_device_sn="emulator-5554") as agent:
60+
agent.act("Open the web app on the computer and send a push notification, then verify it appears on the Android device")
61+
```
62+
63+
If you have multiple Android devices connected, pass the serial number of the target device via `android_device_sn`. You can find serial numbers by running `adb devices`. If omitted, no device is preselected and the agent will select one at runtime.
64+
65+
Requires the `android` extra to be installed (`pip install askui[android]`) and a connected Android device (physical or emulator).
66+
67+
**Default tools:** All `ComputerAgent` tools plus all `AndroidAgent` tools. Additional tools can be provided via the `act_tools` parameter.
68+
5069
## Choosing an Agent
5170

5271
| Target | Agent | Backend |
5372
|--------|-------|---------|
5473
| Desktop (Windows/macOS/Linux) | `ComputerAgent` | AskUI Agent OS (gRPC) |
5574
| Android devices | `AndroidAgent` | ADB |
75+
| Desktop + Android | `MultiDeviceAgent` | AskUI Agent OS (gRPC) + ADB |
5676
| Web browsers | `WebVisionAgent` | Playwright |

src/askui/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@
5454
except ImportError:
5555
_ANDROID_AGENT_AVAILABLE = False
5656

57+
if _ANDROID_AGENT_AVAILABLE:
58+
from .multi_device_agent import MultiDeviceAgent
59+
5760
try:
5861
from .web_agent import WebVisionAgent
5962
from .web_testing_agent import WebTestingAgent
@@ -107,7 +110,7 @@
107110
]
108111

109112
if _ANDROID_AGENT_AVAILABLE:
110-
__all__ += ["AndroidAgent", "AndroidVisionAgent"]
113+
__all__ += ["AndroidAgent", "AndroidVisionAgent", "MultiDeviceAgent"]
111114

112115
if _WEB_AGENTS_AVAILABLE:
113116
__all__ += ["WebVisionAgent", "WebTestingAgent"]

src/askui/multi_device_agent.py

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
from typing import Annotated, Optional, Type, overload
2+
3+
from pydantic import Field
4+
5+
from askui.agent import ComputerAgent
6+
from askui.agent_base import Agent
7+
from askui.agent_settings import AgentSettings
8+
from askui.android_agent import AndroidAgent
9+
from askui.locators.locators import Locator
10+
from askui.models.shared.settings import GetSettings, LocateSettings
11+
from askui.models.shared.tools import Tool
12+
from askui.models.types.geometry import Point
13+
from askui.models.types.response_schemas import ResponseSchema
14+
from askui.prompts.act_prompts import create_multidevice_agent_prompt
15+
from askui.reporting import CompositeReporter, Reporter
16+
from askui.retry import Retry
17+
from askui.utils.source_utils import InputSource
18+
19+
20+
class MultiDeviceAgent(Agent):
    """
    Multi device agent that combines a computer and an Android agent.

    The agent composes a `ComputerAgent` and an `AndroidAgent` and merges their
    tool collections, so a single `act()` call can use tools of both devices
    within the same task. `get()` and `locate()` are not supported on this
    agent directly; call them on `agent.computer` or `agent.android` instead.

    Args:
        desktop_display (int, optional): The display number for computer screen
            interactions. Defaults to `1`.
        android_device_sn (str | int, optional): Android device to use. Passed
            to `AndroidAgent` as `device`. Defaults to `0`.
            NOTE(review): presumably a serial number (`str`) or an index into
            the `adb devices` list (`int`) - confirm against `AndroidAgent`.
        reporters (list[Reporter] | None, optional): List of reporter instances.
            They are wrapped in one `CompositeReporter` that is shared by this
            agent and both composed agents.
        retry (Retry | None, optional): Retry instance for failed actions.
        act_tools (list[Tool] | None, optional): Additional tools for `act()`,
            appended to the combined computer and Android tool collection.
        settings (AgentSettings | None, optional): Agent settings, passed to
            this agent and to both composed agents.

    Example:
        ```python
        from askui import MultiDeviceAgent

        with MultiDeviceAgent(android_device_sn="emulator-5554") as agent:
            agent.computer.click("Start")
            agent.android.tap("OK")
            agent.act("Fill the form on the phone and submit from the desktop")
        ```
    """

    def __init__(
        self,
        desktop_display: Annotated[int, Field(ge=1)] = 1,
        android_device_sn: str | int = 0,
        reporters: list[Reporter] | None = None,
        retry: Retry | None = None,
        act_tools: list[Tool] | None = None,
        settings: AgentSettings | None = None,
    ) -> None:
        # One composite reporter is shared by the base agent and both composed
        # agents so that all of them report into the same sinks.
        reporter = CompositeReporter(reporters=reporters)

        # Initialize the base agent
        super().__init__(
            reporter=reporter,
            retry=retry,
            settings=settings,
        )

        # Initialize the computer agent
        self._computer_agent = ComputerAgent(
            display=desktop_display,
            reporters=[reporter],
            settings=settings,
        )

        # Initialize the Android agent
        self._android_agent = AndroidAgent(
            device=android_device_sn,
            reporters=[reporter],
            settings=settings,
        )

        # Combine the tool collections of the computer and Android agents
        self.act_tool_collection = (
            self._computer_agent.act_tool_collection
            + self._android_agent.act_tool_collection
        )

        # User-supplied tools come last, on top of the combined defaults.
        self.act_tool_collection.append_tool(*(act_tools or []))

        # Replace the system prompt with the multi-device specific one.
        self.act_settings.messages.system = create_multidevice_agent_prompt()

    @property
    def computer(self) -> ComputerAgent:
        """The composed computer agent."""
        return self._computer_agent

    @property
    def android(self) -> AndroidAgent:
        """The composed Android agent."""
        return self._android_agent

    @overload
    def get(
        self,
        query: Annotated[str, Field(min_length=1)],
        response_schema: None = None,
        source: Optional[InputSource] = None,
        get_settings: GetSettings | None = None,
    ) -> str: ...
    @overload
    def get(
        self,
        query: Annotated[str, Field(min_length=1)],
        response_schema: Type[ResponseSchema],
        source: Optional[InputSource] = None,
        get_settings: GetSettings | None = None,
    ) -> ResponseSchema: ...

    def get(
        self,
        query: Annotated[str, Field(min_length=1)],
        response_schema: Type[ResponseSchema] | None = None,
        source: Optional[InputSource] = None,
        get_settings: GetSettings | None = None,
    ) -> ResponseSchema | str:
        """Not supported on `MultiDeviceAgent`.

        Use `agent.computer.get()` or `agent.android.get()` instead.

        Raises:
            NotImplementedError: Always.
        """
        error_msg = (
            "MultiDeviceAgent does not support get() directly."
            " Use agent.computer.get() or agent.android.get()"
            " instead."
        )
        raise NotImplementedError(error_msg)

    def locate(
        self,
        locator: str | Locator,
        screenshot: Optional[InputSource] = None,
        locate_settings: LocateSettings | None = None,
    ) -> Point:
        """Not supported on `MultiDeviceAgent`.

        Use `agent.computer.locate()` or `agent.android.locate()`
        instead.

        Raises:
            NotImplementedError: Always.
        """
        error_msg = (
            "MultiDeviceAgent does not support locate() directly."
            " Use agent.computer.locate() or"
            " agent.android.locate() instead."
        )
        raise NotImplementedError(error_msg)

    def _disconnect_devices(self) -> None:
        """Disconnect both device facades, attempting the second even if the
        first one raises."""
        try:
            self._computer_agent.act_agent_os_facade.disconnect()
        finally:
            self._android_agent.act_agent_os_facade.disconnect()

    def close(self) -> None:
        """Disconnect both devices and close the base agent.

        Every step is attempted even if an earlier one raises, so a failing
        disconnect on one device does not leave the other device connected or
        the base agent open. An exception raised by any step still propagates.
        """
        try:
            self._disconnect_devices()
        finally:
            super().close()

    def open(self) -> None:
        """Open the computer agent, the Android agent and then the base agent.

        If a later step fails, the devices opened so far are disconnected again
        before the exception is re-raised. Without this, a failure inside a
        `with` statement would leak the connection, because `__exit__` is not
        invoked when `__enter__` raises.
        """
        self._computer_agent.open()
        try:
            self._android_agent.open()
        except BaseException:
            # Roll back the already opened computer connection.
            self._computer_agent.act_agent_os_facade.disconnect()
            raise
        try:
            super().open()
        except BaseException:
            # Roll back both device connections.
            self._disconnect_devices()
            raise

0 commit comments

Comments
 (0)