Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions dimos/agents/skills/osm.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
# limitations under the License.


from typing import Any

from dimos.agents.annotation import skill
from dimos.core.module import Module
from dimos.core.stream import In
Expand All @@ -31,8 +33,8 @@ class OsmSkill(Module):

gps_location: In[LatLon]

def __init__(self) -> None:
super().__init__()
def __init__(self, **kwargs: Any) -> None:
super().__init__(**kwargs)
self._latest_location = None
self._current_location_map = CurrentLocationMap(QwenVlModel())

Expand Down
2 changes: 1 addition & 1 deletion dimos/perception/spatial_perception.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ def query_by_image(self, image: np.ndarray, limit: int = 5) -> list[dict]: # ty
return self.vector_db.query_by_embedding(embedding, limit)

@rpc
def query_by_text(self, text: str, limit: int = 5) -> list[dict]: # type: ignore[type-arg]
def query_by_text(self, text: str, limit: int = 5) -> list[dict[str, Any]]:
"""
Query the vector database for images matching the provided text description.

Expand Down
4 changes: 4 additions & 0 deletions dimos/robot/all_blueprints.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,11 @@
"demo-osm": "dimos.mapping.osm.demo_osm:demo_osm",
"demo-skill": "dimos.agents.skills.demo_skill:demo_skill",
"drone-agentic": "dimos.robot.drone.blueprints.agentic.drone_agentic:drone_agentic",
"drone-agentic-gazebo": "dimos.robot.drone.blueprints.agentic.drone_agentic:drone_agentic_gazebo",
"drone-agentic-gazebo-spatial": "dimos.robot.drone.blueprints.agentic.drone_agentic:drone_agentic_gazebo_spatial",
"drone-basic": "dimos.robot.drone.blueprints.basic.drone_basic:drone_basic",
"drone-basic-gazebo": "dimos.robot.drone.blueprints.basic.drone_basic:drone_basic_gazebo",
"drone-basic-gazebo-spatial": "dimos.robot.drone.blueprints.basic.drone_basic:drone_basic_gazebo_spatial",
"dual-xarm6-planner": "dimos.manipulation.blueprints:dual_xarm6_planner",
"keyboard-teleop-piper": "dimos.robot.manipulators.piper.blueprints:keyboard_teleop_piper",
"keyboard-teleop-xarm6": "dimos.robot.manipulators.xarm.blueprints:keyboard_teleop_xarm6",
Expand Down
27 changes: 27 additions & 0 deletions dimos/robot/drone/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ dimos run drone-basic --set outdoor=true

# Agentic with LLM control
dimos run drone-agentic

# Gazebo + ArduPilot SITL
dimos run drone-basic-gazebo
dimos run drone-agentic-gazebo
dimos run drone-basic-gazebo-spatial
dimos run drone-agentic-gazebo-spatial # + spatial memory + navigate
```

To interact with the agent, run `dimos humancli` in a separate terminal.
Expand Down Expand Up @@ -48,6 +54,27 @@ Composes on top of `drone-basic`, adding autonomous capabilities:
| `McpServer` + `McpClient` | LLM agent (default: GPT-4o) via MCP |
| `WebInput` | Web/CLI interface for human commands |

### `drone-basic-gazebo` / `drone-agentic-gazebo` / `drone-basic-gazebo-spatial` / `drone-agentic-gazebo-spatial`
Same as the non-Gazebo blueprints, but with video from Gazebo (RTP/H264 on UDP port 5600). Odometry comes from MAVLink; when SITL sends `LOCAL_POSITION_NED`, that message is used for position. The `-spatial` variants add `SpatialMemory`. **`drone-agentic-gazebo-spatial`** also adds `DroneSpatialNavSkill`, which provides **`navigate_to_where_i_saw(description)`**: it text-searches remembered views and sends a local NED position target to the drone.

## Running with Gazebo + ArduPilot

1. **Install ArduPilot SITL** and a GCS (e.g. MAVProxy). See the [ArduPilot development wiki](https://ardupilot.org/dev/).
2. **Install Gazebo and the ArduPilot Gazebo plugin** (Iris quad, camera streaming, etc.): [Using SITL with Gazebo](https://ardupilot.org/dev/docs/sitl-with-gazebo.html). Set `GZ_SIM_SYSTEM_PLUGIN_PATH` and `GZ_SIM_RESOURCE_PATH` as described there.
3. **Start Gazebo** with the Iris world (camera streams to UDP 5600):
```bash
gz sim -v4 -r iris_runway.sdf
```
Enable camera streaming if needed:
```bash
gz topic -t /world/iris_runway/model/iris_with_gimbal/model/gimbal/link/pitch_link/sensor/camera/image/enable_streaming -m gz.msgs.Boolean -p "data: 1"
```
4. **Start SITL** in another terminal:
```bash
sim_vehicle.py -v ArduCopter -f gazebo-iris --model JSON --map --console
```
5. **Run DimOS** (e.g. `dimos run drone-agentic-gazebo`). MAVLink uses UDP port 14550 by default; video is received on UDP port 5600.

## Installation

### Python (included with DimOS)
Expand Down
52 changes: 46 additions & 6 deletions dimos/robot/drone/blueprints/agentic/drone_agentic.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,24 @@
from dimos.agents.skills.osm import OsmSkill
from dimos.agents.web_human_input import WebInput
from dimos.core.blueprints import autoconnect
from dimos.robot.drone.blueprints.basic.drone_basic import drone_basic
from dimos.robot.drone.blueprints.basic.drone_basic import (
drone_basic,
drone_basic_gazebo,
drone_basic_gazebo_spatial,
)
from dimos.robot.drone.drone_spatial_nav_skill import DroneSpatialNavSkill
from dimos.robot.drone.drone_tracking_module import DroneTrackingModule

DRONE_SYSTEM_PROMPT = """\
You are controlling a DJI drone with MAVLink interface.
You have access to drone control skills you are already flying so only run move_twist, set_mode, and fly_to.
When the user gives commands, use the appropriate skills to control the drone.
Always confirm actions and report results. Send fly_to commands only at above 200 meters altitude to be safe.
Here are some GPS locations to remember
You control a drone over MAVLink (ArduPilot-compatible). Use the tool/schema names and parameters exactly as exposed.
Confirm actions and report results. For GPS missions (fly_to), use safe altitudes appropriate to the environment; do not invent extreme altitudes. Use is_flying_to_target to see if a fly_to is still active.

Motion (see each tool's Args): move (velocity, body NED: x right, y forward, z down m/s); move_by_distance (body displacement m → local NED setpoint when available, else timed forward velocity); go_to_position (absolute local NED: x North, y East, z Down — z negative means up);
rotate_to(heading_deg) for compass yaw (0° North, 90° East), takeoff, land, arm, disarm, set_mode.
Tracking/follow: follow_object (velocity via move_twist only — do not use move_by_distance for tracking). Perception: observe (camera frame). Maps/OSM: place and route tools when GPS or location context applies.
Spatial stack (drone-agentic-gazebo-spatial): navigate_to_where_i_saw(description) runs CLIP on stored views and sends a local NED position target; memory fills when the drone moves and TF+video are available.

Example GPS waypoints (San Francisco area):
6th and Natoma intersection: 37.78019978319006, -122.40770815020853,
454 Natoma (Office): 37.780967465525244, -122.40688342010769
5th and mission intersection: 37.782598539339695, -122.40649441875473
Expand All @@ -55,7 +64,38 @@
]
)

# Agentic blueprint for the Gazebo setup: composes the Gazebo basic blueprint
# with object tracking, the Google Maps and OSM skills, an agent driven by
# DRONE_SYSTEM_PROMPT, and the web input module.
# NOTE(review): the README describes drone-agentic as using McpServer +
# McpClient with GPT-4o by default; this variant uses Agent with "gpt-4o-mini".
# Confirm the difference is intentional and that Agent is imported in this file.
drone_agentic_gazebo = autoconnect(
    drone_basic_gazebo,
    DroneTrackingModule.blueprint(outdoor=False),
    GoogleMapsSkillContainer.blueprint(),
    OsmSkill.blueprint(),
    Agent.blueprint(system_prompt=DRONE_SYSTEM_PROMPT, model="gpt-4o-mini"),
    WebInput.blueprint(),
).remappings(
    [
        # Presumably (module, original stream name, name to connect under);
        # verify against the remappings() API.
        (DroneTrackingModule, "video_input", "video"),
        (DroneTrackingModule, "cmd_vel", "movecmd_twist"),
    ]
)

# Agentic blueprint for the Gazebo setup with spatial memory: built on the
# spatial Gazebo basic blueprint and, compared with the non-spatial agentic
# Gazebo blueprint, additionally includes DroneSpatialNavSkill.
drone_agentic_gazebo_spatial = autoconnect(
    drone_basic_gazebo_spatial,
    DroneTrackingModule.blueprint(outdoor=False),
    GoogleMapsSkillContainer.blueprint(),
    OsmSkill.blueprint(),
    DroneSpatialNavSkill.blueprint(),
    Agent.blueprint(system_prompt=DRONE_SYSTEM_PROMPT, model="gpt-4o-mini"),
    WebInput.blueprint(),
).remappings(
    [
        # Presumably (module, original stream name, name to connect under);
        # verify against the remappings() API.
        (DroneTrackingModule, "video_input", "video"),
        (DroneTrackingModule, "cmd_vel", "movecmd_twist"),
    ]
)

# Public names exported by this module: the system prompt and the agentic
# drone blueprints (default, Gazebo, and Gazebo with spatial memory).
__all__ = [
    "DRONE_SYSTEM_PROMPT",
    "drone_agentic",
    "drone_agentic_gazebo",
    "drone_agentic_gazebo_spatial",
]
22 changes: 21 additions & 1 deletion dimos/robot/drone/blueprints/basic/drone_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from dimos.core.blueprints import autoconnect
from dimos.core.global_config import global_config
from dimos.perception.spatial_perception import SpatialMemory
from dimos.protocol.pubsub.impl.lcmpubsub import LCM
from dimos.robot.drone.camera_module import DroneCameraModule
from dimos.robot.drone.connection_module import DroneConnectionModule
Expand All @@ -44,7 +45,7 @@ def _drone_rerun_blueprint() -> Any:

return rrb.Blueprint(
rrb.Horizontal(
rrb.Spatial2DView(origin="world/video", name="Camera"),
rrb.Spatial2DView(origin="world/color_image", name="Camera"),
rrb.Spatial3DView(
origin="world",
name="3D",
Expand Down Expand Up @@ -95,6 +96,25 @@ def _drone_rerun_blueprint() -> Any:
WebsocketVisModule.blueprint(),
)

# Basic blueprint for the Gazebo setup: visualization, a drone connection
# configured with video_source="gazebo" and outdoor=False, the camera module,
# and the websocket visualization module.
drone_basic_gazebo = autoconnect(
    _vis,
    DroneConnectionModule.blueprint(
        connection_string=connection_string,
        video_port=video_port,
        video_source="gazebo",
        outdoor=False,
    ),
    # NOTE(review): intrinsics look like [fx, fy, cx, cy] for a 1920x1080
    # image -- confirm they match the simulated Gazebo camera.
    DroneCameraModule.blueprint(camera_intrinsics=[1000.0, 1000.0, 960.0, 540.0]),
    WebsocketVisModule.blueprint(),
)

# Gazebo basic blueprint extended with the SpatialMemory module.
drone_basic_gazebo_spatial = autoconnect(
    drone_basic_gazebo,
    SpatialMemory.blueprint(),
)

# Public names exported by this module: the basic drone blueprints (default,
# Gazebo, and Gazebo with spatial memory).
__all__ = [
    "drone_basic",
    "drone_basic_gazebo",
    "drone_basic_gazebo_spatial",
]
Loading