From 87a60173abdfe4b2effaf6d0db8a2f3b815435b5 Mon Sep 17 00:00:00 2001 From: jlelong-berkeley Date: Wed, 25 Mar 2026 04:42:18 -0700 Subject: [PATCH] feat(drone): add RoboMaster TT/Tello integration --- dimos/robot/all_blueprints.py | 3 + dimos/robot/drone/README.md | 65 + .../agentic/drone_tello_tt_agentic.py | 57 + .../blueprints/basic/drone_tello_tt_basic.py | 96 ++ dimos/robot/drone/drone_tracking_module.py | 1178 ++++++++++++++++- dimos/robot/drone/tello_connection_module.py | 557 ++++++++ dimos/robot/drone/tello_sdk.py | 483 +++++++ docs/usage/README.md | 1 + docs/usage/drone_tello_tt.md | 136 ++ 9 files changed, 2542 insertions(+), 34 deletions(-) create mode 100644 dimos/robot/drone/blueprints/agentic/drone_tello_tt_agentic.py create mode 100644 dimos/robot/drone/blueprints/basic/drone_tello_tt_basic.py create mode 100644 dimos/robot/drone/tello_connection_module.py create mode 100644 dimos/robot/drone/tello_sdk.py create mode 100644 docs/usage/drone_tello_tt.md diff --git a/dimos/robot/all_blueprints.py b/dimos/robot/all_blueprints.py index 5910093d61..c4d009292e 100644 --- a/dimos/robot/all_blueprints.py +++ b/dimos/robot/all_blueprints.py @@ -48,6 +48,8 @@ "demo-skill": "dimos.agents.skills.demo_skill:demo_skill", "drone-agentic": "dimos.robot.drone.blueprints.agentic.drone_agentic:drone_agentic", "drone-basic": "dimos.robot.drone.blueprints.basic.drone_basic:drone_basic", + "drone-tello-tt-agentic": "dimos.robot.drone.blueprints.agentic.drone_tello_tt_agentic:drone_tello_tt_agentic", + "drone-tello-tt-basic": "dimos.robot.drone.blueprints.basic.drone_tello_tt_basic:drone_tello_tt_basic", "dual-xarm6-planner": "dimos.manipulation.blueprints:dual_xarm6_planner", "keyboard-teleop-piper": "dimos.robot.manipulators.piper.blueprints:keyboard_teleop_piper", "keyboard-teleop-xarm6": "dimos.robot.manipulators.xarm.blueprints:keyboard_teleop_xarm6", @@ -161,6 +163,7 @@ "simple-phone-teleop": "dimos.teleop.phone.phone_extensions", "spatial-memory": "dimos.perception.spatial_perception", "speak-skill": "dimos.agents.skills.speak_skill", + "tello-connection-module": "dimos.robot.drone.tello_connection_module", "temporal-memory": "dimos.perception.experimental.temporal_memory.temporal_memory", "twist-teleop-module": "dimos.teleop.quest.quest_extensions", "unitree-g1-skill-container": "dimos.robot.unitree.g1.skill_container", diff --git a/dimos/robot/drone/README.md b/dimos/robot/drone/README.md index 4e094e9f28..865d86de44 100644 --- a/dimos/robot/drone/README.md +++ b/dimos/robot/drone/README.md @@ -19,10 +19,34 @@ dimos run drone-basic --set outdoor=true # Agentic with LLM control dimos run drone-agentic + +# RoboMaster TT / Tello (Wi-Fi SDK) +dimos run drone-tello-tt-basic --robot-ip 192.168.10.1 +dimos run drone-tello-tt-agentic --robot-ip 192.168.10.1 --disable web-input ``` To interact with the agent, run `dimos humancli` in a separate terminal. +## RoboMaster TT / Tello Hardware + Network Setup + +Tello control traffic and cloud API traffic should be split across two interfaces. + +1. Power on the drone and connect to `TELLO-XXXXXX` from one network interface. +2. Keep internet on a separate interface: + - wired Ethernet is recommended, or + - a second Wi-Fi adapter connected to normal internet. +3. Confirm network state: + - Tello link: `ip a` should show `192.168.10.x` on the Tello interface. + - Tello subnet route: `192.168.10.0/24` on that same interface. + - Default route should prefer internet interface (not `192.168.10.1`). +4. Verify both paths: + - `ping -c 1 192.168.10.1` + - `curl -I https://api.openai.com` +5. Run the Tello blueprint: + - `dimos --robot-ip 192.168.10.1 run drone-tello-tt-agentic --disable web-input` + +If you use only one Wi-Fi radio and connect it to Tello, cloud model calls usually fail due to loss of internet routing. + ## Blueprints ### `drone-basic` @@ -48,6 +72,25 @@ Composes on top of `drone-basic`, adding autonomous capabilities: | `McpServer` + `McpClient` | LLM agent (default: GPT-4o) via MCP | | `WebInput` | Web/CLI interface for human commands | +### `drone-tello-tt-basic` +RoboMaster TT/Tello SDK control over Wi-Fi UDP. + +| Module | Purpose | +|--------|---------| +| `TelloConnectionModule` | Tello command/state/video bridge, skills, and follow commands | +| `DroneCameraModule` | Camera intrinsics + camera streams | +| `WebsocketVisModule` | Web-based visualization | +| `RerunBridgeModule` / `FoxgloveBridge` | 3D viewer (selected by `--viewer`) | + +### `drone-tello-tt-agentic` +Composes on top of `drone-tello-tt-basic`, adding: + +| Module | Purpose | +|--------|---------| +| `DroneTrackingModule` | Person detection, follow, yaw-centering, tracking overlay | +| `Agent` | LLM agent (default: GPT-4o) | +| `WebInput` | Web/CLI interface for human commands | + ## Installation ### Python (included with DimOS) @@ -73,6 +116,17 @@ export OPENAI_API_KEY=sk-... # Optional export GOOGLE_MAPS_API_KEY=... # For GoogleMapsSkillContainer +export ALIBABA_API_KEY=... # Optional Qwen detection for tracking +``` + +### Optional Tello Tracking Tuning +These variables are optional and only affect local YOLO person detection in `DroneTrackingModule`: + +```bash +export DIMOS_DRONE_YOLO_DEVICE=cuda:0 # or cpu +export DIMOS_DRONE_YOLO_MODEL=yolo11s-pose.pt +export DIMOS_DRONE_YOLO_IMGSZ=416 +export DIMOS_DRONE_YOLO_MAX_DET=5 ``` ## RosettaDrone Setup (Critical) @@ -139,12 +193,16 @@ DJI Drone ← Wireless → DJI Controller ← USB → Android Device ← WiFi dimos/robot/drone/ ├── blueprints/ │ ├── basic/drone_basic.py # Base blueprint (connection + camera + vis) +│ ├── basic/drone_tello_tt_basic.py # Tello basic blueprint │ └── agentic/drone_agentic.py # Agentic blueprint (composes on basic) +│ └── agentic/drone_tello_tt_agentic.py # Tello agentic blueprint ├── connection_module.py # MAVLink communication & skills ├── camera_module.py # Camera processing & intrinsics ├── drone_tracking_module.py # Visual servoing & object tracking ├── drone_visual_servoing_controller.py # PID-based visual servoing ├── mavlink_connection.py # Low-level MAVLink protocol +├── tello_connection_module.py # Tello module and skills +├── tello_sdk.py # Low-level Tello UDP SDK adapter └── dji_video_stream.py # GStreamer video capture + replay ``` @@ -198,6 +256,13 @@ Parameters: `(Kp, Ki, Kd, (min_output, max_output), integral_limit, deadband_pix 4. Velocity commands sent via LCM stream 5. Connection module converts to MAVLink commands +### Tello Tracking Flow +1. Tello camera stream is decoded in `TelloSdkClient`. +2. `DroneTrackingModule` receives `/video`, publishes `/tracking_overlay`. +3. For TT agentic blueprint, person follow uses detector-driven tracking with yaw-centering and forward approach. +4. `cmd_vel` is remapped to `/movecmd_twist` and converted to Tello `rc` commands by `TelloConnectionModule`. +5. Agent skills call `follow_object`, `center_person_by_yaw`, and `orbit_object`. + ## Available Skills All skills are exposed to the LLM agent via the `@skill` decorator on `DroneConnectionModule`: diff --git a/dimos/robot/drone/blueprints/agentic/drone_tello_tt_agentic.py b/dimos/robot/drone/blueprints/agentic/drone_tello_tt_agentic.py new file mode 100644 index 0000000000..a77c523fcc --- /dev/null +++ b/dimos/robot/drone/blueprints/agentic/drone_tello_tt_agentic.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 + +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Agentic TT/Tello blueprint with tracking and LLM control.""" + +from dimos.agents.agent import agent +from dimos.agents.web_human_input import web_input +from dimos.core.blueprints import autoconnect +from dimos.robot.drone.blueprints.basic.drone_tello_tt_basic import drone_tello_tt_basic +from dimos.robot.drone.drone_tracking_module import DroneTrackingModule + +TELLO_TT_SYSTEM_PROMPT = """\ +You are controlling a RoboMaster TT (Tello Talent) drone over the Tello SDK. +Use short, safe movement commands and keep altitude conservative indoors. +Prefer follow_object, orbit_object, move, move_relative, yaw, takeoff, and land. +For "scan/find person then follow", call follow_object(object_description="person", distance_m=1.0). +For any "hover + rotate/center person in frame" request, ALWAYS call +center_person_by_yaw(duration=..., scan_step_deg=0.0, max_scan_steps=1). +Do NOT call follow_object in default mode for rotate-only requests. +For "circle around person", call orbit_object(radius_m=1.0, revolutions=1.0). +Always end with land() when user asks to land. +Use observe to inspect the environment before aggressive motion. +Report what command you executed and the observed result. +""" + +drone_tello_tt_agentic = autoconnect( + drone_tello_tt_basic, + DroneTrackingModule.blueprint( + outdoor=False, + enable_passive_overlay=True, + use_local_person_detector=True, + force_detection_servoing_for_person=True, + person_follow_policy="yaw_forward_constant", + ), + agent(system_prompt=TELLO_TT_SYSTEM_PROMPT, model="gpt-4o"), + web_input(), +).remappings( + [ + (DroneTrackingModule, "video_input", "video"), + (DroneTrackingModule, "cmd_vel", "movecmd_twist"), + ] +) + +__all__ = ["TELLO_TT_SYSTEM_PROMPT", "drone_tello_tt_agentic"] diff --git a/dimos/robot/drone/blueprints/basic/drone_tello_tt_basic.py b/dimos/robot/drone/blueprints/basic/drone_tello_tt_basic.py new file mode 100644 index 0000000000..28a06b9be3 --- /dev/null +++ b/dimos/robot/drone/blueprints/basic/drone_tello_tt_basic.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Basic RoboMaster TT/Tello blueprint with connection, camera, and visualization.""" + +from typing import Any + +from dimos.core.blueprints import autoconnect +from dimos.core.global_config import global_config +from dimos.protocol.pubsub.impl.lcmpubsub import LCM +from dimos.robot.drone.camera_module import DroneCameraModule +from dimos.robot.drone.tello_connection_module import TelloConnectionModule +from dimos.web.websocket_vis.websocket_vis_module import websocket_vis + + +def _static_drone_body(rr: Any) -> list[Any]: + """Static visualization of TT body.""" + return [ + rr.Boxes3D( + half_sizes=[0.09, 0.09, 0.03], + colors=[(40, 180, 90)], + ), + rr.Transform3D(parent_frame="tf#/base_link"), + ] + + +def _drone_rerun_blueprint() -> Any: + """Split layout: camera feed + 3D world view side by side.""" + import rerun as rr + import rerun.blueprint as rrb + + return rrb.Blueprint( + rrb.Horizontal( + rrb.Spatial2DView(origin="world/video", name="Camera"), + rrb.Spatial2DView(origin="world/tracking_overlay", name="Tracking Overlay"), + rrb.Spatial3DView( + origin="world", + name="3D", + background=rrb.Background(kind="SolidColor", color=[0, 0, 0]), + line_grid=rrb.LineGrid3D( + plane=rr.components.Plane3D.XY.with_distance(0.2), + ), + ), + column_shares=[1, 1, 2], + ), + ) + + +_rerun_config = { + "blueprint": _drone_rerun_blueprint, + "pubsubs": [LCM()], + "static": { + "world/tf/base_link": _static_drone_body, + }, +} + +if global_config.viewer == "foxglove": + from dimos.robot.foxglove_bridge import foxglove_bridge + + _vis = foxglove_bridge() +elif global_config.viewer.startswith("rerun"): + from dimos.visualization.rerun.bridge import _resolve_viewer_mode, rerun_bridge + + _vis = rerun_bridge(viewer_mode=_resolve_viewer_mode(), **_rerun_config) +else: + _vis = autoconnect() + +tello_ip = global_config.robot_ip or "192.168.10.1" + +drone_tello_tt_basic = autoconnect( + _vis, + TelloConnectionModule.blueprint( + tello_ip=tello_ip, + local_ip="", + local_command_port=9000, + state_port=8890, + video_port=11111, + ), + DroneCameraModule.blueprint(camera_intrinsics=[920.0, 920.0, 480.0, 360.0]), + websocket_vis(), +) + +__all__ = ["drone_tello_tt_basic"] diff --git a/dimos/robot/drone/drone_tracking_module.py b/dimos/robot/drone/drone_tracking_module.py index 5798db374b..8406db4f78 100644 --- a/dimos/robot/drone/drone_tracking_module.py +++ b/dimos/robot/drone/drone_tracking_module.py @@ -16,6 +16,7 @@ """Drone tracking module with visual servoing for object following.""" import json +import os import threading import time from typing import Any @@ -36,13 +37,53 @@ DroneVisualServoingController, PIDParams, ) +from dimos.utils.data import get_data from dimos.utils.logging_config import setup_logger +from dimos.utils.simple_controller import PIDController logger = setup_logger() INDOOR_PID_PARAMS: PIDParams = (0.001, 0.0, 0.0001, (-1.0, 1.0), None, 30) OUTDOOR_PID_PARAMS: PIDParams = (0.05, 0.0, 0.0003, (-5.0, 5.0), None, 10) INDOOR_MAX_VELOCITY = 1.0 # m/s safety cap for indoor mode +INDOOR_MAX_FORWARD_VELOCITY = 0.25 +INDOOR_MAX_LATERAL_VELOCITY = 0.35 +PERSON_ALIASES = {"person", "people", "human", "man", "woman"} +PERSON_CONFIRM_FRAMES = 2 +PERSON_CONFIRM_IOU = 0.12 +YOLO_PERSON_CONF_MIN = 0.18 +YOLO_PERSON_MIN_KEYPOINTS = 4 +YOLO_MODEL_NAME = "yolo11s-pose.pt" +YOLO_MODEL_NAME_CPU = "yolo11n-pose.pt" +YOLO_DEFAULT_IMGSZ = 416 +YOLO_DEFAULT_MAX_DET = 5 +PASSIVE_DETECT_INTERVAL_SEC = 0.3 +YAW_ONLY_DEADBAND_RATIO = 0.028 +YAW_ONLY_PID_KP = 1.0 +YAW_ONLY_PID_KI = 0.1 +YAW_ONLY_PID_KD = 0.62 +YAW_ONLY_MAX_RATE = 0.85 +YAW_ONLY_INTEGRAL_LIMIT = 0.2 +YAW_ONLY_CMD_SMOOTH_ALPHA = 0.40 +YAW_ONLY_DT = 0.05 +YAW_ONLY_ZERO_RATE_EPS = 0.015 +YAW_ONLY_MIN_RATE = 0.14 +YAW_ONLY_MIN_RATE_ERR_RATIO = 0.25 +YAW_ONLY_RATE_SLEW_PER_SEC = 7.0 +YAW_ONLY_LOCK_ENTER_RATIO = 0.03 +YAW_ONLY_LOCK_EXIT_RATIO = 0.075 +YAW_ONLY_LOCK_CMD_EPS = 0.08 +YAW_ONLY_ERROR_LPF_ALPHA = 0.6 +YAW_ONLY_DECEL_ERR_RATIO = 0.24 +YAW_ONLY_DECEL_MIN_RATE = 0.08 +YAW_ONLY_CROSS_BRAKE_ERR_RATIO = 0.24 +YAW_ONLY_CROSS_BRAKE_SEC = 0.12 +FOLLOW_TARGET_MIN_M = 0.6 +FOLLOW_TARGET_MAX_M = 2.5 +FOLLOW_APPROACH_SPEED = 0.15 +FOLLOW_ALIGN_SOFT_ERR = 0.15 +FOLLOW_ALIGN_STOP_ERR = 0.30 +FOLLOW_EDGE_MARGIN_RATIO = 0.06 class DroneTrackingModule(Module): @@ -63,6 +104,10 @@ def __init__( x_pid_params: PIDParams | None = None, y_pid_params: PIDParams | None = None, z_pid_params: PIDParams | None = None, + enable_passive_overlay: bool = False, + use_local_person_detector: bool = False, + force_detection_servoing_for_person: bool = False, + person_follow_policy: str = "legacy_pid", ) -> None: """Initialize the drone tracking module. @@ -74,6 +119,11 @@ def __init__( y_pid_params: PID parameters for left/right strafe control. If None, uses preset based on outdoor flag. z_pid_params: Optional PID parameters for altitude control. + enable_passive_overlay: If True, continuously publish person-detection overlays. + use_local_person_detector: If True, allow local YOLO person detection fallback when Qwen is unavailable. + force_detection_servoing_for_person: If True, bypass OpenCV tracker for person targets and use detector loop. + person_follow_policy: Person follow behavior in detector loop. + Supported: "legacy_pid" (original vx/vy PID), "yaw_forward_constant" (yaw-first + constant forward). """ super().__init__() @@ -83,6 +133,8 @@ def __init__( self._outdoor = outdoor self._max_velocity = None if outdoor else INDOOR_MAX_VELOCITY + self._max_forward_velocity = None if outdoor else INDOOR_MAX_FORWARD_VELOCITY + self._max_lateral_velocity = None if outdoor else INDOOR_MAX_LATERAL_VELOCITY self.servoing_controller = DroneVisualServoingController( x_pid_params=x_pid_params, y_pid_params=y_pid_params, z_pid_params=z_pid_params @@ -91,9 +143,48 @@ def __init__( # Tracking state self._tracking_active = False self._tracking_thread: threading.Thread | None = None + self._passive_overlay_active = False + self._passive_overlay_thread: threading.Thread | None = None + self._enable_passive_overlay = enable_passive_overlay + self._use_local_person_detector = use_local_person_detector + self._force_detection_servoing_for_person = force_detection_servoing_for_person + self._control_mode = "full" + normalized_policy = person_follow_policy.strip().lower() + if normalized_policy not in {"legacy_pid", "yaw_forward_constant"}: + normalized_policy = "legacy_pid" + self._person_follow_policy = normalized_policy + self._yaw_rate_ema = 0.0 + self._yaw_error_lpf = 0.0 + self._yaw_lock_active = False + self._yaw_prev_raw_error = 0.0 + self._yaw_cross_brake_until = 0.0 + self._yaw_pid = PIDController( + YAW_ONLY_PID_KP, + YAW_ONLY_PID_KI, + YAW_ONLY_PID_KD, + output_limits=(-YAW_ONLY_MAX_RATE, YAW_ONLY_MAX_RATE), + integral_limit=YAW_ONLY_INTEGRAL_LIMIT, + deadband=YAW_ONLY_DEADBAND_RATIO, + inverse_output=True, + ) self._current_object: str | None = None self._latest_frame: Image | None = None self._frame_lock = threading.Lock() + self._yolo: Any | None = None + self._yolo_device = "cpu" + self._yolo_half = False + self._yolo_model_name = YOLO_MODEL_NAME + self._yolo_imgsz = self._env_int("DIMOS_DRONE_YOLO_IMGSZ", YOLO_DEFAULT_IMGSZ, min_value=192) + self._yolo_max_det = self._env_int("DIMOS_DRONE_YOLO_MAX_DET", YOLO_DEFAULT_MAX_DET, min_value=1) + self._target_distance_m = 1.0 + self._person_lock_bbox: tuple[int, int, int, int] | None = None + self._local_detector_enabled = ( + self._enable_passive_overlay + or self._use_local_person_detector + or self._force_detection_servoing_for_person + ) + if self._local_detector_enabled: + self._init_yolo_person_detector() # Subscribe to video input when transport is set # (will be done by connection module) @@ -105,7 +196,125 @@ def _on_new_frame(self, frame: Image) -> None: def _on_follow_object_cmd(self, cmd: String) -> None: msg = json.loads(cmd.data) - self.track_object(msg["object_description"], msg["duration"]) + self.track_object( + object_name=msg.get("object_description"), + duration=float(msg.get("duration", 120.0)), + distance_m=float(msg.get("distance_m", 1.0)), + control_mode=str(msg.get("control_mode", "full")), + ) + + def _create_tracker(self) -> Any | None: + """Create the best available OpenCV tracker for this environment.""" + candidates: list[tuple[Any, str]] = [] + + if hasattr(cv2, "legacy"): + candidates.extend( + [ + (getattr(cv2.legacy, "TrackerCSRT_create", None), "legacy.CSRT"), + (getattr(cv2.legacy, "TrackerKCF_create", None), "legacy.KCF"), + (getattr(cv2.legacy, "TrackerMIL_create", None), "legacy.MIL"), + ] + ) + + candidates.extend( + [ + (getattr(cv2, "TrackerCSRT_create", None), "CSRT"), + (getattr(cv2, "TrackerKCF_create", None), "KCF"), + (getattr(cv2, "TrackerMIL_create", None), "MIL"), + ] + ) + + for factory, name in candidates: + if callable(factory): + try: + tracker = factory() + logger.info(f"Using OpenCV tracker backend: {name}") + return tracker + except Exception as e: + logger.warning(f"Tracker backend {name} unavailable: {e}") + + logger.error("No supported OpenCV tracker backend available (CSRT/KCF/MIL)") + return None + + def _bbox_to_tracker_rect( + self, + frame: np.ndarray[Any, np.dtype[Any]], + bbox: tuple[int, int, int, int], + ) -> tuple[int, int, int, int] | None: + """Convert x1,y1,x2,y2 bbox into a safe x,y,w,h tracker rectangle.""" + frame_h, frame_w = frame.shape[:2] + x1, y1, x2, y2 = bbox + + x1 = max(0, min(int(x1), frame_w - 2)) + y1 = max(0, min(int(y1), frame_h - 2)) + x2 = max(x1 + 2, min(int(x2), frame_w - 1)) + y2 = max(y1 + 2, min(int(y2), frame_h - 1)) + + w = x2 - x1 + h = y2 - y1 + if w < 2 or h < 2: + return None + + # Expand box slightly to make initialization less brittle. + scale = 1.15 + cx = x1 + (w / 2.0) + cy = y1 + (h / 2.0) + w2 = max(8, round(w * scale)) + h2 = max(8, round(h * scale)) + + tx = max(0, round(cx - w2 / 2.0)) + ty = max(0, round(cy - h2 / 2.0)) + tw = min(w2, frame_w - tx) + th = min(h2, frame_h - ty) + + if tw < 8 or th < 8: + return None + + return (tx, ty, tw, th) + + def _prepare_tracker_frame( + self, + frame: np.ndarray[Any, np.dtype[Any]], + ) -> np.ndarray[Any, np.dtype[Any]]: + """Normalize frame layout/type for OpenCV tracker APIs.""" + prepared = frame + if prepared.dtype != np.uint8: + prepared = prepared.astype(np.uint8, copy=False) + return np.ascontiguousarray(prepared) + + def _init_tracker( + self, + tracker: Any, + frame: np.ndarray[Any, np.dtype[Any]], + tracker_rect: tuple[int, int, int, int], + ) -> tuple[bool, str]: + """Initialize tracker with conservative argument fallbacks.""" + init_frame = self._prepare_tracker_frame(frame) + rect_float = ( + float(tracker_rect[0]), + float(tracker_rect[1]), + float(tracker_rect[2]), + float(tracker_rect[3]), + ) + rect_variants: list[tuple[float, float, float, float] | tuple[int, int, int, int]] = [ + rect_float, + tracker_rect, + ] + + last_error: Exception | None = None + for rect in rect_variants: + try: + init_result = tracker.init(init_frame, rect) + init_failed = isinstance(init_result, bool) and not init_result + if init_failed: + continue + return True, "ok" + except Exception as exc: + last_error = exc + + if last_error is not None: + return False, f"{last_error!s}" + return False, "tracker init returned false" def _get_latest_frame(self) -> np.ndarray[Any, np.dtype[Any]] | None: """Get the latest video frame as numpy array.""" @@ -128,20 +337,47 @@ def start(self) -> None: if self.follow_object_cmd.transport: self.follow_object_cmd.subscribe(self._on_follow_object_cmd) + if self._enable_passive_overlay: + self._start_passive_overlay_loop() return @rpc def stop(self) -> None: + self._stop_passive_overlay_loop() self._stop_tracking() super().stop() + def _start_passive_overlay_loop(self) -> None: + if self._passive_overlay_active: + return + self._passive_overlay_active = True + self._passive_overlay_thread = threading.Thread( + target=self._passive_detection_overlay_loop, + daemon=True, + ) + self._passive_overlay_thread.start() + + def _stop_passive_overlay_loop(self) -> None: + self._passive_overlay_active = False + if self._passive_overlay_thread and self._passive_overlay_thread.is_alive(): + self._passive_overlay_thread.join(timeout=1.0) + self._passive_overlay_thread = None + @rpc - def track_object(self, object_name: str | None = None, duration: float = 120.0) -> str: + def track_object( + self, + object_name: str | None = None, + duration: float = 120.0, + distance_m: float = 1.0, + control_mode: str = "full", + ) -> str: """Track and follow an object using visual servoing. Args: object_name: Name of object to track, or None for most prominent duration: Maximum tracking duration in seconds + distance_m: Reserved follow-distance hint (person follow currently uses fixed approach speed) + control_mode: "full" (translate/strafe) or "yaw_only" (rotate in place) Returns: String status message @@ -157,33 +393,145 @@ def track_object(self, object_name: str | None = None, duration: float = 120.0) logger.info(f"Starting track_object for {object_name or 'any object'}") try: - # Detect object with Qwen - logger.info("Detecting object with Qwen...") - bbox = get_bbox_from_qwen_frame(frame, object_name) + normalized_mode = control_mode.strip().lower() + if normalized_mode not in {"full", "yaw_only"}: + normalized_mode = "full" + self._control_mode = normalized_mode + self._reset_yaw_controller() + self._target_distance_m = max(FOLLOW_TARGET_MIN_M, min(FOLLOW_TARGET_MAX_M, distance_m)) + self._person_lock_bbox = None + + bbox = self._detect_initial_bbox(frame, object_name) if bbox is None: msg = f"No object detected{' for: ' + object_name if object_name else ''}" logger.warning(msg) + if self.tracking_overlay.transport: + overlay = self._draw_search_overlay( + frame, f"Searching for {object_name or 'object'} (not found)" + ) + self.tracking_overlay.publish(Image.from_numpy(overlay, format=ImageFormat.BGR)) self._publish_status({"status": "not_found", "object": self._current_object}) return msg logger.info(f"Object detected at bbox: {bbox}") - # Initialize CSRT tracker (use legacy for OpenCV 4) - try: - tracker = cv2.legacy.TrackerCSRT_create() # type: ignore[attr-defined] - except AttributeError: - tracker = cv2.TrackerCSRT_create() # type: ignore[attr-defined] + if ( + object_name is None or object_name.lower().strip() in PERSON_ALIASES + ) and self._local_detector_enabled: + confirmed_bbox = self._confirm_person_detection(bbox) + if confirmed_bbox is None: + self._publish_status( + { + "status": "not_found", + "object": object_name or "person", + "error": "person confirmation failed", + } + ) + return "Detected candidate was not a stable person target" + bbox = confirmed_bbox + + person_target = object_name is None or object_name.lower().strip() in PERSON_ALIASES + detector_person_mode = person_target and ( + self._control_mode == "yaw_only" or self._force_detection_servoing_for_person + ) + + if detector_person_mode: + self._current_object = object_name or "person" + self._tracking_active = True + self._publish_status( + { + "status": "tracking", + "object": self._current_object, + "mode": self._control_mode, + "backend": "yolo_detection", + } + ) + self._tracking_thread = threading.Thread( + target=self._detection_servoing_loop, + args=(object_name, duration, self._target_distance_m), + daemon=True, + ) + self._tracking_thread.start() + if self._control_mode == "yaw_only": + return f"Yaw-only tracking started for {self._current_object}" + return f"Tracking started for {self._current_object} (YOLO detection mode)" + + if self._control_mode == "yaw_only": + self._current_object = object_name or "object" + self._tracking_active = True + self._publish_status( + { + "status": "tracking", + "object": self._current_object, + "mode": "yaw_only", + } + ) + self._tracking_thread = threading.Thread( + target=self._detection_servoing_loop, + args=(object_name, duration, self._target_distance_m), + daemon=True, + ) + self._tracking_thread.start() + return f"Yaw-only tracking started for {self._current_object}" - # Convert bbox format from [x1, y1, x2, y2] to [x, y, w, h] - x1, y1, x2, y2 = bbox - x, y, w, h = x1, y1, x2 - x1, y2 - y1 + tracker = self._create_tracker() + if tracker is None: + self._publish_status( + { + "status": "failed", + "object": self._current_object, + "error": "No supported OpenCV tracker backend", + } + ) + return "Failed: no supported OpenCV tracker backend (need at least MIL/KCF/CSRT)" + + tracker_rect = self._bbox_to_tracker_rect(frame, bbox) + if tracker_rect is None: + self._publish_status( + { + "status": "failed", + "object": self._current_object, + "error": "Invalid detection bbox for tracker init", + } + ) + return "Failed: invalid detection bbox" # Initialize tracker - success = tracker.init(frame, (x, y, w, h)) - if not success: - self._publish_status({"status": "failed", "object": self._current_object}) - return "Failed to initialize tracker" + init_ok, init_reason = self._init_tracker(tracker, frame, tracker_rect) + if not init_ok: + if not self._local_detector_enabled: + self._publish_status( + { + "status": "failed", + "object": self._current_object, + "error": f"Tracker init failed: {init_reason}", + } + ) + return f"Failed to initialize tracker: {init_reason}" + # Fall back to detector-only loop so follow behavior still works + # in OpenCV builds where tracker init is unreliable. + logger.warning(f"Tracker init failed ({init_reason}); using detection fallback loop") + self._current_object = object_name or "object" + self._tracking_active = True + self._publish_status( + { + "status": "tracking", + "object": self._current_object, + "mode": "detection_fallback", + "reason": init_reason, + } + ) + self._tracking_thread = threading.Thread( + target=self._detection_servoing_loop, + args=(object_name, duration, self._target_distance_m), + daemon=True, + ) + self._tracking_thread.start() + return ( + f"Tracking started for {self._current_object} " + "(detection fallback mode)" + ) self._current_object = object_name or "object" self._tracking_active = True @@ -201,6 +549,536 @@ def track_object(self, object_name: str | None = None, duration: float = 120.0) self._stop_tracking() return f"Tracking failed: {e!s}" + def _detect_initial_bbox( + self, + frame: np.ndarray[Any, np.dtype[Any]], + object_name: str | None, + ) -> tuple[int, int, int, int] | None: + """Detect initial bbox using Qwen when available, with local fallback for person.""" + qwen_available = bool(os.getenv("ALIBABA_API_KEY")) + if qwen_available: + try: + logger.info("Detecting object with Qwen...") + # Qwen helper expects RGB input. + rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + bbox = get_bbox_from_qwen_frame(rgb_frame, object_name) + if bbox is not None: + return (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])) + except Exception as e: + logger.warning(f"Qwen detection failed: {e}") + else: + if not self._use_local_person_detector: + logger.warning( + "ALIBABA_API_KEY is not set and local person detector fallback is disabled" + ) + return None + logger.warning("ALIBABA_API_KEY is not set; using local YOLO detector") + + if object_name is not None and object_name.lower().strip() not in PERSON_ALIASES: + return None + + return self._detect_person_local(frame) + + def _bbox_iou( + self, + a: tuple[int, int, int, int], + b: tuple[int, int, int, int], + ) -> float: + ax1, ay1, ax2, ay2 = a + bx1, by1, bx2, by2 = b + + inter_x1 = max(ax1, bx1) + inter_y1 = max(ay1, by1) + inter_x2 = min(ax2, bx2) + inter_y2 = min(ay2, by2) + + inter_w = max(0, inter_x2 - inter_x1) + inter_h = max(0, inter_y2 - inter_y1) + inter_area = inter_w * inter_h + + area_a = max(1, (ax2 - ax1) * (ay2 - ay1)) + area_b = max(1, (bx2 - bx1) * (by2 - by1)) + union = area_a + area_b - inter_area + if union <= 0: + return 0.0 + return inter_area / union + + def _is_plausible_person_bbox( + self, + frame: np.ndarray[Any, np.dtype[Any]], + bbox: tuple[int, int, int, int], + score: float, + ) -> bool: + frame_h, frame_w = frame.shape[:2] + x1, y1, x2, y2 = bbox + w = max(1, x2 - x1) + h = max(1, y2 - y1) + + aspect = h / float(w) + area_ratio = (w * h) / float(frame_w * frame_h) + height_ratio = h / float(frame_h) + + if score < YOLO_PERSON_CONF_MIN: + return False + if aspect < 1.15 or aspect > 4.8: + return False + if height_ratio < 0.08 or height_ratio > 0.98: + return False + if area_ratio < 0.003 or area_ratio > 0.55: + return False + + # Tiny edge-hugging boxes are usually false positives. + edge_margin = 2 + touches_edge = ( + x1 <= edge_margin + or y1 <= edge_margin + or x2 >= frame_w - edge_margin + or y2 >= frame_h - edge_margin + ) + if touches_edge and area_ratio < 0.02: + return False + + return True + + def _init_yolo_person_detector(self) -> None: + """Initialize local YOLO pose model for person detection.""" + try: + from ultralytics import YOLO # type: ignore[attr-defined, import-not-found] + + device, cuda_ok = self._select_yolo_device() + self._yolo_device = device + self._yolo_half = cuda_ok + configured_model = os.getenv("DIMOS_DRONE_YOLO_MODEL") + self._yolo_model_name = ( + configured_model if configured_model else (YOLO_MODEL_NAME if cuda_ok else YOLO_MODEL_NAME_CPU) + ) + + model_path = get_data("models_yolo") / self._yolo_model_name + self._yolo = YOLO(model_path, task="pose") + warmup_ok = self._warmup_yolo() + if not warmup_ok and self._yolo_device.startswith("cuda"): + logger.warning("YOLO CUDA warmup failed; falling back to CPU") + self._yolo_device = "cpu" + self._yolo_half = False + if configured_model is None and self._yolo_model_name != YOLO_MODEL_NAME_CPU: + self._yolo_model_name = YOLO_MODEL_NAME_CPU + cpu_model_path = get_data("models_yolo") / self._yolo_model_name + self._yolo = YOLO(cpu_model_path, task="pose") + _ = self._warmup_yolo() + logger.info( + "Initialized YOLO person detector " + f"model={self._yolo_model_name} device={self._yolo_device} " + f"half={self._yolo_half} imgsz={self._yolo_imgsz} max_det={self._yolo_max_det}" + ) + except Exception as e: + self._yolo = None + logger.warning(f"YOLO person detector unavailable: {e}") + + def _env_int(self, name: str, default: int, min_value: int = 1) -> int: + raw = os.getenv(name) + if raw is None: + return default + try: + return max(min_value, int(raw)) + except ValueError: + logger.warning(f"Invalid {name}={raw!r}; using default {default}") + return default + + def _select_yolo_device(self) -> tuple[str, bool]: + override = os.getenv("DIMOS_DRONE_YOLO_DEVICE") + if override: + normalized = override.strip().lower() + if normalized == "cpu": + return "cpu", False + if normalized.startswith("cuda"): + return normalized, True + + try: + import torch + + if torch.cuda.is_available(): + # Force lazy init early so we can fail fast and avoid inference-time stalls. + _ = torch.cuda.get_device_name(0) + return "cuda:0", True + + if torch.cuda.device_count() > 0: + logger.warning( + "Torch reports CUDA devices but initialization failed; using CPU. " + "Run `uv run python -c \"import torch; print(torch.cuda.is_available()); " + "print(torch.cuda.get_device_name(0))\"` to diagnose." + ) + except Exception as e: + logger.warning(f"Torch CUDA check failed; using CPU: {e}") + + return "cpu", False + + def _warmup_yolo(self) -> bool: + if self._yolo is None: + return False + try: + warmup = np.zeros((self._yolo_imgsz, self._yolo_imgsz, 3), dtype=np.uint8) + _ = self._yolo.predict( + source=warmup, + conf=YOLO_PERSON_CONF_MIN, + iou=0.5, + classes=[0], + imgsz=self._yolo_imgsz, + max_det=1, + verbose=False, + device=self._yolo_device, + half=self._yolo_half, + ) + return True + except Exception as e: + logger.warning(f"YOLO warmup failed: {e}") + return False + + def _detect_person_yolo_candidates( + self, + frame: np.ndarray[Any, np.dtype[Any]], + ) -> list[tuple[tuple[int, int, int, int], float, int]]: + """Detect person candidates using local YOLO pose model.""" + if self._yolo is None: + return [] + + try: + results = self._yolo.predict( + source=frame, + conf=YOLO_PERSON_CONF_MIN, + iou=0.5, + classes=[0], # person class + imgsz=self._yolo_imgsz, + max_det=self._yolo_max_det, + verbose=False, + device=self._yolo_device, + half=self._yolo_half, + ) + except Exception as e: + logger.warning(f"YOLO detection inference failed: {e}") + return [] + + candidates: list[tuple[tuple[int, int, int, int], float, int]] = [] + frame_h, frame_w = frame.shape[:2] + + for result in results: + boxes = getattr(result, "boxes", None) + if boxes is None or boxes.xyxy is None: + continue + + kp_conf = None + keypoints = getattr(result, "keypoints", None) + if keypoints is not None and getattr(keypoints, "conf", None) is not None: + kp_conf = keypoints.conf + + num_dets = len(boxes.xyxy) + for i in range(num_dets): + try: + class_id = int(boxes.cls[i].item()) + if class_id != 0: + continue + conf = float(boxes.conf[i].item()) + xyxy = boxes.xyxy[i].cpu().numpy() + bbox = (int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3])) + except Exception: + continue + + if not self._is_plausible_person_bbox(frame, bbox, conf): + continue + + visible_keypoints = 0 + if kp_conf is not None: + try: + kp = kp_conf[i].cpu().numpy() + visible_keypoints = int((kp > 0.35).sum()) + except Exception: + visible_keypoints = 0 + + if visible_keypoints < YOLO_PERSON_MIN_KEYPOINTS: + continue + + cx = (bbox[0] + bbox[2]) / 2.0 + cy = (bbox[1] + bbox[3]) / 2.0 + nx = abs((cx / frame_w) - 0.5) + ny = abs((cy / frame_h) - 0.55) + center_bonus = max(0.0, 1.0 - (nx + ny)) + + score = conf + (0.2 * center_bonus) + (0.01 * visible_keypoints) + candidates.append((bbox, score, visible_keypoints)) + + return candidates + + def _detect_person_yolo( + self, + frame: np.ndarray[Any, np.dtype[Any]], + log_miss: bool = True, + log_hit: bool = True, + anchor_bbox: tuple[int, int, int, int] | None = None, + ) -> tuple[int, int, int, int] | None: + """Detect best person using local YOLO pose model.""" + candidates = self._detect_person_yolo_candidates(frame) + if not candidates: + if log_miss: + logger.info("YOLO detector found no person") + return None + + best_bbox, best_score, best_kp = self._select_person_candidate(candidates, anchor_bbox) + if log_hit: + logger.info( + f"YOLO person detection bbox={best_bbox} score={best_score:.3f} keypoints={best_kp}" + ) + return best_bbox + + def _detect_person_local( + self, + frame: np.ndarray[Any, np.dtype[Any]], + log_miss: bool = True, + log_hit: bool = True, + anchor_bbox: tuple[int, int, int, int] | None = None, + ) -> tuple[int, int, int, int] | None: + """Local detector path: YOLO person pose only.""" + return self._detect_person_yolo( + frame, + log_miss=log_miss, + log_hit=log_hit, + anchor_bbox=anchor_bbox, + ) + + def _select_person_candidate( + self, + candidates: list[tuple[tuple[int, int, int, int], float, int]], + anchor_bbox: tuple[int, int, int, int] | None, + ) -> tuple[tuple[int, int, int, int], float, int]: + if anchor_bbox is None: + return max(candidates, key=lambda x: x[1]) + + def _rank(item: tuple[tuple[int, int, int, int], float, int]) -> float: + bbox, score, _ = item + iou_bonus = 0.35 * self._bbox_iou(anchor_bbox, bbox) + return score + iou_bonus + + return max(candidates, key=_rank) + + def _confirm_person_detection( + self, + seed_bbox: tuple[int, int, int, int], + timeout_s: float = 2.5, + ) -> tuple[int, int, int, int] | None: + """Require consistent person detections before enabling follow.""" + hits = 1 + misses = 0 + max_misses = 3 + last_bbox = seed_bbox + deadline = time.time() + timeout_s + + while time.time() < deadline and hits < PERSON_CONFIRM_FRAMES: + frame = self._get_latest_frame() + if frame is None: + time.sleep(0.03) + continue + + candidate = self._detect_person_local( + self._prepare_tracker_frame(frame), + log_miss=False, + log_hit=False, + anchor_bbox=last_bbox, + ) + if candidate is None: + misses += 1 + if misses > max_misses: + return None + time.sleep(0.05) + continue + + iou = self._bbox_iou(last_bbox, candidate) + if iou >= PERSON_CONFIRM_IOU: + hits += 1 + last_bbox = candidate + else: + hits = 1 + last_bbox = candidate + time.sleep(0.05) + + if hits >= PERSON_CONFIRM_FRAMES: + return last_bbox + return None + + def _draw_labeled_bbox( + self, + image: NDArray[np.uint8], + bbox: tuple[int, int, int, int], + label: str, + color: tuple[int, int, int] = (0, 255, 0), + ) -> NDArray[np.uint8]: # type: ignore[type-arg] + """Draw a bbox with text label.""" + overlay: NDArray[np.uint8] = image.copy() # type: ignore[type-arg] + x1, y1, x2, y2 = bbox + cv2.rectangle(overlay, (x1, y1), (x2, y2), color, 2) + text_y = max(18, y1 - 8) + cv2.putText( + overlay, + label, + (x1, text_y), + cv2.FONT_HERSHEY_SIMPLEX, + 0.6, + color, + 2, + ) + return overlay + + def _passive_detection_overlay_loop(self) -> None: + """Continuously publish detection overlay even when not actively following.""" + while self._passive_overlay_active: + if self._tracking_active: + time.sleep(PASSIVE_DETECT_INTERVAL_SEC) + continue + + frame = self._get_latest_frame() + if frame is None: + time.sleep(PASSIVE_DETECT_INTERVAL_SEC) + continue + + detect_frame = self._prepare_tracker_frame(frame) + overlay = detect_frame.copy() + + yolo_candidates = self._detect_person_yolo_candidates(detect_frame) + if yolo_candidates: + sorted_candidates = sorted(yolo_candidates, key=lambda x: x[1], reverse=True) + for bbox, score, kps in sorted_candidates[:3]: + overlay = self._draw_labeled_bbox( + overlay, + bbox, + f"person {score:.2f} kp={kps}", + color=(0, 255, 0), + ) + overlay = self._draw_search_overlay( + overlay, + f"Passive detect: {len(yolo_candidates)} person(s) [YOLO]", + ) + else: + overlay = self._draw_search_overlay( + overlay, + "Passive detect: no person", + ) + + if self.tracking_overlay.transport: + self.tracking_overlay.publish(Image.from_numpy(overlay, format=ImageFormat.BGR)) + + time.sleep(PASSIVE_DETECT_INTERVAL_SEC) + + def _compute_yaw_rate(self, current_x: float, center_x: float) -> float: + """Compute yaw-rate with PID control on normalized horizontal image error.""" + now = time.time() + raw_error = (current_x - center_x) / max(center_x, 1.0) + + # If we just crossed centerline, briefly brake yaw to avoid ping-pong from latency. + crossed_center = (self._yaw_prev_raw_error * raw_error) < 0.0 + if crossed_center and max(abs(self._yaw_prev_raw_error), abs(raw_error)) <= YAW_ONLY_CROSS_BRAKE_ERR_RATIO: + self._yaw_cross_brake_until = now + YAW_ONLY_CROSS_BRAKE_SEC + self._yaw_rate_ema = 0.0 + self._yaw_pid.integral = 0.0 + self._yaw_pid.prev_error = 0.0 + self._yaw_lock_active = True + + if now < self._yaw_cross_brake_until: + self._yaw_prev_raw_error = raw_error + return 0.0 + + self._yaw_error_lpf = (YAW_ONLY_ERROR_LPF_ALPHA * self._yaw_error_lpf) + ( + (1.0 - YAW_ONLY_ERROR_LPF_ALPHA) * raw_error + ) + error = self._yaw_error_lpf + + # Hysteresis lock around centerline prevents sign-flip oscillation near zero error. + if self._yaw_lock_active: + if abs(raw_error) <= YAW_ONLY_LOCK_EXIT_RATIO: + self._yaw_rate_ema = 0.0 + self._yaw_pid.integral = 0.0 + self._yaw_pid.prev_error = 0.0 + self._yaw_prev_raw_error = raw_error + return 0.0 + self._yaw_lock_active = False + + target_rate = float(self._yaw_pid.update(error, YAW_ONLY_DT)) # type: ignore[no-untyped-call] + + # Keep enough turning authority once the target is meaningfully off-center. + if abs(raw_error) >= YAW_ONLY_MIN_RATE_ERR_RATIO and abs(target_rate) < YAW_ONLY_MIN_RATE: + sign = 1.0 if error > 0 else -1.0 + target_rate = sign * YAW_ONLY_MIN_RATE + + # Taper max yaw rate as target approaches center to avoid overshoot in large turns. + abs_error = abs(raw_error) + if abs_error < YAW_ONLY_DECEL_ERR_RATIO: + frac = abs_error / max(YAW_ONLY_DECEL_ERR_RATIO, 1e-6) + adaptive_limit = YAW_ONLY_DECEL_MIN_RATE + ( + (YAW_ONLY_MAX_RATE - YAW_ONLY_DECEL_MIN_RATE) * frac + ) + target_rate = max(-adaptive_limit, min(adaptive_limit, target_rate)) + + prev_rate = self._yaw_rate_ema + filtered_rate = (YAW_ONLY_CMD_SMOOTH_ALPHA * prev_rate) + ( + (1.0 - YAW_ONLY_CMD_SMOOTH_ALPHA) * target_rate + ) + + # Limit per-step command delta to reduce oscillation and overshoot near lock. + max_step = YAW_ONLY_RATE_SLEW_PER_SEC * YAW_ONLY_DT + delta = filtered_rate - prev_rate + if delta > max_step: + filtered_rate = prev_rate + max_step + elif delta < -max_step: + filtered_rate = prev_rate - max_step + self._yaw_rate_ema = filtered_rate + + if abs(target_rate) <= YAW_ONLY_ZERO_RATE_EPS and abs(self._yaw_rate_ema) <= YAW_ONLY_ZERO_RATE_EPS: + self._yaw_rate_ema = 0.0 + + if abs(raw_error) <= YAW_ONLY_LOCK_ENTER_RATIO and abs(self._yaw_rate_ema) <= YAW_ONLY_LOCK_CMD_EPS: + self._yaw_lock_active = True + self._yaw_rate_ema = 0.0 + self._yaw_pid.integral = 0.0 + self._yaw_pid.prev_error = 0.0 + self._yaw_prev_raw_error = raw_error + return 0.0 + + self._yaw_prev_raw_error = raw_error + return float(self._yaw_rate_ema) + + def _reset_yaw_controller(self) -> None: + self._yaw_rate_ema = 0.0 + self._yaw_error_lpf = 0.0 + self._yaw_lock_active = False + self._yaw_prev_raw_error = 0.0 + self._yaw_cross_brake_until = 0.0 + self._yaw_pid.integral = 0.0 + self._yaw_pid.prev_error = 0.0 + + def _compute_person_follow_command( + self, + bbox: tuple[int, int, int, int], + frame_width: int, + _frame_height: int, + current_x: float, + ) -> tuple[float, float, float, float]: + yaw_rate = self._compute_yaw_rate(current_x, frame_width / 2.0) + norm_yaw_error = abs((current_x - (frame_width / 2.0)) / max(frame_width / 2.0, 1.0)) + + x1, _y1, x2, _y2 = bbox + vx = FOLLOW_APPROACH_SPEED + + if norm_yaw_error >= FOLLOW_ALIGN_STOP_ERR: + vx = 0.0 + elif norm_yaw_error > FOLLOW_ALIGN_SOFT_ERR: + soft = 1.0 - ( + (norm_yaw_error - FOLLOW_ALIGN_SOFT_ERR) + / max(FOLLOW_ALIGN_STOP_ERR - FOLLOW_ALIGN_SOFT_ERR, 1e-6) + ) + vx *= max(0.25, min(1.0, soft)) + + edge_margin = int(frame_width * FOLLOW_EDGE_MARGIN_RATIO) + if (x1 < edge_margin or x2 > (frame_width - edge_margin)) and vx > 0.0: + vx = 0.0 + + return float(vx), 0.0, 0.0, float(yaw_rate) + def _visual_servoing_loop(self, tracker: Any, duration: float) -> None: """Main visual servoing control loop. @@ -226,9 +1104,15 @@ def _visual_servoing_loop(self, tracker: Any, duration: float) -> None: frame_count += 1 # Update tracker - success, bbox = tracker.update(frame) - - if not success: + update_frame = self._prepare_tracker_frame(frame) + update_result = tracker.update(update_frame) + success = False + bbox: Any = None + if isinstance(update_result, tuple) and len(update_result) == 2: + success = bool(update_result[0]) + bbox = update_result[1] + + if not success or bbox is None: lost_track_count += 1 logger.warning(f"Lost track (count: {lost_track_count})") @@ -252,26 +1136,35 @@ def _visual_servoing_loop(self, tracker: Any, duration: float) -> None: center_x = frame_width / 2 center_y = frame_height / 2 - # Compute velocity commands - vx, vy, vz = self.servoing_controller.compute_velocity_control( - target_x=current_x, - target_y=current_y, - center_x=center_x, - center_y=center_y, - dt=0.033, # ~30Hz - lock_altitude=True, - ) + yaw_rate = 0.0 + if self._control_mode == "yaw_only": + vx, vy, vz = 0.0, 0.0, 0.0 + yaw_rate = self._compute_yaw_rate(current_x, center_x) + else: + # Compute velocity commands + vx, vy, vz = self.servoing_controller.compute_velocity_control( + target_x=current_x, + target_y=current_y, + center_x=center_x, + center_y=center_y, + dt=0.033, # ~30Hz + lock_altitude=True, + ) - # Clamp velocity for indoor safety - if self._max_velocity is not None: - vx = max(-self._max_velocity, min(self._max_velocity, vx)) - vy = max(-self._max_velocity, min(self._max_velocity, vy)) + # Clamp velocity for indoor safety + if self._max_velocity is not None: + vx = max(-self._max_velocity, min(self._max_velocity, vx)) + vy = max(-self._max_velocity, min(self._max_velocity, vy)) + if self._max_forward_velocity is not None: + vx = max(-self._max_forward_velocity, min(self._max_forward_velocity, vx)) + if self._max_lateral_velocity is not None: + vy = max(-self._max_lateral_velocity, min(self._max_lateral_velocity, vy)) # Publish velocity command via LCM if self.cmd_vel.transport: twist = Twist() twist.linear = Vector3(vx, vy, 0) - twist.angular = Vector3(0, 0, 0) # No rotation for now + twist.angular = Vector3(0, 0, yaw_rate) self.cmd_vel.publish(twist) # Publish visualization if transport is set @@ -291,6 +1184,8 @@ def _visual_servoing_loop(self, tracker: Any, duration: float) -> None: "center": [int(current_x), int(current_y)], "error": [int(current_x - center_x), int(current_y - center_y)], "velocity": [float(vx), float(vy), float(vz)], + "yaw_rate": float(yaw_rate), + "mode": self._control_mode, "frame": frame_count, } ) @@ -310,6 +1205,196 @@ def _visual_servoing_loop(self, tracker: Any, duration: float) -> None: self._tracking_active = False logger.info(f"Visual servoing loop ended after {frame_count} frames") + def _detection_servoing_loop( + self, object_name: str | None, duration: float, distance_m: float + ) -> None: + """Detector-driven servoing loop that re-detects the target every frame.""" + start_time = time.time() + frame_count = 0 + lost_track_count = 0 + max_lost_frames = 60 + confirm_hits = 0 + confirmed = False + last_candidate_bbox: tuple[int, int, int, int] | None = None + person_target = object_name is None or object_name.lower().strip() in PERSON_ALIASES + + logger.info("Starting detector-driven servoing loop") + + try: + while self._tracking_active and (time.time() - start_time < duration): + frame = self._get_latest_frame() + if frame is None: + time.sleep(0.02) + continue + + frame_count += 1 + detect_frame = self._prepare_tracker_frame(frame) + + bbox_xyxy: tuple[int, int, int, int] | None + if person_target: + bbox_xyxy = self._detect_person_local( + detect_frame, + log_miss=False, + log_hit=False, + anchor_bbox=self._person_lock_bbox, + ) + else: + bbox_xyxy = self._detect_initial_bbox(detect_frame, object_name) + + if bbox_xyxy is None: + lost_track_count += 1 + if not confirmed: + confirm_hits = max(0, confirm_hits - 1) + if person_target and lost_track_count > 8: + self._person_lock_bbox = None + if lost_track_count % 10 == 0: + logger.warning(f"Detector loop has no target (count: {lost_track_count})") + if self.tracking_overlay.transport and lost_track_count % 5 == 0: + overlay = self._draw_search_overlay( + detect_frame, f"Searching for {object_name or 'object'}" + ) + self.tracking_overlay.publish(Image.from_numpy(overlay, format=ImageFormat.BGR)) + if lost_track_count >= max_lost_frames: + self._publish_status( + {"status": "lost", "object": self._current_object, "frame": frame_count} + ) + break + time.sleep(0.05) + continue + + lost_track_count = 0 + if person_target: + self._person_lock_bbox = bbox_xyxy + + if not confirmed: + if ( + last_candidate_bbox is None + or self._bbox_iou(last_candidate_bbox, bbox_xyxy) >= PERSON_CONFIRM_IOU + ): + confirm_hits += 1 + else: + confirm_hits = 1 + last_candidate_bbox = bbox_xyxy + + if confirm_hits < PERSON_CONFIRM_FRAMES: + if self.cmd_vel.transport: + hold_twist = Twist() + hold_twist.linear = Vector3(0, 0, 0) + hold_twist.angular = Vector3(0, 0, 0) + self.cmd_vel.publish(hold_twist) + if self.tracking_overlay.transport: + cx = int((bbox_xyxy[0] + bbox_xyxy[2]) / 2) + cy = int((bbox_xyxy[1] + bbox_xyxy[3]) / 2) + overlay = self._draw_tracking_overlay( + detect_frame, + ( + int(bbox_xyxy[0]), + int(bbox_xyxy[1]), + int(max(1, bbox_xyxy[2] - bbox_xyxy[0])), + int(max(1, bbox_xyxy[3] - bbox_xyxy[1])), + ), + (cx, cy), + ) + overlay = self._draw_search_overlay( + overlay, + f"Candidate person {confirm_hits}/{PERSON_CONFIRM_FRAMES}", + ) + self.tracking_overlay.publish(Image.from_numpy(overlay, format=ImageFormat.BGR)) + time.sleep(0.05) + continue + + confirmed = True + logger.info("Detector loop person target confirmed") + + x1, y1, x2, y2 = bbox_xyxy + x = int(x1) + y = int(y1) + w = int(max(1, x2 - x1)) + h = int(max(1, y2 - y1)) + current_x = x + (w / 2.0) + current_y = y + (h / 2.0) + + frame_height, frame_width = detect_frame.shape[:2] + center_x = frame_width / 2 + center_y = frame_height / 2 + + yaw_rate = 0.0 + bbox_height_ratio = h / float(max(frame_height, 1)) + if self._control_mode == "yaw_only": + vx, vy, vz = 0.0, 0.0, 0.0 + yaw_rate = self._compute_yaw_rate(current_x, center_x) + elif person_target and self._person_follow_policy == "yaw_forward_constant": + self._target_distance_m = max( + FOLLOW_TARGET_MIN_M, min(FOLLOW_TARGET_MAX_M, distance_m) + ) + vx, vy, vz, yaw_rate = self._compute_person_follow_command( + (x, y, x + w, y + h), + frame_width, + frame_height, + current_x, + ) + else: + vx, vy, vz = self.servoing_controller.compute_velocity_control( + target_x=current_x, + target_y=current_y, + center_x=center_x, + center_y=center_y, + dt=0.05, + lock_altitude=True, + ) + yaw_rate = 0.0 + + if self._max_velocity is not None: + vx = max(-self._max_velocity, min(self._max_velocity, vx)) + vy = max(-self._max_velocity, min(self._max_velocity, vy)) + if self._max_forward_velocity is not None: + vx = max(-self._max_forward_velocity, min(self._max_forward_velocity, vx)) + if self._max_lateral_velocity is not None: + vy = max(-self._max_lateral_velocity, min(self._max_lateral_velocity, vy)) + + if self.cmd_vel.transport: + twist = Twist() + twist.linear = Vector3(vx, vy, 0) + twist.angular = Vector3(0, 0, yaw_rate) + self.cmd_vel.publish(twist) + + if self.tracking_overlay.transport: + overlay = self._draw_tracking_overlay( + detect_frame, (x, y, w, h), (int(current_x), int(current_y)) + ) + overlay_msg = Image.from_numpy(overlay, format=ImageFormat.BGR) + self.tracking_overlay.publish(overlay_msg) + + self._publish_status( + { + "status": "tracking", + "object": self._current_object, + "backend": "yolo_detection", + "bbox": [x, y, w, h], + "center": [int(current_x), int(current_y)], + "error": [int(current_x - center_x), int(current_y - center_y)], + "velocity": [float(vx), float(vy), float(vz)], + "yaw_rate": float(yaw_rate), + "mode": self._control_mode, + "distance_target_m": float(self._target_distance_m), + "bbox_height_ratio": float(bbox_height_ratio), + "frame": frame_count, + } + ) + + time.sleep(0.05) + + except Exception as e: + logger.error(f"Error in detection fallback loop: {e}") + finally: + if self.cmd_vel.transport: + stop_twist = Twist() + stop_twist.linear = Vector3(0, 0, 0) + stop_twist.angular = Vector3(0, 0, 0) + self.cmd_vel.publish(stop_twist) + self._tracking_active = False + logger.info(f"Detector loop ended after {frame_count} frames") + def _draw_tracking_overlay( self, frame: NDArray[np.uint8], @@ -357,6 +1442,27 @@ def _draw_tracking_overlay( return overlay + def _draw_search_overlay( + self, + frame: NDArray[np.uint8], + text: str, + ) -> NDArray[np.uint8]: # type: ignore[type-arg] + """Draw lightweight debug overlay while target is not detected.""" + overlay: NDArray[np.uint8] = frame.copy() # type: ignore[type-arg] + frame_h, frame_w = overlay.shape[:2] + center = (frame_w // 2, frame_h // 2) + cv2.drawMarker(overlay, center, (255, 255, 0), cv2.MARKER_CROSS, 30, 2) + cv2.putText( + overlay, + text, + (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, + 0.8, + (0, 200, 255), + 2, + ) + return overlay + def _publish_status(self, status: dict[str, Any]) -> None: """Publish tracking status as JSON. @@ -383,6 +1489,10 @@ def _stop_tracking(self) -> None: self._publish_status({"status": "stopped", "object": self._current_object}) self._current_object = None + self._control_mode = "full" + self._person_lock_bbox = None + self._target_distance_m = 1.0 + self._reset_yaw_controller() logger.info("Tracking stopped") @rpc diff --git a/dimos/robot/drone/tello_connection_module.py b/dimos/robot/drone/tello_connection_module.py new file mode 100644 index 0000000000..9cf10e9fe5 --- /dev/null +++ b/dimos/robot/drone/tello_connection_module.py @@ -0,0 +1,557 @@ +#!/usr/bin/env python3 +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""DimOS module wrapper for RoboMaster TT / Tello SDK control.""" + +from __future__ import annotations + +import json +import math +import threading +import time +from typing import Any + +from dimos_lcm.std_msgs import String +from reactivex.disposable import CompositeDisposable, Disposable + +from dimos.agents.annotation import skill +from dimos.core.core import rpc +from dimos.core.module import Module +from dimos.core.stream import In, Out +from dimos.msgs.geometry_msgs import PoseStamped, Quaternion, Transform, Twist, Vector3 +from dimos.msgs.sensor_msgs import Image +from dimos.robot.drone.tello_sdk import TelloSdkClient +from dimos.utils.logging_config import setup_logger + +logger = setup_logger() + + +def _add_disposable(composite: CompositeDisposable, item: Disposable | Any) -> None: + if isinstance(item, Disposable): + composite.add(item) + elif callable(item): + composite.add(Disposable(item)) + + +def _to_float(value: Any, default: float = 0.0) -> float: + if value is None: + return default + if isinstance(value, (float, int)): + return float(value) + try: + return float(value) + except (TypeError, ValueError): + return default + + +class TelloConnectionModule(Module): + """Module that bridges Tello SDK to DimOS streams and skills.""" + + # Inputs + movecmd: In[Vector3] + movecmd_twist: In[Twist] + extcmd: In[String] + tracking_status: In[Any] + + # Outputs + odom: Out[PoseStamped] + status: Out[Any] + telemetry: Out[Any] + video: Out[Image] + follow_object_cmd: Out[Any] + + # Parameters + tello_ip: str + local_ip: str + local_command_port: int + state_port: int + video_port: int + command_timeout: float + video_uri: str | None + + # Internal state + _latest_odom: PoseStamped | None = None + _latest_video_frame: Image | None = None + _latest_status: dict[str, Any] | None = None + _latest_telemetry: dict[str, Any] | None = None + _latest_tracking_status: dict[str, Any] | None = None + _state_lock: threading.RLock + + def __init__( + self, + tello_ip: str = "192.168.10.1", + local_ip: str = "", + local_command_port: int = 9000, + state_port: int = 8890, + video_port: int = 11111, + command_timeout: float = 7.0, + video_uri: str | None = None, + *args: Any, + **kwargs: Any, + ) -> None: + """Initialize Tello module. + + Args: + tello_ip: Drone IP address. + local_ip: Local bind host. Empty binds all interfaces. + local_command_port: Local UDP port for command responses. + state_port: Local UDP port receiving state packets. + video_port: Local UDP port receiving H264 video. + command_timeout: Timeout for SDK commands. + video_uri: Optional OpenCV URI override for the video stream. + """ + self.tello_ip = tello_ip + self.local_ip = local_ip + self.local_command_port = local_command_port + self.state_port = state_port + self.video_port = video_port + self.command_timeout = command_timeout + self.video_uri = video_uri + + self.connection: TelloSdkClient | None = None + self._latest_odom = None + self._latest_video_frame = None + self._latest_status = None + self._latest_telemetry = None + self._latest_tracking_status = None + self._state_lock = threading.RLock() + self._manual_override_until = 0.0 + Module.__init__(self, *args, **kwargs) + + @rpc + def start(self) -> None: + """Start the module and subscribe to Tello streams.""" + self.connection = TelloSdkClient( + tello_ip=self.tello_ip, + local_ip=self.local_ip, + local_command_port=self.local_command_port, + state_port=self.state_port, + video_port=self.video_port, + command_timeout=self.command_timeout, + video_uri=self.video_uri, + ) + + if not self.connection.connect(): + logger.error("Failed to connect to Tello TT") + return + + _add_disposable(self._disposables, self.connection.odom_stream().subscribe(self._publish_tf)) + _add_disposable( + self._disposables, self.connection.status_stream().subscribe(self._publish_status) + ) + _add_disposable( + self._disposables, self.connection.telemetry_stream().subscribe(self._publish_telemetry) + ) + _add_disposable( + self._disposables, self.connection.video_stream().subscribe(self._store_and_publish_frame) + ) + + _add_disposable(self._disposables, self.movecmd.subscribe(self._on_move)) + + if self.movecmd_twist.transport: + _add_disposable(self._disposables, self.movecmd_twist.subscribe(self._on_move_twist)) + if self.extcmd.transport: + _add_disposable(self._disposables, self.extcmd.subscribe(self._on_ext_cmd)) + if self.tracking_status.transport: + _add_disposable(self._disposables, self.tracking_status.subscribe(self._on_tracking_status)) + + logger.info("TelloConnectionModule started") + return + + def _store_and_publish_frame(self, frame: Image) -> None: + self._latest_video_frame = frame + self.video.publish(frame) + + def _publish_tf(self, msg: PoseStamped) -> None: + self._latest_odom = msg + self.odom.publish(msg) + + base_link = Transform( + translation=msg.position, + rotation=msg.orientation, + frame_id="world", + child_frame_id="base_link", + ts=msg.ts if hasattr(msg, "ts") else time.time(), + ) + self.tf.publish(base_link) + + camera_link = Transform( + translation=Vector3(0.05, 0.0, -0.02), + rotation=Quaternion(0.0, 0.0, 0.0, 1.0), + frame_id="base_link", + child_frame_id="camera_link", + ts=time.time(), + ) + self.tf.publish(camera_link) + + def _publish_status(self, status: dict[str, Any]) -> None: + with self._state_lock: + self._latest_status = dict(status) + self.status.publish(String(json.dumps(status))) + + def _publish_telemetry(self, telemetry: dict[str, Any]) -> None: + with self._state_lock: + self._latest_telemetry = dict(telemetry) + self.telemetry.publish(String(json.dumps(telemetry))) + + def _on_move(self, cmd: Vector3) -> None: + if self.connection: + self.connection.move(cmd, duration=0.0) + + def _on_move_twist(self, msg: Twist) -> None: + if time.time() < self._manual_override_until: + return + if self.connection: + self.connection.move_twist(msg, duration=0.0, lock_altitude=True) + + def _on_ext_cmd(self, msg: String) -> None: + if self.connection: + self.connection.send_ext(msg.data) + + def _on_tracking_status(self, status: Any) -> None: + data: dict[str, Any] | None = None + if isinstance(status, String): + try: + data = json.loads(status.data) + except json.JSONDecodeError: + data = None + elif isinstance(status, dict): + data = status + + if data is not None: + with self._state_lock: + self._latest_tracking_status = data + + def _wait_for_tracking_status( + self, timeout: float + ) -> tuple[str | None, dict[str, Any] | None]: + deadline = time.time() + timeout + while time.time() < deadline: + with self._state_lock: + status = dict(self._latest_tracking_status or {}) or None + if status is not None: + value = status.get("status") + if isinstance(value, str) and value in {"tracking", "not_found", "failed", "lost"}: + return value, status + time.sleep(0.05) + with self._state_lock: + status = dict(self._latest_tracking_status or {}) or None + return None, status + + def _wait_until_airborne(self, min_height_cm: float = 20.0, timeout: float = 12.0) -> bool: + if not self.connection: + return False + deadline = time.time() + timeout + while time.time() < deadline: + state = self.connection.get_state() + if _to_float(state.get("h")) >= min_height_cm: + return True + time.sleep(0.1) + state = self.connection.get_state() + return _to_float(state.get("h")) >= min_height_cm + + @rpc + def get_odom(self) -> PoseStamped | None: + """Get latest odometry estimate.""" + return self._latest_odom + + @rpc + def get_status(self) -> dict[str, Any]: + """Get latest compact status dict.""" + with self._state_lock: + return dict(self._latest_status or {}) + + @rpc + def get_state(self) -> dict[str, Any]: + """Get latest raw Tello state packet as dict.""" + with self._state_lock: + return dict(self._latest_telemetry or {}) + + @skill + def move(self, x: float = 0.0, y: float = 0.0, z: float = 0.0, duration: float = 0.0) -> str: + """Move with body-frame velocity. + + Args: + x: Forward velocity in m/s. + y: Left velocity in m/s. + z: Up velocity in m/s. + duration: Optional duration in seconds. 0 means single update. + """ + if not self.connection: + return "Failed: no Tello connection" + ok = self.connection.move(Vector3(x, y, z), duration=duration) + return "Move command sent" if ok else "Failed: move command rejected" + + @skill + def rc( + self, + left_right: int = 0, + forward_back: int = 0, + up_down: int = 0, + yaw: int = 0, + duration: float = 0.0, + ) -> str: + """Send low-level rc command in SDK units. + + Args: + left_right: Left/right channel in [-100, 100]. Positive is right. + forward_back: Forward/back channel in [-100, 100]. Positive is forward. + up_down: Vertical channel in [-100, 100]. Positive is up. + yaw: Yaw channel in [-100, 100]. Positive is clockwise. + duration: Optional duration before auto-stop. + """ + if not self.connection: + return "Failed: no Tello connection" + ok = self.connection.rc(left_right, forward_back, up_down, yaw) + if duration > 0: + time.sleep(duration) + self.connection.rc(0, 0, 0, 0) + return "RC command sent" if ok else "Failed: rc command rejected" + + @skill + def move_relative(self, x: float = 0.0, y: float = 0.0, z: float = 0.0, speed: float = 0.4) -> str: + """Move by a relative offset using the SDK go command. + + Args: + x: Forward distance in meters. + y: Left distance in meters. + z: Up distance in meters. + speed: Translation speed in m/s. + """ + if not self.connection: + return "Failed: no Tello connection" + ok = self.connection.move_relative(x=x, y=y, z=z, speed=speed) + return "Relative move command sent" if ok else "Failed: move_relative rejected" + + @skill + def yaw(self, degrees: float) -> str: + """Rotate in place. + + Args: + degrees: Positive rotates counter-clockwise, negative clockwise. + """ + if not self.connection: + return "Failed: no Tello connection" + ok = self.connection.yaw(degrees) + return "Yaw command sent" if ok else "Failed: yaw command rejected" + + @skill + def takeoff(self, altitude: float = 1.0) -> str: + """Take off and hover. + + Args: + altitude: Desired altitude hint in meters. + """ + if not self.connection: + return "Failed: no Tello connection" + ok = self.connection.takeoff() + if not ok: + return "Failed: takeoff rejected" + if altitude > 1.0: + return "Takeoff command sent (TT hovers at fixed initial altitude; use move_relative z for more)" + return "Takeoff command sent" + + @skill + def land(self) -> str: + """Land immediately.""" + if not self.connection: + return "Failed: no Tello connection" + ok = self.connection.land() + return "Land command sent" if ok else "Failed: land rejected" + + @skill + def emergency_stop(self) -> str: + """Emergency stop motors immediately.""" + if not self.connection: + return "Failed: no Tello connection" + ok = self.connection.emergency() + return "Emergency command sent" if ok else "Failed: emergency rejected" + + @skill + def send_ext(self, ext_command: str) -> str: + """Send a TT expansion-board EXT command. + + Args: + ext_command: Raw EXT payload, with or without the EXT prefix. + """ + if not self.connection: + return "Failed: no Tello connection" + ok = self.connection.send_ext(ext_command) + return "EXT command sent" if ok else "Failed: EXT command rejected" + + @skill + def follow_object( + self, + object_description: str = "person", + distance_m: float = 1.0, + duration: float = 120.0, + scan_step_deg: float = 30.0, + max_scan_steps: int = 12, + control_mode: str = "full", + ) -> str: + """Scan for and follow an object using visual tracking. + + Args: + object_description: Object to search for (for example "person"). + distance_m: Desired following distance in meters (currently a hint only). + duration: Maximum follow duration in seconds once acquired. Also used as search budget. + scan_step_deg: Yaw step used between search attempts. + max_scan_steps: Number of scan attempts before giving up. + control_mode: "full" for translation+tracking, or "yaw_only" to rotate in place only. + Use "yaw_only" when the user asks to hover and only rotate to center a person. + """ + if not self.connection: + return "Failed: no Tello connection" + if not self.follow_object_cmd.transport: + return "Failed: follow_object stream is not connected" + if not self._wait_until_airborne(min_height_cm=20.0, timeout=12.0): + logger.warning("follow_object: drone not confirmed airborne; refusing to track") + return "Failed: drone is not airborne yet" + + steps = max(1, max_scan_steps) + scan_budget_s = max(2.0, min(duration, 30.0)) + per_step_budget_s = max(0.8, scan_budget_s / steps) + follow_duration_s = max(duration, 60.0) + + for step in range(steps): + step_deadline = time.time() + per_step_budget_s + while time.time() < step_deadline: + with self._state_lock: + self._latest_tracking_status = None + + msg = { + "object_description": object_description, + "duration": follow_duration_s, + "distance_m": distance_m, + "control_mode": control_mode, + } + self.follow_object_cmd.publish(String(json.dumps(msg))) + + time_left = max(0.2, step_deadline - time.time()) + status, detail = self._wait_for_tracking_status(timeout=min(1.5, time_left)) + if status == "tracking": + return ( + f"Following {object_description} for up to {follow_duration_s:.0f}s. " + "Using fixed forward approach speed with yaw centering. " + f"Mode={control_mode}." + ) + if status == "lost": + return f"Started tracking {object_description} but target was lost" + if status == "failed": + reason = detail.get("error", "tracker failed") if detail else "tracker failed" + # Hard backend/config errors should return immediately. + if "no supported" in str(reason).lower() or "stream is not connected" in str( + reason + ).lower(): + return f"Failed to start tracking {object_description}: {reason}" + + time.sleep(0.15) + + if step < steps - 1 and abs(scan_step_deg) > 0.0: + self.connection.yaw(scan_step_deg) + time.sleep(0.5) + + return ( + f"Could not find {object_description} after scanning for about {scan_budget_s:.0f}s" + ) + + @skill + def center_person_by_yaw( + self, + duration: float = 120.0, + scan_step_deg: float = 0.0, + max_scan_steps: int = 1, + ) -> str: + """Hover and rotate to keep a detected person centered in view (no translation). + + Args: + duration: Maximum tracking duration in seconds once acquired. + scan_step_deg: Optional yaw scan step between search attempts. + max_scan_steps: Number of search orientations before giving up. + """ + return self.follow_object( + object_description="person", + distance_m=0.0, + duration=duration, + scan_step_deg=scan_step_deg, + max_scan_steps=max_scan_steps, + control_mode="yaw_only", + ) + + @skill + def orbit_object( + self, + radius_m: float = 1.0, + speed_mps: float = 0.35, + revolutions: float = 1.0, + clockwise: bool = False, + ) -> str: + """Circle around the currently followed target using velocity+yaw control. + + Args: + radius_m: Desired orbit radius in meters. + speed_mps: Tangential orbit speed in m/s. + revolutions: Number of revolutions to complete. + clockwise: If True orbit clockwise, otherwise counter-clockwise. + """ + if not self.connection: + return "Failed: no Tello connection" + if radius_m <= 0.1: + return "Failed: radius_m must be greater than 0.1" + if speed_mps <= 0.05: + return "Failed: speed_mps must be greater than 0.05" + if revolutions <= 0.0: + return "Failed: revolutions must be positive" + + duration = (2.0 * math.pi * radius_m * revolutions) / speed_mps + direction = -1.0 if clockwise else 1.0 + + self._manual_override_until = time.time() + duration + 0.5 + try: + yaw_rate = direction * (speed_mps / radius_m) + ok = self.connection.move( + Vector3(0.0, direction * speed_mps, 0.0), + yaw_rate=yaw_rate, + duration=duration, + ) + finally: + self._manual_override_until = 0.0 + + if ok: + orbit_dir = "clockwise" if clockwise else "counter-clockwise" + return ( + f"Completed {revolutions:.2f} {orbit_dir} orbit(s) at radius {radius_m:.2f}m " + f"and speed {speed_mps:.2f}m/s" + ) + return "Failed: orbit command rejected" + + @skill + def observe(self) -> Image | None: + """Return latest camera frame for perception tasks.""" + return self._latest_video_frame + + @rpc + def stop(self) -> None: + """Stop module and close the Tello connection.""" + if self.connection: + self.connection.disconnect() + self.connection = None + logger.info("TelloConnectionModule stopped") + super().stop() + + +tello_connection_module = TelloConnectionModule.blueprint + +__all__ = ["TelloConnectionModule", "tello_connection_module"] diff --git a/dimos/robot/drone/tello_sdk.py b/dimos/robot/drone/tello_sdk.py new file mode 100644 index 0000000000..79672d4e75 --- /dev/null +++ b/dimos/robot/drone/tello_sdk.py @@ -0,0 +1,483 @@ +#!/usr/bin/env python3 +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Low-level RoboMaster TT (Tello Talent) SDK adapter.""" + +from __future__ import annotations + +import math +import socket +import threading +import time +from typing import Any, TypeAlias + +import cv2 +from reactivex import Subject + +from dimos.msgs.geometry_msgs import PoseStamped, Quaternion, Twist, Vector3 +from dimos.msgs.sensor_msgs import Image, ImageFormat +from dimos.utils.logging_config import setup_logger + +logger = setup_logger() + +TelloScalar: TypeAlias = int | float | str +TelloState: TypeAlias = dict[str, TelloScalar] + + +def _to_float(value: TelloScalar | None, default: float = 0.0) -> float: + if value is None: + return default + if isinstance(value, (float, int)): + return float(value) + try: + return float(value) + except ValueError: + return default + + +def _clamp(value: int, lo: int, hi: int) -> int: + return max(lo, min(hi, value)) + + +class TelloSdkClient: + """SDK client for RoboMaster TT / Tello over UDP.""" + + def __init__( + self, + tello_ip: str = "192.168.10.1", + command_port: int = 8889, + state_port: int = 8890, + video_port: int = 11111, + local_ip: str = "", + local_command_port: int = 9000, + command_timeout: float = 7.0, + video_uri: str | None = None, + velocity_scale: float = 100.0, + yaw_scale: float = 60.0, + ) -> None: + """Initialize the client. + + Args: + tello_ip: Drone IP, default AP mode address. + command_port: SDK command UDP port. + state_port: SDK state UDP port. + video_port: H264 UDP stream port. + local_ip: Local bind host. Empty string binds all interfaces. + local_command_port: Local UDP port used for command/response. + command_timeout: Timeout waiting for command responses. + video_uri: Optional explicit URI for OpenCV video capture. + velocity_scale: Meters/second -> rc units conversion factor. + yaw_scale: Radians/second -> rc units conversion factor. + """ + self.tello_ip = tello_ip + self.command_port = command_port + self.state_port = state_port + self.video_port = video_port + self.local_ip = local_ip + self.local_command_port = local_command_port + self.command_timeout = command_timeout + self.video_uri = video_uri + self.velocity_scale = velocity_scale + self.yaw_scale = yaw_scale + + self.connected = False + self._running = False + + self._tello_addr = (self.tello_ip, self.command_port) + self._command_socket: socket.socket | None = None + self._state_socket: socket.socket | None = None + self._video_capture: cv2.VideoCapture | None = None + + self._command_lock = threading.Lock() + self._state_lock = threading.RLock() + self._frame_lock = threading.RLock() + self._pose_lock = threading.RLock() + + self._state_thread: threading.Thread | None = None + self._video_thread: threading.Thread | None = None + + self._latest_state: TelloState = {} + self._latest_frame: Image | None = None + + self._position = Vector3(0.0, 0.0, 0.0) + self._last_pose_ts = time.time() + + self._telemetry_subject: Subject[TelloState] = Subject() + self._status_subject: Subject[dict[str, Any]] = Subject() + self._odom_subject: Subject[PoseStamped] = Subject() + self._video_subject: Subject[Image] = Subject() + + @staticmethod + def parse_state_packet(packet: str) -> TelloState: + """Parse semicolon-separated SDK state packet to a dict.""" + result: TelloState = {} + for item in packet.strip().split(";"): + if not item or ":" not in item: + continue + key, raw_value = item.split(":", 1) + value = raw_value.strip() + if value == "": + continue + try: + if "." in value: + result[key] = float(value) + else: + result[key] = int(value) + except ValueError: + result[key] = value + return result + + def connect(self) -> bool: + """Connect sockets, enter SDK mode, start state/video workers.""" + if self.connected: + return True + + try: + self._command_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + self._command_socket.bind((self.local_ip, self.local_command_port)) + self._command_socket.settimeout(self.command_timeout) + + self._state_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + self._state_socket.bind((self.local_ip, self.state_port)) + self._state_socket.settimeout(0.5) + except OSError as exc: + logger.error(f"Failed to bind Tello sockets: {exc}") + self.disconnect() + return False + + self._running = True + self._state_thread = threading.Thread(target=self._state_loop, daemon=True) + self._state_thread.start() + + if not self.enter_sdk_mode(): + logger.error("Tello did not enter SDK command mode") + self.disconnect() + return False + + if not self.send_command("streamon", retries=1).startswith("ok"): + logger.warning("Tello streamon did not return ok; continuing without video") + else: + self._video_thread = threading.Thread(target=self._video_loop, daemon=True) + self._video_thread.start() + + self.connected = True + logger.info(f"Tello SDK connected to {self.tello_ip}:{self.command_port}") + return True + + def disconnect(self) -> None: + """Stop workers and close sockets.""" + if self.connected: + try: + self.send_command_no_wait("rc 0 0 0 0") + except Exception: + pass + try: + self.send_command("streamoff", retries=0) + except Exception: + pass + + self._running = False + + if self._state_thread and self._state_thread.is_alive(): + self._state_thread.join(timeout=1.0) + if self._video_thread and self._video_thread.is_alive(): + self._video_thread.join(timeout=1.0) + + if self._video_capture is not None: + self._video_capture.release() + self._video_capture = None + + if self._state_socket is not None: + self._state_socket.close() + self._state_socket = None + if self._command_socket is not None: + self._command_socket.close() + self._command_socket = None + + self.connected = False + logger.info("Tello SDK disconnected") + + def send_command(self, command: str, retries: int = 2) -> str: + """Send a raw command and wait for a response.""" + if self._command_socket is None: + return "error:not_connected" + + attempt = 0 + while attempt <= retries: + try: + with self._command_lock: + self._command_socket.sendto(command.encode("utf-8"), self._tello_addr) + data, _ = self._command_socket.recvfrom(2048) + response = data.decode("utf-8", errors="ignore").strip().lower() + return response + except TimeoutError: + attempt += 1 + logger.warning(f"Tello command timeout [{command}] attempt {attempt}/{retries + 1}") + except OSError as exc: + logger.error(f"Tello command socket error [{command}]: {exc}") + break + return "error:timeout" + + def send_command_no_wait(self, command: str) -> bool: + """Send a raw command without waiting for an SDK response. + + Useful for high-rate commands like `rc` where waiting for a reply + can add control lag and frequent timeout noise. + """ + if self._command_socket is None: + return False + try: + with self._command_lock: + self._command_socket.sendto(command.encode("utf-8"), self._tello_addr) + return True + except OSError as exc: + logger.warning(f"Tello non-blocking send failed [{command}]: {exc}") + return False + + def enter_sdk_mode(self) -> bool: + """Enter SDK command mode.""" + return self.send_command("command").startswith("ok") + + def takeoff(self) -> bool: + """Takeoff command.""" + response = self.send_command("takeoff") + if response.startswith("ok"): + return True + + # Some firmware/network conditions drop command ACKs. + # Treat timeout as success if telemetry shows we actually climbed. + if response == "error:timeout": + deadline = time.time() + 4.0 + while time.time() < deadline: + state = self.get_state() + if _to_float(state.get("h")) >= 20.0: # centimeters + logger.info("Takeoff timeout recovered by telemetry height check") + return True + time.sleep(0.1) + + return False + + def land(self) -> bool: + """Land command.""" + return self.send_command("land").startswith("ok") + + def emergency(self) -> bool: + """Emergency stop command.""" + return self.send_command("emergency", retries=0).startswith("ok") + + def stop(self) -> bool: + """Stop all motion.""" + return self.send_command("stop", retries=0).startswith("ok") + + def yaw(self, degrees_ccw: float) -> bool: + """Rotate in place. Positive values rotate counter-clockwise.""" + degrees = round(abs(degrees_ccw)) + if degrees == 0: + return True + if degrees_ccw > 0: + return self.send_command(f"ccw {degrees}").startswith("ok") + return self.send_command(f"cw {degrees}").startswith("ok") + + def move_relative(self, x: float, y: float, z: float, speed: float = 0.4) -> bool: + """Move relative in meters (x forward, y left, z up).""" + x_cm = _clamp(round(x * 100.0), -500, 500) + y_right_cm = _clamp(round(-y * 100.0), -500, 500) + z_cm = _clamp(round(z * 100.0), -500, 500) + speed_cm = _clamp(round(speed * 100.0), 10, 100) + return self.send_command(f"go {x_cm} {y_right_cm} {z_cm} {speed_cm}").startswith("ok") + + def send_ext(self, ext_command: str) -> bool: + """Send TT expansion-board command through EXT path.""" + payload = ext_command.strip() + if not payload: + return False + if not payload.lower().startswith("ext "): + payload = f"EXT {payload}" + return self.send_command(payload).startswith("ok") + + def rc(self, left_right: int, forward_back: int, up_down: int, yaw: int) -> bool: + """Send low-level continuous control command in SDK rc units.""" + lr = _clamp(left_right, -100, 100) + fb = _clamp(forward_back, -100, 100) + ud = _clamp(up_down, -100, 100) + yw = _clamp(yaw, -100, 100) + return self.send_command_no_wait(f"rc {lr} {fb} {ud} {yw}") + + def move(self, linear: Vector3, yaw_rate: float = 0.0, duration: float = 0.0) -> bool: + """Move from SI units, converted to rc command.""" + lr = round(-linear.y * self.velocity_scale) + fb = round(linear.x * self.velocity_scale) + ud = round(linear.z * self.velocity_scale) + yw = round(-yaw_rate * self.yaw_scale) + ok = self.rc(lr, fb, ud, yw) + if duration > 0: + time.sleep(duration) + self.rc(0, 0, 0, 0) + return ok + + def move_twist(self, twist: Twist, duration: float = 0.0, lock_altitude: bool = True) -> bool: + """Move from Twist command.""" + z = 0.0 if lock_altitude else twist.linear.z + return self.move( + Vector3(twist.linear.x, twist.linear.y, z), + yaw_rate=twist.angular.z, + duration=duration, + ) + + def get_state(self) -> TelloState: + """Get the latest parsed state packet.""" + with self._state_lock: + return dict(self._latest_state) + + def get_video_frame(self) -> Image | None: + """Get latest decoded video frame.""" + with self._frame_lock: + return self._latest_frame + + def telemetry_stream(self) -> Subject[TelloState]: + """Stream of parsed state packets.""" + return self._telemetry_subject + + def status_stream(self) -> Subject[dict[str, Any]]: + """Stream of compact status dictionaries.""" + return self._status_subject + + def odom_stream(self) -> Subject[PoseStamped]: + """Stream of odometry estimates from state packets.""" + return self._odom_subject + + def video_stream(self) -> Subject[Image]: + """Stream of decoded camera frames.""" + return self._video_subject + + def _state_loop(self) -> None: + while self._running: + if self._state_socket is None: + break + try: + payload, _ = self._state_socket.recvfrom(2048) + except TimeoutError: + continue + except OSError: + break + + packet = payload.decode("utf-8", errors="ignore").strip() + if not packet: + continue + + state = self.parse_state_packet(packet) + if not state: + continue + + ts = time.time() + with self._state_lock: + self._latest_state = state + + self._telemetry_subject.on_next(dict(state)) + self._status_subject.on_next(self._build_status(state, ts)) + self._publish_odom_from_state(state, ts) + + def _video_loop(self) -> None: + bind_host = self.local_ip if self.local_ip else "0.0.0.0" + uri = self.video_uri or ( + f"udp://@{bind_host}:{self.video_port}?overrun_nonfatal=1&fifo_size=50000000" + ) + + capture = cv2.VideoCapture(uri, cv2.CAP_FFMPEG) + if not capture.isOpened(): + logger.warning(f"Failed to open Tello video stream URI: {uri}") + return + capture.set(cv2.CAP_PROP_BUFFERSIZE, 1) + + self._video_capture = capture + logger.info(f"Tello video stream started from {uri}") + + frame_count = 0 + no_frame_count = 0 + while self._running: + ok, frame = capture.read() + if not ok: + no_frame_count += 1 + time.sleep(0.02) + if no_frame_count > 150: + logger.warning("Tello video stalled; reopening capture") + capture.release() + capture = cv2.VideoCapture(uri, cv2.CAP_FFMPEG) + if capture.isOpened(): + capture.set(cv2.CAP_PROP_BUFFERSIZE, 1) + no_frame_count = 0 + continue + + no_frame_count = 0 + frame_count += 1 + if frame_count == 1: + logger.info(f"First Tello frame received: shape={frame.shape}") + + image = Image.from_opencv(frame, format=ImageFormat.BGR, frame_id="camera_link") + with self._frame_lock: + self._latest_frame = image + self._video_subject.on_next(image) + + capture.release() + self._video_capture = None + logger.info("Tello video stream stopped") + + def _build_status(self, state: TelloState, ts: float) -> dict[str, Any]: + return { + "connected": self.connected, + "battery": int(_to_float(state.get("bat"), -1.0)), + "height_m": _to_float(state.get("h")) / 100.0, + "tof_m": _to_float(state.get("tof")) / 100.0, + "yaw_deg": _to_float(state.get("yaw")), + "pitch_deg": _to_float(state.get("pitch")), + "roll_deg": _to_float(state.get("roll")), + "temp_low_c": _to_float(state.get("templ")), + "temp_high_c": _to_float(state.get("temph")), + "flight_time_s": _to_float(state.get("time")), + "ts": ts, + } + + def _publish_odom_from_state(self, state: TelloState, ts: float) -> None: + roll_rad = math.radians(_to_float(state.get("roll"))) + pitch_rad = math.radians(_to_float(state.get("pitch"))) + yaw_rad = math.radians(_to_float(state.get("yaw"))) + quaternion = Quaternion.from_euler(Vector3(roll_rad, pitch_rad, yaw_rad)) + + vgx = _to_float(state.get("vgx")) / 100.0 + vgy_right = _to_float(state.get("vgy")) / 100.0 + v_left = -vgy_right + + with self._pose_lock: + dt = max(0.0, min(ts - self._last_pose_ts, 0.2)) + self._last_pose_ts = ts + + vx_world = (vgx * math.cos(yaw_rad)) - (v_left * math.sin(yaw_rad)) + vy_world = (vgx * math.sin(yaw_rad)) + (v_left * math.cos(yaw_rad)) + + self._position.x += vx_world * dt + self._position.y += vy_world * dt + self._position.z = _to_float(state.get("h")) / 100.0 + + pose = PoseStamped( + position=Vector3(self._position), + orientation=quaternion, + frame_id="world", + ts=ts, + ) + + self._odom_subject.on_next(pose) + + +__all__ = ["TelloSdkClient", "TelloState"] diff --git a/docs/usage/README.md b/docs/usage/README.md index 071b6fc0b2..9a7037b076 100644 --- a/docs/usage/README.md +++ b/docs/usage/README.md @@ -7,6 +7,7 @@ This page explains general concepts. - [Modules](/docs/usage/modules.md): The primary units of deployment in DimOS, modules run in parallel and are python classes. - [Streams](/docs/usage/sensor_streams/README.md): How modules communicate, a Pub / Sub system. - [Blueprints](/docs/usage/blueprints.md): a way to group modules together and define their connections to each other. +- [RoboMaster TT / Tello](/docs/usage/drone_tello_tt.md): Tello SDK blueprints, modules, and compatibility notes. - [RPC](/docs/usage/blueprints.md#calling-the-methods-of-other-modules): how one module can call a method on another module (arguments get serialized to JSON-like binary data). - [Skills](/docs/usage/blueprints.md#defining-skills): An RPC function, except it can be called by an AI agent (a tool for an AI). - Agents: AI that has an objective, access to stream data, and is capable of calling skills as tools. diff --git a/docs/usage/drone_tello_tt.md b/docs/usage/drone_tello_tt.md new file mode 100644 index 0000000000..b6c866ce9f --- /dev/null +++ b/docs/usage/drone_tello_tt.md @@ -0,0 +1,136 @@ +# RoboMaster TT / Tello Integration + +This page documents the DimOS Tello integration and the compatibility constraints used to keep existing drone functionality intact. + +## Overview + +DimOS now includes a RoboMaster TT / Tello implementation that runs the main runtime on an edge computer and communicates with the drone over the DJI Tello UDP SDK. + +High-level stack: + +```text +DimOS (laptop / edge) + -> TelloConnectionModule + -> Tello UDP SDK (cmd/state/video) + -> Optional EXT commands (TT expansion board) + +Video/state + -> DroneTrackingModule + CameraModule + -> agent / teleop / visualization modules +``` + +## New Blueprints + +- `drone-tello-tt-basic` +- `drone-tello-tt-agentic` + +List and run: + +```bash +dimos list +dimos run --robot-ip 192.168.10.1 drone-tello-tt-agentic --disable web-input +``` + +## Hardware + Network Connection Steps + +For reliable Tello control plus cloud API access (OpenAI, optional Qwen, etc.), use two network paths: + +1. Power on the RoboMaster TT / Tello and wait for SSID `TELLO-XXXXXX`. +2. Connect your primary Wi-Fi interface to `TELLO-XXXXXX` (this provides the `192.168.10.x` control link). +3. Keep internet on a second interface: + - preferred: wired Ethernet (`eth0`), or + - alternate: a second USB Wi-Fi adapter connected to home/office internet. +4. Verify routes: + - Tello subnet exists on the Tello interface: `192.168.10.0/24` + - default route points to your internet interface (not the Tello AP). +5. Verify connectivity: + - `ping -c 1 192.168.10.1` (drone reachable) + - `curl -I https://api.openai.com` (internet reachable) +6. Launch DimOS: + - `dimos --robot-ip 192.168.10.1 run drone-tello-tt-agentic --disable web-input` + +If you only have a single Wi-Fi radio, connecting to Tello usually removes internet access and cloud tool-calls will fail. + +## New Modules + +- `dimos.robot.drone.tello_sdk.TelloSdkClient` + - Raw UDP command channel (`8889`) + - State stream (`8890`) + - Video stream (`11111`) decode + reconnect handling +- `dimos.robot.drone.tello_connection_module.TelloConnectionModule` + - Skills: `takeoff`, `land`, `move`, `move_relative`, `yaw`, `rc`, `send_ext`, `follow_object`, `center_person_by_yaw`, `orbit_object`, `observe` + - Publishes telemetry/status/odom/video and follow-command stream + +## Tracking and Follow Behavior + +`DroneTrackingModule` now supports both legacy and Tello-optimized behavior. + +Compatibility defaults: + +- `enable_passive_overlay=False` +- `use_local_person_detector=False` +- `force_detection_servoing_for_person=False` +- `person_follow_policy="legacy_pid"` + +These defaults preserve existing `drone-agentic` behavior (Qwen + tracker path) unless a blueprint explicitly opts in. + +Tello blueprint opt-in: + +- `enable_passive_overlay=True` +- `use_local_person_detector=True` +- `force_detection_servoing_for_person=True` +- `person_follow_policy="yaw_forward_constant"` + +This keeps Tello-specific behavior scoped to Tello blueprints. + +## Optional Local Detector/GPU Settings + +Local YOLO person detection is optional and only used when enabled by module options. + +Supported env overrides: + +```bash +export DIMOS_DRONE_YOLO_DEVICE=cuda:0 # or cpu +export DIMOS_DRONE_YOLO_MODEL=yolo11s-pose.pt +export DIMOS_DRONE_YOLO_IMGSZ=416 +export DIMOS_DRONE_YOLO_MAX_DET=5 +``` + +Notes: + +- CUDA is not required. +- If CUDA init fails, tracking falls back to CPU safely. +- No laptop-specific paths or machine-specific constants are required. + +## Files Added + +- `dimos/robot/drone/tello_sdk.py` +- `dimos/robot/drone/tello_connection_module.py` +- `dimos/robot/drone/blueprints/basic/drone_tello_tt_basic.py` +- `dimos/robot/drone/blueprints/agentic/drone_tello_tt_agentic.py` + +## Files Updated + +- `dimos/robot/drone/drone_tracking_module.py` + - Added configurable tracking modes for compatibility and Tello-specific behavior + - Added optional local detector path and passive overlay + - Added yaw-only centering and follow policies +- `dimos/robot/drone/__init__.py` +- `dimos/robot/drone/blueprints/__init__.py` +- `dimos/robot/drone/blueprints/basic/__init__.py` +- `dimos/robot/drone/blueprints/agentic/__init__.py` +- `dimos/robot/all_blueprints.py` +- `dimos/robot/drone/README.md` + +## Non-Regression Intent + +The integration is designed so legacy drone stacks are unaffected unless they opt into Tello-oriented tracking options. + +Validation commands: + +```bash +uv run ruff check dimos/robot/drone/ +uv run mypy dimos/robot/drone/ +python3 -m py_compile dimos/robot/drone/drone_tracking_module.py +python3 -m py_compile dimos/robot/drone/tello_connection_module.py +```