From 87a60173abdfe4b2effaf6d0db8a2f3b815435b5 Mon Sep 17 00:00:00 2001
From: jlelong-berkeley <jlelong@berkeley.edu>
Date: Wed, 25 Mar 2026 04:42:18 -0700
Subject: [PATCH] feat(drone): add RoboMaster TT/Tello integration

---
 dimos/robot/all_blueprints.py                 |    3 +
 dimos/robot/drone/README.md                   |   65 +
 .../agentic/drone_tello_tt_agentic.py         |   57 +
 .../blueprints/basic/drone_tello_tt_basic.py  |   96 ++
 dimos/robot/drone/drone_tracking_module.py    | 1178 ++++++++++++++++-
 dimos/robot/drone/tello_connection_module.py  |  557 ++++++++
 dimos/robot/drone/tello_sdk.py                |  483 +++++++
 docs/usage/README.md                          |    1 +
 docs/usage/drone_tello_tt.md                  |  136 ++
 9 files changed, 2542 insertions(+), 34 deletions(-)
 create mode 100644 dimos/robot/drone/blueprints/agentic/drone_tello_tt_agentic.py
 create mode 100644 dimos/robot/drone/blueprints/basic/drone_tello_tt_basic.py
 create mode 100644 dimos/robot/drone/tello_connection_module.py
 create mode 100644 dimos/robot/drone/tello_sdk.py
 create mode 100644 docs/usage/drone_tello_tt.md

diff --git a/dimos/robot/all_blueprints.py b/dimos/robot/all_blueprints.py
index 5910093d61..c4d009292e 100644
--- a/dimos/robot/all_blueprints.py
+++ b/dimos/robot/all_blueprints.py
@@ -48,6 +48,8 @@
     "demo-skill": "dimos.agents.skills.demo_skill:demo_skill",
     "drone-agentic": "dimos.robot.drone.blueprints.agentic.drone_agentic:drone_agentic",
     "drone-basic": "dimos.robot.drone.blueprints.basic.drone_basic:drone_basic",
+    "drone-tello-tt-agentic": "dimos.robot.drone.blueprints.agentic.drone_tello_tt_agentic:drone_tello_tt_agentic",
+    "drone-tello-tt-basic": "dimos.robot.drone.blueprints.basic.drone_tello_tt_basic:drone_tello_tt_basic",
     "dual-xarm6-planner": "dimos.manipulation.blueprints:dual_xarm6_planner",
     "keyboard-teleop-piper": "dimos.robot.manipulators.piper.blueprints:keyboard_teleop_piper",
     "keyboard-teleop-xarm6": "dimos.robot.manipulators.xarm.blueprints:keyboard_teleop_xarm6",
@@ -161,6 +163,7 @@
     "simple-phone-teleop": "dimos.teleop.phone.phone_extensions",
     "spatial-memory": "dimos.perception.spatial_perception",
     "speak-skill": "dimos.agents.skills.speak_skill",
+    "tello-connection-module": "dimos.robot.drone.tello_connection_module",
     "temporal-memory": "dimos.perception.experimental.temporal_memory.temporal_memory",
     "twist-teleop-module": "dimos.teleop.quest.quest_extensions",
     "unitree-g1-skill-container": "dimos.robot.unitree.g1.skill_container",
diff --git a/dimos/robot/drone/README.md b/dimos/robot/drone/README.md
index 4e094e9f28..865d86de44 100644
--- a/dimos/robot/drone/README.md
+++ b/dimos/robot/drone/README.md
@@ -19,10 +19,34 @@ dimos run drone-basic --set outdoor=true
 
 # Agentic with LLM control
 dimos run drone-agentic
+
+# RoboMaster TT / Tello (Wi-Fi SDK)
+dimos run drone-tello-tt-basic --robot-ip 192.168.10.1
+dimos run drone-tello-tt-agentic --robot-ip 192.168.10.1 --disable web-input
 ```
 
 To interact with the agent, run `dimos humancli` in a separate terminal.
 
+## RoboMaster TT / Tello Hardware + Network Setup
+
+Tello control traffic and cloud API traffic should be split across two interfaces.
+
+1. Power on the drone and connect to `TELLO-XXXXXX` from one network interface.
+2. Keep internet on a separate interface:
+   - wired Ethernet is recommended, or
+   - a second Wi-Fi adapter connected to normal internet.
+3. Confirm network state:
+   - Tello link: `ip a` should show `192.168.10.x` on the Tello interface.
+   - Tello subnet route: `192.168.10.0/24` on that same interface.
+   - Default route should prefer internet interface (not `192.168.10.1`).
+4. Verify both paths:
+   - `ping -c 1 192.168.10.1`
+   - `curl -I https://api.openai.com`
+5. Run the Tello blueprint:
+   - `dimos --robot-ip 192.168.10.1 run drone-tello-tt-agentic --disable web-input`
+
+If you use only one Wi-Fi radio and connect it to Tello, cloud model calls usually fail due to loss of internet routing.
+
 ## Blueprints
 
 ### `drone-basic`
@@ -48,6 +72,25 @@ Composes on top of `drone-basic`, adding autonomous capabilities:
 | `McpServer` + `McpClient` | LLM agent (default: GPT-4o) via MCP |
 | `WebInput` | Web/CLI interface for human commands |
 
+### `drone-tello-tt-basic`
+RoboMaster TT/Tello SDK control over Wi-Fi UDP.
+
+| Module | Purpose |
+|--------|---------|
+| `TelloConnectionModule` | Tello command/state/video bridge, skills, and follow commands |
+| `DroneCameraModule` | Camera intrinsics + camera streams |
+| `WebsocketVisModule` | Web-based visualization |
+| `RerunBridgeModule` / `FoxgloveBridge` | 3D viewer (selected by `--viewer`) |
+
+### `drone-tello-tt-agentic`
+Composes on top of `drone-tello-tt-basic`, adding:
+
+| Module | Purpose |
+|--------|---------|
+| `DroneTrackingModule` | Person detection, follow, yaw-centering, tracking overlay |
+| `Agent` | LLM agent (default: GPT-4o) |
+| `WebInput` | Web/CLI interface for human commands |
+
 ## Installation
 
 ### Python (included with DimOS)
@@ -73,6 +116,17 @@ export OPENAI_API_KEY=sk-...
 
 # Optional
 export GOOGLE_MAPS_API_KEY=...  # For GoogleMapsSkillContainer
+export ALIBABA_API_KEY=...      # Optional Qwen detection for tracking
+```
+
+### Optional Tello Tracking Tuning
+These variables are optional and only affect local YOLO person detection in `DroneTrackingModule`:
+
+```bash
+export DIMOS_DRONE_YOLO_DEVICE=cuda:0   # or cpu
+export DIMOS_DRONE_YOLO_MODEL=yolo11s-pose.pt
+export DIMOS_DRONE_YOLO_IMGSZ=416
+export DIMOS_DRONE_YOLO_MAX_DET=5
 ```
 
 ## RosettaDrone Setup (Critical)
@@ -139,12 +193,16 @@ DJI Drone ← Wireless → DJI Controller ← USB → Android Device ← WiFi 
 dimos/robot/drone/
 ├── blueprints/
 │   ├── basic/drone_basic.py              # Base blueprint (connection + camera + vis)
+│   ├── basic/drone_tello_tt_basic.py     # Tello basic blueprint
 │   └── agentic/drone_agentic.py          # Agentic blueprint (composes on basic)
+│   └── agentic/drone_tello_tt_agentic.py # Tello agentic blueprint
 ├── connection_module.py                   # MAVLink communication & skills
 ├── camera_module.py                       # Camera processing & intrinsics
 ├── drone_tracking_module.py               # Visual servoing & object tracking
 ├── drone_visual_servoing_controller.py    # PID-based visual servoing
 ├── mavlink_connection.py                  # Low-level MAVLink protocol
+├── tello_connection_module.py             # Tello module and skills
+├── tello_sdk.py                           # Low-level Tello UDP SDK adapter
 └── dji_video_stream.py                    # GStreamer video capture + replay
 ```
 
@@ -198,6 +256,13 @@ Parameters: `(Kp, Ki, Kd, (min_output, max_output), integral_limit, deadband_pix
 4. Velocity commands sent via LCM stream
 5. Connection module converts to MAVLink commands
 
+### Tello Tracking Flow
+1. Tello camera stream is decoded in `TelloSdkClient`.
+2. `DroneTrackingModule` receives `/video`, publishes `/tracking_overlay`.
+3. For TT agentic blueprint, person follow uses detector-driven tracking with yaw-centering and forward approach.
+4. `cmd_vel` is remapped to `/movecmd_twist` and converted to Tello `rc` commands by `TelloConnectionModule`.
+5. Agent skills call `follow_object`, `center_person_by_yaw`, and `orbit_object`.
+
 ## Available Skills
 
 All skills are exposed to the LLM agent via the `@skill` decorator on `DroneConnectionModule`:
diff --git a/dimos/robot/drone/blueprints/agentic/drone_tello_tt_agentic.py b/dimos/robot/drone/blueprints/agentic/drone_tello_tt_agentic.py
new file mode 100644
index 0000000000..a77c523fcc
--- /dev/null
+++ b/dimos/robot/drone/blueprints/agentic/drone_tello_tt_agentic.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Agentic TT/Tello blueprint with tracking and LLM control."""
+
+from dimos.agents.agent import agent
+from dimos.agents.web_human_input import web_input
+from dimos.core.blueprints import autoconnect
+from dimos.robot.drone.blueprints.basic.drone_tello_tt_basic import drone_tello_tt_basic
+from dimos.robot.drone.drone_tracking_module import DroneTrackingModule
+
+TELLO_TT_SYSTEM_PROMPT = """\
+You are controlling a RoboMaster TT (Tello Talent) drone over the Tello SDK.
+Use short, safe movement commands and keep altitude conservative indoors.
+Prefer follow_object, orbit_object, move, move_relative, yaw, takeoff, and land.
+For "scan/find person then follow", call follow_object(object_description="person", distance_m=1.0).
+For any "hover + rotate/center person in frame" request, ALWAYS call
+center_person_by_yaw(duration=..., scan_step_deg=0.0, max_scan_steps=1).
+Do NOT call follow_object in default mode for rotate-only requests.
+For "circle around person", call orbit_object(radius_m=1.0, revolutions=1.0).
+Always end with land() when user asks to land.
+Use observe to inspect the environment before aggressive motion.
+Report what command you executed and the observed result.
+"""
+
+drone_tello_tt_agentic = autoconnect(
+    drone_tello_tt_basic,
+    DroneTrackingModule.blueprint(
+        outdoor=False,
+        enable_passive_overlay=True,
+        use_local_person_detector=True,
+        force_detection_servoing_for_person=True,
+        person_follow_policy="yaw_forward_constant",
+    ),
+    agent(system_prompt=TELLO_TT_SYSTEM_PROMPT, model="gpt-4o"),
+    web_input(),
+).remappings(
+    [
+        (DroneTrackingModule, "video_input", "video"),
+        (DroneTrackingModule, "cmd_vel", "movecmd_twist"),
+    ]
+)
+
+__all__ = ["TELLO_TT_SYSTEM_PROMPT", "drone_tello_tt_agentic"]
diff --git a/dimos/robot/drone/blueprints/basic/drone_tello_tt_basic.py b/dimos/robot/drone/blueprints/basic/drone_tello_tt_basic.py
new file mode 100644
index 0000000000..28a06b9be3
--- /dev/null
+++ b/dimos/robot/drone/blueprints/basic/drone_tello_tt_basic.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Basic RoboMaster TT/Tello blueprint with connection, camera, and visualization."""
+
+from typing import Any
+
+from dimos.core.blueprints import autoconnect
+from dimos.core.global_config import global_config
+from dimos.protocol.pubsub.impl.lcmpubsub import LCM
+from dimos.robot.drone.camera_module import DroneCameraModule
+from dimos.robot.drone.tello_connection_module import TelloConnectionModule
+from dimos.web.websocket_vis.websocket_vis_module import websocket_vis
+
+
+def _static_drone_body(rr: Any) -> list[Any]:
+    """Static visualization of TT body."""
+    return [
+        rr.Boxes3D(
+            half_sizes=[0.09, 0.09, 0.03],
+            colors=[(40, 180, 90)],
+        ),
+        rr.Transform3D(parent_frame="tf#/base_link"),
+    ]
+
+
+def _drone_rerun_blueprint() -> Any:
+    """Split layout: camera feed + 3D world view side by side."""
+    import rerun as rr
+    import rerun.blueprint as rrb
+
+    return rrb.Blueprint(
+        rrb.Horizontal(
+            rrb.Spatial2DView(origin="world/video", name="Camera"),
+            rrb.Spatial2DView(origin="world/tracking_overlay", name="Tracking Overlay"),
+            rrb.Spatial3DView(
+                origin="world",
+                name="3D",
+                background=rrb.Background(kind="SolidColor", color=[0, 0, 0]),
+                line_grid=rrb.LineGrid3D(
+                    plane=rr.components.Plane3D.XY.with_distance(0.2),
+                ),
+            ),
+            column_shares=[1, 1, 2],
+        ),
+    )
+
+
+_rerun_config = {
+    "blueprint": _drone_rerun_blueprint,
+    "pubsubs": [LCM()],
+    "static": {
+        "world/tf/base_link": _static_drone_body,
+    },
+}
+
+if global_config.viewer == "foxglove":
+    from dimos.robot.foxglove_bridge import foxglove_bridge
+
+    _vis = foxglove_bridge()
+elif global_config.viewer.startswith("rerun"):
+    from dimos.visualization.rerun.bridge import _resolve_viewer_mode, rerun_bridge
+
+    _vis = rerun_bridge(viewer_mode=_resolve_viewer_mode(), **_rerun_config)
+else:
+    _vis = autoconnect()
+
+tello_ip = global_config.robot_ip or "192.168.10.1"
+
+drone_tello_tt_basic = autoconnect(
+    _vis,
+    TelloConnectionModule.blueprint(
+        tello_ip=tello_ip,
+        local_ip="",
+        local_command_port=9000,
+        state_port=8890,
+        video_port=11111,
+    ),
+    DroneCameraModule.blueprint(camera_intrinsics=[920.0, 920.0, 480.0, 360.0]),
+    websocket_vis(),
+)
+
+__all__ = ["drone_tello_tt_basic"]
diff --git a/dimos/robot/drone/drone_tracking_module.py b/dimos/robot/drone/drone_tracking_module.py
index 5798db374b..8406db4f78 100644
--- a/dimos/robot/drone/drone_tracking_module.py
+++ b/dimos/robot/drone/drone_tracking_module.py
@@ -16,6 +16,7 @@
 """Drone tracking module with visual servoing for object following."""
 
 import json
+import os
 import threading
 import time
 from typing import Any
@@ -36,13 +37,53 @@
     DroneVisualServoingController,
     PIDParams,
 )
+from dimos.utils.data import get_data
 from dimos.utils.logging_config import setup_logger
+from dimos.utils.simple_controller import PIDController
 
 logger = setup_logger()
 
 INDOOR_PID_PARAMS: PIDParams = (0.001, 0.0, 0.0001, (-1.0, 1.0), None, 30)
 OUTDOOR_PID_PARAMS: PIDParams = (0.05, 0.0, 0.0003, (-5.0, 5.0), None, 10)
 INDOOR_MAX_VELOCITY = 1.0  # m/s safety cap for indoor mode
+INDOOR_MAX_FORWARD_VELOCITY = 0.25
+INDOOR_MAX_LATERAL_VELOCITY = 0.35
+PERSON_ALIASES = {"person", "people", "human", "man", "woman"}
+PERSON_CONFIRM_FRAMES = 2
+PERSON_CONFIRM_IOU = 0.12
+YOLO_PERSON_CONF_MIN = 0.18
+YOLO_PERSON_MIN_KEYPOINTS = 4
+YOLO_MODEL_NAME = "yolo11s-pose.pt"
+YOLO_MODEL_NAME_CPU = "yolo11n-pose.pt"
+YOLO_DEFAULT_IMGSZ = 416
+YOLO_DEFAULT_MAX_DET = 5
+PASSIVE_DETECT_INTERVAL_SEC = 0.3
+YAW_ONLY_DEADBAND_RATIO = 0.028
+YAW_ONLY_PID_KP = 1.0
+YAW_ONLY_PID_KI = 0.1
+YAW_ONLY_PID_KD = 0.62
+YAW_ONLY_MAX_RATE = 0.85
+YAW_ONLY_INTEGRAL_LIMIT = 0.2
+YAW_ONLY_CMD_SMOOTH_ALPHA = 0.40
+YAW_ONLY_DT = 0.05
+YAW_ONLY_ZERO_RATE_EPS = 0.015
+YAW_ONLY_MIN_RATE = 0.14
+YAW_ONLY_MIN_RATE_ERR_RATIO = 0.25
+YAW_ONLY_RATE_SLEW_PER_SEC = 7.0
+YAW_ONLY_LOCK_ENTER_RATIO = 0.03
+YAW_ONLY_LOCK_EXIT_RATIO = 0.075
+YAW_ONLY_LOCK_CMD_EPS = 0.08
+YAW_ONLY_ERROR_LPF_ALPHA = 0.6
+YAW_ONLY_DECEL_ERR_RATIO = 0.24
+YAW_ONLY_DECEL_MIN_RATE = 0.08
+YAW_ONLY_CROSS_BRAKE_ERR_RATIO = 0.24
+YAW_ONLY_CROSS_BRAKE_SEC = 0.12
+FOLLOW_TARGET_MIN_M = 0.6
+FOLLOW_TARGET_MAX_M = 2.5
+FOLLOW_APPROACH_SPEED = 0.15
+FOLLOW_ALIGN_SOFT_ERR = 0.15
+FOLLOW_ALIGN_STOP_ERR = 0.30
+FOLLOW_EDGE_MARGIN_RATIO = 0.06
 
 
 class DroneTrackingModule(Module):
@@ -63,6 +104,10 @@ def __init__(
         x_pid_params: PIDParams | None = None,
         y_pid_params: PIDParams | None = None,
         z_pid_params: PIDParams | None = None,
+        enable_passive_overlay: bool = False,
+        use_local_person_detector: bool = False,
+        force_detection_servoing_for_person: bool = False,
+        person_follow_policy: str = "legacy_pid",
     ) -> None:
         """Initialize the drone tracking module.
 
@@ -74,6 +119,11 @@ def __init__(
             y_pid_params: PID parameters for left/right strafe control.
                           If None, uses preset based on outdoor flag.
             z_pid_params: Optional PID parameters for altitude control.
+            enable_passive_overlay: If True, continuously publish person-detection overlays.
+            use_local_person_detector: If True, allow local YOLO person detection fallback when Qwen is unavailable.
+            force_detection_servoing_for_person: If True, bypass OpenCV tracker for person targets and use detector loop.
+            person_follow_policy: Person follow behavior in detector loop.
+                Supported: "legacy_pid" (original vx/vy PID), "yaw_forward_constant" (yaw-first + constant forward).
         """
         super().__init__()
 
@@ -83,6 +133,8 @@ def __init__(
 
         self._outdoor = outdoor
         self._max_velocity = None if outdoor else INDOOR_MAX_VELOCITY
+        self._max_forward_velocity = None if outdoor else INDOOR_MAX_FORWARD_VELOCITY
+        self._max_lateral_velocity = None if outdoor else INDOOR_MAX_LATERAL_VELOCITY
 
         self.servoing_controller = DroneVisualServoingController(
             x_pid_params=x_pid_params, y_pid_params=y_pid_params, z_pid_params=z_pid_params
@@ -91,9 +143,48 @@ def __init__(
         # Tracking state
         self._tracking_active = False
         self._tracking_thread: threading.Thread | None = None
+        self._passive_overlay_active = False
+        self._passive_overlay_thread: threading.Thread | None = None
+        self._enable_passive_overlay = enable_passive_overlay
+        self._use_local_person_detector = use_local_person_detector
+        self._force_detection_servoing_for_person = force_detection_servoing_for_person
+        self._control_mode = "full"
+        normalized_policy = person_follow_policy.strip().lower()
+        if normalized_policy not in {"legacy_pid", "yaw_forward_constant"}:
+            normalized_policy = "legacy_pid"
+        self._person_follow_policy = normalized_policy
+        self._yaw_rate_ema = 0.0
+        self._yaw_error_lpf = 0.0
+        self._yaw_lock_active = False
+        self._yaw_prev_raw_error = 0.0
+        self._yaw_cross_brake_until = 0.0
+        self._yaw_pid = PIDController(
+            YAW_ONLY_PID_KP,
+            YAW_ONLY_PID_KI,
+            YAW_ONLY_PID_KD,
+            output_limits=(-YAW_ONLY_MAX_RATE, YAW_ONLY_MAX_RATE),
+            integral_limit=YAW_ONLY_INTEGRAL_LIMIT,
+            deadband=YAW_ONLY_DEADBAND_RATIO,
+            inverse_output=True,
+        )
         self._current_object: str | None = None
         self._latest_frame: Image | None = None
         self._frame_lock = threading.Lock()
+        self._yolo: Any | None = None
+        self._yolo_device = "cpu"
+        self._yolo_half = False
+        self._yolo_model_name = YOLO_MODEL_NAME
+        self._yolo_imgsz = self._env_int("DIMOS_DRONE_YOLO_IMGSZ", YOLO_DEFAULT_IMGSZ, min_value=192)
+        self._yolo_max_det = self._env_int("DIMOS_DRONE_YOLO_MAX_DET", YOLO_DEFAULT_MAX_DET, min_value=1)
+        self._target_distance_m = 1.0
+        self._person_lock_bbox: tuple[int, int, int, int] | None = None
+        self._local_detector_enabled = (
+            self._enable_passive_overlay
+            or self._use_local_person_detector
+            or self._force_detection_servoing_for_person
+        )
+        if self._local_detector_enabled:
+            self._init_yolo_person_detector()
 
         # Subscribe to video input when transport is set
         # (will be done by connection module)
@@ -105,7 +196,125 @@ def _on_new_frame(self, frame: Image) -> None:
 
     def _on_follow_object_cmd(self, cmd: String) -> None:
         msg = json.loads(cmd.data)
-        self.track_object(msg["object_description"], msg["duration"])
+        self.track_object(
+            object_name=msg.get("object_description"),
+            duration=float(msg.get("duration", 120.0)),
+            distance_m=float(msg.get("distance_m", 1.0)),
+            control_mode=str(msg.get("control_mode", "full")),
+        )
+
+    def _create_tracker(self) -> Any | None:
+        """Create the best available OpenCV tracker for this environment."""
+        candidates: list[tuple[Any, str]] = []
+
+        if hasattr(cv2, "legacy"):
+            candidates.extend(
+                [
+                    (getattr(cv2.legacy, "TrackerCSRT_create", None), "legacy.CSRT"),
+                    (getattr(cv2.legacy, "TrackerKCF_create", None), "legacy.KCF"),
+                    (getattr(cv2.legacy, "TrackerMIL_create", None), "legacy.MIL"),
+                ]
+            )
+
+        candidates.extend(
+            [
+                (getattr(cv2, "TrackerCSRT_create", None), "CSRT"),
+                (getattr(cv2, "TrackerKCF_create", None), "KCF"),
+                (getattr(cv2, "TrackerMIL_create", None), "MIL"),
+            ]
+        )
+
+        for factory, name in candidates:
+            if callable(factory):
+                try:
+                    tracker = factory()
+                    logger.info(f"Using OpenCV tracker backend: {name}")
+                    return tracker
+                except Exception as e:
+                    logger.warning(f"Tracker backend {name} unavailable: {e}")
+
+        logger.error("No supported OpenCV tracker backend available (CSRT/KCF/MIL)")
+        return None
+
+    def _bbox_to_tracker_rect(
+        self,
+        frame: np.ndarray[Any, np.dtype[Any]],
+        bbox: tuple[int, int, int, int],
+    ) -> tuple[int, int, int, int] | None:
+        """Convert x1,y1,x2,y2 bbox into a safe x,y,w,h tracker rectangle."""
+        frame_h, frame_w = frame.shape[:2]
+        x1, y1, x2, y2 = bbox
+
+        x1 = max(0, min(int(x1), frame_w - 2))
+        y1 = max(0, min(int(y1), frame_h - 2))
+        x2 = max(x1 + 2, min(int(x2), frame_w - 1))
+        y2 = max(y1 + 2, min(int(y2), frame_h - 1))
+
+        w = x2 - x1
+        h = y2 - y1
+        if w < 2 or h < 2:
+            return None
+
+        # Expand box slightly to make initialization less brittle.
+        scale = 1.15
+        cx = x1 + (w / 2.0)
+        cy = y1 + (h / 2.0)
+        w2 = max(8, round(w * scale))
+        h2 = max(8, round(h * scale))
+
+        tx = max(0, round(cx - w2 / 2.0))
+        ty = max(0, round(cy - h2 / 2.0))
+        tw = min(w2, frame_w - tx)
+        th = min(h2, frame_h - ty)
+
+        if tw < 8 or th < 8:
+            return None
+
+        return (tx, ty, tw, th)
+
+    def _prepare_tracker_frame(
+        self,
+        frame: np.ndarray[Any, np.dtype[Any]],
+    ) -> np.ndarray[Any, np.dtype[Any]]:
+        """Normalize frame layout/type for OpenCV tracker APIs."""
+        prepared = frame
+        if prepared.dtype != np.uint8:
+            prepared = prepared.astype(np.uint8, copy=False)
+        return np.ascontiguousarray(prepared)
+
+    def _init_tracker(
+        self,
+        tracker: Any,
+        frame: np.ndarray[Any, np.dtype[Any]],
+        tracker_rect: tuple[int, int, int, int],
+    ) -> tuple[bool, str]:
+        """Initialize tracker with conservative argument fallbacks."""
+        init_frame = self._prepare_tracker_frame(frame)
+        rect_float = (
+            float(tracker_rect[0]),
+            float(tracker_rect[1]),
+            float(tracker_rect[2]),
+            float(tracker_rect[3]),
+        )
+        rect_variants: list[tuple[float, float, float, float] | tuple[int, int, int, int]] = [
+            rect_float,
+            tracker_rect,
+        ]
+
+        last_error: Exception | None = None
+        for rect in rect_variants:
+            try:
+                init_result = tracker.init(init_frame, rect)
+                init_failed = isinstance(init_result, bool) and not init_result
+                if init_failed:
+                    continue
+                return True, "ok"
+            except Exception as exc:
+                last_error = exc
+
+        if last_error is not None:
+            return False, f"{last_error!s}"
+        return False, "tracker init returned false"
 
     def _get_latest_frame(self) -> np.ndarray[Any, np.dtype[Any]] | None:
         """Get the latest video frame as numpy array."""
@@ -128,20 +337,47 @@ def start(self) -> None:
         if self.follow_object_cmd.transport:
             self.follow_object_cmd.subscribe(self._on_follow_object_cmd)
 
+        if self._enable_passive_overlay:
+            self._start_passive_overlay_loop()
         return
 
     @rpc
     def stop(self) -> None:
+        self._stop_passive_overlay_loop()
         self._stop_tracking()
         super().stop()
 
+    def _start_passive_overlay_loop(self) -> None:
+        if self._passive_overlay_active:
+            return
+        self._passive_overlay_active = True
+        self._passive_overlay_thread = threading.Thread(
+            target=self._passive_detection_overlay_loop,
+            daemon=True,
+        )
+        self._passive_overlay_thread.start()
+
+    def _stop_passive_overlay_loop(self) -> None:
+        self._passive_overlay_active = False
+        if self._passive_overlay_thread and self._passive_overlay_thread.is_alive():
+            self._passive_overlay_thread.join(timeout=1.0)
+        self._passive_overlay_thread = None
+
     @rpc
-    def track_object(self, object_name: str | None = None, duration: float = 120.0) -> str:
+    def track_object(
+        self,
+        object_name: str | None = None,
+        duration: float = 120.0,
+        distance_m: float = 1.0,
+        control_mode: str = "full",
+    ) -> str:
         """Track and follow an object using visual servoing.
 
         Args:
             object_name: Name of object to track, or None for most prominent
             duration: Maximum tracking duration in seconds
+            distance_m: Reserved follow-distance hint (person follow currently uses fixed approach speed)
+            control_mode: "full" (translate/strafe) or "yaw_only" (rotate in place)
 
         Returns:
             String status message
@@ -157,33 +393,145 @@ def track_object(self, object_name: str | None = None, duration: float = 120.0)
         logger.info(f"Starting track_object for {object_name or 'any object'}")
 
         try:
-            # Detect object with Qwen
-            logger.info("Detecting object with Qwen...")
-            bbox = get_bbox_from_qwen_frame(frame, object_name)
+            normalized_mode = control_mode.strip().lower()
+            if normalized_mode not in {"full", "yaw_only"}:
+                normalized_mode = "full"
+            self._control_mode = normalized_mode
+            self._reset_yaw_controller()
+            self._target_distance_m = max(FOLLOW_TARGET_MIN_M, min(FOLLOW_TARGET_MAX_M, distance_m))
+            self._person_lock_bbox = None
+
+            bbox = self._detect_initial_bbox(frame, object_name)
 
             if bbox is None:
                 msg = f"No object detected{' for: ' + object_name if object_name else ''}"
                 logger.warning(msg)
+                if self.tracking_overlay.transport:
+                    overlay = self._draw_search_overlay(
+                        frame, f"Searching for {object_name or 'object'} (not found)"
+                    )
+                    self.tracking_overlay.publish(Image.from_numpy(overlay, format=ImageFormat.BGR))
                 self._publish_status({"status": "not_found", "object": self._current_object})
                 return msg
 
             logger.info(f"Object detected at bbox: {bbox}")
 
-            # Initialize CSRT tracker (use legacy for OpenCV 4)
-            try:
-                tracker = cv2.legacy.TrackerCSRT_create()  # type: ignore[attr-defined]
-            except AttributeError:
-                tracker = cv2.TrackerCSRT_create()  # type: ignore[attr-defined]
+            if (
+                object_name is None or object_name.lower().strip() in PERSON_ALIASES
+            ) and self._local_detector_enabled:
+                confirmed_bbox = self._confirm_person_detection(bbox)
+                if confirmed_bbox is None:
+                    self._publish_status(
+                        {
+                            "status": "not_found",
+                            "object": object_name or "person",
+                            "error": "person confirmation failed",
+                        }
+                    )
+                    return "Detected candidate was not a stable person target"
+                bbox = confirmed_bbox
+
+            person_target = object_name is None or object_name.lower().strip() in PERSON_ALIASES
+            detector_person_mode = person_target and (
+                self._control_mode == "yaw_only" or self._force_detection_servoing_for_person
+            )
+
+            if detector_person_mode:
+                self._current_object = object_name or "person"
+                self._tracking_active = True
+                self._publish_status(
+                    {
+                        "status": "tracking",
+                        "object": self._current_object,
+                        "mode": self._control_mode,
+                        "backend": "yolo_detection",
+                    }
+                )
+                self._tracking_thread = threading.Thread(
+                    target=self._detection_servoing_loop,
+                    args=(object_name, duration, self._target_distance_m),
+                    daemon=True,
+                )
+                self._tracking_thread.start()
+                if self._control_mode == "yaw_only":
+                    return f"Yaw-only tracking started for {self._current_object}"
+                return f"Tracking started for {self._current_object} (YOLO detection mode)"
+
+            if self._control_mode == "yaw_only":
+                self._current_object = object_name or "object"
+                self._tracking_active = True
+                self._publish_status(
+                    {
+                        "status": "tracking",
+                        "object": self._current_object,
+                        "mode": "yaw_only",
+                    }
+                )
+                self._tracking_thread = threading.Thread(
+                    target=self._detection_servoing_loop,
+                    args=(object_name, duration, self._target_distance_m),
+                    daemon=True,
+                )
+                self._tracking_thread.start()
+                return f"Yaw-only tracking started for {self._current_object}"
 
-            # Convert bbox format from [x1, y1, x2, y2] to [x, y, w, h]
-            x1, y1, x2, y2 = bbox
-            x, y, w, h = x1, y1, x2 - x1, y2 - y1
+            tracker = self._create_tracker()
+            if tracker is None:
+                self._publish_status(
+                    {
+                        "status": "failed",
+                        "object": self._current_object,
+                        "error": "No supported OpenCV tracker backend",
+                    }
+                )
+                return "Failed: no supported OpenCV tracker backend (need at least MIL/KCF/CSRT)"
+
+            tracker_rect = self._bbox_to_tracker_rect(frame, bbox)
+            if tracker_rect is None:
+                self._publish_status(
+                    {
+                        "status": "failed",
+                        "object": self._current_object,
+                        "error": "Invalid detection bbox for tracker init",
+                    }
+                )
+                return "Failed: invalid detection bbox"
 
             # Initialize tracker
-            success = tracker.init(frame, (x, y, w, h))
-            if not success:
-                self._publish_status({"status": "failed", "object": self._current_object})
-                return "Failed to initialize tracker"
+            init_ok, init_reason = self._init_tracker(tracker, frame, tracker_rect)
+            if not init_ok:
+                if not self._local_detector_enabled:
+                    self._publish_status(
+                        {
+                            "status": "failed",
+                            "object": self._current_object,
+                            "error": f"Tracker init failed: {init_reason}",
+                        }
+                    )
+                    return f"Failed to initialize tracker: {init_reason}"
+                # Fall back to detector-only loop so follow behavior still works
+                # in OpenCV builds where tracker init is unreliable.
+                logger.warning(f"Tracker init failed ({init_reason}); using detection fallback loop")
+                self._current_object = object_name or "object"
+                self._tracking_active = True
+                self._publish_status(
+                    {
+                        "status": "tracking",
+                        "object": self._current_object,
+                        "mode": "detection_fallback",
+                        "reason": init_reason,
+                    }
+                )
+                self._tracking_thread = threading.Thread(
+                    target=self._detection_servoing_loop,
+                    args=(object_name, duration, self._target_distance_m),
+                    daemon=True,
+                )
+                self._tracking_thread.start()
+                return (
+                    f"Tracking started for {self._current_object} "
+                    "(detection fallback mode)"
+                )
 
             self._current_object = object_name or "object"
             self._tracking_active = True
@@ -201,6 +549,536 @@ def track_object(self, object_name: str | None = None, duration: float = 120.0)
             self._stop_tracking()
             return f"Tracking failed: {e!s}"
 
+    def _detect_initial_bbox(
+        self,
+        frame: np.ndarray[Any, np.dtype[Any]],
+        object_name: str | None,
+    ) -> tuple[int, int, int, int] | None:
+        """Detect initial bbox using Qwen when available, with local fallback for person."""
+        qwen_available = bool(os.getenv("ALIBABA_API_KEY"))
+        if qwen_available:
+            try:
+                logger.info("Detecting object with Qwen...")
+                # Qwen helper expects RGB input.
+                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                bbox = get_bbox_from_qwen_frame(rgb_frame, object_name)
+                if bbox is not None:
+                    return (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))
+            except Exception as e:
+                logger.warning(f"Qwen detection failed: {e}")
+        else:
+            if not self._use_local_person_detector:
+                logger.warning(
+                    "ALIBABA_API_KEY is not set and local person detector fallback is disabled"
+                )
+                return None
+            logger.warning("ALIBABA_API_KEY is not set; using local YOLO detector")
+
+        if object_name is not None and object_name.lower().strip() not in PERSON_ALIASES:
+            return None
+
+        return self._detect_person_local(frame)
+
+    def _bbox_iou(
+        self,
+        a: tuple[int, int, int, int],
+        b: tuple[int, int, int, int],
+    ) -> float:
+        ax1, ay1, ax2, ay2 = a
+        bx1, by1, bx2, by2 = b
+
+        inter_x1 = max(ax1, bx1)
+        inter_y1 = max(ay1, by1)
+        inter_x2 = min(ax2, bx2)
+        inter_y2 = min(ay2, by2)
+
+        inter_w = max(0, inter_x2 - inter_x1)
+        inter_h = max(0, inter_y2 - inter_y1)
+        inter_area = inter_w * inter_h
+
+        area_a = max(1, (ax2 - ax1) * (ay2 - ay1))
+        area_b = max(1, (bx2 - bx1) * (by2 - by1))
+        union = area_a + area_b - inter_area
+        if union <= 0:
+            return 0.0
+        return inter_area / union
+
+    def _is_plausible_person_bbox(
+        self,
+        frame: np.ndarray[Any, np.dtype[Any]],
+        bbox: tuple[int, int, int, int],
+        score: float,
+    ) -> bool:
+        frame_h, frame_w = frame.shape[:2]
+        x1, y1, x2, y2 = bbox
+        w = max(1, x2 - x1)
+        h = max(1, y2 - y1)
+
+        aspect = h / float(w)
+        area_ratio = (w * h) / float(frame_w * frame_h)
+        height_ratio = h / float(frame_h)
+
+        if score < YOLO_PERSON_CONF_MIN:
+            return False
+        if aspect < 1.15 or aspect > 4.8:
+            return False
+        if height_ratio < 0.08 or height_ratio > 0.98:
+            return False
+        if area_ratio < 0.003 or area_ratio > 0.55:
+            return False
+
+        # Tiny edge-hugging boxes are usually false positives.
+        edge_margin = 2
+        touches_edge = (
+            x1 <= edge_margin
+            or y1 <= edge_margin
+            or x2 >= frame_w - edge_margin
+            or y2 >= frame_h - edge_margin
+        )
+        if touches_edge and area_ratio < 0.02:
+            return False
+
+        return True
+
+    def _init_yolo_person_detector(self) -> None:
+        """Initialize local YOLO pose model for person detection."""
+        try:
+            from ultralytics import YOLO  # type: ignore[attr-defined, import-not-found]
+
+            device, cuda_ok = self._select_yolo_device()
+            self._yolo_device = device
+            self._yolo_half = cuda_ok
+            configured_model = os.getenv("DIMOS_DRONE_YOLO_MODEL")
+            self._yolo_model_name = (
+                configured_model if configured_model else (YOLO_MODEL_NAME if cuda_ok else YOLO_MODEL_NAME_CPU)
+            )
+
+            model_path = get_data("models_yolo") / self._yolo_model_name
+            self._yolo = YOLO(model_path, task="pose")
+            warmup_ok = self._warmup_yolo()
+            if not warmup_ok and self._yolo_device.startswith("cuda"):
+                logger.warning("YOLO CUDA warmup failed; falling back to CPU")
+                self._yolo_device = "cpu"
+                self._yolo_half = False
+                if configured_model is None and self._yolo_model_name != YOLO_MODEL_NAME_CPU:
+                    self._yolo_model_name = YOLO_MODEL_NAME_CPU
+                    cpu_model_path = get_data("models_yolo") / self._yolo_model_name
+                    self._yolo = YOLO(cpu_model_path, task="pose")
+                _ = self._warmup_yolo()
+            logger.info(
+                "Initialized YOLO person detector "
+                f"model={self._yolo_model_name} device={self._yolo_device} "
+                f"half={self._yolo_half} imgsz={self._yolo_imgsz} max_det={self._yolo_max_det}"
+            )
+        except Exception as e:
+            self._yolo = None
+            logger.warning(f"YOLO person detector unavailable: {e}")
+
+    def _env_int(self, name: str, default: int, min_value: int = 1) -> int:
+        raw = os.getenv(name)
+        if raw is None:
+            return default
+        try:
+            return max(min_value, int(raw))
+        except ValueError:
+            logger.warning(f"Invalid {name}={raw!r}; using default {default}")
+            return default
+
+    def _select_yolo_device(self) -> tuple[str, bool]:
+        override = os.getenv("DIMOS_DRONE_YOLO_DEVICE")
+        if override:
+            normalized = override.strip().lower()
+            if normalized == "cpu":
+                return "cpu", False
+            if normalized.startswith("cuda"):
+                return normalized, True
+
+        try:
+            import torch
+
+            if torch.cuda.is_available():
+                # Force lazy init early so we can fail fast and avoid inference-time stalls.
+                _ = torch.cuda.get_device_name(0)
+                return "cuda:0", True
+
+            if torch.cuda.device_count() > 0:
+                logger.warning(
+                    "Torch reports CUDA devices but initialization failed; using CPU. "
+                    "Run `uv run python -c \"import torch; print(torch.cuda.is_available()); "
+                    "print(torch.cuda.get_device_name(0))\"` to diagnose."
+                )
+        except Exception as e:
+            logger.warning(f"Torch CUDA check failed; using CPU: {e}")
+
+        return "cpu", False
+
+    def _warmup_yolo(self) -> bool:
+        if self._yolo is None:
+            return False
+        try:
+            warmup = np.zeros((self._yolo_imgsz, self._yolo_imgsz, 3), dtype=np.uint8)
+            _ = self._yolo.predict(
+                source=warmup,
+                conf=YOLO_PERSON_CONF_MIN,
+                iou=0.5,
+                classes=[0],
+                imgsz=self._yolo_imgsz,
+                max_det=1,
+                verbose=False,
+                device=self._yolo_device,
+                half=self._yolo_half,
+            )
+            return True
+        except Exception as e:
+            logger.warning(f"YOLO warmup failed: {e}")
+            return False
+
+    def _detect_person_yolo_candidates(
+        self,
+        frame: np.ndarray[Any, np.dtype[Any]],
+    ) -> list[tuple[tuple[int, int, int, int], float, int]]:
+        """Detect person candidates using local YOLO pose model."""
+        if self._yolo is None:
+            return []
+
+        try:
+            results = self._yolo.predict(
+                source=frame,
+                conf=YOLO_PERSON_CONF_MIN,
+                iou=0.5,
+                classes=[0],  # person class
+                imgsz=self._yolo_imgsz,
+                max_det=self._yolo_max_det,
+                verbose=False,
+                device=self._yolo_device,
+                half=self._yolo_half,
+            )
+        except Exception as e:
+            logger.warning(f"YOLO detection inference failed: {e}")
+            return []
+
+        candidates: list[tuple[tuple[int, int, int, int], float, int]] = []
+        frame_h, frame_w = frame.shape[:2]
+
+        for result in results:
+            boxes = getattr(result, "boxes", None)
+            if boxes is None or boxes.xyxy is None:
+                continue
+
+            kp_conf = None
+            keypoints = getattr(result, "keypoints", None)
+            if keypoints is not None and getattr(keypoints, "conf", None) is not None:
+                kp_conf = keypoints.conf
+
+            num_dets = len(boxes.xyxy)
+            for i in range(num_dets):
+                try:
+                    class_id = int(boxes.cls[i].item())
+                    if class_id != 0:
+                        continue
+                    conf = float(boxes.conf[i].item())
+                    xyxy = boxes.xyxy[i].cpu().numpy()
+                    bbox = (int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3]))
+                except Exception:
+                    continue
+
+                if not self._is_plausible_person_bbox(frame, bbox, conf):
+                    continue
+
+                visible_keypoints = 0
+                if kp_conf is not None:
+                    try:
+                        kp = kp_conf[i].cpu().numpy()
+                        visible_keypoints = int((kp > 0.35).sum())
+                    except Exception:
+                        visible_keypoints = 0
+
+                if visible_keypoints < YOLO_PERSON_MIN_KEYPOINTS:
+                    continue
+
+                cx = (bbox[0] + bbox[2]) / 2.0
+                cy = (bbox[1] + bbox[3]) / 2.0
+                nx = abs((cx / frame_w) - 0.5)
+                ny = abs((cy / frame_h) - 0.55)
+                center_bonus = max(0.0, 1.0 - (nx + ny))
+
+                score = conf + (0.2 * center_bonus) + (0.01 * visible_keypoints)
+                candidates.append((bbox, score, visible_keypoints))
+
+        return candidates
+
+    def _detect_person_yolo(
+        self,
+        frame: np.ndarray[Any, np.dtype[Any]],
+        log_miss: bool = True,
+        log_hit: bool = True,
+        anchor_bbox: tuple[int, int, int, int] | None = None,
+    ) -> tuple[int, int, int, int] | None:
+        """Detect best person using local YOLO pose model."""
+        candidates = self._detect_person_yolo_candidates(frame)
+        if not candidates:
+            if log_miss:
+                logger.info("YOLO detector found no person")
+            return None
+
+        best_bbox, best_score, best_kp = self._select_person_candidate(candidates, anchor_bbox)
+        if log_hit:
+            logger.info(
+                f"YOLO person detection bbox={best_bbox} score={best_score:.3f} keypoints={best_kp}"
+            )
+        return best_bbox
+
+    def _detect_person_local(
+        self,
+        frame: np.ndarray[Any, np.dtype[Any]],
+        log_miss: bool = True,
+        log_hit: bool = True,
+        anchor_bbox: tuple[int, int, int, int] | None = None,
+    ) -> tuple[int, int, int, int] | None:
+        """Local detector path: YOLO person pose only."""
+        return self._detect_person_yolo(
+            frame,
+            log_miss=log_miss,
+            log_hit=log_hit,
+            anchor_bbox=anchor_bbox,
+        )
+
+    def _select_person_candidate(
+        self,
+        candidates: list[tuple[tuple[int, int, int, int], float, int]],
+        anchor_bbox: tuple[int, int, int, int] | None,
+    ) -> tuple[tuple[int, int, int, int], float, int]:
+        if anchor_bbox is None:
+            return max(candidates, key=lambda x: x[1])
+
+        def _rank(item: tuple[tuple[int, int, int, int], float, int]) -> float:
+            bbox, score, _ = item
+            iou_bonus = 0.35 * self._bbox_iou(anchor_bbox, bbox)
+            return score + iou_bonus
+
+        return max(candidates, key=_rank)
+
+    def _confirm_person_detection(
+        self,
+        seed_bbox: tuple[int, int, int, int],
+        timeout_s: float = 2.5,
+    ) -> tuple[int, int, int, int] | None:
+        """Require consistent person detections before enabling follow."""
+        hits = 1
+        misses = 0
+        max_misses = 3
+        last_bbox = seed_bbox
+        deadline = time.time() + timeout_s
+
+        while time.time() < deadline and hits < PERSON_CONFIRM_FRAMES:
+            frame = self._get_latest_frame()
+            if frame is None:
+                time.sleep(0.03)
+                continue
+
+            candidate = self._detect_person_local(
+                self._prepare_tracker_frame(frame),
+                log_miss=False,
+                log_hit=False,
+                anchor_bbox=last_bbox,
+            )
+            if candidate is None:
+                misses += 1
+                if misses > max_misses:
+                    return None
+                time.sleep(0.05)
+                continue
+
+            iou = self._bbox_iou(last_bbox, candidate)
+            if iou >= PERSON_CONFIRM_IOU:
+                hits += 1
+                last_bbox = candidate
+            else:
+                hits = 1
+                last_bbox = candidate
+            time.sleep(0.05)
+
+        if hits >= PERSON_CONFIRM_FRAMES:
+            return last_bbox
+        return None
+
+    def _draw_labeled_bbox(
+        self,
+        image: NDArray[np.uint8],
+        bbox: tuple[int, int, int, int],
+        label: str,
+        color: tuple[int, int, int] = (0, 255, 0),
+    ) -> NDArray[np.uint8]:  # type: ignore[type-arg]
+        """Draw a bbox with text label."""
+        overlay: NDArray[np.uint8] = image.copy()  # type: ignore[type-arg]
+        x1, y1, x2, y2 = bbox
+        cv2.rectangle(overlay, (x1, y1), (x2, y2), color, 2)
+        text_y = max(18, y1 - 8)
+        cv2.putText(
+            overlay,
+            label,
+            (x1, text_y),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.6,
+            color,
+            2,
+        )
+        return overlay
+
+    def _passive_detection_overlay_loop(self) -> None:
+        """Continuously publish detection overlay even when not actively following."""
+        while self._passive_overlay_active:
+            if self._tracking_active:
+                time.sleep(PASSIVE_DETECT_INTERVAL_SEC)
+                continue
+
+            frame = self._get_latest_frame()
+            if frame is None:
+                time.sleep(PASSIVE_DETECT_INTERVAL_SEC)
+                continue
+
+            detect_frame = self._prepare_tracker_frame(frame)
+            overlay = detect_frame.copy()
+
+            yolo_candidates = self._detect_person_yolo_candidates(detect_frame)
+            if yolo_candidates:
+                sorted_candidates = sorted(yolo_candidates, key=lambda x: x[1], reverse=True)
+                for bbox, score, kps in sorted_candidates[:3]:
+                    overlay = self._draw_labeled_bbox(
+                        overlay,
+                        bbox,
+                        f"person {score:.2f} kp={kps}",
+                        color=(0, 255, 0),
+                    )
+                overlay = self._draw_search_overlay(
+                    overlay,
+                    f"Passive detect: {len(yolo_candidates)} person(s) [YOLO]",
+                )
+            else:
+                overlay = self._draw_search_overlay(
+                    overlay,
+                    "Passive detect: no person",
+                )
+
+            if self.tracking_overlay.transport:
+                self.tracking_overlay.publish(Image.from_numpy(overlay, format=ImageFormat.BGR))
+
+            time.sleep(PASSIVE_DETECT_INTERVAL_SEC)
+
+    def _compute_yaw_rate(self, current_x: float, center_x: float) -> float:
+        """Compute yaw-rate with PID control on normalized horizontal image error."""
+        now = time.time()
+        raw_error = (current_x - center_x) / max(center_x, 1.0)
+
+        # If we just crossed centerline, briefly brake yaw to avoid ping-pong from latency.
+        crossed_center = (self._yaw_prev_raw_error * raw_error) < 0.0
+        if crossed_center and max(abs(self._yaw_prev_raw_error), abs(raw_error)) <= YAW_ONLY_CROSS_BRAKE_ERR_RATIO:
+            self._yaw_cross_brake_until = now + YAW_ONLY_CROSS_BRAKE_SEC
+            self._yaw_rate_ema = 0.0
+            self._yaw_pid.integral = 0.0
+            self._yaw_pid.prev_error = 0.0
+            self._yaw_lock_active = True
+
+        if now < self._yaw_cross_brake_until:
+            self._yaw_prev_raw_error = raw_error
+            return 0.0
+
+        self._yaw_error_lpf = (YAW_ONLY_ERROR_LPF_ALPHA * self._yaw_error_lpf) + (
+            (1.0 - YAW_ONLY_ERROR_LPF_ALPHA) * raw_error
+        )
+        error = self._yaw_error_lpf
+
+        # Hysteresis lock around centerline prevents sign-flip oscillation near zero error.
+        if self._yaw_lock_active:
+            if abs(raw_error) <= YAW_ONLY_LOCK_EXIT_RATIO:
+                self._yaw_rate_ema = 0.0
+                self._yaw_pid.integral = 0.0
+                self._yaw_pid.prev_error = 0.0
+                self._yaw_prev_raw_error = raw_error
+                return 0.0
+            self._yaw_lock_active = False
+
+        target_rate = float(self._yaw_pid.update(error, YAW_ONLY_DT))  # type: ignore[no-untyped-call]
+
+        # Keep enough turning authority once the target is meaningfully off-center.
+        if abs(raw_error) >= YAW_ONLY_MIN_RATE_ERR_RATIO and abs(target_rate) < YAW_ONLY_MIN_RATE:
+            sign = 1.0 if error > 0 else -1.0
+            target_rate = sign * YAW_ONLY_MIN_RATE
+
+        # Taper max yaw rate as target approaches center to avoid overshoot in large turns.
+        abs_error = abs(raw_error)
+        if abs_error < YAW_ONLY_DECEL_ERR_RATIO:
+            frac = abs_error / max(YAW_ONLY_DECEL_ERR_RATIO, 1e-6)
+            adaptive_limit = YAW_ONLY_DECEL_MIN_RATE + (
+                (YAW_ONLY_MAX_RATE - YAW_ONLY_DECEL_MIN_RATE) * frac
+            )
+            target_rate = max(-adaptive_limit, min(adaptive_limit, target_rate))
+
+        prev_rate = self._yaw_rate_ema
+        filtered_rate = (YAW_ONLY_CMD_SMOOTH_ALPHA * prev_rate) + (
+            (1.0 - YAW_ONLY_CMD_SMOOTH_ALPHA) * target_rate
+        )
+
+        # Limit per-step command delta to reduce oscillation and overshoot near lock.
+        max_step = YAW_ONLY_RATE_SLEW_PER_SEC * YAW_ONLY_DT
+        delta = filtered_rate - prev_rate
+        if delta > max_step:
+            filtered_rate = prev_rate + max_step
+        elif delta < -max_step:
+            filtered_rate = prev_rate - max_step
+        self._yaw_rate_ema = filtered_rate
+
+        if abs(target_rate) <= YAW_ONLY_ZERO_RATE_EPS and abs(self._yaw_rate_ema) <= YAW_ONLY_ZERO_RATE_EPS:
+            self._yaw_rate_ema = 0.0
+
+        if abs(raw_error) <= YAW_ONLY_LOCK_ENTER_RATIO and abs(self._yaw_rate_ema) <= YAW_ONLY_LOCK_CMD_EPS:
+            self._yaw_lock_active = True
+            self._yaw_rate_ema = 0.0
+            self._yaw_pid.integral = 0.0
+            self._yaw_pid.prev_error = 0.0
+            self._yaw_prev_raw_error = raw_error
+            return 0.0
+
+        self._yaw_prev_raw_error = raw_error
+        return float(self._yaw_rate_ema)
+
+    def _reset_yaw_controller(self) -> None:
+        self._yaw_rate_ema = 0.0
+        self._yaw_error_lpf = 0.0
+        self._yaw_lock_active = False
+        self._yaw_prev_raw_error = 0.0
+        self._yaw_cross_brake_until = 0.0
+        self._yaw_pid.integral = 0.0
+        self._yaw_pid.prev_error = 0.0
+
+    def _compute_person_follow_command(
+        self,
+        bbox: tuple[int, int, int, int],
+        frame_width: int,
+        _frame_height: int,
+        current_x: float,
+    ) -> tuple[float, float, float, float]:
+        yaw_rate = self._compute_yaw_rate(current_x, frame_width / 2.0)
+        norm_yaw_error = abs((current_x - (frame_width / 2.0)) / max(frame_width / 2.0, 1.0))
+
+        x1, _y1, x2, _y2 = bbox
+        vx = FOLLOW_APPROACH_SPEED
+
+        if norm_yaw_error >= FOLLOW_ALIGN_STOP_ERR:
+            vx = 0.0
+        elif norm_yaw_error > FOLLOW_ALIGN_SOFT_ERR:
+            soft = 1.0 - (
+                (norm_yaw_error - FOLLOW_ALIGN_SOFT_ERR)
+                / max(FOLLOW_ALIGN_STOP_ERR - FOLLOW_ALIGN_SOFT_ERR, 1e-6)
+            )
+            vx *= max(0.25, min(1.0, soft))
+
+        edge_margin = int(frame_width * FOLLOW_EDGE_MARGIN_RATIO)
+        if (x1 < edge_margin or x2 > (frame_width - edge_margin)) and vx > 0.0:
+            vx = 0.0
+
+        return float(vx), 0.0, 0.0, float(yaw_rate)
+
     def _visual_servoing_loop(self, tracker: Any, duration: float) -> None:
         """Main visual servoing control loop.
 
@@ -226,9 +1104,15 @@ def _visual_servoing_loop(self, tracker: Any, duration: float) -> None:
                 frame_count += 1
 
                 # Update tracker
-                success, bbox = tracker.update(frame)
-
-                if not success:
+                update_frame = self._prepare_tracker_frame(frame)
+                update_result = tracker.update(update_frame)
+                success = False
+                bbox: Any = None
+                if isinstance(update_result, tuple) and len(update_result) == 2:
+                    success = bool(update_result[0])
+                    bbox = update_result[1]
+
+                if not success or bbox is None:
                     lost_track_count += 1
                     logger.warning(f"Lost track (count: {lost_track_count})")
 
@@ -252,26 +1136,35 @@ def _visual_servoing_loop(self, tracker: Any, duration: float) -> None:
                 center_x = frame_width / 2
                 center_y = frame_height / 2
 
-                # Compute velocity commands
-                vx, vy, vz = self.servoing_controller.compute_velocity_control(
-                    target_x=current_x,
-                    target_y=current_y,
-                    center_x=center_x,
-                    center_y=center_y,
-                    dt=0.033,  # ~30Hz
-                    lock_altitude=True,
-                )
+                yaw_rate = 0.0
+                if self._control_mode == "yaw_only":
+                    vx, vy, vz = 0.0, 0.0, 0.0
+                    yaw_rate = self._compute_yaw_rate(current_x, center_x)
+                else:
+                    # Compute velocity commands
+                    vx, vy, vz = self.servoing_controller.compute_velocity_control(
+                        target_x=current_x,
+                        target_y=current_y,
+                        center_x=center_x,
+                        center_y=center_y,
+                        dt=0.033,  # ~30Hz
+                        lock_altitude=True,
+                    )
 
-                # Clamp velocity for indoor safety
-                if self._max_velocity is not None:
-                    vx = max(-self._max_velocity, min(self._max_velocity, vx))
-                    vy = max(-self._max_velocity, min(self._max_velocity, vy))
+                    # Clamp velocity for indoor safety
+                    if self._max_velocity is not None:
+                        vx = max(-self._max_velocity, min(self._max_velocity, vx))
+                        vy = max(-self._max_velocity, min(self._max_velocity, vy))
+                    if self._max_forward_velocity is not None:
+                        vx = max(-self._max_forward_velocity, min(self._max_forward_velocity, vx))
+                    if self._max_lateral_velocity is not None:
+                        vy = max(-self._max_lateral_velocity, min(self._max_lateral_velocity, vy))
 
                 # Publish velocity command via LCM
                 if self.cmd_vel.transport:
                     twist = Twist()
                     twist.linear = Vector3(vx, vy, 0)
-                    twist.angular = Vector3(0, 0, 0)  # No rotation for now
+                    twist.angular = Vector3(0, 0, yaw_rate)
                     self.cmd_vel.publish(twist)
 
                 # Publish visualization if transport is set
@@ -291,6 +1184,8 @@ def _visual_servoing_loop(self, tracker: Any, duration: float) -> None:
                         "center": [int(current_x), int(current_y)],
                         "error": [int(current_x - center_x), int(current_y - center_y)],
                         "velocity": [float(vx), float(vy), float(vz)],
+                        "yaw_rate": float(yaw_rate),
+                        "mode": self._control_mode,
                         "frame": frame_count,
                     }
                 )
@@ -310,6 +1205,196 @@ def _visual_servoing_loop(self, tracker: Any, duration: float) -> None:
             self._tracking_active = False
             logger.info(f"Visual servoing loop ended after {frame_count} frames")
 
+    def _detection_servoing_loop(
+        self, object_name: str | None, duration: float, distance_m: float
+    ) -> None:
+        """Detector-driven servoing loop that re-detects the target every frame."""
+        start_time = time.time()
+        frame_count = 0
+        lost_track_count = 0
+        max_lost_frames = 60
+        confirm_hits = 0
+        confirmed = False
+        last_candidate_bbox: tuple[int, int, int, int] | None = None
+        person_target = object_name is None or object_name.lower().strip() in PERSON_ALIASES
+
+        logger.info("Starting detector-driven servoing loop")
+
+        try:
+            while self._tracking_active and (time.time() - start_time < duration):
+                frame = self._get_latest_frame()
+                if frame is None:
+                    time.sleep(0.02)
+                    continue
+
+                frame_count += 1
+                detect_frame = self._prepare_tracker_frame(frame)
+
+                bbox_xyxy: tuple[int, int, int, int] | None
+                if person_target:
+                    bbox_xyxy = self._detect_person_local(
+                        detect_frame,
+                        log_miss=False,
+                        log_hit=False,
+                        anchor_bbox=self._person_lock_bbox,
+                    )
+                else:
+                    bbox_xyxy = self._detect_initial_bbox(detect_frame, object_name)
+
+                if bbox_xyxy is None:
+                    lost_track_count += 1
+                    if not confirmed:
+                        confirm_hits = max(0, confirm_hits - 1)
+                    if person_target and lost_track_count > 8:
+                        self._person_lock_bbox = None
+                    if lost_track_count % 10 == 0:
+                        logger.warning(f"Detector loop has no target (count: {lost_track_count})")
+                    if self.tracking_overlay.transport and lost_track_count % 5 == 0:
+                        overlay = self._draw_search_overlay(
+                            detect_frame, f"Searching for {object_name or 'object'}"
+                        )
+                        self.tracking_overlay.publish(Image.from_numpy(overlay, format=ImageFormat.BGR))
+                    if lost_track_count >= max_lost_frames:
+                        self._publish_status(
+                            {"status": "lost", "object": self._current_object, "frame": frame_count}
+                        )
+                        break
+                    time.sleep(0.05)
+                    continue
+
+                lost_track_count = 0
+                if person_target:
+                    self._person_lock_bbox = bbox_xyxy
+
+                if not confirmed:
+                    if (
+                        last_candidate_bbox is None
+                        or self._bbox_iou(last_candidate_bbox, bbox_xyxy) >= PERSON_CONFIRM_IOU
+                    ):
+                        confirm_hits += 1
+                    else:
+                        confirm_hits = 1
+                    last_candidate_bbox = bbox_xyxy
+
+                    if confirm_hits < PERSON_CONFIRM_FRAMES:
+                        if self.cmd_vel.transport:
+                            hold_twist = Twist()
+                            hold_twist.linear = Vector3(0, 0, 0)
+                            hold_twist.angular = Vector3(0, 0, 0)
+                            self.cmd_vel.publish(hold_twist)
+                        if self.tracking_overlay.transport:
+                            cx = int((bbox_xyxy[0] + bbox_xyxy[2]) / 2)
+                            cy = int((bbox_xyxy[1] + bbox_xyxy[3]) / 2)
+                            overlay = self._draw_tracking_overlay(
+                                detect_frame,
+                                (
+                                    int(bbox_xyxy[0]),
+                                    int(bbox_xyxy[1]),
+                                    int(max(1, bbox_xyxy[2] - bbox_xyxy[0])),
+                                    int(max(1, bbox_xyxy[3] - bbox_xyxy[1])),
+                                ),
+                                (cx, cy),
+                            )
+                            overlay = self._draw_search_overlay(
+                                overlay,
+                                f"Candidate person {confirm_hits}/{PERSON_CONFIRM_FRAMES}",
+                            )
+                            self.tracking_overlay.publish(Image.from_numpy(overlay, format=ImageFormat.BGR))
+                        time.sleep(0.05)
+                        continue
+
+                    confirmed = True
+                    logger.info("Detector loop person target confirmed")
+
+                x1, y1, x2, y2 = bbox_xyxy
+                x = int(x1)
+                y = int(y1)
+                w = int(max(1, x2 - x1))
+                h = int(max(1, y2 - y1))
+                current_x = x + (w / 2.0)
+                current_y = y + (h / 2.0)
+
+                frame_height, frame_width = detect_frame.shape[:2]
+                center_x = frame_width / 2
+                center_y = frame_height / 2
+
+                yaw_rate = 0.0
+                bbox_height_ratio = h / float(max(frame_height, 1))
+                if self._control_mode == "yaw_only":
+                    vx, vy, vz = 0.0, 0.0, 0.0
+                    yaw_rate = self._compute_yaw_rate(current_x, center_x)
+                elif person_target and self._person_follow_policy == "yaw_forward_constant":
+                    self._target_distance_m = max(
+                        FOLLOW_TARGET_MIN_M, min(FOLLOW_TARGET_MAX_M, distance_m)
+                    )
+                    vx, vy, vz, yaw_rate = self._compute_person_follow_command(
+                        (x, y, x + w, y + h),
+                        frame_width,
+                        frame_height,
+                        current_x,
+                    )
+                else:
+                    vx, vy, vz = self.servoing_controller.compute_velocity_control(
+                        target_x=current_x,
+                        target_y=current_y,
+                        center_x=center_x,
+                        center_y=center_y,
+                        dt=0.05,
+                        lock_altitude=True,
+                    )
+                    yaw_rate = 0.0
+
+                    if self._max_velocity is not None:
+                        vx = max(-self._max_velocity, min(self._max_velocity, vx))
+                        vy = max(-self._max_velocity, min(self._max_velocity, vy))
+                    if self._max_forward_velocity is not None:
+                        vx = max(-self._max_forward_velocity, min(self._max_forward_velocity, vx))
+                    if self._max_lateral_velocity is not None:
+                        vy = max(-self._max_lateral_velocity, min(self._max_lateral_velocity, vy))
+
+                if self.cmd_vel.transport:
+                    twist = Twist()
+                    twist.linear = Vector3(vx, vy, 0)
+                    twist.angular = Vector3(0, 0, yaw_rate)
+                    self.cmd_vel.publish(twist)
+
+                if self.tracking_overlay.transport:
+                    overlay = self._draw_tracking_overlay(
+                        detect_frame, (x, y, w, h), (int(current_x), int(current_y))
+                    )
+                    overlay_msg = Image.from_numpy(overlay, format=ImageFormat.BGR)
+                    self.tracking_overlay.publish(overlay_msg)
+
+                self._publish_status(
+                    {
+                        "status": "tracking",
+                        "object": self._current_object,
+                        "backend": "yolo_detection",
+                        "bbox": [x, y, w, h],
+                        "center": [int(current_x), int(current_y)],
+                        "error": [int(current_x - center_x), int(current_y - center_y)],
+                        "velocity": [float(vx), float(vy), float(vz)],
+                        "yaw_rate": float(yaw_rate),
+                        "mode": self._control_mode,
+                        "distance_target_m": float(self._target_distance_m),
+                        "bbox_height_ratio": float(bbox_height_ratio),
+                        "frame": frame_count,
+                    }
+                )
+
+                time.sleep(0.05)
+
+        except Exception as e:
+            logger.error(f"Error in detection fallback loop: {e}")
+        finally:
+            if self.cmd_vel.transport:
+                stop_twist = Twist()
+                stop_twist.linear = Vector3(0, 0, 0)
+                stop_twist.angular = Vector3(0, 0, 0)
+                self.cmd_vel.publish(stop_twist)
+            self._tracking_active = False
+            logger.info(f"Detector loop ended after {frame_count} frames")
+
     def _draw_tracking_overlay(
         self,
         frame: NDArray[np.uint8],
@@ -357,6 +1442,27 @@ def _draw_tracking_overlay(
 
         return overlay
 
+    def _draw_search_overlay(
+        self,
+        frame: NDArray[np.uint8],
+        text: str,
+    ) -> NDArray[np.uint8]:  # type: ignore[type-arg]
+        """Draw lightweight debug overlay while target is not detected."""
+        overlay: NDArray[np.uint8] = frame.copy()  # type: ignore[type-arg]
+        frame_h, frame_w = overlay.shape[:2]
+        center = (frame_w // 2, frame_h // 2)
+        cv2.drawMarker(overlay, center, (255, 255, 0), cv2.MARKER_CROSS, 30, 2)
+        cv2.putText(
+            overlay,
+            text,
+            (10, 30),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.8,
+            (0, 200, 255),
+            2,
+        )
+        return overlay
+
     def _publish_status(self, status: dict[str, Any]) -> None:
         """Publish tracking status as JSON.
 
@@ -383,6 +1489,10 @@ def _stop_tracking(self) -> None:
         self._publish_status({"status": "stopped", "object": self._current_object})
 
         self._current_object = None
+        self._control_mode = "full"
+        self._person_lock_bbox = None
+        self._target_distance_m = 1.0
+        self._reset_yaw_controller()
         logger.info("Tracking stopped")
 
     @rpc
diff --git a/dimos/robot/drone/tello_connection_module.py b/dimos/robot/drone/tello_connection_module.py
new file mode 100644
index 0000000000..9cf10e9fe5
--- /dev/null
+++ b/dimos/robot/drone/tello_connection_module.py
@@ -0,0 +1,557 @@
+#!/usr/bin/env python3
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""DimOS module wrapper for RoboMaster TT / Tello SDK control."""
+
+from __future__ import annotations
+
+import json
+import math
+import threading
+import time
+from typing import Any
+
+from dimos_lcm.std_msgs import String
+from reactivex.disposable import CompositeDisposable, Disposable
+
+from dimos.agents.annotation import skill
+from dimos.core.core import rpc
+from dimos.core.module import Module
+from dimos.core.stream import In, Out
+from dimos.msgs.geometry_msgs import PoseStamped, Quaternion, Transform, Twist, Vector3
+from dimos.msgs.sensor_msgs import Image
+from dimos.robot.drone.tello_sdk import TelloSdkClient
+from dimos.utils.logging_config import setup_logger
+
+logger = setup_logger()
+
+
+def _add_disposable(composite: CompositeDisposable, item: Disposable | Any) -> None:
+    if isinstance(item, Disposable):
+        composite.add(item)
+    elif callable(item):
+        composite.add(Disposable(item))
+
+
+def _to_float(value: Any, default: float = 0.0) -> float:
+    if value is None:
+        return default
+    if isinstance(value, (float, int)):
+        return float(value)
+    try:
+        return float(value)
+    except (TypeError, ValueError):
+        return default
+
+
+class TelloConnectionModule(Module):
+    """Module that bridges Tello SDK to DimOS streams and skills."""
+
+    # Inputs
+    movecmd: In[Vector3]
+    movecmd_twist: In[Twist]
+    extcmd: In[String]
+    tracking_status: In[Any]
+
+    # Outputs
+    odom: Out[PoseStamped]
+    status: Out[Any]
+    telemetry: Out[Any]
+    video: Out[Image]
+    follow_object_cmd: Out[Any]
+
+    # Parameters
+    tello_ip: str
+    local_ip: str
+    local_command_port: int
+    state_port: int
+    video_port: int
+    command_timeout: float
+    video_uri: str | None
+
+    # Internal state
+    _latest_odom: PoseStamped | None = None
+    _latest_video_frame: Image | None = None
+    _latest_status: dict[str, Any] | None = None
+    _latest_telemetry: dict[str, Any] | None = None
+    _latest_tracking_status: dict[str, Any] | None = None
+    _state_lock: threading.RLock
+
+    def __init__(
+        self,
+        tello_ip: str = "192.168.10.1",
+        local_ip: str = "",
+        local_command_port: int = 9000,
+        state_port: int = 8890,
+        video_port: int = 11111,
+        command_timeout: float = 7.0,
+        video_uri: str | None = None,
+        *args: Any,
+        **kwargs: Any,
+    ) -> None:
+        """Initialize Tello module.
+
+        Args:
+            tello_ip: Drone IP address.
+            local_ip: Local bind host. Empty binds all interfaces.
+            local_command_port: Local UDP port for command responses.
+            state_port: Local UDP port receiving state packets.
+            video_port: Local UDP port receiving H264 video.
+            command_timeout: Timeout for SDK commands.
+            video_uri: Optional OpenCV URI override for the video stream.
+        """
+        self.tello_ip = tello_ip
+        self.local_ip = local_ip
+        self.local_command_port = local_command_port
+        self.state_port = state_port
+        self.video_port = video_port
+        self.command_timeout = command_timeout
+        self.video_uri = video_uri
+
+        self.connection: TelloSdkClient | None = None
+        self._latest_odom = None
+        self._latest_video_frame = None
+        self._latest_status = None
+        self._latest_telemetry = None
+        self._latest_tracking_status = None
+        self._state_lock = threading.RLock()
+        self._manual_override_until = 0.0
+        Module.__init__(self, *args, **kwargs)
+
+    @rpc
+    def start(self) -> None:
+        """Start the module and subscribe to Tello streams."""
+        self.connection = TelloSdkClient(
+            tello_ip=self.tello_ip,
+            local_ip=self.local_ip,
+            local_command_port=self.local_command_port,
+            state_port=self.state_port,
+            video_port=self.video_port,
+            command_timeout=self.command_timeout,
+            video_uri=self.video_uri,
+        )
+
+        if not self.connection.connect():
+            logger.error("Failed to connect to Tello TT")
+            return
+
+        _add_disposable(self._disposables, self.connection.odom_stream().subscribe(self._publish_tf))
+        _add_disposable(
+            self._disposables, self.connection.status_stream().subscribe(self._publish_status)
+        )
+        _add_disposable(
+            self._disposables, self.connection.telemetry_stream().subscribe(self._publish_telemetry)
+        )
+        _add_disposable(
+            self._disposables, self.connection.video_stream().subscribe(self._store_and_publish_frame)
+        )
+
+        _add_disposable(self._disposables, self.movecmd.subscribe(self._on_move))
+
+        if self.movecmd_twist.transport:
+            _add_disposable(self._disposables, self.movecmd_twist.subscribe(self._on_move_twist))
+        if self.extcmd.transport:
+            _add_disposable(self._disposables, self.extcmd.subscribe(self._on_ext_cmd))
+        if self.tracking_status.transport:
+            _add_disposable(self._disposables, self.tracking_status.subscribe(self._on_tracking_status))
+
+        logger.info("TelloConnectionModule started")
+        return
+
+    def _store_and_publish_frame(self, frame: Image) -> None:
+        self._latest_video_frame = frame
+        self.video.publish(frame)
+
+    def _publish_tf(self, msg: PoseStamped) -> None:
+        self._latest_odom = msg
+        self.odom.publish(msg)
+
+        base_link = Transform(
+            translation=msg.position,
+            rotation=msg.orientation,
+            frame_id="world",
+            child_frame_id="base_link",
+            ts=msg.ts if hasattr(msg, "ts") else time.time(),
+        )
+        self.tf.publish(base_link)
+
+        camera_link = Transform(
+            translation=Vector3(0.05, 0.0, -0.02),
+            rotation=Quaternion(0.0, 0.0, 0.0, 1.0),
+            frame_id="base_link",
+            child_frame_id="camera_link",
+            ts=time.time(),
+        )
+        self.tf.publish(camera_link)
+
+    def _publish_status(self, status: dict[str, Any]) -> None:
+        with self._state_lock:
+            self._latest_status = dict(status)
+        self.status.publish(String(json.dumps(status)))
+
+    def _publish_telemetry(self, telemetry: dict[str, Any]) -> None:
+        with self._state_lock:
+            self._latest_telemetry = dict(telemetry)
+        self.telemetry.publish(String(json.dumps(telemetry)))
+
+    def _on_move(self, cmd: Vector3) -> None:
+        if self.connection:
+            self.connection.move(cmd, duration=0.0)
+
+    def _on_move_twist(self, msg: Twist) -> None:
+        if time.time() < self._manual_override_until:
+            return
+        if self.connection:
+            self.connection.move_twist(msg, duration=0.0, lock_altitude=True)
+
+    def _on_ext_cmd(self, msg: String) -> None:
+        if self.connection:
+            self.connection.send_ext(msg.data)
+
+    def _on_tracking_status(self, status: Any) -> None:
+        data: dict[str, Any] | None = None
+        if isinstance(status, String):
+            try:
+                data = json.loads(status.data)
+            except json.JSONDecodeError:
+                data = None
+        elif isinstance(status, dict):
+            data = status
+
+        if data is not None:
+            with self._state_lock:
+                self._latest_tracking_status = data
+
+    def _wait_for_tracking_status(
+        self, timeout: float
+    ) -> tuple[str | None, dict[str, Any] | None]:
+        deadline = time.time() + timeout
+        while time.time() < deadline:
+            with self._state_lock:
+                status = dict(self._latest_tracking_status or {}) or None
+            if status is not None:
+                value = status.get("status")
+                if isinstance(value, str) and value in {"tracking", "not_found", "failed", "lost"}:
+                    return value, status
+            time.sleep(0.05)
+        with self._state_lock:
+            status = dict(self._latest_tracking_status or {}) or None
+        return None, status
+
+    def _wait_until_airborne(self, min_height_cm: float = 20.0, timeout: float = 12.0) -> bool:
+        if not self.connection:
+            return False
+        deadline = time.time() + timeout
+        while time.time() < deadline:
+            state = self.connection.get_state()
+            if _to_float(state.get("h")) >= min_height_cm:
+                return True
+            time.sleep(0.1)
+        state = self.connection.get_state()
+        return _to_float(state.get("h")) >= min_height_cm
+
+    @rpc
+    def get_odom(self) -> PoseStamped | None:
+        """Get latest odometry estimate."""
+        return self._latest_odom
+
+    @rpc
+    def get_status(self) -> dict[str, Any]:
+        """Get latest compact status dict."""
+        with self._state_lock:
+            return dict(self._latest_status or {})
+
+    @rpc
+    def get_state(self) -> dict[str, Any]:
+        """Get latest raw Tello state packet as dict."""
+        with self._state_lock:
+            return dict(self._latest_telemetry or {})
+
+    @skill
+    def move(self, x: float = 0.0, y: float = 0.0, z: float = 0.0, duration: float = 0.0) -> str:
+        """Move with body-frame velocity.
+
+        Args:
+            x: Forward velocity in m/s.
+            y: Left velocity in m/s.
+            z: Up velocity in m/s.
+            duration: Optional duration in seconds. 0 means single update.
+        """
+        if not self.connection:
+            return "Failed: no Tello connection"
+        ok = self.connection.move(Vector3(x, y, z), duration=duration)
+        return "Move command sent" if ok else "Failed: move command rejected"
+
+    @skill
+    def rc(
+        self,
+        left_right: int = 0,
+        forward_back: int = 0,
+        up_down: int = 0,
+        yaw: int = 0,
+        duration: float = 0.0,
+    ) -> str:
+        """Send low-level rc command in SDK units.
+
+        Args:
+            left_right: Left/right channel in [-100, 100]. Positive is right.
+            forward_back: Forward/back channel in [-100, 100]. Positive is forward.
+            up_down: Vertical channel in [-100, 100]. Positive is up.
+            yaw: Yaw channel in [-100, 100]. Positive is clockwise.
+            duration: Optional duration before auto-stop.
+        """
+        if not self.connection:
+            return "Failed: no Tello connection"
+        ok = self.connection.rc(left_right, forward_back, up_down, yaw)
+        if duration > 0:
+            time.sleep(duration)
+            self.connection.rc(0, 0, 0, 0)
+        return "RC command sent" if ok else "Failed: rc command rejected"
+
+    @skill
+    def move_relative(self, x: float = 0.0, y: float = 0.0, z: float = 0.0, speed: float = 0.4) -> str:
+        """Move by a relative offset using the SDK go command.
+
+        Args:
+            x: Forward distance in meters.
+            y: Left distance in meters.
+            z: Up distance in meters.
+            speed: Translation speed in m/s.
+        """
+        if not self.connection:
+            return "Failed: no Tello connection"
+        ok = self.connection.move_relative(x=x, y=y, z=z, speed=speed)
+        return "Relative move command sent" if ok else "Failed: move_relative rejected"
+
+    @skill
+    def yaw(self, degrees: float) -> str:
+        """Rotate in place.
+
+        Args:
+            degrees: Positive rotates counter-clockwise, negative clockwise.
+        """
+        if not self.connection:
+            return "Failed: no Tello connection"
+        ok = self.connection.yaw(degrees)
+        return "Yaw command sent" if ok else "Failed: yaw command rejected"
+
+    @skill
+    def takeoff(self, altitude: float = 1.0) -> str:
+        """Take off and hover.
+
+        Args:
+            altitude: Desired altitude hint in meters.
+        """
+        if not self.connection:
+            return "Failed: no Tello connection"
+        ok = self.connection.takeoff()
+        if not ok:
+            return "Failed: takeoff rejected"
+        if altitude > 1.0:
+            return "Takeoff command sent (TT hovers at fixed initial altitude; use move_relative z for more)"
+        return "Takeoff command sent"
+
+    @skill
+    def land(self) -> str:
+        """Land immediately."""
+        if not self.connection:
+            return "Failed: no Tello connection"
+        ok = self.connection.land()
+        return "Land command sent" if ok else "Failed: land rejected"
+
+    @skill
+    def emergency_stop(self) -> str:
+        """Emergency stop motors immediately."""
+        if not self.connection:
+            return "Failed: no Tello connection"
+        ok = self.connection.emergency()
+        return "Emergency command sent" if ok else "Failed: emergency rejected"
+
+    @skill
+    def send_ext(self, ext_command: str) -> str:
+        """Send a TT expansion-board EXT command.
+
+        Args:
+            ext_command: Raw EXT payload, with or without the EXT prefix.
+        """
+        if not self.connection:
+            return "Failed: no Tello connection"
+        ok = self.connection.send_ext(ext_command)
+        return "EXT command sent" if ok else "Failed: EXT command rejected"
+
+    @skill
+    def follow_object(
+        self,
+        object_description: str = "person",
+        distance_m: float = 1.0,
+        duration: float = 120.0,
+        scan_step_deg: float = 30.0,
+        max_scan_steps: int = 12,
+        control_mode: str = "full",
+    ) -> str:
+        """Scan for and follow an object using visual tracking.
+
+        Args:
+            object_description: Object to search for (for example "person").
+            distance_m: Desired following distance in meters (currently a hint only).
+            duration: Maximum follow duration in seconds once acquired. Also used as search budget.
+            scan_step_deg: Yaw step used between search attempts.
+            max_scan_steps: Number of scan attempts before giving up.
+            control_mode: "full" for translation+tracking, or "yaw_only" to rotate in place only.
+                Use "yaw_only" when the user asks to hover and only rotate to center a person.
+        """
+        if not self.connection:
+            return "Failed: no Tello connection"
+        if not self.follow_object_cmd.transport:
+            return "Failed: follow_object stream is not connected"
+        if not self._wait_until_airborne(min_height_cm=20.0, timeout=12.0):
+            logger.warning("follow_object: drone not confirmed airborne; refusing to track")
+            return "Failed: drone is not airborne yet"
+
+        steps = max(1, max_scan_steps)
+        scan_budget_s = max(2.0, min(duration, 30.0))
+        per_step_budget_s = max(0.8, scan_budget_s / steps)
+        follow_duration_s = max(duration, 60.0)
+
+        for step in range(steps):
+            step_deadline = time.time() + per_step_budget_s
+            while time.time() < step_deadline:
+                with self._state_lock:
+                    self._latest_tracking_status = None
+
+                msg = {
+                    "object_description": object_description,
+                    "duration": follow_duration_s,
+                    "distance_m": distance_m,
+                    "control_mode": control_mode,
+                }
+                self.follow_object_cmd.publish(String(json.dumps(msg)))
+
+                time_left = max(0.2, step_deadline - time.time())
+                status, detail = self._wait_for_tracking_status(timeout=min(1.5, time_left))
+                if status == "tracking":
+                    return (
+                        f"Following {object_description} for up to {follow_duration_s:.0f}s. "
+                        "Using fixed forward approach speed with yaw centering. "
+                        f"Mode={control_mode}."
+                    )
+                if status == "lost":
+                    return f"Started tracking {object_description} but target was lost"
+                if status == "failed":
+                    reason = detail.get("error", "tracker failed") if detail else "tracker failed"
+                    # Hard backend/config errors should return immediately.
+                    if "no supported" in str(reason).lower() or "stream is not connected" in str(
+                        reason
+                    ).lower():
+                        return f"Failed to start tracking {object_description}: {reason}"
+
+                time.sleep(0.15)
+
+            if step < steps - 1 and abs(scan_step_deg) > 0.0:
+                self.connection.yaw(scan_step_deg)
+                time.sleep(0.5)
+
+        return (
+            f"Could not find {object_description} after scanning for about {scan_budget_s:.0f}s"
+        )
+
+    @skill
+    def center_person_by_yaw(
+        self,
+        duration: float = 120.0,
+        scan_step_deg: float = 0.0,
+        max_scan_steps: int = 1,
+    ) -> str:
+        """Hover and rotate to keep a detected person centered in view (no translation).
+
+        Args:
+            duration: Maximum tracking duration in seconds once acquired.
+            scan_step_deg: Optional yaw scan step between search attempts.
+            max_scan_steps: Number of search orientations before giving up.
+        """
+        return self.follow_object(
+            object_description="person",
+            distance_m=0.0,
+            duration=duration,
+            scan_step_deg=scan_step_deg,
+            max_scan_steps=max_scan_steps,
+            control_mode="yaw_only",
+        )
+
+    @skill
+    def orbit_object(
+        self,
+        radius_m: float = 1.0,
+        speed_mps: float = 0.35,
+        revolutions: float = 1.0,
+        clockwise: bool = False,
+    ) -> str:
+        """Circle around the currently followed target using velocity+yaw control.
+
+        Args:
+            radius_m: Desired orbit radius in meters.
+            speed_mps: Tangential orbit speed in m/s.
+            revolutions: Number of revolutions to complete.
+            clockwise: If True orbit clockwise, otherwise counter-clockwise.
+        """
+        if not self.connection:
+            return "Failed: no Tello connection"
+        if radius_m <= 0.1:
+            return "Failed: radius_m must be greater than 0.1"
+        if speed_mps <= 0.05:
+            return "Failed: speed_mps must be greater than 0.05"
+        if revolutions <= 0.0:
+            return "Failed: revolutions must be positive"
+
+        duration = (2.0 * math.pi * radius_m * revolutions) / speed_mps
+        direction = -1.0 if clockwise else 1.0
+
+        self._manual_override_until = time.time() + duration + 0.5
+        try:
+            yaw_rate = direction * (speed_mps / radius_m)
+            ok = self.connection.move(
+                Vector3(0.0, direction * speed_mps, 0.0),
+                yaw_rate=yaw_rate,
+                duration=duration,
+            )
+        finally:
+            self._manual_override_until = 0.0
+
+        if ok:
+            orbit_dir = "clockwise" if clockwise else "counter-clockwise"
+            return (
+                f"Completed {revolutions:.2f} {orbit_dir} orbit(s) at radius {radius_m:.2f}m "
+                f"and speed {speed_mps:.2f}m/s"
+            )
+        return "Failed: orbit command rejected"
+
+    @skill
+    def observe(self) -> Image | None:
+        """Return latest camera frame for perception tasks."""
+        return self._latest_video_frame
+
+    @rpc
+    def stop(self) -> None:
+        """Stop module and close the Tello connection."""
+        if self.connection:
+            self.connection.disconnect()
+            self.connection = None
+        logger.info("TelloConnectionModule stopped")
+        super().stop()
+
+
+tello_connection_module = TelloConnectionModule.blueprint
+
+__all__ = ["TelloConnectionModule", "tello_connection_module"]
diff --git a/dimos/robot/drone/tello_sdk.py b/dimos/robot/drone/tello_sdk.py
new file mode 100644
index 0000000000..79672d4e75
--- /dev/null
+++ b/dimos/robot/drone/tello_sdk.py
@@ -0,0 +1,483 @@
+#!/usr/bin/env python3
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Low-level RoboMaster TT (Tello Talent) SDK adapter."""
+
+from __future__ import annotations
+
+import math
+import socket
+import threading
+import time
+from typing import Any, TypeAlias
+
+import cv2
+from reactivex import Subject
+
+from dimos.msgs.geometry_msgs import PoseStamped, Quaternion, Twist, Vector3
+from dimos.msgs.sensor_msgs import Image, ImageFormat
+from dimos.utils.logging_config import setup_logger
+
+logger = setup_logger()
+
+TelloScalar: TypeAlias = int | float | str
+TelloState: TypeAlias = dict[str, TelloScalar]
+
+
+def _to_float(value: TelloScalar | None, default: float = 0.0) -> float:
+    if value is None:
+        return default
+    if isinstance(value, (float, int)):
+        return float(value)
+    try:
+        return float(value)
+    except ValueError:
+        return default
+
+
+def _clamp(value: int, lo: int, hi: int) -> int:
+    return max(lo, min(hi, value))
+
+
+class TelloSdkClient:
+    """SDK client for RoboMaster TT / Tello over UDP."""
+
+    def __init__(
+        self,
+        tello_ip: str = "192.168.10.1",
+        command_port: int = 8889,
+        state_port: int = 8890,
+        video_port: int = 11111,
+        local_ip: str = "",
+        local_command_port: int = 9000,
+        command_timeout: float = 7.0,
+        video_uri: str | None = None,
+        velocity_scale: float = 100.0,
+        yaw_scale: float = 60.0,
+    ) -> None:
+        """Initialize the client.
+
+        Args:
+            tello_ip: Drone IP, default AP mode address.
+            command_port: SDK command UDP port.
+            state_port: SDK state UDP port.
+            video_port: H264 UDP stream port.
+            local_ip: Local bind host. Empty string binds all interfaces.
+            local_command_port: Local UDP port used for command/response.
+            command_timeout: Timeout waiting for command responses.
+            video_uri: Optional explicit URI for OpenCV video capture.
+            velocity_scale: Meters/second -> rc units conversion factor.
+            yaw_scale: Radians/second -> rc units conversion factor.
+        """
+        self.tello_ip = tello_ip
+        self.command_port = command_port
+        self.state_port = state_port
+        self.video_port = video_port
+        self.local_ip = local_ip
+        self.local_command_port = local_command_port
+        self.command_timeout = command_timeout
+        self.video_uri = video_uri
+        self.velocity_scale = velocity_scale
+        self.yaw_scale = yaw_scale
+
+        self.connected = False
+        self._running = False
+
+        self._tello_addr = (self.tello_ip, self.command_port)
+        self._command_socket: socket.socket | None = None
+        self._state_socket: socket.socket | None = None
+        self._video_capture: cv2.VideoCapture | None = None
+
+        self._command_lock = threading.Lock()
+        self._state_lock = threading.RLock()
+        self._frame_lock = threading.RLock()
+        self._pose_lock = threading.RLock()
+
+        self._state_thread: threading.Thread | None = None
+        self._video_thread: threading.Thread | None = None
+
+        self._latest_state: TelloState = {}
+        self._latest_frame: Image | None = None
+
+        self._position = Vector3(0.0, 0.0, 0.0)
+        self._last_pose_ts = time.time()
+
+        self._telemetry_subject: Subject[TelloState] = Subject()
+        self._status_subject: Subject[dict[str, Any]] = Subject()
+        self._odom_subject: Subject[PoseStamped] = Subject()
+        self._video_subject: Subject[Image] = Subject()
+
+    @staticmethod
+    def parse_state_packet(packet: str) -> TelloState:
+        """Parse semicolon-separated SDK state packet to a dict."""
+        result: TelloState = {}
+        for item in packet.strip().split(";"):
+            if not item or ":" not in item:
+                continue
+            key, raw_value = item.split(":", 1)
+            value = raw_value.strip()
+            if value == "":
+                continue
+            try:
+                if "." in value:
+                    result[key] = float(value)
+                else:
+                    result[key] = int(value)
+            except ValueError:
+                result[key] = value
+        return result
+
+    def connect(self) -> bool:
+        """Connect sockets, enter SDK mode, start state/video workers."""
+        if self.connected:
+            return True
+
+        try:
+            self._command_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+            self._command_socket.bind((self.local_ip, self.local_command_port))
+            self._command_socket.settimeout(self.command_timeout)
+
+            self._state_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+            self._state_socket.bind((self.local_ip, self.state_port))
+            self._state_socket.settimeout(0.5)
+        except OSError as exc:
+            logger.error(f"Failed to bind Tello sockets: {exc}")
+            self.disconnect()
+            return False
+
+        self._running = True
+        self._state_thread = threading.Thread(target=self._state_loop, daemon=True)
+        self._state_thread.start()
+
+        if not self.enter_sdk_mode():
+            logger.error("Tello did not enter SDK command mode")
+            self.disconnect()
+            return False
+
+        if not self.send_command("streamon", retries=1).startswith("ok"):
+            logger.warning("Tello streamon did not return ok; continuing without video")
+        else:
+            self._video_thread = threading.Thread(target=self._video_loop, daemon=True)
+            self._video_thread.start()
+
+        self.connected = True
+        logger.info(f"Tello SDK connected to {self.tello_ip}:{self.command_port}")
+        return True
+
+    def disconnect(self) -> None:
+        """Stop workers and close sockets."""
+        if self.connected:
+            try:
+                self.send_command_no_wait("rc 0 0 0 0")
+            except Exception:
+                pass
+            try:
+                self.send_command("streamoff", retries=0)
+            except Exception:
+                pass
+
+        self._running = False
+
+        if self._state_thread and self._state_thread.is_alive():
+            self._state_thread.join(timeout=1.0)
+        if self._video_thread and self._video_thread.is_alive():
+            self._video_thread.join(timeout=1.0)
+
+        if self._video_capture is not None:
+            self._video_capture.release()
+            self._video_capture = None
+
+        if self._state_socket is not None:
+            self._state_socket.close()
+            self._state_socket = None
+        if self._command_socket is not None:
+            self._command_socket.close()
+            self._command_socket = None
+
+        self.connected = False
+        logger.info("Tello SDK disconnected")
+
+    def send_command(self, command: str, retries: int = 2) -> str:
+        """Send a raw command and wait for a response."""
+        if self._command_socket is None:
+            return "error:not_connected"
+
+        attempt = 0
+        while attempt <= retries:
+            try:
+                with self._command_lock:
+                    self._command_socket.sendto(command.encode("utf-8"), self._tello_addr)
+                    data, _ = self._command_socket.recvfrom(2048)
+                response = data.decode("utf-8", errors="ignore").strip().lower()
+                return response
+            except TimeoutError:
+                attempt += 1
+                logger.warning(f"Tello command timeout [{command}] attempt {attempt}/{retries + 1}")
+            except OSError as exc:
+                logger.error(f"Tello command socket error [{command}]: {exc}")
+                break
+        return "error:timeout"
+
+    def send_command_no_wait(self, command: str) -> bool:
+        """Send a raw command without waiting for an SDK response.
+
+        Useful for high-rate commands like `rc` where waiting for a reply
+        can add control lag and frequent timeout noise.
+        """
+        if self._command_socket is None:
+            return False
+        try:
+            with self._command_lock:
+                self._command_socket.sendto(command.encode("utf-8"), self._tello_addr)
+            return True
+        except OSError as exc:
+            logger.warning(f"Tello non-blocking send failed [{command}]: {exc}")
+            return False
+
+    def enter_sdk_mode(self) -> bool:
+        """Enter SDK command mode."""
+        return self.send_command("command").startswith("ok")
+
+    def takeoff(self) -> bool:
+        """Takeoff command."""
+        response = self.send_command("takeoff")
+        if response.startswith("ok"):
+            return True
+
+        # Some firmware/network conditions drop command ACKs.
+        # Treat timeout as success if telemetry shows we actually climbed.
+        if response == "error:timeout":
+            deadline = time.time() + 4.0
+            while time.time() < deadline:
+                state = self.get_state()
+                if _to_float(state.get("h")) >= 20.0:  # centimeters
+                    logger.info("Takeoff timeout recovered by telemetry height check")
+                    return True
+                time.sleep(0.1)
+
+        return False
+
+    def land(self) -> bool:
+        """Land command."""
+        return self.send_command("land").startswith("ok")
+
+    def emergency(self) -> bool:
+        """Emergency stop command."""
+        return self.send_command("emergency", retries=0).startswith("ok")
+
+    def stop(self) -> bool:
+        """Stop all motion."""
+        return self.send_command("stop", retries=0).startswith("ok")
+
+    def yaw(self, degrees_ccw: float) -> bool:
+        """Rotate in place. Positive values rotate counter-clockwise."""
+        degrees = round(abs(degrees_ccw))
+        if degrees == 0:
+            return True
+        if degrees_ccw > 0:
+            return self.send_command(f"ccw {degrees}").startswith("ok")
+        return self.send_command(f"cw {degrees}").startswith("ok")
+
+    def move_relative(self, x: float, y: float, z: float, speed: float = 0.4) -> bool:
+        """Move relative in meters (x forward, y left, z up)."""
+        x_cm = _clamp(round(x * 100.0), -500, 500)
+        y_right_cm = _clamp(round(-y * 100.0), -500, 500)
+        z_cm = _clamp(round(z * 100.0), -500, 500)
+        speed_cm = _clamp(round(speed * 100.0), 10, 100)
+        return self.send_command(f"go {x_cm} {y_right_cm} {z_cm} {speed_cm}").startswith("ok")
+
+    def send_ext(self, ext_command: str) -> bool:
+        """Send TT expansion-board command through EXT path."""
+        payload = ext_command.strip()
+        if not payload:
+            return False
+        if not payload.lower().startswith("ext "):
+            payload = f"EXT {payload}"
+        return self.send_command(payload).startswith("ok")
+
+    def rc(self, left_right: int, forward_back: int, up_down: int, yaw: int) -> bool:
+        """Send low-level continuous control command in SDK rc units."""
+        lr = _clamp(left_right, -100, 100)
+        fb = _clamp(forward_back, -100, 100)
+        ud = _clamp(up_down, -100, 100)
+        yw = _clamp(yaw, -100, 100)
+        return self.send_command_no_wait(f"rc {lr} {fb} {ud} {yw}")
+
+    def move(self, linear: Vector3, yaw_rate: float = 0.0, duration: float = 0.0) -> bool:
+        """Move from SI units, converted to rc command."""
+        lr = round(-linear.y * self.velocity_scale)
+        fb = round(linear.x * self.velocity_scale)
+        ud = round(linear.z * self.velocity_scale)
+        yw = round(-yaw_rate * self.yaw_scale)
+        ok = self.rc(lr, fb, ud, yw)
+        if duration > 0:
+            time.sleep(duration)
+            self.rc(0, 0, 0, 0)
+        return ok
+
+    def move_twist(self, twist: Twist, duration: float = 0.0, lock_altitude: bool = True) -> bool:
+        """Move from Twist command."""
+        z = 0.0 if lock_altitude else twist.linear.z
+        return self.move(
+            Vector3(twist.linear.x, twist.linear.y, z),
+            yaw_rate=twist.angular.z,
+            duration=duration,
+        )
+
+    def get_state(self) -> TelloState:
+        """Get the latest parsed state packet."""
+        with self._state_lock:
+            return dict(self._latest_state)
+
+    def get_video_frame(self) -> Image | None:
+        """Get latest decoded video frame."""
+        with self._frame_lock:
+            return self._latest_frame
+
+    def telemetry_stream(self) -> Subject[TelloState]:
+        """Stream of parsed state packets."""
+        return self._telemetry_subject
+
+    def status_stream(self) -> Subject[dict[str, Any]]:
+        """Stream of compact status dictionaries."""
+        return self._status_subject
+
+    def odom_stream(self) -> Subject[PoseStamped]:
+        """Stream of odometry estimates from state packets."""
+        return self._odom_subject
+
+    def video_stream(self) -> Subject[Image]:
+        """Stream of decoded camera frames."""
+        return self._video_subject
+
+    def _state_loop(self) -> None:
+        while self._running:
+            if self._state_socket is None:
+                break
+            try:
+                payload, _ = self._state_socket.recvfrom(2048)
+            except TimeoutError:
+                continue
+            except OSError:
+                break
+
+            packet = payload.decode("utf-8", errors="ignore").strip()
+            if not packet:
+                continue
+
+            state = self.parse_state_packet(packet)
+            if not state:
+                continue
+
+            ts = time.time()
+            with self._state_lock:
+                self._latest_state = state
+
+            self._telemetry_subject.on_next(dict(state))
+            self._status_subject.on_next(self._build_status(state, ts))
+            self._publish_odom_from_state(state, ts)
+
+    def _video_loop(self) -> None:
+        bind_host = self.local_ip if self.local_ip else "0.0.0.0"
+        uri = self.video_uri or (
+            f"udp://@{bind_host}:{self.video_port}?overrun_nonfatal=1&fifo_size=50000000"
+        )
+
+        capture = cv2.VideoCapture(uri, cv2.CAP_FFMPEG)
+        if not capture.isOpened():
+            logger.warning(f"Failed to open Tello video stream URI: {uri}")
+            return
+        capture.set(cv2.CAP_PROP_BUFFERSIZE, 1)
+
+        self._video_capture = capture
+        logger.info(f"Tello video stream started from {uri}")
+
+        frame_count = 0
+        no_frame_count = 0
+        while self._running:
+            ok, frame = capture.read()
+            if not ok:
+                no_frame_count += 1
+                time.sleep(0.02)
+                if no_frame_count > 150:
+                    logger.warning("Tello video stalled; reopening capture")
+                    capture.release()
+                    capture = cv2.VideoCapture(uri, cv2.CAP_FFMPEG)
+                    if capture.isOpened():
+                        capture.set(cv2.CAP_PROP_BUFFERSIZE, 1)
+                        no_frame_count = 0
+                continue
+
+            no_frame_count = 0
+            frame_count += 1
+            if frame_count == 1:
+                logger.info(f"First Tello frame received: shape={frame.shape}")
+
+            image = Image.from_opencv(frame, format=ImageFormat.BGR, frame_id="camera_link")
+            with self._frame_lock:
+                self._latest_frame = image
+            self._video_subject.on_next(image)
+
+        capture.release()
+        self._video_capture = None
+        logger.info("Tello video stream stopped")
+
+    def _build_status(self, state: TelloState, ts: float) -> dict[str, Any]:
+        return {
+            "connected": self.connected,
+            "battery": int(_to_float(state.get("bat"), -1.0)),
+            "height_m": _to_float(state.get("h")) / 100.0,
+            "tof_m": _to_float(state.get("tof")) / 100.0,
+            "yaw_deg": _to_float(state.get("yaw")),
+            "pitch_deg": _to_float(state.get("pitch")),
+            "roll_deg": _to_float(state.get("roll")),
+            "temp_low_c": _to_float(state.get("templ")),
+            "temp_high_c": _to_float(state.get("temph")),
+            "flight_time_s": _to_float(state.get("time")),
+            "ts": ts,
+        }
+
+    def _publish_odom_from_state(self, state: TelloState, ts: float) -> None:
+        roll_rad = math.radians(_to_float(state.get("roll")))
+        pitch_rad = math.radians(_to_float(state.get("pitch")))
+        yaw_rad = math.radians(_to_float(state.get("yaw")))
+        quaternion = Quaternion.from_euler(Vector3(roll_rad, pitch_rad, yaw_rad))
+
+        vgx = _to_float(state.get("vgx")) / 100.0
+        vgy_right = _to_float(state.get("vgy")) / 100.0
+        v_left = -vgy_right
+
+        with self._pose_lock:
+            dt = max(0.0, min(ts - self._last_pose_ts, 0.2))
+            self._last_pose_ts = ts
+
+            vx_world = (vgx * math.cos(yaw_rad)) - (v_left * math.sin(yaw_rad))
+            vy_world = (vgx * math.sin(yaw_rad)) + (v_left * math.cos(yaw_rad))
+
+            self._position.x += vx_world * dt
+            self._position.y += vy_world * dt
+            self._position.z = _to_float(state.get("h")) / 100.0
+
+            pose = PoseStamped(
+                position=Vector3(self._position),
+                orientation=quaternion,
+                frame_id="world",
+                ts=ts,
+            )
+
+        self._odom_subject.on_next(pose)
+
+
+__all__ = ["TelloSdkClient", "TelloState"]
diff --git a/docs/usage/README.md b/docs/usage/README.md
index 071b6fc0b2..9a7037b076 100644
--- a/docs/usage/README.md
+++ b/docs/usage/README.md
@@ -7,6 +7,7 @@ This page explains general concepts.
 - [Modules](/docs/usage/modules.md): The primary units of deployment in DimOS, modules run in parallel and are python classes.
 - [Streams](/docs/usage/sensor_streams/README.md): How modules communicate, a Pub / Sub system.
 - [Blueprints](/docs/usage/blueprints.md): a way to group modules together and define their connections to each other.
+- [RoboMaster TT / Tello](/docs/usage/drone_tello_tt.md): Tello SDK blueprints, modules, and compatibility notes.
 - [RPC](/docs/usage/blueprints.md#calling-the-methods-of-other-modules): how one module can call a method on another module (arguments get serialized to JSON-like binary data).
 - [Skills](/docs/usage/blueprints.md#defining-skills): An RPC function, except it can be called by an AI agent (a tool for an AI).
 - Agents: AI that has an objective, access to stream data, and is capable of calling skills as tools.
diff --git a/docs/usage/drone_tello_tt.md b/docs/usage/drone_tello_tt.md
new file mode 100644
index 0000000000..b6c866ce9f
--- /dev/null
+++ b/docs/usage/drone_tello_tt.md
@@ -0,0 +1,136 @@
+# RoboMaster TT / Tello Integration
+
+This page documents the DimOS Tello integration and the compatibility constraints used to keep existing drone functionality intact.
+
+## Overview
+
+DimOS now includes a RoboMaster TT / Tello implementation that runs the main runtime on an edge computer and communicates with the drone over the DJI Tello UDP SDK.
+
+High-level stack:
+
+```text
+DimOS (laptop / edge)
+  -> TelloConnectionModule
+  -> Tello UDP SDK (cmd/state/video)
+  -> Optional EXT commands (TT expansion board)
+
+Video/state
+  -> DroneTrackingModule + CameraModule
+  -> agent / teleop / visualization modules
+```
+
+## New Blueprints
+
+- `drone-tello-tt-basic`
+- `drone-tello-tt-agentic`
+
+List and run:
+
+```bash
+dimos list
+dimos run --robot-ip 192.168.10.1 drone-tello-tt-agentic  --disable web-input
+```
+
+## Hardware + Network Connection Steps
+
+For reliable Tello control plus cloud API access (OpenAI, optional Qwen, etc.), use two network paths:
+
+1. Power on the RoboMaster TT / Tello and wait for SSID `TELLO-XXXXXX`.
+2. Connect your primary Wi-Fi interface to `TELLO-XXXXXX` (this provides the `192.168.10.x` control link).
+3. Keep internet on a second interface:
+   - preferred: wired Ethernet (`eth0`), or
+   - alternate: a second USB Wi-Fi adapter connected to home/office internet.
+4. Verify routes:
+   - Tello subnet exists on the Tello interface: `192.168.10.0/24`
+   - default route points to your internet interface (not the Tello AP).
+5. Verify connectivity:
+   - `ping -c 1 192.168.10.1` (drone reachable)
+   - `curl -I https://api.openai.com` (internet reachable)
+6. Launch DimOS:
+   - `dimos --robot-ip 192.168.10.1 run drone-tello-tt-agentic --disable web-input`
+
+If you only have a single Wi-Fi radio, connecting to Tello usually removes internet access and cloud tool-calls will fail.
+
+## New Modules
+
+- `dimos.robot.drone.tello_sdk.TelloSdkClient`
+  - Raw UDP command channel (`8889`)
+  - State stream (`8890`)
+  - Video stream (`11111`) decode + reconnect handling
+- `dimos.robot.drone.tello_connection_module.TelloConnectionModule`
+  - Skills: `takeoff`, `land`, `move`, `move_relative`, `yaw`, `rc`, `send_ext`, `follow_object`, `center_person_by_yaw`, `orbit_object`, `observe`
+  - Publishes telemetry/status/odom/video and follow-command stream
+
+## Tracking and Follow Behavior
+
+`DroneTrackingModule` now supports both legacy and Tello-optimized behavior.
+
+Compatibility defaults:
+
+- `enable_passive_overlay=False`
+- `use_local_person_detector=False`
+- `force_detection_servoing_for_person=False`
+- `person_follow_policy="legacy_pid"`
+
+These defaults preserve existing `drone-agentic` behavior (Qwen + tracker path) unless a blueprint explicitly opts in.
+
+Tello blueprint opt-in:
+
+- `enable_passive_overlay=True`
+- `use_local_person_detector=True`
+- `force_detection_servoing_for_person=True`
+- `person_follow_policy="yaw_forward_constant"`
+
+This keeps Tello-specific behavior scoped to Tello blueprints.
+
+## Optional Local Detector/GPU Settings
+
+Local YOLO person detection is optional and only used when enabled by module options.
+
+Supported env overrides:
+
+```bash
+export DIMOS_DRONE_YOLO_DEVICE=cuda:0   # or cpu
+export DIMOS_DRONE_YOLO_MODEL=yolo11s-pose.pt
+export DIMOS_DRONE_YOLO_IMGSZ=416
+export DIMOS_DRONE_YOLO_MAX_DET=5
+```
+
+Notes:
+
+- CUDA is not required.
+- If CUDA init fails, tracking falls back to CPU safely.
+- No laptop-specific paths or machine-specific constants are required.
+
+## Files Added
+
+- `dimos/robot/drone/tello_sdk.py`
+- `dimos/robot/drone/tello_connection_module.py`
+- `dimos/robot/drone/blueprints/basic/drone_tello_tt_basic.py`
+- `dimos/robot/drone/blueprints/agentic/drone_tello_tt_agentic.py`
+
+## Files Updated
+
+- `dimos/robot/drone/drone_tracking_module.py`
+  - Added configurable tracking modes for compatibility and Tello-specific behavior
+  - Added optional local detector path and passive overlay
+  - Added yaw-only centering and follow policies
+- `dimos/robot/drone/__init__.py`
+- `dimos/robot/drone/blueprints/__init__.py`
+- `dimos/robot/drone/blueprints/basic/__init__.py`
+- `dimos/robot/drone/blueprints/agentic/__init__.py`
+- `dimos/robot/all_blueprints.py`
+- `dimos/robot/drone/README.md`
+
+## Non-Regression Intent
+
+The integration is designed so legacy drone stacks are unaffected unless they opt into Tello-oriented tracking options.
+
+Validation commands:
+
+```bash
+uv run ruff check dimos/robot/drone/
+uv run mypy dimos/robot/drone/
+python3 -m py_compile dimos/robot/drone/drone_tracking_module.py
+python3 -m py_compile dimos/robot/drone/tello_connection_module.py
+```