diff --git a/data/.lfs/security_detection.png.tar.gz b/data/.lfs/security_detection.png.tar.gz new file mode 100644 index 0000000000..30637471ff --- /dev/null +++ b/data/.lfs/security_detection.png.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7952034063d4216cb03b870ba8f20f51b59883767ee198880d58a5859151775c +size 42747 diff --git a/data/.lfs/security_no_detection.png.tar.gz b/data/.lfs/security_no_detection.png.tar.gz new file mode 100644 index 0000000000..22acd21a2e --- /dev/null +++ b/data/.lfs/security_no_detection.png.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:680467d4219daf29f9211930221b533193591b5d2ca15ff4dbd79cd78203350e +size 14903 diff --git a/dimos/agents/skills/speak_skill.py b/dimos/agents/skills/speak_skill.py index 802aec03d0..c9940b0fb0 100644 --- a/dimos/agents/skills/speak_skill.py +++ b/dimos/agents/skills/speak_skill.py @@ -31,6 +31,8 @@ class SpeakSkill(Module): _tts_node: OpenAITTSNode | None = None _audio_output: SounddeviceAudioOutput | None = None _audio_lock: threading.Lock = threading.Lock() + _bg_threads: list[threading.Thread] = [] + _bg_threads_lock: threading.Lock = threading.Lock() @rpc def start(self) -> None: @@ -41,6 +43,10 @@ def start(self) -> None: @rpc def stop(self) -> None: + with self._bg_threads_lock: + threads = list(self._bg_threads) + for t in threads: + t.join(timeout=10.0) if self._tts_node: self._tts_node.dispose() self._tts_node = None @@ -50,7 +56,7 @@ def stop(self) -> None: super().stop() @skill - def speak(self, text: str) -> str: + def speak(self, text: str, blocking: bool = True) -> str: """Speak text out loud through the robot's speakers. USE THIS TOOL AS OFTEN AS NEEDED. People can't normally see what you say in text, but can hear what you speak. @@ -64,8 +70,32 @@ def speak(self, text: str) -> str: if self._tts_node is None: return "Error: TTS not initialized" + if not blocking: + thread = threading.Thread( + target=self._speak_bg, args=(text,), daemon=True, name="SpeakSkill-bg" + ) + with self._bg_threads_lock: + self._bg_threads.append(thread) + thread.start() + return f"Speaking (non-blocking): {text}" + + return self._speak_blocking(text) + + def _speak_bg(self, text: str) -> None: + try: + self._speak_blocking(text) + finally: + with self._bg_threads_lock: + self._bg_threads = [ + t for t in self._bg_threads if t is not threading.current_thread() + ] + + def _speak_blocking(self, text: str) -> str: # Use lock to prevent simultaneous speech with self._audio_lock: + if self._tts_node is None: + return "Error: TTS not initialized" + text_subject: Subject[str] = Subject() audio_complete = threading.Event() self._tts_node.consume_text(text_subject) diff --git a/dimos/agents/skills/speak_skill_spec.py b/dimos/agents/skills/speak_skill_spec.py new file mode 100644 index 0000000000..02379bbda2 --- /dev/null +++ b/dimos/agents/skills/speak_skill_spec.py @@ -0,0 +1,21 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Protocol + +from dimos.spec.utils import Spec + + +class SpeakSkillSpec(Spec, Protocol): + def speak(self, text: str, blocking: bool = True) -> str: ... diff --git a/dimos/e2e_tests/conftest.py b/dimos/e2e_tests/conftest.py index 4509a7e5e4..f61adcb3d2 100644 --- a/dimos/e2e_tests/conftest.py +++ b/dimos/e2e_tests/conftest.py @@ -125,3 +125,30 @@ def direct_cmd_vel_explorer() -> Generator[PersonTrackPublisher, None, None]: explorer.start() yield explorer explorer.stop() + + +@pytest.fixture +def explore_office( + direct_cmd_vel_explorer: DirectCmdVelExplorer, +) -> Callable[[], None]: + points = [ + (0, -7.07), + (-4.16, -7.07), + (-4.45, 1.10), + (-6.72, 2.87), + (-1.78, 3.01), + (-1.54, 5.74), + (3.88, 6.16), + (2.16, 9.36), + (4.70, 3.87), + (4.67, -7.15), + (4.57, -4.19), + (-0.84, -2.78), + (-4.71, 1.17), + (4.30, 0.87), + ] + + def explore() -> None: + direct_cmd_vel_explorer.follow_points(points) + + return explore diff --git a/dimos/e2e_tests/test_patrol_and_follow.py b/dimos/e2e_tests/test_patrol_and_follow.py index d45fdf2e7f..c19e8bbafe 100644 --- a/dimos/e2e_tests/test_patrol_and_follow.py +++ b/dimos/e2e_tests/test_patrol_and_follow.py @@ -20,24 +20,6 @@ from dimos.e2e_tests.conf_types import StartPersonTrack from dimos.e2e_tests.dimos_cli_call import DimosCliCall from dimos.e2e_tests.lcm_spy import LcmSpy -from dimos.simulation.mujoco.direct_cmd_vel_explorer import DirectCmdVelExplorer - -points = [ - (0, -7.07), - (-4.16, -7.07), - (-4.45, 1.10), - (-6.72, 2.87), - (-1.78, 3.01), - (-1.54, 5.74), - (3.88, 6.16), - (2.16, 9.36), - (4.70, 3.87), - (4.67, -7.15), - (4.57, -4.19), - (-0.84, -2.78), - (-4.71, 1.17), - (4.30, 0.87), -] @pytest.mark.skipif_in_ci @@ -48,7 +30,7 @@ def test_patrol_and_follow( start_blueprint: Callable[[str], DimosCliCall], human_input: Callable[[str], None], start_person_track: StartPersonTrack, - direct_cmd_vel_explorer: DirectCmdVelExplorer, + explore_office: Callable[[], None], ) -> None: start_blueprint( "--mujoco-start-pos", @@ -66,12 +48,7 @@ def test_patrol_and_follow( time.sleep(5) - print("Starting discovery.") - - # Explore the entire room by driving directly via /cmd_vel. - direct_cmd_vel_explorer.follow_points(points) - - print("Ended discovery.") + explore_office() start_person_track( [ diff --git a/dimos/e2e_tests/test_security_module.py b/dimos/e2e_tests/test_security_module.py new file mode 100644 index 0000000000..16c2771381 --- /dev/null +++ b/dimos/e2e_tests/test_security_module.py @@ -0,0 +1,77 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from collections.abc import Callable +import time + +from dimos_lcm.std_msgs import String +import pytest + +from dimos.e2e_tests.conf_types import StartPersonTrack +from dimos.e2e_tests.dimos_cli_call import DimosCliCall +from dimos.e2e_tests.lcm_spy import LcmSpy + + +@pytest.mark.skipif_in_ci +@pytest.mark.skipif_no_openai +@pytest.mark.mujoco +def test_security_module( + lcm_spy: LcmSpy, + start_blueprint: Callable[[str], DimosCliCall], + human_input: Callable[[str], None], + start_person_track: StartPersonTrack, + explore_office: Callable[[], None], +) -> None: + start_blueprint( + "--mujoco-start-pos", + "-10.75 -6.78", + "--mujoco-camera-position", + "-0.797 0.007 0.468 26.825 88.998 -70.321", + "--nerf-speed", + "0.8", + "--dtop", + "run", + "--disable", + "spatial-memory", + "unitree-go2-security", + ) + + lcm_spy.save_topic("/rpc/McpClient/on_system_modules/res") + lcm_spy.save_topic("/security_state#std_msgs.String") + lcm_spy.wait_for_saved_topic("/rpc/McpClient/on_system_modules/res", timeout=120.0) + + time.sleep(2) + + explore_office() + + start_person_track( + [ + (-10.75, -6.78), + (0, -7.07), + ] + ) + human_input( + "start the security patrol. Just call start_security_patrol. Do not ask me anything." + ) + + def predicate(s: String) -> bool: + return s.data == "FOLLOWING" + + lcm_spy.wait_for_message_result( + "/security_state#std_msgs.String", + String, + predicate, + "Failed to transition to FOLLOWING.", + 360, + ) diff --git a/dimos/experimental/security_demo/conftest.py b/dimos/experimental/security_demo/conftest.py new file mode 100644 index 0000000000..fc5a73825b --- /dev/null +++ b/dimos/experimental/security_demo/conftest.py @@ -0,0 +1,87 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import pytest + +from dimos.experimental.security_demo.security_module import SecurityModule +from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo +from dimos.msgs.sensor_msgs.Image import Image +from dimos.perception.detection.detectors.yolo import Yolo2DDetector +from dimos.perception.detection.type.detection2d.bbox import Detection2DBBox +from dimos.utils.data import get_data + + +@pytest.fixture(scope="session") +def yolo_detector(): + detector = Yolo2DDetector(device="cpu") + yield detector + detector.stop() + + +@pytest.fixture(scope="session") +def person_image(): + return Image.from_file(get_data("security_detection.png")) + + +@pytest.fixture(scope="session") +def empty_image(): + return Image.from_file(get_data("security_no_detection.png")) + + +@pytest.fixture() +def security_module(mocker): + mocker.patch("dimos.experimental.security_demo.security_module._create_router") + mocker.patch("dimos.experimental.security_demo.security_module._create_visual_servo") + mocker.patch("dimos.experimental.security_demo.security_module.YoloPersonDetector") + mocker.patch("dimos.experimental.security_demo.security_module.EdgeTAMProcessor") + + module = SecurityModule(camera_info=CameraInfo()) + + # Replace output streams with mocks for test assertions + module.detection = mocker.MagicMock() + module.security_state = mocker.MagicMock() + module.goal_request = mocker.MagicMock() + module.cmd_vel = mocker.MagicMock() + + # These are set by framework wiring, not __init__ + module._planner_spec = mocker.MagicMock() + module._speak_skill = mocker.MagicMock() + + yield module + + module.stop() + + +@pytest.fixture() +def make_detection(person_image): + def _make( + bbox=(100.0, 50.0, 300.0, 400.0), + track_id=1, + class_id=0, + confidence=0.9, + name="person", + ): + return Detection2DBBox( + bbox=bbox, + track_id=track_id, + class_id=class_id, + confidence=confidence, + name=name, + ts=0.0, + image=person_image, + ) + + return _make diff --git a/dimos/experimental/security_demo/depth_estimator.py b/dimos/experimental/security_demo/depth_estimator.py new file mode 100644 index 0000000000..a7b9c6077f --- /dev/null +++ b/dimos/experimental/security_demo/depth_estimator.py @@ -0,0 +1,94 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from collections.abc import Callable +import threading + +import numpy as np +from PIL import Image as PILImage +import torch +from transformers import AutoImageProcessor, AutoModelForDepthEstimation + +from dimos.msgs.sensor_msgs.Image import Image, ImageFormat + +_DEPTH_MODEL_NAME = "depth-anything/Depth-Anything-V2-Small-hf" +_DEPTH_MAX_WIDTH = 640 + + +class DepthEstimator: + """Runs depth estimation in a background thread, always processing only the latest image.""" + + def __init__(self, publish: Callable[[Image], None]) -> None: + self._publish = publish + self._processor = AutoImageProcessor.from_pretrained(_DEPTH_MODEL_NAME) + self._model = AutoModelForDepthEstimation.from_pretrained(_DEPTH_MODEL_NAME).to("cuda") + self._latest: Image | None = None + self._event = threading.Event() + self._stop = threading.Event() + self._thread: threading.Thread | None = None + + def start(self) -> None: + self._stop.clear() + self._thread = threading.Thread(target=self._loop, daemon=True, name="DepthEstimator") + self._thread.start() + + def stop(self) -> None: + self._stop.set() + self._event.set() + if self._thread is not None: + self._thread.join(timeout=5.0) + self._thread = None + + def submit(self, image: Image) -> None: + """Submit a new image; any unprocessed previous image is discarded.""" + self._latest = image + self._event.set() + + def _loop(self) -> None: + while not self._stop.is_set(): + self._event.wait() + self._event.clear() + if self._stop.is_set(): + break + image = self._latest + if image is not None: + self._process(image) + + def _process(self, image: Image) -> None: + rgb = image.to_rgb() + pil_image = PILImage.fromarray(rgb.data) + if pil_image.width > _DEPTH_MAX_WIDTH: + scale = _DEPTH_MAX_WIDTH / pil_image.width + new_h = int(pil_image.height * scale) + pil_image = pil_image.resize((_DEPTH_MAX_WIDTH, new_h), PILImage.Resampling.BILINEAR) + inputs = self._processor(images=pil_image, return_tensors="pt").to("cuda") + + with torch.no_grad(): + outputs = self._model(**inputs) + + depth = torch.nn.functional.interpolate( + outputs.predicted_depth.unsqueeze(1), + size=(image.height, image.width), + mode="bicubic", + align_corners=False, + ).squeeze() + + depth_np = depth.cpu().numpy().astype(np.float32) + self._publish( + Image.from_numpy( + depth_np, format=ImageFormat.DEPTH, frame_id=image.frame_id, ts=image.ts + ) + ) diff --git a/dimos/experimental/security_demo/security_module.py b/dimos/experimental/security_demo/security_module.py new file mode 100644 index 0000000000..d28d33ddaf --- /dev/null +++ b/dimos/experimental/security_demo/security_module.py @@ -0,0 +1,401 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations # noqa: I001 + +import threading +import time +from typing import TYPE_CHECKING, Any, Literal + +import cv2 +import numpy as np +import torch +from dimos_lcm.std_msgs import String, Bool +from reactivex.disposable import Disposable + +from dimos.agents.annotation import skill +from dimos.experimental.security_demo.depth_estimator import DepthEstimator +from dimos.core.core import rpc +from dimos.core.global_config import GlobalConfig +from dimos.core.module import Module, ModuleConfig +from dimos.core.stream import In, Out +from dimos.models.segmentation.edge_tam import EdgeTAMProcessor +from dimos.perception.detection.detectors.person.yolo import YoloPersonDetector +from dimos.perception.detection.type.detection2d.person import Detection2DPerson +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped +from dimos.msgs.geometry_msgs.Twist import Twist +from dimos.msgs.nav_msgs.OccupancyGrid import OccupancyGrid +from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo +from dimos.msgs.sensor_msgs.Image import Image, ImageFormat +from dimos.navigation.patrolling.create_patrol_router import create_patrol_router +from dimos.navigation.patrolling.routers.patrol_router import PatrolRouter +from dimos.agents.skills.speak_skill_spec import SpeakSkillSpec +from dimos.navigation.replanning_a_star.module_spec import ReplanningAStarPlannerSpec +from dimos.navigation.visual_servoing.visual_servoing_2d import VisualServoing2D +from dimos.perception.common.utils import draw_bounding_box +from dimos.utils.logging_config import setup_logger + +if TYPE_CHECKING: + from dimos.perception.detection.type.detection2d.bbox import Detection2DBBox + +logger = setup_logger() + + +# COCO skeleton connections for drawing +_SKELETON_CONNECTIONS = [ + (0, 1), + (0, 2), + (1, 3), + (2, 4), # face + (5, 6), # shoulders + (5, 7), + (7, 9), # left arm + (6, 8), + (8, 10), # right arm + (5, 11), + (6, 12), + (11, 12), # torso + (11, 13), + (13, 15), # left leg + (12, 14), + (14, 16), # right leg +] + +_KP_CONF_THRESHOLD = 0.3 + + +def _draw_skeleton( + image: np.ndarray, # type: ignore[type-arg] + person: Detection2DPerson, + joint_color: tuple[int, int, int] = (0, 255, 0), + bone_color: tuple[int, int, int] = (255, 255, 0), +) -> None: + """Draw pose skeleton directly on *image* (in-place, BGR assumed).""" + kps = person.keypoints # (17, 2) + scores = person.keypoint_scores # (17,) + + # Draw bones first so joints are drawn on top + for i, j in _SKELETON_CONNECTIONS: + if scores[i] > _KP_CONF_THRESHOLD and scores[j] > _KP_CONF_THRESHOLD: + pt1 = (int(kps[i][0]), int(kps[i][1])) + pt2 = (int(kps[j][0]), int(kps[j][1])) + cv2.line(image, pt1, pt2, bone_color, 2, cv2.LINE_AA) + + # Draw joints + for idx in range(len(kps)): + if scores[idx] > _KP_CONF_THRESHOLD: + cx, cy = int(kps[idx][0]), int(kps[idx][1]) + cv2.circle(image, (cx, cy), 4, joint_color, -1, cv2.LINE_AA) + + +State = Literal["IDLE", "PATROLLING", "FOLLOWING"] + + +class SecurityModuleConfig(ModuleConfig): + camera_info: CameraInfo + follow_frequency: float = 20.0 + + +def _create_router(global_config: GlobalConfig) -> PatrolRouter: + clearance_multiplier = 0.5 + clearance_radius_m: float = global_config.robot_width * clearance_multiplier + return create_patrol_router("coverage", clearance_radius_m) + + +def _create_visual_servo( + config: SecurityModuleConfig, global_config: GlobalConfig +) -> VisualServoing2D: + camera_info = config.camera_info + if global_config.simulation: + from dimos.robot.unitree.mujoco_connection import MujocoConnection + + camera_info = MujocoConnection.camera_info_static + + return VisualServoing2D(camera_info, global_config.simulation) + + +class SecurityModule(Module[SecurityModuleConfig]): + """Integrated security patrol module. + + Manages the full patrol-detect-follow state machine internally, + eliminating agent round-trips between separate modules. + """ + + default_config = SecurityModuleConfig + + odom: In[PoseStamped] + global_costmap: In[OccupancyGrid] + goal_reached: In[Bool] + color_image: In[Image] + depth_image: Out[Image] + detection: Out[Image] + tracking_image: Out[Image] + security_state: Out[String] + + goal_request: Out[PoseStamped] + cmd_vel: Out[Twist] + + _planner_spec: ReplanningAStarPlannerSpec + _speak_skill: SpeakSkillSpec + + def __init__(self, **kwargs: Any) -> None: + super().__init__(**kwargs) + + self._router: PatrolRouter = _create_router(self.config.g) + self._visual_servo = _create_visual_servo(self.config, self.config.g) + self._detector = YoloPersonDetector() + self._tracker = EdgeTAMProcessor() + + self._depth_estimator: DepthEstimator | None = None + if torch.cuda.is_available(): + self._depth_estimator = DepthEstimator(self.depth_image.publish) + + self._lock = threading.RLock() + self._stop_event = threading.Event() + self._goal_reached_event = threading.Event() + self._main_thread: threading.Thread | None = None + self._state: State = "IDLE" + self._latest_pose: PoseStamped | None = None + self._latest_image: Image | None = None + self._has_active_goal = False + + @rpc + def start(self) -> None: + super().start() + self._disposables.add(Disposable(self.odom.subscribe(self._on_odom))) + self._disposables.add( + Disposable(self.global_costmap.subscribe(self._router.handle_occupancy_grid)) + ) + self._disposables.add(Disposable(self.goal_reached.subscribe(self._on_goal_reached))) + self._disposables.add(Disposable(self.color_image.subscribe(self._on_color_image))) + + if self._depth_estimator is not None: + self._depth_estimator.start() + + @rpc + def stop(self) -> None: + self._stop_security_patrol_internal() + if self._depth_estimator is not None: + self._depth_estimator.stop() + self._detector.stop() + self._tracker.stop() + super().stop() + + @skill + def start_security_patrol(self) -> str: + """ + Start the security patrol behavior. The robot will patrol, detect + persons visually and then follow them automatically. + """ + with self._lock: + if self._main_thread is not None and self._main_thread.is_alive(): + return "Security patrol is already running. Use `stop_security_patrol` to stop." + + self._router.reset() + + self._planner_spec.set_replanning_enabled(False) + self._planner_spec.set_safe_goal_clearance(self.config.g.robot_rotation_diameter / 2 + 0.2) + + self._stop_event.clear() + self._has_active_goal = False + + self._main_thread = threading.Thread( + target=self._main_loop, daemon=True, name=f"{self.__class__.__name__}-main" + ) + self._main_thread.start() + + return ( + "Security patrol started. The robot will patrol, detect, and follow " + "persons automatically. Use `stop_security_patrol` to stop." + ) + + @skill + def stop_security_patrol(self) -> str: + """Stop the security patrol behavior entirely.""" + self._stop_security_patrol_internal() + return "Security patrol stopped." + + def _on_odom(self, msg: PoseStamped) -> None: + with self._lock: + self._latest_pose = msg + self._router.handle_odom(msg) + + def _on_goal_reached(self, _msg: Bool) -> None: + self._goal_reached_event.set() + + def _on_color_image(self, image: Image) -> None: + with self._lock: + self._latest_image = image + if self._depth_estimator is not None: + self._depth_estimator.submit(image) + + def _main_loop(self) -> None: + self._transition_to("PATROLLING") + + while not self._stop_event.is_set(): + with self._lock: + state: State = self._state + match state: + case "PATROLLING": + self._patrol_step() + case "FOLLOWING": + self._follow_step() + + self.cmd_vel.publish(Twist.zero()) + self._transition_to("IDLE") + + def _patrol_step(self) -> None: + """Send patrol goals and run detection in a single non-blocking step.""" + if not self._has_active_goal: + goal = self._router.next_goal() + if goal is None: + # TODO: Fix this + # ######################################################################################### + # ######################################################################################### + # ######################################################################################### + # ######################################################################################### + # ######################################################################################### + # ######################################################################################### + # ######################################################################################### + # ######################################################################################### + logger.info("no patrol goal available, retrying in 2s") + self._stop_event.wait(timeout=2.0) + return + self._goal_reached_event.clear() + self.goal_request.publish(goal) + self._has_active_goal = True + + if self._goal_reached_event.is_set(): + self._goal_reached_event.clear() + self._has_active_goal = False + + with self._lock: + image = self._latest_image + if image is None: + self._stop_event.wait(timeout=0.01) + return + + best = self._find_best_person(image) + if best is None: + return + + logger.info( + "Detection", + best_bbox=best.bbox, + confidence=f"{best.confidence:.2f}", + area=f"{best.bbox_2d_volume():.0f}px", + ) + + annotated = draw_bounding_box( + image.data.copy(), + list(best.bbox), + label=best.name, + confidence=best.confidence, + ) + if isinstance(best, Detection2DPerson): + _draw_skeleton(annotated, best) + self.detection.publish(Image.from_numpy(annotated, format=image.format)) + + # Init EdgeTAM with YOLO bbox for continuous tracking + box = np.array(list(best.bbox), dtype=np.float32) + self._tracker.init_track(image=image, box=box, obj_id=1) + + self._cancel_current_goal() + self._has_active_goal = False + self._speak_skill.speak("Intruder detected", blocking=False) + self._transition_to("FOLLOWING") + + def _follow_step(self) -> None: + """One iteration of the follow loop (EdgeTAM track + servo + publish).""" + with self._lock: + latest_image = self._latest_image + + if latest_image is None: + self._stop_event.wait(timeout=0.01) + return + + detections = self._tracker.process_image(latest_image) + + if len(detections) == 0: + self.cmd_vel.publish(Twist.zero()) + self._speak_skill.speak("Lost sight of intruder, resuming patrol", blocking=False) + self._router.reset() + self._has_active_goal = False + self._transition_to("PATROLLING") + return + + best = max(detections.detections, key=lambda d: d.bbox_2d_volume()) + twist = self._visual_servo.compute_twist(best.bbox, latest_image.width) + self.cmd_vel.publish(twist) + + overlay = latest_image.data.copy() + if hasattr(best, "mask") and best.mask is not None: + mask_bool = best.mask > 0 + green = np.zeros_like(overlay) + green[:, :] = (0, 255, 0) if latest_image.format == ImageFormat.BGR else (0, 128, 0) + overlay[mask_bool] = cv2.addWeighted(overlay[mask_bool], 0.6, green[mask_bool], 0.4, 0) + + # Run pose estimation on the tracked frame and draw skeleton + pose_detections = self._detector.process_image(latest_image) + persons = [ + d + for d in pose_detections.detections + if isinstance(d, Detection2DPerson) and d.is_valid() + ] + for person in persons: + _draw_skeleton(overlay, person) + + self.tracking_image.publish( + Image.from_numpy(overlay, format=latest_image.format, ts=latest_image.ts) + ) + + time.sleep(1.0 / self.config.follow_frequency) + + def _find_best_person(self, image: Image) -> Detection2DBBox | None: + """Run YOLO and return the largest person detection, or None.""" + all_detections = self._detector.process_image(image) + persons = [d for d in all_detections.detections if d.name == "person"] + if not persons: + return None + return max(persons, key=lambda d: d.bbox_2d_volume()) + + def _cancel_current_goal(self) -> None: + """Publish current pose as goal to cancel in-progress navigation.""" + with self._lock: + pose = self._latest_pose + if pose is not None: + self.goal_request.publish(pose) + + def _transition_to(self, new_state: State) -> None: + with self._lock: + old = self._state + self._state = new_state + + logger.info("state transition", old=old, new=new_state) + self.security_state.publish(String(new_state)) + + def _stop_security_patrol_internal(self) -> None: + self._stop_event.set() + + self._planner_spec.set_replanning_enabled(True) + self._planner_spec.reset_safe_goal_clearance() + + self._cancel_current_goal() + + with self._lock: + thread = self._main_thread + if thread is not None: + thread.join(timeout=5.0) + with self._lock: + self._main_thread = None diff --git a/dimos/experimental/security_demo/test_security_module.py b/dimos/experimental/security_demo/test_security_module.py new file mode 100644 index 0000000000..a09a4eb85b --- /dev/null +++ b/dimos/experimental/security_demo/test_security_module.py @@ -0,0 +1,164 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import threading + +from dimos.msgs.geometry_msgs.PoseStamped import PoseStamped +from dimos.msgs.geometry_msgs.Twist import Twist +from dimos.perception.detection.type.detection2d.imageDetections2D import ImageDetections2D + + +def test_find_best_person_detects_person(security_module, yolo_detector, person_image): + security_module._detector = yolo_detector + + result = security_module._find_best_person(person_image) + + assert result is not None + assert result.name == "person" + assert result.bbox_2d_volume() > 0 + + +def test_find_best_person_returns_none_for_empty_scene(security_module, yolo_detector, empty_image): + security_module._detector = yolo_detector + + result = security_module._find_best_person(empty_image) + + assert result is None + + +def test_patrol_step_transitions_to_following_on_detection( + security_module, person_image, make_detection, mocker +): + module = security_module + module._state = "PATROLLING" + module._has_active_goal = True + module._latest_image = person_image + module._latest_pose = PoseStamped(position=[1, 2, 0], orientation=[0, 0, 0, 1]) + + det = make_detection() + module._detector.process_image.return_value = ImageDetections2D( + image=person_image, detections=[det] + ) + # patch to avoid cv2 dep issues + mocker.patch( + "dimos.experimental.security_demo.security_module.draw_bounding_box", + return_value=person_image.data.copy(), + ) + + module._patrol_step() + + assert module._state == "FOLLOWING" + last_state = module.security_state.publish.call_args[0][0] + assert last_state.data == "FOLLOWING" + module._speak_skill.speak.assert_called_with("Intruder detected", blocking=False) + module.goal_request.publish.assert_called() # goal cancellation + module.detection.publish.assert_called() + assert module._has_active_goal is False + assert module._tracker is not None + module._tracker.init_track.assert_called_once() + + +def test_patrol_step_requests_goal_when_no_active_goal(security_module): + module = security_module + module._state = "PATROLLING" + module._has_active_goal = False + module._latest_image = None # causes early return after goal logic + + goal = PoseStamped(position=[5, 5, 0], orientation=[0, 0, 0, 1]) + module._router.next_goal.return_value = goal + + module._patrol_step() + + module.goal_request.publish.assert_called_once_with(goal) + assert module._has_active_goal is True + + +def test_follow_step_publishes_twist_when_tracking( + security_module, person_image, make_detection, mocker +): + module = security_module + module._state = "FOLLOWING" + module._latest_image = person_image + + bbox = (100.0, 50.0, 300.0, 400.0) + det = make_detection(bbox=bbox) + module._tracker.process_image.return_value = ImageDetections2D( + image=person_image, detections=[det] + ) + + twist = Twist(linear=[0.3, 0, 0], angular=[0, 0, 0.1]) + module._visual_servo.compute_twist.return_value = twist + + mocker.patch("dimos.experimental.security_demo.security_module.time.sleep") + + module._follow_step() + + module._visual_servo.compute_twist.assert_called_once_with(bbox, person_image.width) + module.cmd_vel.publish.assert_called_once_with(twist) + assert module._state == "FOLLOWING" + + +def test_follow_step_transitions_to_patrolling_on_person_lost(security_module, person_image): + module = security_module + module._state = "FOLLOWING" + module._latest_image = person_image + + module._tracker.process_image.return_value = ImageDetections2D( + image=person_image, detections=[] + ) + + module._follow_step() + + # Zero twist should be published to stop the robot + published_twist = module.cmd_vel.publish.call_args[0][0] + assert published_twist.is_zero() + + module._speak_skill.speak.assert_called_with( + "Lost sight of intruder, resuming patrol", blocking=False + ) + module._router.reset.assert_called_once() + assert module._state == "PATROLLING" + assert module._has_active_goal is False + + +def test_main_loop_stops_cleanly(security_module): + module = security_module + + call_count = 0 + + def patrol_side_effect(): + nonlocal call_count + call_count += 1 + if call_count >= 1: + module._stop_event.set() + + module._patrol_step = patrol_side_effect + + thread = threading.Thread(target=module._main_loop, daemon=True) + thread.start() + thread.join(timeout=5.0) + + assert not thread.is_alive(), "main_loop thread did not stop" + assert module._state == "IDLE" + + # Verify zero twist published on shutdown + last_twist = module.cmd_vel.publish.call_args[0][0] + assert last_twist.is_zero() + + # Verify state transitions: PATROLLING then IDLE + state_values = [call.args[0].data for call in module.security_state.publish.call_args_list] + assert "PATROLLING" in state_values + assert "IDLE" in state_values diff --git a/dimos/robot/all_blueprints.py b/dimos/robot/all_blueprints.py index 5910093d61..0498b77c75 100644 --- a/dimos/robot/all_blueprints.py +++ b/dimos/robot/all_blueprints.py @@ -84,6 +84,7 @@ "unitree-go2-detection": "dimos.robot.unitree.go2.blueprints.smart.unitree_go2_detection:unitree_go2_detection", "unitree-go2-fleet": "dimos.robot.unitree.go2.blueprints.basic.unitree_go2_fleet:unitree_go2_fleet", "unitree-go2-ros": "dimos.robot.unitree.go2.blueprints.smart.unitree_go2_ros:unitree_go2_ros", + "unitree-go2-security": "dimos.robot.unitree.go2.blueprints.agentic.unitree_go2_security:unitree_go2_security", "unitree-go2-spatial": "dimos.robot.unitree.go2.blueprints.smart.unitree_go2_spatial:unitree_go2_spatial", "unitree-go2-temporal-memory": "dimos.robot.unitree.go2.blueprints.agentic.unitree_go2_temporal_memory:unitree_go2_temporal_memory", "unitree-go2-vlm-stream-test": "dimos.robot.unitree.go2.blueprints.smart.unitree_go2_vlm_stream_test:unitree_go2_vlm_stream_test", @@ -98,79 +99,80 @@ all_modules = { - "arm-teleop-module": "dimos.teleop.quest.quest_extensions", - "b-box-navigation-module": "dimos.navigation.bbox_navigation", - "b1-connection-module": "dimos.robot.unitree.b1.connection", - "camera-module": "dimos.hardware.sensors.camera.module", - "cartesian-motion-controller": "dimos.manipulation.control.servo_control.cartesian_motion_controller", - "control-coordinator": "dimos.control.coordinator", - "cost-mapper": "dimos.mapping.costmapper", - "demo-calculator-skill": "dimos.agents.skills.demo_calculator_skill", - "demo-robot": "dimos.agents.skills.demo_robot", - "detection2-d-module": "dimos.perception.detection.module2D", - "detection3-d-module": "dimos.perception.detection.module3D", - "drone-camera-module": "dimos.robot.drone.camera_module", - "drone-connection-module": "dimos.robot.drone.connection_module", - "drone-tracking-module": "dimos.robot.drone.drone_tracking_module", - "embedding-memory": "dimos.memory.embedding", - "emitter-module": "dimos.utils.demo_image_encoding", - "fast-lio2": "dimos.hardware.sensors.lidar.fastlio2.module", - "foxglove-bridge": "dimos.robot.foxglove_bridge", - "g1-connection": "dimos.robot.unitree.g1.connection", - "g1-connection-base": "dimos.robot.unitree.g1.connection", - "g1-sim-connection": "dimos.robot.unitree.g1.sim", - "go2-connection": "dimos.robot.unitree.go2.connection", - "go2-fleet-connection": "dimos.robot.unitree.go2.fleet_connection", - "google-maps-skill-container": "dimos.agents.skills.google_maps_skill_container", - "gps-nav-skill-container": "dimos.agents.skills.gps_nav_skill", - "grasp-gen-module": "dimos.manipulation.grasping.graspgen_module", - "grasping-module": "dimos.manipulation.grasping.grasping", - "gstreamer-camera-module": "dimos.hardware.sensors.camera.gstreamer.gstreamer_camera", - "joint-trajectory-controller": "dimos.manipulation.control.trajectory_controller.joint_trajectory_controller", - "joystick-module": "dimos.robot.unitree.b1.joystick_module", - "keyboard-teleop": "dimos.robot.unitree.keyboard_teleop", - "keyboard-teleop-module": "dimos.teleop.keyboard.keyboard_teleop_module", - "manipulation-module": "dimos.manipulation.manipulation_module", - "map": "dimos.robot.unitree.type.map", - "mcp-client": "dimos.agents.mcp.mcp_client", - "mcp-server": "dimos.agents.mcp.mcp_server", - "mock-b1-connection-module": "dimos.robot.unitree.b1.connection", - "module-a": "dimos.robot.unitree.demo_error_on_name_conflicts", - "module-b": "dimos.robot.unitree.demo_error_on_name_conflicts", - "navigation-module": "dimos.robot.unitree.rosnav", - "navigation-skill-container": "dimos.agents.skills.navigation", - "object-db-module": "dimos.perception.detection.moduleDB", - "object-scene-registration-module": "dimos.perception.object_scene_registration", - "object-tracker2-d": "dimos.perception.object_tracker_2d", - "object-tracker3-d": "dimos.perception.object_tracker_3d", - "object-tracking": "dimos.perception.object_tracker", - "osm-skill": "dimos.agents.skills.osm", - "patrolling-module": "dimos.navigation.patrolling.module", - "perceive-loop-skill": "dimos.perception.perceive_loop_skill", - "person-follow-skill-container": "dimos.agents.skills.person_follow", - "person-tracker": "dimos.perception.detection.person_tracker", - "phone-teleop-module": "dimos.teleop.phone.phone_teleop_module", - "pick-and-place-module": "dimos.manipulation.pick_and_place_module", - "quest-teleop-module": "dimos.teleop.quest.quest_teleop_module", - "real-sense-camera": "dimos.hardware.sensors.camera.realsense.camera", - "receiver-module": "dimos.utils.demo_image_encoding", - "reid-module": "dimos.perception.detection.reid.module", - "replanning-a-star-planner": "dimos.navigation.replanning_a_star.module", - "rerun-bridge-module": "dimos.visualization.rerun.bridge", - "ros-nav": "dimos.navigation.rosnav", - "simple-phone-teleop": "dimos.teleop.phone.phone_extensions", - "spatial-memory": "dimos.perception.spatial_perception", - "speak-skill": "dimos.agents.skills.speak_skill", - "temporal-memory": "dimos.perception.experimental.temporal_memory.temporal_memory", - "twist-teleop-module": "dimos.teleop.quest.quest_extensions", - "unitree-g1-skill-container": "dimos.robot.unitree.g1.skill_container", - "unitree-skill-container": "dimos.robot.unitree.unitree_skill_container", - "unity-bridge-module": "dimos.simulation.unity.module", - "vlm-agent": "dimos.agents.vlm_agent", - "vlm-stream-tester": "dimos.agents.vlm_stream_tester", - "voxel-grid-mapper": "dimos.mapping.voxels", - "wavefront-frontier-explorer": "dimos.navigation.frontier_exploration.wavefront_frontier_goal_selector", - "web-input": "dimos.agents.web_human_input", - "websocket-vis-module": "dimos.web.websocket_vis.websocket_vis_module", - "zed-camera": "dimos.hardware.sensors.camera.zed.camera", + "arm-teleop-module": "dimos.teleop.quest.quest_extensions.ArmTeleopModule", + "b-box-navigation-module": "dimos.navigation.bbox_navigation.BBoxNavigationModule", + "b1-connection-module": "dimos.robot.unitree.b1.connection.B1ConnectionModule", + "camera-module": "dimos.hardware.sensors.camera.module.CameraModule", + "cartesian-motion-controller": "dimos.manipulation.control.servo_control.cartesian_motion_controller.CartesianMotionController", + "control-coordinator": "dimos.control.coordinator.ControlCoordinator", + "cost-mapper": "dimos.mapping.costmapper.CostMapper", + "demo-calculator-skill": "dimos.agents.skills.demo_calculator_skill.DemoCalculatorSkill", + "demo-robot": "dimos.agents.skills.demo_robot.DemoRobot", + "detection2-d-module": "dimos.perception.detection.module2D.Detection2DModule", + "detection3-d-module": "dimos.perception.detection.module3D.Detection3DModule", + "drone-camera-module": "dimos.robot.drone.camera_module.DroneCameraModule", + "drone-connection-module": "dimos.robot.drone.connection_module.DroneConnectionModule", + "drone-tracking-module": "dimos.robot.drone.drone_tracking_module.DroneTrackingModule", + "embedding-memory": "dimos.memory.embedding.EmbeddingMemory", + "emitter-module": "dimos.utils.demo_image_encoding.EmitterModule", + "fast-lio2": "dimos.hardware.sensors.lidar.fastlio2.module.FastLio2", + "foxglove-bridge": "dimos.robot.foxglove_bridge.FoxgloveBridge", + "g1-connection": "dimos.robot.unitree.g1.connection.G1Connection", + "g1-connection-base": "dimos.robot.unitree.g1.connection.G1ConnectionBase", + "g1-sim-connection": "dimos.robot.unitree.g1.sim.G1SimConnection", + "go2-connection": "dimos.robot.unitree.go2.connection.GO2Connection", + "go2-fleet-connection": "dimos.robot.unitree.go2.fleet_connection.Go2FleetConnection", + "google-maps-skill-container": "dimos.agents.skills.google_maps_skill_container.GoogleMapsSkillContainer", + "gps-nav-skill-container": "dimos.agents.skills.gps_nav_skill.GpsNavSkillContainer", + "grasp-gen-module": "dimos.manipulation.grasping.graspgen_module.GraspGenModule", + "grasping-module": "dimos.manipulation.grasping.grasping.GraspingModule", + "gstreamer-camera-module": "dimos.hardware.sensors.camera.gstreamer.gstreamer_camera.GstreamerCameraModule", + "joint-trajectory-controller": "dimos.manipulation.control.trajectory_controller.joint_trajectory_controller.JointTrajectoryController", + "joystick-module": "dimos.robot.unitree.b1.joystick_module.JoystickModule", + "keyboard-teleop": "dimos.robot.unitree.keyboard_teleop.KeyboardTeleop", + "keyboard-teleop-module": "dimos.teleop.keyboard.keyboard_teleop_module.KeyboardTeleopModule", + "manipulation-module": "dimos.manipulation.manipulation_module.ManipulationModule", + "map": "dimos.robot.unitree.type.map.Map", + "mcp-client": "dimos.agents.mcp.mcp_client.McpClient", + "mcp-server": "dimos.agents.mcp.mcp_server.McpServer", + "mock-b1-connection-module": "dimos.robot.unitree.b1.connection.MockB1ConnectionModule", + "module-a": "dimos.robot.unitree.demo_error_on_name_conflicts.ModuleA", + "module-b": "dimos.robot.unitree.demo_error_on_name_conflicts.ModuleB", + "navigation-module": "dimos.robot.unitree.rosnav.NavigationModule", + "navigation-skill-container": "dimos.agents.skills.navigation.NavigationSkillContainer", + "object-db-module": "dimos.perception.detection.moduleDB.ObjectDBModule", + "object-scene-registration-module": "dimos.perception.object_scene_registration.ObjectSceneRegistrationModule", + "object-tracker2-d": "dimos.perception.object_tracker_2d.ObjectTracker2D", + "object-tracker3-d": "dimos.perception.object_tracker_3d.ObjectTracker3D", + "object-tracking": "dimos.perception.object_tracker.ObjectTracking", + "osm-skill": "dimos.agents.skills.osm.OsmSkill", + "patrolling-module": "dimos.navigation.patrolling.module.PatrollingModule", + "perceive-loop-skill": "dimos.perception.perceive_loop_skill.PerceiveLoopSkill", + "person-follow-skill-container": "dimos.agents.skills.person_follow.PersonFollowSkillContainer", + "person-tracker": "dimos.perception.detection.person_tracker.PersonTracker", + "phone-teleop-module": "dimos.teleop.phone.phone_teleop_module.PhoneTeleopModule", + "pick-and-place-module": "dimos.manipulation.pick_and_place_module.PickAndPlaceModule", + "quest-teleop-module": "dimos.teleop.quest.quest_teleop_module.QuestTeleopModule", + "real-sense-camera": "dimos.hardware.sensors.camera.realsense.camera.RealSenseCamera", + "receiver-module": "dimos.utils.demo_image_encoding.ReceiverModule", + "reid-module": "dimos.perception.detection.reid.module.ReidModule", + "replanning-a-star-planner": "dimos.navigation.replanning_a_star.module.ReplanningAStarPlanner", + "rerun-bridge-module": "dimos.visualization.rerun.bridge.RerunBridgeModule", + "ros-nav": "dimos.navigation.rosnav.ROSNav", + "security-module": "dimos.experimental.security_demo.security_module.SecurityModule", + "simple-phone-teleop": "dimos.teleop.phone.phone_extensions.SimplePhoneTeleop", + "spatial-memory": "dimos.perception.spatial_perception.SpatialMemory", + "speak-skill": "dimos.agents.skills.speak_skill.SpeakSkill", + "temporal-memory": "dimos.perception.experimental.temporal_memory.temporal_memory.TemporalMemory", + "twist-teleop-module": "dimos.teleop.quest.quest_extensions.TwistTeleopModule", + "unitree-g1-skill-container": "dimos.robot.unitree.g1.skill_container.UnitreeG1SkillContainer", + "unitree-skill-container": "dimos.robot.unitree.unitree_skill_container.UnitreeSkillContainer", + "unity-bridge-module": "dimos.simulation.unity.module.UnityBridgeModule", + "vlm-agent": "dimos.agents.vlm_agent.VLMAgent", + "vlm-stream-tester": "dimos.agents.vlm_stream_tester.VlmStreamTester", + "voxel-grid-mapper": "dimos.mapping.voxels.VoxelGridMapper", + "wavefront-frontier-explorer": "dimos.navigation.frontier_exploration.wavefront_frontier_goal_selector.WavefrontFrontierExplorer", + "web-input": "dimos.agents.web_human_input.WebInput", + "websocket-vis-module": "dimos.web.websocket_vis.websocket_vis_module.WebsocketVisModule", + "zed-camera": "dimos.hardware.sensors.camera.zed.camera.ZEDCamera", } diff --git a/dimos/robot/get_all_blueprints.py b/dimos/robot/get_all_blueprints.py index f7a79fb8d7..9fb4b9e94c 100644 --- a/dimos/robot/get_all_blueprints.py +++ b/dimos/robot/get_all_blueprints.py @@ -45,9 +45,9 @@ def get_blueprint_by_name(name: str) -> Blueprint: def get_module_by_name(name: str) -> Blueprint: if name not in all_modules: _fail_unknown(name, list(all_modules.keys())) - attr_name = name.replace("-", "_") - python_module = __import__(all_modules[name], fromlist=[attr_name]) - return getattr(python_module, attr_name)() # type: ignore[no-any-return] + module_path, class_name = all_modules[name].rsplit(".", 1) + python_module = __import__(module_path, fromlist=[class_name]) + return getattr(python_module, class_name).blueprint() # type: ignore[no-any-return] def get_by_name(name: str) -> Blueprint: diff --git a/dimos/robot/test_all_blueprints_generation.py b/dimos/robot/test_all_blueprints_generation.py index c4b9652e47..48d482f3b6 100644 --- a/dimos/robot/test_all_blueprints_generation.py +++ b/dimos/robot/test_all_blueprints_generation.py @@ -171,9 +171,9 @@ def _scan_for_blueprints(root: Path) -> tuple[dict[str, str], dict[str, str]]: # Only register modules from production files (skip test, deprecated, core) if _is_production_module_file(file_path, root): - for var_name in module_vars: + for var_name, class_name in module_vars: cli_name = var_name.replace("_", "-") - all_modules[cli_name] = module_name + all_modules[cli_name] = f"{module_name}.{class_name}" # Blueprints take priority when names collide (e.g. a pre-configured # blueprint named "mid360" vs the raw Mid360 Module class). @@ -250,9 +250,9 @@ def _path_to_module_name(path: Path, root: Path) -> str: def _find_blueprints_in_file( file_path: Path, module_classes: set[str] | None = None -) -> tuple[list[str], list[str]]: +) -> tuple[list[str], list[tuple[str, str]]]: blueprint_vars: list[str] = [] - module_vars: list[str] = [] + module_vars: list[tuple[str, str]] = [] try: source = file_path.read_text(encoding="utf-8") @@ -281,7 +281,7 @@ def _find_blueprints_in_file( if node.name.startswith("_") or node.name in _EXCLUDED_MODULE_NAMES: continue if any(b in module_classes for b in _get_base_class_names(node)): - module_vars.append(_camel_to_snake(node.name)) + module_vars.append((_camel_to_snake(node.name), node.name)) return blueprint_vars, module_vars diff --git a/dimos/robot/unitree/go2/blueprints/agentic/unitree_go2_security.py b/dimos/robot/unitree/go2/blueprints/agentic/unitree_go2_security.py new file mode 100644 index 0000000000..f8b292b103 --- /dev/null +++ b/dimos/robot/unitree/go2/blueprints/agentic/unitree_go2_security.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +# Copyright 2027 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any + +from dimos.core.blueprints import autoconnect +from dimos.protocol.pubsub.impl.lcmpubsub import LCM +from dimos.robot.unitree.go2.blueprints.agentic.unitree_go2_agentic import unitree_go2_agentic +from dimos.visualization.rerun.bridge import RerunBridgeModule, _resolve_viewer_mode + + +def _convert_camera_info(camera_info: Any) -> Any: + return camera_info.to_rerun( + image_topic="/world/color_image", + optical_frame="camera_optical", + ) + + +def _convert_global_map(grid: Any) -> Any: + return grid.to_rerun(voxel_size=0.1, mode="boxes") + + +def _convert_navigation_costmap(grid: Any) -> Any: + return grid.to_rerun( + colormap="Accent", + z_offset=0.015, + opacity=0.2, + background="#484981", + ) + + +def _static_base_link(rr: Any) -> list[Any]: + return [ + rr.Boxes3D( + half_sizes=[0.35, 0.155, 0.2], + colors=[(0, 255, 127)], + fill_mode="wireframe", + ), + rr.Transform3D(parent_frame="tf#/base_link"), + ] + + +def _go2_rerun_blueprint() -> Any: + import rerun.blueprint as rrb + + return rrb.Blueprint( + rrb.Horizontal( + rrb.Vertical( + rrb.Spatial2DView(origin="world/color_image", name="Camera"), + rrb.Spatial2DView(origin="world/depth_image", name="Depth"), + rrb.Spatial2DView(origin="world/tracking_image", name="Track"), + row_shares=[1, 1, 1], + ), + rrb.Vertical( + rrb.Spatial2DView(origin="world/tracking_image", name="Info"), + rrb.Spatial3DView(origin="world", name="3D"), + row_shares=[1, 2], + ), + column_shares=[1, 2], + ), + rrb.TimePanel(state="hidden"), + rrb.SelectionPanel(state="hidden"), + ) + + +rerun_config = { + "blueprint": _go2_rerun_blueprint, + "pubsubs": [LCM()], + "visual_override": { + "world/camera_info": _convert_camera_info, + "world/global_map": _convert_global_map, + "world/navigation_costmap": _convert_navigation_costmap, + }, + "static": { + "world/tf/base_link": _static_base_link, + }, +} + +unitree_go2_security = autoconnect( + unitree_go2_agentic, + RerunBridgeModule.blueprint(viewer_mode=_resolve_viewer_mode(), **rerun_config), +) + +__all__ = ["unitree_go2_security"] diff --git a/dimos/robot/unitree/go2/blueprints/basic/unitree_go2_basic.py b/dimos/robot/unitree/go2/blueprints/basic/unitree_go2_basic.py index 34d9610249..d6f382383a 100644 --- a/dimos/robot/unitree/go2/blueprints/basic/unitree_go2_basic.py +++ b/dimos/robot/unitree/go2/blueprints/basic/unitree_go2_basic.py @@ -90,6 +90,8 @@ def _go2_rerun_blueprint() -> Any: ), column_shares=[1, 2], ), + rrb.TimePanel(state="hidden"), + rrb.SelectionPanel(state="hidden"), ) diff --git a/dimos/robot/unitree/go2/blueprints/smart/unitree_go2_spatial.py b/dimos/robot/unitree/go2/blueprints/smart/unitree_go2_spatial.py index 840458d998..163d4da826 100644 --- a/dimos/robot/unitree/go2/blueprints/smart/unitree_go2_spatial.py +++ b/dimos/robot/unitree/go2/blueprints/smart/unitree_go2_spatial.py @@ -14,14 +14,17 @@ # limitations under the License. from dimos.core.blueprints import autoconnect +from dimos.experimental.security_demo.security_module import SecurityModule from dimos.perception.perceive_loop_skill import PerceiveLoopSkill from dimos.perception.spatial_perception import SpatialMemory from dimos.robot.unitree.go2.blueprints.smart.unitree_go2 import unitree_go2 +from dimos.robot.unitree.go2.connection import GO2Connection unitree_go2_spatial = autoconnect( unitree_go2, SpatialMemory.blueprint(), PerceiveLoopSkill.blueprint(), + SecurityModule.blueprint(camera_info=GO2Connection.camera_info_static), ).global_config(n_workers=8) __all__ = ["unitree_go2_spatial"] diff --git a/dimos/simulation/mujoco/constants.py b/dimos/simulation/mujoco/constants.py index 4e35011530..e1c6c60fc9 100644 --- a/dimos/simulation/mujoco/constants.py +++ b/dimos/simulation/mujoco/constants.py @@ -15,8 +15,8 @@ from pathlib import Path # Video/Camera constants -VIDEO_WIDTH = 320 -VIDEO_HEIGHT = 240 +VIDEO_WIDTH = 640 +VIDEO_HEIGHT = 360 VIDEO_CAMERA_FOV = 45 # MuJoCo default FOV for head_camera (degrees) DEPTH_CAMERA_FOV = 160 diff --git a/dimos/stream/audio/node_output.py b/dimos/stream/audio/node_output.py index 4b4d407329..f35b7615bd 100644 --- a/dimos/stream/audio/node_output.py +++ b/dimos/stream/audio/node_output.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import threading from typing import Any import numpy as np @@ -61,6 +62,7 @@ def __init__( self.dtype = dtype self._stream = None + self._stream_lock = threading.Lock() self._running = False self._subscription = None self.audio_observable = None @@ -128,14 +130,15 @@ def stop(self) -> None: self._subscription.dispose() self._subscription = None - if self._stream: - self._stream.stop() - self._stream.close() - self._stream = None + with self._stream_lock: + if self._stream: + self._stream.stop() + self._stream.close() + self._stream = None def _play_audio_event(self, audio_event) -> None: # type: ignore[no-untyped-def] """Play audio from an AudioEvent.""" - if not self._running or not self._stream: + if not self._running: return try: @@ -146,8 +149,9 @@ def _play_audio_event(self, audio_event) -> None: # type: ignore[no-untyped-def elif self.dtype == np.int16: audio_event = audio_event.to_int16() - # Write audio data to the stream - self._stream.write(audio_event.data) + with self._stream_lock: + if self._stream: + self._stream.write(audio_event.data) except Exception as e: logger.error(f"Error playing audio: {e}") @@ -159,10 +163,11 @@ def _handle_completion(self) -> None: """Handle completion of the observable.""" logger.info("Audio observable completed") self._running = False - if self._stream: - self._stream.stop() - self._stream.close() - self._stream = None + with self._stream_lock: + if self._stream: + self._stream.stop() + self._stream.close() + self._stream = None def get_available_devices(self) -> list[dict[str, Any]]: """Get a list of available audio output devices.""" diff --git a/dimos/stream/audio/tts/node_openai.py b/dimos/stream/audio/tts/node_openai.py index bed1f35682..10ad1bd5b0 100644 --- a/dimos/stream/audio/tts/node_openai.py +++ b/dimos/stream/audio/tts/node_openai.py @@ -204,8 +204,12 @@ def dispose(self) -> None: self.is_running = False + # Clear pending items so the thread doesn't start new synthesis. + with self.queue_lock: + self.text_queue.clear() + if self.processing_thread and self.processing_thread.is_alive(): - self.processing_thread.join(timeout=5.0) + self.processing_thread.join(timeout=30.0) if self.subscription: self.subscription.dispose() diff --git a/pyproject.toml b/pyproject.toml index 7e2f38546e..a6842bd10b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -416,6 +416,8 @@ module = [ "sqlite_vec", "std_msgs.*", "tf2_msgs.*", + "transformers", + "transformers.*", "torchreid", "turbojpeg", "ultralytics.*",