Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions data/.lfs/security_detection.png.tar.gz
Git LFS file not shown
3 changes: 3 additions & 0 deletions data/.lfs/security_no_detection.png.tar.gz
Git LFS file not shown
32 changes: 31 additions & 1 deletion dimos/agents/skills/speak_skill.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ class SpeakSkill(Module):
_tts_node: OpenAITTSNode | None = None
_audio_output: SounddeviceAudioOutput | None = None
_audio_lock: threading.Lock = threading.Lock()
_bg_threads: list[threading.Thread] = []
_bg_threads_lock: threading.Lock = threading.Lock()

@rpc
def start(self) -> None:
Expand All @@ -41,6 +43,10 @@ def start(self) -> None:

@rpc
def stop(self) -> None:
with self._bg_threads_lock:
threads = list(self._bg_threads)
for t in threads:
t.join(timeout=10.0)
if self._tts_node:
self._tts_node.dispose()
self._tts_node = None
Expand All @@ -50,7 +56,7 @@ def stop(self) -> None:
super().stop()

@skill
def speak(self, text: str) -> str:
def speak(self, text: str, blocking: bool = True) -> str:
"""Speak text out loud through the robot's speakers.

USE THIS TOOL AS OFTEN AS NEEDED. People can't normally see what you say in text, but can hear what you speak.
Expand All @@ -64,8 +70,32 @@ def speak(self, text: str) -> str:
if self._tts_node is None:
return "Error: TTS not initialized"

if not blocking:
thread = threading.Thread(
target=self._speak_bg, args=(text,), daemon=True, name="SpeakSkill-bg"
)
with self._bg_threads_lock:
self._bg_threads.append(thread)
thread.start()
return f"Speaking (non-blocking): {text}"

return self._speak_blocking(text)

def _speak_bg(self, text: str) -> None:
    """Speak ``text`` via ``_speak_blocking`` and then untrack the current thread.

    This is the thread target used by the non-blocking path of ``speak``.
    Whatever ``_speak_blocking`` does (including raising), the calling
    thread is removed from ``_bg_threads`` afterwards.

    Args:
        text: The text forwarded unchanged to ``_speak_blocking``.
    """
    try:
        self._speak_blocking(text)
    finally:
        finished = threading.current_thread()
        with self._bg_threads_lock:
            # Prune in place instead of rebinding the attribute. ``speak``
            # appends to this list in place and ``_bg_threads`` is declared
            # at class level, so rebinding would create a shadowing instance
            # attribute and leave this thread referenced by the original list.
            self._bg_threads[:] = [t for t in self._bg_threads if t is not finished]

def _speak_blocking(self, text: str) -> str:
# Use lock to prevent simultaneous speech
with self._audio_lock:
if self._tts_node is None:
return "Error: TTS not initialized"

text_subject: Subject[str] = Subject()
audio_complete = threading.Event()
self._tts_node.consume_text(text_subject)
Expand Down
21 changes: 21 additions & 0 deletions dimos/agents/skills/speak_skill_spec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright 2026 Dimensional Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Protocol

from dimos.spec.utils import Spec


class SpeakSkillSpec(Spec, Protocol):
    """Structural spec for an object that offers a ``speak`` method."""

    def speak(
        self,
        text: str,
        blocking: bool = True,
    ) -> str:
        """Take the text to speak plus a ``blocking`` flag (default True); return a string."""
        ...
27 changes: 27 additions & 0 deletions dimos/e2e_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,30 @@ def direct_cmd_vel_explorer() -> Generator[PersonTrackPublisher, None, None]:
explorer.start()
yield explorer
explorer.stop()


@pytest.fixture
def explore_office(
    direct_cmd_vel_explorer: DirectCmdVelExplorer,
) -> Callable[[], None]:
    """Provide a zero-argument callable that runs a fixed exploration route.

    Calling the returned function passes the hard-coded waypoint list below to
    ``direct_cmd_vel_explorer.follow_points``. The pairs are presumably (x, y)
    positions in the simulated office - confirm against ``follow_points``.
    """
    office_route = [
        (0, -7.07),
        (-4.16, -7.07),
        (-4.45, 1.10),
        (-6.72, 2.87),
        (-1.78, 3.01),
        (-1.54, 5.74),
        (3.88, 6.16),
        (2.16, 9.36),
        (4.70, 3.87),
        (4.67, -7.15),
        (4.57, -4.19),
        (-0.84, -2.78),
        (-4.71, 1.17),
        (4.30, 0.87),
    ]

    def run_route() -> None:
        # Looked up at call time so the test decides when exploration happens.
        direct_cmd_vel_explorer.follow_points(office_route)

    return run_route
27 changes: 2 additions & 25 deletions dimos/e2e_tests/test_patrol_and_follow.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,6 @@
from dimos.e2e_tests.conf_types import StartPersonTrack
from dimos.e2e_tests.dimos_cli_call import DimosCliCall
from dimos.e2e_tests.lcm_spy import LcmSpy
from dimos.simulation.mujoco.direct_cmd_vel_explorer import DirectCmdVelExplorer

points = [
(0, -7.07),
(-4.16, -7.07),
(-4.45, 1.10),
(-6.72, 2.87),
(-1.78, 3.01),
(-1.54, 5.74),
(3.88, 6.16),
(2.16, 9.36),
(4.70, 3.87),
(4.67, -7.15),
(4.57, -4.19),
(-0.84, -2.78),
(-4.71, 1.17),
(4.30, 0.87),
]


@pytest.mark.skipif_in_ci
Expand All @@ -48,7 +30,7 @@ def test_patrol_and_follow(
start_blueprint: Callable[[str], DimosCliCall],
human_input: Callable[[str], None],
start_person_track: StartPersonTrack,
direct_cmd_vel_explorer: DirectCmdVelExplorer,
explore_office: Callable[[], None],
) -> None:
start_blueprint(
"--mujoco-start-pos",
Expand All @@ -66,12 +48,7 @@ def test_patrol_and_follow(

time.sleep(5)

print("Starting discovery.")

# Explore the entire room by driving directly via /cmd_vel.
direct_cmd_vel_explorer.follow_points(points)

print("Ended discovery.")
explore_office()

start_person_track(
[
Expand Down
77 changes: 77 additions & 0 deletions dimos/e2e_tests/test_security_module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Copyright 2026 Dimensional Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from collections.abc import Callable
import time

from dimos_lcm.std_msgs import String
import pytest

from dimos.e2e_tests.conf_types import StartPersonTrack
from dimos.e2e_tests.dimos_cli_call import DimosCliCall
from dimos.e2e_tests.lcm_spy import LcmSpy


@pytest.mark.skipif_in_ci
@pytest.mark.skipif_no_openai
@pytest.mark.mujoco
def test_security_module(
    lcm_spy: LcmSpy,
    start_blueprint: Callable[[str], DimosCliCall],
    human_input: Callable[[str], None],
    start_person_track: StartPersonTrack,
    explore_office: Callable[[], None],
) -> None:
    """End-to-end check that the security blueprint reports the FOLLOWING state.

    Steps: launch the ``unitree-go2-security`` blueprint, wait for a message on
    the McpClient ``on_system_modules`` response topic, run the office
    exploration, start a person track, ask for the security patrol via human
    input, then wait for a ``/security_state`` message whose data is
    ``"FOLLOWING"``.
    """
    modules_ready_topic = "/rpc/McpClient/on_system_modules/res"
    security_state_topic = "/security_state#std_msgs.String"

    cli_args = (
        "--mujoco-start-pos",
        "-10.75 -6.78",
        "--mujoco-camera-position",
        "-0.797 0.007 0.468 26.825 88.998 -70.321",
        "--nerf-speed",
        "0.8",
        "--dtop",
        "run",
        "--disable",
        "spatial-memory",
        "unitree-go2-security",
    )
    start_blueprint(*cli_args)

    # Register both topics before blocking on the first one.
    lcm_spy.save_topic(modules_ready_topic)
    lcm_spy.save_topic(security_state_topic)
    lcm_spy.wait_for_saved_topic(modules_ready_topic, timeout=120.0)

    time.sleep(2)

    explore_office()

    person_path = [
        (-10.75, -6.78),
        (0, -7.07),
    ]
    start_person_track(person_path)
    human_input(
        "start the security patrol. Just call start_security_patrol. Do not ask me anything."
    )

    def is_following(msg: String) -> bool:
        return msg.data == "FOLLOWING"

    # The trailing 360 is presumably a timeout in seconds - confirm against LcmSpy.
    lcm_spy.wait_for_message_result(
        security_state_topic,
        String,
        is_following,
        "Failed to transition to FOLLOWING.",
        360,
    )
87 changes: 87 additions & 0 deletions dimos/experimental/security_demo/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Copyright 2026 Dimensional Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import pytest

from dimos.experimental.security_demo.security_module import SecurityModule
from dimos.msgs.sensor_msgs.CameraInfo import CameraInfo
from dimos.msgs.sensor_msgs.Image import Image
from dimos.perception.detection.detectors.yolo import Yolo2DDetector
from dimos.perception.detection.type.detection2d.bbox import Detection2DBBox
from dimos.utils.data import get_data


@pytest.fixture(scope="session")
def yolo_detector():
    """Session-scoped ``Yolo2DDetector`` built with ``device="cpu"``.

    The detector's ``stop()`` is called once the fixture resumes after the yield.
    """
    yolo = Yolo2DDetector(device="cpu")
    yield yolo
    yolo.stop()


@pytest.fixture(scope="session")
def person_image():
    """Session-scoped ``Image`` loaded from the ``security_detection.png`` data asset."""
    asset_path = get_data("security_detection.png")
    return Image.from_file(asset_path)


@pytest.fixture(scope="session")
def empty_image():
    """Session-scoped ``Image`` loaded from the ``security_no_detection.png`` data asset."""
    asset_path = get_data("security_no_detection.png")
    return Image.from_file(asset_path)


@pytest.fixture()
def security_module(mocker):
    """Yield a ``SecurityModule`` with its collaborators patched or mocked.

    Four names in the ``security_module`` module are patched before the module
    is constructed. Its output streams and the two framework-wired attributes
    are then replaced with ``MagicMock`` objects. ``stop()`` is called on the
    module after the yield.
    """
    patched_names = (
        "dimos.experimental.security_demo.security_module._create_router",
        "dimos.experimental.security_demo.security_module._create_visual_servo",
        "dimos.experimental.security_demo.security_module.YoloPersonDetector",
        "dimos.experimental.security_demo.security_module.EdgeTAMProcessor",
    )
    for dotted_name in patched_names:
        mocker.patch(dotted_name)

    sec = SecurityModule(camera_info=CameraInfo())

    # Output streams become mocks so tests can assert on them.
    for stream_name in ("detection", "security_state", "goal_request", "cmd_vel"):
        setattr(sec, stream_name, mocker.MagicMock())

    # Normally assigned by framework wiring rather than by __init__.
    sec._planner_spec = mocker.MagicMock()
    sec._speak_skill = mocker.MagicMock()

    yield sec

    sec.stop()


@pytest.fixture()
def make_detection(person_image):
    """Return a factory that builds ``Detection2DBBox`` objects for tests.

    Every detection uses ``ts=0.0`` and the ``person_image`` fixture as its
    image. The remaining fields can be overridden per call and otherwise take
    the defaults in the factory signature.
    """

    def _build(
        bbox=(100.0, 50.0, 300.0, 400.0),
        track_id=1,
        class_id=0,
        confidence=0.9,
        name="person",
    ):
        fields = {
            "bbox": bbox,
            "track_id": track_id,
            "class_id": class_id,
            "confidence": confidence,
            "name": name,
            "ts": 0.0,
            "image": person_image,
        }
        return Detection2DBBox(**fields)

    return _build
Loading
Loading