Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
15526d4
Add Antim Labs sim integration module (dimsim)
Viswa4599 Mar 1, 2026
6d28968
Auto-install and update dimsim CLI on dimos run sim-nav
Viswa4599 Mar 2, 2026
fe77900
CI code cleanup
Viswa4599 Mar 2, 2026
97c77f7
Fixes:
Viswa4599 Mar 5, 2026
02d1342
CI code cleanup
Viswa4599 Mar 5, 2026
0dbcd7a
Fix rerun camera: separate pinhole from image entity for 2D/3D compat
Viswa4599 Mar 10, 2026
4a46b5d
Force Deno cache reload on dimsim CLI update
Viswa4599 Mar 10, 2026
60c0c00
sim mapper small adjustments
leshy Mar 10, 2026
81ee427
DimSim integration: compiled binary distribution, e2e tests, sim blue…
Viswa4599 Mar 28, 2026
7da940a
Merge remote-tracking branch 'origin/dev' into antim/sim-integration-…
Viswa4599 Mar 28, 2026
6131832
CI code cleanup
Viswa4599 Mar 28, 2026
3bd9c2c
Update dimos/e2e_tests/test_dimsim_eval_parallel.py
Viswa4599 Mar 28, 2026
71d571f
Fix PR review issues: viewer config bug, missing @rpc, process teardo…
Viswa4599 Mar 28, 2026
8454465
Fix sim blueprints for dev compatibility: update imports, remove __in…
Viswa4599 Mar 28, 2026
43403f0
CI code cleanup
Viswa4599 Mar 28, 2026
5f24c9c
fix: resolve runtime errors in sim modules after dev merge
Viswa4599 Mar 28, 2026
9c9d63b
fix dimsim binary resolution + add eval creation docs
Viswa4599 Mar 28, 2026
cb7b6c2
fix dimsim binary resolution, add eval docs, enable dimsim-nav in CI
Viswa4599 Mar 28, 2026
8f72932
CI code cleanup
Viswa4599 Mar 28, 2026
09d1296
docs: remove single eval run section from simulation.md
Viswa4599 Mar 28, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ __pycache__
/assets/saved_maps/
/assets/model-cache/
/assets/agent/memory.txt
/assets/temporal_memory/

.bash_history

Expand Down
18 changes: 15 additions & 3 deletions dimos/e2e_tests/dimos_cli_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,26 @@ def start(self) -> None:
start_new_session=True,
)

def _kill_group(self, sig: int) -> None:
    """Deliver *sig* to the child's process group, or to the child alone.

    The child's PID is used as the process-group id. On macOS a child
    started with ``start_new_session=True`` lives in its own session, and
    ``os.killpg`` may then be refused with ``PermissionError``; when that
    happens only the lead process is signalled.
    """
    lead_pid = self.process.pid
    try:
        os.killpg(lead_pid, sig)
    except PermissionError:
        # Group-wide delivery was refused; signal just the lead process.
        os.kill(lead_pid, sig)

def stop(self) -> None:
if self.process is None:
return

try:
# Send SIGTERM to the entire process group so child processes
# (e.g. the mujoco viewer subprocess) are also terminated.
os.killpg(self.process.pid, signal.SIGTERM)
self._kill_group(signal.SIGTERM)

# Record the time when we sent the kill signal
shutdown_start = time.time()
Expand All @@ -62,7 +74,7 @@ def stop(self) -> None:
)
except subprocess.TimeoutExpired:
# If we reach here, the process didn't terminate in 30 seconds
os.killpg(self.process.pid, signal.SIGKILL)
self._kill_group(signal.SIGKILL)
self.process.wait() # Clean up
raise AssertionError(
"Process did not shut down within 30 seconds after receiving SIGTERM"
Expand All @@ -72,7 +84,7 @@ def stop(self) -> None:
# Clean up if something goes wrong
if self.process.poll() is None: # Process still running
try:
os.killpg(self.process.pid, signal.SIGKILL)
self._kill_group(signal.SIGKILL)
except ProcessLookupError:
pass
self.process.wait()
Expand Down
219 changes: 219 additions & 0 deletions dimos/e2e_tests/test_dimsim_eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
# Copyright 2025-2026 Dimensional Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Sequential eval tests — 1 dimos instance, run workflows one at a time.

Uses sim-eval blueprint (includes rerun/browser for manual observation).
Run individual evals or all of them:

pytest dimos/e2e_tests/test_dimsim_eval.py -v -s -m slow
pytest dimos/e2e_tests/test_dimsim_eval.py::TestSimEvalSequential::test_go_to_tv -v -s -m slow
"""

import json
import os
from pathlib import Path
import signal
import socket
import subprocess
import sys
import time

import pytest
import websocket

from dimos.e2e_tests.dimos_cli_call import DimosCliCall
from dimos.e2e_tests.lcm_spy import LcmSpy

PORT = 8090
EVALS_DIR = Path.home() / ".dimsim" / "evals"


def _force_kill_port(port: int) -> None:
    """SIGKILL every *other* process that ``lsof`` reports on *port* (best effort).

    ``lsof -i :PORT`` matches any socket using the port, including client
    connections, so the calling process may appear in the list when it holds
    a socket on that port. Its own PID is therefore skipped.

    Failures are tolerated on purpose: if ``lsof`` is missing or hangs, or a
    PID cannot be signalled, the function returns (or moves on to the next
    PID) without raising.

    Args:
        port: TCP/UDP port number to clear.
    """
    try:
        result = subprocess.run(
            ["lsof", "-ti", f":{port}"],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except (OSError, subprocess.SubprocessError):
        # lsof is not installed, could not be executed, or timed out.
        return

    own_pid = os.getpid()
    for token in result.stdout.split():
        try:
            pid = int(token)
        except ValueError:
            # Unexpected non-numeric output line; ignore it.
            continue
        if pid == own_pid:
            # Never kill the process running the tests.
            continue
        try:
            os.kill(pid, signal.SIGKILL)
        except OSError:
            # Already gone (ProcessLookupError) or not ours to kill
            # (PermissionError): keep going with the remaining PIDs.
            continue


def _wait_for_port(port: int, timeout: float = 120) -> bool:
    """Poll until a TCP connection to ``localhost:port`` succeeds.

    Each attempt uses a 2-second connect timeout and failed attempts are
    followed by a 1-second pause.

    Returns:
        ``True`` as soon as a connection is accepted, ``False`` if *timeout*
        seconds elapse first.
    """
    endpoint = ("localhost", port)
    give_up_at = time.time() + timeout
    while time.time() < give_up_at:
        try:
            with socket.create_connection(endpoint, timeout=2):
                pass
        except OSError:
            # Nothing accepting connections yet; back off and retry.
            time.sleep(1)
        else:
            return True
    return False


def _wait_for_port_free(port: int, timeout: float = 30) -> bool:
    """Poll until connecting to ``localhost:port`` fails, i.e. the port is free.

    Returns:
        ``True`` once a connection attempt raises ``OSError``, ``False`` if
        something is still accepting connections after *timeout* seconds.
    """
    endpoint = ("localhost", port)
    expires_at = time.time() + timeout
    while True:
        if time.time() >= expires_at:
            return False
        try:
            with socket.create_connection(endpoint, timeout=1):
                # Something still answers on the port; pause before re-probing.
                time.sleep(1)
        except OSError:
            return True


class EvalClient:
    """Talks to the browser eval harness via the bridge WebSocket."""

    def __init__(self, port: int = PORT):
        """Open a WebSocket connection to ``ws://localhost:<port>``."""
        self.ws = websocket.WebSocket()
        self.ws.connect(f"ws://localhost:{port}")

    def _send(self, msg: dict) -> None:
        """Serialize *msg* as JSON and send it as a text frame."""
        self.ws.send(json.dumps(msg))

    def _wait_for(self, msg_type: str, timeout: float = 120) -> dict:
        """Block until a JSON message whose ``type`` equals *msg_type* arrives.

        Binary frames and JSON messages of other types are skipped. *timeout*
        bounds the whole wait, not each individual ``recv()``, so a steady
        stream of unrelated frames cannot keep the call alive indefinitely.

        Raises:
            websocket.WebSocketTimeoutException: no matching message arrived
                within *timeout* seconds.
        """
        deadline = time.monotonic() + timeout
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise websocket.WebSocketTimeoutException(
                    f"Timed out after {timeout}s waiting for '{msg_type}' message"
                )
            # Shrink the socket timeout to whatever is left of the budget.
            self.ws.settimeout(remaining)
            raw = self.ws.recv()
            if isinstance(raw, bytes):
                continue
            msg = json.loads(raw)
            if isinstance(msg, dict) and msg.get("type") == msg_type:
                return msg

    def wait_for_harness(self, timeout: float = 60) -> bool:
        """Ping the harness until it answers with ``pong``.

        Returns:
            ``True`` on the first ``pong`` reply, ``False`` if none is seen
            within *timeout* seconds.
        """
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            try:
                self._send({"type": "ping"})
                self.ws.settimeout(3)
                raw = self.ws.recv()
                if isinstance(raw, str):
                    msg = json.loads(raw)
                    if isinstance(msg, dict) and msg.get("type") == "pong":
                        return True
            except (websocket.WebSocketException, OSError, ValueError):
                # Timeout, dropped connection or unparsable frame: the
                # harness is not ready yet, so pause and ping again.
                time.sleep(1)
        return False

    def run_workflow(self, workflow: dict) -> dict:
        """Send loadEnv + startWorkflow, wait for workflowComplete.

        The environment defaults to ``"apt"`` and must report ``envReady``
        within 30 seconds. The completion wait allows the workflow's own
        ``timeoutSec`` (default 120) plus 30 seconds of slack.

        Returns:
            The ``workflowComplete`` message sent by the harness.
        """
        timeout = workflow.get("timeoutSec", 120) + 30
        self._send({"type": "loadEnv", "scene": workflow.get("environment", "apt")})
        self._wait_for("envReady", timeout=30)
        self._send({"type": "startWorkflow", "workflow": workflow})
        return self._wait_for("workflowComplete", timeout=timeout)

    def close(self) -> None:
        """Close the underlying WebSocket connection."""
        self.ws.close()


def _load_workflow(env: str, name: str) -> dict:
    """Load the eval workflow ``<EVALS_DIR>/<env>/<name>.json``.

    Args:
        env: Environment sub-directory under ``EVALS_DIR``.
        name: Workflow file name without the ``.json`` suffix.

    Returns:
        The parsed JSON document.

    Raises:
        OSError: the workflow file cannot be read (e.g. it does not exist).
        json.JSONDecodeError: the file is not valid JSON.
    """
    path = EVALS_DIR / env / f"{name}.json"
    # Hand json.loads the raw bytes: it detects UTF-8/16/32 itself, so parsing
    # does not depend on the platform's locale encoding as read_text() would.
    return json.loads(path.read_bytes())


@pytest.fixture(scope="class")
def sim_eval():
    """Start dimos sim-eval headless, tear down after.

    Setup clears ``PORT``, optionally opens a log file under
    ``$DIMSIM_EVAL_LOG_DIR``, starts an LCM spy, launches
    ``dimos --simulation run sim-eval`` and waits for the bridge port and the
    first image/odometry messages.

    Every acquired resource is registered on an ``ExitStack`` as soon as it
    exists, so teardown runs even when a later setup step or an earlier
    teardown step raises. Teardown order is: stop the dimos process, stop the
    spy, close the log file, force-kill anything left on ``PORT``.

    Yields:
        The ``DimosCliCall`` wrapping the running process.
    """
    import contextlib  # local import: only this fixture needs ExitStack

    _force_kill_port(PORT)
    assert _wait_for_port_free(PORT, timeout=10), f"Port {PORT} still in use after force-kill"

    with contextlib.ExitStack() as cleanup:
        # Callbacks run in reverse registration order, so this one runs last.
        cleanup.callback(_force_kill_port, PORT)

        log_file = None
        log_dir = os.environ.get("DIMSIM_EVAL_LOG_DIR", "")
        if log_dir:
            log_path = Path(log_dir)
            log_path.mkdir(parents=True, exist_ok=True)
            log_file = cleanup.enter_context(open(log_path / "dimos-sequential.log", "w"))
            print(f"\n dimos logs → {log_path}/dimos-sequential.log")

        spy = LcmSpy()
        spy.save_topic("/color_image#sensor_msgs.Image")
        spy.save_topic("/odom#geometry_msgs.PoseStamped")
        spy.start()
        cleanup.callback(spy.stop)

        venv_bin = str(Path(sys.prefix) / "bin")
        env = {
            **os.environ,
            "DIMSIM_HEADLESS": "1",
            "DIMSIM_RENDER": "gpu",
            # Put the active environment's bin directory first on PATH.
            "PATH": venv_bin + os.pathsep + os.environ.get("PATH", ""),
        }
        output = log_file if log_file is not None else subprocess.DEVNULL

        call = DimosCliCall()
        call.demo_args = ["sim-eval"]
        call.process = subprocess.Popen(
            ["dimos", "--simulation", "run", "sim-eval"],
            env=env,
            stdout=output,
            stderr=output,
            # Required: DimosCliCall.stop() signals the process *group* of
            # this PID, which assumes the child was started in a new
            # session/process group. Without it, teardown may fail or target
            # the wrong group.
            start_new_session=True,
        )
        cleanup.callback(call.stop)

        assert _wait_for_port(PORT, timeout=120), f"Bridge not ready on port {PORT}"
        spy.wait_for_saved_topic("/color_image#sensor_msgs.Image", timeout=60.0)
        spy.wait_for_saved_topic("/odom#geometry_msgs.PoseStamped", timeout=60.0)

        yield call


@pytest.fixture(scope="class")
def eval_client(sim_eval):
    """Connect to bridge WS and wait for eval harness.

    Depends on ``sim_eval`` so the simulator is running first. The WebSocket
    is closed on teardown, including when the harness never answers and the
    readiness assertion fails before any test runs.

    Yields:
        A connected ``EvalClient``.
    """
    client = EvalClient(PORT)
    try:
        assert client.wait_for_harness(timeout=60), "Eval harness not responding"
        yield client
    finally:
        client.close()


@pytest.mark.skipif_in_ci
@pytest.mark.slow
class TestSimEvalSequential:
    """Sequential DimSim evals executed against one live dimos sim-eval instance."""

    def _run_and_assert(self, eval_client: EvalClient, env: str, workflow_name: str) -> None:
        """Run one workflow and fail unless its ``objectDistance`` rubric passed.

        The workflow is loaded from disk, executed through *eval_client*, and
        a one-line PASS/FAIL summary is printed. The failure detail is the
        rubric's ``details`` entry, else the result's ``reason``, else
        ``"unknown"``.
        """
        outcome = eval_client.run_workflow(_load_workflow(env, workflow_name))
        distance_score = outcome.get("rubricScores", {}).get("objectDistance", {})
        passed = distance_score.get("pass", False)
        fallback_reason = outcome.get("reason", "unknown")
        details = distance_score.get("details", fallback_reason)
        verdict = "PASS" if passed else "FAIL"
        print(f" {workflow_name}: {verdict} — {details}")
        assert passed, f"Eval '{workflow_name}' failed: {details}"

    def test_go_to_tv(self, eval_client) -> None:
        """Run the ``television`` workflow in the ``apt`` environment."""
        self._run_and_assert(eval_client, "apt", "television")

    def test_go_to_couch(self, eval_client) -> None:
        """Run the ``go-to-couch`` workflow in the ``apt`` environment."""
        self._run_and_assert(eval_client, "apt", "go-to-couch")

    def test_go_to_kitchen(self, eval_client) -> None:
        """Run the ``go-to-kitchen`` workflow in the ``apt`` environment."""
        self._run_and_assert(eval_client, "apt", "go-to-kitchen")
Loading
Loading