From d2e9446fe3d00723b2f2433ca714c3196c8a4878 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Sun, 15 Mar 2026 14:53:32 -0700 Subject: [PATCH 01/11] feat(native): add rebuild-on-change for NativeModule Add a generic file change detection utility (dimos/utils/change_detect.py) that tracks content hashes via xxhash and integrate it into NativeModule so it can automatically rebuild when watched source files change. - change_detect.did_change() hashes file content, stores per-cache-name hash files in the venv, and returns True when files differ - NativeModuleConfig gains rebuild_on_change: list[str] | None - NativeModule._maybe_build() deletes stale executables when sources change - 11 tests for change_detect, 3 integration tests for native rebuild --- dimos/core/native_module.py | 19 +++- dimos/core/test_native_rebuild.py | 139 ++++++++++++++++++++++++++++++ dimos/utils/change_detect.py | 131 ++++++++++++++++++++++++++++ dimos/utils/test_change_detect.py | 114 ++++++++++++++++++++++++ 4 files changed, 401 insertions(+), 2 deletions(-) create mode 100644 dimos/core/test_native_rebuild.py create mode 100644 dimos/utils/change_detect.py create mode 100644 dimos/utils/test_change_detect.py diff --git a/dimos/core/native_module.py b/dimos/core/native_module.py index f4a674cb5d..981f22a2b0 100644 --- a/dimos/core/native_module.py +++ b/dimos/core/native_module.py @@ -55,6 +55,7 @@ class MyCppModule(NativeModule): from dimos.core.core import rpc from dimos.core.module import Module, ModuleConfig +from dimos.utils.change_detect import did_change from dimos.utils.logging_config import setup_logger if sys.version_info < (3, 13): @@ -80,9 +81,10 @@ class NativeModuleConfig(ModuleConfig): extra_env: dict[str, str] = Field(default_factory=dict) shutdown_timeout: float = 10.0 log_format: LogFormat = LogFormat.TEXT + rebuild_on_change: list[str] | None = None # Override in subclasses to exclude fields from CLI arg generation - cli_exclude: frozenset[str] = frozenset() + cli_exclude: frozenset[str] = frozenset({"rebuild_on_change"}) def to_cli_args(self) -> list[str]: """Auto-convert subclass config fields to CLI args. @@ -244,8 +246,16 @@ def _resolve_paths(self) -> None: self.config.executable = str(Path(self.config.cwd) / self.config.executable) def _maybe_build(self) -> None: - """Run ``build_command`` if the executable does not exist.""" + """Run ``build_command`` if the executable does not exist or sources changed.""" exe = Path(self.config.executable) + + # Check if rebuild needed due to source changes + if self.config.rebuild_on_change and exe.exists(): + cache_name = f"native_{type(self).__name__}_build" + if did_change(cache_name, self.config.rebuild_on_change): + logger.info("Source files changed, triggering rebuild", executable=str(exe)) + exe.unlink(missing_ok=True) + if exe.exists(): return if self.config.build_command is None: @@ -282,6 +292,11 @@ def _maybe_build(self) -> None: f"Build command succeeded but executable still not found: {exe}" ) + # Update the change cache so next check is clean + if self.config.rebuild_on_change: + cache_name = f"native_{type(self).__name__}_build" + did_change(cache_name, self.config.rebuild_on_change) + def _collect_topics(self) -> dict[str, str]: """Extract LCM topic strings from blueprint-assigned stream transports.""" topics: dict[str, str] = {} diff --git a/dimos/core/test_native_rebuild.py b/dimos/core/test_native_rebuild.py new file mode 100644 index 0000000000..82c8be825e --- /dev/null +++ b/dimos/core/test_native_rebuild.py @@ -0,0 +1,139 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for NativeModule rebuild-on-change integration.""" + +from __future__ import annotations + +from pathlib import Path +import stat + +import pytest + +from dimos.core.native_module import NativeModule, NativeModuleConfig + + +@pytest.fixture(autouse=True) +def _use_tmp_cache(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Redirect the change-detection cache to a temp dir for every test.""" + monkeypatch.setattr( + "dimos.utils.change_detect._get_cache_dir", + lambda: tmp_path / "cache", + ) + + +@pytest.fixture() +def build_env(tmp_path: Path) -> dict[str, Path]: + """Set up a temp directory with a source file, executable path, and marker path.""" + src = tmp_path / "src" + src.mkdir() + (src / "main.c").write_text("int main() { return 0; }") + + exe = tmp_path / "mybin" + marker = tmp_path / "build_ran.marker" + + # Build script: create the executable and a marker file + build_script = tmp_path / "build.sh" + build_script.write_text(f"#!/bin/sh\ntouch {exe}\nchmod +x {exe}\ntouch {marker}\n") + build_script.chmod(build_script.stat().st_mode | stat.S_IEXEC) + + return {"src": src, "exe": exe, "marker": marker, "build_script": build_script} + + +class _RebuildConfig(NativeModuleConfig): + executable: str = "" + rebuild_on_change: list[str] | None = None + + +class _RebuildModule(NativeModule[_RebuildConfig]): + default_config = _RebuildConfig + + +def _make_module(build_env: dict[str, Path]) -> _RebuildModule: + """Create a _RebuildModule pointing at the temp build env.""" + return _RebuildModule( + executable=str(build_env["exe"]), + build_command=f"sh {build_env['build_script']}", + rebuild_on_change=[str(build_env["src"])], + cwd=str(build_env["src"]), + ) + + +def test_rebuild_on_change_triggers_build(build_env: dict[str, Path]) -> None: + """When source files change, the build_command should re-run.""" + mod = _make_module(build_env) + try: + exe = build_env["exe"] + marker = build_env["marker"] + + # First build: exe doesn't exist → build runs + mod._maybe_build() + assert exe.exists() + assert marker.exists() + marker.unlink() + + # No change → build should NOT run + mod._maybe_build() + assert not marker.exists() + + # Modify source → build SHOULD run + (build_env["src"] / "main.c").write_text("int main() { return 1; }") + mod._maybe_build() + assert marker.exists(), "Build should have re-run after source change" + finally: + mod.stop() + + +def test_no_change_skips_rebuild(build_env: dict[str, Path]) -> None: + """When sources haven't changed, build_command must not run again.""" + mod = _make_module(build_env) + try: + marker = build_env["marker"] + + # Initial build + mod._maybe_build() + assert marker.exists() + marker.unlink() + + # Second call — nothing changed + mod._maybe_build() + assert not marker.exists(), "Build should have been skipped (no source changes)" + finally: + mod.stop() + + +def test_rebuild_on_change_none_skips_check(build_env: dict[str, Path]) -> None: + """When rebuild_on_change is None, no change detection happens at all.""" + exe = build_env["exe"] + marker = build_env["marker"] + + mod = _RebuildModule( + executable=str(exe), + build_command=f"sh {build_env['build_script']}", + rebuild_on_change=None, + cwd=str(build_env["src"]), + ) + try: + # Initial build + mod._maybe_build() + assert exe.exists() + assert marker.exists() + marker.unlink() + + # Modify source — but rebuild_on_change is None, so no rebuild + (build_env["src"] / "main.c").write_text("int main() { return 1; }") + mod._maybe_build() + assert not marker.exists(), "Should not rebuild when rebuild_on_change is None" + finally: + mod.stop() diff --git a/dimos/utils/change_detect.py b/dimos/utils/change_detect.py new file mode 100644 index 0000000000..8e94bc85ee --- /dev/null +++ b/dimos/utils/change_detect.py @@ -0,0 +1,131 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Change detection utility for file content hashing. + +Tracks whether a set of files (by path, directory, or glob pattern) have +changed since the last check. Useful for skipping expensive rebuilds when +source files haven't been modified. +""" + +from __future__ import annotations + +from collections.abc import Sequence +import glob as glob_mod +import os +from pathlib import Path + +import xxhash + +from dimos.utils.logging_config import setup_logger + +logger = setup_logger() + + +def _get_cache_dir() -> Path: + """Return the directory used to store change-detection cache files. + + Uses ``/dimos_cache/change_detect/`` when running inside a + venv, otherwise falls back to ``~/.cache/dimos/change_detect/``. + """ + venv = os.environ.get("VIRTUAL_ENV") + if venv: + return Path(venv) / "dimos_cache" / "change_detect" + return Path.home() / ".cache" / "dimos" / "change_detect" + + +def _resolve_paths(paths: Sequence[str | Path]) -> list[Path]: + """Expand globs/directories into a sorted list of individual file paths.""" + files: set[Path] = set() + for entry in paths: + entry_str = str(entry) + # Try glob expansion first (handles both glob patterns and plain paths) + expanded = glob_mod.glob(entry_str, recursive=True) + if not expanded: + # Nothing matched — could be a non-existent path or empty glob + if any(c in entry_str for c in ("*", "?", "[")): + logger.warning("Glob pattern matched no files", pattern=entry_str) + else: + logger.warning("Path does not exist", path=entry_str) + continue + for match in expanded: + p = Path(match) + if p.is_file(): + files.add(p.resolve()) + elif p.is_dir(): + for root, _dirs, filenames in os.walk(p): + for fname in filenames: + files.add(Path(root, fname).resolve()) + return sorted(files) + + +def _hash_files(files: list[Path]) -> str: + """Compute an aggregate xxhash digest over the sorted file list.""" + h = xxhash.xxh64() + for fpath in files: + try: + # Include the path so additions/deletions/renames are detected + h.update(str(fpath).encode()) + h.update(fpath.read_bytes()) + except (OSError, PermissionError): + logger.warning("Cannot read file for hashing", path=str(fpath)) + return h.hexdigest() + + +def did_change(cache_name: str, paths: Sequence[str | Path]) -> bool: + """Check if any files/dirs matching the given paths have changed since last check. + + Args: + cache_name: Unique identifier for this cache (e.g. ``"mymodule_build_cache"``). + Different cache names track independently. + paths: List of file paths, directory paths, or glob patterns. + Directories are walked recursively. + Globs are expanded with :func:`glob.glob`. + + Returns: + ``True`` if any file has changed (or if no previous cache exists). + ``False`` if all files are identical to the cached state. + """ + if not paths: + return False + + files = _resolve_paths(paths) + current_hash = _hash_files(files) + + cache_dir = _get_cache_dir() + cache_file = cache_dir / f"{cache_name}.hash" + + changed = True + if cache_file.exists(): + previous_hash = cache_file.read_text().strip() + changed = current_hash != previous_hash + + # Always update the cache with the current hash + cache_dir.mkdir(parents=True, exist_ok=True) + cache_file.write_text(current_hash) + + return changed + + +def clear_cache(cache_name: str) -> bool: + """Remove the cached hash for the given cache name. + + Returns: + ``True`` if the cache file existed and was removed. + """ + cache_file = _get_cache_dir() / f"{cache_name}.hash" + if cache_file.exists(): + cache_file.unlink() + return True + return False diff --git a/dimos/utils/test_change_detect.py b/dimos/utils/test_change_detect.py new file mode 100644 index 0000000000..351abe7c87 --- /dev/null +++ b/dimos/utils/test_change_detect.py @@ -0,0 +1,114 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for the change detection utility.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from dimos.utils.change_detect import clear_cache, did_change + + +@pytest.fixture(autouse=True) +def _use_tmp_cache(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Redirect the change-detection cache to a temp dir for every test.""" + monkeypatch.setattr( + "dimos.utils.change_detect._get_cache_dir", + lambda: tmp_path / "cache", + ) + + +@pytest.fixture() +def src_dir(tmp_path: Path) -> Path: + """A temp directory with two source files for testing.""" + d = tmp_path / "src" + d.mkdir() + (d / "a.c").write_text("int main() { return 0; }") + (d / "b.c").write_text("void helper() {}") + return d + + +def test_first_call_returns_true(src_dir: Path) -> None: + assert did_change("test_cache", [str(src_dir)]) is True + + +def test_second_call_no_change_returns_false(src_dir: Path) -> None: + did_change("test_cache", [str(src_dir)]) + assert did_change("test_cache", [str(src_dir)]) is False + + +def test_file_modified_returns_true(src_dir: Path) -> None: + did_change("test_cache", [str(src_dir)]) + (src_dir / "a.c").write_text("int main() { return 1; }") + assert did_change("test_cache", [str(src_dir)]) is True + + +def test_file_added_to_dir_returns_true(src_dir: Path) -> None: + did_change("test_cache", [str(src_dir)]) + (src_dir / "c.c").write_text("void new_func() {}") + assert did_change("test_cache", [str(src_dir)]) is True + + +def test_file_deleted_returns_true(src_dir: Path) -> None: + did_change("test_cache", [str(src_dir)]) + (src_dir / "b.c").unlink() + assert did_change("test_cache", [str(src_dir)]) is True + + +def test_glob_pattern(src_dir: Path) -> None: + pattern = str(src_dir / "*.c") + assert did_change("glob_cache", [pattern]) is True + assert did_change("glob_cache", [pattern]) is False + (src_dir / "a.c").write_text("changed!") + assert did_change("glob_cache", [pattern]) is True + + +def test_separate_cache_names_independent(src_dir: Path) -> None: + paths = [str(src_dir)] + did_change("cache_a", paths) + did_change("cache_b", paths) + # Both caches are now up-to-date + assert did_change("cache_a", paths) is False + assert did_change("cache_b", paths) is False + # Modify a file — both caches should report changed independently + (src_dir / "a.c").write_text("changed") + assert did_change("cache_a", paths) is True + # cache_b hasn't been checked since the change + assert did_change("cache_b", paths) is True + + +def test_clear_cache(src_dir: Path) -> None: + paths = [str(src_dir)] + did_change("clear_test", paths) + assert did_change("clear_test", paths) is False + assert clear_cache("clear_test") is True + assert did_change("clear_test", paths) is True + + +def test_clear_cache_nonexistent() -> None: + assert clear_cache("does_not_exist") is False + + +def test_empty_paths_returns_false() -> None: + assert did_change("empty_test", []) is False + + +def test_nonexistent_path_warns(caplog: pytest.LogCaptureFixture) -> None: + """A non-existent path logs a warning and doesn't crash.""" + result = did_change("missing_test", ["/nonexistent/path/to/file.c"]) + # First call with no resolvable files still returns True (no cache) + assert isinstance(result, bool) From bc5b44b78698ed0390302ac166228672e2033153 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Tue, 17 Mar 2026 02:12:21 +0800 Subject: [PATCH 02/11] fix: resolve relative rebuild_on_change paths against module cwd and avoid unlinking Nix store executables - Add `cwd` parameter to `did_change()` and `_resolve_paths()` so relative glob patterns in `rebuild_on_change` are resolved against the module's working directory instead of the process cwd. - Replace `exe.unlink()` with a `needs_rebuild` flag so executables that live in read-only locations (e.g. Nix store) are not deleted; instead the build command is re-run which handles the output path itself. --- dimos/core/native_module.py | 7 ++++--- dimos/utils/change_detect.py | 16 +++++++++++++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/dimos/core/native_module.py b/dimos/core/native_module.py index 981f22a2b0..8531ecff82 100644 --- a/dimos/core/native_module.py +++ b/dimos/core/native_module.py @@ -250,13 +250,14 @@ def _maybe_build(self) -> None: exe = Path(self.config.executable) # Check if rebuild needed due to source changes + needs_rebuild = False if self.config.rebuild_on_change and exe.exists(): cache_name = f"native_{type(self).__name__}_build" - if did_change(cache_name, self.config.rebuild_on_change): + if did_change(cache_name, self.config.rebuild_on_change, cwd=self.config.cwd): logger.info("Source files changed, triggering rebuild", executable=str(exe)) - exe.unlink(missing_ok=True) + needs_rebuild = True - if exe.exists(): + if exe.exists() and not needs_rebuild: return if self.config.build_command is None: raise FileNotFoundError( diff --git a/dimos/utils/change_detect.py b/dimos/utils/change_detect.py index 8e94bc85ee..7522c73098 100644 --- a/dimos/utils/change_detect.py +++ b/dimos/utils/change_detect.py @@ -45,11 +45,16 @@ def _get_cache_dir() -> Path: return Path.home() / ".cache" / "dimos" / "change_detect" -def _resolve_paths(paths: Sequence[str | Path]) -> list[Path]: +def _resolve_paths( + paths: Sequence[str | Path], cwd: str | Path | None = None +) -> list[Path]: """Expand globs/directories into a sorted list of individual file paths.""" files: set[Path] = set() for entry in paths: entry_str = str(entry) + # Resolve relative paths against cwd when provided + if cwd is not None and not Path(entry_str).is_absolute(): + entry_str = str(Path(cwd) / entry_str) # Try glob expansion first (handles both glob patterns and plain paths) expanded = glob_mod.glob(entry_str, recursive=True) if not expanded: @@ -83,7 +88,11 @@ def _hash_files(files: list[Path]) -> str: return h.hexdigest() -def did_change(cache_name: str, paths: Sequence[str | Path]) -> bool: +def did_change( + cache_name: str, + paths: Sequence[str | Path], + cwd: str | Path | None = None, +) -> bool: """Check if any files/dirs matching the given paths have changed since last check. Args: @@ -92,6 +101,7 @@ def did_change(cache_name: str, paths: Sequence[str | Path]) -> bool: paths: List of file paths, directory paths, or glob patterns. Directories are walked recursively. Globs are expanded with :func:`glob.glob`. + cwd: Optional working directory for resolving relative paths. Returns: ``True`` if any file has changed (or if no previous cache exists). @@ -100,7 +110,7 @@ def did_change(cache_name: str, paths: Sequence[str | Path]) -> bool: if not paths: return False - files = _resolve_paths(paths) + files = _resolve_paths(paths, cwd=cwd) current_hash = _hash_files(files) cache_dir = _get_cache_dir() From f2b7b0a3ae7cb08d025418b6e8b5ad8607f85549 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Mon, 16 Mar 2026 15:12:53 -0700 Subject: [PATCH 03/11] improve native build --- dimos/core/native_module.py | 18 +- dimos/core/test_native_rebuild.py | 3 +- .../manipulation/planning/utils/mesh_utils.py | 19 ++- dimos/utils/change_detect.py | 155 +++++++++++++----- dimos/utils/test_change_detect.py | 13 +- pyproject.toml | 1 + uv.lock | 2 + 7 files changed, 150 insertions(+), 61 deletions(-) diff --git a/dimos/core/native_module.py b/dimos/core/native_module.py index 981f22a2b0..441bcc99fe 100644 --- a/dimos/core/native_module.py +++ b/dimos/core/native_module.py @@ -55,7 +55,7 @@ class MyCppModule(NativeModule): from dimos.core.core import rpc from dimos.core.module import Module, ModuleConfig -from dimos.utils.change_detect import did_change +from dimos.utils.change_detect import PathEntry, did_change from dimos.utils.logging_config import setup_logger if sys.version_info < (3, 13): @@ -81,7 +81,7 @@ class NativeModuleConfig(ModuleConfig): extra_env: dict[str, str] = Field(default_factory=dict) shutdown_timeout: float = 10.0 log_format: LogFormat = LogFormat.TEXT - rebuild_on_change: list[str] | None = None + rebuild_on_change: list[PathEntry] | None = None # Override in subclasses to exclude fields from CLI arg generation cli_exclude: frozenset[str] = frozenset({"rebuild_on_change"}) @@ -245,14 +245,18 @@ def _resolve_paths(self) -> None: if not Path(self.config.executable).is_absolute() and self.config.cwd is not None: self.config.executable = str(Path(self.config.cwd) / self.config.executable) + def _build_cache_name(self) -> str: + """Return a stable, unique cache name for this module's build state.""" + source_file = Path(inspect.getfile(type(self))).resolve() + return f"native_{source_file}" + def _maybe_build(self) -> None: """Run ``build_command`` if the executable does not exist or sources changed.""" exe = Path(self.config.executable) # Check if rebuild needed due to source changes if self.config.rebuild_on_change and exe.exists(): - cache_name = f"native_{type(self).__name__}_build" - if did_change(cache_name, self.config.rebuild_on_change): + if did_change(self._build_cache_name(), self.config.rebuild_on_change): logger.info("Source files changed, triggering rebuild", executable=str(exe)) exe.unlink(missing_ok=True) @@ -292,10 +296,10 @@ def _maybe_build(self) -> None: f"Build command succeeded but executable still not found: {exe}" ) - # Update the change cache so next check is clean + # Seed the cache after a successful build so the next check has a baseline + # (needed for the initial build when the pre-build change check was skipped) if self.config.rebuild_on_change: - cache_name = f"native_{type(self).__name__}_build" - did_change(cache_name, self.config.rebuild_on_change) + did_change(self._build_cache_name(), self.config.rebuild_on_change) def _collect_topics(self) -> dict[str, str]: """Extract LCM topic strings from blueprint-assigned stream transports.""" diff --git a/dimos/core/test_native_rebuild.py b/dimos/core/test_native_rebuild.py index 82c8be825e..6f8a68b9aa 100644 --- a/dimos/core/test_native_rebuild.py +++ b/dimos/core/test_native_rebuild.py @@ -22,6 +22,7 @@ import pytest from dimos.core.native_module import NativeModule, NativeModuleConfig +from dimos.utils.change_detect import PathEntry @pytest.fixture(autouse=True) @@ -53,7 +54,7 @@ def build_env(tmp_path: Path) -> dict[str, Path]: class _RebuildConfig(NativeModuleConfig): executable: str = "" - rebuild_on_change: list[str] | None = None + rebuild_on_change: list[PathEntry] | None = None class _RebuildModule(NativeModule[_RebuildConfig]): diff --git a/dimos/manipulation/planning/utils/mesh_utils.py b/dimos/manipulation/planning/utils/mesh_utils.py index 92fcfc6eca..4dfa2231d0 100644 --- a/dimos/manipulation/planning/utils/mesh_utils.py +++ b/dimos/manipulation/planning/utils/mesh_utils.py @@ -38,6 +38,7 @@ import tempfile from typing import TYPE_CHECKING +from dimos.utils.change_detect import did_change from dimos.utils.logging_config import setup_logger if TYPE_CHECKING: @@ -76,14 +77,15 @@ def prepare_urdf_for_drake( package_paths = package_paths or {} xacro_args = xacro_args or {} - # Generate cache key + # Generate cache key from configuration (not file content — did_change handles that) cache_key = _generate_cache_key(urdf_path, package_paths, xacro_args, convert_meshes) cache_path = _CACHE_DIR / cache_key / urdf_path.stem cache_path.mkdir(parents=True, exist_ok=True) cached_urdf = cache_path / f"{urdf_path.stem}.urdf" - # Check cache - if cached_urdf.exists(): + # Check cache: reuse only if the output exists AND the source file hasn't changed + source_changed = did_change(f"urdf_{cache_key}", [str(urdf_path)]) + if cached_urdf.exists() and not source_changed: logger.debug(f"Using cached URDF: {cached_urdf}") return str(cached_urdf) @@ -118,16 +120,15 @@ def _generate_cache_key( ) -> str: """Generate a cache key for the URDF configuration. - Includes a version number to invalidate cache when processing logic changes. + Encodes the configuration inputs (not file content — ``did_change`` handles + content-based invalidation separately). Includes a version number to + invalidate the cache when processing logic changes. """ - # Include file modification time - mtime = urdf_path.stat().st_mtime if urdf_path.exists() else 0 - # Version number to invalidate cache when processing logic changes # Increment this when adding new processing steps (e.g., stripping transmission blocks) - processing_version = "v2" + processing_version = "v3" - key_data = f"{processing_version}:{urdf_path}:{mtime}:{sorted(package_paths.items())}:{sorted(xacro_args.items())}:{convert_meshes}" + key_data = f"{processing_version}:{urdf_path}:{sorted(package_paths.items())}:{sorted(xacro_args.items())}:{convert_meshes}" return hashlib.md5(key_data.encode()).hexdigest()[:16] diff --git a/dimos/utils/change_detect.py b/dimos/utils/change_detect.py index 8e94bc85ee..f253fe08ca 100644 --- a/dimos/utils/change_detect.py +++ b/dimos/utils/change_detect.py @@ -17,22 +17,48 @@ Tracks whether a set of files (by path, directory, or glob pattern) have changed since the last check. Useful for skipping expensive rebuilds when source files haven't been modified. + +Path entries are type-dispatched: + +- ``str`` / ``Path`` / ``LfsPath`` — treated as **literal** file or directory + paths (no glob expansion, even if the path contains ``*``). +- ``Glob`` — expanded with :func:`glob.glob` to match filesystem patterns. """ from __future__ import annotations from collections.abc import Sequence +import fcntl import glob as glob_mod +import hashlib import os from pathlib import Path +from typing import Union import xxhash +from dimos.utils.data import LfsPath from dimos.utils.logging_config import setup_logger logger = setup_logger() +class Glob(str): + """A string that should be interpreted as a filesystem glob pattern. + + Wraps a plain ``str`` to signal that :func:`did_change` should expand it + with :func:`glob.glob` rather than treating it as a literal path. + + Example:: + + Glob("src/**/*.c") + """ + + +PathEntry = Union[str, Path, LfsPath, Glob] +"""A single entry in a change-detection path list.""" + + def _get_cache_dir() -> Path: """Return the directory used to store change-detection cache files. @@ -45,28 +71,54 @@ def _get_cache_dir() -> Path: return Path.home() / ".cache" / "dimos" / "change_detect" -def _resolve_paths(paths: Sequence[str | Path]) -> list[Path]: - """Expand globs/directories into a sorted list of individual file paths.""" +def _safe_filename(cache_name: str) -> str: + """Convert an arbitrary cache name into a safe filename. + + If the cache name is already a simple identifier it is returned as-is. + Otherwise a short SHA-256 prefix is appended so that names containing + path separators or other special characters produce unique, safe filenames. + """ + safe_chars = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-") + if all(c in safe_chars for c in cache_name) and len(cache_name) <= 200: + return cache_name + digest = hashlib.sha256(cache_name.encode()).hexdigest()[:16] + return digest + + +def _add_path(files: set[Path], p: Path) -> None: + """Add *p* (file or directory, walked recursively) to *files*.""" + if p.is_file(): + files.add(p.resolve()) + elif p.is_dir(): + for root, _dirs, filenames in os.walk(p): + for fname in filenames: + files.add(Path(root, fname).resolve()) + + +def _resolve_paths(paths: Sequence[PathEntry]) -> list[Path]: + """Resolve a mixed list of path entries into a sorted list of files. + + ``Glob`` entries are expanded via :func:`glob.glob`. All other types + (``str``, ``Path``, ``LfsPath``) are treated as literal paths — no + wildcard expansion is performed. + """ files: set[Path] = set() for entry in paths: - entry_str = str(entry) - # Try glob expansion first (handles both glob patterns and plain paths) - expanded = glob_mod.glob(entry_str, recursive=True) - if not expanded: - # Nothing matched — could be a non-existent path or empty glob - if any(c in entry_str for c in ("*", "?", "[")): - logger.warning("Glob pattern matched no files", pattern=entry_str) - else: - logger.warning("Path does not exist", path=entry_str) - continue - for match in expanded: - p = Path(match) - if p.is_file(): - files.add(p.resolve()) - elif p.is_dir(): - for root, _dirs, filenames in os.walk(p): - for fname in filenames: - files.add(Path(root, fname).resolve()) + if isinstance(entry, Glob): + expanded = glob_mod.glob(entry, recursive=True) + if not expanded: + logger.warning("Glob pattern matched no files", pattern=entry) + continue + for match in expanded: + _add_path(files, Path(match)) + else: + # str, Path, LfsPath — literal path, no glob expansion + path_str = str(entry) + p = Path(path_str) + if not p.exists(): + logger.warning("Path does not exist", path=path_str) + continue + _add_path(files, p) return sorted(files) @@ -83,19 +135,31 @@ def _hash_files(files: list[Path]) -> str: return h.hexdigest() -def did_change(cache_name: str, paths: Sequence[str | Path]) -> bool: +def did_change(cache_name: str, paths: Sequence[PathEntry]) -> bool: """Check if any files/dirs matching the given paths have changed since last check. - Args: - cache_name: Unique identifier for this cache (e.g. ``"mymodule_build_cache"``). - Different cache names track independently. - paths: List of file paths, directory paths, or glob patterns. - Directories are walked recursively. - Globs are expanded with :func:`glob.glob`. + Example:: + + # Track a single file + if did_change("my_build", ["src/main.c"]): + rebuild() + + # Track a whole directory (walked recursively) + if did_change("assets", ["/data/models"]): + reload_models() + + # Use Glob for wildcard patterns (str is always literal) + if did_change("c_sources", [Glob("src/**/*.c"), Glob("include/**/*.h")]): + recompile() - Returns: - ``True`` if any file has changed (or if no previous cache exists). - ``False`` if all files are identical to the cached state. + # Mix literal paths and globs + if did_change("config_check", ["config.yaml", Glob("templates/*.j2")]): + regenerate() + + Returns ``True`` on the first call (no previous cache), and on subsequent + calls returns ``True`` only if file contents differ from the last check. + The cache is always updated, so two consecutive calls with no changes + return ``True`` then ``False``. """ if not paths: return False @@ -104,27 +168,34 @@ def did_change(cache_name: str, paths: Sequence[str | Path]) -> bool: current_hash = _hash_files(files) cache_dir = _get_cache_dir() - cache_file = cache_dir / f"{cache_name}.hash" + cache_dir.mkdir(parents=True, exist_ok=True) + cache_file = cache_dir / f"{_safe_filename(cache_name)}.hash" + lock_file = cache_dir / f"{_safe_filename(cache_name)}.lock" changed = True - if cache_file.exists(): - previous_hash = cache_file.read_text().strip() - changed = current_hash != previous_hash - - # Always update the cache with the current hash - cache_dir.mkdir(parents=True, exist_ok=True) - cache_file.write_text(current_hash) + with open(lock_file, "w") as lf: + fcntl.flock(lf, fcntl.LOCK_EX) + try: + if cache_file.exists(): + previous_hash = cache_file.read_text().strip() + changed = current_hash != previous_hash + # Always update the cache with the current hash + cache_file.write_text(current_hash) + finally: + fcntl.flock(lf, fcntl.LOCK_UN) return changed def clear_cache(cache_name: str) -> bool: - """Remove the cached hash for the given cache name. + """Remove the cached hash so the next ``did_change`` call returns ``True``. + + Example:: - Returns: - ``True`` if the cache file existed and was removed. + clear_cache("my_build") + did_change("my_build", ["src/main.c"]) # always True after clear """ - cache_file = _get_cache_dir() / f"{cache_name}.hash" + cache_file = _get_cache_dir() / f"{_safe_filename(cache_name)}.hash" if cache_file.exists(): cache_file.unlink() return True diff --git a/dimos/utils/test_change_detect.py b/dimos/utils/test_change_detect.py index 351abe7c87..8855ac5094 100644 --- a/dimos/utils/test_change_detect.py +++ b/dimos/utils/test_change_detect.py @@ -20,7 +20,7 @@ import pytest -from dimos.utils.change_detect import clear_cache, did_change +from dimos.utils.change_detect import Glob, clear_cache, did_change @pytest.fixture(autouse=True) @@ -70,13 +70,22 @@ def test_file_deleted_returns_true(src_dir: Path) -> None: def test_glob_pattern(src_dir: Path) -> None: - pattern = str(src_dir / "*.c") + pattern = Glob(str(src_dir / "*.c")) assert did_change("glob_cache", [pattern]) is True assert did_change("glob_cache", [pattern]) is False (src_dir / "a.c").write_text("changed!") assert did_change("glob_cache", [pattern]) is True +def test_str_with_glob_chars_is_literal(tmp_path: Path) -> None: + """A plain str containing '*' must NOT be glob-expanded.""" + weird_name = tmp_path / "file[1].txt" + weird_name.write_text("content") + # str path — treated literally, should find the file + assert did_change("literal_test", [str(weird_name)]) is True + assert did_change("literal_test", [str(weird_name)]) is False + + def test_separate_cache_names_independent(src_dir: Path) -> None: paths = [str(src_dir)] did_change("cache_a", paths) diff --git a/pyproject.toml b/pyproject.toml index 1fbd29f86f..e40225816f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,6 +62,7 @@ dependencies = [ "annotation-protocol>=1.4.0", "lazy_loader", "plum-dispatch==2.5.7", + "xxhash>=3.0.0", # Logging "structlog>=25.5.0,<26", "colorlog==6.9.0", diff --git a/uv.lock b/uv.lock index 0d6a3a88ab..14c23ef32e 100644 --- a/uv.lock +++ b/uv.lock @@ -1713,6 +1713,7 @@ dependencies = [ { name = "textual" }, { name = "toolz" }, { name = "typer" }, + { name = "xxhash" }, ] [package.optional-dependencies] @@ -2146,6 +2147,7 @@ requires-dist = [ { name = "xarm-python-sdk", marker = "extra == 'manipulation'", specifier = ">=1.17.0" }, { name = "xarm-python-sdk", marker = "extra == 'misc'", specifier = ">=1.17.0" }, { name = "xformers", marker = "platform_machine == 'x86_64' and extra == 'cuda'", specifier = ">=0.0.20" }, + { name = "xxhash", specifier = ">=3.0.0" }, { name = "yapf", marker = "extra == 'misc'", specifier = "==0.40.2" }, ] provides-extras = ["misc", "visualization", "agents", "web", "perception", "unitree", "manipulation", "cpu", "cuda", "dev", "psql", "sim", "drone", "dds", "docker", "base"] From 18c56ea27877872c933edb5205ff698695ee8c35 Mon Sep 17 00:00:00 2001 From: jeff-hykin <17692058+jeff-hykin@users.noreply.github.com> Date: Tue, 17 Mar 2026 22:57:16 +0000 Subject: [PATCH 04/11] CI code cleanup --- dimos/utils/change_detect.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dimos/utils/change_detect.py b/dimos/utils/change_detect.py index 7522c73098..54d509fc14 100644 --- a/dimos/utils/change_detect.py +++ b/dimos/utils/change_detect.py @@ -45,9 +45,7 @@ def _get_cache_dir() -> Path: return Path.home() / ".cache" / "dimos" / "change_detect" -def _resolve_paths( - paths: Sequence[str | Path], cwd: str | Path | None = None -) -> list[Path]: +def _resolve_paths(paths: Sequence[str | Path], cwd: str | Path | None = None) -> list[Path]: """Expand globs/directories into a sorted list of individual file paths.""" files: set[Path] = set() for entry in paths: From e01688c49db85aa4de0915f52417b395f891e73d Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Wed, 18 Mar 2026 17:44:26 -0700 Subject: [PATCH 05/11] fixup pathing --- dimos/core/native_module.py | 10 ++++- dimos/utils/change_detect.py | 66 +++++++++++++++++++++++++------ dimos/utils/test_change_detect.py | 18 +++++++-- 3 files changed, 78 insertions(+), 16 deletions(-) diff --git a/dimos/core/native_module.py b/dimos/core/native_module.py index 8531ecff82..6df96e1f82 100644 --- a/dimos/core/native_module.py +++ b/dimos/core/native_module.py @@ -259,13 +259,19 @@ def _maybe_build(self) -> None: if exe.exists() and not needs_rebuild: return + if self.config.build_command is None: raise FileNotFoundError( f"Executable not found: {exe}. " "Set build_command in config to auto-build, or build it manually." ) + + # Don't unlink the exe before rebuilding — the build command is + # responsible for replacing it. For nix builds the exe lives inside + # a read-only store; `nix build -o` atomically swaps the output + # symlink without touching store contents. logger.info( - "Executable not found, running build", + "Rebuilding" if needs_rebuild else "Executable not found, building", executable=str(exe), build_command=self.config.build_command, ) @@ -296,7 +302,7 @@ def _maybe_build(self) -> None: # Update the change cache so next check is clean if self.config.rebuild_on_change: cache_name = f"native_{type(self).__name__}_build" - did_change(cache_name, self.config.rebuild_on_change) + did_change(cache_name, self.config.rebuild_on_change, cwd=self.config.cwd) def _collect_topics(self) -> dict[str, str]: """Extract LCM topic strings from blueprint-assigned stream transports.""" diff --git a/dimos/utils/change_detect.py b/dimos/utils/change_detect.py index 54d509fc14..691aba2f89 100644 --- a/dimos/utils/change_detect.py +++ b/dimos/utils/change_detect.py @@ -46,12 +46,23 @@ def _get_cache_dir() -> Path: def _resolve_paths(paths: Sequence[str | Path], cwd: str | Path | None = None) -> list[Path]: - """Expand globs/directories into a sorted list of individual file paths.""" + """Expand globs/directories into a sorted list of individual file paths. + + When *cwd* is provided, relative paths and glob patterns are resolved + against it. When *cwd* is ``None``, every entry must be absolute (or an + absolute glob); a relative path will raise :class:`ValueError` so that + callers don't silently resolve against an unpredictable process CWD. + """ files: set[Path] = set() for entry in paths: entry_str = str(entry) - # Resolve relative paths against cwd when provided - if cwd is not None and not Path(entry_str).is_absolute(): + is_relative = not Path(entry_str).is_absolute() + if is_relative: + if cwd is None: + raise ValueError( + f"Relative path {entry_str!r} passed to change detection without a cwd. " + "Either provide an absolute path or pass cwd= so relatives can be resolved." + ) entry_str = str(Path(cwd) / entry_str) # Try glob expansion first (handles both glob patterns and plain paths) expanded = glob_mod.glob(entry_str, recursive=True) @@ -91,24 +102,57 @@ def did_change( paths: Sequence[str | Path], cwd: str | Path | None = None, ) -> bool: - """Check if any files/dirs matching the given paths have changed since last check. + """Check if any files/dirs matching *paths* have changed since last check. + + Examples:: + + # Absolute paths — no cwd needed + did_change("my_build", ["/src/main.cpp", "/src/utils/*.hpp"]) + + # Relative paths — must pass cwd + did_change("my_build", ["src/main.cpp", "common/*.hpp"], cwd="/home/user/project") + + # Track a whole directory (walked recursively) + did_change("assets", ["/data/models/"], cwd="/project") + + # Second call with no file changes → False + did_change("my_build", ["/src/main.cpp"]) # True (first call, no cache) + did_change("my_build", ["/src/main.cpp"]) # False (nothing changed) + + # After editing a file → True again + Path("/src/main.cpp").write_text("// changed") + did_change("my_build", ["/src/main.cpp"]) # True + + # Relative path without cwd → ValueError + did_change("bad", ["src/main.cpp"]) # raises ValueError Args: - cache_name: Unique identifier for this cache (e.g. ``"mymodule_build_cache"``). - Different cache names track independently. - paths: List of file paths, directory paths, or glob patterns. + cache_name: Unique identifier for this cache. Different names track independently. + paths: File paths, directory paths, or glob patterns. Directories are walked recursively. - Globs are expanded with :func:`glob.glob`. - cwd: Optional working directory for resolving relative paths. + Relative paths require *cwd*; without it a ``ValueError`` is raised. + cwd: Working directory for resolving relative paths. Returns: - ``True`` if any file has changed (or if no previous cache exists). - ``False`` if all files are identical to the cached state. + ``True`` if any file has changed (or first call with no prior cache). + ``False`` if all files match the cached state, or if no files were found. """ if not paths: return False files = _resolve_paths(paths, cwd=cwd) + + # If none of the monitored paths resolve to actual files (e.g. source + # files don't exist on this branch or checkout), don't claim anything + # changed — deleting a working binary because we can't find the sources + # to compare against is destructive. + if not files: + logger.warning( + "No source files found for change detection, skipping rebuild check", + cache_name=cache_name, + ) + return False + current_hash = _hash_files(files) cache_dir = _get_cache_dir() diff --git a/dimos/utils/test_change_detect.py b/dimos/utils/test_change_detect.py index 351abe7c87..31e47ea8be 100644 --- a/dimos/utils/test_change_detect.py +++ b/dimos/utils/test_change_detect.py @@ -108,7 +108,19 @@ def test_empty_paths_returns_false() -> None: def test_nonexistent_path_warns(caplog: pytest.LogCaptureFixture) -> None: - """A non-existent path logs a warning and doesn't crash.""" + """A non-existent absolute path logs a warning and doesn't crash.""" result = did_change("missing_test", ["/nonexistent/path/to/file.c"]) - # First call with no resolvable files still returns True (no cache) - assert isinstance(result, bool) + # No resolvable files → returns False (skip rebuild) + assert result is False + + +def test_relative_path_without_cwd_raises() -> None: + """Relative paths without cwd= should raise ValueError.""" + with pytest.raises(ValueError, match="Relative path.*without a cwd"): + did_change("rel_test", ["some/relative/path.c"]) + + +def test_relative_path_with_cwd(src_dir: Path) -> None: + """Relative paths should resolve against the provided cwd.""" + assert did_change("cwd_test", ["src/a.c"], cwd=src_dir.parent) is True + assert did_change("cwd_test", ["src/a.c"], cwd=src_dir.parent) is False From cb5041b612d1436155f02b98b1f2b5d42da96365 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Thu, 19 Mar 2026 15:26:24 -0700 Subject: [PATCH 06/11] - --- dimos/core/native_module.py | 8 +-- dimos/utils/change_detect.py | 127 ++++++++++++++++++++++------------- 2 files changed, 83 insertions(+), 52 deletions(-) diff --git a/dimos/core/native_module.py b/dimos/core/native_module.py index 35f54957bb..ce9dd986d8 100644 --- a/dimos/core/native_module.py +++ b/dimos/core/native_module.py @@ -257,8 +257,9 @@ def _maybe_build(self) -> None: # Check if rebuild needed due to source changes needs_rebuild = False if self.config.rebuild_on_change and exe.exists(): - cache_name = f"native_{type(self).__name__}_build" - if did_change(cache_name, self.config.rebuild_on_change, cwd=self.config.cwd): + if did_change( + self._build_cache_name(), self.config.rebuild_on_change, cwd=self.config.cwd + ): logger.info("Source files changed, triggering rebuild", executable=str(exe)) needs_rebuild = True @@ -307,8 +308,7 @@ def _maybe_build(self) -> None: # Seed the cache after a successful build so the next check has a baseline # (needed for the initial build when the pre-build change check was skipped) if self.config.rebuild_on_change: - cache_name = f"native_{type(self).__name__}_build" - did_change(cache_name, self.config.rebuild_on_change, cwd=self.config.cwd) + did_change(self._build_cache_name(), self.config.rebuild_on_change, cwd=self.config.cwd) def _collect_topics(self) -> dict[str, str]: """Extract LCM topic strings from blueprint-assigned stream transports.""" diff --git a/dimos/utils/change_detect.py b/dimos/utils/change_detect.py index deebc285fd..7e37407222 100644 --- a/dimos/utils/change_detect.py +++ b/dimos/utils/change_detect.py @@ -30,6 +30,7 @@ from collections.abc import Sequence import fcntl import glob as glob_mod +import hashlib import os from pathlib import Path from typing import Union @@ -70,42 +71,72 @@ def _get_cache_dir() -> Path: return Path.home() / ".cache" / "dimos" / "change_detect" -def _resolve_paths(paths: Sequence[str | Path], cwd: str | Path | None = None) -> list[Path]: - """Expand globs/directories into a sorted list of individual file paths. +def _safe_filename(cache_name: str) -> str: + """Convert an arbitrary cache name into a safe filename. - When *cwd* is provided, relative paths and glob patterns are resolved - against it. When *cwd* is ``None``, every entry must be absolute (or an - absolute glob); a relative path will raise :class:`ValueError` so that - callers don't silently resolve against an unpredictable process CWD. + If the cache name is already a simple identifier it is returned as-is. + Otherwise a short SHA-256 prefix is appended so that names containing + path separators or other special characters produce unique, safe filenames. + """ + safe_chars = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-") + if all(c in safe_chars for c in cache_name) and len(cache_name) <= 200: + return cache_name + digest = hashlib.sha256(cache_name.encode()).hexdigest()[:16] + return digest + + +def _add_path(files: set[Path], p: Path) -> None: + """Add *p* (file or directory, walked recursively) to *files*.""" + if p.is_file(): + files.add(p.resolve()) + elif p.is_dir(): + for root, _dirs, filenames in os.walk(p): + for fname in filenames: + files.add(Path(root, fname).resolve()) + + +def _resolve_paths(paths: Sequence[PathEntry], cwd: str | Path | None = None) -> list[Path]: + """Resolve a mixed list of path entries into a sorted list of files. + + ``Glob`` entries are expanded via :func:`glob.glob`. All other types + (``str``, ``Path``, ``LfsPath``) are treated as literal paths — no + wildcard expansion is performed. + + When *cwd* is provided, relative paths are resolved against it. + When *cwd* is ``None``, relative paths raise :class:`ValueError`. """ files: set[Path] = set() for entry in paths: - entry_str = str(entry) - is_relative = not Path(entry_str).is_absolute() - if is_relative: - if cwd is None: - raise ValueError( - f"Relative path {entry_str!r} passed to change detection without a cwd. " - "Either provide an absolute path or pass cwd= so relatives can be resolved." - ) - entry_str = str(Path(cwd) / entry_str) - # Try glob expansion first (handles both glob patterns and plain paths) - expanded = glob_mod.glob(entry_str, recursive=True) - if not expanded: - # Nothing matched — could be a non-existent path or empty glob - if any(c in entry_str for c in ("*", "?", "[")): - logger.warning("Glob pattern matched no files", pattern=entry_str) - else: - logger.warning("Path does not exist", path=entry_str) - continue - for match in expanded: - p = Path(match) - if p.is_file(): - files.add(p.resolve()) - elif p.is_dir(): - for root, _dirs, filenames in os.walk(p): - for fname in filenames: - files.add(Path(root, fname).resolve()) + if isinstance(entry, Glob): + pattern = str(entry) + if not Path(pattern).is_absolute(): + if cwd is None: + raise ValueError( + f"Relative path {pattern!r} passed to change detection without a cwd. " + "Either provide an absolute path or pass cwd= so relatives can be resolved." + ) + pattern = str(Path(cwd) / pattern) + expanded = glob_mod.glob(pattern, recursive=True) + if not expanded: + logger.warning("Glob pattern matched no files", pattern=pattern) + continue + for match in expanded: + _add_path(files, Path(match)) + else: + # str, Path, LfsPath — literal path, no glob expansion + path_str = str(entry) + if not Path(path_str).is_absolute(): + if cwd is None: + raise ValueError( + f"Relative path {path_str!r} passed to change detection without a cwd. " + "Either provide an absolute path or pass cwd= so relatives can be resolved." + ) + path_str = str(Path(cwd) / path_str) + p = Path(path_str) + if not p.exists(): + logger.warning("Path does not exist", path=path_str) + continue + _add_path(files, p) return sorted(files) @@ -124,21 +155,27 @@ def _hash_files(files: list[Path]) -> str: def did_change( cache_name: str, - paths: Sequence[str | Path], + paths: Sequence[PathEntry], cwd: str | Path | None = None, ) -> bool: - """Check if any files/dirs matching *paths* have changed since last check. + """Check if any files/dirs matching the given paths have changed since last check. Examples:: # Absolute paths — no cwd needed - did_change("my_build", ["/src/main.cpp", "/src/utils/*.hpp"]) + did_change("my_build", ["/src/main.cpp"]) + + # Use Glob for wildcard patterns (str is always literal) + did_change("c_sources", [Glob("/src/**/*.c"), Glob("/include/**/*.h")]) # Relative paths — must pass cwd - did_change("my_build", ["src/main.cpp", "common/*.hpp"], cwd="/home/user/project") + did_change("my_build", ["src/main.cpp"], cwd="/home/user/project") + + # Mix literal paths and globs + did_change("config_check", ["config.yaml", Glob("templates/*.j2")], cwd="/project") # Track a whole directory (walked recursively) - did_change("assets", ["/data/models/"], cwd="/project") + did_change("assets", ["/data/models/"]) # Second call with no file changes → False did_change("my_build", ["/src/main.cpp"]) # True (first call, no cache) @@ -151,16 +188,10 @@ def did_change( # Relative path without cwd → ValueError did_change("bad", ["src/main.cpp"]) # raises ValueError - Args: - cache_name: Unique identifier for this cache. Different names track independently. - paths: File paths, directory paths, or glob patterns. - Directories are walked recursively. - Relative paths require *cwd*; without it a ``ValueError`` is raised. - cwd: Working directory for resolving relative paths. - - Returns: - ``True`` if any file has changed (or first call with no prior cache). - ``False`` if all files match the cached state, or if no files were found. + Returns ``True`` on the first call (no previous cache), and on subsequent + calls returns ``True`` only if file contents differ from the last check. + The cache is always updated, so two consecutive calls with no changes + return ``True`` then ``False``. """ if not paths: return False @@ -206,7 +237,7 @@ def clear_cache(cache_name: str) -> bool: Example:: clear_cache("my_build") - did_change("my_build", ["src/main.c"]) # always True after clear + did_change("my_build", ["/src/main.c"]) # always True after clear """ cache_file = _get_cache_dir() / f"{_safe_filename(cache_name)}.hash" if cache_file.exists(): From ee8e9367a19d22f6c08d5c31a0fd4ab95ebd4a51 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Sat, 21 Mar 2026 21:59:03 -0700 Subject: [PATCH 07/11] chore: regenerate uv.lock after merge with dev --- uv.lock | 2 ++ 1 file changed, 2 insertions(+) diff --git a/uv.lock b/uv.lock index 529842294b..449cc9e460 100644 --- a/uv.lock +++ b/uv.lock @@ -1714,6 +1714,7 @@ dependencies = [ { name = "toolz" }, { name = "typer" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, + { name = "xxhash" }, ] [package.optional-dependencies] @@ -2150,6 +2151,7 @@ requires-dist = [ { name = "xarm-python-sdk", marker = "extra == 'manipulation'", specifier = ">=1.17.0" }, { name = "xarm-python-sdk", marker = "extra == 'misc'", specifier = ">=1.17.0" }, { name = "xformers", marker = "platform_machine == 'x86_64' and extra == 'cuda'", specifier = ">=0.0.20" }, + { name = "xxhash", specifier = ">=3.0.0" }, { name = "yapf", marker = "extra == 'misc'", specifier = "==0.40.2" }, ] provides-extras = ["misc", "visualization", "agents", "web", "perception", "unitree", "manipulation", "cpu", "cuda", "dev", "psql", "sim", "drone", "dds", "docker", "base"] From eb9a913c8df6233c7b9cbe976375ecbc5492c0c1 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Sat, 21 Mar 2026 23:09:14 -0700 Subject: [PATCH 08/11] fix(change_detect): add threading.Lock alongside fcntl.flock fcntl.flock is per-file-description, not per-thread. Two threads in the same process can both hold LOCK_EX simultaneously. Add a per-cache-name threading.Lock to protect intra-process concurrent access. Revert: git revert HEAD --- dimos/utils/change_detect.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/dimos/utils/change_detect.py b/dimos/utils/change_detect.py index 7e37407222..7cdd46eb72 100644 --- a/dimos/utils/change_detect.py +++ b/dimos/utils/change_detect.py @@ -29,6 +29,7 @@ from collections.abc import Sequence import fcntl +import threading import glob as glob_mod import hashlib import os @@ -153,6 +154,18 @@ def _hash_files(files: list[Path]) -> str: return h.hexdigest() +# Thread-level locks keyed by cache_name (flock only protects cross-process). +_thread_locks: dict[str, threading.Lock] = {} +_thread_locks_guard = threading.Lock() + + +def _get_thread_lock(cache_name: str) -> threading.Lock: + with _thread_locks_guard: + if cache_name not in _thread_locks: + _thread_locks[cache_name] = threading.Lock() + return _thread_locks[cache_name] + + def did_change( cache_name: str, paths: Sequence[PathEntry], @@ -217,7 +230,8 @@ def did_change( lock_file = cache_dir / f"{_safe_filename(cache_name)}.lock" changed = True - with open(lock_file, "w") as lf: + thread_lock = _get_thread_lock(cache_name) + with thread_lock, open(lock_file, "w") as lf: fcntl.flock(lf, fcntl.LOCK_EX) try: if cache_file.exists(): From 4ed36cc127add84ad3c04b247674e6459f5adc00 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Sat, 21 Mar 2026 23:09:39 -0700 Subject: [PATCH 09/11] fix(native): include class qualname in build cache key Two NativeModule subclasses in the same file would share a cache key, corrupting each other's rebuild state. Add qualname to disambiguate. Revert: git revert HEAD --- dimos/core/native_module.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dimos/core/native_module.py b/dimos/core/native_module.py index ce9dd986d8..5aa226f374 100644 --- a/dimos/core/native_module.py +++ b/dimos/core/native_module.py @@ -248,7 +248,7 @@ def _resolve_paths(self) -> None: def _build_cache_name(self) -> str: """Return a stable, unique cache name for this module's build state.""" source_file = Path(inspect.getfile(type(self))).resolve() - return f"native_{source_file}" + return f"native_{source_file}:{type(self).__qualname__}" def _maybe_build(self) -> None: """Run ``build_command`` if the executable does not exist or sources changed.""" From 5b7a68e434636485e5f4686b120da0f925b359a6 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Mon, 23 Mar 2026 10:37:13 -0700 Subject: [PATCH 10/11] fix(native): prevent thread leaks in crash test Move super().stop() before self._process = None so the asyncio loop thread is joined before tests see the exit signal. Wrap crash test in try/finally with mod.stop(). --- dimos/agents_deprecated/memory/image_embedding.py | 2 +- dimos/core/native_module.py | 5 ++++- dimos/core/test_native_module.py | 15 +++++++++------ dimos/simulation/mujoco/policy.py | 2 +- dimos/utils/change_detect.py | 2 +- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/dimos/agents_deprecated/memory/image_embedding.py b/dimos/agents_deprecated/memory/image_embedding.py index 27e16f1aa8..d6b0967642 100644 --- a/dimos/agents_deprecated/memory/image_embedding.py +++ b/dimos/agents_deprecated/memory/image_embedding.py @@ -63,7 +63,7 @@ def __init__(self, model_name: str = "clip", dimensions: int = 512) -> None: def _initialize_model(self): # type: ignore[no-untyped-def] """Initialize the specified embedding model.""" try: - import onnxruntime as ort # type: ignore[import-untyped] + import onnxruntime as ort # type: ignore[import-untyped,import-not-found] import torch # noqa: F401 from transformers import ( # type: ignore[import-untyped] AutoFeatureExtractor, diff --git a/dimos/core/native_module.py b/dimos/core/native_module.py index 5aa226f374..1d9698b19d 100644 --- a/dimos/core/native_module.py +++ b/dimos/core/native_module.py @@ -188,8 +188,11 @@ def stop(self) -> None: if self._watchdog is not None and self._watchdog is not threading.current_thread(): self._watchdog.join(timeout=2) self._watchdog = None - self._process = None + # Clean up the asyncio loop thread (from ModuleBase) BEFORE + # clearing _process — tests use _process=None as their exit + # signal, and the loop thread must be joined first. super().stop() + self._process = None def _watch_process(self) -> None: """Block until the native process exits; trigger stop() if it crashed.""" diff --git a/dimos/core/test_native_module.py b/dimos/core/test_native_module.py index e77b8f9a53..fa973550e7 100644 --- a/dimos/core/test_native_module.py +++ b/dimos/core/test_native_module.py @@ -99,13 +99,16 @@ def test_process_crash_triggers_stop() -> None: assert mod._process is not None pid = mod._process.pid - # Wait for the process to die and the watchdog to call stop() - for _ in range(30): - time.sleep(0.1) - if mod._process is None: - break + try: + # Wait for the process to die and the watchdog to call stop() + for _ in range(30): + time.sleep(0.1) + if mod._process is None: + break - assert mod._process is None, f"Watchdog did not clean up after process {pid} died" + assert mod._process is None, f"Watchdog did not clean up after process {pid} died" + finally: + mod.stop() @pytest.mark.slow diff --git a/dimos/simulation/mujoco/policy.py b/dimos/simulation/mujoco/policy.py index 0a792baf1a..e55bc1a1e1 100644 --- a/dimos/simulation/mujoco/policy.py +++ b/dimos/simulation/mujoco/policy.py @@ -20,7 +20,7 @@ import mujoco import numpy as np -import onnxruntime as ort # type: ignore[import-untyped] +import onnxruntime as ort # type: ignore[import-untyped,import-not-found] from dimos.simulation.mujoco.input_controller import InputController from dimos.utils.logging_config import setup_logger diff --git a/dimos/utils/change_detect.py b/dimos/utils/change_detect.py index 7cdd46eb72..f2c59c053f 100644 --- a/dimos/utils/change_detect.py +++ b/dimos/utils/change_detect.py @@ -29,11 +29,11 @@ from collections.abc import Sequence import fcntl -import threading import glob as glob_mod import hashlib import os from pathlib import Path +import threading from typing import Union import xxhash From 68dd4bc50d121331e0af51ff09fa7edf31820810 Mon Sep 17 00:00:00 2001 From: Jeff Hykin Date: Mon, 23 Mar 2026 10:45:20 -0700 Subject: [PATCH 11/11] fix(test): isolate LCM multicast in flaky tests Use dedicated multicast addresses to prevent cross-test contamination in test_pattern_sub.py and test_lcmpubsub.py. --- dimos/protocol/pubsub/impl/test_lcmpubsub.py | 10 +++++++--- dimos/protocol/pubsub/test_pattern_sub.py | 12 ++++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/dimos/protocol/pubsub/impl/test_lcmpubsub.py b/dimos/protocol/pubsub/impl/test_lcmpubsub.py index c53bc32da2..9a08b48ae5 100644 --- a/dimos/protocol/pubsub/impl/test_lcmpubsub.py +++ b/dimos/protocol/pubsub/impl/test_lcmpubsub.py @@ -28,10 +28,14 @@ ) from dimos.utils.testing.collector import CallbackCollector +# Isolated multicast group so stale messages from other tests +# (which use the default 239.255.76.67:7667) don't leak in. +_ISOLATED_LCM_URL = "udpm://239.255.76.98:7698?ttl=0" + @pytest.fixture def lcm_pub_sub_base() -> Generator[LCMPubSubBase, None, None]: - lcm = LCMPubSubBase() + lcm = LCMPubSubBase(url=_ISOLATED_LCM_URL) lcm.start() yield lcm lcm.stop() @@ -39,7 +43,7 @@ def lcm_pub_sub_base() -> Generator[LCMPubSubBase, None, None]: @pytest.fixture def pickle_lcm() -> Generator[PickleLCM, None, None]: - lcm = PickleLCM() + lcm = PickleLCM(url=_ISOLATED_LCM_URL) lcm.start() yield lcm lcm.stop() @@ -47,7 +51,7 @@ def pickle_lcm() -> Generator[PickleLCM, None, None]: @pytest.fixture def lcm() -> Generator[LCM, None, None]: - lcm = LCM() + lcm = LCM(url=_ISOLATED_LCM_URL) lcm.start() yield lcm lcm.stop() diff --git a/dimos/protocol/pubsub/test_pattern_sub.py b/dimos/protocol/pubsub/test_pattern_sub.py index 4b888f4bba..8427ea34f3 100644 --- a/dimos/protocol/pubsub/test_pattern_sub.py +++ b/dimos/protocol/pubsub/test_pattern_sub.py @@ -52,10 +52,14 @@ class Case(Generic[TopicT, MsgT]): regex_patterns: list[tuple[TopicT, set[int]]] = field(default_factory=list) +# Use an isolated multicast group to avoid cross-test LCM contamination. +_ISOLATED_LCM_URL = "udpm://239.255.76.99:7699?ttl=0" + + @contextmanager def lcm_typed_context() -> Generator[tuple[LCM, LCM], None, None]: - pub = LCM() - sub = LCM() + pub = LCM(url=_ISOLATED_LCM_URL) + sub = LCM(url=_ISOLATED_LCM_URL) pub.start() sub.start() try: @@ -67,8 +71,8 @@ def lcm_typed_context() -> Generator[tuple[LCM, LCM], None, None]: @contextmanager def lcm_bytes_context() -> Generator[tuple[LCMPubSubBase, LCMPubSubBase], None, None]: - pub = LCMPubSubBase() - sub = LCMPubSubBase() + pub = LCMPubSubBase(url=_ISOLATED_LCM_URL) + sub = LCMPubSubBase(url=_ISOLATED_LCM_URL) pub.start() sub.start() try: