From d47090ac4d5bf634fae25fdab460c13ed6ff0e2a Mon Sep 17 00:00:00 2001 From: ndjama Date: Mon, 8 Jun 2026 19:53:38 +0200 Subject: [PATCH] fix(federation): skip federated subrepos on Windows (path normalization) is_under_any resolved the subrepo roots but left an already-absolute candidate path untouched, and compared with pathlib's case-sensitive relative_to. On Windows the filesystem is case-insensitive and resolve() can change casing or 8.3 short-names, so every federated subrepo failed to match and none of its files were skipped: cgh init counted, and cgh index scanned, the whole tree including federated subdirs. Now both sides are resolved and os.path.normcase'd, with a separator-boundary prefix check (so /foo/services-bar is not treated as under /foo/services). This fixes the file census and the actual parent scan, which both route through is_under_any. POSIX behavior is unchanged (normcase is identity). Tests: nested-subrepo match (the landing-zone edf-sa/services-* layout), sibling-prefix boundary safety, root-itself, on top of the existing cases. --- codegraph/analysis/federation.py | 30 ++++++++++++++++++++++------ tests/test_server/test_federation.py | 20 +++++++++++++++++++ 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/codegraph/analysis/federation.py b/codegraph/analysis/federation.py index 697a3fc..496a093 100644 --- a/codegraph/analysis/federation.py +++ b/codegraph/analysis/federation.py @@ -12,6 +12,7 @@ from __future__ import annotations +import os import sqlite3 from collections.abc import Callable, Iterator from contextlib import contextmanager @@ -108,16 +109,33 @@ def child_paths_to_skip(repo_root: str | Path) -> list[Path]: def is_under_any(path: str | Path, roots: list[Path]) -> bool: - """Return True if `path` lives under any of `roots` (inclusive).""" + """Return True if `path` lives under any of `roots` (inclusive). + + Both sides are resolved and case-normalized before comparison. This matters + on Windows: the filesystem is case-insensitive and `resolve()` may change + casing or 8.3 short-names, while a candidate built as `root / "a/b.tf"` from + a git ls-files path mixes separators. The previous version only resolved the + roots (via resolve_children) and left an already-absolute candidate + untouched, so on Windows the case/short-name mismatch made every federated + subrepo fail to match, and none of their files were skipped from the parent + scan. pathlib's relative_to is case-sensitive, so we compare normcase'd + strings on a path-separator boundary instead. + """ if not roots: return False - p = Path(path).resolve() if not Path(path).is_absolute() else Path(path) - for root in roots: + + def _norm(pp: Path) -> str: try: - p.relative_to(root) + pp = pp.resolve() + except OSError: + pp = pp.absolute() + return os.path.normcase(str(pp)) + + p_norm = _norm(Path(path)) + for root in roots: + r_norm = _norm(Path(root)) + if p_norm == r_norm or p_norm.startswith(r_norm + os.sep): return True - except ValueError: - continue return False diff --git a/tests/test_server/test_federation.py b/tests/test_server/test_federation.py index 7535dfa..f8e606b 100644 --- a/tests/test_server/test_federation.py +++ b/tests/test_server/test_federation.py @@ -92,6 +92,26 @@ def test_miss(self, tmp_path): def test_empty_roots(self, tmp_path): assert not is_under_any(tmp_path / "x", []) + def test_nested_subrepo_match(self, tmp_path): + # Mirrors the landing-zone layout: subrepos nested under a plain dir. + # A git-ls-files style candidate (root / "edf-sa/services-backup/main.tf") + # must match the resolved subrepo root, the case the parent scan skips. + root = tmp_path / "edf-sa" / "services-backup" + root.mkdir(parents=True) + candidate = tmp_path / "edf-sa/services-backup/main.tf" + assert is_under_any(candidate, [root]) + + def test_sibling_prefix_is_not_under(self, tmp_path): + # Boundary safety: /foo/services-bar must not count as under /foo/services + roots = [tmp_path / "services"] + roots[0].mkdir() + assert not is_under_any(tmp_path / "services-bar" / "f.tf", roots) + + def test_root_itself_is_under(self, tmp_path): + root = tmp_path / "sub" + root.mkdir() + assert is_under_any(root, [root]) + class TestVerifyChild: def test_missing_path(self, tmp_path):