From 8c5f1579fcdea30d50c5c48e9e2532ecdac6e768 Mon Sep 17 00:00:00 2001 From: "circleci-app[bot]" <127350680+circleci-app[bot]@users.noreply.github.com> Date: Fri, 12 Jun 2026 09:36:20 +0000 Subject: [PATCH] Fix Windows path sanitization in publication_output_dir_fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Linux, Path(r"C:\Users\bob\...") does not parse Windows backslash paths correctly — the entire string becomes a single path component, so .name returns the full path. Replace("\\", "/") then produced "C:/Users/bob/..." which leaked the username into output_dir_relative. Mirror the pattern already used in sanitize_path_for_publication and publication_audio_path_fields: detect Windows-style paths and use PureWindowsPath so .name correctly extracts only the last component. AI-Generated: true --- metadata_sanitizer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/metadata_sanitizer.py b/metadata_sanitizer.py index 5a22e88..7db34ce 100644 --- a/metadata_sanitizer.py +++ b/metadata_sanitizer.py @@ -683,10 +683,12 @@ def publication_audio_path_fields(path: Union[str, Path], *, dataset_root: Optio def publication_output_dir_fields(path: Union[str, Path], *, dataset_root: Optional[Path] = None) -> dict[str, Any]: """Publication-safe output directory (relative to corpus root when possible).""" - p = Path(str(path)) + path_text = str(path) + is_windows_style = bool(_WIN_ABS_START_EXPORT.match(path_text)) or "\\" in path_text + p = PureWindowsPath(path_text) if is_windows_style else Path(path_text) if dataset_root is not None: try: - rel = Path(p).resolve().relative_to(Path(dataset_root).resolve()).as_posix() + rel = Path(path_text).resolve().relative_to(Path(dataset_root).resolve()).as_posix() except Exception: rel = p.name or "output" else: