From d6dd97b6f8a092867e15c4051f8138d2329e4d69 Mon Sep 17 00:00:00 2001 From: gaoguobin <31329849+gaoguobin@users.noreply.github.com> Date: Wed, 20 May 2026 16:41:27 +0800 Subject: [PATCH 1/5] Add size-aware backup reporting and runtime cache exclusions --- src/agent_environment_backup/core.py | 46 ++++++++++++++++++++++++++++ tests/test_core.py | 34 ++++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/src/agent_environment_backup/core.py b/src/agent_environment_backup/core.py index 2803cb2..a9ad425 100644 --- a/src/agent_environment_backup/core.py +++ b/src/agent_environment_backup/core.py @@ -35,6 +35,7 @@ ".venv", "tmp", } +SIZE_REPORT_LIMIT = 10 LIVE_SQLITE_SUFFIXES = ( ".sqlite-wal", @@ -444,6 +445,7 @@ def inspect_claude_code_config(home: Path) -> dict[str, Any]: config_inspector=inspect_codex_config, commands=(("codex", "--version"), ("codex", "mcp", "list")), integration_module="codex_fast_proxy", + extra_excluded_dirs=("cache", "packages", "standalone", "node_modules"), ) CLAUDE_CODE_PROFILE = EnvironmentProfile( @@ -643,6 +645,16 @@ def onerror(exc: OSError) -> None: source = root_path / file_name relative = source.relative_to(home) if is_excluded(relative, extra_excluded_dirs): + if skipped is not None: + entry = { + "relative_path": normalize_relative(relative), + "reason": "excluded", + } + try: + entry["bytes"] = source.stat().st_size + except OSError: + pass + skipped.append(entry) continue try: skip_reason = regular_file_skip_reason(source) @@ -1421,6 +1433,31 @@ def create_backup( } ) + total_backup_bytes = sum(entry["bytes"] for entry in entries) + top_files_by_size = sorted( + ( + { + "relative_path": entry["relative_path"], + "bytes": entry["bytes"], + } + for entry in entries + ), + key=lambda item: item["bytes"], + reverse=True, + )[:SIZE_REPORT_LIMIT] + directory_sizes: dict[str, int] = {} + for entry in entries: + rel_path = Path(entry["relative_path"]) + for parent in rel_path.parents: + key = "." if str(parent) == "." else normalize_relative(parent) + directory_sizes[key] = directory_sizes.get(key, 0) + entry["bytes"] + top_directories_by_size = [ + {"relative_path": path, "bytes": size} + for path, size in sorted(directory_sizes.items(), key=lambda item: item[1], reverse=True)[:SIZE_REPORT_LIMIT] + ] + excluded_skipped = [item for item in skipped if item.get("reason") == "excluded"] + excluded_bytes = sum(int(item.get("bytes", 0)) for item in excluded_skipped) + sensitive_note = _make_sensitive_note(profile.display_name) doctor_report = doctor_environment(home, profile=profile, run_commands=run_doctor_commands) manifest = { @@ -1441,10 +1478,17 @@ def create_backup( "skipped": skipped, "counts": { "files": len(entries), + "bytes": total_backup_bytes, "sqlite_databases": sum(1 for entry in entries if entry["method"] == "sqlite_backup"), "errors": len(errors), "skipped": len(skipped), }, + "size_report": { + "top_files_by_size": top_files_by_size, + "top_directories_by_size": top_directories_by_size, + "excluded_entries": len(excluded_skipped), + "excluded_bytes": excluded_bytes, + }, } write_json(backup_dir / "manifest.json", manifest) write_json(backup_dir / "sqlite-integrity-check.json", sqlite_checks) @@ -1457,8 +1501,10 @@ def create_backup( f"Backup: {backup_dir}", f"{profile.display_name} home: {home}", f"Files: {len(entries)}", + f"Expanded size (files only): {total_backup_bytes} bytes", f"SQLite databases: {manifest['counts']['sqlite_databases']}", f"Errors: {len(errors)}", + f"Excluded entries: {len(excluded_skipped)} ({excluded_bytes} bytes)", f"Integrity: {'ok' if all(check.get('ok') for check in sqlite_checks) else 'failed'}", "Restore kit: RESTORE.md, RESTORE_INSTRUCTIONS.txt, restore-environment.cmd, restore-environment.ps1, restore-environment.command, restore-environment.sh", "", diff --git a/tests/test_core.py b/tests/test_core.py index f147d5c..9cb0206 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -228,6 +228,40 @@ def fake_walk(top, *args, **kwargs): self.assertEqual(listing["backups"][0]["status"], "failed") self.assertEqual(listing["backups"][0]["errors"], 1) + def test_backup_adds_size_report_and_excludes_runtime_cache_paths(self) -> None: + with self.temp_root() as temp_dir: + root = Path(temp_dir) + home = self.make_home(root) + (home / "cache").mkdir() + (home / "packages").mkdir() + (home / "standalone").mkdir() + (home / "node_modules").mkdir() + (home / "cache" / "cache.bin").write_bytes(b"x" * 4096) + (home / "packages" / "pkg.bin").write_bytes(b"y" * 2048) + (home / "standalone" / "rt.bin").write_bytes(b"z" * 1024) + (home / "node_modules" / "mod.bin").write_bytes(b"n" * 512) + backup_root = root / "backups" + + result = create_backup( + home, + backup_root=backup_root, + timestamp="codex-backup-size-report", + run_doctor_commands=False, + ) + + self.assertTrue(result["ok"], result) + manifest = json.loads(Path(result["manifest"]).read_text(encoding="utf-8")) + paths = {entry["relative_path"] for entry in manifest["entries"]} + self.assertNotIn("cache/cache.bin", paths) + self.assertNotIn("packages/pkg.bin", paths) + self.assertNotIn("standalone/rt.bin", paths) + self.assertNotIn("node_modules/mod.bin", paths) + self.assertIn("size_report", manifest) + self.assertIn("top_files_by_size", manifest["size_report"]) + self.assertIn("top_directories_by_size", manifest["size_report"]) + self.assertGreaterEqual(manifest["size_report"]["excluded_entries"], 2) + self.assertGreaterEqual(manifest["size_report"]["excluded_bytes"], 1) + def test_restore_dry_run_and_apply_overlay(self) -> None: with self.temp_root() as temp_dir: root = Path(temp_dir) From 6f2a02a0db449db5530a4262ee1bc80d03161b9a Mon Sep 17 00:00:00 2001 From: gaoguobin <31329849+gaoguobin@users.noreply.github.com> Date: Wed, 20 May 2026 16:58:49 +0800 Subject: [PATCH 2/5] Count pruned excluded directories in size report --- src/agent_environment_backup/core.py | 36 ++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/src/agent_environment_backup/core.py b/src/agent_environment_backup/core.py index a9ad425..7e4cc32 100644 --- a/src/agent_environment_backup/core.py +++ b/src/agent_environment_backup/core.py @@ -627,6 +627,20 @@ def iter_source_files( extra_excluded_dirs: frozenset[str] = frozenset(), skipped: list[dict[str, str]] | None = None, ) -> Iterator[tuple[Path, Path]]: + def excluded_subtree_stats(path: Path) -> tuple[int, int]: + file_count = 0 + total_bytes = 0 + for walk_root, _, walk_files in os.walk(path): + walk_root_path = Path(walk_root) + for walk_file in walk_files: + walk_path = walk_root_path / walk_file + file_count += 1 + try: + total_bytes += walk_path.stat().st_size + except OSError: + pass + return file_count, total_bytes + def onerror(exc: OSError) -> None: entry = walk_error_entry(home, exc, method="walk") if errors is not None: @@ -637,6 +651,23 @@ def onerror(exc: OSError) -> None: for root, dir_names, file_names in os.walk(home, onerror=onerror): root_path = Path(root) rel_root = root_path.relative_to(home) + excluded_dirs = [ + name + for name in dir_names + if is_excluded(rel_root / name, extra_excluded_dirs) + ] + if skipped is not None: + for excluded_dir in excluded_dirs: + excluded_path = root_path / excluded_dir + file_count, total_bytes = excluded_subtree_stats(excluded_path) + skipped.append( + { + "relative_path": normalize_relative(rel_root / excluded_dir), + "reason": "excluded", + "file_count": file_count, + "bytes": total_bytes, + } + ) dir_names[:] = [ name for name in dir_names if not is_excluded(rel_root / name, extra_excluded_dirs) @@ -1456,6 +1487,7 @@ def create_backup( for path, size in sorted(directory_sizes.items(), key=lambda item: item[1], reverse=True)[:SIZE_REPORT_LIMIT] ] excluded_skipped = [item for item in skipped if item.get("reason") == "excluded"] + excluded_entries = sum(int(item.get("file_count", 1)) for item in excluded_skipped) excluded_bytes = sum(int(item.get("bytes", 0)) for item in excluded_skipped) sensitive_note = _make_sensitive_note(profile.display_name) @@ -1486,7 +1518,7 @@ def create_backup( "size_report": { "top_files_by_size": top_files_by_size, "top_directories_by_size": top_directories_by_size, - "excluded_entries": len(excluded_skipped), + "excluded_entries": excluded_entries, "excluded_bytes": excluded_bytes, }, } @@ -1504,7 +1536,7 @@ def create_backup( f"Expanded size (files only): {total_backup_bytes} bytes", f"SQLite databases: {manifest['counts']['sqlite_databases']}", f"Errors: {len(errors)}", - f"Excluded entries: {len(excluded_skipped)} ({excluded_bytes} bytes)", + f"Excluded entries: {excluded_entries} ({excluded_bytes} bytes)", f"Integrity: {'ok' if all(check.get('ok') for check in sqlite_checks) else 'failed'}", "Restore kit: RESTORE.md, RESTORE_INSTRUCTIONS.txt, restore-environment.cmd, restore-environment.ps1, restore-environment.command, restore-environment.sh", "", From af395eec6b4e3f7302bc83a0575b4ec94cd7817d Mon Sep 17 00:00:00 2001 From: gaoguobin <31329849+gaoguobin@users.noreply.github.com> Date: Wed, 20 May 2026 17:09:11 +0800 Subject: [PATCH 3/5] Sync codex standalone restore exclusions and add tests --- src/agent_environment_backup/core.py | 1 + tests/test_core.py | 43 ++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/src/agent_environment_backup/core.py b/src/agent_environment_backup/core.py index 7e4cc32..cf60406 100644 --- a/src/agent_environment_backup/core.py +++ b/src/agent_environment_backup/core.py @@ -869,6 +869,7 @@ def restore_kit_markdown(display_name: str = "Codex") -> str: "claude-code": ".claude", } PROFILE_EXTRA_EXCLUDED = { + "codex": {"cache", "packages", "standalone", "node_modules"}, "claude-code": {"cache"}, } diff --git a/tests/test_core.py b/tests/test_core.py index 9cb0206..65335d7 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1293,6 +1293,49 @@ def test_standalone_restore_excludes_profile_dirs(self) -> None: "Standalone restore should skip profile-excluded dirs (cache for claude-code)", ) + def test_standalone_restore_excludes_codex_profile_dirs(self) -> None: + from agent_environment_backup.core import create_backup, CODEX_PROFILE + with self.temp_root() as temp_dir: + root = Path(temp_dir) + home = self.make_home(root) + result = create_backup( + home, + backup_root=root / "backups", + profile=CODEX_PROFILE, + timestamp="codex-excl-standalone-test", + run_doctor_commands=False, + ) + + backup_files = Path(result["backup_dir"]) / "files" + for excluded_dir in ("cache", "packages", "standalone", "node_modules"): + excluded_path = backup_files / excluded_dir + excluded_path.mkdir(parents=True, exist_ok=True) + (excluded_path / "stale.bin").write_text("stale", encoding="utf-8") + + standalone_target = root / "standalone-codex-excl-target" + standalone_target.mkdir() + standalone_apply = subprocess.run( + [ + sys.executable, + result["restore_kit"]["restore_py"], + "--backup-dir", + result["backup_dir"], + "--target-home", + str(standalone_target), + "--apply", + "--confirm", + ], + capture_output=True, + text=True, + check=False, + ) + self.assertEqual(standalone_apply.returncode, 0, standalone_apply.stderr) + for excluded_dir in ("cache", "packages", "standalone", "node_modules"): + self.assertFalse( + (standalone_target / excluded_dir / "stale.bin").exists(), + f"Standalone restore should skip {excluded_dir} for codex profile", + ) + # -- Issue 5a: doctor count_tree uses extra_excluded_dirs -- def test_doctor_count_tree_respects_extra_excluded_dirs(self) -> None: from agent_environment_backup.core import doctor_environment, CLAUDE_CODE_PROFILE From 5110da7ab7abceb73009546bc6f1c5434ec80302 Mon Sep 17 00:00:00 2001 From: gaoguobin <31329849+gaoguobin@users.noreply.github.com> Date: Wed, 20 May 2026 17:21:44 +0800 Subject: [PATCH 4/5] Avoid traversing symlinked excluded directories --- src/agent_environment_backup/core.py | 5 +++++ tests/test_core.py | 27 +++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/src/agent_environment_backup/core.py b/src/agent_environment_backup/core.py index cf60406..979a88f 100644 --- a/src/agent_environment_backup/core.py +++ b/src/agent_environment_backup/core.py @@ -628,6 +628,11 @@ def iter_source_files( skipped: list[dict[str, str]] | None = None, ) -> Iterator[tuple[Path, Path]]: def excluded_subtree_stats(path: Path) -> tuple[int, int]: + if path.is_symlink(): + try: + return 1, path.lstat().st_size + except OSError: + return 1, 0 file_count = 0 total_bytes = 0 for walk_root, _, walk_files in os.walk(path): diff --git a/tests/test_core.py b/tests/test_core.py index 65335d7..af36ae2 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -262,6 +262,33 @@ def test_backup_adds_size_report_and_excludes_runtime_cache_paths(self) -> None: self.assertGreaterEqual(manifest["size_report"]["excluded_entries"], 2) self.assertGreaterEqual(manifest["size_report"]["excluded_bytes"], 1) + def test_backup_excluded_symlinked_directory_does_not_traverse_outside_home(self) -> None: + with self.temp_root() as temp_dir: + root = Path(temp_dir) + home = self.make_home(root) + outside = root / "outside-cache" + outside.mkdir() + (outside / "huge.bin").write_bytes(b"x" * 8192) + + cache_link = home / "cache" + try: + cache_link.symlink_to(outside, target_is_directory=True) + except OSError: + self.skipTest("symlinks are not supported on this platform") + + backup_root = root / "backups" + result = create_backup( + home, + backup_root=backup_root, + timestamp="codex-backup-symlinked-excluded", + run_doctor_commands=False, + ) + + self.assertTrue(result["ok"], result) + manifest = json.loads(Path(result["manifest"]).read_text(encoding="utf-8")) + self.assertGreaterEqual(manifest["size_report"]["excluded_entries"], 1) + self.assertLess(manifest["size_report"]["excluded_bytes"], 8192) + def test_restore_dry_run_and_apply_overlay(self) -> None: with self.temp_root() as temp_dir: root = Path(temp_dir) From 6dae3534d60ce1fb4711aa683d31b0ca3021b6f9 Mon Sep 17 00:00:00 2001 From: gaoguobin <31329849+gaoguobin@users.noreply.github.com> Date: Wed, 20 May 2026 17:31:44 +0800 Subject: [PATCH 5/5] Use lstat for excluded symlink size accounting --- src/agent_environment_backup/core.py | 4 ++-- tests/test_core.py | 32 ++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/agent_environment_backup/core.py b/src/agent_environment_backup/core.py index 979a88f..7cdd903 100644 --- a/src/agent_environment_backup/core.py +++ b/src/agent_environment_backup/core.py @@ -641,7 +641,7 @@ def excluded_subtree_stats(path: Path) -> tuple[int, int]: walk_path = walk_root_path / walk_file file_count += 1 try: - total_bytes += walk_path.stat().st_size + total_bytes += walk_path.lstat().st_size except OSError: pass return file_count, total_bytes @@ -687,7 +687,7 @@ def onerror(exc: OSError) -> None: "reason": "excluded", } try: - entry["bytes"] = source.stat().st_size + entry["bytes"] = source.lstat().st_size except OSError: pass skipped.append(entry) diff --git a/tests/test_core.py b/tests/test_core.py index af36ae2..d8c34a1 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -289,6 +289,38 @@ def test_backup_excluded_symlinked_directory_does_not_traverse_outside_home(self self.assertGreaterEqual(manifest["size_report"]["excluded_entries"], 1) self.assertLess(manifest["size_report"]["excluded_bytes"], 8192) + def test_backup_excluded_directory_stats_do_not_follow_file_symlinks(self) -> None: + with self.temp_root() as temp_dir: + root = Path(temp_dir) + home = self.make_home(root) + outside = root / "outside-large.bin" + outside.write_bytes(b"x" * 8192) + cache = home / "cache" + cache.mkdir() + try: + (cache / "linked-large.bin").symlink_to(outside) + except OSError: + self.skipTest("symlinks are not supported on this platform") + + result = create_backup( + home, + backup_root=root / "backups", + timestamp="codex-backup-excluded-file-symlink", + run_doctor_commands=False, + ) + + self.assertTrue(result["ok"], result) + manifest = json.loads(Path(result["manifest"]).read_text(encoding="utf-8")) + skipped = { + entry["relative_path"]: entry + for entry in manifest["skipped"] + if entry.get("reason") == "excluded" + } + self.assertIn("cache", skipped) + self.assertEqual(skipped["cache"]["file_count"], 1) + self.assertLess(skipped["cache"]["bytes"], 8192) + self.assertLess(manifest["size_report"]["excluded_bytes"], 8192) + def test_restore_dry_run_and_apply_overlay(self) -> None: with self.temp_root() as temp_dir: root = Path(temp_dir)