diff --git a/src/agent_environment_backup/core.py b/src/agent_environment_backup/core.py index 2803cb2..7cdd903 100644 --- a/src/agent_environment_backup/core.py +++ b/src/agent_environment_backup/core.py @@ -35,6 +35,7 @@ ".venv", "tmp", } +SIZE_REPORT_LIMIT = 10 LIVE_SQLITE_SUFFIXES = ( ".sqlite-wal", @@ -444,6 +445,7 @@ def inspect_claude_code_config(home: Path) -> dict[str, Any]: config_inspector=inspect_codex_config, commands=(("codex", "--version"), ("codex", "mcp", "list")), integration_module="codex_fast_proxy", + extra_excluded_dirs=("cache", "packages", "standalone", "node_modules"), ) CLAUDE_CODE_PROFILE = EnvironmentProfile( @@ -625,6 +627,25 @@ def iter_source_files( extra_excluded_dirs: frozenset[str] = frozenset(), skipped: list[dict[str, str]] | None = None, ) -> Iterator[tuple[Path, Path]]: + def excluded_subtree_stats(path: Path) -> tuple[int, int]: + if path.is_symlink(): + try: + return 1, path.lstat().st_size + except OSError: + return 1, 0 + file_count = 0 + total_bytes = 0 + for walk_root, _, walk_files in os.walk(path): + walk_root_path = Path(walk_root) + for walk_file in walk_files: + walk_path = walk_root_path / walk_file + file_count += 1 + try: + total_bytes += walk_path.lstat().st_size + except OSError: + pass + return file_count, total_bytes + def onerror(exc: OSError) -> None: entry = walk_error_entry(home, exc, method="walk") if errors is not None: @@ -635,6 +656,23 @@ def onerror(exc: OSError) -> None: for root, dir_names, file_names in os.walk(home, onerror=onerror): root_path = Path(root) rel_root = root_path.relative_to(home) + excluded_dirs = [ + name + for name in dir_names + if is_excluded(rel_root / name, extra_excluded_dirs) + ] + if skipped is not None: + for excluded_dir in excluded_dirs: + excluded_path = root_path / excluded_dir + file_count, total_bytes = excluded_subtree_stats(excluded_path) + skipped.append( + { + "relative_path": normalize_relative(rel_root / excluded_dir), + "reason": "excluded", + "file_count": file_count, + "bytes": total_bytes, + } + ) dir_names[:] = [ name for name in dir_names if not is_excluded(rel_root / name, extra_excluded_dirs) @@ -643,6 +681,16 @@ def onerror(exc: OSError) -> None: source = root_path / file_name relative = source.relative_to(home) if is_excluded(relative, extra_excluded_dirs): + if skipped is not None: + entry = { + "relative_path": normalize_relative(relative), + "reason": "excluded", + } + try: + entry["bytes"] = source.lstat().st_size + except OSError: + pass + skipped.append(entry) continue try: skip_reason = regular_file_skip_reason(source) @@ -826,6 +874,7 @@ def restore_kit_markdown(display_name: str = "Codex") -> str: "claude-code": ".claude", } PROFILE_EXTRA_EXCLUDED = { + "codex": {"cache", "packages", "standalone", "node_modules"}, "claude-code": {"cache"}, } @@ -1421,6 +1470,32 @@ def create_backup( } ) + total_backup_bytes = sum(entry["bytes"] for entry in entries) + top_files_by_size = sorted( + ( + { + "relative_path": entry["relative_path"], + "bytes": entry["bytes"], + } + for entry in entries + ), + key=lambda item: item["bytes"], + reverse=True, + )[:SIZE_REPORT_LIMIT] + directory_sizes: dict[str, int] = {} + for entry in entries: + rel_path = Path(entry["relative_path"]) + for parent in rel_path.parents: + key = "." if str(parent) == "." else normalize_relative(parent) + directory_sizes[key] = directory_sizes.get(key, 0) + entry["bytes"] + top_directories_by_size = [ + {"relative_path": path, "bytes": size} + for path, size in sorted(directory_sizes.items(), key=lambda item: item[1], reverse=True)[:SIZE_REPORT_LIMIT] + ] + excluded_skipped = [item for item in skipped if item.get("reason") == "excluded"] + excluded_entries = sum(int(item.get("file_count", 1)) for item in excluded_skipped) + excluded_bytes = sum(int(item.get("bytes", 0)) for item in excluded_skipped) + sensitive_note = _make_sensitive_note(profile.display_name) doctor_report = doctor_environment(home, profile=profile, run_commands=run_doctor_commands) manifest = { @@ -1441,10 +1516,17 @@ def create_backup( "skipped": skipped, "counts": { "files": len(entries), + "bytes": total_backup_bytes, "sqlite_databases": sum(1 for entry in entries if entry["method"] == "sqlite_backup"), "errors": len(errors), "skipped": len(skipped), }, + "size_report": { + "top_files_by_size": top_files_by_size, + "top_directories_by_size": top_directories_by_size, + "excluded_entries": excluded_entries, + "excluded_bytes": excluded_bytes, + }, } write_json(backup_dir / "manifest.json", manifest) write_json(backup_dir / "sqlite-integrity-check.json", sqlite_checks) @@ -1457,8 +1539,10 @@ def create_backup( f"Backup: {backup_dir}", f"{profile.display_name} home: {home}", f"Files: {len(entries)}", + f"Expanded size (files only): {total_backup_bytes} bytes", f"SQLite databases: {manifest['counts']['sqlite_databases']}", f"Errors: {len(errors)}", + f"Excluded entries: {excluded_entries} ({excluded_bytes} bytes)", f"Integrity: {'ok' if all(check.get('ok') for check in sqlite_checks) else 'failed'}", "Restore kit: RESTORE.md, RESTORE_INSTRUCTIONS.txt, restore-environment.cmd, restore-environment.ps1, restore-environment.command, restore-environment.sh", "", diff --git a/tests/test_core.py b/tests/test_core.py index f147d5c..d8c34a1 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -228,6 +228,99 @@ def fake_walk(top, *args, **kwargs): self.assertEqual(listing["backups"][0]["status"], "failed") self.assertEqual(listing["backups"][0]["errors"], 1) + def test_backup_adds_size_report_and_excludes_runtime_cache_paths(self) -> None: + with self.temp_root() as temp_dir: + root = Path(temp_dir) + home = self.make_home(root) + (home / "cache").mkdir() + (home / "packages").mkdir() + (home / "standalone").mkdir() + (home / "node_modules").mkdir() + (home / "cache" / "cache.bin").write_bytes(b"x" * 4096) + (home / "packages" / "pkg.bin").write_bytes(b"y" * 2048) + (home / "standalone" / "rt.bin").write_bytes(b"z" * 1024) + (home / "node_modules" / "mod.bin").write_bytes(b"n" * 512) + backup_root = root / "backups" + + result = create_backup( + home, + backup_root=backup_root, + timestamp="codex-backup-size-report", + run_doctor_commands=False, + ) + + self.assertTrue(result["ok"], result) + manifest = json.loads(Path(result["manifest"]).read_text(encoding="utf-8")) + paths = {entry["relative_path"] for entry in manifest["entries"]} + self.assertNotIn("cache/cache.bin", paths) + self.assertNotIn("packages/pkg.bin", paths) + self.assertNotIn("standalone/rt.bin", paths) + self.assertNotIn("node_modules/mod.bin", paths) + self.assertIn("size_report", manifest) + self.assertIn("top_files_by_size", manifest["size_report"]) + self.assertIn("top_directories_by_size", manifest["size_report"]) + self.assertGreaterEqual(manifest["size_report"]["excluded_entries"], 2) + self.assertGreaterEqual(manifest["size_report"]["excluded_bytes"], 1) + + def test_backup_excluded_symlinked_directory_does_not_traverse_outside_home(self) -> None: + with self.temp_root() as temp_dir: + root = Path(temp_dir) + home = self.make_home(root) + outside = root / "outside-cache" + outside.mkdir() + (outside / "huge.bin").write_bytes(b"x" * 8192) + + cache_link = home / "cache" + try: + cache_link.symlink_to(outside, target_is_directory=True) + except OSError: + self.skipTest("symlinks are not supported on this platform") + + backup_root = root / "backups" + result = create_backup( + home, + backup_root=backup_root, + timestamp="codex-backup-symlinked-excluded", + run_doctor_commands=False, + ) + + self.assertTrue(result["ok"], result) + manifest = json.loads(Path(result["manifest"]).read_text(encoding="utf-8")) + self.assertGreaterEqual(manifest["size_report"]["excluded_entries"], 1) + self.assertLess(manifest["size_report"]["excluded_bytes"], 8192) + + def test_backup_excluded_directory_stats_do_not_follow_file_symlinks(self) -> None: + with self.temp_root() as temp_dir: + root = Path(temp_dir) + home = self.make_home(root) + outside = root / "outside-large.bin" + outside.write_bytes(b"x" * 8192) + cache = home / "cache" + cache.mkdir() + try: + (cache / "linked-large.bin").symlink_to(outside) + except OSError: + self.skipTest("symlinks are not supported on this platform") + + result = create_backup( + home, + backup_root=root / "backups", + timestamp="codex-backup-excluded-file-symlink", + run_doctor_commands=False, + ) + + self.assertTrue(result["ok"], result) + manifest = json.loads(Path(result["manifest"]).read_text(encoding="utf-8")) + skipped = { + entry["relative_path"]: entry + for entry in manifest["skipped"] + if entry.get("reason") == "excluded" + } + self.assertIn("cache", skipped) + self.assertEqual(skipped["cache"]["file_count"], 1) + self.assertLess(skipped["cache"]["bytes"], 8192) + self.assertLess(manifest["size_report"]["excluded_bytes"], 8192) + def test_restore_dry_run_and_apply_overlay(self) -> None: with self.temp_root() as temp_dir: root = Path(temp_dir) @@ -1259,6 +1352,49 @@ def test_standalone_restore_excludes_profile_dirs(self) -> None: "Standalone restore should skip profile-excluded dirs (cache for claude-code)", ) + def test_standalone_restore_excludes_codex_profile_dirs(self) -> None: + from agent_environment_backup.core import create_backup, CODEX_PROFILE + with self.temp_root() as temp_dir: + root = Path(temp_dir) + home = self.make_home(root) + result = create_backup( + home, + backup_root=root / "backups", + profile=CODEX_PROFILE, + timestamp="codex-excl-standalone-test", + run_doctor_commands=False, + ) + + backup_files = Path(result["backup_dir"]) / "files" + for excluded_dir in ("cache", "packages", "standalone", "node_modules"): + excluded_path = backup_files / excluded_dir + excluded_path.mkdir(parents=True, exist_ok=True) + (excluded_path / "stale.bin").write_text("stale", encoding="utf-8") + + standalone_target = root / "standalone-codex-excl-target" + standalone_target.mkdir() + standalone_apply = subprocess.run( + [ + sys.executable, + result["restore_kit"]["restore_py"], + "--backup-dir", + result["backup_dir"], + "--target-home", + str(standalone_target), + "--apply", + "--confirm", + ], + capture_output=True, + text=True, + check=False, + ) + self.assertEqual(standalone_apply.returncode, 0, standalone_apply.stderr) + for excluded_dir in ("cache", "packages", "standalone", "node_modules"): + self.assertFalse( + (standalone_target / excluded_dir / "stale.bin").exists(), + f"Standalone restore should skip {excluded_dir} for codex profile", + ) + # -- Issue 5a: doctor count_tree uses extra_excluded_dirs -- def test_doctor_count_tree_respects_extra_excluded_dirs(self) -> None: from agent_environment_backup.core import doctor_environment, CLAUDE_CODE_PROFILE