Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions src/agent_environment_backup/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
".venv",
"tmp",
}
SIZE_REPORT_LIMIT = 10

LIVE_SQLITE_SUFFIXES = (
".sqlite-wal",
Expand Down Expand Up @@ -444,6 +445,7 @@ def inspect_claude_code_config(home: Path) -> dict[str, Any]:
config_inspector=inspect_codex_config,
commands=(("codex", "--version"), ("codex", "mcp", "list")),
integration_module="codex_fast_proxy",
extra_excluded_dirs=("cache", "packages", "standalone", "node_modules"),
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Sync standalone restore exclusions with Codex profile

Update the standalone restore script exclusions when adding extra_excluded_dirs for the Codex profile, otherwise restore behavior diverges between restore_environment(...) and restore-standalone.py. With this change, Codex backups now treat cache/packages/standalone/node_modules as excluded, but the embedded PROFILE_EXTRA_EXCLUDED used by the standalone script still omits Codex entries, so applying an older backup that contains those paths will restore runtime/cache content that the main restore path now skips.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@codex address that feedback. Please sync the standalone restore script profile exclusions with the Codex profile so restore-standalone.py skips cache, packages, standalone, and node_modules consistently with the main restore path. Add or update tests for standalone restore behavior.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Summary

  • Synced the embedded standalone restore script’s profile exclusions by adding a codex entry to PROFILE_EXTRA_EXCLUDED, so restore-standalone.py now skips cache, packages, standalone, and node_modules consistently with the main restore path. src/agent_environment_backup/core.pyL871-L874
  • Added a new regression test that creates a Codex backup, injects files under each excluded directory in the backup payload, runs restore-standalone.py, and asserts those files are not restored. tests/test_core.pyL1296-L1337
  • Committed the change on the current branch as 25c9d47 and created a follow-up PR message via the make_pr tool.

Testing

  • python -m pytest -q tests/test_core.py -k "standalone_restore_excludes_profile_dirs or standalone_restore_excludes_codex_profile_dirs"

View task →

)

CLAUDE_CODE_PROFILE = EnvironmentProfile(
Expand Down Expand Up @@ -625,6 +627,25 @@ def iter_source_files(
extra_excluded_dirs: frozenset[str] = frozenset(),
skipped: list[dict[str, str]] | None = None,
) -> Iterator[tuple[Path, Path]]:
def excluded_subtree_stats(path: Path) -> tuple[int, int]:
if path.is_symlink():
try:
return 1, path.lstat().st_size
except OSError:
return 1, 0
file_count = 0
total_bytes = 0
for walk_root, _, walk_files in os.walk(path):
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Avoid traversing symlinked excluded directories

excluded_subtree_stats walks every excluded directory with os.walk(path), but when the excluded entry itself is a symlink (for example ~/.codex/cache -> /mnt/cache), os.walk traverses the symlink target as the top path. That makes excluded_entries/excluded_bytes include files outside the backup home and can massively inflate runtime on large external trees, even though those files are never part of the backup. This is introduced by the new size-report scan path and should short-circuit or handle symlinked excluded dirs without recursive walking.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@codex address that feedback. Please make excluded-directory size accounting avoid traversing symlinked excluded directories. Count the symlink itself as skipped/excluded if useful, but do not recurse into the symlink target or include bytes outside the backup home. Add a regression test with an excluded directory symlink pointing outside the home.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Summary

  • Updated excluded-directory size accounting to short-circuit when an excluded directory entry is a symlink, so it no longer traverses symlink targets; instead it counts the symlink itself (file_count=1) and uses lstat() size when available. This prevents counting bytes outside the backup home while preserving exclusion behavior. src/agent_environment_backup/core.pyL624-L679
  • Added a regression test that creates an excluded cache directory as a symlink to an external directory and verifies excluded accounting does not include external subtree size. tests/test_core.pyL265-L290
  • Committed on the current branch: 969e091 with message Avoid traversing symlinked excluded directories.

Testing

  • python -m pytest -q tests/test_core.py -k "excluded_symlinked_directory_does_not_traverse_outside_home or excludes_runtime_cache_paths"

View task →

walk_root_path = Path(walk_root)
for walk_file in walk_files:
walk_path = walk_root_path / walk_file
file_count += 1
try:
total_bytes += walk_path.lstat().st_size
except OSError:
pass
return file_count, total_bytes

def onerror(exc: OSError) -> None:
entry = walk_error_entry(home, exc, method="walk")
if errors is not None:
Expand All @@ -635,6 +656,23 @@ def onerror(exc: OSError) -> None:
for root, dir_names, file_names in os.walk(home, onerror=onerror):
root_path = Path(root)
rel_root = root_path.relative_to(home)
excluded_dirs = [
name
for name in dir_names
if is_excluded(rel_root / name, extra_excluded_dirs)
]
if skipped is not None:
for excluded_dir in excluded_dirs:
excluded_path = root_path / excluded_dir
file_count, total_bytes = excluded_subtree_stats(excluded_path)
skipped.append(
{
"relative_path": normalize_relative(rel_root / excluded_dir),
"reason": "excluded",
"file_count": file_count,
"bytes": total_bytes,
}
)
dir_names[:] = [
name for name in dir_names
if not is_excluded(rel_root / name, extra_excluded_dirs)
Expand All @@ -643,6 +681,16 @@ def onerror(exc: OSError) -> None:
source = root_path / file_name
relative = source.relative_to(home)
if is_excluded(relative, extra_excluded_dirs):
if skipped is not None:
entry = {
"relative_path": normalize_relative(relative),
"reason": "excluded",
}
Comment on lines +684 to +688
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Count pruned excluded directories in size report

excluded_entries/excluded_bytes are derived from skipped, but this block only appends entries for excluded files that are still reached in file_names. In the same function, excluded directories are removed from dir_names before traversal, so files under newly excluded paths like cache/, packages/, standalone/, and node_modules/ are never visited and therefore never counted. This makes the new size report systematically understate excluded data (often reporting 0 bytes) when exclusions happen at directory level, which defeats the purpose of surfacing size drivers.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@codex address that feedback. Please count files and bytes under pruned excluded directories in the size report without including file contents or changing restore safety behavior.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Summary

  • Implemented excluded-directory accounting inside iter_source_files by detecting directories pruned from dir_names, walking those excluded subtrees, and recording aggregate file_count + bytes in skipped metadata (without reading file contents). This preserves restore behavior while fixing size-report undercounting for excluded directory trees. src/agent_environment_backup/core.pyL624-L708
  • Updated backup size-report aggregation to compute excluded_entries from excluded file counts (using file_count when present, defaulting to 1 for single-file exclusions) and kept excluded_bytes summed from skipped excluded items. src/agent_environment_backup/core.pyL1489-L1523
  • Updated summary output line to report the corrected excluded entry count. src/agent_environment_backup/core.pyL1531-L1540
  • Committed changes on the current branch: 7db7023 with message Count pruned excluded directories in size report. src/agent_environment_backup/core.pyL624-L708

Testing

  • python -m pytest -q

View task →

try:
entry["bytes"] = source.lstat().st_size
except OSError:
pass
skipped.append(entry)
continue
try:
skip_reason = regular_file_skip_reason(source)
Expand Down Expand Up @@ -826,6 +874,7 @@ def restore_kit_markdown(display_name: str = "Codex") -> str:
"claude-code": ".claude",
}
PROFILE_EXTRA_EXCLUDED = {
"codex": {"cache", "packages", "standalone", "node_modules"},
"claude-code": {"cache"},
}

Expand Down Expand Up @@ -1421,6 +1470,32 @@ def create_backup(
}
)

total_backup_bytes = sum(entry["bytes"] for entry in entries)
top_files_by_size = sorted(
(
{
"relative_path": entry["relative_path"],
"bytes": entry["bytes"],
}
for entry in entries
),
key=lambda item: item["bytes"],
reverse=True,
)[:SIZE_REPORT_LIMIT]
directory_sizes: dict[str, int] = {}
for entry in entries:
rel_path = Path(entry["relative_path"])
for parent in rel_path.parents:
key = "." if str(parent) == "." else normalize_relative(parent)
directory_sizes[key] = directory_sizes.get(key, 0) + entry["bytes"]
top_directories_by_size = [
{"relative_path": path, "bytes": size}
for path, size in sorted(directory_sizes.items(), key=lambda item: item[1], reverse=True)[:SIZE_REPORT_LIMIT]
]
excluded_skipped = [item for item in skipped if item.get("reason") == "excluded"]
excluded_entries = sum(int(item.get("file_count", 1)) for item in excluded_skipped)
excluded_bytes = sum(int(item.get("bytes", 0)) for item in excluded_skipped)

sensitive_note = _make_sensitive_note(profile.display_name)
doctor_report = doctor_environment(home, profile=profile, run_commands=run_doctor_commands)
manifest = {
Expand All @@ -1441,10 +1516,17 @@ def create_backup(
"skipped": skipped,
"counts": {
"files": len(entries),
"bytes": total_backup_bytes,
"sqlite_databases": sum(1 for entry in entries if entry["method"] == "sqlite_backup"),
"errors": len(errors),
"skipped": len(skipped),
},
"size_report": {
"top_files_by_size": top_files_by_size,
"top_directories_by_size": top_directories_by_size,
"excluded_entries": excluded_entries,
"excluded_bytes": excluded_bytes,
},
}
write_json(backup_dir / "manifest.json", manifest)
write_json(backup_dir / "sqlite-integrity-check.json", sqlite_checks)
Expand All @@ -1457,8 +1539,10 @@ def create_backup(
f"Backup: {backup_dir}",
f"{profile.display_name} home: {home}",
f"Files: {len(entries)}",
f"Expanded size (files only): {total_backup_bytes} bytes",
f"SQLite databases: {manifest['counts']['sqlite_databases']}",
f"Errors: {len(errors)}",
f"Excluded entries: {excluded_entries} ({excluded_bytes} bytes)",
f"Integrity: {'ok' if all(check.get('ok') for check in sqlite_checks) else 'failed'}",
"Restore kit: RESTORE.md, RESTORE_INSTRUCTIONS.txt, restore-environment.cmd, restore-environment.ps1, restore-environment.command, restore-environment.sh",
"",
Expand Down
136 changes: 136 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,99 @@ def fake_walk(top, *args, **kwargs):
self.assertEqual(listing["backups"][0]["status"], "failed")
self.assertEqual(listing["backups"][0]["errors"], 1)

def test_backup_adds_size_report_and_excludes_runtime_cache_paths(self) -> None:
with self.temp_root() as temp_dir:
root = Path(temp_dir)
home = self.make_home(root)
(home / "cache").mkdir()
(home / "packages").mkdir()
(home / "standalone").mkdir()
(home / "node_modules").mkdir()
(home / "cache" / "cache.bin").write_bytes(b"x" * 4096)
(home / "packages" / "pkg.bin").write_bytes(b"y" * 2048)
(home / "standalone" / "rt.bin").write_bytes(b"z" * 1024)
(home / "node_modules" / "mod.bin").write_bytes(b"n" * 512)
backup_root = root / "backups"

result = create_backup(
home,
backup_root=backup_root,
timestamp="codex-backup-size-report",
run_doctor_commands=False,
)

self.assertTrue(result["ok"], result)
manifest = json.loads(Path(result["manifest"]).read_text(encoding="utf-8"))
paths = {entry["relative_path"] for entry in manifest["entries"]}
self.assertNotIn("cache/cache.bin", paths)
self.assertNotIn("packages/pkg.bin", paths)
self.assertNotIn("standalone/rt.bin", paths)
self.assertNotIn("node_modules/mod.bin", paths)
self.assertIn("size_report", manifest)
self.assertIn("top_files_by_size", manifest["size_report"])
self.assertIn("top_directories_by_size", manifest["size_report"])
self.assertGreaterEqual(manifest["size_report"]["excluded_entries"], 2)
self.assertGreaterEqual(manifest["size_report"]["excluded_bytes"], 1)

def test_backup_excluded_symlinked_directory_does_not_traverse_outside_home(self) -> None:
with self.temp_root() as temp_dir:
root = Path(temp_dir)
home = self.make_home(root)
outside = root / "outside-cache"
outside.mkdir()
(outside / "huge.bin").write_bytes(b"x" * 8192)

cache_link = home / "cache"
try:
cache_link.symlink_to(outside, target_is_directory=True)
except OSError:
self.skipTest("symlinks are not supported on this platform")

backup_root = root / "backups"
result = create_backup(
home,
backup_root=backup_root,
timestamp="codex-backup-symlinked-excluded",
run_doctor_commands=False,
)

self.assertTrue(result["ok"], result)
manifest = json.loads(Path(result["manifest"]).read_text(encoding="utf-8"))
self.assertGreaterEqual(manifest["size_report"]["excluded_entries"], 1)
self.assertLess(manifest["size_report"]["excluded_bytes"], 8192)

def test_backup_excluded_directory_stats_do_not_follow_file_symlinks(self) -> None:
with self.temp_root() as temp_dir:
root = Path(temp_dir)
home = self.make_home(root)
outside = root / "outside-large.bin"
outside.write_bytes(b"x" * 8192)
cache = home / "cache"
cache.mkdir()
try:
(cache / "linked-large.bin").symlink_to(outside)
except OSError:
self.skipTest("symlinks are not supported on this platform")

result = create_backup(
home,
backup_root=root / "backups",
timestamp="codex-backup-excluded-file-symlink",
run_doctor_commands=False,
)

self.assertTrue(result["ok"], result)
manifest = json.loads(Path(result["manifest"]).read_text(encoding="utf-8"))
skipped = {
entry["relative_path"]: entry
for entry in manifest["skipped"]
if entry.get("reason") == "excluded"
}
self.assertIn("cache", skipped)
self.assertEqual(skipped["cache"]["file_count"], 1)
self.assertLess(skipped["cache"]["bytes"], 8192)
self.assertLess(manifest["size_report"]["excluded_bytes"], 8192)

def test_restore_dry_run_and_apply_overlay(self) -> None:
with self.temp_root() as temp_dir:
root = Path(temp_dir)
Expand Down Expand Up @@ -1259,6 +1352,49 @@ def test_standalone_restore_excludes_profile_dirs(self) -> None:
"Standalone restore should skip profile-excluded dirs (cache for claude-code)",
)

def test_standalone_restore_excludes_codex_profile_dirs(self) -> None:
from agent_environment_backup.core import create_backup, CODEX_PROFILE
with self.temp_root() as temp_dir:
root = Path(temp_dir)
home = self.make_home(root)
result = create_backup(
home,
backup_root=root / "backups",
profile=CODEX_PROFILE,
timestamp="codex-excl-standalone-test",
run_doctor_commands=False,
)

backup_files = Path(result["backup_dir"]) / "files"
for excluded_dir in ("cache", "packages", "standalone", "node_modules"):
excluded_path = backup_files / excluded_dir
excluded_path.mkdir(parents=True, exist_ok=True)
(excluded_path / "stale.bin").write_text("stale", encoding="utf-8")

standalone_target = root / "standalone-codex-excl-target"
standalone_target.mkdir()
standalone_apply = subprocess.run(
[
sys.executable,
result["restore_kit"]["restore_py"],
"--backup-dir",
result["backup_dir"],
"--target-home",
str(standalone_target),
"--apply",
"--confirm",
],
capture_output=True,
text=True,
check=False,
)
self.assertEqual(standalone_apply.returncode, 0, standalone_apply.stderr)
for excluded_dir in ("cache", "packages", "standalone", "node_modules"):
self.assertFalse(
(standalone_target / excluded_dir / "stale.bin").exists(),
f"Standalone restore should skip {excluded_dir} for codex profile",
)

# -- Issue 5a: doctor count_tree uses extra_excluded_dirs --
def test_doctor_count_tree_respects_extra_excluded_dirs(self) -> None:
from agent_environment_backup.core import doctor_environment, CLAUDE_CODE_PROFILE
Expand Down
Loading