-
Notifications
You must be signed in to change notification settings - Fork 0
Add size-aware reporting and safe exclusions for rebuildable runtime/cache paths #11
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d6dd97b
6f2a02a
af395ee
5110da7
6dae353
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -35,6 +35,7 @@ | |
| ".venv", | ||
| "tmp", | ||
| } | ||
| SIZE_REPORT_LIMIT = 10 | ||
|
|
||
| LIVE_SQLITE_SUFFIXES = ( | ||
| ".sqlite-wal", | ||
|
|
@@ -444,6 +445,7 @@ def inspect_claude_code_config(home: Path) -> dict[str, Any]: | |
| config_inspector=inspect_codex_config, | ||
| commands=(("codex", "--version"), ("codex", "mcp", "list")), | ||
| integration_module="codex_fast_proxy", | ||
| extra_excluded_dirs=("cache", "packages", "standalone", "node_modules"), | ||
| ) | ||
|
|
||
| CLAUDE_CODE_PROFILE = EnvironmentProfile( | ||
|
|
@@ -625,6 +627,25 @@ def iter_source_files( | |
| extra_excluded_dirs: frozenset[str] = frozenset(), | ||
| skipped: list[dict[str, str]] | None = None, | ||
| ) -> Iterator[tuple[Path, Path]]: | ||
| def excluded_subtree_stats(path: Path) -> tuple[int, int]: | ||
| if path.is_symlink(): | ||
| try: | ||
| return 1, path.lstat().st_size | ||
| except OSError: | ||
| return 1, 0 | ||
| file_count = 0 | ||
| total_bytes = 0 | ||
| for walk_root, _, walk_files in os.walk(path): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Useful? React with 👍 / 👎.
Owner
Author
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@codex address that feedback. Please make excluded-directory size accounting avoid traversing symlinked excluded directories. Count the symlink itself as skipped/excluded if useful, but do not recurse into the symlink target or include bytes outside the backup home. Add a regression test with an excluded directory symlink pointing outside the home.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Summary
Testing
|
||
| walk_root_path = Path(walk_root) | ||
| for walk_file in walk_files: | ||
| walk_path = walk_root_path / walk_file | ||
| file_count += 1 | ||
| try: | ||
| total_bytes += walk_path.lstat().st_size | ||
| except OSError: | ||
| pass | ||
| return file_count, total_bytes | ||
|
|
||
| def onerror(exc: OSError) -> None: | ||
| entry = walk_error_entry(home, exc, method="walk") | ||
| if errors is not None: | ||
|
|
@@ -635,6 +656,23 @@ def onerror(exc: OSError) -> None: | |
| for root, dir_names, file_names in os.walk(home, onerror=onerror): | ||
| root_path = Path(root) | ||
| rel_root = root_path.relative_to(home) | ||
| excluded_dirs = [ | ||
| name | ||
| for name in dir_names | ||
| if is_excluded(rel_root / name, extra_excluded_dirs) | ||
| ] | ||
| if skipped is not None: | ||
| for excluded_dir in excluded_dirs: | ||
| excluded_path = root_path / excluded_dir | ||
| file_count, total_bytes = excluded_subtree_stats(excluded_path) | ||
| skipped.append( | ||
| { | ||
| "relative_path": normalize_relative(rel_root / excluded_dir), | ||
| "reason": "excluded", | ||
| "file_count": file_count, | ||
| "bytes": total_bytes, | ||
| } | ||
| ) | ||
| dir_names[:] = [ | ||
| name for name in dir_names | ||
| if not is_excluded(rel_root / name, extra_excluded_dirs) | ||
|
|
@@ -643,6 +681,16 @@ def onerror(exc: OSError) -> None: | |
| source = root_path / file_name | ||
| relative = source.relative_to(home) | ||
| if is_excluded(relative, extra_excluded_dirs): | ||
| if skipped is not None: | ||
| entry = { | ||
| "relative_path": normalize_relative(relative), | ||
| "reason": "excluded", | ||
| } | ||
|
Comment on lines +684 to +688
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Useful? React with 👍 / 👎.
Owner
Author
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@codex address that feedback. Please count files and bytes under pruned excluded directories in the size report without including file contents or changing restore safety behavior.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Summary
Testing
|
||
| try: | ||
| entry["bytes"] = source.lstat().st_size | ||
| except OSError: | ||
| pass | ||
| skipped.append(entry) | ||
| continue | ||
| try: | ||
| skip_reason = regular_file_skip_reason(source) | ||
|
|
@@ -826,6 +874,7 @@ def restore_kit_markdown(display_name: str = "Codex") -> str: | |
| "claude-code": ".claude", | ||
| } | ||
| PROFILE_EXTRA_EXCLUDED = { | ||
| "codex": {"cache", "packages", "standalone", "node_modules"}, | ||
| "claude-code": {"cache"}, | ||
| } | ||
|
|
||
|
|
@@ -1421,6 +1470,32 @@ def create_backup( | |
| } | ||
| ) | ||
|
|
||
| total_backup_bytes = sum(entry["bytes"] for entry in entries) | ||
| top_files_by_size = sorted( | ||
| ( | ||
| { | ||
| "relative_path": entry["relative_path"], | ||
| "bytes": entry["bytes"], | ||
| } | ||
| for entry in entries | ||
| ), | ||
| key=lambda item: item["bytes"], | ||
| reverse=True, | ||
| )[:SIZE_REPORT_LIMIT] | ||
| directory_sizes: dict[str, int] = {} | ||
| for entry in entries: | ||
| rel_path = Path(entry["relative_path"]) | ||
| for parent in rel_path.parents: | ||
| key = "." if str(parent) == "." else normalize_relative(parent) | ||
| directory_sizes[key] = directory_sizes.get(key, 0) + entry["bytes"] | ||
| top_directories_by_size = [ | ||
| {"relative_path": path, "bytes": size} | ||
| for path, size in sorted(directory_sizes.items(), key=lambda item: item[1], reverse=True)[:SIZE_REPORT_LIMIT] | ||
| ] | ||
| excluded_skipped = [item for item in skipped if item.get("reason") == "excluded"] | ||
| excluded_entries = sum(int(item.get("file_count", 1)) for item in excluded_skipped) | ||
| excluded_bytes = sum(int(item.get("bytes", 0)) for item in excluded_skipped) | ||
|
|
||
| sensitive_note = _make_sensitive_note(profile.display_name) | ||
| doctor_report = doctor_environment(home, profile=profile, run_commands=run_doctor_commands) | ||
| manifest = { | ||
|
|
@@ -1441,10 +1516,17 @@ def create_backup( | |
| "skipped": skipped, | ||
| "counts": { | ||
| "files": len(entries), | ||
| "bytes": total_backup_bytes, | ||
| "sqlite_databases": sum(1 for entry in entries if entry["method"] == "sqlite_backup"), | ||
| "errors": len(errors), | ||
| "skipped": len(skipped), | ||
| }, | ||
| "size_report": { | ||
| "top_files_by_size": top_files_by_size, | ||
| "top_directories_by_size": top_directories_by_size, | ||
| "excluded_entries": excluded_entries, | ||
| "excluded_bytes": excluded_bytes, | ||
| }, | ||
| } | ||
| write_json(backup_dir / "manifest.json", manifest) | ||
| write_json(backup_dir / "sqlite-integrity-check.json", sqlite_checks) | ||
|
|
@@ -1457,8 +1539,10 @@ def create_backup( | |
| f"Backup: {backup_dir}", | ||
| f"{profile.display_name} home: {home}", | ||
| f"Files: {len(entries)}", | ||
| f"Expanded size (files only): {total_backup_bytes} bytes", | ||
| f"SQLite databases: {manifest['counts']['sqlite_databases']}", | ||
| f"Errors: {len(errors)}", | ||
| f"Excluded entries: {excluded_entries} ({excluded_bytes} bytes)", | ||
| f"Integrity: {'ok' if all(check.get('ok') for check in sqlite_checks) else 'failed'}", | ||
| "Restore kit: RESTORE.md, RESTORE_INSTRUCTIONS.txt, restore-environment.cmd, restore-environment.ps1, restore-environment.command, restore-environment.sh", | ||
| "", | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Update the standalone restore script exclusions when adding `extra_excluded_dirs` for the Codex profile; otherwise restore behavior diverges between `restore_environment(...)` and `restore-standalone.py`. With this change, Codex backups now treat `cache`/`packages`/`standalone`/`node_modules` as excluded, but the embedded `PROFILE_EXTRA_EXCLUDED` used by the standalone script still omits Codex entries, so applying an older backup that contains those paths will restore runtime/cache content that the main restore path now skips.
Useful? React with 👍 / 👎.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@codex address that feedback. Please sync the standalone restore script profile exclusions with the Codex profile so restore-standalone.py skips cache, packages, standalone, and node_modules consistently with the main restore path. Add or update tests for standalone restore behavior.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Summary
- […] `codex` entry to `PROFILE_EXTRA_EXCLUDED`, so `restore-standalone.py` now skips `cache`, `packages`, `standalone`, and `node_modules` consistently with the main restore path. (src/agent_environment_backup/core.py, L871-L874)
- […] `restore-standalone.py`, and asserts those files are not restored. (tests/test_core.py, L1296-L1337)
- […] 25c9d47 and created a follow-up PR message via the `make_pr` tool.

Testing
python -m pytest -q tests/test_core.py -k "standalone_restore_excludes_profile_dirs or standalone_restore_excludes_codex_profile_dirs"
View task →