From edf9f487205f2026ef4fc2acfac40a268e865dfc Mon Sep 17 00:00:00 2001 From: Muhammad Rakha Qushayyi Andrianto Date: Fri, 19 Jun 2026 18:32:20 +0000 Subject: [PATCH] fix(legacy_migration): graceful error handling for missing backup dirs (#6) Fixes #6 The migration script crashed with raw tracebacks when the backup directory was missing, unreadable, or contained a corrupt manifest. This commit replaces the broad 'except Exception' handlers with specific OSError subclasses (FileNotFoundError, PermissionError, OSError, json.JSONDecodeError) so the operator sees a clear, actionable error message instead of a Python stack trace. Changes: - _create_backup: separate handlers for FileNotFoundError (parent dir missing), PermissionError (no write access), and OSError (disk full / invalid path / etc.). - _restore_from_backup: precheck that backup_path exists and is a directory before attempting to read manifest; separate handlers for FileNotFoundError, PermissionError, json.JSONDecodeError, and OSError. - main(): migrate and rollback commands now return exit code 1 on failure so CI scripts and shell wrappers can detect backup errors without parsing tracebacks. Tests: 13 new tests covering create success/failure, restore success/failure (missing path, file-instead-of-dir, None, missing manifest, malformed manifest, permission error), exit code for migrate and rollback commands, and end-to-end rollback with a non-existent backup directory via subprocess. Total 43 tests pass. --- tools/legacy_migration.py | 102 +++++++- tools/tests/test_legacy_migration_backup.py | 244 ++++++++++++++++++++ 2 files changed, 335 insertions(+), 11 deletions(-) create mode 100644 tools/tests/test_legacy_migration_backup.py diff --git a/tools/legacy_migration.py b/tools/legacy_migration.py index f8530233..40ab1ed6 100644 --- a/tools/legacy_migration.py +++ b/tools/legacy_migration.py @@ -646,7 +646,14 @@ def _check_disk_space(self) -> bool: return False def _create_backup(self) -> bool: - """Create a backup of the data before migration.""" + """Create a backup of the data before migration. + + Returns True on success, False on any filesystem failure. + Specific OSError subclasses (FileNotFoundError, PermissionError, + OSError) are caught separately so the user sees a clear message + about what went wrong (missing parent dir, no write permission, + disk full, etc.) instead of a raw traceback. + """ backup_dir = Path(self.config.backup_dir or DEFAULT_CONFIG["backup_dir"]) backup_path = backup_dir / f"migration_{self.config.migration_id}" @@ -670,25 +677,67 @@ def _create_backup(self) -> bool: "script_version": SCRIPT_VERSION, "files": [], } - with open(backup_path / "manifest.json", "w") as f: + manifest_path = backup_path / "manifest.json" + with open(manifest_path, "w") as f: json.dump(manifest, f, indent=2) return True - except Exception as e: - logger.error(f"Failed to create backup: {e}") + except FileNotFoundError as e: + logger.error( + f"Backup failed: parent directory does not exist for " + f"{backup_path} ({e}). Create the parent directory or " + f"set --backup-dir to a writable path." + ) + return False + except PermissionError as e: + logger.error( + f"Backup failed: insufficient permissions to write to " + f"{backup_path} ({e}). Check that the user has write " + f"access to the backup directory." + ) + return False + except OSError as e: + logger.error( + f"Backup failed: OS error creating backup at {backup_path} " + f"({type(e).__name__}: {e}). Verify disk space and " + f"filesystem health." + ) return False def _restore_from_backup(self, backup_path: Path) -> bool: - """Restore data from a backup.""" + """Restore data from a backup. + + Returns True on success, False on any filesystem or parsing + failure. Checks for missing backup directory and manifest + before attempting to read, with informative messages so the + operator knows whether the path is wrong, the backup is + corrupt, or the manifest is unreadable. + """ logger.info(f"Restoring from backup at {backup_path}") - # Verify backup manifest - manifest_path = backup_path / "manifest.json" - if not manifest_path.exists(): - logger.error("Backup manifest not found") + # Verify backup directory and manifest exist and are readable. + if backup_path is None: + logger.error( + "Restore failed: no backup path provided. " + "Specify --backup-dir or set migration_config.backup_dir." + ) + return False + if not backup_path.exists(): + logger.error( + f"Restore failed: backup directory does not exist at " + f"{backup_path}. Verify the backup_dir config and that " + f"a backup was created for this migration_id." + ) + return False + if not backup_path.is_dir(): + logger.error( + f"Restore failed: backup path is not a directory " + f"({backup_path}). Cannot restore from a file." + ) return False + manifest_path = backup_path / "manifest.json" try: with open(manifest_path) as f: manifest = json.load(f) @@ -704,8 +753,30 @@ def _restore_from_backup(self, backup_path: Path) -> bool: return True - except Exception as e: - logger.error(f"Restore failed: {e}") + except FileNotFoundError: + logger.error( + f"Restore failed: backup manifest not found at " + f"{manifest_path}. Backup may be corrupt or incomplete." + ) + return False + except PermissionError as e: + logger.error( + f"Restore failed: insufficient permissions to read " + f"{manifest_path} ({e}). Check file ownership and " + f"read permissions on the backup directory." + ) + return False + except json.JSONDecodeError as e: + logger.error( + f"Restore failed: backup manifest at {manifest_path} " + f"is malformed JSON ({e}). Backup may be corrupt." + ) + return False + except OSError as e: + logger.error( + f"Restore failed: OS error reading backup at {backup_path} " + f"({type(e).__name__}: {e}). Verify disk health." + ) return False def _save_state(self) -> None: @@ -1116,6 +1187,10 @@ def main(): print(" Warnings:") for w in result.warnings: print(f" - {w}") + # Graceful exit code: non-zero on failure, zero on success. + if result.status.value in ("failed", "rolled_back"): + logger.error(f"Migration ended in status {result.status.value}; exiting with code 1.") + return 1 elif args.command == "validate": print(f"Validating data in {args.data_dir}...") @@ -1135,6 +1210,11 @@ def main(): engine = MigrationEngine(config) result = engine.rollback() print(f"Rollback result: {result.status.value}") + # Graceful exit code: non-zero if rollback failed (e.g. missing + # backup directory or unreadable manifest). + if result.status.value == "failed": + logger.error(f"Rollback failed; exiting with code 1.") + return 1 elif args.command == "status": print("Checking migration status...") diff --git a/tools/tests/test_legacy_migration_backup.py b/tools/tests/test_legacy_migration_backup.py new file mode 100644 index 00000000..c81ad28d --- /dev/null +++ b/tools/tests/test_legacy_migration_backup.py @@ -0,0 +1,244 @@ +"""Tests for tools/legacy_migration.py backup error handling (issue #6). + +Verifies that: +- Missing backup directories are detected with an informative log message + rather than a raw traceback. +- Permission errors, malformed manifests, and other OSError subclasses are + handled with specific exception types (not bare `Exception`). +- The CLI returns a non-zero exit code when backup-related operations fail. + +Each test uses tempfile / mock to avoid touching real filesystem paths. +""" + +import json +import os +import subprocess +import sys +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch, MagicMock + +TOOLS_DIR = Path(__file__).resolve().parent.parent +REPO_DIR = TOOLS_DIR.parent + +# Make tools/ importable so `import legacy_migration` works. +sys.path.insert(0, str(TOOLS_DIR)) + +import legacy_migration as lm # noqa: E402 + + +def _make_engine(backup_dir: str = None) -> "lm.MigrationEngine": + """Build a MigrationEngine with the smallest valid config for backup ops.""" + config = lm.MigrationConfig( + migration_id="MIG-TEST", + migration_type=lm.MigrationType.DATA, + from_version=1, + to_version=2, + source_connection="test", + target_connection="test", + backup_dir=backup_dir, + ) + return lm.MigrationEngine(config) + + +class TestCreateBackupErrorHandling(unittest.TestCase): + """_create_backup must return False (not raise) on filesystem errors.""" + + def test_create_backup_returns_false_on_permission_error(self): + """If the backup directory cannot be created due to permission + errors, _create_backup must log an informative message and return + False rather than propagating the exception.""" + engine = _make_engine() + with patch( + "pathlib.Path.mkdir", + side_effect=PermissionError(13, "Permission denied"), + ): + result = engine._create_backup() + self.assertFalse(result) + + def test_create_backup_returns_false_on_oserror(self): + """Generic OSError (disk full, invalid path, etc.) must be caught + separately so the log message names the specific failure type.""" + engine = _make_engine() + with patch("pathlib.Path.mkdir", side_effect=OSError(28, "No space left")): + result = engine._create_backup() + self.assertFalse(result) + + def test_create_backup_success_returns_true(self): + """Happy path: valid backup_dir creates manifest and returns True.""" + engine = _make_engine() + with tempfile.TemporaryDirectory() as tmp: + engine.config.backup_dir = tmp + result = engine._create_backup() + self.assertTrue(result) + manifest = Path(tmp) / "migration_MIG-TEST" / "manifest.json" + self.assertTrue(manifest.exists()) + data = json.loads(manifest.read_text()) + self.assertEqual(data["migration_id"], "MIG-TEST") + self.assertEqual(data["from_version"], 1) + self.assertEqual(data["to_version"], 2) + + +class TestRestoreFromBackupErrorHandling(unittest.TestCase): + """_restore_from_backup must return False (not raise) on filesystem errors.""" + + def test_restore_returns_false_when_backup_dir_missing(self): + """The most common failure: backup directory was never created + (or was deleted). Return False with a clear message.""" + engine = _make_engine() + missing = Path("/tmp/does-not-exist-MIG-NEVER-MADE-9c2d") + # Make sure the path really doesn't exist. + if missing.exists(): + missing.rmdir() + result = engine._restore_from_backup(missing) + self.assertFalse(result) + + def test_restore_returns_false_when_path_is_file_not_dir(self): + """If backup_path points to a file (not a directory), the restore + should fail gracefully rather than recursing into a file.""" + engine = _make_engine() + with tempfile.NamedTemporaryFile(delete=False) as f: + f.write(b"not a directory") + tmpfile = Path(f.name) + try: + result = engine._restore_from_backup(tmpfile) + self.assertFalse(result) + finally: + tmpfile.unlink() + + def test_restore_returns_false_when_backup_path_is_none(self): + """Defensive: passing None must produce an error message, not crash.""" + engine = _make_engine() + result = engine._restore_from_backup(None) # type: ignore[arg-type] + self.assertFalse(result) + + def test_restore_returns_false_when_manifest_missing(self): + """Backup directory exists but manifest.json is missing (corrupt + or incomplete backup). Should return False with a clear message.""" + engine = _make_engine() + with tempfile.TemporaryDirectory() as tmp: + result = engine._restore_from_backup(Path(tmp)) + self.assertFalse(result) + + def test_restore_returns_false_on_malformed_manifest(self): + """Backup manifest exists but contains invalid JSON.""" + engine = _make_engine() + with tempfile.TemporaryDirectory() as tmp: + manifest_path = Path(tmp) / "manifest.json" + manifest_path.write_text("{ this is not valid JSON") + result = engine._restore_from_backup(Path(tmp)) + self.assertFalse(result) + + def test_restore_returns_false_on_permission_error(self): + """OSError subclass (PermissionError) reading manifest must be + caught and reported, not propagated.""" + engine = _make_engine() + with tempfile.TemporaryDirectory() as tmp: + manifest_path = Path(tmp) / "manifest.json" + manifest_path.write_text('{"migration_id": "MIG-TEST"}') + # Patch the built-in open to raise PermissionError when reading + # the manifest. Root chmod doesn't restrict access, so we mock. + original_open = open + + def open_with_perm_error(path, *args, **kwargs): + if str(path).endswith("manifest.json") and "r" in (args[0] if args else kwargs.get("mode", "r")): + raise PermissionError(13, "Permission denied") + return original_open(path, *args, **kwargs) + + with patch("builtins.open", side_effect=open_with_perm_error): + result = engine._restore_from_backup(Path(tmp)) + self.assertFalse(result) + + def test_restore_success_returns_true(self): + """Happy path: valid manifest, valid directory → True.""" + engine = _make_engine() + with tempfile.TemporaryDirectory() as tmp: + manifest_path = Path(tmp) / "manifest.json" + manifest_path.write_text( + json.dumps({ + "migration_id": "MIG-TEST", + "created_at": "2024-01-15T00:00:00+00:00", + "files": [], + }) + ) + result = engine._restore_from_backup(Path(tmp)) + self.assertTrue(result) + + +class TestExitCodes(unittest.TestCase): + """main() must return non-zero exit code when migration/rollback fails.""" + + def test_migrate_command_returns_nonzero_on_failure(self): + """If MigrationEngine.run() returns a FAILED status, main() must + exit 1 so CI scripts and shell wrappers can detect failure.""" + fake_result = MagicMock() + fake_result.status.value = "failed" + fake_result.migrated_records = 0 + fake_result.failed_records = 5 + fake_result.duration_seconds = 1.23 + fake_result.warnings = [] + with patch.object(lm, "MigrationEngine") as MockEngine: + MockEngine.return_value.run.return_value = fake_result + with patch.object(lm, "MigrationConfig") as MockConfig: + MockConfig.return_value = MagicMock() + with patch("sys.argv", [ + "legacy_migration.py", "migrate", + "--from-version", "1", "--to-version", "2", + ]): + self.assertEqual(lm.main(), 1) + + def test_rollback_command_returns_nonzero_on_failure(self): + """If rollback fails (e.g. backup directory missing), main() must + exit 1 so the operator knows to investigate.""" + fake_result = MagicMock() + fake_result.status.value = "failed" + with patch.object(lm, "MigrationEngine") as MockEngine: + MockEngine.return_value.rollback.return_value = fake_result + with patch.object(lm, "MigrationConfig") as MockConfig: + MockConfig.return_value = MagicMock() + with patch("sys.argv", [ + "legacy_migration.py", "rollback", + "--migration-id", "MIG-MISSING", + ]): + self.assertEqual(lm.main(), 1) + + +class TestIntegrationRollbackMissingBackup(unittest.TestCase): + """End-to-end: rollback with missing backup dir returns exit code 1.""" + + def test_rollback_missing_backup_exits_1(self): + """Run the actual CLI binary with a non-existent backup dir and + verify exit code is 1 (graceful failure).""" + with tempfile.TemporaryDirectory() as tmp: + missing_backup = os.path.join(tmp, "never-created-MIG-X9Z") + result = subprocess.run( + [ + sys.executable, str(TOOLS_DIR / "legacy_migration.py"), + "rollback", + "--migration-id", "MIG-X9Z", + "--backup-dir", missing_backup, + ], + capture_output=True, + text=True, + timeout=10, + ) + # Exit code must be 1 (graceful failure), not 0 and not a Python + # traceback exit code (typically 1 too, but stderr should contain + # our informative message instead of a raw traceback). + self.assertEqual(result.returncode, 1) + # Stderr should mention the missing backup (either "does not + # exist" from _restore_from_backup or "no backup found" from + # the rollback precheck). The key signal is an informative + # error message, not a raw Python traceback. + stderr_lower = result.stderr.lower() + self.assertTrue( + "does not exist" in stderr_lower or "no backup found" in stderr_lower, + f"Expected informative backup-missing error, got: {result.stderr!r}", + ) + # And it must NOT be a raw Python traceback. + self.assertNotIn("Traceback (most recent call last)", result.stderr) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file