diff --git a/CHANGELOG.md b/CHANGELOG.md index 1056a60..8257447 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,48 @@ Each entry maps a release to the task IDs that shipped in it. The single-file `perseus.py` runtime is the only required artifact; everything else (installer, docs) is generated by `scripts/release.sh`. +## [1.0.6] — UNRELEASED + +Critical security + correctness hotfix bundle. See GitHub milestone +[v1.0.6](https://github.com/tcconnally/perseus/milestone/1). + +### 🐛 Bug Fixes + +- **#128** — Mnēmē narratives written under pre-1.0.3 (which used an MD5 + hash for the per-workspace narrative filename) are no longer silently + orphaned on upgrade. v1.0.3+ uses SHA-256; previously, `_mneme_path()` + unconditionally returned the SHA-256 path and the MD5 file sat untouched + on disk while the user saw `> ⚠ No Mnēmē narrative found for this + workspace.`. + - `_mneme_path()` now performs a one-shot in-place rename: if the + SHA-256 path doesn't exist but the legacy MD5 path does, `os.replace` + is used to atomically move it. The narrative is preserved verbatim. + - If both paths exist (concurrent write race, or operator-staged files), + the current SHA-256 file wins and the legacy file is left untouched. + - New CLI: **`perseus memory doctor`** — read-only scan of the memory + store that classifies files as SHA-256, legacy MD5, orphan, or unknown. + Add `--migrate` to rename all legacy files in one pass; add `--json` + for machine-readable output. Idempotent. + - New helpers: `_workspace_hash_legacy_md5()`, `_mneme_doctor_scan()`, + `_mneme_doctor_migrate()` — all importable from `perseus.py` for + operators who need to script around them. + +### 🔒 Security (other v1.0.6 items, tracked in milestone) +- #136 — `long_hex_secret` redaction rule corrupted git hashes (PR #159) +- #137 — `@query` audit log leaked secrets (PR #160) +- #138, #139, #140, #141, #142 + +### 🐛 Bug Fixes (other v1.0.6 items) +- #129, #130, #131, #135 + +### 📦 Migration Notes +- **No manual action required for the MD5→SHA-256 migration.** It happens + automatically on first access. Operators with many workspaces can opt + to run `perseus memory doctor --migrate` once after upgrading to + surface and fix every workspace in one pass. + +--- + ## [1.0.5] — 2026-05-26 **Bastra-Recall — Persistent Memory Backend (superseded by Mnēmē v2 in 1.0.6):** diff --git a/perseus.py b/perseus.py index 0527f05..7ee5e81 100644 --- a/perseus.py +++ b/perseus.py @@ -2451,7 +2451,6 @@ def resolve_include(args_str: str, workspace: Path | None = None, cfg: dict | No return f"> ⚠ @include: could not read `{file_path_str}`: {e}" # ── File size limit check (byte-counted, not character-counted) ── - max_bytes = render_cfg.get("max_include_bytes") if max_bytes is not None and len(data) > max_bytes: raw = data[:max_bytes].decode(errors="replace").rstrip() actual_size = len(data) @@ -2577,7 +2576,6 @@ def fallback_result() -> str: return f"> ⚠ @read: could not read `{file_path_str}`: {e}" # ── File size limit check (byte-counted, not character-counted) ── - max_bytes = render_cfg.get("max_read_bytes") if max_bytes is not None and len(data) > max_bytes: content = data[:max_bytes].decode(errors="replace") trunc_note = ( @@ -8115,10 +8113,154 @@ def _workspace_hash(workspace: Path) -> str: return hashlib.sha256(str(canonical).encode()).hexdigest()[:12] +def _workspace_hash_legacy_md5(workspace: Path) -> str: + """12-char MD5 hex digest — the pre-1.0.3 narrative file name scheme. + + Regression for #128: prior to v1.0.3, Mnēmē derived narrative file names + from an MD5 hash. v1.0.3+ switched to SHA-256. Without an explicit + migration, every existing narrative file on disk was silently orphaned + on upgrade. ``_mneme_path`` calls this function as a one-shot fallback + to locate and rename legacy files. Once migrated, this code path is + never re-entered for that workspace. + + We intentionally use ``usedforsecurity=False`` (Py3.9+) so FIPS-mode + Pythons don't reject the call — this is a file-naming hash, not a + security primitive. We fall back to the no-kwarg call for older Pythons. + """ + canonical = str(workspace.expanduser().resolve()).encode() + try: + return hashlib.md5(canonical, usedforsecurity=False).hexdigest()[:12] + except TypeError: + # Python < 3.9: no `usedforsecurity` kwarg. + return hashlib.md5(canonical).hexdigest()[:12] + + def _mneme_path(workspace: Path, cfg: dict) -> Path: - """Return the per-workspace narrative file path.""" + """Return the per-workspace narrative file path. + + Regression for #128: if a SHA-256 path doesn't exist but a legacy MD5 + path does, transparently rename the legacy file in place. This makes + upgrades from pre-1.0.3 lossless. + + The rename uses ``os.replace`` (atomic on POSIX/NTFS) and is best-effort: + if rename fails (cross-device, permission, etc.), we leave both files in + place and return the SHA-256 path. The caller will then see "no + narrative yet" and recreate — non-fatal but loses prior content. + Operators can also run ``perseus memory doctor --migrate`` to surface + and act on these cases explicitly. + """ store = Path(cfg.get("memory", {}).get("store", str(PERSEUS_HOME / "memory"))) - return store / f"{_workspace_hash(workspace)}.md" + new_path = store / f"{_workspace_hash(workspace)}.md" + if new_path.exists(): + return new_path + legacy_path = store / f"{_workspace_hash_legacy_md5(workspace)}.md" + if legacy_path.exists() and legacy_path != new_path: + try: + store.mkdir(parents=True, exist_ok=True) + os.replace(legacy_path, new_path) + except OSError: + # Cross-device / permission denied. Leave the legacy file in + # place so the operator can recover it manually; the caller will + # create a fresh narrative at the new path. + pass + return new_path + + +def _mneme_doctor_scan(cfg: dict) -> dict: + """Scan the memory store and report on narrative file inventory. + + Returns a dict with: + { + "store": str, # path to memory store + "narrative_files": [path, ...], # all *.md in store + "legacy_md5_files": [path, ...], # files whose name matches legacy MD5 of a known workspace + "sha256_files": [path, ...], # files that look like current-scheme files + "orphan_files": [path, ...], # files whose embedded `workspace` frontmatter no longer resolves to their filename + "unknown_files": [path, ...], # files whose stem isn't a 12-char hex hash + } + + "Known workspace" inference: we re-derive the SHA-256 and legacy MD5 + hashes from each file's ``workspace:`` frontmatter field, then match + against the actual filename stem. + + Used by ``perseus memory doctor`` to surface migration candidates. + """ + store = Path(cfg.get("memory", {}).get("store", str(PERSEUS_HOME / "memory"))) + out: dict = { + "store": str(store), + "narrative_files": [], + "legacy_md5_files": [], + "sha256_files": [], + "orphan_files": [], + "unknown_files": [], + } + if not store.exists(): + return out + hex_re = re.compile(r"^[a-f0-9]{12}$") + for fp in sorted(store.glob("*.md")): + out["narrative_files"].append(str(fp)) + stem = fp.stem + if not hex_re.match(stem): + out["unknown_files"].append(str(fp)) + continue + # Try to read the workspace from frontmatter and classify. + try: + fm, _ = _load_narrative(fp) + except Exception: + out["unknown_files"].append(str(fp)) + continue + ws_raw = str(fm.get("workspace", "")).strip() if isinstance(fm, dict) else "" + if not ws_raw: + # No workspace metadata — can't classify; treat as unknown. + out["unknown_files"].append(str(fp)) + continue + try: + ws = Path(ws_raw).expanduser() + expected_sha = _workspace_hash(ws) + expected_md5 = _workspace_hash_legacy_md5(ws) + except Exception: + out["unknown_files"].append(str(fp)) + continue + if stem == expected_sha: + out["sha256_files"].append(str(fp)) + elif stem == expected_md5: + out["legacy_md5_files"].append(str(fp)) + else: + out["orphan_files"].append(str(fp)) + return out + + +def _mneme_doctor_migrate(cfg: dict) -> dict: + """Rename legacy MD5-named narrative files to their SHA-256 names. + + Returns a dict: + { + "migrated": [(old, new), ...], + "skipped": [(old, new, reason), ...], + "errors": [(old, exc_str), ...], + } + + Idempotent: re-running after a successful migration is a no-op. + """ + report: dict = {"migrated": [], "skipped": [], "errors": []} + scan = _mneme_doctor_scan(cfg) + store = Path(scan["store"]) + for legacy_fp_str in scan["legacy_md5_files"]: + legacy_fp = Path(legacy_fp_str) + try: + fm, _ = _load_narrative(legacy_fp) + ws = Path(str(fm.get("workspace", "")).strip()).expanduser() + new_fp = store / f"{_workspace_hash(ws)}.md" + if new_fp.exists(): + report["skipped"].append( + (str(legacy_fp), str(new_fp), "destination already exists") + ) + continue + os.replace(legacy_fp, new_fp) + report["migrated"].append((str(legacy_fp), str(new_fp))) + except Exception as exc: # pragma: no cover - defensive + report["errors"].append((str(legacy_fp), str(exc))) + return report def _load_narrative(path: Path) -> tuple[dict, str]: @@ -9236,9 +9378,80 @@ def cmd_memory(args, cfg): _cmd_memory_index(args, cfg) return + if sub == "doctor": + cmd_memory_doctor(args, cfg) + return + print(f"perseus memory: unknown subcommand '{sub}'.", file=sys.stderr) sys.exit(2) + +def cmd_memory_doctor(args, cfg) -> None: + """Mnēmē doctor — scan and optionally migrate legacy MD5-named narratives. + + Regression for #128: pre-1.0.3 narratives are named after an MD5 hash of + the workspace path; v1.0.3+ uses SHA-256. _mneme_path() auto-migrates on + first access, but that requires the operator to actually open the + workspace. ``memory doctor`` lets an operator scan and migrate all + workspaces at once, and surface diagnostic info for files that can't be + auto-migrated (e.g. missing frontmatter, cross-device renames). + """ + do_migrate = bool(getattr(args, "migrate", False)) + use_json = bool(getattr(args, "json", False)) + scan = _mneme_doctor_scan(cfg) + + if do_migrate: + result = _mneme_doctor_migrate(cfg) + if use_json: + import json as _json + print(_json.dumps({"scan_before": scan, "migrate": result}, indent=2)) + return + print(f"Mnēmē doctor — store: {scan['store']}") + print(f" Narrative files: {len(scan['narrative_files'])}") + print(f" Legacy MD5 found: {len(scan['legacy_md5_files'])}") + print(f" Migrated: {len(result['migrated'])}") + for old, new in result["migrated"]: + print(f" ✓ {Path(old).name} → {Path(new).name}") + if result["skipped"]: + print(f" Skipped: {len(result['skipped'])}") + for old, new, reason in result["skipped"]: + print(f" ⚠ {Path(old).name}: {reason}") + if result["errors"]: + print(f" Errors: {len(result['errors'])}") + for old, exc_str in result["errors"]: + print(f" ✗ {Path(old).name}: {exc_str}") + return + + # Read-only scan + if use_json: + import json as _json + print(_json.dumps(scan, indent=2)) + return + print(f"Mnēmē doctor — store: {scan['store']}") + print(f" Narrative files: {len(scan['narrative_files'])}") + print(f" SHA-256 (current):{len(scan['sha256_files'])}") + print(f" Legacy MD5: {len(scan['legacy_md5_files'])}") + print(f" Orphan: {len(scan['orphan_files'])}") + print(f" Unknown stems: {len(scan['unknown_files'])}") + if scan["legacy_md5_files"]: + print() + print("Legacy MD5-named narratives detected. Run:") + print(" perseus memory doctor --migrate") + print("to rename them to their SHA-256 paths in place. Operation is") + print("idempotent and uses atomic os.replace.") + if scan["orphan_files"]: + print() + print("⚠ Orphan files (frontmatter workspace doesn't match filename):") + for fp in scan["orphan_files"]: + print(f" - {fp}") + print("These were likely written under a different store, OR the") + print("workspace path moved. Review manually before deleting.") + if scan["unknown_files"]: + print() + print("Files with non-standard names (skipped by Mnēmē):") + for fp in scan["unknown_files"]: + print(f" - {fp}") + def _memory_federation_diagnostic(name: str, args_str: str, cfg: dict, workspace: object) -> list[dict]: """Per-directive LSP diagnostic for @memory: warn on unsubscribed federation alias. @@ -14569,6 +14782,16 @@ def main(): p_fed_pull = fed_sub.add_parser("pull", help="Re-read all subscribed narratives (read-only, manual)") p_fed_pull.add_argument("--json", action="store_true", help="Machine-readable JSON output") + # memory doctor (#128 — legacy MD5 → SHA-256 narrative migration) + p_mem_doc = mem_sub.add_parser( + "doctor", + help="Scan/repair the Mnēmē memory store (legacy MD5 → SHA-256 narrative migration)", + ) + p_mem_doc.add_argument("--migrate", action="store_true", + help="Rename legacy MD5-named narratives to their SHA-256 paths (atomic, idempotent)") + p_mem_doc.add_argument("--json", action="store_true", + help="Machine-readable JSON output") + # memory index (Mnēmē v2) p_mem_idx = mem_sub.add_parser("index", help="Manage the FTS5 search index") idx_sub = p_mem_idx.add_subparsers(dest="index_command", required=True) diff --git a/src/perseus/agora.py b/src/perseus/agora.py index 349ffff..26223b4 100644 --- a/src/perseus/agora.py +++ b/src/perseus/agora.py @@ -269,9 +269,80 @@ def cmd_memory(args, cfg): _cmd_memory_index(args, cfg) return + if sub == "doctor": + cmd_memory_doctor(args, cfg) + return + print(f"perseus memory: unknown subcommand '{sub}'.", file=sys.stderr) sys.exit(2) + +def cmd_memory_doctor(args, cfg) -> None: + """Mnēmē doctor — scan and optionally migrate legacy MD5-named narratives. + + Regression for #128: pre-1.0.3 narratives are named after an MD5 hash of + the workspace path; v1.0.3+ uses SHA-256. _mneme_path() auto-migrates on + first access, but that requires the operator to actually open the + workspace. ``memory doctor`` lets an operator scan and migrate all + workspaces at once, and surface diagnostic info for files that can't be + auto-migrated (e.g. missing frontmatter, cross-device renames). + """ + do_migrate = bool(getattr(args, "migrate", False)) + use_json = bool(getattr(args, "json", False)) + scan = _mneme_doctor_scan(cfg) + + if do_migrate: + result = _mneme_doctor_migrate(cfg) + if use_json: + import json as _json + print(_json.dumps({"scan_before": scan, "migrate": result}, indent=2)) + return + print(f"Mnēmē doctor — store: {scan['store']}") + print(f" Narrative files: {len(scan['narrative_files'])}") + print(f" Legacy MD5 found: {len(scan['legacy_md5_files'])}") + print(f" Migrated: {len(result['migrated'])}") + for old, new in result["migrated"]: + print(f" ✓ {Path(old).name} → {Path(new).name}") + if result["skipped"]: + print(f" Skipped: {len(result['skipped'])}") + for old, new, reason in result["skipped"]: + print(f" ⚠ {Path(old).name}: {reason}") + if result["errors"]: + print(f" Errors: {len(result['errors'])}") + for old, exc_str in result["errors"]: + print(f" ✗ {Path(old).name}: {exc_str}") + return + + # Read-only scan + if use_json: + import json as _json + print(_json.dumps(scan, indent=2)) + return + print(f"Mnēmē doctor — store: {scan['store']}") + print(f" Narrative files: {len(scan['narrative_files'])}") + print(f" SHA-256 (current):{len(scan['sha256_files'])}") + print(f" Legacy MD5: {len(scan['legacy_md5_files'])}") + print(f" Orphan: {len(scan['orphan_files'])}") + print(f" Unknown stems: {len(scan['unknown_files'])}") + if scan["legacy_md5_files"]: + print() + print("Legacy MD5-named narratives detected. Run:") + print(" perseus memory doctor --migrate") + print("to rename them to their SHA-256 paths in place. Operation is") + print("idempotent and uses atomic os.replace.") + if scan["orphan_files"]: + print() + print("⚠ Orphan files (frontmatter workspace doesn't match filename):") + for fp in scan["orphan_files"]: + print(f" - {fp}") + print("These were likely written under a different store, OR the") + print("workspace path moved. Review manually before deleting.") + if scan["unknown_files"]: + print() + print("Files with non-standard names (skipped by Mnēmē):") + for fp in scan["unknown_files"]: + print(f" - {fp}") + def _memory_federation_diagnostic(name: str, args_str: str, cfg: dict, workspace: object) -> list[dict]: """Per-directive LSP diagnostic for @memory: warn on unsubscribed federation alias. diff --git a/src/perseus/cli.py b/src/perseus/cli.py index eb620da..881a4e1 100644 --- a/src/perseus/cli.py +++ b/src/perseus/cli.py @@ -195,6 +195,16 @@ def main(): p_fed_pull = fed_sub.add_parser("pull", help="Re-read all subscribed narratives (read-only, manual)") p_fed_pull.add_argument("--json", action="store_true", help="Machine-readable JSON output") + # memory doctor (#128 — legacy MD5 → SHA-256 narrative migration) + p_mem_doc = mem_sub.add_parser( + "doctor", + help="Scan/repair the Mnēmē memory store (legacy MD5 → SHA-256 narrative migration)", + ) + p_mem_doc.add_argument("--migrate", action="store_true", + help="Rename legacy MD5-named narratives to their SHA-256 paths (atomic, idempotent)") + p_mem_doc.add_argument("--json", action="store_true", + help="Machine-readable JSON output") + # memory index (Mnēmē v2) p_mem_idx = mem_sub.add_parser("index", help="Manage the FTS5 search index") idx_sub = p_mem_idx.add_subparsers(dest="index_command", required=True) diff --git a/src/perseus/mneme_narrative.py b/src/perseus/mneme_narrative.py index 82836fe..384ed9a 100644 --- a/src/perseus/mneme_narrative.py +++ b/src/perseus/mneme_narrative.py @@ -37,10 +37,154 @@ def _workspace_hash(workspace: Path) -> str: return hashlib.sha256(str(canonical).encode()).hexdigest()[:12] +def _workspace_hash_legacy_md5(workspace: Path) -> str: + """12-char MD5 hex digest — the pre-1.0.3 narrative file name scheme. + + Regression for #128: prior to v1.0.3, Mnēmē derived narrative file names + from an MD5 hash. v1.0.3+ switched to SHA-256. Without an explicit + migration, every existing narrative file on disk was silently orphaned + on upgrade. ``_mneme_path`` calls this function as a one-shot fallback + to locate and rename legacy files. Once migrated, this code path is + never re-entered for that workspace. + + We intentionally use ``usedforsecurity=False`` (Py3.9+) so FIPS-mode + Pythons don't reject the call — this is a file-naming hash, not a + security primitive. We fall back to the no-kwarg call for older Pythons. + """ + canonical = str(workspace.expanduser().resolve()).encode() + try: + return hashlib.md5(canonical, usedforsecurity=False).hexdigest()[:12] + except TypeError: + # Python < 3.9: no `usedforsecurity` kwarg. + return hashlib.md5(canonical).hexdigest()[:12] + + def _mneme_path(workspace: Path, cfg: dict) -> Path: - """Return the per-workspace narrative file path.""" + """Return the per-workspace narrative file path. + + Regression for #128: if a SHA-256 path doesn't exist but a legacy MD5 + path does, transparently rename the legacy file in place. This makes + upgrades from pre-1.0.3 lossless. + + The rename uses ``os.replace`` (atomic on POSIX/NTFS) and is best-effort: + if rename fails (cross-device, permission, etc.), we leave both files in + place and return the SHA-256 path. The caller will then see "no + narrative yet" and recreate — non-fatal but loses prior content. + Operators can also run ``perseus memory doctor --migrate`` to surface + and act on these cases explicitly. + """ + store = Path(cfg.get("memory", {}).get("store", str(PERSEUS_HOME / "memory"))) + new_path = store / f"{_workspace_hash(workspace)}.md" + if new_path.exists(): + return new_path + legacy_path = store / f"{_workspace_hash_legacy_md5(workspace)}.md" + if legacy_path.exists() and legacy_path != new_path: + try: + store.mkdir(parents=True, exist_ok=True) + os.replace(legacy_path, new_path) + except OSError: + # Cross-device / permission denied. Leave the legacy file in + # place so the operator can recover it manually; the caller will + # create a fresh narrative at the new path. + pass + return new_path + + +def _mneme_doctor_scan(cfg: dict) -> dict: + """Scan the memory store and report on narrative file inventory. + + Returns a dict with: + { + "store": str, # path to memory store + "narrative_files": [path, ...], # all *.md in store + "legacy_md5_files": [path, ...], # files whose name matches legacy MD5 of a known workspace + "sha256_files": [path, ...], # files that look like current-scheme files + "orphan_files": [path, ...], # files whose embedded `workspace` frontmatter no longer resolves to their filename + "unknown_files": [path, ...], # files whose stem isn't a 12-char hex hash + } + + "Known workspace" inference: we re-derive the SHA-256 and legacy MD5 + hashes from each file's ``workspace:`` frontmatter field, then match + against the actual filename stem. + + Used by ``perseus memory doctor`` to surface migration candidates. + """ store = Path(cfg.get("memory", {}).get("store", str(PERSEUS_HOME / "memory"))) - return store / f"{_workspace_hash(workspace)}.md" + out: dict = { + "store": str(store), + "narrative_files": [], + "legacy_md5_files": [], + "sha256_files": [], + "orphan_files": [], + "unknown_files": [], + } + if not store.exists(): + return out + hex_re = re.compile(r"^[a-f0-9]{12}$") + for fp in sorted(store.glob("*.md")): + out["narrative_files"].append(str(fp)) + stem = fp.stem + if not hex_re.match(stem): + out["unknown_files"].append(str(fp)) + continue + # Try to read the workspace from frontmatter and classify. + try: + fm, _ = _load_narrative(fp) + except Exception: + out["unknown_files"].append(str(fp)) + continue + ws_raw = str(fm.get("workspace", "")).strip() if isinstance(fm, dict) else "" + if not ws_raw: + # No workspace metadata — can't classify; treat as unknown. + out["unknown_files"].append(str(fp)) + continue + try: + ws = Path(ws_raw).expanduser() + expected_sha = _workspace_hash(ws) + expected_md5 = _workspace_hash_legacy_md5(ws) + except Exception: + out["unknown_files"].append(str(fp)) + continue + if stem == expected_sha: + out["sha256_files"].append(str(fp)) + elif stem == expected_md5: + out["legacy_md5_files"].append(str(fp)) + else: + out["orphan_files"].append(str(fp)) + return out + + +def _mneme_doctor_migrate(cfg: dict) -> dict: + """Rename legacy MD5-named narrative files to their SHA-256 names. + + Returns a dict: + { + "migrated": [(old, new), ...], + "skipped": [(old, new, reason), ...], + "errors": [(old, exc_str), ...], + } + + Idempotent: re-running after a successful migration is a no-op. + """ + report: dict = {"migrated": [], "skipped": [], "errors": []} + scan = _mneme_doctor_scan(cfg) + store = Path(scan["store"]) + for legacy_fp_str in scan["legacy_md5_files"]: + legacy_fp = Path(legacy_fp_str) + try: + fm, _ = _load_narrative(legacy_fp) + ws = Path(str(fm.get("workspace", "")).strip()).expanduser() + new_fp = store / f"{_workspace_hash(ws)}.md" + if new_fp.exists(): + report["skipped"].append( + (str(legacy_fp), str(new_fp), "destination already exists") + ) + continue + os.replace(legacy_fp, new_fp) + report["migrated"].append((str(legacy_fp), str(new_fp))) + except Exception as exc: # pragma: no cover - defensive + report["errors"].append((str(legacy_fp), str(exc))) + return report def _load_narrative(path: Path) -> tuple[dict, str]: diff --git a/tests/test_mneme.py b/tests/test_mneme.py index 7c0b5dc..f6e6a99 100644 --- a/tests/test_mneme.py +++ b/tests/test_mneme.py @@ -185,3 +185,176 @@ def fake_mneme(cfg_, query, k=5, scope=None, type_filter=None): perseus.resolve_memory('mode=search query="x"', cfg(), workspace=tmp_path) assert called + + +# --------------------------------------------------------------------------- +# #128 regression: MD5 → SHA-256 narrative migration +# --------------------------------------------------------------------------- + + +def _legacy_md5_name(workspace: Path) -> str: + """Reproduce the pre-1.0.3 hash exactly for fixture setup.""" + import hashlib as _h + canonical = str(workspace.expanduser().resolve()).encode() + try: + return _h.md5(canonical, usedforsecurity=False).hexdigest()[:12] + except TypeError: + return _h.md5(canonical).hexdigest()[:12] + + +def test_mneme_path_auto_migrates_legacy_md5_file(tmp_path): + """Regression for #128 — opening a workspace with only a legacy MD5 + narrative on disk renames it transparently to the SHA-256 path. + + Without this fix, every pre-1.0.3 user lost their narrative silently + on the v1.0.3 upgrade (the SHA-256 path didn't exist; Mnēmē reported + "No narrative yet" and started over, leaving the MD5 file orphaned). + """ + store = tmp_path / "store" + store.mkdir() + workspace = tmp_path / "ws" + workspace.mkdir() + cfg_ = {"memory": {"store": str(store)}} + + legacy_name = _legacy_md5_name(workspace) + legacy_fp = store / f"{legacy_name}.md" + legacy_fp.write_text( + f"---\nworkspace: {workspace}\nchecksum: legacy-md5\n---\n\n" + "## Project Arc\n\nLegacy content from v1.0.2.\n", + encoding="utf-8", + ) + + # First call should migrate. + new_fp = perseus._mneme_path(workspace, cfg_) + assert new_fp.exists(), "SHA-256 path must exist after migration" + assert not legacy_fp.exists(), "Legacy MD5 file must be renamed away" + body = new_fp.read_text(encoding="utf-8") + assert "Legacy content from v1.0.2." in body, ( + "Migration must preserve narrative content verbatim" + ) + + +def test_mneme_path_no_migration_when_sha256_already_exists(tmp_path): + """If both files exist, prefer SHA-256 and leave the legacy file alone. + + This protects against double-migration races and ensures we never + accidentally overwrite a current-scheme narrative. + """ + store = tmp_path / "store" + store.mkdir() + workspace = tmp_path / "ws" + workspace.mkdir() + cfg_ = {"memory": {"store": str(store)}} + + legacy_name = _legacy_md5_name(workspace) + legacy_fp = store / f"{legacy_name}.md" + legacy_fp.write_text("legacy\n", encoding="utf-8") + + sha_name = perseus._workspace_hash(workspace) + sha_fp = store / f"{sha_name}.md" + sha_fp.write_text("current\n", encoding="utf-8") + + result = perseus._mneme_path(workspace, cfg_) + assert result == sha_fp + assert sha_fp.read_text() == "current\n", "Current file must be untouched" + assert legacy_fp.exists(), "Legacy file must NOT be removed in this case" + + +def test_mneme_path_is_idempotent_after_migration(tmp_path): + """Calling _mneme_path twice in a row after a migration is a no-op.""" + store = tmp_path / "store" + store.mkdir() + workspace = tmp_path / "ws" + workspace.mkdir() + cfg_ = {"memory": {"store": str(store)}} + + legacy_fp = store / f"{_legacy_md5_name(workspace)}.md" + legacy_fp.write_text(f"---\nworkspace: {workspace}\n---\n\ndata\n", encoding="utf-8") + + p1 = perseus._mneme_path(workspace, cfg_) + p2 = perseus._mneme_path(workspace, cfg_) + assert p1 == p2 + assert p1.exists() + assert p1.read_text(encoding="utf-8").endswith("data\n") + + +def test_memory_doctor_scan_classifies_files(tmp_path): + """`memory doctor` (scan-only mode) correctly classifies the store.""" + store = tmp_path / "store" + store.mkdir() + cfg_ = {"memory": {"store": str(store)}} + + ws1 = tmp_path / "ws1"; ws1.mkdir() + ws2 = tmp_path / "ws2"; ws2.mkdir() + + # ws1 has a SHA-256 narrative; ws2 has a legacy MD5 narrative. + (store / f"{perseus._workspace_hash(ws1)}.md").write_text( + f"---\nworkspace: {ws1}\n---\n\nsha file\n", encoding="utf-8" + ) + (store / f"{_legacy_md5_name(ws2)}.md").write_text( + f"---\nworkspace: {ws2}\n---\n\nmd5 file\n", encoding="utf-8" + ) + # A pre-Mnēmē README that should be classified as "unknown stem". + (store / "README.md").write_text("# notes\n", encoding="utf-8") + + scan = perseus._mneme_doctor_scan(cfg_) + assert len(scan["narrative_files"]) == 3 + assert len(scan["sha256_files"]) == 1 + assert len(scan["legacy_md5_files"]) == 1 + assert len(scan["unknown_files"]) == 1 + assert scan["sha256_files"][0].endswith(f"{perseus._workspace_hash(ws1)}.md") + assert scan["legacy_md5_files"][0].endswith(f"{_legacy_md5_name(ws2)}.md") + + +def test_memory_doctor_migrate_renames_legacy_files(tmp_path): + """`memory doctor --migrate` renames every legacy MD5 file in the store.""" + store = tmp_path / "store" + store.mkdir() + cfg_ = {"memory": {"store": str(store)}} + + wsA = tmp_path / "wsA"; wsA.mkdir() + wsB = tmp_path / "wsB"; wsB.mkdir() + legacyA = store / f"{_legacy_md5_name(wsA)}.md" + legacyB = store / f"{_legacy_md5_name(wsB)}.md" + legacyA.write_text(f"---\nworkspace: {wsA}\n---\n\nA content\n", encoding="utf-8") + legacyB.write_text(f"---\nworkspace: {wsB}\n---\n\nB content\n", encoding="utf-8") + + result = perseus._mneme_doctor_migrate(cfg_) + assert len(result["migrated"]) == 2 + assert not legacyA.exists() + assert not legacyB.exists() + + new_A = store / f"{perseus._workspace_hash(wsA)}.md" + new_B = store / f"{perseus._workspace_hash(wsB)}.md" + assert new_A.exists() and new_A.read_text().endswith("A content\n") + assert new_B.exists() and new_B.read_text().endswith("B content\n") + + # Idempotent: re-running is a no-op. + second = perseus._mneme_doctor_migrate(cfg_) + assert second == {"migrated": [], "skipped": [], "errors": []} + + +def test_memory_doctor_migrate_skips_when_destination_exists(tmp_path): + """If a SHA-256 file is already there, --migrate skips the legacy file.""" + store = tmp_path / "store" + store.mkdir() + cfg_ = {"memory": {"store": str(store)}} + + workspace = tmp_path / "ws" + workspace.mkdir() + legacy_fp = store / f"{_legacy_md5_name(workspace)}.md" + legacy_fp.write_text(f"---\nworkspace: {workspace}\n---\n\nlegacy\n", + encoding="utf-8") + sha_fp = store / f"{perseus._workspace_hash(workspace)}.md" + sha_fp.write_text(f"---\nworkspace: {workspace}\n---\n\ncurrent\n", + encoding="utf-8") + + result = perseus._mneme_doctor_migrate(cfg_) + assert result["migrated"] == [] + assert len(result["skipped"]) == 1 + old, new, reason = result["skipped"][0] + assert "exists" in reason + # Both files still present. + assert legacy_fp.exists() + assert sha_fp.exists() + assert sha_fp.read_text().endswith("current\n")