diff --git a/CHANGELOG.md b/CHANGELOG.md index 1056a60..00ecaff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,54 @@ Each entry maps a release to the task IDs that shipped in it. The single-file `perseus.py` runtime is the only required artifact; everything else (installer, docs) is generated by `scripts/release.sh`. +## [1.0.6] — UNRELEASED + +Critical security + correctness hotfix bundle. See GitHub milestone +[v1.0.6](https://github.com/tcconnally/perseus/milestone/1). + +### 🐛 Bug Fixes + +- **#131** — `perseus memory compact` no longer hangs indefinitely when an + LLM provider (e.g. Ollama with a large model) is slow. Pre-1.0.6, + `_mneme_compact_llm()` called `run_llm()` which only enforced + `llm.timeout_s` (default 30s) on the HTTP request itself. With streaming + token providers, individual tokens can arrive within timeout but total + wall time was unbounded — operators reported `memory compact` hanging + for hours. + - `_memory_do_compact()` now wraps the LLM call in a wall-clock deadline + via `ThreadPoolExecutor.future.result(timeout=…)`. + - New config knob `memory.compact_total_timeout_s` (default 180s). + Set to 0 for pre-1.0.6 behavior (unbounded; not recommended). + - On timeout, `_memory_do_compact` falls back to the deterministic + narrative builder and writes a clear stderr message: + `> ⚠ Mnēmē compact: LLM provider 'ollama' exceeded + compact_total_timeout_s=180s; falling back to deterministic narrative.` + - New audit event `memory_compact_timeout` records provider, timeout + value, and workspace hash for observability. + - Same fallback path engages on any LLM exception (provider unreachable, + payload error) — `memory compact` always produces a usable narrative. + - **Limitation:** ThreadPoolExecutor cannot truly kill the worker + thread; the in-flight HTTP request continues until urllib's + per-request timeout fires. Worst-case wait is therefore + `compact_total_timeout_s + llm.timeout_s`. 
The leaked thread is + not a daemon on Python 3.9+; process exit waits for it to finish. + +### 🔒 Security (other v1.0.6 items, tracked in milestone) +- #136 — `long_hex_secret` redaction rule corrupted git hashes (PR #159) +- #137 — `@query` audit log leaked secrets (PR #160) +- #138, #139, #140, #141, #142 + +### 🐛 Bug Fixes (other v1.0.6 items) +- #128 — Mnēmē narrative MD5→SHA-256 migration (PR #161) +- #129, #130, #135 + +### 📦 Migration Notes +- New default `memory.compact_total_timeout_s: 180` is strictly safer + than pre-1.0.6 behavior. Users who want the old (unbounded) behavior + can set it to 0. + +--- + ## [1.0.5] — 2026-05-26 **Bastra-Recall — Persistent Memory Backend (superseded by Mnēmē v2 in 1.0.6):** diff --git a/perseus.py b/perseus.py index 0527f05..b25f62c 100644 --- a/perseus.py +++ b/perseus.py @@ -156,6 +156,12 @@ "recent_keep": 5, # raw checkpoints to include in Recent Activity "auto_update": True, # update narrative on every checkpoint write "compact_threshold": 20, # advisory: compact after this many incremental updates + # #131: wall-clock deadline for `perseus memory compact` LLM path. + # 0 = no deadline (pre-1.0.6 behavior — can hang indefinitely on + # slow models). Default 180s (3 min) covers Ollama mistral on a + # modern laptop for typical workspace sizes. On timeout the LLM + # call is abandoned and the deterministic narrative is used. 
+ "compact_total_timeout_s": 180, "llm_provider": None, # None = deterministic; "ollama" / "openai-compat" enables LLM "llm_model": None, # inherits from llm: block if None "max_narrative_lines": 300, # warn (not error) if narrative grows beyond this @@ -2451,7 +2457,6 @@ def resolve_include(args_str: str, workspace: Path | None = None, cfg: dict | No return f"> ⚠ @include: could not read `{file_path_str}`: {e}" # ── File size limit check (byte-counted, not character-counted) ── - max_bytes = render_cfg.get("max_include_bytes") if max_bytes is not None and len(data) > max_bytes: raw = data[:max_bytes].decode(errors="replace").rstrip() actual_size = len(data) @@ -2577,7 +2582,6 @@ def fallback_result() -> str: return f"> ⚠ @read: could not read `{file_path_str}`: {e}" # ── File size limit check (byte-counted, not character-counted) ── - max_bytes = render_cfg.get("max_read_bytes") if max_bytes is not None and len(data) > max_bytes: content = data[:max_bytes].decode(errors="replace") trunc_note = ( @@ -9057,7 +9061,72 @@ def _memory_do_compact(workspace: Path, cfg: dict, provider: str | None) -> str: fm = _mneme_default_frontmatter(workspace) if provider: - new_body = _mneme_compact_llm(all_checkpoints, all_pythia, workspace, cfg, provider) + # Regression for #131 — pre-1.0.6, _mneme_compact_llm() called run_llm() + # which only enforced `llm.timeout_s` (default 30s) on the HTTP request + # itself. With streaming-token providers like Ollama serving a large + # model, individual tokens can arrive within timeout but total wall + # time was unbounded — operators reported `memory compact` hanging + # for hours. + # + # We now wrap the LLM call in a wall-clock deadline (memory. + # compact_total_timeout_s, default 180s). On timeout we abandon the + # LLM future and fall back to deterministic narrative — operators get + # SOME narrative, plus a clear stderr signal so they can decide + # whether to upgrade their LLM setup or stay deterministic. 
+ # + # Limitation: ThreadPoolExecutor cannot truly kill the worker thread + # (Python provides no public API for that). The in-flight HTTP + # request continues until urllib's per-request timeout fires, so the + # worst-case total wait is `compact_total_timeout_s + llm.timeout_s`. + # NOTE: since Python 3.9 executor workers are NOT daemon threads; the + # interpreter joins them at exit, so exit can wait on that request. + # A value <= 0 means "no deadline" and maps to result(timeout=None). + total_timeout = float(cfg.get("memory", {}).get( + "compact_total_timeout_s", 180.0 + )) + try: + import concurrent.futures as _cf + executor = _cf.ThreadPoolExecutor( + max_workers=1, thread_name_prefix="mneme-compact-llm", + ) + try: + fut = executor.submit( + _mneme_compact_llm, + all_checkpoints, all_pythia, workspace, cfg, provider, + ) + new_body = fut.result(timeout=total_timeout if total_timeout > 0 else None) + finally: + # Don't block on the worker — it may still be waiting on + # urllib; cancel_futures only drops work that has not started. + executor.shutdown(wait=False, cancel_futures=True) + except _cf.TimeoutError: + sys.stderr.write( + f"> ⚠ Mnēmē compact: LLM provider {provider!r} exceeded " + f"compact_total_timeout_s={total_timeout:.0f}s; " + f"falling back to deterministic narrative.\n" + ) + try: + audit_event( + cfg, "memory_compact_timeout", + provider=provider, + total_timeout_s=total_timeout, + workspace_hash=_workspace_hash(workspace), + ) + except Exception: + pass + new_body = _deterministic_narrative( + all_checkpoints, all_pythia, "", workspace, cfg, + ) + except Exception as exc: + # LLM call raised (model server unreachable, payload error, etc.) + # — surface the failure but still produce SOMETHING usable. 
+ sys.stderr.write( + f"> ⚠ Mnēmē compact: LLM provider {provider!r} failed " + f"({exc}); falling back to deterministic narrative.\n" + ) + new_body = _deterministic_narrative( + all_checkpoints, all_pythia, "", workspace, cfg, + ) else: new_body = _deterministic_narrative(all_checkpoints, all_pythia, "", workspace, cfg) diff --git a/src/perseus/agora.py b/src/perseus/agora.py index 349ffff..cf7638a 100644 --- a/src/perseus/agora.py +++ b/src/perseus/agora.py @@ -90,7 +90,72 @@ def _memory_do_compact(workspace: Path, cfg: dict, provider: str | None) -> str: fm = _mneme_default_frontmatter(workspace) if provider: - new_body = _mneme_compact_llm(all_checkpoints, all_pythia, workspace, cfg, provider) + # Regression for #131 — pre-1.0.6, _mneme_compact_llm() called run_llm() + # which only enforced `llm.timeout_s` (default 30s) on the HTTP request + # itself. With streaming-token providers like Ollama serving a large + # model, individual tokens can arrive within timeout but total wall + # time was unbounded — operators reported `memory compact` hanging + # for hours. + # + # We now wrap the LLM call in a wall-clock deadline (memory. + # compact_total_timeout_s, default 180s). On timeout we abandon the + # LLM future and fall back to deterministic narrative — operators get + # SOME narrative, plus a clear stderr signal so they can decide + # whether to upgrade their LLM setup or stay deterministic. + # + # Limitation: ThreadPoolExecutor cannot truly kill the worker thread + # (Python provides no public API for that). The in-flight HTTP + # request continues until urllib's per-request timeout fires, so the + # worst-case total wait is `compact_total_timeout_s + llm.timeout_s`. + # NOTE: since Python 3.9 executor workers are NOT daemon threads; the + # interpreter joins them at exit, so exit can wait on that request. + # A value <= 0 means "no deadline" and maps to result(timeout=None). 
+ total_timeout = float(cfg.get("memory", {}).get( + "compact_total_timeout_s", 180.0 + )) + try: + import concurrent.futures as _cf + executor = _cf.ThreadPoolExecutor( + max_workers=1, thread_name_prefix="mneme-compact-llm", + ) + try: + fut = executor.submit( + _mneme_compact_llm, + all_checkpoints, all_pythia, workspace, cfg, provider, + ) + new_body = fut.result(timeout=total_timeout if total_timeout > 0 else None) + finally: + # Don't block on the worker — it may still be waiting on + # urllib; cancel_futures only drops work that has not started. + executor.shutdown(wait=False, cancel_futures=True) + except _cf.TimeoutError: + sys.stderr.write( + f"> ⚠ Mnēmē compact: LLM provider {provider!r} exceeded " + f"compact_total_timeout_s={total_timeout:.0f}s; " + f"falling back to deterministic narrative.\n" + ) + try: + audit_event( + cfg, "memory_compact_timeout", + provider=provider, + total_timeout_s=total_timeout, + workspace_hash=_workspace_hash(workspace), + ) + except Exception: + pass + new_body = _deterministic_narrative( + all_checkpoints, all_pythia, "", workspace, cfg, + ) + except Exception as exc: + # LLM call raised (model server unreachable, payload error, etc.) + # — surface the failure but still produce SOMETHING usable. 
+ sys.stderr.write( + f"> ⚠ Mnēmē compact: LLM provider {provider!r} failed " + f"({exc}); falling back to deterministic narrative.\n" + ) + new_body = _deterministic_narrative( + all_checkpoints, all_pythia, "", workspace, cfg, + ) else: new_body = _deterministic_narrative(all_checkpoints, all_pythia, "", workspace, cfg) diff --git a/src/perseus/config.py b/src/perseus/config.py index d417214..a998ef0 100644 --- a/src/perseus/config.py +++ b/src/perseus/config.py @@ -93,6 +93,12 @@ "recent_keep": 5, # raw checkpoints to include in Recent Activity "auto_update": True, # update narrative on every checkpoint write "compact_threshold": 20, # advisory: compact after this many incremental updates + # #131: wall-clock deadline for `perseus memory compact` LLM path. 
+ # 0 = no deadline (pre-1.0.6 behavior — can hang indefinitely on + # slow models). Default 180s (3 min) covers Ollama mistral on a + # modern laptop for typical workspace sizes. On timeout the LLM + # call is abandoned and the deterministic narrative is used. + "compact_total_timeout_s": 180, "llm_provider": None, # None = deterministic; "ollama" / "openai-compat" enables LLM "llm_model": None, # inherits from llm: block if None "max_narrative_lines": 300, # warn (not error) if narrative grows beyond this diff --git a/tests/test_memory.py b/tests/test_memory.py index 0947af1..515da57 100644 --- a/tests/test_memory.py +++ b/tests/test_memory.py @@ -360,3 +360,109 @@ def test_memory_status_json_with_narrative(tmp_path, monkeypatch): "pythia_entries_processed", "pythia_entries_pending", "compaction_count", "line_count", "mode", "frontmatter"): assert key in out, f"Missing key: {key}" + + +# ───────────────────────────────────────────────────────────────────────────── +# #131 regression: memory compact must enforce a wall-clock deadline +# ───────────────────────────────────────────────────────────────────────────── + + +def test_memory_compact_total_timeout_falls_back_to_deterministic( + tmp_path, monkeypatch, capsys +): + """Regression for #131 — when the LLM compact path exceeds + `memory.compact_total_timeout_s`, _memory_do_compact must abandon the + LLM call and use the deterministic narrative builder instead. The + operator gets a clear stderr message AND a usable narrative. + """ + local = _mneme_cfg(tmp_path) + local["memory"]["compact_total_timeout_s"] = 0.5 # short for test + + _write_checkpoint(Path(local["checkpoints"]["store"]), + "2026-05-15T10:00:00+00:00", "A") + _write_checkpoint(Path(local["checkpoints"]["store"]), + "2026-05-16T10:00:00+00:00", "B") + + def slow_llm(*args, **kwargs): + # Simulate a slow LLM (e.g. Ollama with a large model). 
+ time.sleep(2.0) + return "## LLM Content\n\nIf you see this, the timeout did not fire.\n" + + monkeypatch.setattr(perseus, "_mneme_compact_llm", slow_llm) + + start = time.monotonic() + perseus._memory_do_compact(tmp_path, local, provider="ollama") + elapsed = time.monotonic() - start + + # Should return well under 2.0s — only block for the timeout deadline, + # not for the full LLM call (we cannot interrupt the thread, but + # future.result(timeout=…) returns immediately on TimeoutError). + assert elapsed < 1.5, ( + f"Compact took {elapsed:.2f}s — wall-clock deadline did not fire" + ) + + # Narrative should be the deterministic fallback, not the LLM payload. + p = perseus._mneme_path(tmp_path, local) + _, body = perseus._load_narrative(p) + assert "If you see this" not in body, ( + "LLM content present — fallback did not engage" + ) + assert "## Project Arc" in body, "Deterministic narrative missing" + + err = capsys.readouterr().err + assert "exceeded" in err.lower() or "timeout" in err.lower() + assert "deterministic" in err.lower() + + +def test_memory_compact_succeeds_within_total_timeout(tmp_path, monkeypatch): + """LLM compact succeeds when under the deadline.""" + local = _mneme_cfg(tmp_path) + local["memory"]["compact_total_timeout_s"] = 5.0 + + _write_checkpoint(Path(local["checkpoints"]["store"]), + "2026-05-15T10:00:00+00:00", "A") + + def fast_llm(*args, **kwargs): + time.sleep(0.05) + return "## Project Arc\n\nLLM-built narrative content.\n" + + monkeypatch.setattr(perseus, "_mneme_compact_llm", fast_llm) + + perseus._memory_do_compact(tmp_path, local, provider="ollama") + + p = perseus._mneme_path(tmp_path, local) + _, body = perseus._load_narrative(p) + assert "LLM-built narrative content." in body, ( + "LLM body should have been used when call returned within deadline" + ) + + +def test_memory_compact_llm_exception_falls_back_to_deterministic( + tmp_path, monkeypatch, capsys +): + """If the LLM call raises (e.g. 
provider unreachable), fall back to + deterministic narrative rather than propagating the exception up. + """ + local = _mneme_cfg(tmp_path) + _write_checkpoint(Path(local["checkpoints"]["store"]), + "2026-05-15T10:00:00+00:00", "A") + + def broken_llm(*args, **kwargs): + raise RuntimeError("> ⚠ LLM request failed: Connection refused") + + monkeypatch.setattr(perseus, "_mneme_compact_llm", broken_llm) + + # Must NOT raise — fallback engages. + perseus._memory_do_compact(tmp_path, local, provider="ollama") + + p = perseus._mneme_path(tmp_path, local) + _, body = perseus._load_narrative(p) + assert "## Project Arc" in body + err = capsys.readouterr().err + assert "Connection refused" in err or "failed" in err + assert "deterministic" in err + + +def test_memory_compact_default_timeout_is_180s(): + """The DEFAULT_CONFIG must set compact_total_timeout_s to 180s.""" + assert perseus.DEFAULT_CONFIG["memory"]["compact_total_timeout_s"] == 180