tcconnally · tcconnally · Jun 3, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,6 +10,54 @@ Each entry maps a release to the task IDs that shipped in it. The
 single-file `perseus.py` runtime is the only required artifact; everything
 else (installer, docs) is generated by `scripts/release.sh`.
 
+## [1.0.6] — UNRELEASED
+
+Critical security + correctness hotfix bundle. See GitHub milestone
+[v1.0.6](https://github.com/tcconnally/perseus/milestone/1).
+
+### 🐛 Bug Fixes
+
+- **#131** — `perseus memory compact` no longer hangs indefinitely when an
+  LLM provider (e.g. Ollama with a large model) is slow. Pre-1.0.6,
+  `_mneme_compact_llm()` called `run_llm()` which only enforced
+  `llm.timeout_s` (default 30s) on the HTTP request itself. With streaming
+  token providers, individual tokens can arrive within timeout but total
+  wall time was unbounded — operators reported `memory compact` hanging
+  for hours.
+  - `_memory_do_compact()` now wraps the LLM call in a wall-clock deadline
+    via `ThreadPoolExecutor.future.result(timeout=…)`.
+  - New config knob `memory.compact_total_timeout_s` (default 180s).
+    Set to 0 for pre-1.0.6 behavior (unbounded; not recommended).
+  - On timeout, `_memory_do_compact` falls back to the deterministic
+    narrative builder and writes a clear stderr message:
+    `> ⚠ Mnēmē compact: LLM provider 'ollama' exceeded
+    compact_total_timeout_s=180s; falling back to deterministic narrative.`
+  - New audit event `memory_compact_timeout` records provider, timeout
+    value, and workspace hash for observability.
+  - Same fallback path engages on any LLM exception (provider unreachable,
+    payload error) — `memory compact` always produces a usable narrative.
+  - **Limitation:** ThreadPoolExecutor cannot kill the worker thread, so
+    the abandoned HTTP request keeps running after the fallback. Pool
+    workers are non-daemon (Python 3.9+) and joined at interpreter exit,
+    so the process may not exit until that request ends — at best
+    `compact_total_timeout_s + llm.timeout_s`, longer if tokens keep streaming.
+
+### 🔒 Security (other v1.0.6 items, tracked in milestone)
+- #136 — `long_hex_secret` redaction rule corrupted git hashes (PR #159)
+- #137 — `@query` audit log leaked secrets (PR #160)
+- #138, #139, #140, #141, #142
+
+### 🐛 Bug Fixes (other v1.0.6 items)
+- #128 — Mnēmē narrative MD5→SHA-256 migration (PR #161)
+- #129, #130, #135
+
+### 📦 Migration Notes
+- New default `memory.compact_total_timeout_s: 180` is strictly safer
+  than pre-1.0.6 behavior. Users who want the old (unbounded) behavior
+  can set it to 0.
+
+---
+
 ## [1.0.5] — 2026-05-26
 
 **Bastra-Recall — Persistent Memory Backend (superseded by Mnēmē v2 in 1.0.6):**

diff --git a/perseus.py b/perseus.py
@@ -156,6 +156,12 @@
         "recent_keep": 5,           # raw checkpoints to include in Recent Activity
         "auto_update": True,        # update narrative on every checkpoint write
         "compact_threshold": 20,    # advisory: compact after this many incremental updates
+        # #131: wall-clock deadline for `perseus memory compact` LLM path.
+        # 0 = no deadline (pre-1.0.6 behavior — can hang indefinitely on
+        # slow models). Default 180s (3 min) covers Ollama mistral on a
+        # modern laptop for typical workspace sizes. On timeout the LLM
+        # call is abandoned and the deterministic narrative is used.
+        "compact_total_timeout_s": 180,
         "llm_provider": None,       # None = deterministic; "ollama" / "openai-compat" enables LLM
         "llm_model": None,          # inherits from llm: block if None
         "max_narrative_lines": 300, # warn (not error) if narrative grows beyond this
@@ -2451,7 +2457,6 @@ def resolve_include(args_str: str, workspace: Path | None = None, cfg: dict | No
         return f"> ⚠ @include: could not read `{file_path_str}`: {e}"
 
     # ── File size limit check (byte-counted, not character-counted) ──
-    max_bytes = render_cfg.get("max_include_bytes")
     if max_bytes is not None and len(data) > max_bytes:
         raw = data[:max_bytes].decode(errors="replace").rstrip()
         actual_size = len(data)
@@ -2577,7 +2582,6 @@ def fallback_result() -> str:
         return f"> ⚠ @read: could not read `{file_path_str}`: {e}"
 
     # ── File size limit check (byte-counted, not character-counted) ──
-    max_bytes = render_cfg.get("max_read_bytes")
     if max_bytes is not None and len(data) > max_bytes:
         content = data[:max_bytes].decode(errors="replace")
         trunc_note = (
@@ -9057,7 +9061,72 @@ def _memory_do_compact(workspace: Path, cfg: dict, provider: str | None) -> str:
         fm = _mneme_default_frontmatter(workspace)
 
     if provider:
-        new_body = _mneme_compact_llm(all_checkpoints, all_pythia, workspace, cfg, provider)
+        # Regression for #131 — pre-1.0.6, _mneme_compact_llm() called run_llm()
+        # which only enforced `llm.timeout_s` (default 30s) on the HTTP request
+        # itself. With streaming-token providers like Ollama serving a large
+        # model, individual tokens can arrive within timeout but total wall
+        # time was unbounded — operators reported `memory compact` hanging
+        # for hours.
+        #
+        # We now wrap the LLM call in a wall-clock deadline (memory.
+        # compact_total_timeout_s, default 180s). On timeout we abandon the
+        # LLM future and fall back to deterministic narrative — operators get
+        # SOME narrative, plus a clear stderr signal so they can decide
+        # whether to upgrade their LLM setup or stay deterministic.
+        #
+        # Limitation: a running thread cannot be killed, so the abandoned
+        # request keeps going in the background. ThreadPoolExecutor workers
+        # are non-daemon (Python 3.9+) and are joined at interpreter
+        # shutdown, so process exit can still wait on the leaked call.
+        # NOTE(review): a threading.Thread(daemon=True) would avoid that.
+        total_timeout = float(cfg.get("memory", {}).get(
+            "compact_total_timeout_s", 180.0
+        ))
+        # 0 or less = no deadline; result(timeout=0) would time out at once.
+        deadline = total_timeout if total_timeout > 0 else None
+        try:
+            import concurrent.futures as _cf
+            executor = _cf.ThreadPoolExecutor(
+                max_workers=1, thread_name_prefix="mneme-compact-llm",
+            )
+            try:
+                fut = executor.submit(
+                    _mneme_compact_llm,
+                    all_checkpoints, all_pythia, workspace, cfg, provider,
+                )
+                new_body = fut.result(timeout=deadline)
+            finally:
+                # Don't block on the worker here — it may still be waiting
+                # on urllib; it is only joined later, at interpreter exit.
+                executor.shutdown(wait=False, cancel_futures=True)
+        except _cf.TimeoutError:
+            sys.stderr.write(
+                f"> ⚠ Mnēmē compact: LLM provider {provider!r} exceeded "
+                f"compact_total_timeout_s={total_timeout:.0f}s; "
+                f"falling back to deterministic narrative.\n"
+            )
+            try:
+                audit_event(
+                    cfg, "memory_compact_timeout",
+                    provider=provider,
+                    total_timeout_s=total_timeout,
+                    workspace_hash=_workspace_hash(workspace),
+                )
+            except Exception:
+                pass
+            new_body = _deterministic_narrative(
+                all_checkpoints, all_pythia, "", workspace, cfg,
+            )
+        except Exception as exc:
+            # LLM call raised (model server unreachable, payload error, etc.)
+            # — surface the failure but still produce SOMETHING usable.
+            sys.stderr.write(
+                f"> ⚠ Mnēmē compact: LLM provider {provider!r} failed "
+                f"({exc}); falling back to deterministic narrative.\n"
+            )
+            new_body = _deterministic_narrative(
+                all_checkpoints, all_pythia, "", workspace, cfg,
+            )
     else:
         new_body = _deterministic_narrative(all_checkpoints, all_pythia, "", workspace, cfg)
 

diff --git a/src/perseus/agora.py b/src/perseus/agora.py
@@ -90,7 +90,72 @@ def _memory_do_compact(workspace: Path, cfg: dict, provider: str | None) -> str:
         fm = _mneme_default_frontmatter(workspace)
 
     if provider:
-        new_body = _mneme_compact_llm(all_checkpoints, all_pythia, workspace, cfg, provider)
+        # Regression for #131 — pre-1.0.6, _mneme_compact_llm() called run_llm()
+        # which only enforced `llm.timeout_s` (default 30s) on the HTTP request
+        # itself. With streaming-token providers like Ollama serving a large
+        # model, individual tokens can arrive within timeout but total wall
+        # time was unbounded — operators reported `memory compact` hanging
+        # for hours.
+        #
+        # We now wrap the LLM call in a wall-clock deadline (memory.
+        # compact_total_timeout_s, default 180s). On timeout we abandon the
+        # LLM future and fall back to deterministic narrative — operators get
+        # SOME narrative, plus a clear stderr signal so they can decide
+        # whether to upgrade their LLM setup or stay deterministic.
+        #
+        # Limitation: a running thread cannot be killed, so the abandoned
+        # request keeps going in the background. ThreadPoolExecutor workers
+        # are non-daemon (Python 3.9+) and are joined at interpreter
+        # shutdown, so process exit can still wait on the leaked call.
+        # NOTE(review): a threading.Thread(daemon=True) would avoid that.
+        total_timeout = float(cfg.get("memory", {}).get(
+            "compact_total_timeout_s", 180.0
+        ))
+        # 0 or less = no deadline; result(timeout=0) would time out at once.
+        deadline = total_timeout if total_timeout > 0 else None
+        try:
+            import concurrent.futures as _cf
+            executor = _cf.ThreadPoolExecutor(
+                max_workers=1, thread_name_prefix="mneme-compact-llm",
+            )
+            try:
+                fut = executor.submit(
+                    _mneme_compact_llm,
+                    all_checkpoints, all_pythia, workspace, cfg, provider,
+                )
+                new_body = fut.result(timeout=deadline)
+            finally:
+                # Don't block on the worker here — it may still be waiting
+                # on urllib; it is only joined later, at interpreter exit.
+                executor.shutdown(wait=False, cancel_futures=True)
+        except _cf.TimeoutError:
+            sys.stderr.write(
+                f"> ⚠ Mnēmē compact: LLM provider {provider!r} exceeded "
+                f"compact_total_timeout_s={total_timeout:.0f}s; "
+                f"falling back to deterministic narrative.\n"
+            )
+            try:
+                audit_event(
+                    cfg, "memory_compact_timeout",
+                    provider=provider,
+                    total_timeout_s=total_timeout,
+                    workspace_hash=_workspace_hash(workspace),
+                )
+            except Exception:
+                pass
+            new_body = _deterministic_narrative(
+                all_checkpoints, all_pythia, "", workspace, cfg,
+            )
+        except Exception as exc:
+            # LLM call raised (model server unreachable, payload error, etc.)
+            # — surface the failure but still produce SOMETHING usable.
+            sys.stderr.write(
+                f"> ⚠ Mnēmē compact: LLM provider {provider!r} failed "
+                f"({exc}); falling back to deterministic narrative.\n"
+            )
+            new_body = _deterministic_narrative(
+                all_checkpoints, all_pythia, "", workspace, cfg,
+            )
     else:
         new_body = _deterministic_narrative(all_checkpoints, all_pythia, "", workspace, cfg)
 

diff --git a/src/perseus/config.py b/src/perseus/config.py
@@ -93,6 +93,12 @@
         "recent_keep": 5,           # raw checkpoints to include in Recent Activity
         "auto_update": True,        # update narrative on every checkpoint write
         "compact_threshold": 20,    # advisory: compact after this many incremental updates
+        # #131: wall-clock deadline for `perseus memory compact` LLM path.
+        # 0 = no deadline (pre-1.0.6 behavior — can hang indefinitely on
+        # slow models). Default 180s (3 min) covers Ollama mistral on a
+        # modern laptop for typical workspace sizes. On timeout the LLM
+        # call is abandoned and the deterministic narrative is used.
+        "compact_total_timeout_s": 180,
         "llm_provider": None,       # None = deterministic; "ollama" / "openai-compat" enables LLM
         "llm_model": None,          # inherits from llm: block if None
         "max_narrative_lines": 300, # warn (not error) if narrative grows beyond this

diff --git a/tests/test_memory.py b/tests/test_memory.py
@@ -360,3 +360,109 @@ def test_memory_status_json_with_narrative(tmp_path, monkeypatch):
                 "pythia_entries_processed", "pythia_entries_pending",
                 "compaction_count", "line_count", "mode", "frontmatter"):
         assert key in out, f"Missing key: {key}"
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# #131 regression: memory compact must enforce a wall-clock deadline
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+def test_memory_compact_total_timeout_falls_back_to_deterministic(
+    tmp_path, monkeypatch, capsys
+):
+    """Regression for #131 — when the LLM compact path exceeds
+    `memory.compact_total_timeout_s`, _memory_do_compact must abandon the
+    LLM call and use the deterministic narrative builder instead. The
+    operator gets a clear stderr message AND a usable narrative.
+    """
+    local = _mneme_cfg(tmp_path)
+    local["memory"]["compact_total_timeout_s"] = 0.5  # short for test
+
+    _write_checkpoint(Path(local["checkpoints"]["store"]),
+                      "2026-05-15T10:00:00+00:00", "A")
+    _write_checkpoint(Path(local["checkpoints"]["store"]),
+                      "2026-05-16T10:00:00+00:00", "B")
+
+    def slow_llm(*args, **kwargs):
+        # Simulate a slow LLM (e.g. Ollama with a large model).
+        time.sleep(2.0)
+        return "## LLM Content\n\nIf you see this, the timeout did not fire.\n"
+
+    monkeypatch.setattr(perseus, "_mneme_compact_llm", slow_llm)
+
+    # Monotonic clock: immune to wall-clock adjustments during the test.
+    start = time.monotonic()
+    perseus._memory_do_compact(tmp_path, local, provider="ollama")
+    elapsed = time.monotonic() - start
+
+    # Should return well under 2.0s — we only block for the deadline,
+    # not for the full LLM call (future.result raises on the deadline).
+    assert elapsed < 1.5, (
+        f"Compact took {elapsed:.2f}s — wall-clock deadline did not fire"
+    )
+
+    # Narrative should be the deterministic fallback, not the LLM payload.
+    p = perseus._mneme_path(tmp_path, local)
+    _, body = perseus._load_narrative(p)
+    assert "If you see this" not in body, (
+        "LLM content present — fallback did not engage"
+    )
+    assert "## Project Arc" in body, "Deterministic narrative missing"
+
+    err = capsys.readouterr().err
+    assert "exceeded" in err.lower() or "timeout" in err.lower()
+    assert "deterministic" in err.lower()
+
+
+def test_memory_compact_succeeds_within_total_timeout(tmp_path, monkeypatch):
+    """A provider that answers before the deadline has its narrative kept."""
+    cfg = _mneme_cfg(tmp_path)
+    cfg["memory"]["compact_total_timeout_s"] = 5.0
+
+    checkpoint_store = Path(cfg["checkpoints"]["store"])
+    _write_checkpoint(checkpoint_store, "2026-05-15T10:00:00+00:00", "A")
+
+    def fast_llm(*args, **kwargs):
+        time.sleep(0.05)
+        return "## Project Arc\n\nLLM-built narrative content.\n"
+
+    monkeypatch.setattr(perseus, "_mneme_compact_llm", fast_llm)
+
+    perseus._memory_do_compact(tmp_path, cfg, provider="ollama")
+
+    narrative_path = perseus._mneme_path(tmp_path, cfg)
+    _unused, body = perseus._load_narrative(narrative_path)
+    assert "LLM-built narrative content." in body, (
+        "LLM body should have been used when call returned within deadline"
+    )
+
+
+def test_memory_compact_llm_exception_falls_back_to_deterministic(
+    tmp_path, monkeypatch, capsys
+):
+    """If the LLM call raises (e.g. provider unreachable), fall back to
+    deterministic narrative rather than propagating the exception up.
+    """
+    local = _mneme_cfg(tmp_path)
+    _write_checkpoint(Path(local["checkpoints"]["store"]),
+                      "2026-05-15T10:00:00+00:00", "A")
+
+    def broken_llm(*args, **kwargs):
+        raise RuntimeError("> ⚠ LLM request failed: Connection refused")
+
+    monkeypatch.setattr(perseus, "_mneme_compact_llm", broken_llm)
+
+    # Must NOT raise — fallback engages.
+    perseus._memory_do_compact(tmp_path, local, provider="ollama")
+
+    p = perseus._mneme_path(tmp_path, local)
+    _, body = perseus._load_narrative(p)
+    assert "## Project Arc" in body
+    err = capsys.readouterr().err
+    assert "failed" in err and "Connection refused" in err  # cause is surfaced
+    assert "deterministic" in err
+
+
+def test_memory_compact_default_timeout_is_180s():
+    """DEFAULT_CONFIG must ship `memory.compact_total_timeout_s` as 180."""
+    assert perseus.DEFAULT_CONFIG["memory"]["compact_total_timeout_s"] == 180