def _recent_write_count(config: chatlog_config.ChatlogConfig,
                        window_min: int = 15) -> int:
    """Return how many chat_log rows were created in the last `window_min` minutes.

    This is the capture-health signal: it measures writes that actually landed
    in the database, as opposed to config.host_agents[*].enabled, which only
    records whether a per-turn shell hook was wired into settings.json.

    Args:
        config: Resolved chatlog configuration.
            NOTE(review): not read here — the count always targets the path
            returned by resolve_db_path(None). Confirm that is intended when
            chat logs live in a separate config.db_path database.
        window_min: Size of the look-back window, in minutes.

    Returns:
        The number of matching rows (0 means nothing was written in the
        window), or -1 when the database file does not exist or the query
        raises sqlite3.Error — i.e. the status is unknown.
    """
    from m3_sdk import resolve_db_path

    db_file = os.path.abspath(resolve_db_path(None))
    # Bail out before connecting: sqlite3.connect() would create a missing file.
    if not os.path.exists(db_file):
        return -1

    query = (
        "SELECT COUNT(*) FROM memory_items WHERE type='chat_log' "
        "AND created_at > datetime('now', ?)"
    )
    try:
        connection = sqlite3.connect(db_file, timeout=5)
        try:
            # Bound as a parameter; SQLite applies it as a relative modifier.
            cutoff_modifier = f"-{int(window_min)} minutes"
            found = connection.execute(query, (cutoff_modifier,)).fetchone()
        finally:
            connection.close()
    except sqlite3.Error:
        return -1
    return int(found[0]) if found else 0
Only + # warn when total chatlog rows exist (a fresh/empty install legitimately has + # 0 recent writes and should not scream). + total_rows = row_counts.get("main_chat_log_rows", 0) or row_counts.get("chatlog_rows", 0) + if recent_writes == 0 and total_rows > 0: + warnings.append( + f"NO chatlog writes in last {recent_window_min}min " + "(capture may be down — verify before trusting memory)" + ) + elif recent_writes < 0: + warnings.append("could not query recent chatlog writes (capture status unknown)") + if config.redaction.enabled: regex_errors = state.get("redaction", {}).get("regex_errors", []) if regex_errors: @@ -172,6 +222,8 @@ def chatlog_status_impl() -> str: config = chatlog_config.resolve_config() state = _load_state_file() row_counts = _get_row_counts(config) + recent_window_min = 15 + recent_writes = _recent_write_count(config, recent_window_min) main_db = os.path.abspath(resolve_db_path(None)) chatlog_db = os.path.abspath(config.db_path) @@ -204,8 +256,22 @@ def chatlog_status_impl() -> str: "bytes": state.get("spill", {}).get("bytes", 0), "oldest_ms_ago": state.get("spill", {}).get("oldest_ms_ago"), }, + # TRUE capture-health signal — reflects actual recent writes to the DB, + # not whether a per-turn shell hook was wired. Consumers (CLAUDE.md + # session-start check, m3:status, m3:health) should read THIS, not + # hooks[*].wired. recent_rows == -1 means the query failed (unknown). + "capture": { + "healthy": recent_writes > 0, + "recent_rows": recent_writes, + "window_min": recent_window_min, + }, "hooks": { name: { + # `wired`: a per-turn shell hook is configured in settings for this + # agent. This is NOT a capture signal — the Stop-hook / MCP write + # path captures even when wired is False. Kept (was misnamed + # `enabled`) for back-compat: `enabled` is aliased to `wired`. 
+ "wired": spec.enabled, "enabled": spec.enabled, "last_write_ms_ago": state.get("hooks", {}).get(name, {}).get("last_write_ms_ago"), } @@ -217,7 +283,8 @@ def chatlog_status_impl() -> str: "regex_errors": state.get("redaction", {}).get("regex_errors", []), }, "last_write_at": state.get("last_write_at"), - "warnings": _compute_warnings(state, config, row_counts), + "warnings": _compute_warnings(state, config, row_counts, + recent_writes, recent_window_min), } return json.dumps(result, indent=2) diff --git a/bin/generate_configs.py b/bin/generate_configs.py index 2c8b7d7..431b949 100644 --- a/bin/generate_configs.py +++ b/bin/generate_configs.py @@ -11,11 +11,22 @@ def generate_configs(): m3_state_root = os.path.dirname(m3_repo_root) config_dir = os.path.join(m3_repo_root, "config") - # Prefer python3 on non-Windows; fall back to python only if python3 absent. + # Resolve the interpreter to the repo's own venv so hooks/MCP don't depend on + # whatever "python" happens to be on PATH (the venv may not be activated when a + # hook fires). venv layout differs by OS: Windows = .venv/Scripts/python.exe, + # macOS/Linux = .venv/bin/python. Always forward-slash: Claude Code runs hook + # commands through a shell (Git Bash on Windows) where backslashes are escapes. if os.name == "nt": - python_cmd = "python" + venv_py = os.path.join(m3_repo_root, ".venv", "Scripts", "python.exe") else: - python_cmd = "python3" if shutil.which("python3") else "python" + venv_py = os.path.join(m3_repo_root, ".venv", "bin", "python") + if os.path.exists(venv_py): + python_cmd = venv_py.replace("\\", "/") + else: + # No venv found — fall back to PATH (python3 preferred off-Windows). + python_cmd = "python" if os.name == "nt" else ( + "python3" if shutil.which("python3") else "python" + ) # M3_EMBED_GGUF: use env override, else auto-detect the standard LMStudio path. 
def _m3_repo_root():
    """Return the m3 repository root: the parent of this file's directory."""
    return os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


def _is_m3_command(cmd, repo_root_fwd):
    """Return True if a hook/statusLine command string belongs to m3.

    Used so an upgrade can replace m3's own entries in place instead of
    appending duplicates. Matches on the repo root path or the known m3 script
    markers — never on a hardcoded user path.

    Args:
        cmd: The command string from a settings entry; non-strings never match.
        repo_root_fwd: The m3 repo root, written with forward slashes.

    Returns:
        True when any marker occurs in the command, else False.
    """
    if not isinstance(cmd, str):
        return False
    # Every marker uses forward slashes, so normalize the command first:
    # entries written with Windows backslashes would otherwise go unrecognised
    # and be duplicated on the next install.
    normalized = cmd.replace("\\", "/")
    root = repo_root_fwd.replace("\\", "/") if isinstance(repo_root_fwd, str) else ""
    markers = (root, "bin/hooks/chatlog/", "bin/statusline-command",
               "session_start_capture_check", "claude_code_precompact")
    # `m and ...` skips an empty root, which would match every command.
    return any(m and m in normalized for m in markers)


def _strip_m3_hook_entries(hook_list, repo_root_fwd):
    """Return hook_list with any m3-managed entries removed.

    A Claude hooks-list entry looks like
    {"hooks": [{"type": "command", "command": "..."}]}. An entry is dropped if
    ANY of its inner commands is an m3 command; everything else (user hooks,
    and entries of an unexpected shape) is kept untouched.

    Args:
        hook_list: The list stored under one hook event, or None.
        repo_root_fwd: The m3 repo root, written with forward slashes.

    Returns:
        A new list holding only the entries m3 does not own.
    """
    kept = []
    for entry in hook_list or []:
        inner = entry.get("hooks") if isinstance(entry, dict) else None
        if not isinstance(inner, list):
            inner = []  # e.g. "hooks": null — nothing to inspect, keep entry
        if any(_is_m3_command(h.get("command", ""), repo_root_fwd)
               for h in inner if isinstance(h, dict)):
            continue  # drop stale/previous m3 entry
        kept.append(entry)
    return kept


def install_claude_settings(settings_path=None, assume_yes=False, dry_run=False,
                            keep_status_line=False):
    """Idempotently merge m3's hooks, statusLine and mcpServers into settings.json.

    Safe to re-run (upgrades): m3-owned entries are replaced in place, never
    duplicated, and user-owned keys/hooks are preserved.

    statusLine consent: a status line that differs from m3's is never replaced
    silently. If the live one differs the user is asked (default YES); when it
    is replaced, the prior statusLine JSON is first saved verbatim to a sidecar
    m3_prior_statusline_{YYYY.MM.DD}-{HH.MM.SS}.md beside settings.json.
    settings.json is strict JSON, so the prior config goes in a file rather
    than an inline comment.

    Note: generate_configs() is always called to build the canonical settings,
    and it writes the config templates as a side effect — including when
    dry_run is True. Only `settings_path` and the sidecar are skipped on
    dry-run.

    Args:
        settings_path: Target settings.json; defaults to ~/.claude/settings.json.
        assume_yes: Apply without prompting (also adopts m3's status line).
        dry_run: Compute and return the diff without writing settings_path.
        keep_status_line: Never replace an existing, different status line.

    Returns:
        {"changed": bool, "path": str, "diff": str}.

    Raises:
        RuntimeError: generate_configs produced no Claude settings, or the
            existing settings file is non-empty but is not a JSON object.
            Continuing would overwrite the user's settings with m3-only
            content, so the install refuses.
    """
    import difflib
    from datetime import datetime

    repo_root_fwd = _m3_repo_root().replace("\\", "/")

    # Build the canonical m3 settings via the generator (writes the template
    # too); it stashes the Claude dict on itself for reuse here.
    generate_configs()
    m3 = getattr(generate_configs, "_last_claude", None)
    if not m3:
        raise RuntimeError("generate_configs did not produce claude settings")

    if settings_path is None:
        settings_path = os.path.join(
            os.path.expanduser("~"), ".claude", "settings.json"
        )
    # Resolve the directory from the absolute path: a bare relative filename
    # has an empty dirname, which os.makedirs() rejects.
    settings_dir = os.path.dirname(os.path.abspath(settings_path))

    # Load existing live settings (preserve everything we don't own).
    live = {}
    if os.path.exists(settings_path):
        # utf-8-sig tolerates a BOM left by some Windows editors.
        with open(settings_path, encoding="utf-8-sig") as f:
            raw = f.read()
        if raw.strip():  # an empty file is simply "no settings yet"
            try:
                live = json.loads(raw)
            except json.JSONDecodeError as exc:
                raise RuntimeError(
                    f"{settings_path} is not valid JSON ({exc}); refusing to "
                    "overwrite it. Fix or move the file, then re-run."
                ) from exc
            if not isinstance(live, dict):
                raise RuntimeError(
                    f"{settings_path} does not contain a JSON object; refusing "
                    "to overwrite it. Fix or move the file, then re-run."
                )
    before = json.dumps(live, indent=2, sort_keys=True)

    # 1. hooks — replace m3-owned list entries in place, keep user hooks.
    live_hooks = live.get("hooks")
    if not isinstance(live_hooks, dict):
        live_hooks = {}
    for event, m3_entries in m3.get("hooks", {}).items():
        cleaned = _strip_m3_hook_entries(live_hooks.get(event, []), repo_root_fwd)
        live_hooks[event] = cleaned + m3_entries
    if live_hooks:
        live["hooks"] = live_hooks

    # 2. statusLine — adopt m3's when nothing is set, when the current one is
    #    already exactly m3's, or when the user consents.
    cur_status = live.get("statusLine")
    cur_cmd = cur_status.get("command", "") if isinstance(cur_status, dict) else ""
    m3_status = m3["statusLine"]
    m3_cmd = m3_status.get("command", "")

    if not cur_status:
        live["statusLine"] = m3_status  # none set — just adopt ours
    elif cur_cmd != m3_cmd:
        # Differs from ours: default YES, but ask unless told otherwise.
        if keep_status_line:
            adopt = False
        elif assume_yes or dry_run:
            adopt = True  # default yes for headless/dry-run
        else:
            try:
                resp = input(
                    "\nReplace your current status line with m3's "
                    "(statusline-command.sh)? [Y/n] "
                ).strip().lower()
            except EOFError:
                resp = ""
            adopt = resp in ("", "y", "yes")  # default yes on empty
        if adopt:
            # Preserve the prior statusLine to a timestamped sidecar before swap.
            if not dry_run:
                ts = datetime.now().strftime("%Y.%m.%d-%H.%M.%S")
                sidecar = os.path.join(settings_dir, f"m3_prior_statusline_{ts}.md")
                body = (
                    f"# Prior Claude statusLine (replaced by m3 install {ts})\n\n"
                    "Your previous `statusLine` was replaced by m3's "
                    "`statusline-command.sh`. To restore it, copy the JSON below "
                    "back into the `statusLine` key of your settings.json.\n\n"
                    "```json\n" + json.dumps(cur_status, indent=2) + "\n```\n"
                )
                with open(sidecar, "w", encoding="utf-8") as f:
                    f.write(body)
                print(f"Saved prior status line to {sidecar}")
            live["statusLine"] = m3_status

    # 3. mcpServers — merge by key: m3 keys overwrite, foreign servers preserved.
    live_mcp = live.get("mcpServers")
    if not isinstance(live_mcp, dict):
        live_mcp = {}
    live_mcp.update(m3.get("mcpServers", {}))
    live["mcpServers"] = live_mcp

    after = json.dumps(live, indent=2, sort_keys=True)
    diff = "".join(difflib.unified_diff(
        before.splitlines(keepends=True), after.splitlines(keepends=True),
        fromfile="settings.json (current)", tofile="settings.json (after install)",
    ))
    changed = before != after

    if dry_run or not changed:
        return {"changed": changed, "path": settings_path, "diff": diff}

    print(f"\nThe following changes will be merged into {settings_path}:\n")
    print(diff or "(no textual diff)")
    if not assume_yes:
        try:
            resp = input("\nApply these changes? [y/N] ").strip().lower()
        except EOFError:
            resp = "n"
        if resp not in ("y", "yes"):
            print("Skipped — no changes written.")
            return {"changed": False, "path": settings_path, "diff": diff}

    # Back up, then write atomically.
    os.makedirs(settings_dir, exist_ok=True)
    if os.path.exists(settings_path):
        bak = settings_path + ".bak"
        with open(settings_path, encoding="utf-8") as f:
            backup = f.read()
        with open(bak, "w", encoding="utf-8") as f:
            f.write(backup)
        print(f"Backed up existing settings to {bak}")
    _write_json(settings_path, live)
    print(f"Installed m3 hooks + statusLine + mcpServers into {settings_path}")
    return {"changed": True, "path": settings_path, "diff": diff}
def announce(reason: str, fallback: Path, agent: str, event: str) -> None:
    """Surface a capture failure to the user through the hook's stdout JSON.

    Prints one JSON object with a `systemMessage` key. stdout must stay
    JSON-only for the harness to parse it, so this is meant to be the single
    stdout print of the hook; everything else goes to stderr.

    Args:
        reason: Short description of why the chatlog was not saved.
        fallback: Path of the fallback file that was written; only its file
            name is shown, under the fixed "~/.m3/unsaved_chats/" prefix.
        agent: Agent label included in the message.
        event: Hook event name included in the message.
    """
    try:
        display = "~/.m3/unsaved_chats/" + fallback.name
    except Exception:  # noqa: BLE001 — fallback was not path-like; show it raw
        display = str(fallback)
    msg = (
        f"\U0001f6a8 M3 CHATLOG NOT SAVED ({agent}/{event}): {reason}. "
        f"Fallback written to {display}. Restart the m3 MCP server, then re-run "
        "ingest on the transcript. Session context is at risk until you do."
    )
    print(json.dumps({"systemMessage": msg}), flush=True)


def _extract_last_json_object(output: str):
    """Return the last top-level JSON object found in `output`, or None.

    ingest interleaves log lines with a final JSON object that may be compact
    (single line) or pretty-printed (multi-line). The text is scanned left to
    right: at every '{' a JSON document is decoded with
    JSONDecoder.raw_decode. A successful decode is remembered and the scan
    resumes after it, so nested objects are not revisited; a failed decode
    (a brace in a log line) just moves on to the next '{'.

    Anchoring on the final '}' instead would let any trailing log line that
    contains a brace turn a successful ingest into "no result", which callers
    report as 0 rows written.

    Args:
        output: Captured stdout text; empty or None yields None.

    Returns:
        The last decodable JSON object as a dict, or None if there is none.
    """
    if not output:
        return None
    decoder = json.JSONDecoder()
    last = None
    pos = output.find("{")
    while pos != -1:
        try:
            # Slice rather than pass an index: raw_decode(s) is the documented form.
            obj, consumed = decoder.raw_decode(output[pos:])
        except json.JSONDecodeError:
            pos = output.find("{", pos + 1)
            continue
        if isinstance(obj, dict):
            last = obj
        pos = output.find("{", pos + consumed)
    return last
+ result_json = _extract_last_json_object(output) written = int(result_json.get("written", 0)) if result_json else 0 error = result_json.get("error") if result_json else "m3 ingest failed or unreachable" return written, error, result.returncode @@ -142,6 +206,7 @@ def main() -> int: reason = f"chatlog_ingest.py not found under {repo} — set M3_HOME" fallback = write_fallback(transcript, session_id, AGENT, event_name, variant, reason) scream(reason, fallback, AGENT, event_name) + announce(reason, fallback, AGENT, event_name) return 1 py = find_python(repo) @@ -155,6 +220,11 @@ def main() -> int: reason = error if error else "0 rows written — m3 may be down or unreachable" fallback = write_fallback(transcript, session_id, AGENT, event_name, variant, reason) scream(reason, fallback, AGENT, event_name) + announce(reason, fallback, AGENT, event_name) + # Force non-zero even if ingest exited 0: a 0-rows "success" is the exact + # silent-failure the harness would otherwise swallow. Non-zero makes the + # harness surface the hook, complementing the stdout systemMessage. + return rc if rc != 0 else 1 return rc diff --git a/bin/hooks/chatlog/session_start_capture_check.py b/bin/hooks/chatlog/session_start_capture_check.py new file mode 100644 index 0000000..bf4c377 --- /dev/null +++ b/bin/hooks/chatlog/session_start_capture_check.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python +"""SessionStart hook: verify m3 chatlog capture is actually LANDING writes. + +Prints a plain one-line GREEN/RED status as a hook systemMessage. Deliberately +checks whether rows are being WRITTEN to the DB in the recent past, NOT whether +config.host_agents[*].enabled is set — that flag reflects only whether a per-turn +shell hook was wired into settings.json at init time, and reads `false` even when +the Stop-hook / MCP write path is capturing fine (confirmed 2026-06-13). 
import json
import os
import sqlite3
import sys
from pathlib import Path

# Look-back window, in minutes, used to decide whether capture is "working".
WINDOW_MIN = 15


def _resolve_db() -> str:
    """Resolve the m3 agent-memory DB path portably (no hardcoded user path).

    Order: m3_sdk.resolve_db_path (canonical) -> M3_DB_PATH env ->
    <repo>/engine/agent_memory.db, where <repo> is M3_HOME if set and otherwise
    the directory three levels above this file's directory.

    Returns:
        The resolved database path as a string. The file is not checked for
        existence here.
    """
    # bin/hooks/chatlog/this_file.py -> repo root is parents[3]
    repo = Path(os.environ.get("M3_HOME") or Path(__file__).resolve().parents[3])
    try:
        sys.path.insert(0, str(repo / "bin"))
        from m3_sdk import resolve_db_path  # type: ignore
        p = resolve_db_path(None)
        if p:
            return os.path.abspath(p)
    except Exception:  # noqa: BLE001 — fall back to path heuristics
        pass
    env = os.environ.get("M3_DB_PATH")
    if env:
        return os.path.abspath(env)
    return str(repo / "engine" / "agent_memory.db")


def main() -> None:
    """Print a one-line capture status as hook JSON on stdout.

    Counts chat_log rows created in the last WINDOW_MIN minutes and prints
    {"systemMessage": ..., "suppressOutput": true}: a WORKING line when the
    count is positive, a WARNING when it is zero, and a FAILED/UNKNOWN line
    when the database cannot be resolved, found or queried.

    Never raises: path resolution, the existence check and the query all sit
    inside the same try, because a monitoring check must not break session
    start.
    """
    try:
        db = _resolve_db()
        # sqlite3.connect() would silently create an empty database at a
        # missing path; a read-only health check must not leave files behind.
        if not os.path.isfile(db):
            raise FileNotFoundError(f"database not found at {db}")
        conn = sqlite3.connect(db, timeout=5)
        try:
            (count,) = conn.execute(
                "SELECT COUNT(*) FROM memory_items "
                "WHERE type = 'chat_log' "
                "AND created_at > datetime('now', ?)",
                (f"-{WINDOW_MIN} minutes",),
            ).fetchone()
        finally:
            conn.close()
    except Exception as exc:  # noqa: BLE001 — never break session start
        msg = (
            f"\U0001f6a8 m3 chatlog check FAILED to query DB: {exc} "
            "— capture status UNKNOWN. Verify before trusting memory."
        )
        # Same output shape as the other outcomes.
        print(json.dumps({"systemMessage": msg, "suppressOutput": True}))
        return

    if count > 0:
        msg = f"✅ m3 chatlog capture: WORKING ({count} rows/{WINDOW_MIN}min)"
    else:
        msg = (
            f"\U0001f6a8 WARNING: m3 chatlog capture NOT writing "
            f"(0 rows in last {WINDOW_MIN}min). Design decisions are NOT being "
            "preserved. Restart the m3 MCP server before continuing."
        )
    print(json.dumps({"systemMessage": msg, "suppressOutput": True}))


if __name__ == "__main__":
    main()
    sys.exit(0)
+ Falls back to pure-Python `max(cosine ...)` when the Rust symbol is absent (older wheel) + or the FFI errors — so retrieval never fails on a stale core. + """ + if not blobs: + return [] + if not anchors: + return [-1.0] * len(blobs) + rs = config.m3_core_rs + if rs is not None and hasattr(rs, "cosine_batch_maxpool_packed"): + try: + import struct + anchor_buf = b"".join(struct.pack(f"{len(a)}f", *a) for a in anchors) + cand_buf = b"".join(blobs) + return rs.cosine_batch_maxpool_packed(anchor_buf, cand_buf, dim) + except Exception as e: # noqa: BLE001 — fall back rather than fail retrieval + logger.debug(f"cosine_batch_maxpool_packed Rust path failed, falling back: {e}") + # Pure-Python / fallback: unpack candidates once, max cosine over anchors per candidate. + matrix = _unpack_many(blobs, dim=dim) + out = [] + for v in matrix: + out.append(max((_cosine(a, v) for a in anchors), default=-1.0)) + return out diff --git a/bin/setup_memory.py b/bin/setup_memory.py index b9c9619..2b96b96 100644 --- a/bin/setup_memory.py +++ b/bin/setup_memory.py @@ -16,7 +16,10 @@ VENV = BASE / ".venv" PY = VENV / ("Scripts/python.exe" if IS_WIN else "bin/python") PIP = VENV / ("Scripts/pip.exe" if IS_WIN else "bin/pip") -REQS = BASE / ("requirements-windows.txt" if IS_WIN else "requirements.txt") +# Prefer a Windows-specific requirements file if one exists, else fall back to the +# common requirements.txt (the windows variant is optional and may be absent). +_req_win = BASE / "requirements-windows.txt" +REQS = _req_win if (IS_WIN and _req_win.exists()) else BASE / "requirements.txt" # Bootstrap honors --database (positional for simplicity) and M3_DATABASE env. # Called before m3_sdk is importable in a fresh checkout, so resolution is # kept self-contained rather than delegated to resolve_db_path. @@ -50,12 +53,30 @@ def run(*args, **kw): log(f"Installing dependencies from {REQS.name} ...") run(str(PY), "-m", "pip", "install", "-r", str(REQS), "--quiet") -# 4. Run migrations +# 4. 
def _mig_key(p):
    """Sort key that orders migration files by their leading integer prefix.

    The prefix is the part of the file name before the first underscore.
    Comparing it as an integer keeps ordering correct regardless of
    zero-padding; a name whose prefix is not an integer sorts after all
    numbered files, ordered by name.

    Args:
        p: A path-like object exposing a `.name` attribute.

    Returns:
        A (number, file name) tuple.
    """
    filename = p.name
    prefix, _, _ = filename.partition("_")
    try:
        order = int(prefix)
    except ValueError:
        order = 1 << 30  # non-numeric prefixes last, stable by name
    return (order, filename)
Re-running upgrades in place without") + log("duplicate or conflicting lines, and backs up your current settings first:") + log("") + log(f' "{PY}" "{BASE / "bin" / "generate_configs.py"}" --install-claude') + log("") + log("Add --yes to skip the confirmation prompt, or --dry-run to preview only.") +else: + log("") + log("No ~/.claude install detected. To wire m3 into Claude Code later, run:") + log(f' "{PY}" "{BASE / "bin" / "generate_configs.py"}" --install-claude') + +log("\n=== Manual fallback — paste this into ~/.claude/settings.json mcpServers ===") print(json.dumps({"mcpServers": config}, indent=2)) log("Setup complete.") diff --git a/tests/test_cross_platform_config.py b/tests/test_cross_platform_config.py new file mode 100644 index 0000000..9bd2838 --- /dev/null +++ b/tests/test_cross_platform_config.py @@ -0,0 +1,189 @@ +"""Cross-platform path-resolution regression tests for the Claude install path. + +Locks in the OS-aware behavior of bin/generate_configs.py and bin/setup_memory.py +so a future edit can't reintroduce the bugs fixed on 2026-06-14: + + * backslash venv paths that a shell (Git Bash on Windows) mangles to + "command not found", + * a /bin/sh hook invocation that doesn't exist on native Windows, + * the Windows ".venv/Scripts/python.exe" layout leaking onto macOS/Linux + (which use ".venv/bin/python", no .exe). + +These tests don't touch a real venv or DB — they monkeypatch os.name / os.path.exists +to simulate each OS and inspect the generated command strings. bin/ is on sys.path +via tests/conftest.py. 
def _all_commands(settings):
    """Collect every command string in a settings dict, in a fixed order.

    Order: each hook command (per event, per entry), then the statusLine
    command, then for each MCP server its command followed by its args.
    """
    hook_cmds = [
        hook["command"]
        for entries in settings.get("hooks", {}).values()
        for entry in entries
        for hook in entry.get("hooks", [])
    ]
    status_cmd = settings["statusLine"]["command"]
    server_cmds = []
    for server in settings.get("mcpServers", {}).values():
        server_cmds += [server["command"], *server.get("args", [])]
    return [*hook_cmds, status_cmd, *server_cmds]
+ monkeypatch.setattr(g, "_write_json", lambda path, data: None) + + g.generate_configs() + return g.generate_configs._last_claude, venv_py_fwd + + +# ── interpreter layout per OS ──────────────────────────────────────────────── + +def test_windows_interpreter_uses_scripts_python_exe(monkeypatch): + settings, venv_py = _build_settings(monkeypatch, "nt") + assert venv_py.endswith("/.venv/Scripts/python.exe") + session = settings["hooks"]["SessionStart"][0]["hooks"][0]["command"] + assert session.startswith(venv_py) + + +def test_posix_interpreter_uses_bin_python_no_exe(monkeypatch): + settings, venv_py = _build_settings(monkeypatch, "posix") + assert venv_py.endswith("/.venv/bin/python") + assert ".exe" not in venv_py + session = settings["hooks"]["SessionStart"][0]["hooks"][0]["command"] + assert session.startswith(venv_py) + + +# ── invariants that must hold on EVERY OS ──────────────────────────────────── + +@pytest.mark.parametrize("os_name", ["nt", "posix"]) +def test_no_backslashes_in_any_command(monkeypatch, os_name): + settings, _ = _build_settings(monkeypatch, os_name) + for cmd in _all_commands(settings): + assert "\\" not in cmd, f"backslash in command: {cmd!r}" + + +@pytest.mark.parametrize("os_name", ["nt", "posix"]) +def test_no_bin_sh_dependency(monkeypatch, os_name): + settings, _ = _build_settings(monkeypatch, os_name) + for cmd in _all_commands(settings): + assert "/bin/sh" not in cmd, f"/bin/sh dependency in: {cmd!r}" + + +@pytest.mark.parametrize("os_name", ["nt", "posix"]) +def test_session_start_hook_present(monkeypatch, os_name): + settings, _ = _build_settings(monkeypatch, os_name) + assert "SessionStart" in settings["hooks"] + inner = settings["hooks"]["SessionStart"][0]["hooks"][0]["command"] + assert "session_start_capture_check.py" in inner + + +def test_posix_has_no_windows_interpreter_leak(monkeypatch): + settings, _ = _build_settings(monkeypatch, "posix") + for cmd in _all_commands(settings): + assert "Scripts/python.exe" not in cmd, 
f"windows layout leaked: {cmd!r}" + assert ".exe" not in cmd, f".exe leaked onto posix: {cmd!r}" + + +def test_windows_has_no_posix_only_interpreter(monkeypatch): + # On Windows the venv python must be the Scripts/.exe form, not bin/python. + settings, _ = _build_settings(monkeypatch, "nt") + session = settings["hooks"]["SessionStart"][0]["hooks"][0]["command"] + assert "/.venv/Scripts/python.exe" in session + assert "/.venv/bin/python " not in session + + +# ── setup_memory.py path-resolution snippet (replicated logic) ─────────────── +# setup_memory.py runs pip/migrations at import, so we can't import it directly +# in a unit test. These assert the SAME resolution rules it uses, guarding +# against a regression to the windows-only .exe layout or the missing +# requirements-windows.txt crash. + +@pytest.mark.parametrize( + "is_win, expected_suffix", + [(True, ".venv/Scripts/python.exe"), (False, ".venv/bin/python")], +) +def test_setup_venv_layout_matches_os(is_win, expected_suffix): + from pathlib import PurePosixPath + + base = PurePosixPath("/repo/m3-memory") + venv = base / ".venv" + py = venv / ("Scripts/python.exe" if is_win else "bin/python") + assert str(py).endswith(expected_suffix) + if not is_win: + assert ".exe" not in str(py) + + +def test_setup_requirements_falls_back_when_windows_file_absent(tmp_path): + # Mirrors setup_memory.py: prefer requirements-windows.txt only if it exists, + # else requirements.txt — so a fresh Windows install never FileNotFoundErrors. + base = tmp_path + (base / "requirements.txt").write_text("pkg\n") + req_win = base / "requirements-windows.txt" + is_win = True + reqs = req_win if (is_win and req_win.exists()) else base / "requirements.txt" + assert reqs.name == "requirements.txt" + + # And when the windows file DOES exist on Windows, it wins. 
+ req_win.write_text("pkg-win\n") + reqs = req_win if (is_win and req_win.exists()) else base / "requirements.txt" + assert reqs.name == "requirements-windows.txt" + + +def test_setup_migrations_skip_down_and_order_numerically(): + # Mirrors setup_memory.py forward-migration selection: only .up.sql / bare + # NNN_*.sql, never .down.sql, ordered by integer prefix. + names = [ + "001_initial_schema.sql", + "013_conversation_id.up.sql", + "013_conversation_id.down.sql", + "002_enforce.sql", + "010_tier.sql", + ] + + def mig_key(name): + stem = name.split("_", 1)[0] + try: + return (int(stem), name) + except ValueError: + return (1 << 30, name) + + forward = [n for n in names if not n.endswith(".down.sql")] + ordered = sorted(forward, key=mig_key) + + assert all(not n.endswith(".down.sql") for n in ordered) + # numeric, not lexicographic: 002 before 010 before 013 + prefixes = [int(n.split("_", 1)[0]) for n in ordered] + assert prefixes == sorted(prefixes) + assert prefixes == [1, 2, 10, 13]