From 2807e943af6772b4190eb78ba965d14c66e5171d Mon Sep 17 00:00:00 2001 From: Punleuk Oum <5661986+illeatmyhat@users.noreply.github.com> Date: Mon, 8 Jun 2026 09:56:17 -0700 Subject: [PATCH 01/12] refactor(platform-integrations): always-on EVOLVE.md for Bob+Codex (no hooks/modes) Replace evolve-lite's hook- and custom-mode-driven recall/learn with a self-directed memory methodology (EVOLVE.md) installed as an always-on instruction: - Bob: full EVOLVE.md content as a global rules file (~/.bob/rules/00-evolve-lite.md); drop the custom_modes.yaml merge. - Codex: a single always-read pointer line in ~/.codex/AGENTS.md -> ~/.codex/evolve-lite/EVOLVE.md copy; drop the UserPromptSubmit/SessionStart hooks and their generation code. - Recall is native (model reads ./.evolve/memory/MEMORY.md); a self-contained audit_recall.py logs consulted entries for provenance (session id from $CLAUDE_CODE_SESSION_ID / $CODEX_THREAD_ID, self-mint+echo fallback on bob). - FileOps: add sentinel-block and single-line marker injectors. - Installer tests rewritten to the new contract (229 passing). Phase 1 of the redesign; Claude/claw-code to follow. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../bob/evolve-lite/EVOLVE.md | 82 +++ .../bob/evolve-lite/scripts/audit_recall.py | 66 ++ .../claude/plugins/evolve-lite/EVOLVE.md | 82 +++ .../evolve-lite/scripts/audit_recall.py | 66 ++ .../claw-code/plugins/evolve-lite/EVOLVE.md | 82 +++ .../evolve-lite/scripts/audit_recall.py | 66 ++ .../codex/plugins/evolve-lite/EVOLVE.md | 82 +++ .../evolve-lite/scripts/audit_recall.py | 66 ++ platform-integrations/install.sh | 562 +++++++++--------- plugin-source/EVOLVE.md.j2 | 82 +++ plugin-source/build_plugins.py | 4 + plugin-source/scripts/audit_recall.py | 66 ++ tests/platform_integrations/conftest.py | 94 +++ .../test_audit_recall.py | 101 ++++ tests/platform_integrations/test_codex.py | 285 +++------ tests/platform_integrations/test_dry_run.py | 16 +- .../platform_integrations/test_idempotency.py | 264 +++++--- .../platform_integrations/test_marker_line.py | 116 ++++ .../test_preservation.py | 103 +++- 19 files changed, 1678 insertions(+), 607 deletions(-) create mode 100644 platform-integrations/bob/evolve-lite/EVOLVE.md create mode 100644 platform-integrations/bob/evolve-lite/scripts/audit_recall.py create mode 100644 platform-integrations/claude/plugins/evolve-lite/EVOLVE.md create mode 100644 platform-integrations/claude/plugins/evolve-lite/scripts/audit_recall.py create mode 100644 platform-integrations/claw-code/plugins/evolve-lite/EVOLVE.md create mode 100644 platform-integrations/claw-code/plugins/evolve-lite/scripts/audit_recall.py create mode 100644 platform-integrations/codex/plugins/evolve-lite/EVOLVE.md create mode 100644 platform-integrations/codex/plugins/evolve-lite/scripts/audit_recall.py create mode 100644 plugin-source/EVOLVE.md.j2 create mode 100644 plugin-source/scripts/audit_recall.py create mode 100644 tests/platform_integrations/test_audit_recall.py create mode 100644 tests/platform_integrations/test_marker_line.py diff --git a/platform-integrations/bob/evolve-lite/EVOLVE.md 
b/platform-integrations/bob/evolve-lite/EVOLVE.md new file mode 100644 index 00000000..94073d4c --- /dev/null +++ b/platform-integrations/bob/evolve-lite/EVOLVE.md @@ -0,0 +1,82 @@ +# Evolve — self-directed memory + +You have a persistent, file-based memory for the current project, stored under +`./.evolve/memory/` (relative to the workspace/project root). You decide, on +your own judgment, when something is worth remembering — nothing forces a save, +and there is no step to "complete." Curate this memory like notes you'll thank +yourself for later: small, accurate, high-signal. + +## Recall — at the start of a non-trivial task + +Before substantive work (code changes, debugging, repo exploration, or +environment/tooling investigation), read your memory index at +`./.evolve/memory/MEMORY.md` if it exists. It holds one line per memory with a +short description. Open the individual memory files whose description looks +relevant to the task at hand, and let them inform what you do. If the index is +missing or nothing looks relevant, just proceed — that's normal. + +Memories reflect what was true when written. If a memory names a file, +function, command, or flag, verify it still exists before relying on it. + +## Record what you consulted + +After recall, log which entries you actually opened, so the value of this memory +can be measured over time. Run: + +``` +python3 ~/.bob/evolve-lite/audit_recall.py [ ...] +``` + +Pass the memory files you read this turn (space-separated paths, relative to the +project root). Skip this step entirely if you consulted no memories. If the +command prints a line beginning `evolve-session:`, include that line once, +verbatim, somewhere in your reply — it lets later analysis tie this session to +what you recalled. + +## Save — only when you learn something durable + +Near the end of a task, if it produced a reusable fact that isn't already +obvious from the code or git history — and only then — write it to memory. 
+Saving nothing is the right outcome more often than not; never force a +low-value memory just to have saved one. + +Each memory is one file holding one fact, under `./.evolve/memory/` (create the +directory if it doesn't exist), with frontmatter: + +```markdown +--- +name: +description: +metadata: + type: user | feedback | project | reference +--- + + +``` + +Types: +- **user** — who the user is: role, expertise, durable preferences. +- **feedback** — guidance on how you should work, both corrections and + confirmed approaches; always include the why. +- **project** — ongoing work, goals, or constraints not derivable from the code + or git history; convert relative dates ("next week") to absolute ones. +- **reference** — pointers to external resources (URLs, dashboards, tickets). + +In the body, link related memories with `[[name]]`, where `name` is another +memory's `name:` slug. Link liberally; a `[[name]]` with no file yet marks +something worth writing later, not an error. + +After writing the file, add a one-line pointer to `./.evolve/memory/MEMORY.md`: +`- [Title](file.md) — short hook`. MEMORY.md is the index you read during +recall — one line per memory, no frontmatter, never put memory content there. + +## When NOT to save, and housekeeping + +- Don't duplicate what the repo already records: code structure, git history, + READMEs, existing docs. If asked to remember one of those, ask what was + non-obvious about it and save that instead. +- Don't save what only matters to the current conversation. +- Before saving, check for an existing memory that already covers it — update + that file rather than creating a duplicate. +- Delete memories that turn out to be wrong. 
diff --git a/platform-integrations/bob/evolve-lite/scripts/audit_recall.py b/platform-integrations/bob/evolve-lite/scripts/audit_recall.py new file mode 100644 index 00000000..42dc1104 --- /dev/null +++ b/platform-integrations/bob/evolve-lite/scripts/audit_recall.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +"""Append a recall-audit row to .evolve/audit.log. + +Self-contained (no third-party or evolve-lite lib imports) so it can be dropped +at a single path and run by a model-invoked shell command on any platform. + +Usage: + python3 audit_recall.py [ ...] + +Records which memory entries the model consulted this turn so the `provenance` +analysis can later judge whether they influenced the outcome. Session id is +resolved from the host's environment when available and falls back to a freshly +minted UUID (printed as `evolve-session: ` for the model to echo). +""" + +from __future__ import annotations + +import json +import os +import sys +import uuid +from datetime import datetime, timezone +from pathlib import Path + + +def _evolve_dir() -> Path: + env = os.environ.get("EVOLVE_DIR") + return Path(env) if env else Path.cwd() / ".evolve" + + +def _session_id() -> tuple[str, bool]: + """Return (session_id, self_minted).""" + for var in ("CLAUDE_CODE_SESSION_ID", "CODEX_THREAD_ID"): + val = os.environ.get(var) + if val: + return val, False + return str(uuid.uuid4()), True + + +def main(argv: list[str]) -> int: + entities = [a for a in argv if a.strip()] + if not entities: + return 0 + + session_id, minted = _session_id() + row = { + "event": "recall", + "session_id": session_id, + "entities": entities, + "ts": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ"), + } + + log = _evolve_dir() / "audit.log" + log.parent.mkdir(parents=True, exist_ok=True) + with log.open("a", encoding="utf-8") as fh: + fh.write(json.dumps(row) + "\n") + + if minted: + print(f"evolve-session: {session_id}") + count = len(entities) + print(f"Recorded recall of {count} memory entr{'y' 
if count == 1 else 'ies'}.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/platform-integrations/claude/plugins/evolve-lite/EVOLVE.md b/platform-integrations/claude/plugins/evolve-lite/EVOLVE.md new file mode 100644 index 00000000..f8eefc6b --- /dev/null +++ b/platform-integrations/claude/plugins/evolve-lite/EVOLVE.md @@ -0,0 +1,82 @@ +# Evolve — self-directed memory + +You have a persistent, file-based memory for the current project, stored under +`./.evolve/memory/` (relative to the workspace/project root). You decide, on +your own judgment, when something is worth remembering — nothing forces a save, +and there is no step to "complete." Curate this memory like notes you'll thank +yourself for later: small, accurate, high-signal. + +## Recall — at the start of a non-trivial task + +Before substantive work (code changes, debugging, repo exploration, or +environment/tooling investigation), read your memory index at +`./.evolve/memory/MEMORY.md` if it exists. It holds one line per memory with a +short description. Open the individual memory files whose description looks +relevant to the task at hand, and let them inform what you do. If the index is +missing or nothing looks relevant, just proceed — that's normal. + +Memories reflect what was true when written. If a memory names a file, +function, command, or flag, verify it still exists before relying on it. + +## Record what you consulted + +After recall, log which entries you actually opened, so the value of this memory +can be measured over time. Run: + +``` +python3 ~/.claude/evolve-lite/audit_recall.py [ ...] +``` + +Pass the memory files you read this turn (space-separated paths, relative to the +project root). Skip this step entirely if you consulted no memories. If the +command prints a line beginning `evolve-session:`, include that line once, +verbatim, somewhere in your reply — it lets later analysis tie this session to +what you recalled. 
+ +## Save — only when you learn something durable + +Near the end of a task, if it produced a reusable fact that isn't already +obvious from the code or git history — and only then — write it to memory. +Saving nothing is the right outcome more often than not; never force a +low-value memory just to have saved one. + +Each memory is one file holding one fact, under `./.evolve/memory/` (create the +directory if it doesn't exist), with frontmatter: + +```markdown +--- +name: +description: +metadata: + type: user | feedback | project | reference +--- + + +``` + +Types: +- **user** — who the user is: role, expertise, durable preferences. +- **feedback** — guidance on how you should work, both corrections and + confirmed approaches; always include the why. +- **project** — ongoing work, goals, or constraints not derivable from the code + or git history; convert relative dates ("next week") to absolute ones. +- **reference** — pointers to external resources (URLs, dashboards, tickets). + +In the body, link related memories with `[[name]]`, where `name` is another +memory's `name:` slug. Link liberally; a `[[name]]` with no file yet marks +something worth writing later, not an error. + +After writing the file, add a one-line pointer to `./.evolve/memory/MEMORY.md`: +`- [Title](file.md) — short hook`. MEMORY.md is the index you read during +recall — one line per memory, no frontmatter, never put memory content there. + +## When NOT to save, and housekeeping + +- Don't duplicate what the repo already records: code structure, git history, + READMEs, existing docs. If asked to remember one of those, ask what was + non-obvious about it and save that instead. +- Don't save what only matters to the current conversation. +- Before saving, check for an existing memory that already covers it — update + that file rather than creating a duplicate. +- Delete memories that turn out to be wrong. 
diff --git a/platform-integrations/claude/plugins/evolve-lite/scripts/audit_recall.py b/platform-integrations/claude/plugins/evolve-lite/scripts/audit_recall.py new file mode 100644 index 00000000..42dc1104 --- /dev/null +++ b/platform-integrations/claude/plugins/evolve-lite/scripts/audit_recall.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +"""Append a recall-audit row to .evolve/audit.log. + +Self-contained (no third-party or evolve-lite lib imports) so it can be dropped +at a single path and run by a model-invoked shell command on any platform. + +Usage: + python3 audit_recall.py [ ...] + +Records which memory entries the model consulted this turn so the `provenance` +analysis can later judge whether they influenced the outcome. Session id is +resolved from the host's environment when available and falls back to a freshly +minted UUID (printed as `evolve-session: ` for the model to echo). +""" + +from __future__ import annotations + +import json +import os +import sys +import uuid +from datetime import datetime, timezone +from pathlib import Path + + +def _evolve_dir() -> Path: + env = os.environ.get("EVOLVE_DIR") + return Path(env) if env else Path.cwd() / ".evolve" + + +def _session_id() -> tuple[str, bool]: + """Return (session_id, self_minted).""" + for var in ("CLAUDE_CODE_SESSION_ID", "CODEX_THREAD_ID"): + val = os.environ.get(var) + if val: + return val, False + return str(uuid.uuid4()), True + + +def main(argv: list[str]) -> int: + entities = [a for a in argv if a.strip()] + if not entities: + return 0 + + session_id, minted = _session_id() + row = { + "event": "recall", + "session_id": session_id, + "entities": entities, + "ts": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ"), + } + + log = _evolve_dir() / "audit.log" + log.parent.mkdir(parents=True, exist_ok=True) + with log.open("a", encoding="utf-8") as fh: + fh.write(json.dumps(row) + "\n") + + if minted: + print(f"evolve-session: {session_id}") + count = len(entities) + print(f"Recorded 
recall of {count} memory entr{'y' if count == 1 else 'ies'}.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/platform-integrations/claw-code/plugins/evolve-lite/EVOLVE.md b/platform-integrations/claw-code/plugins/evolve-lite/EVOLVE.md new file mode 100644 index 00000000..7b6417f1 --- /dev/null +++ b/platform-integrations/claw-code/plugins/evolve-lite/EVOLVE.md @@ -0,0 +1,82 @@ +# Evolve — self-directed memory + +You have a persistent, file-based memory for the current project, stored under +`./.evolve/memory/` (relative to the workspace/project root). You decide, on +your own judgment, when something is worth remembering — nothing forces a save, +and there is no step to "complete." Curate this memory like notes you'll thank +yourself for later: small, accurate, high-signal. + +## Recall — at the start of a non-trivial task + +Before substantive work (code changes, debugging, repo exploration, or +environment/tooling investigation), read your memory index at +`./.evolve/memory/MEMORY.md` if it exists. It holds one line per memory with a +short description. Open the individual memory files whose description looks +relevant to the task at hand, and let them inform what you do. If the index is +missing or nothing looks relevant, just proceed — that's normal. + +Memories reflect what was true when written. If a memory names a file, +function, command, or flag, verify it still exists before relying on it. + +## Record what you consulted + +After recall, log which entries you actually opened, so the value of this memory +can be measured over time. Run: + +``` +python3 ~/.claw/evolve-lite/audit_recall.py [ ...] +``` + +Pass the memory files you read this turn (space-separated paths, relative to the +project root). Skip this step entirely if you consulted no memories. 
If the +command prints a line beginning `evolve-session:`, include that line once, +verbatim, somewhere in your reply — it lets later analysis tie this session to +what you recalled. + +## Save — only when you learn something durable + +Near the end of a task, if it produced a reusable fact that isn't already +obvious from the code or git history — and only then — write it to memory. +Saving nothing is the right outcome more often than not; never force a +low-value memory just to have saved one. + +Each memory is one file holding one fact, under `./.evolve/memory/` (create the +directory if it doesn't exist), with frontmatter: + +```markdown +--- +name: +description: +metadata: + type: user | feedback | project | reference +--- + + +``` + +Types: +- **user** — who the user is: role, expertise, durable preferences. +- **feedback** — guidance on how you should work, both corrections and + confirmed approaches; always include the why. +- **project** — ongoing work, goals, or constraints not derivable from the code + or git history; convert relative dates ("next week") to absolute ones. +- **reference** — pointers to external resources (URLs, dashboards, tickets). + +In the body, link related memories with `[[name]]`, where `name` is another +memory's `name:` slug. Link liberally; a `[[name]]` with no file yet marks +something worth writing later, not an error. + +After writing the file, add a one-line pointer to `./.evolve/memory/MEMORY.md`: +`- [Title](file.md) — short hook`. MEMORY.md is the index you read during +recall — one line per memory, no frontmatter, never put memory content there. + +## When NOT to save, and housekeeping + +- Don't duplicate what the repo already records: code structure, git history, + READMEs, existing docs. If asked to remember one of those, ask what was + non-obvious about it and save that instead. +- Don't save what only matters to the current conversation. 
+- Before saving, check for an existing memory that already covers it — update + that file rather than creating a duplicate. +- Delete memories that turn out to be wrong. diff --git a/platform-integrations/claw-code/plugins/evolve-lite/scripts/audit_recall.py b/platform-integrations/claw-code/plugins/evolve-lite/scripts/audit_recall.py new file mode 100644 index 00000000..42dc1104 --- /dev/null +++ b/platform-integrations/claw-code/plugins/evolve-lite/scripts/audit_recall.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +"""Append a recall-audit row to .evolve/audit.log. + +Self-contained (no third-party or evolve-lite lib imports) so it can be dropped +at a single path and run by a model-invoked shell command on any platform. + +Usage: + python3 audit_recall.py [ ...] + +Records which memory entries the model consulted this turn so the `provenance` +analysis can later judge whether they influenced the outcome. Session id is +resolved from the host's environment when available and falls back to a freshly +minted UUID (printed as `evolve-session: ` for the model to echo). 
+""" + +from __future__ import annotations + +import json +import os +import sys +import uuid +from datetime import datetime, timezone +from pathlib import Path + + +def _evolve_dir() -> Path: + env = os.environ.get("EVOLVE_DIR") + return Path(env) if env else Path.cwd() / ".evolve" + + +def _session_id() -> tuple[str, bool]: + """Return (session_id, self_minted).""" + for var in ("CLAUDE_CODE_SESSION_ID", "CODEX_THREAD_ID"): + val = os.environ.get(var) + if val: + return val, False + return str(uuid.uuid4()), True + + +def main(argv: list[str]) -> int: + entities = [a for a in argv if a.strip()] + if not entities: + return 0 + + session_id, minted = _session_id() + row = { + "event": "recall", + "session_id": session_id, + "entities": entities, + "ts": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ"), + } + + log = _evolve_dir() / "audit.log" + log.parent.mkdir(parents=True, exist_ok=True) + with log.open("a", encoding="utf-8") as fh: + fh.write(json.dumps(row) + "\n") + + if minted: + print(f"evolve-session: {session_id}") + count = len(entities) + print(f"Recorded recall of {count} memory entr{'y' if count == 1 else 'ies'}.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/platform-integrations/codex/plugins/evolve-lite/EVOLVE.md b/platform-integrations/codex/plugins/evolve-lite/EVOLVE.md new file mode 100644 index 00000000..c262f48f --- /dev/null +++ b/platform-integrations/codex/plugins/evolve-lite/EVOLVE.md @@ -0,0 +1,82 @@ +# Evolve — self-directed memory + +You have a persistent, file-based memory for the current project, stored under +`./.evolve/memory/` (relative to the workspace/project root). You decide, on +your own judgment, when something is worth remembering — nothing forces a save, +and there is no step to "complete." Curate this memory like notes you'll thank +yourself for later: small, accurate, high-signal. 
+ +## Recall — at the start of a non-trivial task + +Before substantive work (code changes, debugging, repo exploration, or +environment/tooling investigation), read your memory index at +`./.evolve/memory/MEMORY.md` if it exists. It holds one line per memory with a +short description. Open the individual memory files whose description looks +relevant to the task at hand, and let them inform what you do. If the index is +missing or nothing looks relevant, just proceed — that's normal. + +Memories reflect what was true when written. If a memory names a file, +function, command, or flag, verify it still exists before relying on it. + +## Record what you consulted + +After recall, log which entries you actually opened, so the value of this memory +can be measured over time. Run: + +``` +python3 ~/.codex/evolve-lite/audit_recall.py [ ...] +``` + +Pass the memory files you read this turn (space-separated paths, relative to the +project root). Skip this step entirely if you consulted no memories. If the +command prints a line beginning `evolve-session:`, include that line once, +verbatim, somewhere in your reply — it lets later analysis tie this session to +what you recalled. + +## Save — only when you learn something durable + +Near the end of a task, if it produced a reusable fact that isn't already +obvious from the code or git history — and only then — write it to memory. +Saving nothing is the right outcome more often than not; never force a +low-value memory just to have saved one. + +Each memory is one file holding one fact, under `./.evolve/memory/` (create the +directory if it doesn't exist), with frontmatter: + +```markdown +--- +name: +description: +metadata: + type: user | feedback | project | reference +--- + + +``` + +Types: +- **user** — who the user is: role, expertise, durable preferences. +- **feedback** — guidance on how you should work, both corrections and + confirmed approaches; always include the why. 
+- **project** — ongoing work, goals, or constraints not derivable from the code + or git history; convert relative dates ("next week") to absolute ones. +- **reference** — pointers to external resources (URLs, dashboards, tickets). + +In the body, link related memories with `[[name]]`, where `name` is another +memory's `name:` slug. Link liberally; a `[[name]]` with no file yet marks +something worth writing later, not an error. + +After writing the file, add a one-line pointer to `./.evolve/memory/MEMORY.md`: +`- [Title](file.md) — short hook`. MEMORY.md is the index you read during +recall — one line per memory, no frontmatter, never put memory content there. + +## When NOT to save, and housekeeping + +- Don't duplicate what the repo already records: code structure, git history, + READMEs, existing docs. If asked to remember one of those, ask what was + non-obvious about it and save that instead. +- Don't save what only matters to the current conversation. +- Before saving, check for an existing memory that already covers it — update + that file rather than creating a duplicate. +- Delete memories that turn out to be wrong. diff --git a/platform-integrations/codex/plugins/evolve-lite/scripts/audit_recall.py b/platform-integrations/codex/plugins/evolve-lite/scripts/audit_recall.py new file mode 100644 index 00000000..42dc1104 --- /dev/null +++ b/platform-integrations/codex/plugins/evolve-lite/scripts/audit_recall.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +"""Append a recall-audit row to .evolve/audit.log. + +Self-contained (no third-party or evolve-lite lib imports) so it can be dropped +at a single path and run by a model-invoked shell command on any platform. + +Usage: + python3 audit_recall.py [ ...] + +Records which memory entries the model consulted this turn so the `provenance` +analysis can later judge whether they influenced the outcome. 
Session id is +resolved from the host's environment when available and falls back to a freshly +minted UUID (printed as `evolve-session: ` for the model to echo). +""" + +from __future__ import annotations + +import json +import os +import sys +import uuid +from datetime import datetime, timezone +from pathlib import Path + + +def _evolve_dir() -> Path: + env = os.environ.get("EVOLVE_DIR") + return Path(env) if env else Path.cwd() / ".evolve" + + +def _session_id() -> tuple[str, bool]: + """Return (session_id, self_minted).""" + for var in ("CLAUDE_CODE_SESSION_ID", "CODEX_THREAD_ID"): + val = os.environ.get(var) + if val: + return val, False + return str(uuid.uuid4()), True + + +def main(argv: list[str]) -> int: + entities = [a for a in argv if a.strip()] + if not entities: + return 0 + + session_id, minted = _session_id() + row = { + "event": "recall", + "session_id": session_id, + "entities": entities, + "ts": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ"), + } + + log = _evolve_dir() / "audit.log" + log.parent.mkdir(parents=True, exist_ok=True) + with log.open("a", encoding="utf-8") as fh: + fh.write(json.dumps(row) + "\n") + + if minted: + print(f"evolve-session: {session_id}") + count = len(entities) + print(f"Recorded recall of {count} memory entr{'y' if count == 1 else 'ies'}.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/platform-integrations/install.sh b/platform-integrations/install.sh index 67dbb67d..b527266d 100755 --- a/platform-integrations/install.sh +++ b/platform-integrations/install.sh @@ -121,10 +121,30 @@ EVOLVE_VERSION = os.environ.get("EVOLVE_VERSION", "main") DRY_RUN = False BOB_SLUG = "evolve-lite" +BOB_RULES_FILE = "00-evolve-lite.md" +AUDIT_SCRIPT = "audit_recall.py" CLAUDE_PLUGIN = "evolve-lite" CLAW_CODE_PLUGIN = "evolve-lite" CODEX_PLUGIN = "evolve-lite" +# Marker used to manage a single greppable instruction line that an installer +# injects into an agent's always-on 
instruction file (e.g. ~/.codex/AGENTS.md). +# The marker is also the uninstall handle: any line containing it is "ours". +MANAGED_MARKER = "" + +# Codex cannot `@`-import another file, but it can be told to read one on +# demand. We drop a COPY of EVOLVE.md on disk and inject this single pointer +# line into ~/.codex/AGENTS.md instead of inlining the whole document. +CODEX_EVOLVE_MD_PATH = "~/.codex/evolve-lite/EVOLVE.md" + +def _codex_pointer_line(): + return ( + "Evolve memory is active: at the start of every conversation, read " + + CODEX_EVOLVE_MD_PATH + " and follow it — it governs recalling " + "relevant past learnings and saving durable new ones. " + + MANAGED_MARKER + ) + # ── Colour helpers ──────────────────────────────────────────────────────────── IS_TTY = sys.stdout.isatty() @@ -273,6 +293,19 @@ class FileOps: return True return False + def remove_dir_if_empty(self, path): + """Remove `path` only when it exists and contains nothing. + + Used to tidy up a per-plugin dir (e.g. ~/.bob/evolve-lite/) after its + last managed file is removed, while leaving it intact if a user (or + another plugin) dropped sibling content there.""" + path = str(path) + if os.path.isdir(path) and not os.listdir(path): + os.rmdir(path) + debug(f"Removed empty dir: {path}") + return True + return False + def run_subprocess(self, cmd_list): return subprocess.run(cmd_list) @@ -402,6 +435,128 @@ class FileOps: ) self.atomic_write_text(target_yaml_path, pattern.sub("", text)) + # ── Sentinel-block helpers (generic always-on instruction files) ─────────── + + def inject_sentinel_block(self, path, slug, body): + """Idempotently inject a sentinel-wrapped block into a text file. 
+ + Writes: + # >>>evolve:{slug}<<< + {body} + # << 0 - ] - if not hooks["SessionStart"]: - hooks.pop("SessionStart", None) - self.ops.atomic_write_json(path, data) - # ── Public interface ────────────────────────────────────────────────────── def install(self, target_dir): @@ -1104,15 +1079,35 @@ class CodexInstaller: ) success(f"Upserted Codex marketplace entry in {marketplace_target}") - hooks_target = Path(target_dir) / ".codex" / "hooks.json" - self._upsert_user_prompt_hook(hooks_target, self._recall_hook_group()) - self._upsert_session_start_hook(hooks_target, self._sync_hook_group()) - success(f"Upserted Codex UserPromptSubmit hook in {hooks_target}") - success(f"Upserted Codex SessionStart hook in {hooks_target}") - warn("Automatic Codex recall requires hooks to be enabled in ~/.codex/config.toml:") - print(" [features]") - print(" codex_hooks = true") - info("If you do not want to enable Codex hooks, invoke the installed evolve-lite:recall skill manually.") + # Always-on instructions: Codex reads ~/.codex/AGENTS.md verbatim and + # does NOT support `@`-imports. So we drop a COPY of EVOLVE.md on disk + # and inject a single greppable pointer line into AGENTS.md telling the + # agent to read that file on demand. Prefer the rendered codex copy; + # fall back to the shared plugin-source original. 
+ evolve_src = plugin_source / "EVOLVE.md" + if not evolve_src.is_file(): + evolve_src = Path(source_dir) / "plugin-source" / "EVOLVE.md" + evolve_text = "" if self.ops.is_dry_run and not evolve_src.is_file() else evolve_src.read_text() + evolve_dst = Path.home() / ".codex" / "evolve-lite" / "EVOLVE.md" + self.ops.atomic_write_text(evolve_dst, evolve_text) + success(f"Copied EVOLVE.md → {evolve_dst}") + + agents_file = Path.home() / ".codex" / "AGENTS.md" + self.ops.inject_marker_line(agents_file, MANAGED_MARKER, _codex_pointer_line()) + success(f"Injected '{CODEX_PLUGIN}' pointer into {agents_file}") + + # Recall-audit script: the injected AGENTS.md block tells the model to + # run `python3 ~/.codex/evolve-lite/audit_recall.py` after recall, so + # install the script at that GLOBAL absolute path (matching how the + # always-on instructions live globally). Prefer the rendered codex + # copy; fall back to the shared plugin-source original. + audit_src = plugin_source / "scripts" / AUDIT_SCRIPT + if not audit_src.is_file(): + audit_src = Path(source_dir) / "plugin-source" / "scripts" / AUDIT_SCRIPT + audit_text = "" if self.ops.is_dry_run and not audit_src.is_file() else audit_src.read_text() + audit_file = Path.home() / ".codex" / "evolve-lite" / AUDIT_SCRIPT + self.ops.atomic_write_text(audit_file, audit_text) + success(f"Installed recall-audit script → {audit_file}") success("Codex installation complete") @@ -1124,8 +1119,14 @@ class CodexInstaller: Path(target_dir) / ".agents" / "plugins" / "marketplace.json", "plugins", "name", CODEX_PLUGIN, ) - self._remove_user_prompt_hook(Path(target_dir) / ".codex" / "hooks.json") - self._remove_session_start_hook(Path(target_dir) / ".codex" / "hooks.json") + # Drop the single managed pointer line from the always-on instructions. 
+ self.ops.remove_marker_line(Path.home() / ".codex" / "AGENTS.md", MANAGED_MARKER) + # Remove the on-disk EVOLVE.md copy and the recall-audit script, then the + # per-plugin dir if nothing else lives there. + evolve_dir = Path.home() / ".codex" / "evolve-lite" + self.ops.remove_file(evolve_dir / "EVOLVE.md") + self.ops.remove_file(evolve_dir / AUDIT_SCRIPT) + self.ops.remove_dir_if_empty(evolve_dir) success("Codex uninstall complete") @@ -1144,19 +1145,18 @@ class CodexInstaller: ) print(f" marketplace.json entry : {'✓' if marketplace_present else '✗'}") - hooks_path = Path(target_dir) / ".codex" / "hooks.json" - hook_present = ( - any(isinstance(g, dict) and self._group_has_recall(g) - for g in read_json(hooks_path).get("hooks", {}).get("UserPromptSubmit", [])) - if hooks_path.is_file() else False + agents_path = Path.home() / ".codex" / "AGENTS.md" + pointer_present = ( + any(MANAGED_MARKER in ln for ln in agents_path.read_text().splitlines()) + if agents_path.is_file() else False ) - session_hook_present = ( - any(isinstance(g, dict) and self._group_has_sync(g) - for g in read_json(hooks_path).get("hooks", {}).get("SessionStart", [])) - if hooks_path.is_file() else False - ) - print(f" .codex/hooks.json entry : {'✓' if hook_present else '✗'}") - print(f" SessionStart sync hook : {'✓' if session_hook_present else '✗'}") + print(f" ~/.codex/AGENTS.md pointer : {'✓' if pointer_present else '✗'}") + + evolve_md = Path.home() / ".codex" / "evolve-lite" / "EVOLVE.md" + print(f" evolve-lite/EVOLVE.md : {'✓' if evolve_md.is_file() else '✗'}") + + audit_file = Path.home() / ".codex" / "evolve-lite" / AUDIT_SCRIPT + print(f" evolve-lite/{AUDIT_SCRIPT} : {'✓' if audit_file.is_file() else '✗'}") # ── Dispatch ────────────────────────────────────────────────────────────────── diff --git a/plugin-source/EVOLVE.md.j2 b/plugin-source/EVOLVE.md.j2 new file mode 100644 index 00000000..a6be8ea1 --- /dev/null +++ b/plugin-source/EVOLVE.md.j2 @@ -0,0 +1,82 @@ +# Evolve — 
self-directed memory + +You have a persistent, file-based memory for the current project, stored under +`./.evolve/memory/` (relative to the workspace/project root). You decide, on +your own judgment, when something is worth remembering — nothing forces a save, +and there is no step to "complete." Curate this memory like notes you'll thank +yourself for later: small, accurate, high-signal. + +## Recall — at the start of a non-trivial task + +Before substantive work (code changes, debugging, repo exploration, or +environment/tooling investigation), read your memory index at +`./.evolve/memory/MEMORY.md` if it exists. It holds one line per memory with a +short description. Open the individual memory files whose description looks +relevant to the task at hand, and let them inform what you do. If the index is +missing or nothing looks relevant, just proceed — that's normal. + +Memories reflect what was true when written. If a memory names a file, +function, command, or flag, verify it still exists before relying on it. + +## Record what you consulted + +After recall, log which entries you actually opened, so the value of this memory +can be measured over time. Run: + +``` +python3 {{ audit_script }} [ ...] +``` + +Pass the memory files you read this turn (space-separated paths, relative to the +project root). Skip this step entirely if you consulted no memories. If the +command prints a line beginning `evolve-session:`, include that line once, +verbatim, somewhere in your reply — it lets later analysis tie this session to +what you recalled. + +## Save — only when you learn something durable + +Near the end of a task, if it produced a reusable fact that isn't already +obvious from the code or git history — and only then — write it to memory. +Saving nothing is the right outcome more often than not; never force a +low-value memory just to have saved one. 
+ +Each memory is one file holding one fact, under `./.evolve/memory/` (create the +directory if it doesn't exist), with frontmatter: + +```markdown +--- +name: +description: +metadata: + type: user | feedback | project | reference +--- + + +``` + +Types: +- **user** — who the user is: role, expertise, durable preferences. +- **feedback** — guidance on how you should work, both corrections and + confirmed approaches; always include the why. +- **project** — ongoing work, goals, or constraints not derivable from the code + or git history; convert relative dates ("next week") to absolute ones. +- **reference** — pointers to external resources (URLs, dashboards, tickets). + +In the body, link related memories with `[[name]]`, where `name` is another +memory's `name:` slug. Link liberally; a `[[name]]` with no file yet marks +something worth writing later, not an error. + +After writing the file, add a one-line pointer to `./.evolve/memory/MEMORY.md`: +`- [Title](file.md) — short hook`. MEMORY.md is the index you read during +recall — one line per memory, no frontmatter, never put memory content there. + +## When NOT to save, and housekeeping + +- Don't duplicate what the repo already records: code structure, git history, + READMEs, existing docs. If asked to remember one of those, ask what was + non-obvious about it and save that instead. +- Don't save what only matters to the current conversation. +- Before saving, check for an existing memory that already covers it — update + that file rather than creating a duplicate. +- Delete memories that turn out to be wrong. 
diff --git a/plugin-source/build_plugins.py b/plugin-source/build_plugins.py index 4906ce38..d8350807 100644 --- a/plugin-source/build_plugins.py +++ b/plugin-source/build_plugins.py @@ -293,6 +293,7 @@ def _codex_plugin_json(meta: PluginMetadata) -> _CodexOut: "forked_context": True, "user_skills_dir": "~/.claude/skills", "save_example_script_root": "${CLAUDE_PLUGIN_ROOT}/skills", + "audit_script": "~/.claude/evolve-lite/audit_recall.py", }, "target_rewrites": [], "target_excludes": [], @@ -304,6 +305,7 @@ def _codex_plugin_json(meta: PluginMetadata) -> _CodexOut: "context": { "user_skills_dir": "~/.claw/skills", "save_example_script_root": "~/.claw/skills", + "audit_script": "~/.claw/evolve-lite/audit_recall.py", }, "target_rewrites": [], "target_excludes": [], @@ -316,6 +318,7 @@ def _codex_plugin_json(meta: PluginMetadata) -> _CodexOut: "context": { "user_skills_dir": "plugins/evolve-lite/skills", "save_example_script_root": "plugins/evolve-lite/skills", + "audit_script": "~/.codex/evolve-lite/audit_recall.py", }, "target_rewrites": [], "target_excludes": [], @@ -327,6 +330,7 @@ def _codex_plugin_json(meta: PluginMetadata) -> _CodexOut: "context": { "user_skills_dir": ".bob/skills", "save_example_script_root": ".bob/skills", + "audit_script": "~/.bob/evolve-lite/audit_recall.py", }, # Bob has no plugin-namespace concept; skill folders are flat # under .bob/skills/. Collapse the source skills/evolve-lite// diff --git a/plugin-source/scripts/audit_recall.py b/plugin-source/scripts/audit_recall.py new file mode 100644 index 00000000..42dc1104 --- /dev/null +++ b/plugin-source/scripts/audit_recall.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +"""Append a recall-audit row to .evolve/audit.log. + +Self-contained (no third-party or evolve-lite lib imports) so it can be dropped +at a single path and run by a model-invoked shell command on any platform. + +Usage: + python3 audit_recall.py [ ...] 
+ +Records which memory entries the model consulted this turn so the `provenance` +analysis can later judge whether they influenced the outcome. Session id is +resolved from the host's environment when available and falls back to a freshly +minted UUID (printed as `evolve-session: ` for the model to echo). +""" + +from __future__ import annotations + +import json +import os +import sys +import uuid +from datetime import datetime, timezone +from pathlib import Path + + +def _evolve_dir() -> Path: + env = os.environ.get("EVOLVE_DIR") + return Path(env) if env else Path.cwd() / ".evolve" + + +def _session_id() -> tuple[str, bool]: + """Return (session_id, self_minted).""" + for var in ("CLAUDE_CODE_SESSION_ID", "CODEX_THREAD_ID"): + val = os.environ.get(var) + if val: + return val, False + return str(uuid.uuid4()), True + + +def main(argv: list[str]) -> int: + entities = [a for a in argv if a.strip()] + if not entities: + return 0 + + session_id, minted = _session_id() + row = { + "event": "recall", + "session_id": session_id, + "entities": entities, + "ts": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ"), + } + + log = _evolve_dir() / "audit.log" + log.parent.mkdir(parents=True, exist_ok=True) + with log.open("a", encoding="utf-8") as fh: + fh.write(json.dumps(row) + "\n") + + if minted: + print(f"evolve-session: {session_id}") + count = len(entities) + print(f"Recorded recall of {count} memory entr{'y' if count == 1 else 'ies'}.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/tests/platform_integrations/conftest.py b/tests/platform_integrations/conftest.py index 18ba7660..e04fcc7c 100644 --- a/tests/platform_integrations/conftest.py +++ b/tests/platform_integrations/conftest.py @@ -20,6 +20,75 @@ def pytest_configure(config): config.addinivalue_line("markers", "integration: tests that require git and perform subprocess I/O") +@pytest.fixture(autouse=True) +def sandbox_home(tmp_path, monkeypatch): + 
"""Redirect HOME to a temp dir for every platform-integrations test. + + install.sh resolves a handful of global paths via Python's ``Path.home()`` + (notably the Codex always-on instructions file ``~/.codex/AGENTS.md`` and the + global Bob target ``~/.bob``). Without sandboxing, simply running a codex + install in a test would inject the evolve block into the developer's REAL + ``~/.codex/AGENTS.md``. ``InstallRunner.run`` builds the subprocess env from + ``os.environ`` at call time, so monkeypatching HOME here flows through to the + install.sh child process. + + Returns the sandboxed home directory. + """ + home = tmp_path / "sandbox_home" + home.mkdir() + monkeypatch.setenv("HOME", str(home)) + # Windows/`Path.home()` also consults these; keep them aligned defensively. + monkeypatch.setenv("USERPROFILE", str(home)) + monkeypatch.delenv("HOMEDRIVE", raising=False) + monkeypatch.delenv("HOMEPATH", raising=False) + return home + + +@pytest.fixture +def codex_agents_file(sandbox_home): + """Path to the sandboxed Codex always-on instructions file (~/.codex/AGENTS.md).""" + return sandbox_home / ".codex" / "AGENTS.md" + + +@pytest.fixture +def codex_evolve_md(sandbox_home): + """Path to the sandboxed on-disk COPY of EVOLVE.md (~/.codex/evolve-lite/EVOLVE.md). + + Codex no longer inlines EVOLVE.md into AGENTS.md; it drops a copy here and + points AGENTS.md at it via a single greppable managed line.""" + return sandbox_home / ".codex" / "evolve-lite" / "EVOLVE.md" + + +@pytest.fixture +def bob_rules_file(sandbox_home): + """Path to the sandboxed Bob GLOBAL custom-instructions rules file. + + Bob loads every ``~/.bob/rules/*.md`` into every session, globally and + mode-independent, as the user's custom instructions. 
The lite installer + owns ``00-evolve-lite.md`` entirely (always global, never a project file).""" + return sandbox_home / ".bob" / "rules" / "00-evolve-lite.md" + + +@pytest.fixture +def bob_audit_script(sandbox_home): + """Path to the sandboxed Bob GLOBAL recall-audit script. + + EVOLVE.md tells the model to run ``python3 ~/.bob/evolve-lite/audit_recall.py`` + after recall, so the lite installer drops the script once at that global + absolute path (matching the always-global rules file).""" + return sandbox_home / ".bob" / "evolve-lite" / "audit_recall.py" + + +@pytest.fixture +def codex_audit_script(sandbox_home): + """Path to the sandboxed Codex GLOBAL recall-audit script. + + The copied EVOLVE.md (reached via the ~/.codex/AGENTS.md pointer line) tells + the model to run ``python3 ~/.codex/evolve-lite/audit_recall.py`` after recall, + so the installer drops the script once at that global absolute path.""" + return sandbox_home / ".codex" / "evolve-lite" / "audit_recall.py" + + @pytest.fixture def temp_project_dir(tmp_path): """ @@ -227,6 +296,31 @@ def assert_sentinel_block_exists(path: Path, slug: str): assert start_sentinel in content, f"Start sentinel '{start_sentinel}' not found in {path}" assert end_sentinel in content, f"End sentinel '{end_sentinel}' not found in {path}" + @staticmethod + def assert_sentinel_block_count(path: Path, slug: str, expected: int): + """Assert the file contains exactly `expected` REAL sentinel blocks for `slug`. + + A "real" block is a start marker anchored at the beginning of a line followed + by a matching end marker also anchored at the beginning of a line — the same + shape install.sh's inject_sentinel_block treats as a block. This deliberately + ignores a sentinel literal quoted mid-line inside unrelated user prose, so the + helper measures actual injected blocks (an idempotent installer leaves one). 
+ """ + import re + + assert path.is_file(), f"File does not exist: {path}" + content = path.read_text() + start = f"# >>>evolve:{slug}<<<" + end = f"# <<`` marker) into the GLOBAL (sandboxed) + ``~/.codex/AGENTS.md`` telling the agent to read that file, and + * drops the self-contained recall-audit script at the GLOBAL path + ``~/.codex/evolve-lite/audit_recall.py`` referenced by that file. """ import json @@ -8,8 +20,12 @@ EVOLVE_PLUGIN = "evolve-lite" -EVOLVE_HOOK_SNIPPET = "plugins/evolve-lite/skills/evolve-lite/recall/scripts/retrieve_entities.py" -EVOLVE_SYNC_SNIPPET = "plugins/evolve-lite/skills/evolve-lite/sync/scripts/sync.py" +MANAGED_MARKER = "" +EVOLVE_MD_REF = "~/.codex/evolve-lite/EVOLVE.md" +AUDIT_PATH_REF = "~/.codex/evolve-lite/audit_recall.py" +# A distinctive sentence from the body of EVOLVE.md that must live in the copied +# file but must NOT be inlined into AGENTS.md anymore. +EVOLVE_BODY_SENTENCE = "You have a persistent, file-based memory for the current project" def _marketplace_has_evolve_plugin(path): @@ -17,33 +33,9 @@ def _marketplace_has_evolve_plugin(path): return any(entry.get("name") == EVOLVE_PLUGIN for entry in data.get("plugins", [])) -def _hooks_have_evolve_recall(path): - data = json.loads(path.read_text()) - groups = data.get("hooks", {}).get("UserPromptSubmit", []) - for group in groups: - for hook in _iter_group_hooks(group): - if EVOLVE_HOOK_SNIPPET in hook.get("command", ""): - return group.get("matcher") == "" - return False - - -def _hooks_have_evolve_sync(path): - data = json.loads(path.read_text()) - groups = data.get("hooks", {}).get("SessionStart", []) - for group in groups: - for hook in _iter_group_hooks(group): - if EVOLVE_SYNC_SNIPPET in hook.get("command", ""): - return group.get("matcher") == "startup|resume" - return False - - -def _iter_group_hooks(group): - hooks = group.get("hooks", []) - if isinstance(hooks, list): - return hooks - if isinstance(hooks, dict): - return list(hooks.values()) - return [] +def 
_marker_lines(text): + """Return the list of lines in `text` that carry the managed marker.""" + return [ln for ln in text.splitlines() if MANAGED_MARKER in ln] @pytest.mark.platform_integrations @@ -51,9 +43,17 @@ def _iter_group_hooks(group): class TestCodexInstall: """Test the Codex install flow.""" - def test_install_creates_expected_files(self, temp_project_dir, install_runner, file_assertions): - """Installing Codex should create the plugin tree, marketplace entry, and hook.""" - result = install_runner.run("install", platform="codex") + def test_install_creates_expected_files( + self, + temp_project_dir, + install_runner, + file_assertions, + codex_agents_file, + codex_evolve_md, + codex_audit_script, + ): + """Installing Codex creates the plugin tree, marketplace entry, AGENTS.md pointer, EVOLVE.md copy, and audit script.""" + install_runner.run("install", platform="codex") plugin_dir = temp_project_dir / "plugins" / EVOLVE_PLUGIN file_assertions.assert_dir_exists(plugin_dir) @@ -69,185 +69,82 @@ def test_install_creates_expected_files(self, temp_project_dir, install_runner, file_assertions.assert_dir_exists(plugin_dir / "skills" / "evolve-lite" / "sync") file_assertions.assert_file_exists(plugin_dir / "skills" / "evolve-lite" / "learn" / "scripts" / "save_entities.py") file_assertions.assert_file_exists(plugin_dir / "skills" / "evolve-lite" / "recall" / "scripts" / "retrieve_entities.py") - file_assertions.assert_file_exists(plugin_dir / "skills" / "evolve-lite" / "publish" / "scripts" / "publish.py") - file_assertions.assert_file_exists(plugin_dir / "skills" / "evolve-lite" / "provenance" / "scripts" / "log_influence.py") - file_assertions.assert_file_exists(plugin_dir / "skills" / "evolve-lite" / "save-trajectory" / "scripts" / "save_trajectory.py") - file_assertions.assert_file_exists(plugin_dir / "skills" / "evolve-lite" / "subscribe" / "scripts" / "subscribe.py") - file_assertions.assert_file_exists(plugin_dir / "skills" / "evolve-lite" / 
"unsubscribe" / "scripts" / "unsubscribe.py") - file_assertions.assert_file_exists(plugin_dir / "skills" / "evolve-lite" / "sync" / "scripts" / "sync.py") file_assertions.assert_file_exists(plugin_dir / "lib" / "evolve-lite" / "entity_io.py") + # The recall-audit script ships in the plugin tree too (root-level scripts/). + file_assertions.assert_file_exists(plugin_dir / "scripts" / "audit_recall.py") marketplace_path = temp_project_dir / ".agents" / "plugins" / "marketplace.json" file_assertions.assert_valid_json(marketplace_path) assert _marketplace_has_evolve_plugin(marketplace_path), "Evolve plugin entry missing from marketplace.json" - hooks_path = temp_project_dir / ".codex" / "hooks.json" - file_assertions.assert_valid_json(hooks_path) - assert _hooks_have_evolve_recall(hooks_path), "Evolve recall hook missing from .codex/hooks.json" - assert _hooks_have_evolve_sync(hooks_path), "Evolve sync hook missing from .codex/hooks.json" - - hooks_data = json.loads(hooks_path.read_text()) - evolve_groups = [ - group - for group in hooks_data.get("hooks", {}).get("UserPromptSubmit", []) - if any(EVOLVE_HOOK_SNIPPET in hook.get("command", "") for hook in group.get("hooks", [])) - ] - assert evolve_groups[0]["matcher"] == "" - evolve_hook = next(hook for hook in evolve_groups[0]["hooks"] if EVOLVE_HOOK_SNIPPET in hook.get("command", "")) - expected_command = ( - "sh -lc '" - 'd="$PWD"; ' - "while :; do " - 'candidate="$d/plugins/evolve-lite/skills/evolve-lite/recall/scripts/retrieve_entities.py"; ' - 'if [ -f "$candidate" ]; then EVOLVE_DIR="$d/.evolve" exec python3 "$candidate"; fi; ' - '[ "$d" = "/" ] && break; ' - 'd="$(dirname "$d")"; ' - "done; " - "exit 1'" - ) - assert evolve_hook["command"] == expected_command - sync_groups = [ - group - for group in hooks_data.get("hooks", {}).get("SessionStart", []) - if any(EVOLVE_SYNC_SNIPPET in hook.get("command", "") for hook in group.get("hooks", [])) - ] - assert sync_groups[0]["matcher"] == "startup|resume" - sync_hook = 
next(hook for hook in sync_groups[0]["hooks"] if EVOLVE_SYNC_SNIPPET in hook.get("command", "")) - expected_sync_command = ( - "sh -lc '" - 'd="$PWD"; ' - "while :; do " - 'candidate="$d/plugins/evolve-lite/skills/evolve-lite/sync/scripts/sync.py"; ' - 'if [ -f "$candidate" ]; then EVOLVE_DIR="$d/.evolve" exec python3 "$candidate" --quiet --session-start; fi; ' - '[ "$d" = "/" ] && break; ' - 'd="$(dirname "$d")"; ' - "done; " - "exit 1'" - ) - assert sync_hook["command"] == expected_sync_command - assert "~/.codex/config.toml" in result.stdout - assert "codex_hooks = true" in result.stdout - assert "evolve-lite:recall" in result.stdout - - def test_install_preserves_matching_user_prompt_group(self, temp_project_dir, install_runner, codex_fixtures): - """Installing should merge the evolve hook into an existing matching list-based group.""" - hooks_path = codex_fixtures.create_existing_hooks_with_shared_evolve_group(temp_project_dir) - - install_runner.run("install", platform="codex") - - hooks_data = json.loads(hooks_path.read_text()) - prompt_groups = hooks_data["hooks"]["UserPromptSubmit"] - assert len(prompt_groups) == 1 - - merged_group = prompt_groups[0] - assert merged_group["matcher"] == "src/.*" - - custom_hooks = [ - hook for hook in _iter_group_hooks(merged_group) if hook.get("command") == "python3 ~/.codex/hooks/custom_prompt_memory.py" - ] - assert len(custom_hooks) == 1, "Custom prompt hook was removed from the shared group" - - evolve_hooks = [hook for hook in _iter_group_hooks(merged_group) if EVOLVE_HOOK_SNIPPET in hook.get("command", "")] - assert len(evolve_hooks) == 1, "Evolve hook was duplicated or removed from the shared group" - assert evolve_hooks[0]["statusMessage"] == "Loading Evolve guidance" - assert evolve_hooks[0]["delayMs"] == 250 - - def test_install_updates_dict_based_matching_group(self, temp_project_dir, install_runner, codex_fixtures): - """Installing should update a dict-based matching group without adding a replacement group.""" 
- hooks_path = codex_fixtures.create_existing_hooks_with_dict_evolve_group(temp_project_dir) - - install_runner.run("install", platform="codex") - - hooks_data = json.loads(hooks_path.read_text()) - prompt_groups = hooks_data["hooks"]["UserPromptSubmit"] - assert len(prompt_groups) == 1 - - merged_group = prompt_groups[0] - assert merged_group["matcher"] == "src/.*" - assert isinstance(merged_group["hooks"], dict) - assert "memory" in merged_group["hooks"] - assert "evolve-lite" in merged_group["hooks"] - - evolve_hook = merged_group["hooks"]["evolve-lite"] - assert EVOLVE_HOOK_SNIPPET in evolve_hook["command"] - assert evolve_hook["statusMessage"] == "Loading Evolve guidance" - assert evolve_hook["delayMs"] == 250 - - def test_install_adds_session_start_sync_hook(self, temp_project_dir, install_runner, codex_fixtures): - """Installing should preserve user SessionStart hooks and add the sync hook.""" - hooks_path = codex_fixtures.create_existing_hooks(temp_project_dir) - - install_runner.run("install", platform="codex") - - hooks_data = json.loads(hooks_path.read_text()) - session_groups = hooks_data["hooks"]["SessionStart"] - assert len(session_groups) == 2 - assert any( - any(hook.get("command") == "python3 ~/.codex/hooks/session_start.py" for hook in _iter_group_hooks(group)) - for group in session_groups - ) - assert any(any(EVOLVE_SYNC_SNIPPET in hook.get("command", "") for hook in _iter_group_hooks(group)) for group in session_groups) - - def test_uninstall_removes_only_evolve_hook_from_matching_group(self, temp_project_dir, install_runner, codex_fixtures): - """Uninstalling should remove only the evolve hook entry and preserve the shared group.""" - hooks_path = codex_fixtures.create_existing_hooks_with_dict_evolve_group(temp_project_dir) - - install_runner.run("uninstall", platform="codex") - - hooks_data = json.loads(hooks_path.read_text()) - prompt_groups = hooks_data["hooks"]["UserPromptSubmit"] - assert len(prompt_groups) == 1 - - remaining_group = 
prompt_groups[0] - assert remaining_group["matcher"] == "src/.*" - assert isinstance(remaining_group["hooks"], dict) - assert "memory" in remaining_group["hooks"] - assert "evolve-lite" not in remaining_group["hooks"] - assert all(EVOLVE_HOOK_SNIPPET not in hook.get("command", "") for hook in _iter_group_hooks(remaining_group)) - - def test_uninstall_removes_session_start_sync_hook_only(self, temp_project_dir, install_runner, codex_fixtures): - """Uninstalling should remove the Evolve SessionStart hook and preserve user hooks.""" - hooks_path = codex_fixtures.create_existing_hooks(temp_project_dir) - install_runner.run("install", platform="codex") - - install_runner.run("uninstall", platform="codex") - - hooks_data = json.loads(hooks_path.read_text()) - session_groups = hooks_data["hooks"]["SessionStart"] - assert len(session_groups) == 1 - assert any(hook.get("command") == "python3 ~/.codex/hooks/session_start.py" for hook in _iter_group_hooks(session_groups[0])) - assert all(EVOLVE_SYNC_SNIPPET not in hook.get("command", "") for group in session_groups for hook in _iter_group_hooks(group)) - - def test_uninstall_prunes_evolve_only_hook_groups(self, temp_project_dir, install_runner, file_assertions): - """Uninstalling after a clean install should remove empty Evolve-only hook groups.""" - install_runner.run("install", platform="codex") - - hooks_path = temp_project_dir / ".codex" / "hooks.json" - file_assertions.assert_valid_json(hooks_path) - - install_runner.run("uninstall", platform="codex") - - hooks_data = json.loads(hooks_path.read_text()) - hooks = hooks_data.get("hooks", {}) - assert "UserPromptSubmit" not in hooks - assert "SessionStart" not in hooks - - def test_codex_dry_run_does_not_write_files(self, temp_project_dir, install_runner): + # A SINGLE greppable pointer line is injected into the GLOBAL ~/.codex/AGENTS.md. 
+ file_assertions.assert_file_exists(codex_agents_file) + agents_text = codex_agents_file.read_text() + marker_lines = _marker_lines(agents_text) + assert len(marker_lines) == 1, f"Expected exactly one managed line, got {marker_lines!r}" + pointer_line = marker_lines[0] + # The pointer references the on-disk EVOLVE.md copy. + assert EVOLVE_MD_REF in pointer_line + # AGENTS.md must NOT inline the full EVOLVE.md body anymore. + assert EVOLVE_BODY_SENTENCE not in agents_text + # The audit-script path is no longer inlined into AGENTS.md (it lives in EVOLVE.md). + assert AUDIT_PATH_REF not in agents_text + + # A COPY of EVOLVE.md is dropped on disk and DOES contain the full body. + file_assertions.assert_file_exists(codex_evolve_md) + evolve_md_text = codex_evolve_md.read_text() + assert EVOLVE_BODY_SENTENCE in evolve_md_text + # EVOLVE.md is what tells the model to run the recall-audit script. + assert AUDIT_PATH_REF in evolve_md_text + + # The recall-audit script is installed alongside EVOLVE.md and is self-contained. 
+ file_assertions.assert_file_exists(codex_audit_script) + assert codex_audit_script.parent == codex_evolve_md.parent + assert "Append a recall-audit row" in codex_audit_script.read_text() + + def test_codex_dry_run_does_not_write_files( + self, temp_project_dir, install_runner, codex_agents_file, codex_evolve_md, codex_audit_script + ): """Dry-run should report actions without writing files.""" result = install_runner.run("install", platform="codex", dry_run=True) assert "DRY RUN" in result.stdout assert not (temp_project_dir / "plugins" / EVOLVE_PLUGIN).exists() assert not (temp_project_dir / ".agents" / "plugins" / "marketplace.json").exists() - assert not (temp_project_dir / ".codex" / "hooks.json").exists() + assert not codex_agents_file.exists() + assert not codex_evolve_md.exists() + assert not codex_audit_script.exists() + + def test_uninstall_removes_pointer_and_files( + self, + temp_project_dir, + install_runner, + file_assertions, + codex_agents_file, + codex_evolve_md, + codex_audit_script, + ): + """Uninstall removes the AGENTS.md pointer line, the EVOLVE.md copy, and the audit script (and the empty dir).""" + install_runner.run("install", platform="codex") + file_assertions.assert_file_exists(codex_evolve_md) + file_assertions.assert_file_exists(codex_audit_script) + assert len(_marker_lines(codex_agents_file.read_text())) == 1 + + install_runner.run("uninstall", platform="codex") + + assert _marker_lines(codex_agents_file.read_text()) == [] + file_assertions.assert_file_not_exists(codex_evolve_md) + file_assertions.assert_file_not_exists(codex_audit_script) + file_assertions.assert_dir_not_exists(codex_evolve_md.parent) def test_status_reports_codex_installation(self, temp_project_dir, install_runner): - """Status should show the Codex installation state.""" + """Status should show the Codex installation state under the new contract.""" install_runner.run("install", platform="codex") result = install_runner.run("status") assert "Codex:" in 
result.stdout assert "plugins/evolve-lite" in result.stdout assert "marketplace.json entry" in result.stdout - assert ".codex/hooks.json entry" in result.stdout - assert "SessionStart sync hook" in result.stdout + assert "~/.codex/AGENTS.md pointer" in result.stdout + assert "EVOLVE.md" in result.stdout + assert "audit_recall.py" in result.stdout diff --git a/tests/platform_integrations/test_dry_run.py b/tests/platform_integrations/test_dry_run.py index 3c866fad..a7fb0f6e 100644 --- a/tests/platform_integrations/test_dry_run.py +++ b/tests/platform_integrations/test_dry_run.py @@ -27,7 +27,12 @@ def test_all_platforms_dry_run_creates_no_files(self, temp_project_dir, install_ assert not (temp_project_dir / ".codex").exists() def test_bob_dry_run_mentions_expected_operations(self, temp_project_dir, install_runner, platform_integrations_dir): - """Bob dry-run output should name the skills it would copy.""" + """Bob lite dry-run should name the skills it would copy and the always-on instruction wiring. + + Lite no longer merges custom_modes.yaml, copies EVOLVE.md into .bob/, + or injects an AGENTS.md import. It writes the always-on instructions to + Bob's GLOBAL rules dir (~/.bob/rules/00-evolve-lite.md). + """ result = install_runner.run("install", platform="bob", mode="lite", dry_run=True) assert result.returncode == 0 @@ -36,7 +41,14 @@ def test_bob_dry_run_mentions_expected_operations(self, temp_project_dir, instal for skill_dir in skills_src.iterdir(): if skill_dir.is_dir(): assert skill_dir.name in result.stdout, f"Expected skill '{skill_dir.name}' to appear in dry-run output" - assert "custom_modes.yaml" in result.stdout + # New contract: the global rules file is written; no AGENTS.md / EVOLVE.md + # copy / sentinel-block wiring remains. 
+ assert "00-evolve-lite.md" in result.stdout + assert "AGENTS.md" not in result.stdout + assert "inject sentinel block" not in result.stdout + assert "EVOLVE.md" not in result.stdout + # Lite no longer touches custom_modes.yaml. + assert "custom_modes.yaml" not in result.stdout assert not (temp_project_dir / ".bob").exists() def test_codex_dry_run_creates_no_files(self, temp_project_dir, install_runner): diff --git a/tests/platform_integrations/test_idempotency.py b/tests/platform_integrations/test_idempotency.py index 02ebbb83..5b1c8d48 100644 --- a/tests/platform_integrations/test_idempotency.py +++ b/tests/platform_integrations/test_idempotency.py @@ -3,35 +3,59 @@ """ import json -import re import pytest +MANAGED_MARKER = "" + + @pytest.mark.platform_integrations class TestBobIdempotency: """Test that Bob installation is idempotent.""" - def test_multiple_lite_installs(self, temp_project_dir, install_runner, file_assertions): - """Running install twice for Bob lite mode should be safe.""" + def test_multiple_lite_installs(self, temp_project_dir, install_runner, file_assertions, bob_rules_file, bob_audit_script): + """Running install twice for Bob lite mode should be safe. + + Lite writes the always-on instructions to Bob's GLOBAL rules file + ``~/.bob/rules/00-evolve-lite.md`` and the recall-audit script to + ``~/.bob/evolve-lite/audit_recall.py``; a second install must leave + exactly one such file with identical content (no duplication) and must + not create any AGENTS.md or per-project EVOLVE.md copy. + """ # First install install_runner.run("install", platform="bob", mode="lite") - # Capture state after first install bob_dir = temp_project_dir / ".bob" - custom_modes_file = bob_dir / "custom_modes.yaml" - first_content = custom_modes_file.read_text() + file_assertions.assert_file_exists(bob_rules_file) + first_content = bob_rules_file.read_text() + # The rules file holds the full EVOLVE.md text. 
+ assert "self-directed memory" in first_content + # The recall-audit script is installed at its global path, and the rules + # file references that exact path. + file_assertions.assert_file_exists(bob_audit_script) + assert "Append a recall-audit row" in bob_audit_script.read_text() + assert "~/.bob/evolve-lite/audit_recall.py" in first_content # Second install install_runner.run("install", platform="bob", mode="lite") - # Assert: Files are identical - second_content = custom_modes_file.read_text() - assert first_content == second_content, "Content changed after second install" + # Assert: the rules file is identical after the second install. + second_content = bob_rules_file.read_text() + assert first_content == second_content, "rules/00-evolve-lite.md changed after second install" - # Assert: No duplicate sentinel blocks - assert first_content.count("# >>>evolve:evolve-lite<<<") == 1 - assert first_content.count("# <<>>evolve:evolve-lite<<<` in its customInstructions. A naive `if start in - existing` substring check treated that as an existing block, took the replace - branch, found no matching end sentinel, and silently dropped the merge while - still reporting success. The sentinel match must be line-anchored. - """ - bob_dir = temp_project_dir / ".bob" - modes_file = bob_dir / "custom_modes.yaml" - modes_file.parent.mkdir(parents=True, exist_ok=True) - # Reproduce the exact user failure: a 0-indent list (as yaml.safe_dump / - # Bob marketplace tooling writes it) whose quoted text mentions the - # sentinel literal. This trips BOTH the substring false-match and the - # 0-indent-vs-2-indent mismatch. 
- modes_file.write_text( - "customModes:\n" - "- slug: install-evolve-lite\n" - " name: Install Evolve Lite\n" - ' customInstructions: "Merged between # >>>evolve:evolve-lite<<< sentinel comments."\n' - " groups:\n" - " - read\n" - ) - - install_runner.run("install", platform="bob", mode="lite") - - content = modes_file.read_text() - # The evolve-lite mode was actually merged in (real sentinel block written). - assert "# >>>evolve:evolve-lite<<<" in content - - # All top-level list items share one indentation — a 0-indent/2-indent mix - # would be invalid YAML (the indentation-matching fix). - indents = set(re.findall(r"(?m)^([ \t]*)- slug:", content)) - assert len(indents) == 1, f"mixed custom-mode list indentation: {indents}" - - slugs = re.findall(r"(?m)^[ \t]*- slug:\s*(\S+)", content) - assert "evolve-lite" in slugs, f"evolve-lite mode not merged; slugs={slugs}" - # ...and the pre-existing mode is preserved. - assert "install-evolve-lite" in slugs - def test_install_preserves_user_content_during_legacy_purge(self, temp_project_dir, install_runner, bob_fixtures, file_assertions): """The legacy purge MUST NOT clobber non-evolve user skills/commands.""" bob_dir = temp_project_dir / ".bob" @@ -201,37 +206,47 @@ def test_install_preserves_user_content_during_legacy_purge(self, temp_project_d class TestCodexIdempotency: """Test that Codex installation is idempotent.""" - def test_multiple_installs(self, temp_project_dir, install_runner, file_assertions): - """Running install twice for Codex should be safe.""" + def test_multiple_installs( + self, temp_project_dir, install_runner, file_assertions, codex_agents_file, codex_evolve_md, codex_audit_script + ): + """Running install twice for Codex should be safe. + + Codex now drops a COPY of EVOLVE.md on disk and injects a SINGLE + greppable pointer line (carrying ````) into + the (sandboxed) ~/.codex/AGENTS.md instead of inlining the body. 
A + second install must not duplicate the marketplace entry or the pointer + line. + """ install_runner.run("install", platform="codex") marketplace_file = temp_project_dir / ".agents" / "plugins" / "marketplace.json" - hooks_file = temp_project_dir / ".codex" / "hooks.json" first_marketplace = json.loads(marketplace_file.read_text()) - first_hooks = json.loads(hooks_file.read_text()) + first_agents = codex_agents_file.read_text() + + # The recall-audit script and the EVOLVE.md copy live together on disk; + # the pointer line in AGENTS.md references the EVOLVE.md path. + file_assertions.assert_file_exists(codex_evolve_md) + file_assertions.assert_file_exists(codex_audit_script) + assert "Append a recall-audit row" in codex_audit_script.read_text() + assert "~/.codex/evolve-lite/EVOLVE.md" in first_agents install_runner.run("install", platform="codex") second_marketplace = json.loads(marketplace_file.read_text()) - second_hooks = json.loads(hooks_file.read_text()) + second_agents = codex_agents_file.read_text() assert first_marketplace == second_marketplace, "marketplace.json changed after second install" - assert first_hooks == second_hooks, ".codex/hooks.json changed after second install" + assert first_agents == second_agents, "~/.codex/AGENTS.md changed after second install" evolve_plugins = [entry for entry in second_marketplace["plugins"] if entry["name"] == "evolve-lite"] assert len(evolve_plugins) == 1, "Duplicate evolve-lite marketplace entries found" - prompt_hooks = second_hooks["hooks"]["UserPromptSubmit"] - evolve_hook_groups = [ - group - for group in prompt_hooks - if any( - "plugins/evolve-lite/skills/evolve-lite/recall/scripts/retrieve_entities.py" in hook.get("command", "") - for hook in group.get("hooks", []) - ) - ] - assert len(evolve_hook_groups) == 1, "Duplicate Evolve UserPromptSubmit hooks found" - assert evolve_hook_groups[0].get("matcher") == "" + # Exactly one managed pointer line in the always-on instructions file. 
+ marker_lines = [ln for ln in second_agents.splitlines() if MANAGED_MARKER in ln] + assert len(marker_lines) == 1, f"Expected exactly one managed line, got {marker_lines!r}" + # The EVOLVE.md copy and audit script are still present after reinstall. + file_assertions.assert_file_exists(codex_evolve_md) + file_assertions.assert_file_exists(codex_audit_script) def test_install_after_partial_uninstall(self, temp_project_dir, install_runner, file_assertions): """Installing after deleting part of the Codex plugin should restore it.""" @@ -249,12 +264,44 @@ def test_install_after_partial_uninstall(self, temp_project_dir, install_runner, file_assertions.assert_file_exists(plugin_dir / "skills" / "evolve-lite" / "learn" / "SKILL.md") file_assertions.assert_file_exists(plugin_dir / "lib" / "evolve-lite" / "entity_io.py") + def test_install_appends_pointer_preserving_user_prose(self, temp_project_dir, install_runner, file_assertions, codex_agents_file): + """Injecting the pointer line must preserve a pre-existing, unrelated AGENTS.md. + + Codex now injects a SINGLE managed pointer line (carrying + ````) via FileOps.inject_marker_line. When + AGENTS.md already has user content but no managed line, the pointer is + APPENDED on its own line — separated from the existing content by a + blank line — and the user's prose is preserved verbatim. Re-running the + install REPLACES that one line in place rather than duplicating it. + """ + # The sandboxed ~/.codex/AGENTS.md, pre-seeded with unrelated user prose. + codex_agents_file.parent.mkdir(parents=True, exist_ok=True) + user_prose = "# My agent instructions\n\nAlways prefer ripgrep over grep, and never edit generated files by hand.\n" + codex_agents_file.write_text(user_prose) + + install_runner.run("install", platform="codex") + + content = codex_agents_file.read_text() + # The user's original prose is preserved verbatim. 
+ assert user_prose.rstrip() in content + # Exactly one managed pointer line was appended, separated by a blank line. + marker_lines = [ln for ln in content.splitlines() if MANAGED_MARKER in ln] + assert len(marker_lines) == 1, f"Expected exactly one managed line, got {marker_lines!r}" + assert content.startswith(user_prose.rstrip() + "\n\n") + + # A second install replaces the line in place — still exactly one. + install_runner.run("install", platform="codex") + content2 = codex_agents_file.read_text() + marker_lines2 = [ln for ln in content2.splitlines() if MANAGED_MARKER in ln] + assert len(marker_lines2) == 1, f"Expected exactly one managed line after reinstall, got {marker_lines2!r}" + assert user_prose.rstrip() in content2 + @pytest.mark.platform_integrations class TestUninstallInstallCycle: """Test that uninstall followed by install works correctly.""" - def test_bob_uninstall_install_cycle(self, temp_project_dir, install_runner, bob_fixtures, file_assertions): + def test_bob_uninstall_install_cycle(self, temp_project_dir, install_runner, bob_fixtures, file_assertions, bob_rules_file): """Uninstalling and reinstalling Bob should work correctly.""" # Create user content bob_fixtures.create_existing_skill(temp_project_dir) @@ -275,28 +322,56 @@ def test_bob_uninstall_install_cycle(self, temp_project_dir, install_runner, bob # Reinstall install_runner.run("install", platform="bob") - # Assert: Evolve content is back + # Assert: Evolve content is back. Lite wires always-on instructions via + # the GLOBAL rules file, not via custom_modes.yaml or any AGENTS.md. 
file_assertions.assert_all_bob_skills_installed(bob_dir) - file_assertions.assert_sentinel_block_exists(bob_dir / "custom_modes.yaml", "evolve-lite") + file_assertions.assert_file_exists(bob_rules_file) + file_assertions.assert_file_not_exists(temp_project_dir / "AGENTS.md") + file_assertions.assert_file_not_exists(bob_dir / "EVOLVE.md") - # Assert: User content still intact + # Assert: User content still intact — the user's custom_modes.yaml was never + # touched by the lite install, so their mode survives the full cycle. file_assertions.assert_dir_exists(bob_dir / "skills" / "my-custom-skill") custom_modes = (bob_dir / "custom_modes.yaml").read_text() assert "slug: my-mode" in custom_modes - def test_codex_uninstall_install_cycle(self, temp_project_dir, install_runner, codex_fixtures, file_assertions): - """Uninstalling and reinstalling Codex should work correctly.""" + def test_codex_uninstall_install_cycle( + self, + temp_project_dir, + install_runner, + codex_fixtures, + file_assertions, + codex_agents_file, + codex_evolve_md, + codex_audit_script, + ): + """Uninstalling and reinstalling Codex should work correctly. + + Codex now drops a COPY of EVOLVE.md on disk and injects a SINGLE managed + pointer line into the (sandboxed) ~/.codex/AGENTS.md instead of + registering hooks. The user's hooks.json is never touched, so it must + survive the cycle unchanged. 
+ """ custom_plugin = codex_fixtures.create_existing_plugin(temp_project_dir) marketplace_file = codex_fixtures.create_existing_marketplace(temp_project_dir) hooks_file = codex_fixtures.create_existing_hooks(temp_project_dir) plugin_json = custom_plugin / ".codex-plugin" / "plugin.json" original_plugin_content = plugin_json.read_text() + original_hooks_content = hooks_file.read_text() install_runner.run("install", platform="codex") evolve_plugin_dir = temp_project_dir / "plugins" / "evolve-lite" file_assertions.assert_dir_exists(evolve_plugin_dir) + # Install injected exactly one managed pointer line into the always-on instructions. + marker_lines = [ln for ln in codex_agents_file.read_text().splitlines() if MANAGED_MARKER in ln] + assert len(marker_lines) == 1, f"Expected exactly one managed line, got {marker_lines!r}" + # Install dropped the EVOLVE.md copy and the recall-audit script at their global paths. + file_assertions.assert_file_exists(codex_evolve_md) + file_assertions.assert_file_exists(codex_audit_script) + # The user's hooks were left completely untouched. + file_assertions.assert_file_unchanged(hooks_file, original_hooks_content) install_runner.run("uninstall", platform="codex") @@ -304,15 +379,14 @@ def test_codex_uninstall_install_cycle(self, temp_project_dir, install_runner, c current_marketplace = json.loads(marketplace_file.read_text()) assert all(entry["name"] != "evolve-lite" for entry in current_marketplace["plugins"]) - current_hooks = json.loads(hooks_file.read_text()) - prompt_hooks = current_hooks["hooks"].get("UserPromptSubmit", []) - evolve_hooks = [ - hook - for group in prompt_hooks - for hook in group.get("hooks", []) - if "plugins/evolve-lite/skills/evolve-lite/recall/scripts/retrieve_entities.py" in hook.get("command", "") - ] - assert not evolve_hooks, "Evolve hook still present after uninstall" + # The managed pointer line is gone from AGENTS.md after uninstall. 
+ assert [ln for ln in codex_agents_file.read_text().splitlines() if MANAGED_MARKER in ln] == [] + # The EVOLVE.md copy, audit script, and now-empty dir are removed. + file_assertions.assert_file_not_exists(codex_evolve_md) + file_assertions.assert_file_not_exists(codex_audit_script) + file_assertions.assert_dir_not_exists(codex_evolve_md.parent) + # The user's hooks are still untouched. + file_assertions.assert_file_unchanged(hooks_file, original_hooks_content) install_runner.run("install", platform="codex") @@ -323,14 +397,8 @@ def test_codex_uninstall_install_cycle(self, temp_project_dir, install_runner, c assert any(entry["name"] == "my-codex-plugin" for entry in reinstalled_marketplace["plugins"]) assert any(entry["name"] == "evolve-lite" for entry in reinstalled_marketplace["plugins"]) - reinstalled_hooks = json.loads(hooks_file.read_text()) - assert any( - hook.get("command") == "python3 ~/.codex/hooks/custom_prompt_memory.py" - for group in reinstalled_hooks["hooks"]["UserPromptSubmit"] - for hook in group.get("hooks", []) - ) - assert any( - "plugins/evolve-lite/skills/evolve-lite/recall/scripts/retrieve_entities.py" in hook.get("command", "") - for group in reinstalled_hooks["hooks"]["UserPromptSubmit"] - for hook in group.get("hooks", []) - ) + # Reinstall re-injects exactly one managed pointer line and still leaves user hooks alone. 
+ reinstalled_markers = [ln for ln in codex_agents_file.read_text().splitlines() if MANAGED_MARKER in ln] + assert len(reinstalled_markers) == 1, f"Expected exactly one managed line, got {reinstalled_markers!r}" + file_assertions.assert_file_exists(codex_evolve_md) + file_assertions.assert_file_unchanged(hooks_file, original_hooks_content) diff --git a/tests/platform_integrations/test_marker_line.py b/tests/platform_integrations/test_marker_line.py new file mode 100644 index 00000000..11818565 --- /dev/null +++ b/tests/platform_integrations/test_marker_line.py @@ -0,0 +1,116 @@ +""" +Focused unit tests for FileOps.inject_marker_line / remove_marker_line. + +These two generic helpers manage a SINGLE greppable "managed" line in a text +file (the Codex installer uses them to point ~/.codex/AGENTS.md at the on-disk +EVOLVE.md copy; the Claude phase will reuse them). The FileOps class lives +inside the install.sh heredoc, so we extract and exec that Python source into a +throwaway namespace to test the methods in isolation, with no subprocess. +""" + +import re +from pathlib import Path + +import pytest + + +MARKER = "" +LINE = f"Read ~/.codex/evolve-lite/EVOLVE.md and follow it. {MARKER}" + + +@pytest.fixture(scope="module") +def file_ops(): + """Extract the embedded Python from install.sh and return a fresh FileOps().""" + repo_root = Path(__file__).parent.parent.parent + script = (repo_root / "platform-integrations" / "install.sh").read_text() + m = re.search(r"<<'PYEOF'\n(.*)\nPYEOF", script, re.DOTALL) + assert m, "Could not locate the embedded Python heredoc in install.sh" + ns = {} + # Give the module a benign argv so its top-level `sys.argv[1]` read succeeds. + code = "import sys\nsys.argv = ['install.sh', '', 'status']\n" + m.group(1) + # Strip the `if __name__ == '__main__': main()` trailer so exec doesn't run the CLI. 
+ code = code.replace('if __name__ == "__main__":\n main()', "") + exec(compile(code, "install.sh:PYEOF", "exec"), ns) + return ns["FileOps"]() + + +@pytest.mark.platform_integrations +class TestInjectMarkerLine: + def test_creates_file_and_parents_when_missing(self, file_ops, tmp_path): + path = tmp_path / "nested" / "AGENTS.md" + file_ops.inject_marker_line(path, MARKER, LINE) + assert path.read_text() == LINE + "\n" + + def test_appends_with_blank_line_when_content_present(self, file_ops, tmp_path): + path = tmp_path / "AGENTS.md" + path.write_text("# My instructions\n\nPrefer ripgrep.\n") + file_ops.inject_marker_line(path, MARKER, LINE) + text = path.read_text() + # Original content preserved, exactly one managed line, separated by a blank line. + assert text.startswith("# My instructions\n\nPrefer ripgrep.\n\n") + assert text.count(MARKER) == 1 + assert text.endswith(LINE + "\n") + + def test_replaces_existing_managed_line_in_place(self, file_ops, tmp_path): + path = tmp_path / "AGENTS.md" + old_line = f"Stale pointer to /old/path. {MARKER}" + path.write_text(f"# Top\n{old_line}\n# Bottom\n") + file_ops.inject_marker_line(path, MARKER, LINE) + text = path.read_text() + # The whole stale line is replaced; surrounding content untouched. + assert old_line not in text + assert text.count(MARKER) == 1 + assert LINE in text + assert "# Top" in text and "# Bottom" in text + # No line was added or removed (still 3 lines). 
+ assert text.splitlines() == ["# Top", LINE, "# Bottom"] + + def test_idempotent_across_repeats(self, file_ops, tmp_path): + path = tmp_path / "AGENTS.md" + path.write_text("# Existing\n") + file_ops.inject_marker_line(path, MARKER, LINE) + first = path.read_text() + file_ops.inject_marker_line(path, MARKER, LINE) + file_ops.inject_marker_line(path, MARKER, LINE) + assert path.read_text() == first + assert path.read_text().count(MARKER) == 1 + + def test_rejects_line_without_marker(self, file_ops, tmp_path): + path = tmp_path / "AGENTS.md" + with pytest.raises(ValueError): + file_ops.inject_marker_line(path, MARKER, "no marker here") + + +@pytest.mark.platform_integrations +class TestRemoveMarkerLine: + def test_no_op_when_file_missing(self, file_ops, tmp_path): + path = tmp_path / "missing.md" + file_ops.remove_marker_line(path, MARKER) # must not raise + assert not path.exists() + + def test_removes_managed_line_preserving_other_lines(self, file_ops, tmp_path): + path = tmp_path / "AGENTS.md" + path.write_text(f"# Top\n\n{LINE}\n\n# Bottom\n") + file_ops.remove_marker_line(path, MARKER) + text = path.read_text() + assert MARKER not in text + assert "# Top" in text and "# Bottom" in text + # No doubled blank-line gap left where the managed line used to be. + assert "\n\n\n" not in text + + def test_removes_only_marker_lines(self, file_ops, tmp_path): + path = tmp_path / "AGENTS.md" + path.write_text(f"keep me\n{LINE}\nkeep me too\n") + file_ops.remove_marker_line(path, MARKER) + assert path.read_text().splitlines() == ["keep me", "keep me too"] + + def test_inject_then_remove_round_trips(self, file_ops, tmp_path): + path = tmp_path / "AGENTS.md" + original = "# My instructions\n\nPrefer ripgrep.\n" + path.write_text(original) + file_ops.inject_marker_line(path, MARKER, LINE) + file_ops.remove_marker_line(path, MARKER) + text = path.read_text() + assert MARKER not in text + assert "# My instructions" in text and "Prefer ripgrep." 
in text + assert "\n\n\n" not in text diff --git a/tests/platform_integrations/test_preservation.py b/tests/platform_integrations/test_preservation.py index cbf3092b..4e617207 100644 --- a/tests/platform_integrations/test_preservation.py +++ b/tests/platform_integrations/test_preservation.py @@ -47,25 +47,41 @@ def test_preserves_existing_commands(self, temp_project_dir, install_runner, bob bob_dir = temp_project_dir / ".bob" file_assertions.assert_all_bob_commands_installed(bob_dir) - def test_preserves_existing_custom_modes_yaml(self, temp_project_dir, install_runner, bob_fixtures, file_assertions): - """Install evolve when user has existing custom modes - they must be preserved.""" - # Setup: Create user's custom mode + def test_preserves_existing_custom_modes_and_user_rules( + self, temp_project_dir, install_runner, bob_fixtures, file_assertions, bob_rules_file + ): + """Lite install must leave the user's custom_modes.yaml and unrelated rules untouched. + + Lite no longer merges a mode into custom_modes.yaml, nor injects an + AGENTS.md import. The evolve always-on instructions live in Bob's GLOBAL + rules dir at ~/.bob/rules/00-evolve-lite.md. A pre-existing, unrelated + rules file (e.g. ~/.bob/rules/99-user.md) must be left intact, and no + AGENTS.md must be created. + """ + # Setup: user's custom mode, plus a pre-existing unrelated global rules file. 
custom_modes_file = bob_fixtures.create_existing_custom_modes(temp_project_dir) + original_modes_content = custom_modes_file.read_text() - # Action: Install evolve + user_rule = bob_rules_file.parent / "99-user.md" + user_rule.parent.mkdir(parents=True, exist_ok=True) + original_rule_content = "# My personal rules\n\nAlways prefer tabs.\n" + user_rule.write_text(original_rule_content) + + # Action: Install evolve (lite is the default mode) install_runner.run("install", platform="bob") - # Assert: User's custom mode is still present - current_content = custom_modes_file.read_text() - assert "slug: my-mode" in current_content, "User's custom mode was removed!" - assert "My Custom Mode" in current_content + # Assert: User's custom_modes.yaml is byte-for-byte unchanged. + file_assertions.assert_file_unchanged(custom_modes_file, original_modes_content) - # Assert: Evolve mode is added with sentinels - file_assertions.assert_sentinel_block_exists(custom_modes_file, "evolve-lite") - assert "slug: evolve-lite" in current_content + # Assert: User's unrelated rules file is byte-for-byte unchanged. + file_assertions.assert_file_unchanged(user_rule, original_rule_content) - # Assert: No duplicate user modes - assert current_content.count("slug: my-mode") == 1 + # Assert: The evolve instructions live in the global rules file, holding + # the full EVOLVE.md text; no AGENTS.md was created. 
+ file_assertions.assert_file_exists(bob_rules_file) + assert "self-directed memory" in bob_rules_file.read_text() + file_assertions.assert_file_not_exists(temp_project_dir / "AGENTS.md") + file_assertions.assert_file_not_exists((temp_project_dir / ".bob") / "AGENTS.md") def test_preserves_existing_mcp_servers(self, temp_project_dir, install_runner, bob_fixtures, file_assertions): """Install evolve full mode when user has existing MCP servers - they must be preserved.""" @@ -111,7 +127,7 @@ def test_refreshes_managed_evolve_mcp_server_fields_and_preserves_custom_fields( assert evolve_server["env"] == {"EVOLVE_PROFILE": "local"} assert evolve_server["metadata"] == {"managedBy": "user"} - def test_preserves_all_bob_content_together_lite(self, temp_project_dir, install_runner, bob_fixtures, file_assertions): + def test_preserves_all_bob_content_together_lite(self, temp_project_dir, install_runner, bob_fixtures, file_assertions, bob_rules_file): """Install evolve lite mode when user has all types of Bob content - all must be preserved.""" # Setup: Create all types of user content custom_skill = bob_fixtures.create_existing_skill(temp_project_dir) @@ -129,12 +145,19 @@ def test_preserves_all_bob_content_together_lite(self, temp_project_dir, install file_assertions.assert_file_unchanged(custom_skill / "SKILL.md", skill_content) file_assertions.assert_file_unchanged(custom_command, command_content) + # User's custom_modes.yaml is untouched by lite (it no longer merges modes). assert "slug: my-mode" in custom_modes.read_text() - # Assert: Evolve lite content is added + # Assert: Evolve lite content is added. Skills/commands/lib are copied, and the + # always-on instructions are wired via the GLOBAL rules file (not custom_modes.yaml). 
bob_dir = temp_project_dir / ".bob" file_assertions.assert_all_bob_skills_installed(bob_dir) - file_assertions.assert_sentinel_block_exists(custom_modes, "evolve-lite") + file_assertions.assert_all_bob_commands_installed(bob_dir) + file_assertions.assert_dir_exists(bob_dir / "lib" / "evolve-lite") + file_assertions.assert_file_exists(bob_rules_file) + # No AGENTS.md or per-project EVOLVE.md copy is created. + file_assertions.assert_file_not_exists(temp_project_dir / "AGENTS.md") + file_assertions.assert_file_not_exists(bob_dir / "EVOLVE.md") def test_preserves_all_bob_content_together_full(self, temp_project_dir, install_runner, bob_fixtures, file_assertions): """Install evolve full mode when user has all types of Bob content - all must be preserved.""" @@ -188,28 +211,44 @@ def test_preserves_existing_marketplace_entries(self, temp_project_dir, install_ evolve_plugins = [entry for entry in current_data["plugins"] if entry["name"] == "evolve-lite"] assert len(evolve_plugins) == 1, "Evolve plugin entry missing from marketplace.json" - def test_preserves_existing_hooks_and_plugin_files(self, temp_project_dir, install_runner, codex_fixtures, file_assertions): - """Install evolve when user already has hooks and plugins - they must be preserved.""" + def test_preserves_existing_hooks_and_plugin_files( + self, temp_project_dir, install_runner, codex_fixtures, file_assertions, codex_agents_file + ): + """Install evolve when user already has hooks and plugins - they must be preserved. + + Codex no longer registers any hooks; it drops a COPY of EVOLVE.md on disk + and injects a SINGLE managed pointer line into the (sandboxed) + ~/.codex/AGENTS.md. So the user's hooks.json must be left COMPLETELY + UNCHANGED (no Evolve sync/recall hook added), and the pointer line must + appear in AGENTS.md instead. 
+ """ custom_plugin = codex_fixtures.create_existing_plugin(temp_project_dir) plugin_json = custom_plugin / ".codex-plugin" / "plugin.json" original_plugin_content = plugin_json.read_text() hooks_file = codex_fixtures.create_existing_hooks(temp_project_dir) + original_hooks_content = hooks_file.read_text() install_runner.run("install", platform="codex") + # The user's plugin.json is untouched. file_assertions.assert_file_unchanged(plugin_json, original_plugin_content) + # The user's hooks.json is byte-for-byte unchanged: no Evolve hook is added. + file_assertions.assert_file_unchanged(hooks_file, original_hooks_content) + current_hooks = json.loads(hooks_file.read_text()) + # SessionStart count stays at the user's original (1) — no sync hook added. session_start_hooks = current_hooks["hooks"]["SessionStart"] - assert len(session_start_hooks) == 2, "Expected the user's SessionStart hook plus the Evolve sync hook." + assert len(session_start_hooks) == 1, "Codex install must not add a SessionStart hook anymore." assert any( any(hook.get("command") == "python3 ~/.codex/hooks/session_start.py" for hook in group.get("hooks", [])) for group in session_start_hooks ), "User's SessionStart hook was removed!" - assert any( - any("plugins/evolve-lite/skills/evolve-lite/sync/scripts/sync.py" in hook.get("command", "") for hook in group.get("hooks", [])) + assert all( + "plugins/evolve-lite/skills/evolve-lite/sync/scripts/sync.py" not in hook.get("command", "") for group in session_start_hooks - ), "Evolve SessionStart hook was not added!" + for hook in group.get("hooks", []) + ), "Codex install must no longer add an Evolve SessionStart hook." 
prompt_hooks = current_hooks["hooks"]["UserPromptSubmit"] custom_prompt_hooks = [ @@ -219,17 +258,17 @@ def test_preserves_existing_hooks_and_plugin_files(self, temp_project_dir, insta if hook.get("command") == "python3 ~/.codex/hooks/custom_prompt_memory.py" ] assert len(custom_prompt_hooks) == 1, "User's UserPromptSubmit hook was removed!" - - evolve_hooks = [ - group + assert all( + "plugins/evolve-lite/skills/evolve-lite/recall/scripts/retrieve_entities.py" not in hook.get("command", "") for group in prompt_hooks - if any( - "plugins/evolve-lite/skills/evolve-lite/recall/scripts/retrieve_entities.py" in hook.get("command", "") - for hook in group.get("hooks", []) - ) - ] - assert len(evolve_hooks) == 1, "Evolve UserPromptSubmit hook was not added!" - assert evolve_hooks[0].get("matcher") == "" + for hook in group.get("hooks", []) + ), "Codex install must no longer add an Evolve UserPromptSubmit hook." + + # The evolve always-on instructions now live behind a single managed + # pointer line in ~/.codex/AGENTS.md (sandboxed). + MANAGED_MARKER = "" + marker_lines = [ln for ln in codex_agents_file.read_text().splitlines() if MANAGED_MARKER in ln] + assert len(marker_lines) == 1, f"Expected exactly one managed line, got {marker_lines!r}" @pytest.mark.platform_integrations From 5ca1c2aeaaefe1691c4c725db3216ab6b66adf34 Mon Sep 17 00:00:00 2001 From: Punleuk Oum <5661986+illeatmyhat@users.noreply.github.com> Date: Mon, 8 Jun 2026 11:12:03 -0700 Subject: [PATCH 02/12] feat(platform-integrations): native-memory adapter + thin Claude EVOLVE.md via CLAUDE.md @import Claude leans on its native self-directed memory; evolve adds only sharing + provenance as a thin EVOLVE.md, delivered through a single CLAUDE.md @import pointer (the import reference doubles as its own sentinel). 
- entity_io: accept arbitrary entity types (sanitized), not just guideline/preference, so Claude's native types pass straight through - new Claude-only adapt-memory skill: mirrors a just-saved native memory into .evolve/entities// with an agent-synthesized trigger - EVOLVE.md.j2: thin Claude variant (native owns recall/save; evolve adds mirror-on-save + audit-on-recall); bob/codex EVOLVE.md unchanged - ClaudeInstaller: copy EVOLVE.md to /.evolve/, inject single @.evolve/EVOLVE.md line into /CLAUDE.md, copy audit_recall.py to ~/.claude/evolve-lite/; warn about the one-time external-import approval Co-Authored-By: Claude Opus 4.8 (1M context) --- .../commands/evolve-lite-adapt-memory.md | 4 + .../evolve-lite/lib/evolve-lite/entity_io.py | 22 ++- .../skills/evolve-lite-adapt-memory/SKILL.md | 13 ++ .../scripts/adapt_memory.py | 147 ++++++++++++++++++ .../claude/plugins/evolve-lite/EVOLVE.md | 78 ++-------- .../evolve-lite/lib/evolve-lite/entity_io.py | 22 ++- .../skills/evolve-lite/adapt-memory/SKILL.md | 59 +++++++ .../adapt-memory/scripts/adapt_memory.py | 147 ++++++++++++++++++ .../evolve-lite/lib/evolve-lite/entity_io.py | 22 ++- .../skills/evolve-lite/adapt-memory/SKILL.md | 13 ++ .../adapt-memory/scripts/adapt_memory.py | 147 ++++++++++++++++++ .../evolve-lite/lib/evolve-lite/entity_io.py | 22 ++- .../skills/evolve-lite/adapt-memory/SKILL.md | 13 ++ .../adapt-memory/scripts/adapt_memory.py | 147 ++++++++++++++++++ platform-integrations/install.sh | 76 +++++++++ plugin-source/EVOLVE.md.j2 | 32 ++++ plugin-source/lib/entity_io.py | 22 ++- .../evolve-lite/adapt-memory/SKILL.md.j2 | 65 ++++++++ .../adapt-memory/scripts/adapt_memory.py | 147 ++++++++++++++++++ tests/platform_integrations/conftest.py | 29 ++++ tests/platform_integrations/test_claude.py | 117 +++++++++++++- .../test_entity_io_core.py | 18 ++- .../test_plugin_structure.py | 1 + 23 files changed, 1271 insertions(+), 92 deletions(-) create mode 100644 
platform-integrations/bob/evolve-lite/commands/evolve-lite-adapt-memory.md create mode 100644 platform-integrations/bob/evolve-lite/skills/evolve-lite-adapt-memory/SKILL.md create mode 100644 platform-integrations/bob/evolve-lite/skills/evolve-lite-adapt-memory/scripts/adapt_memory.py create mode 100644 platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md create mode 100644 platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py create mode 100644 platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md create mode 100644 platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py create mode 100644 platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md create mode 100644 platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py create mode 100644 plugin-source/skills/evolve-lite/adapt-memory/SKILL.md.j2 create mode 100644 plugin-source/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py diff --git a/platform-integrations/bob/evolve-lite/commands/evolve-lite-adapt-memory.md b/platform-integrations/bob/evolve-lite/commands/evolve-lite-adapt-memory.md new file mode 100644 index 00000000..9d04f3cc --- /dev/null +++ b/platform-integrations/bob/evolve-lite/commands/evolve-lite-adapt-memory.md @@ -0,0 +1,4 @@ +--- +description: Mirror a just-saved native memory into the shared evolve store so it becomes shareable and auditable +--- +Use the `evolve-lite-adapt-memory` skill on the current conversation. Follow the skill's instructions exactly. 
diff --git a/platform-integrations/bob/evolve-lite/lib/evolve-lite/entity_io.py b/platform-integrations/bob/evolve-lite/lib/evolve-lite/entity_io.py index 63f77e2c..9b177718 100644 --- a/platform-integrations/bob/evolve-lite/lib/evolve-lite/entity_io.py +++ b/platform-integrations/bob/evolve-lite/lib/evolve-lite/entity_io.py @@ -118,6 +118,21 @@ def slugify(text, max_length=60): return text or "entity" +def sanitize_type(text): + """Sanitize an entity *type* into a filesystem-safe subdirectory name. + + Like :func:`slugify` but without truncation — a type is a short label, + not free-form content, and truncating it could silently merge distinct + types. Returns an empty string for input that contains no usable + characters, leaving the fallback decision to the caller. + """ + if not isinstance(text, str): + return "" + text = text.lower() + text = re.sub(r"[^a-z0-9]+", "-", text) + return text.strip("-") + + def unique_filename(directory, slug): """Return a Path that doesn't collide with existing files in *directory*. @@ -348,10 +363,9 @@ def write_entity_file(directory, entity): Returns: Path to the written file. """ - _ALLOWED_TYPES = {"guideline", "preference"} - entity_type = entity.get("type", "guideline") - if not isinstance(entity_type, str) or entity_type not in _ALLOWED_TYPES: - entity_type = "guideline" + # Any non-empty type is accepted and used (sanitized) as the + # subdirectory. An empty/invalid type falls back to "guideline". 
+ entity_type = sanitize_type(entity.get("type", "guideline")) or "guideline" entity["type"] = entity_type type_dir = Path(directory) / entity_type type_dir.mkdir(parents=True, exist_ok=True) diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite-adapt-memory/SKILL.md b/platform-integrations/bob/evolve-lite/skills/evolve-lite-adapt-memory/SKILL.md new file mode 100644 index 00000000..97b9d4a9 --- /dev/null +++ b/platform-integrations/bob/evolve-lite/skills/evolve-lite-adapt-memory/SKILL.md @@ -0,0 +1,13 @@ +--- +name: evolve-lite:adapt-memory +description: Mirror a just-saved native memory into the shared evolve store so it becomes shareable and auditable +--- + +# Adapt Memory + +This skill mirrors a just-saved native memory into the shared evolve store. It +is specific to hosts with native self-directed memory and is a no-op on this +platform — there is no native memory store to mirror from. Use the +`evolve-lite:learn` +skill to capture reusable lessons here. + diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite-adapt-memory/scripts/adapt_memory.py b/platform-integrations/bob/evolve-lite/skills/evolve-lite-adapt-memory/scripts/adapt_memory.py new file mode 100644 index 00000000..fb90d93c --- /dev/null +++ b/platform-integrations/bob/evolve-lite/skills/evolve-lite-adapt-memory/scripts/adapt_memory.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +""" +Adapt Memory Script (Claude-only) + +Mirrors a single native Claude memory file into the in-repo evolve store at +``${EVOLVE_DIR:-.evolve}/entities/<type>/<slug>.md`` so the memory becomes +shareable and auditable like every other evolve entity.
+ +Native memory files live under ``~/.claude/projects/<project>/memory/`` and carry +frontmatter of the form:: + + --- + name: <name> + description: <description> + metadata: + type: user | feedback | project | reference + --- + + <body> + +The agent passes the native ``--type`` through verbatim (native types map +straight onto the entity type — no remapping) and supplies a synthesized +``--trigger`` (the single most important field for future retrieval). The body +of the native file becomes the entity content; the native ``description`` is +carried into the body as a lead line when present. + +Usage: + python3 adapt_memory.py <memory_path> --type <type> --trigger <trigger> +""" + +import argparse +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple +# plugins can coexist side by side (e.g. .bob/lib/evolve-lite/). +_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) +from entity_io import ( # noqa: E402 + find_entities_dir, + get_default_entities_dir, + write_entity_file, + log as _log, +) + + +def log(message): + _log("adapt-memory", message) + + +def parse_native_memory(text): + """Split a native memory file into (description, body). + + Native frontmatter is simple ``key: value`` lines plus a nested + ``metadata:`` block; we only need ``description`` and the body, so we + parse the top-level ``description:`` line and treat everything after the + closing ``---`` as the body. Missing frontmatter is tolerated — the whole + text is then the body.
+ """ + description = None + body = text + if text.startswith("---"): + parts = text.split("---", 2) + if len(parts) >= 3: + frontmatter, body = parts[1], parts[2] + for line in frontmatter.splitlines(): + # Only top-level keys (no leading indentation) — keeps the + # nested metadata.* keys out of the description match. + if line[:1].isspace(): + continue + key, _, value = line.partition(":") + if key.strip() == "description": + value = value.strip() + if value: + description = value + break + return description, body.strip() + + +def main(): + parser = argparse.ArgumentParser(description="Mirror a native memory into the evolve store.") + parser.add_argument("memory_path", help="Path to the just-saved native memory file.") + parser.add_argument( + "--type", + required=True, + help="Native memory type, passed through as the entity type (e.g. user, feedback, project, reference).", + ) + parser.add_argument( + "--trigger", + required=True, + help="Synthesized one-sentence 'when to recall this' description.", + ) + args = parser.parse_args() + + memory_path = Path(args.memory_path).expanduser() + if not memory_path.is_file(): + print(f"Error: native memory file not found: {memory_path}", file=sys.stderr) + sys.exit(1) + + try: + text = memory_path.read_text(encoding="utf-8") + except OSError as exc: + print(f"Error: cannot read {memory_path} - {exc}", file=sys.stderr) + sys.exit(1) + + description, body = parse_native_memory(text) + if not body: + print(f"Error: native memory {memory_path} has no body to mirror.", file=sys.stderr) + sys.exit(1) + + # Carry the native description into the body as a lead line when it isn't + # already echoed there, so the mirrored entity is self-describing. 
+ content = body + if description and description not in body: + content = f"{description}\n\n{body}" + + entity = { + "type": args.type, + "trigger": args.trigger, + "content": content, + "source": "native-memory", + } + + entities_dir = find_entities_dir() + if entities_dir: + entities_dir = entities_dir.resolve() + log(f"Using existing entities dir: {entities_dir}") + else: + entities_dir = get_default_entities_dir() + log(f"Created entities dir: {entities_dir}") + + path = write_entity_file(entities_dir, entity) + log(f"Mirrored {memory_path} -> {path}") + print(f"Mirrored native memory into evolve store: {path}") + + +if __name__ == "__main__": + main() diff --git a/platform-integrations/claude/plugins/evolve-lite/EVOLVE.md b/platform-integrations/claude/plugins/evolve-lite/EVOLVE.md index f8eefc6b..1df5f074 100644 --- a/platform-integrations/claude/plugins/evolve-lite/EVOLVE.md +++ b/platform-integrations/claude/plugins/evolve-lite/EVOLVE.md @@ -1,27 +1,21 @@ -# Evolve — self-directed memory +# Evolve — shared, auditable memory -You have a persistent, file-based memory for the current project, stored under -`./.evolve/memory/` (relative to the workspace/project root). You decide, on -your own judgment, when something is worth remembering — nothing forces a save, -and there is no step to "complete." Curate this memory like notes you'll thank -yourself for later: small, accurate, high-signal. +You already have native, self-directed memory: you decide what to recall at the +start of a task and what to save at the end, on your own judgment. Evolve adds +only the two things native memory lacks — sharing and provenance — as two +lightweight lifecycle steps. It does not change how you recall or save. -## Recall — at the start of a non-trivial task +## After you save a memory -Before substantive work (code changes, debugging, repo exploration, or -environment/tooling investigation), read your memory index at -`./.evolve/memory/MEMORY.md` if it exists. 
It holds one line per memory with a -short description. Open the individual memory files whose description looks -relevant to the task at hand, and let them inform what you do. If the index is -missing or nothing looks relevant, just proceed — that's normal. - -Memories reflect what was true when written. If a memory names a file, -function, command, or flag, verify it still exists before relying on it. +When you save a native memory this turn, invoke /evolve-lite:adapt-memory +to mirror it into the shared evolve store (`./.evolve/entities/`) so the fact is +shareable with the team and auditable like every other evolve entity. Do this +once per memory you saved. ## Record what you consulted -After recall, log which entries you actually opened, so the value of this memory -can be measured over time. Run: +After you read or consult memories this turn, log which ones you actually opened, +so the value of this memory can be measured over time. Run: ``` python3 ~/.claude/evolve-lite/audit_recall.py [ ...] @@ -32,51 +26,3 @@ project root). Skip this step entirely if you consulted no memories. If the command prints a line beginning `evolve-session:`, include that line once, verbatim, somewhere in your reply — it lets later analysis tie this session to what you recalled. - -## Save — only when you learn something durable - -Near the end of a task, if it produced a reusable fact that isn't already -obvious from the code or git history — and only then — write it to memory. -Saving nothing is the right outcome more often than not; never force a -low-value memory just to have saved one. - -Each memory is one file holding one fact, under `./.evolve/memory/` (create the -directory if it doesn't exist), with frontmatter: - -```markdown ---- -name: -description: -metadata: - type: user | feedback | project | reference ---- - - -``` - -Types: -- **user** — who the user is: role, expertise, durable preferences. 
-- **feedback** — guidance on how you should work, both corrections and - confirmed approaches; always include the why. -- **project** — ongoing work, goals, or constraints not derivable from the code - or git history; convert relative dates ("next week") to absolute ones. -- **reference** — pointers to external resources (URLs, dashboards, tickets). - -In the body, link related memories with `[[name]]`, where `name` is another -memory's `name:` slug. Link liberally; a `[[name]]` with no file yet marks -something worth writing later, not an error. - -After writing the file, add a one-line pointer to `./.evolve/memory/MEMORY.md`: -`- [Title](file.md) — short hook`. MEMORY.md is the index you read during -recall — one line per memory, no frontmatter, never put memory content there. - -## When NOT to save, and housekeeping - -- Don't duplicate what the repo already records: code structure, git history, - READMEs, existing docs. If asked to remember one of those, ask what was - non-obvious about it and save that instead. -- Don't save what only matters to the current conversation. -- Before saving, check for an existing memory that already covers it — update - that file rather than creating a duplicate. -- Delete memories that turn out to be wrong. diff --git a/platform-integrations/claude/plugins/evolve-lite/lib/evolve-lite/entity_io.py b/platform-integrations/claude/plugins/evolve-lite/lib/evolve-lite/entity_io.py index 63f77e2c..9b177718 100644 --- a/platform-integrations/claude/plugins/evolve-lite/lib/evolve-lite/entity_io.py +++ b/platform-integrations/claude/plugins/evolve-lite/lib/evolve-lite/entity_io.py @@ -118,6 +118,21 @@ def slugify(text, max_length=60): return text or "entity" +def sanitize_type(text): + """Sanitize an entity *type* into a filesystem-safe subdirectory name. + + Like :func:`slugify` but without truncation — a type is a short label, + not free-form content, and truncating it could silently merge distinct + types. 
Returns an empty string for input that contains no usable + characters, leaving the fallback decision to the caller. + """ + if not isinstance(text, str): + return "" + text = text.lower() + text = re.sub(r"[^a-z0-9]+", "-", text) + return text.strip("-") + + def unique_filename(directory, slug): """Return a Path that doesn't collide with existing files in *directory*. @@ -348,10 +363,9 @@ def write_entity_file(directory, entity): Returns: Path to the written file. """ - _ALLOWED_TYPES = {"guideline", "preference"} - entity_type = entity.get("type", "guideline") - if not isinstance(entity_type, str) or entity_type not in _ALLOWED_TYPES: - entity_type = "guideline" + # Any non-empty type is accepted and used (sanitized) as the + # subdirectory. An empty/invalid type falls back to "guideline". + entity_type = sanitize_type(entity.get("type", "guideline")) or "guideline" entity["type"] = entity_type type_dir = Path(directory) / entity_type type_dir.mkdir(parents=True, exist_ok=True) diff --git a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md new file mode 100644 index 00000000..6bd4ee26 --- /dev/null +++ b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md @@ -0,0 +1,59 @@ +--- +name: adapt-memory +description: Mirror a just-saved native memory into the shared evolve store so it becomes shareable and auditable +context: fork +--- + +# Adapt Memory + +## Overview + +You have native, self-directed memory: you decide what to remember and write it +under `~/.claude/projects/<project>/memory/`. That store has no provenance or +sharing. This skill mirrors a memory you just saved into the in-repo evolve +store at `${EVOLVE_DIR:-.evolve}/entities/<type>/<slug>.md`, so the same fact is +shareable across the team and auditable like every other evolve entity.
+ +Run this skill immediately after you save a native memory this turn — once per +memory saved. + +## Required Action + +For each native memory file you saved this turn: + +1. **Read the just-saved memory file** so you are mirroring its real content, + not a guess. Note its `metadata.type` (one of `user`, `feedback`, `project`, + `reference`) — this passes straight through as the entity type, with no + remapping. + +2. **Compose a high-quality `trigger`.** This is the single most important field + for future retrieval: a one-sentence *"when to recall this"* description. + Base it on what the memory actually says and the situations in which a future + agent would benefit from it — do **not** mechanically copy the memory's + `description`. Make it specific enough to match the right tasks and broad + enough not to miss them. + +3. **Run the adapter script**, passing the native file path, its type, and your + synthesized trigger: + +```bash +python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py \ + <memory_path> \ + --type <type> \ + --trigger "<trigger>" +``` + +The script parses the native frontmatter and body, builds the entity +(`type` = native type, `trigger` = your synthesized trigger, `content` = the +native body with its `description` carried in as a lead line), and persists it +via the shared entity writer. It is safe to run repeatedly. + +## Notes + +- One invocation per saved memory. If you saved several memories this turn, + invoke the script once for each, with a trigger tailored to each. +- The trigger quality directly determines whether the memory resurfaces when it + matters. Spend a moment on it. +- If you saved no native memory this turn, there is nothing to mirror — skip + this skill.
+ diff --git a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py new file mode 100644 index 00000000..fb90d93c --- /dev/null +++ b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +""" +Adapt Memory Script (Claude-only) + +Mirrors a single native Claude memory file into the in-repo evolve store at +``${EVOLVE_DIR:-.evolve}/entities/<type>/<slug>.md`` so the memory becomes +shareable and auditable like every other evolve entity. + +Native memory files live under ``~/.claude/projects/<project>/memory/`` and carry +frontmatter of the form:: + + --- + name: <name> + description: <description> + metadata: + type: user | feedback | project | reference + --- + + <body> + +The agent passes the native ``--type`` through verbatim (native types map +straight onto the entity type — no remapping) and supplies a synthesized +``--trigger`` (the single most important field for future retrieval). The body +of the native file becomes the entity content; the native ``description`` is +carried into the body as a lead line when present. + +Usage: + python3 adapt_memory.py <memory_path> --type <type> --trigger <trigger> +""" + +import argparse +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple +# plugins can coexist side by side (e.g. .bob/lib/evolve-lite/).
+_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) +from entity_io import ( # noqa: E402 + find_entities_dir, + get_default_entities_dir, + write_entity_file, + log as _log, +) + + +def log(message): + _log("adapt-memory", message) + + +def parse_native_memory(text): + """Split a native memory file into (description, body). + + Native frontmatter is simple ``key: value`` lines plus a nested + ``metadata:`` block; we only need ``description`` and the body, so we + parse the top-level ``description:`` line and treat everything after the + closing ``---`` as the body. Missing frontmatter is tolerated — the whole + text is then the body. + """ + description = None + body = text + if text.startswith("---"): + parts = text.split("---", 2) + if len(parts) >= 3: + frontmatter, body = parts[1], parts[2] + for line in frontmatter.splitlines(): + # Only top-level keys (no leading indentation) — keeps the + # nested metadata.* keys out of the description match. + if line[:1].isspace(): + continue + key, _, value = line.partition(":") + if key.strip() == "description": + value = value.strip() + if value: + description = value + break + return description, body.strip() + + +def main(): + parser = argparse.ArgumentParser(description="Mirror a native memory into the evolve store.") + parser.add_argument("memory_path", help="Path to the just-saved native memory file.") + parser.add_argument( + "--type", + required=True, + help="Native memory type, passed through as the entity type (e.g. 
user, feedback, project, reference).", + ) + parser.add_argument( + "--trigger", + required=True, + help="Synthesized one-sentence 'when to recall this' description.", + ) + args = parser.parse_args() + + memory_path = Path(args.memory_path).expanduser() + if not memory_path.is_file(): + print(f"Error: native memory file not found: {memory_path}", file=sys.stderr) + sys.exit(1) + + try: + text = memory_path.read_text(encoding="utf-8") + except OSError as exc: + print(f"Error: cannot read {memory_path} - {exc}", file=sys.stderr) + sys.exit(1) + + description, body = parse_native_memory(text) + if not body: + print(f"Error: native memory {memory_path} has no body to mirror.", file=sys.stderr) + sys.exit(1) + + # Carry the native description into the body as a lead line when it isn't + # already echoed there, so the mirrored entity is self-describing. + content = body + if description and description not in body: + content = f"{description}\n\n{body}" + + entity = { + "type": args.type, + "trigger": args.trigger, + "content": content, + "source": "native-memory", + } + + entities_dir = find_entities_dir() + if entities_dir: + entities_dir = entities_dir.resolve() + log(f"Using existing entities dir: {entities_dir}") + else: + entities_dir = get_default_entities_dir() + log(f"Created entities dir: {entities_dir}") + + path = write_entity_file(entities_dir, entity) + log(f"Mirrored {memory_path} -> {path}") + print(f"Mirrored native memory into evolve store: {path}") + + +if __name__ == "__main__": + main() diff --git a/platform-integrations/claw-code/plugins/evolve-lite/lib/evolve-lite/entity_io.py b/platform-integrations/claw-code/plugins/evolve-lite/lib/evolve-lite/entity_io.py index 63f77e2c..9b177718 100644 --- a/platform-integrations/claw-code/plugins/evolve-lite/lib/evolve-lite/entity_io.py +++ b/platform-integrations/claw-code/plugins/evolve-lite/lib/evolve-lite/entity_io.py @@ -118,6 +118,21 @@ def slugify(text, max_length=60): return text or "entity" +def 
sanitize_type(text): + """Sanitize an entity *type* into a filesystem-safe subdirectory name. + + Like :func:`slugify` but without truncation — a type is a short label, + not free-form content, and truncating it could silently merge distinct + types. Returns an empty string for input that contains no usable + characters, leaving the fallback decision to the caller. + """ + if not isinstance(text, str): + return "" + text = text.lower() + text = re.sub(r"[^a-z0-9]+", "-", text) + return text.strip("-") + + def unique_filename(directory, slug): """Return a Path that doesn't collide with existing files in *directory*. @@ -348,10 +363,9 @@ def write_entity_file(directory, entity): Returns: Path to the written file. """ - _ALLOWED_TYPES = {"guideline", "preference"} - entity_type = entity.get("type", "guideline") - if not isinstance(entity_type, str) or entity_type not in _ALLOWED_TYPES: - entity_type = "guideline" + # Any non-empty type is accepted and used (sanitized) as the + # subdirectory. An empty/invalid type falls back to "guideline". + entity_type = sanitize_type(entity.get("type", "guideline")) or "guideline" entity["type"] = entity_type type_dir = Path(directory) / entity_type type_dir.mkdir(parents=True, exist_ok=True) diff --git a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md new file mode 100644 index 00000000..ccc0b831 --- /dev/null +++ b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md @@ -0,0 +1,13 @@ +--- +name: adapt-memory +description: Mirror a just-saved native memory into the shared evolve store so it becomes shareable and auditable +--- + +# Adapt Memory + +This skill mirrors a just-saved native memory into the shared evolve store. 
It +is specific to hosts with native self-directed memory and is a no-op on this +platform — there is no native memory store to mirror from. Use the +/evolve-lite:learn +skill to capture reusable lessons here. + diff --git a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py new file mode 100644 index 00000000..fb90d93c --- /dev/null +++ b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +""" +Adapt Memory Script (Claude-only) + +Mirrors a single native Claude memory file into the in-repo evolve store at +``${EVOLVE_DIR:-.evolve}/entities/<type>/<slug>.md`` so the memory becomes +shareable and auditable like every other evolve entity. + +Native memory files live under ``~/.claude/projects/<project>/memory/`` and carry +frontmatter of the form:: + + --- + name: <name> + description: <description> + metadata: + type: user | feedback | project | reference + --- + + <body> + +The agent passes the native ``--type`` through verbatim (native types map +straight onto the entity type — no remapping) and supplies a synthesized +``--trigger`` (the single most important field for future retrieval). The body +of the native file becomes the entity content; the native ``description`` is +carried into the body as a lead line when present. + +Usage: + python3 adapt_memory.py <memory_path> --type <type> --trigger <trigger> +""" + +import argparse +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple +# plugins can coexist side by side (e.g. .bob/lib/evolve-lite/).
+_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) +from entity_io import ( # noqa: E402 + find_entities_dir, + get_default_entities_dir, + write_entity_file, + log as _log, +) + + +def log(message): + _log("adapt-memory", message) + + +def parse_native_memory(text): + """Split a native memory file into (description, body). + + Native frontmatter is simple ``key: value`` lines plus a nested + ``metadata:`` block; we only need ``description`` and the body, so we + parse the top-level ``description:`` line and treat everything after the + closing ``---`` as the body. Missing frontmatter is tolerated — the whole + text is then the body. + """ + description = None + body = text + if text.startswith("---"): + parts = text.split("---", 2) + if len(parts) >= 3: + frontmatter, body = parts[1], parts[2] + for line in frontmatter.splitlines(): + # Only top-level keys (no leading indentation) — keeps the + # nested metadata.* keys out of the description match. + if line[:1].isspace(): + continue + key, _, value = line.partition(":") + if key.strip() == "description": + value = value.strip() + if value: + description = value + break + return description, body.strip() + + +def main(): + parser = argparse.ArgumentParser(description="Mirror a native memory into the evolve store.") + parser.add_argument("memory_path", help="Path to the just-saved native memory file.") + parser.add_argument( + "--type", + required=True, + help="Native memory type, passed through as the entity type (e.g. 
user, feedback, project, reference).", + ) + parser.add_argument( + "--trigger", + required=True, + help="Synthesized one-sentence 'when to recall this' description.", + ) + args = parser.parse_args() + + memory_path = Path(args.memory_path).expanduser() + if not memory_path.is_file(): + print(f"Error: native memory file not found: {memory_path}", file=sys.stderr) + sys.exit(1) + + try: + text = memory_path.read_text(encoding="utf-8") + except OSError as exc: + print(f"Error: cannot read {memory_path} - {exc}", file=sys.stderr) + sys.exit(1) + + description, body = parse_native_memory(text) + if not body: + print(f"Error: native memory {memory_path} has no body to mirror.", file=sys.stderr) + sys.exit(1) + + # Carry the native description into the body as a lead line when it isn't + # already echoed there, so the mirrored entity is self-describing. + content = body + if description and description not in body: + content = f"{description}\n\n{body}" + + entity = { + "type": args.type, + "trigger": args.trigger, + "content": content, + "source": "native-memory", + } + + entities_dir = find_entities_dir() + if entities_dir: + entities_dir = entities_dir.resolve() + log(f"Using existing entities dir: {entities_dir}") + else: + entities_dir = get_default_entities_dir() + log(f"Created entities dir: {entities_dir}") + + path = write_entity_file(entities_dir, entity) + log(f"Mirrored {memory_path} -> {path}") + print(f"Mirrored native memory into evolve store: {path}") + + +if __name__ == "__main__": + main() diff --git a/platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/entity_io.py b/platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/entity_io.py index 63f77e2c..9b177718 100644 --- a/platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/entity_io.py +++ b/platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/entity_io.py @@ -118,6 +118,21 @@ def slugify(text, max_length=60): return text or "entity" +def sanitize_type(text): 
+ """Sanitize an entity *type* into a filesystem-safe subdirectory name. + + Like :func:`slugify` but without truncation — a type is a short label, + not free-form content, and truncating it could silently merge distinct + types. Returns an empty string for input that contains no usable + characters, leaving the fallback decision to the caller. + """ + if not isinstance(text, str): + return "" + text = text.lower() + text = re.sub(r"[^a-z0-9]+", "-", text) + return text.strip("-") + + def unique_filename(directory, slug): """Return a Path that doesn't collide with existing files in *directory*. @@ -348,10 +363,9 @@ def write_entity_file(directory, entity): Returns: Path to the written file. """ - _ALLOWED_TYPES = {"guideline", "preference"} - entity_type = entity.get("type", "guideline") - if not isinstance(entity_type, str) or entity_type not in _ALLOWED_TYPES: - entity_type = "guideline" + # Any non-empty type is accepted and used (sanitized) as the + # subdirectory. An empty/invalid type falls back to "guideline". + entity_type = sanitize_type(entity.get("type", "guideline")) or "guideline" entity["type"] = entity_type type_dir = Path(directory) / entity_type type_dir.mkdir(parents=True, exist_ok=True) diff --git a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md new file mode 100644 index 00000000..ccc0b831 --- /dev/null +++ b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md @@ -0,0 +1,13 @@ +--- +name: adapt-memory +description: Mirror a just-saved native memory into the shared evolve store so it becomes shareable and auditable +--- + +# Adapt Memory + +This skill mirrors a just-saved native memory into the shared evolve store. It +is specific to hosts with native self-directed memory and is a no-op on this +platform — there is no native memory store to mirror from. 
Use the +/evolve-lite:learn +skill to capture reusable lessons here. + diff --git a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py new file mode 100644 index 00000000..fb90d93c --- /dev/null +++ b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +""" +Adapt Memory Script (Claude-only) + +Mirrors a single native Claude memory file into the in-repo evolve store at +``${EVOLVE_DIR:-.evolve}/entities/<type>/<slug>.md`` so the memory becomes +shareable and auditable like every other evolve entity. + +Native memory files live under ``~/.claude/projects/<project>/memory/`` and carry +frontmatter of the form:: + + --- + name: <name> + description: <description> + metadata: + type: user | feedback | project | reference + --- + + <body> + +The agent passes the native ``--type`` through verbatim (native types map +straight onto the entity type — no remapping) and supplies a synthesized +``--trigger`` (the single most important field for future retrieval). The body +of the native file becomes the entity content; the native ``description`` is +carried into the body as a lead line when present. + +Usage: + python3 adapt_memory.py <memory_path> --type <type> --trigger <trigger> +""" + +import argparse +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple +# plugins can coexist side by side (e.g. .bob/lib/evolve-lite/).
+_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) +from entity_io import ( # noqa: E402 + find_entities_dir, + get_default_entities_dir, + write_entity_file, + log as _log, +) + + +def log(message): + _log("adapt-memory", message) + + +def parse_native_memory(text): + """Split a native memory file into (description, body). + + Native frontmatter is simple ``key: value`` lines plus a nested + ``metadata:`` block; we only need ``description`` and the body, so we + parse the top-level ``description:`` line and treat everything after the + closing ``---`` as the body. Missing frontmatter is tolerated — the whole + text is then the body. + """ + description = None + body = text + if text.startswith("---"): + parts = text.split("---", 2) + if len(parts) >= 3: + frontmatter, body = parts[1], parts[2] + for line in frontmatter.splitlines(): + # Only top-level keys (no leading indentation) — keeps the + # nested metadata.* keys out of the description match. + if line[:1].isspace(): + continue + key, _, value = line.partition(":") + if key.strip() == "description": + value = value.strip() + if value: + description = value + break + return description, body.strip() + + +def main(): + parser = argparse.ArgumentParser(description="Mirror a native memory into the evolve store.") + parser.add_argument("memory_path", help="Path to the just-saved native memory file.") + parser.add_argument( + "--type", + required=True, + help="Native memory type, passed through as the entity type (e.g. 
user, feedback, project, reference).", + ) + parser.add_argument( + "--trigger", + required=True, + help="Synthesized one-sentence 'when to recall this' description.", + ) + args = parser.parse_args() + + memory_path = Path(args.memory_path).expanduser() + if not memory_path.is_file(): + print(f"Error: native memory file not found: {memory_path}", file=sys.stderr) + sys.exit(1) + + try: + text = memory_path.read_text(encoding="utf-8") + except OSError as exc: + print(f"Error: cannot read {memory_path} - {exc}", file=sys.stderr) + sys.exit(1) + + description, body = parse_native_memory(text) + if not body: + print(f"Error: native memory {memory_path} has no body to mirror.", file=sys.stderr) + sys.exit(1) + + # Carry the native description into the body as a lead line when it isn't + # already echoed there, so the mirrored entity is self-describing. + content = body + if description and description not in body: + content = f"{description}\n\n{body}" + + entity = { + "type": args.type, + "trigger": args.trigger, + "content": content, + "source": "native-memory", + } + + entities_dir = find_entities_dir() + if entities_dir: + entities_dir = entities_dir.resolve() + log(f"Using existing entities dir: {entities_dir}") + else: + entities_dir = get_default_entities_dir() + log(f"Created entities dir: {entities_dir}") + + path = write_entity_file(entities_dir, entity) + log(f"Mirrored {memory_path} -> {path}") + print(f"Mirrored native memory into evolve store: {path}") + + +if __name__ == "__main__": + main() diff --git a/platform-integrations/install.sh b/platform-integrations/install.sh index b527266d..69f3b29f 100755 --- a/platform-integrations/install.sh +++ b/platform-integrations/install.sh @@ -146,6 +146,17 @@ def _codex_pointer_line(): ) +# Claude installs via marketplace (`claude plugin install`), which copies +# nothing to the repo and does NOT auto-load an ambient EVOLVE.md. 
So we drop a +# COPY of the thin EVOLVE.md at /.evolve/EVOLVE.md and inject a single +# native CLAUDE.md `@`-import line pointing at it. The path is repo-relative +# (resolves from CLAUDE.md's directory, i.e. repo root). The line is its own +# uninstall handle (the marker is a substring of the line) — no HTML comment. +CLAUDE_EVOLVE_MD_REL = ".evolve/EVOLVE.md" +CLAUDE_IMPORT_MARKER = CLAUDE_EVOLVE_MD_REL +CLAUDE_IMPORT_LINE = "@" + CLAUDE_EVOLVE_MD_REL + + # ── Colour helpers ──────────────────────────────────────────────────────────── IS_TTY = sys.stdout.isatty() def _c(code, text): return f"\033[{code}m{text}\033[0m" if IS_TTY else text @@ -896,9 +907,64 @@ class ClaudeInstaller: def __init__(self, ops: FileOps): self.ops = ops + def _deliver_files(self, target_dir): + """Per-repo file delivery (independent of the `claude` CLI). + + Claude installs the plugin via marketplace, which copies nothing to the + repo and does NOT auto-load an ambient EVOLVE.md. So we deliver the thin + EVOLVE.md ourselves: drop a COPY at /.evolve/EVOLVE.md and inject a + single native `@`-import pointer line into /CLAUDE.md, exactly as + CodexInstaller injects its pointer into ~/.codex/AGENTS.md. Kept as a + separate method so it is exercisable in tests without the real CLI. + """ + _ensure_source_dir() + source_dir = SOURCE_DIR + plugin_source = Path(source_dir) / "platform-integrations" / "claude" / "plugins" / CLAUDE_PLUGIN + + # Drop a COPY of the thin EVOLVE.md at /.evolve/EVOLVE.md. Prefer + # the rendered claude plugin copy; fall back to the shared original. 
+ evolve_src = plugin_source / "EVOLVE.md" + if not evolve_src.is_file(): + evolve_src = Path(source_dir) / "plugin-source" / "EVOLVE.md" + evolve_text = "" if self.ops.is_dry_run and not evolve_src.is_file() else evolve_src.read_text() + evolve_dst = Path(target_dir) / CLAUDE_EVOLVE_MD_REL + self.ops.atomic_write_text(evolve_dst, evolve_text) + success(f"Copied EVOLVE.md → {evolve_dst}") + + # Inject the single native `@`-import pointer line into /CLAUDE.md. + # The path resolves relative to CLAUDE.md (repo root). The line is its + # own uninstall handle (marker is a substring of the line). + claude_md = Path(target_dir) / "CLAUDE.md" + self.ops.inject_marker_line(claude_md, CLAUDE_IMPORT_MARKER, CLAUDE_IMPORT_LINE) + success(f"Injected '{CLAUDE_PLUGIN}' import pointer into {claude_md}") + if self.ops.is_dry_run: + dryrun("Claude shows a one-time 'allow external imports' dialog on first session") + else: + warn( + "On the first Claude session in this repo, an 'allow external " + "imports' dialog will appear — you must Allow it, or the " + f"{CLAUDE_IMPORT_LINE} import is silently disabled." + ) + + # Recall-audit script: the thin EVOLVE.md instructs running + # `~/.claude/evolve-lite/audit_recall.py`, so install it at that GLOBAL + # absolute path (mirroring CodexInstaller). Prefer the rendered claude + # copy; fall back to the shared plugin-source original. 
+ audit_src = plugin_source / "scripts" / AUDIT_SCRIPT + if not audit_src.is_file(): + audit_src = Path(source_dir) / "plugin-source" / "scripts" / AUDIT_SCRIPT + audit_text = "" if self.ops.is_dry_run and not audit_src.is_file() else audit_src.read_text() + audit_file = Path.home() / ".claude" / "evolve-lite" / AUDIT_SCRIPT + self.ops.atomic_write_text(audit_file, audit_text) + success(f"Installed recall-audit script → {audit_file}") + def install(self, target_dir): info("Installing Claude plugin via marketplace") + # Deliver the per-repo EVOLVE.md + import pointer + global audit script + # regardless of whether the `claude` CLI is present below. + self._deliver_files(target_dir) + marketplace_dir = Path(SOURCE_DIR).resolve() if SOURCE_DIR else None has_local_marketplace = marketplace_dir is not None and (marketplace_dir / ".claude-plugin" / "marketplace.json").is_file() marketplace_source = str(marketplace_dir) if has_local_marketplace else EVOLVE_REPO @@ -938,6 +1004,16 @@ class ClaudeInstaller: def uninstall(self, target_dir): info("Uninstalling Claude plugin") + + # Drop the single managed `@`-import pointer line from /CLAUDE.md, + # remove the per-repo EVOLVE.md copy we placed (NOT the whole .evolve/ + # store), and remove the global recall-audit script (mirrors Codex). 
+ self.ops.remove_marker_line(Path(target_dir) / "CLAUDE.md", CLAUDE_IMPORT_MARKER) + self.ops.remove_file(Path(target_dir) / CLAUDE_EVOLVE_MD_REL) + claude_evolve_dir = Path.home() / ".claude" / "evolve-lite" + self.ops.remove_file(claude_evolve_dir / AUDIT_SCRIPT) + self.ops.remove_dir_if_empty(claude_evolve_dir) + claude = shutil.which("claude") if not claude: warn("Could not uninstall Claude plugin automatically.") diff --git a/plugin-source/EVOLVE.md.j2 b/plugin-source/EVOLVE.md.j2 index a6be8ea1..4c2aa5f5 100644 --- a/plugin-source/EVOLVE.md.j2 +++ b/plugin-source/EVOLVE.md.j2 @@ -1,3 +1,34 @@ +{%- from "_macros.j2" import skill_ref with context -%} +{% if platform == "claude" -%} +# Evolve — shared, auditable memory + +You already have native, self-directed memory: you decide what to recall at the +start of a task and what to save at the end, on your own judgment. Evolve adds +only the two things native memory lacks — sharing and provenance — as two +lightweight lifecycle steps. It does not change how you recall or save. + +## After you save a memory + +When you save a native memory this turn, invoke {{ skill_ref('adapt-memory') }} +to mirror it into the shared evolve store (`./.evolve/entities/`) so the fact is +shareable with the team and auditable like every other evolve entity. Do this +once per memory you saved. + +## Record what you consulted + +After you read or consult memories this turn, log which ones you actually opened, +so the value of this memory can be measured over time. Run: + +``` +python3 {{ audit_script }} <memory-file> [<memory-file> ...] +``` + +Pass the memory files you read this turn (space-separated paths, relative to the +project root). Skip this step entirely if you consulted no memories. If the +command prints a line beginning `evolve-session:`, include that line once, +verbatim, somewhere in your reply — it lets later analysis tie this session to +what you recalled.
+{%- else -%} # Evolve — self-directed memory You have a persistent, file-based memory for the current project, stored under @@ -80,3 +111,4 @@ recall — one line per memory, no frontmatter, never put memory content there. - Before saving, check for an existing memory that already covers it — update that file rather than creating a duplicate. - Delete memories that turn out to be wrong. +{%- endif %} diff --git a/plugin-source/lib/entity_io.py b/plugin-source/lib/entity_io.py index 63f77e2c..9b177718 100644 --- a/plugin-source/lib/entity_io.py +++ b/plugin-source/lib/entity_io.py @@ -118,6 +118,21 @@ def slugify(text, max_length=60): return text or "entity" +def sanitize_type(text): + """Sanitize an entity *type* into a filesystem-safe subdirectory name. + + Like :func:`slugify` but without truncation — a type is a short label, + not free-form content, and truncating it could silently merge distinct + types. Returns an empty string for input that contains no usable + characters, leaving the fallback decision to the caller. + """ + if not isinstance(text, str): + return "" + text = text.lower() + text = re.sub(r"[^a-z0-9]+", "-", text) + return text.strip("-") + + def unique_filename(directory, slug): """Return a Path that doesn't collide with existing files in *directory*. @@ -348,10 +363,9 @@ def write_entity_file(directory, entity): Returns: Path to the written file. """ - _ALLOWED_TYPES = {"guideline", "preference"} - entity_type = entity.get("type", "guideline") - if not isinstance(entity_type, str) or entity_type not in _ALLOWED_TYPES: - entity_type = "guideline" + # Any non-empty type is accepted and used (sanitized) as the + # subdirectory. An empty/invalid type falls back to "guideline". 
+ entity_type = sanitize_type(entity.get("type", "guideline")) or "guideline" entity["type"] = entity_type type_dir = Path(directory) / entity_type type_dir.mkdir(parents=True, exist_ok=True) diff --git a/plugin-source/skills/evolve-lite/adapt-memory/SKILL.md.j2 b/plugin-source/skills/evolve-lite/adapt-memory/SKILL.md.j2 new file mode 100644 index 00000000..39456f9d --- /dev/null +++ b/plugin-source/skills/evolve-lite/adapt-memory/SKILL.md.j2 @@ -0,0 +1,65 @@ +{%- from "_macros.j2" import invoke with context -%} +--- +name: {% if platform == "bob" %}evolve-lite:{% endif %}adapt-memory +description: Mirror a just-saved native memory into the shared evolve store so it becomes shareable and auditable +{% if platform == "claude" -%} +context: fork +{% endif -%} +--- + +# Adapt Memory +{% if platform == "claude" %} +## Overview + +You have native, self-directed memory: you decide what to remember and write it +under `~/.claude/projects//memory/`. That store has no provenance or +sharing. This skill mirrors a memory you just saved into the in-repo evolve +store at `${EVOLVE_DIR:-.evolve}/entities//.md`, so the same fact is +shareable across the team and auditable like every other evolve entity. + +Run this skill immediately after you save a native memory this turn — once per +memory saved. + +## Required Action + +For each native memory file you saved this turn: + +1. **Read the just-saved memory file** so you are mirroring its real content, + not a guess. Note its `metadata.type` (one of `user`, `feedback`, `project`, + `reference`) — this passes straight through as the entity type, with no + remapping. + +2. **Compose a high-quality `trigger`.** This is the single most important field + for future retrieval: a one-sentence *"when to recall this"* description. + Base it on what the memory actually says and the situations in which a future + agent would benefit from it — do **not** mechanically copy the memory's + `description`. 
Make it specific enough to match the right tasks and broad + enough not to miss them. + +3. **Run the adapter script**, passing the native file path, its type, and your + synthesized trigger: + +```bash +{{ invoke("adapt-memory", "adapt_memory.py", ["", "--type ", "--trigger \"\""]) }} +``` + +The script parses the native frontmatter and body, builds the entity +(`type` = native type, `trigger` = your synthesized trigger, `content` = the +native body with its `description` carried in as a lead line), and persists it +via the shared entity writer. It is safe to run repeatedly. + +## Notes + +- One invocation per saved memory. If you saved several memories this turn, + invoke the script once for each, with a trigger tailored to each. +- The trigger quality directly determines whether the memory resurfaces when it + matters. Spend a moment on it. +- If you saved no native memory this turn, there is nothing to mirror — skip + this skill. +{% else %} +This skill mirrors a just-saved native memory into the shared evolve store. It +is specific to hosts with native self-directed memory and is a no-op on this +platform — there is no native memory store to mirror from. Use the +{% if platform == "bob" %}`evolve-lite:learn`{% else %}/evolve-lite:learn{% endif %} +skill to capture reusable lessons here. +{% endif %} diff --git a/plugin-source/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py b/plugin-source/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py new file mode 100644 index 00000000..fb90d93c --- /dev/null +++ b/plugin-source/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +""" +Adapt Memory Script (Claude-only) + +Mirrors a single native Claude memory file into the in-repo evolve store at +``${EVOLVE_DIR:-.evolve}/entities//.md`` so the memory becomes +shareable and auditable like every other evolve entity. 
+ +Native memory files live under ``~/.claude/projects/<project>/memory/`` and carry +frontmatter of the form:: + + --- + name: <name> + description: <description> + metadata: + type: user | feedback | project | reference + --- + + <body> + +The agent passes the native ``--type`` through verbatim (native types map +straight onto the entity type — no remapping) and supplies a synthesized +``--trigger`` (the single most important field for future retrieval). The body +of the native file becomes the entity content; the native ``description`` is +carried into the body as a lead line when present. + +Usage: + python3 adapt_memory.py <memory_path> --type <type> --trigger <trigger> +""" + +import argparse +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple +# plugins can coexist side by side (e.g. .bob/lib/evolve-lite/). +_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) +from entity_io import ( # noqa: E402 + find_entities_dir, + get_default_entities_dir, + write_entity_file, + log as _log, +) + + +def log(message): + _log("adapt-memory", message) + + +def parse_native_memory(text): + """Split a native memory file into (description, body). + + Native frontmatter is simple ``key: value`` lines plus a nested + ``metadata:`` block; we only need ``description`` and the body, so we + parse the top-level ``description:`` line and treat everything after the + closing ``---`` as the body. Missing frontmatter is tolerated — the whole + text is then the body.
+ """ + description = None + body = text + if text.startswith("---"): + parts = text.split("---", 2) + if len(parts) >= 3: + frontmatter, body = parts[1], parts[2] + for line in frontmatter.splitlines(): + # Only top-level keys (no leading indentation) — keeps the + # nested metadata.* keys out of the description match. + if line[:1].isspace(): + continue + key, _, value = line.partition(":") + if key.strip() == "description": + value = value.strip() + if value: + description = value + break + return description, body.strip() + + +def main(): + parser = argparse.ArgumentParser(description="Mirror a native memory into the evolve store.") + parser.add_argument("memory_path", help="Path to the just-saved native memory file.") + parser.add_argument( + "--type", + required=True, + help="Native memory type, passed through as the entity type (e.g. user, feedback, project, reference).", + ) + parser.add_argument( + "--trigger", + required=True, + help="Synthesized one-sentence 'when to recall this' description.", + ) + args = parser.parse_args() + + memory_path = Path(args.memory_path).expanduser() + if not memory_path.is_file(): + print(f"Error: native memory file not found: {memory_path}", file=sys.stderr) + sys.exit(1) + + try: + text = memory_path.read_text(encoding="utf-8") + except OSError as exc: + print(f"Error: cannot read {memory_path} - {exc}", file=sys.stderr) + sys.exit(1) + + description, body = parse_native_memory(text) + if not body: + print(f"Error: native memory {memory_path} has no body to mirror.", file=sys.stderr) + sys.exit(1) + + # Carry the native description into the body as a lead line when it isn't + # already echoed there, so the mirrored entity is self-describing. 
+ content = body + if description and description not in body: + content = f"{description}\n\n{body}" + + entity = { + "type": args.type, + "trigger": args.trigger, + "content": content, + "source": "native-memory", + } + + entities_dir = find_entities_dir() + if entities_dir: + entities_dir = entities_dir.resolve() + log(f"Using existing entities dir: {entities_dir}") + else: + entities_dir = get_default_entities_dir() + log(f"Created entities dir: {entities_dir}") + + path = write_entity_file(entities_dir, entity) + log(f"Mirrored {memory_path} -> {path}") + print(f"Mirrored native memory into evolve store: {path}") + + +if __name__ == "__main__": + main() diff --git a/tests/platform_integrations/conftest.py b/tests/platform_integrations/conftest.py index e04fcc7c..ac95dcac 100644 --- a/tests/platform_integrations/conftest.py +++ b/tests/platform_integrations/conftest.py @@ -89,6 +89,35 @@ def codex_audit_script(sandbox_home): return sandbox_home / ".codex" / "evolve-lite" / "audit_recall.py" +@pytest.fixture +def claude_md_file(temp_project_dir): + """Path to the PER-REPO CLAUDE.md the Claude installer injects into. + + Claude installs the plugin via marketplace (copies nothing to disk) and does + NOT auto-load an ambient EVOLVE.md, so the installer injects a single native + ``@.evolve/EVOLVE.md`` import pointer line into the repo's CLAUDE.md.""" + return temp_project_dir / "CLAUDE.md" + + +@pytest.fixture +def claude_evolve_md(temp_project_dir): + """Path to the PER-REPO COPY of the thin EVOLVE.md (/.evolve/EVOLVE.md). + + The CLAUDE.md ``@``-import points here (path resolves relative to CLAUDE.md, + i.e. the repo root).""" + return temp_project_dir / ".evolve" / "EVOLVE.md" + + +@pytest.fixture +def claude_audit_script(sandbox_home): + """Path to the sandboxed Claude GLOBAL recall-audit script. 
+ + The thin EVOLVE.md instructs running + ``~/.claude/evolve-lite/audit_recall.py`` after recall, so the installer + drops the script once at that global absolute path.""" + return sandbox_home / ".claude" / "evolve-lite" / "audit_recall.py" + + @pytest.fixture def temp_project_dir(tmp_path): """ diff --git a/tests/platform_integrations/test_claude.py b/tests/platform_integrations/test_claude.py index db253838..c125924e 100644 --- a/tests/platform_integrations/test_claude.py +++ b/tests/platform_integrations/test_claude.py @@ -1,9 +1,20 @@ """ Tests for the Claude platform integration installer behavior. -Claude install delegates entirely to the claude CLI via the marketplace workflow. -These tests control PATH to simulate the CLI being absent, which lets us verify -fallback output without needing the actual CLI installed. +Claude installs the plugin via marketplace (``claude plugin install``), which +delegates to the claude CLI and copies nothing to the repo. Separately — and +INDEPENDENTLY of whether the CLI is present — the installer performs a per-repo +file delivery so the thin EVOLVE.md actually reaches Claude's context every +session: + * a COPY of the thin EVOLVE.md at the PER-REPO path ``/.evolve/EVOLVE.md``, + * a SINGLE native ``@``-import pointer line (``@.evolve/EVOLVE.md``) injected + into the PER-REPO ``/CLAUDE.md`` (the line is its own uninstall handle), + * the self-contained recall-audit script at the GLOBAL (sandboxed) path + ``~/.claude/evolve-lite/audit_recall.py`` referenced by that EVOLVE.md. + +Some tests control PATH to simulate the CLI being absent, which lets us verify +the marketplace fallback output without needing the actual CLI installed; the +file delivery still runs in that case. """ import pytest @@ -12,6 +23,18 @@ # PATH that contains no claude binary — forces the "CLI not found" fallback path. _NO_CLAUDE_PATH = "/usr/bin:/bin" +# The single native CLAUDE.md import pointer line (its own uninstall handle). 
+IMPORT_LINE = "@.evolve/EVOLVE.md" +# A distinctive sentence from the thin EVOLVE.md body that must live in the copy. +EVOLVE_BODY_SENTENCE = "You already have native, self-directed memory" +# A distinctive string from the recall-audit script. +AUDIT_SCRIPT_SENTENCE = "Append a recall-audit row" + + +def _import_lines(text): + """Return the lines in `text` that carry the managed @-import marker.""" + return [ln for ln in text.splitlines() if IMPORT_LINE in ln] + @pytest.mark.platform_integrations class TestClaudeInstall: @@ -29,3 +52,91 @@ def test_cli_absent_exits_success(self, temp_project_dir, install_runner): result = install_runner.run("install", platform="claude", env={"PATH": _NO_CLAUDE_PATH}) assert result.returncode == 0 + + +@pytest.mark.platform_integrations +@pytest.mark.e2e +class TestClaudeFileDelivery: + """Test the per-repo EVOLVE.md import-pointer delivery (independent of the CLI).""" + + def test_install_delivers_pointer_evolve_md_and_audit_script( + self, + temp_project_dir, + install_runner, + file_assertions, + claude_md_file, + claude_evolve_md, + claude_audit_script, + ): + """Install injects one @-import line into CLAUDE.md, copies the thin EVOLVE.md, and installs the global audit script.""" + install_runner.run("install", platform="claude") + + # A SINGLE native @-import pointer line is injected into /CLAUDE.md. + file_assertions.assert_file_exists(claude_md_file) + import_lines = _import_lines(claude_md_file.read_text()) + assert len(import_lines) == 1, f"Expected exactly one import line, got {import_lines!r}" + assert import_lines[0].strip() == IMPORT_LINE + + # A COPY of the thin EVOLVE.md is dropped at /.evolve/EVOLVE.md. + file_assertions.assert_file_exists(claude_evolve_md) + assert EVOLVE_BODY_SENTENCE in claude_evolve_md.read_text() + + # The recall-audit script is installed at the GLOBAL sandboxed path. 
+ file_assertions.assert_file_exists(claude_audit_script) + assert AUDIT_SCRIPT_SENTENCE in claude_audit_script.read_text() + + def test_install_is_idempotent_no_duplicate_pointer(self, temp_project_dir, install_runner, claude_md_file): + """Running install twice must not duplicate the @-import line in CLAUDE.md.""" + install_runner.run("install", platform="claude") + install_runner.run("install", platform="claude") + + import_lines = _import_lines(claude_md_file.read_text()) + assert len(import_lines) == 1, f"Expected exactly one import line after two installs, got {import_lines!r}" + + def test_install_preserves_existing_claude_md_content(self, temp_project_dir, install_runner, claude_md_file): + """Injecting the import line must not clobber pre-existing CLAUDE.md content.""" + claude_md_file.write_text("# Project rules\n\nExisting guidance line.\n") + install_runner.run("install", platform="claude") + + text = claude_md_file.read_text() + assert "Existing guidance line." in text + assert len(_import_lines(text)) == 1 + + def test_claude_dry_run_does_not_write_files( + self, + temp_project_dir, + install_runner, + claude_md_file, + claude_evolve_md, + claude_audit_script, + ): + """Dry-run should report actions without writing any files.""" + result = install_runner.run("install", platform="claude", dry_run=True) + + assert "DRY RUN" in result.stdout + assert not claude_md_file.exists() + assert not claude_evolve_md.exists() + assert not claude_audit_script.exists() + + def test_uninstall_removes_pointer_and_evolve_md_and_audit( + self, + temp_project_dir, + install_runner, + file_assertions, + claude_md_file, + claude_evolve_md, + claude_audit_script, + ): + """Uninstall removes the @-import line, the per-repo EVOLVE.md copy, and the global audit script.""" + install_runner.run("install", platform="claude") + file_assertions.assert_file_exists(claude_evolve_md) + file_assertions.assert_file_exists(claude_audit_script) + assert 
len(_import_lines(claude_md_file.read_text())) == 1 + + install_runner.run("uninstall", platform="claude") + + # No @-import reference remains in CLAUDE.md. + assert IMPORT_LINE not in claude_md_file.read_text() + # The placed per-repo EVOLVE.md and the global audit script are gone. + file_assertions.assert_file_not_exists(claude_evolve_md) + file_assertions.assert_file_not_exists(claude_audit_script) diff --git a/tests/platform_integrations/test_entity_io_core.py b/tests/platform_integrations/test_entity_io_core.py index 30a68db3..29586878 100644 --- a/tests/platform_integrations/test_entity_io_core.py +++ b/tests/platform_integrations/test_entity_io_core.py @@ -120,10 +120,22 @@ def test_preference_type_goes_in_preference_dir(self, tmp_path): path = entity_io.write_entity_file(tmp_path, entity) assert path.parent == tmp_path / "preference" - def test_invalid_type_defaults_to_guideline(self, tmp_path): - entity = {"type": "badtype", "content": "Some content."} + def test_arbitrary_type_goes_in_its_own_dir(self, tmp_path): + entity = {"type": "feedback", "content": "Some content."} path = entity_io.write_entity_file(tmp_path, entity) - assert path.parent == tmp_path / "guideline" + assert path.parent == tmp_path / "feedback" + + def test_type_is_sanitized_for_filesystem_safety(self, tmp_path): + entity = {"type": "User Preference!", "content": "Some content."} + path = entity_io.write_entity_file(tmp_path, entity) + assert path.parent == tmp_path / "user-preference" + assert entity["type"] == "user-preference" + + def test_empty_or_invalid_type_defaults_to_guideline(self, tmp_path): + for bad_type in ("", " ", "!!!"): + entity = {"type": bad_type, "content": "Some content."} + path = entity_io.write_entity_file(tmp_path, entity) + assert path.parent == tmp_path / "guideline" def test_written_file_is_readable(self, tmp_path): entity = {"type": "guideline", "content": "Write clear commit messages."} diff --git a/tests/platform_integrations/test_plugin_structure.py 
b/tests/platform_integrations/test_plugin_structure.py index 062d7816..4a996bb6 100644 --- a/tests/platform_integrations/test_plugin_structure.py +++ b/tests/platform_integrations/test_plugin_structure.py @@ -71,6 +71,7 @@ class TestSkillScripts: "skills/evolve-lite/recall/scripts/retrieve_entities.py", "skills/evolve-lite/learn/scripts/save_entities.py", "skills/evolve-lite/provenance/scripts/log_influence.py", + "skills/evolve-lite/adapt-memory/scripts/adapt_memory.py", ], ) def test_script_exists(self, script_rel): From 0dab81dfea7592f5ebd5ce011cf59823b350cd85 Mon Sep 17 00:00:00 2001 From: Punleuk Oum <5661986+illeatmyhat@users.noreply.github.com> Date: Mon, 8 Jun 2026 11:18:43 -0700 Subject: [PATCH 03/12] feat(platform-integrations): detect silently-disabled Claude EVOLVE.md import Claude's CLAUDE.md @import needs a one-time per-project approval; a declined (or previously-declined) approval silently disables the import with no error, making evolve a no-op on Claude. Reading Claude's internal approval flag is unreliable (undocumented ~/.claude.json key), so detect via a canary: - thin Claude EVOLVE.md carries a unique canary token that expands into the session transcript only when the import actually loads - new import-independent doctor skill greps the latest project transcript for the canary and reports OK / IMPORT_DISABLED (with claude project purge remediation) / NOT_INSTALLED / STALE_EVOLVE_MD / UNKNOWN - doctor extracts the token from the installed EVOLVE.md (no double-hardcode) Co-Authored-By: Claude Opus 4.8 (1M context) --- .../commands/evolve-lite-doctor.md | 4 + .../skills/evolve-lite-doctor/SKILL.md | 12 ++ .../evolve-lite-doctor/scripts/doctor.py | 188 ++++++++++++++++++ .../claude/plugins/evolve-lite/EVOLVE.md | 1 + .../skills/evolve-lite/doctor/SKILL.md | 43 ++++ .../evolve-lite/doctor/scripts/doctor.py | 188 ++++++++++++++++++ .../skills/evolve-lite/doctor/SKILL.md | 12 ++ .../evolve-lite/doctor/scripts/doctor.py | 188 ++++++++++++++++++ 
.../skills/evolve-lite/doctor/SKILL.md | 12 ++ .../evolve-lite/doctor/scripts/doctor.py | 188 ++++++++++++++++++ plugin-source/EVOLVE.md.j2 | 1 + .../skills/evolve-lite/doctor/SKILL.md.j2 | 51 +++++ .../evolve-lite/doctor/scripts/doctor.py | 188 ++++++++++++++++++ tests/platform_integrations/test_doctor.py | 144 ++++++++++++++ .../test_plugin_structure.py | 1 + 15 files changed, 1221 insertions(+) create mode 100644 platform-integrations/bob/evolve-lite/commands/evolve-lite-doctor.md create mode 100644 platform-integrations/bob/evolve-lite/skills/evolve-lite-doctor/SKILL.md create mode 100644 platform-integrations/bob/evolve-lite/skills/evolve-lite-doctor/scripts/doctor.py create mode 100644 platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md create mode 100644 platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py create mode 100644 platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md create mode 100644 platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py create mode 100644 platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md create mode 100644 platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py create mode 100644 plugin-source/skills/evolve-lite/doctor/SKILL.md.j2 create mode 100644 plugin-source/skills/evolve-lite/doctor/scripts/doctor.py create mode 100644 tests/platform_integrations/test_doctor.py diff --git a/platform-integrations/bob/evolve-lite/commands/evolve-lite-doctor.md b/platform-integrations/bob/evolve-lite/commands/evolve-lite-doctor.md new file mode 100644 index 00000000..2320c2ba --- /dev/null +++ b/platform-integrations/bob/evolve-lite/commands/evolve-lite-doctor.md @@ -0,0 +1,4 @@ +--- +description: Diagnose evolve health on Claude — verify the CLAUDE.md @import is actually loading the thin EVOLVE.md into sessions +--- +Use the 
`evolve-lite-doctor` skill on the current conversation. Follow the skill's instructions exactly. diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite-doctor/SKILL.md b/platform-integrations/bob/evolve-lite/skills/evolve-lite-doctor/SKILL.md new file mode 100644 index 00000000..4a29034e --- /dev/null +++ b/platform-integrations/bob/evolve-lite/skills/evolve-lite-doctor/SKILL.md @@ -0,0 +1,12 @@ +--- +name: evolve-lite:doctor +description: Diagnose evolve health on Claude — verify the CLAUDE.md @import is actually loading the thin EVOLVE.md into sessions +--- + +# Doctor + +This skill diagnoses Claude's `@import` delivery of evolve's thin EVOLVE.md. It +is specific to Claude (where evolve loads via a per-project import that can be +silently declined) and is a **no-op on this platform** — here EVOLVE.md is +always-on and there is no import-approval gate to check. Nothing to run. + diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite-doctor/scripts/doctor.py b/platform-integrations/bob/evolve-lite/skills/evolve-lite-doctor/scripts/doctor.py new file mode 100644 index 00000000..2c2a5382 --- /dev/null +++ b/platform-integrations/bob/evolve-lite/skills/evolve-lite-doctor/scripts/doctor.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +""" +Doctor Script (Claude-only diagnostic) + +On Claude, evolve is delivered by a single ``@.evolve/EVOLVE.md`` import line in +the repo's ``./CLAUDE.md``. That import requires a one-time, per-project +"allow external imports" approval. If the user declines it (even once, in a past +session) Claude silently disables the import forever — the thin EVOLVE.md never +loads and evolve becomes a no-op with NO error. + +Claude's internal approval flag is undocumented and unreliable to read, so this +script detects delivery *empirically*: the installed thin EVOLVE.md carries a +canary token (``EVOLVE_IMPORT_CANARY_``). When the import loads, that token +expands into the session transcript. 
The doctor extracts the token from the +installed copy (never hardcoding it twice) and greps the most recent Claude +project transcripts for it. + +Status codes (printed verbatim, always exit 0 — this is a diagnostic): + + OK — canary found in a recent transcript; import is loading. + IMPORT_DISABLED — import line present in CLAUDE.md but canary absent from + every recent transcript; the user likely declined the + external-import approval. + NOT_INSTALLED — the import line is missing from CLAUDE.md, or the installed + .evolve/EVOLVE.md is missing; run the installer. + STALE_EVOLVE_MD — installed EVOLVE.md has no canary; it predates this build, + re-run the installer. + UNKNOWN — no recent Claude transcripts for this project yet. + +Usage: + python3 doctor.py +""" + +import os +import re +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple plugins +# can coexist side by side. The doctor only needs the shared `log` helper, but +# resolving the lib the same way the other scripts do keeps the convention +# uniform (and only works in the rendered tree, same constraint as adapt_memory). +_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) +from entity_io import log as _log # noqa: E402 + + +def log(message): + _log("doctor", message) + + +# The line the installer injects into the repo's CLAUDE.md (see install.sh +# CLAUDE_IMPORT_LINE). Matching on this substring is the install-sanity check. 
+CLAUDE_IMPORT_LINE = "@.evolve/EVOLVE.md" + +# Pattern used to lift the canary token out of the installed EVOLVE.md so the +# exact token lives in exactly one place (the template), never duplicated here. +_CANARY_RE = re.compile(r"EVOLVE_IMPORT_CANARY_\S+") + +# How many of the most-recent transcripts to scan for the canary. +_RECENT_N = 3 + + +def _evolve_dir(root): + """Resolve the .evolve root: $EVOLVE_DIR if set, else {root}/.evolve.""" + env_dir = os.environ.get("EVOLVE_DIR") + if env_dir: + return Path(env_dir) + return root / ".evolve" + + +def _transcript_slug(root): + """Claude derives a project's transcript dir name by replacing every + non-alphanumeric character in the absolute project path with ``-``. + + e.g. /Users/x/Documents/kaizen -> -Users-x-Documents-kaizen + """ + return re.sub(r"[^A-Za-z0-9]", "-", str(root)) + + +def _recent_transcripts(home, root, limit=_RECENT_N): + """The most recent N ``*.jsonl`` transcripts for this project, by mtime.""" + slug = _transcript_slug(root) + proj_dir = home / ".claude" / "projects" / slug + if not proj_dir.is_dir(): + return [] + jsonl = [p for p in proj_dir.glob("*.jsonl") if p.is_file()] + jsonl.sort(key=lambda p: p.stat().st_mtime, reverse=True) + return jsonl[:limit] + + +def _canary_in_transcripts(transcripts, token): + """True if `token` appears anywhere in any of the given transcript files.""" + for path in transcripts: + try: + text = path.read_text(encoding="utf-8", errors="replace") + except OSError: + continue + if token in text: + return True + return False + + +def diagnose(root, home): + """Core diagnosis. Returns ``(code, message)``; never raises on missing + files/dirs. `root` is the project root; `home` is the user home dir under + which Claude keeps ``~/.claude/projects/{slug}/``.
+ """ + root = Path(root) + home = Path(home) + + # --- Install sanity ------------------------------------------------------ + claude_md = root / "CLAUDE.md" + has_import = False + if claude_md.is_file(): + try: + has_import = CLAUDE_IMPORT_LINE in claude_md.read_text(encoding="utf-8", errors="replace") + except OSError: + has_import = False + if not has_import: + return ( + "NOT_INSTALLED", + f"evolve import not wired into this repo's CLAUDE.md (expected a line `{CLAUDE_IMPORT_LINE}`); run the installer.", + ) + + evolve_md = _evolve_dir(root) / "EVOLVE.md" + if not evolve_md.is_file(): + return ( + "NOT_INSTALLED", + f"installed EVOLVE.md is missing at {evolve_md}; run the installer.", + ) + + # --- Extract the canary from the installed file -------------------------- + try: + evolve_text = evolve_md.read_text(encoding="utf-8", errors="replace") + except OSError as exc: + return ( + "NOT_INSTALLED", + f"cannot read installed EVOLVE.md at {evolve_md} - {exc}; run the installer.", + ) + match = _CANARY_RE.search(evolve_text) + if not match: + return ( + "STALE_EVOLVE_MD", + f"installed EVOLVE.md at {evolve_md} has no canary token (it predates this build); re-run the installer to refresh it.", + ) + token = match.group(0) + + # --- Transcript check ---------------------------------------------------- + transcripts = _recent_transcripts(home, root) + if not transcripts: + return ( + "UNKNOWN", + "no recent Claude transcripts for this project yet; open a session, then re-run.", + ) + if _canary_in_transcripts(transcripts, token): + return ("OK", "✓ evolve EVOLVE.md import is loading.") + + return ( + "IMPORT_DISABLED", + "⚠ The @import is present in CLAUDE.md but its content is NOT " + "reaching sessions — you likely declined Claude's external-import " + "approval. 
Re-enable by running `claude project purge " + f"{root}` then start a new session and Allow the import dialog.", + ) + + +def main(): + root = Path(os.getcwd()).resolve() + home = Path.home() + code, message = diagnose(root, home) + log(f"{code}: {message}") + print(f"evolve doctor [{code}] {message}") + # Diagnostic only — never fail the caller. + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/platform-integrations/claude/plugins/evolve-lite/EVOLVE.md b/platform-integrations/claude/plugins/evolve-lite/EVOLVE.md index 1df5f074..cf97f4e7 100644 --- a/platform-integrations/claude/plugins/evolve-lite/EVOLVE.md +++ b/platform-integrations/claude/plugins/evolve-lite/EVOLVE.md @@ -1,3 +1,4 @@ + # Evolve — shared, auditable memory You already have native, self-directed memory: you decide what to recall at the diff --git a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md new file mode 100644 index 00000000..8b9ece19 --- /dev/null +++ b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md @@ -0,0 +1,43 @@ +--- +name: doctor +description: Diagnose evolve health on Claude — verify the CLAUDE.md @import is actually loading the thin EVOLVE.md into sessions +context: fork +--- + +# Doctor + +## Overview + +On Claude, evolve is delivered by a single `@.evolve/EVOLVE.md` import line in +this repo's `./CLAUDE.md`. That import requires a one-time, per-project "allow +external imports" approval. If you (or a teammate) declined it — even once, in a +past session — Claude silently disables the import forever, the thin EVOLVE.md +never loads, and evolve becomes a no-op with **no error**. + +This skill checks whether the import is actually reaching your sessions, by +looking for a canary token that the installed EVOLVE.md expands into the session +transcript when the import loads. 
+ +## Required Action + +Run the doctor script from the repo root: + +```bash +python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/doctor/scripts/doctor.py +``` + +It is read-only and always exits 0. Read the status code it prints: + +- **OK** — the import is loading; nothing to do. +- **IMPORT_DISABLED** — the `@import` line is in `CLAUDE.md` but its content is + not reaching sessions (you likely declined the external-import approval). + Follow the remediation the script prints: purge the project approval, start a + new session, and **Allow** the import dialog. +- **NOT_INSTALLED** — evolve isn't wired into this repo; re-run the installer. +- **STALE_EVOLVE_MD** — the installed `.evolve/EVOLVE.md` predates the canary; + re-run the installer to refresh it. +- **UNKNOWN** — no recent Claude transcripts for this project yet; open a + session, then re-run. + +Relay the status and any remediation to the user. + diff --git a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py new file mode 100644 index 00000000..2c2a5382 --- /dev/null +++ b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +""" +Doctor Script (Claude-only diagnostic) + +On Claude, evolve is delivered by a single ``@.evolve/EVOLVE.md`` import line in +the repo's ``./CLAUDE.md``. That import requires a one-time, per-project +"allow external imports" approval. If the user declines it (even once, in a past +session) Claude silently disables the import forever — the thin EVOLVE.md never +loads and evolve becomes a no-op with NO error. + +Claude's internal approval flag is undocumented and unreliable to read, so this +script detects delivery *empirically*: the installed thin EVOLVE.md carries a +canary token (``EVOLVE_IMPORT_CANARY_``). 
When the import loads, that token +expands into the session transcript. The doctor extracts the token from the +installed copy (never hardcoding it twice) and greps the most recent Claude +project transcripts for it. + +Status codes (printed verbatim, always exit 0 — this is a diagnostic): + + OK — canary found in a recent transcript; import is loading. + IMPORT_DISABLED — import line present in CLAUDE.md but canary absent from + every recent transcript; the user likely declined the + external-import approval. + NOT_INSTALLED — the import line is missing from CLAUDE.md, or the installed + .evolve/EVOLVE.md is missing; run the installer. + STALE_EVOLVE_MD — installed EVOLVE.md has no canary; it predates this build, + re-run the installer. + UNKNOWN — no recent Claude transcripts for this project yet. + +Usage: + python3 doctor.py +""" + +import os +import re +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple plugins +# can coexist side by side. The doctor only needs the shared `log` helper, but +# resolving the lib the same way the other scripts do keeps the convention +# uniform (and only works in the rendered tree, same constraint as adapt_memory). +_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) +from entity_io import log as _log # noqa: E402 + + +def log(message): + _log("doctor", message) + + +# The line the installer injects into the repo's CLAUDE.md (see install.sh +# CLAUDE_IMPORT_LINE). Matching on this substring is the install-sanity check. 
+CLAUDE_IMPORT_LINE = "@.evolve/EVOLVE.md" + +# Pattern used to lift the canary token out of the installed EVOLVE.md so the +# exact token lives in exactly one place (the template), never duplicated here. +_CANARY_RE = re.compile(r"EVOLVE_IMPORT_CANARY_\S+") + +# How many of the most-recent transcripts to scan for the canary. +_RECENT_N = 3 + + +def _evolve_dir(root): + """Resolve the .evolve root: $EVOLVE_DIR if set, else {root}/.evolve.""" + env_dir = os.environ.get("EVOLVE_DIR") + if env_dir: + return Path(env_dir) + return root / ".evolve" + + +def _transcript_slug(root): + """Claude derives a project's transcript dir name by replacing every + non-alphanumeric character in the absolute project path with ``-``. + + e.g. /Users/x/Documents/kaizen -> -Users-x-Documents-kaizen + """ + return re.sub(r"[^A-Za-z0-9]", "-", str(root)) + + +def _recent_transcripts(home, root, limit=_RECENT_N): + """The most recent N ``*.jsonl`` transcripts for this project, by mtime.""" + slug = _transcript_slug(root) + proj_dir = home / ".claude" / "projects" / slug + if not proj_dir.is_dir(): + return [] + jsonl = [p for p in proj_dir.glob("*.jsonl") if p.is_file()] + jsonl.sort(key=lambda p: p.stat().st_mtime, reverse=True) + return jsonl[:limit] + + +def _canary_in_transcripts(transcripts, token): + """True if `token` appears anywhere in any of the given transcript files.""" + for path in transcripts: + try: + text = path.read_text(encoding="utf-8", errors="replace") + except OSError: + continue + if token in text: + return True + return False + + +def diagnose(root, home): + """Core diagnosis. Returns ``(code, message)``; never raises on missing + files/dirs. `root` is the project root; `home` is the user home dir under + which Claude keeps ``~/.claude/projects/{slug}/``.
+ """ + root = Path(root) + home = Path(home) + + # --- Install sanity ------------------------------------------------------ + claude_md = root / "CLAUDE.md" + has_import = False + if claude_md.is_file(): + try: + has_import = CLAUDE_IMPORT_LINE in claude_md.read_text(encoding="utf-8", errors="replace") + except OSError: + has_import = False + if not has_import: + return ( + "NOT_INSTALLED", + f"evolve import not wired into this repo's CLAUDE.md (expected a line `{CLAUDE_IMPORT_LINE}`); run the installer.", + ) + + evolve_md = _evolve_dir(root) / "EVOLVE.md" + if not evolve_md.is_file(): + return ( + "NOT_INSTALLED", + f"installed EVOLVE.md is missing at {evolve_md}; run the installer.", + ) + + # --- Extract the canary from the installed file -------------------------- + try: + evolve_text = evolve_md.read_text(encoding="utf-8", errors="replace") + except OSError as exc: + return ( + "NOT_INSTALLED", + f"cannot read installed EVOLVE.md at {evolve_md} - {exc}; run the installer.", + ) + match = _CANARY_RE.search(evolve_text) + if not match: + return ( + "STALE_EVOLVE_MD", + f"installed EVOLVE.md at {evolve_md} has no canary token (it predates this build); re-run the installer to refresh it.", + ) + token = match.group(0) + + # --- Transcript check ---------------------------------------------------- + transcripts = _recent_transcripts(home, root) + if not transcripts: + return ( + "UNKNOWN", + "no recent Claude transcripts for this project yet; open a session, then re-run.", + ) + if _canary_in_transcripts(transcripts, token): + return ("OK", "✓ evolve EVOLVE.md import is loading.") + + return ( + "IMPORT_DISABLED", + "⚠ The @import is present in CLAUDE.md but its content is NOT " + "reaching sessions — you likely declined Claude's external-import " + "approval. 
Re-enable by running `claude project purge " + f"{root}` then start a new session and Allow the import dialog.", + ) + + +def main(): + root = Path(os.getcwd()).resolve() + home = Path.home() + code, message = diagnose(root, home) + log(f"{code}: {message}") + print(f"evolve doctor [{code}] {message}") + # Diagnostic only — never fail the caller. + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md new file mode 100644 index 00000000..0641e810 --- /dev/null +++ b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md @@ -0,0 +1,12 @@ +--- +name: doctor +description: Diagnose evolve health on Claude — verify the CLAUDE.md @import is actually loading the thin EVOLVE.md into sessions +--- + +# Doctor + +This skill diagnoses Claude's `@import` delivery of evolve's thin EVOLVE.md. It +is specific to Claude (where evolve loads via a per-project import that can be +silently declined) and is a **no-op on this platform** — here EVOLVE.md is +always-on and there is no import-approval gate to check. Nothing to run. + diff --git a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py new file mode 100644 index 00000000..2c2a5382 --- /dev/null +++ b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +""" +Doctor Script (Claude-only diagnostic) + +On Claude, evolve is delivered by a single ``@.evolve/EVOLVE.md`` import line in +the repo's ``./CLAUDE.md``. That import requires a one-time, per-project +"allow external imports" approval. 
If the user declines it (even once, in a past +session) Claude silently disables the import forever — the thin EVOLVE.md never +loads and evolve becomes a no-op with NO error. + +Claude's internal approval flag is undocumented and unreliable to read, so this +script detects delivery *empirically*: the installed thin EVOLVE.md carries a +canary token (``EVOLVE_IMPORT_CANARY_``). When the import loads, that token +expands into the session transcript. The doctor extracts the token from the +installed copy (never hardcoding it twice) and greps the most recent Claude +project transcripts for it. + +Status codes (printed verbatim, always exit 0 — this is a diagnostic): + + OK — canary found in a recent transcript; import is loading. + IMPORT_DISABLED — import line present in CLAUDE.md but canary absent from + every recent transcript; the user likely declined the + external-import approval. + NOT_INSTALLED — the import line is missing from CLAUDE.md, or the installed + .evolve/EVOLVE.md is missing; run the installer. + STALE_EVOLVE_MD — installed EVOLVE.md has no canary; it predates this build, + re-run the installer. + UNKNOWN — no recent Claude transcripts for this project yet. + +Usage: + python3 doctor.py +""" + +import os +import re +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple plugins +# can coexist side by side. The doctor only needs the shared `log` helper, but +# resolving the lib the same way the other scripts do keeps the convention +# uniform (and only works in the rendered tree, same constraint as adapt_memory). 
+_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) +from entity_io import log as _log # noqa: E402 + + +def log(message): + _log("doctor", message) + + +# The line the installer injects into the repo's CLAUDE.md (see install.sh +# CLAUDE_IMPORT_LINE). Matching on this substring is the install-sanity check. +CLAUDE_IMPORT_LINE = "@.evolve/EVOLVE.md" + +# Pattern used to lift the canary token out of the installed EVOLVE.md so the +# exact token lives in exactly one place (the template), never duplicated here. +_CANARY_RE = re.compile(r"EVOLVE_IMPORT_CANARY_\S+") + +# How many of the most-recent transcripts to scan for the canary. +_RECENT_N = 3 + + +def _evolve_dir(root): + """Resolve the .evolve root: $EVOLVE_DIR if set, else {root}/.evolve.""" + env_dir = os.environ.get("EVOLVE_DIR") + if env_dir: + return Path(env_dir) + return root / ".evolve" + + +def _transcript_slug(root): + """Claude derives a project's transcript dir name by replacing every + non-alphanumeric character in the absolute project path with ``-``. + + e.g.
/Users/x/Documents/kaizen -> -Users-x-Documents-kaizen + """ + return re.sub(r"[^A-Za-z0-9]", "-", str(root)) + + +def _recent_transcripts(home, root, limit=_RECENT_N): + """The most recent N ``*.jsonl`` transcripts for this project, by mtime.""" + slug = _transcript_slug(root) + proj_dir = home / ".claude" / "projects" / slug + if not proj_dir.is_dir(): + return [] + jsonl = [p for p in proj_dir.glob("*.jsonl") if p.is_file()] + jsonl.sort(key=lambda p: p.stat().st_mtime, reverse=True) + return jsonl[:limit] + + +def _canary_in_transcripts(transcripts, token): + """True if `token` appears anywhere in any of the given transcript files.""" + for path in transcripts: + try: + text = path.read_text(encoding="utf-8", errors="replace") + except OSError: + continue + if token in text: + return True + return False + + +def diagnose(root, home): + """Core diagnosis. Returns ``(code, message)``; never raises on missing + files/dirs. `root` is the project root; `home` is the user home dir under + which Claude keeps ``~/.claude/projects/{slug}/``.
+ """ + root = Path(root) + home = Path(home) + + # --- Install sanity ------------------------------------------------------ + claude_md = root / "CLAUDE.md" + has_import = False + if claude_md.is_file(): + try: + has_import = CLAUDE_IMPORT_LINE in claude_md.read_text(encoding="utf-8", errors="replace") + except OSError: + has_import = False + if not has_import: + return ( + "NOT_INSTALLED", + f"evolve import not wired into this repo's CLAUDE.md (expected a line `{CLAUDE_IMPORT_LINE}`); run the installer.", + ) + + evolve_md = _evolve_dir(root) / "EVOLVE.md" + if not evolve_md.is_file(): + return ( + "NOT_INSTALLED", + f"installed EVOLVE.md is missing at {evolve_md}; run the installer.", + ) + + # --- Extract the canary from the installed file -------------------------- + try: + evolve_text = evolve_md.read_text(encoding="utf-8", errors="replace") + except OSError as exc: + return ( + "NOT_INSTALLED", + f"cannot read installed EVOLVE.md at {evolve_md} - {exc}; run the installer.", + ) + match = _CANARY_RE.search(evolve_text) + if not match: + return ( + "STALE_EVOLVE_MD", + f"installed EVOLVE.md at {evolve_md} has no canary token (it predates this build); re-run the installer to refresh it.", + ) + token = match.group(0) + + # --- Transcript check ---------------------------------------------------- + transcripts = _recent_transcripts(home, root) + if not transcripts: + return ( + "UNKNOWN", + "no recent Claude transcripts for this project yet; open a session, then re-run.", + ) + if _canary_in_transcripts(transcripts, token): + return ("OK", "✓ evolve EVOLVE.md import is loading.") + + return ( + "IMPORT_DISABLED", + "⚠ The @import is present in CLAUDE.md but its content is NOT " + "reaching sessions — you likely declined Claude's external-import " + "approval. 
Re-enable by running `claude project purge " + f"{root}` then start a new session and Allow the import dialog.", + ) + + +def main(): + root = Path(os.getcwd()).resolve() + home = Path.home() + code, message = diagnose(root, home) + log(f"{code}: {message}") + print(f"evolve doctor [{code}] {message}") + # Diagnostic only — never fail the caller. + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md new file mode 100644 index 00000000..0641e810 --- /dev/null +++ b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md @@ -0,0 +1,12 @@ +--- +name: doctor +description: Diagnose evolve health on Claude — verify the CLAUDE.md @import is actually loading the thin EVOLVE.md into sessions +--- + +# Doctor + +This skill diagnoses Claude's `@import` delivery of evolve's thin EVOLVE.md. It +is specific to Claude (where evolve loads via a per-project import that can be +silently declined) and is a **no-op on this platform** — here EVOLVE.md is +always-on and there is no import-approval gate to check. Nothing to run. + diff --git a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py new file mode 100644 index 00000000..2c2a5382 --- /dev/null +++ b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +""" +Doctor Script (Claude-only diagnostic) + +On Claude, evolve is delivered by a single ``@.evolve/EVOLVE.md`` import line in +the repo's ``./CLAUDE.md``. That import requires a one-time, per-project +"allow external imports" approval. 
If the user declines it (even once, in a past +session) Claude silently disables the import forever — the thin EVOLVE.md never +loads and evolve becomes a no-op with NO error. + +Claude's internal approval flag is undocumented and unreliable to read, so this +script detects delivery *empirically*: the installed thin EVOLVE.md carries a +canary token (``EVOLVE_IMPORT_CANARY_``). When the import loads, that token +expands into the session transcript. The doctor extracts the token from the +installed copy (never hardcoding it twice) and greps the most recent Claude +project transcripts for it. + +Status codes (printed verbatim, always exit 0 — this is a diagnostic): + + OK — canary found in a recent transcript; import is loading. + IMPORT_DISABLED — import line present in CLAUDE.md but canary absent from + every recent transcript; the user likely declined the + external-import approval. + NOT_INSTALLED — the import line is missing from CLAUDE.md, or the installed + .evolve/EVOLVE.md is missing; run the installer. + STALE_EVOLVE_MD — installed EVOLVE.md has no canary; it predates this build, + re-run the installer. + UNKNOWN — no recent Claude transcripts for this project yet. + +Usage: + python3 doctor.py +""" + +import os +import re +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple plugins +# can coexist side by side. The doctor only needs the shared `log` helper, but +# resolving the lib the same way the other scripts do keeps the convention +# uniform (and only works in the rendered tree, same constraint as adapt_memory). 
+_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) +from entity_io import log as _log # noqa: E402 + + +def log(message): + _log("doctor", message) + + +# The line the installer injects into the repo's CLAUDE.md (see install.sh +# CLAUDE_IMPORT_LINE). Matching on this substring is the install-sanity check. +CLAUDE_IMPORT_LINE = "@.evolve/EVOLVE.md" + +# Pattern used to lift the canary token out of the installed EVOLVE.md so the +# exact token lives in exactly one place (the template), never duplicated here. +_CANARY_RE = re.compile(r"EVOLVE_IMPORT_CANARY_\S+") + +# How many of the most-recent transcripts to scan for the canary. +_RECENT_N = 3 + + +def _evolve_dir(root): + """Resolve the .evolve root: $EVOLVE_DIR if set, else {root}/.evolve.""" + env_dir = os.environ.get("EVOLVE_DIR") + if env_dir: + return Path(env_dir) + return root / ".evolve" + + +def _transcript_slug(root): + """Claude derives a project's transcript dir name by replacing every + non-alphanumeric character in the absolute project path with ``-``. + + e.g.
/Users/x/Documents/kaizen -> -Users-x-Documents-kaizen + """ + return re.sub(r"[^A-Za-z0-9]", "-", str(root)) + + +def _recent_transcripts(home, root, limit=_RECENT_N): + """The most recent N ``*.jsonl`` transcripts for this project, by mtime.""" + slug = _transcript_slug(root) + proj_dir = home / ".claude" / "projects" / slug + if not proj_dir.is_dir(): + return [] + jsonl = [p for p in proj_dir.glob("*.jsonl") if p.is_file()] + jsonl.sort(key=lambda p: p.stat().st_mtime, reverse=True) + return jsonl[:limit] + + +def _canary_in_transcripts(transcripts, token): + """True if `token` appears anywhere in any of the given transcript files.""" + for path in transcripts: + try: + text = path.read_text(encoding="utf-8", errors="replace") + except OSError: + continue + if token in text: + return True + return False + + +def diagnose(root, home): + """Core diagnosis. Returns ``(code, message)``; never raises on missing + files/dirs. `root` is the project root; `home` is the user home dir under + which Claude keeps ``~/.claude/projects/{slug}/``.
+ """ + root = Path(root) + home = Path(home) + + # --- Install sanity ------------------------------------------------------ + claude_md = root / "CLAUDE.md" + has_import = False + if claude_md.is_file(): + try: + has_import = CLAUDE_IMPORT_LINE in claude_md.read_text(encoding="utf-8", errors="replace") + except OSError: + has_import = False + if not has_import: + return ( + "NOT_INSTALLED", + f"evolve import not wired into this repo's CLAUDE.md (expected a line `{CLAUDE_IMPORT_LINE}`); run the installer.", + ) + + evolve_md = _evolve_dir(root) / "EVOLVE.md" + if not evolve_md.is_file(): + return ( + "NOT_INSTALLED", + f"installed EVOLVE.md is missing at {evolve_md}; run the installer.", + ) + + # --- Extract the canary from the installed file -------------------------- + try: + evolve_text = evolve_md.read_text(encoding="utf-8", errors="replace") + except OSError as exc: + return ( + "NOT_INSTALLED", + f"cannot read installed EVOLVE.md at {evolve_md} - {exc}; run the installer.", + ) + match = _CANARY_RE.search(evolve_text) + if not match: + return ( + "STALE_EVOLVE_MD", + f"installed EVOLVE.md at {evolve_md} has no canary token (it predates this build); re-run the installer to refresh it.", + ) + token = match.group(0) + + # --- Transcript check ---------------------------------------------------- + transcripts = _recent_transcripts(home, root) + if not transcripts: + return ( + "UNKNOWN", + "no recent Claude transcripts for this project yet; open a session, then re-run.", + ) + if _canary_in_transcripts(transcripts, token): + return ("OK", "✓ evolve EVOLVE.md import is loading.") + + return ( + "IMPORT_DISABLED", + "⚠ The @import is present in CLAUDE.md but its content is NOT " + "reaching sessions — you likely declined Claude's external-import " + "approval. 
Re-enable by running `claude project purge " + f"{root}` then start a new session and Allow the import dialog.", + ) + + +def main(): + root = Path(os.getcwd()).resolve() + home = Path.home() + code, message = diagnose(root, home) + log(f"{code}: {message}") + print(f"evolve doctor [{code}] {message}") + # Diagnostic only — never fail the caller. + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/plugin-source/EVOLVE.md.j2 b/plugin-source/EVOLVE.md.j2 index 4c2aa5f5..0930e2ea 100644 --- a/plugin-source/EVOLVE.md.j2 +++ b/plugin-source/EVOLVE.md.j2 @@ -1,5 +1,6 @@ {%- from "_macros.j2" import skill_ref with context -%} {% if platform == "claude" -%} + # Evolve — shared, auditable memory You already have native, self-directed memory: you decide what to recall at the diff --git a/plugin-source/skills/evolve-lite/doctor/SKILL.md.j2 b/plugin-source/skills/evolve-lite/doctor/SKILL.md.j2 new file mode 100644 index 00000000..c2e24254 --- /dev/null +++ b/plugin-source/skills/evolve-lite/doctor/SKILL.md.j2 @@ -0,0 +1,51 @@ +{%- from "_macros.j2" import invoke with context -%} +--- +name: {% if platform == "bob" %}evolve-lite:{% endif %}doctor +description: Diagnose evolve health on Claude — verify the CLAUDE.md @import is actually loading the thin EVOLVE.md into sessions +{% if platform == "claude" -%} +context: fork +{% endif -%} +--- + +# Doctor +{% if platform == "claude" %} +## Overview + +On Claude, evolve is delivered by a single `@.evolve/EVOLVE.md` import line in +this repo's `./CLAUDE.md`. That import requires a one-time, per-project "allow +external imports" approval. If you (or a teammate) declined it — even once, in a +past session — Claude silently disables the import forever, the thin EVOLVE.md +never loads, and evolve becomes a no-op with **no error**. 
+ +This skill checks whether the import is actually reaching your sessions, by +looking for a canary token that the installed EVOLVE.md expands into the session +transcript when the import loads. + +## Required Action + +Run the doctor script from the repo root: + +```bash +{{ invoke("doctor", "doctor.py") }} +``` + +It is read-only and always exits 0. Read the status code it prints: + +- **OK** — the import is loading; nothing to do. +- **IMPORT_DISABLED** — the `@import` line is in `CLAUDE.md` but its content is + not reaching sessions (you likely declined the external-import approval). + Follow the remediation the script prints: purge the project approval, start a + new session, and **Allow** the import dialog. +- **NOT_INSTALLED** — evolve isn't wired into this repo; re-run the installer. +- **STALE_EVOLVE_MD** — the installed `.evolve/EVOLVE.md` predates the canary; + re-run the installer to refresh it. +- **UNKNOWN** — no recent Claude transcripts for this project yet; open a + session, then re-run. + +Relay the status and any remediation to the user. +{% else %} +This skill diagnoses Claude's `@import` delivery of evolve's thin EVOLVE.md. It +is specific to Claude (where evolve loads via a per-project import that can be +silently declined) and is a **no-op on this platform** — here EVOLVE.md is +always-on and there is no import-approval gate to check. Nothing to run. +{% endif %} diff --git a/plugin-source/skills/evolve-lite/doctor/scripts/doctor.py b/plugin-source/skills/evolve-lite/doctor/scripts/doctor.py new file mode 100644 index 00000000..2c2a5382 --- /dev/null +++ b/plugin-source/skills/evolve-lite/doctor/scripts/doctor.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +""" +Doctor Script (Claude-only diagnostic) + +On Claude, evolve is delivered by a single ``@.evolve/EVOLVE.md`` import line in +the repo's ``./CLAUDE.md``. That import requires a one-time, per-project +"allow external imports" approval. 
If the user declines it (even once, in a past +session) Claude silently disables the import forever — the thin EVOLVE.md never +loads and evolve becomes a no-op with NO error. + +Claude's internal approval flag is undocumented and unreliable to read, so this +script detects delivery *empirically*: the installed thin EVOLVE.md carries a +canary token (``EVOLVE_IMPORT_CANARY_``). When the import loads, that token +expands into the session transcript. The doctor extracts the token from the +installed copy (never hardcoding it twice) and greps the most recent Claude +project transcripts for it. + +Status codes (printed verbatim, always exit 0 — this is a diagnostic): + + OK — canary found in a recent transcript; import is loading. + IMPORT_DISABLED — import line present in CLAUDE.md but canary absent from + every recent transcript; the user likely declined the + external-import approval. + NOT_INSTALLED — the import line is missing from CLAUDE.md, or the installed + .evolve/EVOLVE.md is missing; run the installer. + STALE_EVOLVE_MD — installed EVOLVE.md has no canary; it predates this build, + re-run the installer. + UNKNOWN — no recent Claude transcripts for this project yet. + +Usage: + python3 doctor.py +""" + +import os +import re +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple plugins +# can coexist side by side. The doctor only needs the shared `log` helper, but +# resolving the lib the same way the other scripts do keeps the convention +# uniform (and only works in the rendered tree, same constraint as adapt_memory). 
+_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) +from entity_io import log as _log # noqa: E402 + + +def log(message): + _log("doctor", message) + + +# The line the installer injects into the repo's CLAUDE.md (see install.sh +# CLAUDE_IMPORT_LINE). Matching on this substring is the install-sanity check. +CLAUDE_IMPORT_LINE = "@.evolve/EVOLVE.md" + +# Pattern used to lift the canary token out of the installed EVOLVE.md so the +# exact token lives in exactly one place (the template), never duplicated here. +_CANARY_RE = re.compile(r"EVOLVE_IMPORT_CANARY_\S+") + +# How many of the most-recent transcripts to scan for the canary. +_RECENT_N = 3 + + +def _evolve_dir(root): + """Resolve the .evolve root: $EVOLVE_DIR if set, else <root>/.evolve.""" + env_dir = os.environ.get("EVOLVE_DIR") + if env_dir: + return Path(env_dir) + return root / ".evolve" + + +def _transcript_slug(root): + """Claude derives a project's transcript dir name by replacing every + non-alphanumeric character in the absolute project path with ``-``. + + e.g. 
/Users/x/Documents/kaizen -> -Users-x-Documents-kaizen + """ + return re.sub(r"[^A-Za-z0-9]", "-", str(root)) + + +def _recent_transcripts(home, root, limit=_RECENT_N): + """The most recent N ``*.jsonl`` transcripts for this project, by mtime.""" + slug = _transcript_slug(root) + proj_dir = home / ".claude" / "projects" / slug + if not proj_dir.is_dir(): + return [] + jsonl = [p for p in proj_dir.glob("*.jsonl") if p.is_file()] + jsonl.sort(key=lambda p: p.stat().st_mtime, reverse=True) + return jsonl[:limit] + + +def _canary_in_transcripts(transcripts, token): + """True if `token` appears anywhere in any of the given transcript files.""" + for path in transcripts: + try: + text = path.read_text(encoding="utf-8", errors="replace") + except OSError: + continue + if token in text: + return True + return False + + +def diagnose(root, home): + """Core diagnosis. Returns ``(code, message)``; never raises on missing + files/dirs. `root` is the project root; `home` is the user home dir under + which Claude keeps ``~/.claude/projects/<slug>/``. 
+ """ + root = Path(root) + home = Path(home) + + # --- Install sanity ------------------------------------------------------ + claude_md = root / "CLAUDE.md" + has_import = False + if claude_md.is_file(): + try: + has_import = CLAUDE_IMPORT_LINE in claude_md.read_text(encoding="utf-8", errors="replace") + except OSError: + has_import = False + if not has_import: + return ( + "NOT_INSTALLED", + f"evolve import not wired into this repo's CLAUDE.md (expected a line `{CLAUDE_IMPORT_LINE}`); run the installer.", + ) + + evolve_md = _evolve_dir(root) / "EVOLVE.md" + if not evolve_md.is_file(): + return ( + "NOT_INSTALLED", + f"installed EVOLVE.md is missing at {evolve_md}; run the installer.", + ) + + # --- Extract the canary from the installed file -------------------------- + try: + evolve_text = evolve_md.read_text(encoding="utf-8", errors="replace") + except OSError as exc: + return ( + "NOT_INSTALLED", + f"cannot read installed EVOLVE.md at {evolve_md} - {exc}; run the installer.", + ) + match = _CANARY_RE.search(evolve_text) + if not match: + return ( + "STALE_EVOLVE_MD", + f"installed EVOLVE.md at {evolve_md} has no canary token (it predates this build); re-run the installer to refresh it.", + ) + token = match.group(0) + + # --- Transcript check ---------------------------------------------------- + transcripts = _recent_transcripts(home, root) + if not transcripts: + return ( + "UNKNOWN", + "no recent Claude transcripts for this project yet; open a session, then re-run.", + ) + if _canary_in_transcripts(transcripts, token): + return ("OK", "✓ evolve EVOLVE.md import is loading.") + + return ( + "IMPORT_DISABLED", + "⚠ The @import is present in CLAUDE.md but its content is NOT " + "reaching sessions — you likely declined Claude's external-import " + "approval. 
Re-enable by running `claude project purge " + f"{root}` then start a new session and Allow the import dialog.", + ) + + +def main(): + root = Path(os.getcwd()).resolve() + home = Path.home() + code, message = diagnose(root, home) + log(f"{code}: {message}") + print(f"evolve doctor [{code}] {message}") + # Diagnostic only — never fail the caller. + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/tests/platform_integrations/test_doctor.py b/tests/platform_integrations/test_doctor.py new file mode 100644 index 00000000..e468bde2 --- /dev/null +++ b/tests/platform_integrations/test_doctor.py @@ -0,0 +1,144 @@ +"""Unit tests for the evolve doctor diagnostic (doctor.py). + +The doctor checks whether Claude's ``@.evolve/EVOLVE.md`` import is actually +reaching sessions, by extracting the canary token from the installed EVOLVE.md +and grepping recent Claude project transcripts for it. + +We exercise the importable ``diagnose(root, home)`` core directly. doctor.py +resolves the shared lib by parent-walking to ``lib/evolve-lite/`` — that only +works in the rendered tree, so we import the RENDERED Claude copy (same +constraint adapt_memory.py has). +""" + +import importlib.util +import re +from pathlib import Path + +import pytest + +pytestmark = pytest.mark.platform_integrations + +_DOCTOR = ( + Path(__file__).parent.parent.parent / "platform-integrations/claude/plugins/evolve-lite" / "skills/evolve-lite/doctor/scripts/doctor.py" +) + +# The canary token the installed EVOLVE.md carries. Kept here ONLY for fixture +# construction; doctor.py itself extracts it from the file via regex. 
+_CANARY = "EVOLVE_IMPORT_CANARY_v1" +_IMPORT_LINE = "@.evolve/EVOLVE.md" + + +def _load_doctor(): + spec = importlib.util.spec_from_file_location("evolve_doctor", _DOCTOR) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod + + +@pytest.fixture(autouse=True) +def _clear_evolve_dir(monkeypatch): + """doctor.py honors $EVOLVE_DIR; clear it so tests resolve .evolve under the + temp project root, not a developer's ambient override.""" + monkeypatch.delenv("EVOLVE_DIR", raising=False) + + +@pytest.fixture +def doctor(): + return _load_doctor() + + +def _make_project(root, *, claude_md=True, evolve_md=True, canary=True): + """Build a fake project tree under `root`.""" + root.mkdir(parents=True, exist_ok=True) + if claude_md: + (root / "CLAUDE.md").write_text(f"# Project rules\n\n{_IMPORT_LINE}\n", encoding="utf-8") + else: + (root / "CLAUDE.md").write_text("# Project rules\n", encoding="utf-8") + if evolve_md: + evolve_dir = root / ".evolve" + evolve_dir.mkdir(parents=True, exist_ok=True) + body = "# Evolve\n" + if canary: + body = f"<!-- {_CANARY} -->\n" + body + (evolve_dir / "EVOLVE.md").write_text(body, encoding="utf-8") + + +def _slug(root): + return re.sub(r"[^A-Za-z0-9]", "-", str(root)) + + +def _write_transcript(home, root, *, with_canary): + proj = home / ".claude" / "projects" / _slug(root) + proj.mkdir(parents=True, exist_ok=True) + content = '{"role":"user","content":"hello"}\n' + if with_canary: + content += '{"role":"system","content":"' + _CANARY + '"}\n' + (proj / "session.jsonl").write_text(content, encoding="utf-8") + + +def test_ok_when_canary_in_transcript(doctor, tmp_path): + root = tmp_path / "proj" + home = tmp_path / "home" + home.mkdir() + _make_project(root) + _write_transcript(home, root, with_canary=True) + + code, message = doctor.diagnose(root, home) + assert code == "OK", message + + +def test_import_disabled_when_transcript_lacks_canary(doctor, tmp_path): + root = tmp_path / "proj" + home = tmp_path / "home" +
home.mkdir() + _make_project(root) + _write_transcript(home, root, with_canary=False) + + code, message = doctor.diagnose(root, home) + assert code == "IMPORT_DISABLED", message + # The exact project root must appear in the remediation. + assert str(root) in message + + +def test_not_installed_when_no_import_line(doctor, tmp_path): + root = tmp_path / "proj" + home = tmp_path / "home" + home.mkdir() + _make_project(root, claude_md=False) + _write_transcript(home, root, with_canary=True) + + code, _ = doctor.diagnose(root, home) + assert code == "NOT_INSTALLED" + + +def test_not_installed_when_evolve_md_missing(doctor, tmp_path): + root = tmp_path / "proj" + home = tmp_path / "home" + home.mkdir() + _make_project(root, evolve_md=False) + _write_transcript(home, root, with_canary=True) + + code, _ = doctor.diagnose(root, home) + assert code == "NOT_INSTALLED" + + +def test_stale_evolve_md_when_no_canary(doctor, tmp_path): + root = tmp_path / "proj" + home = tmp_path / "home" + home.mkdir() + _make_project(root, canary=False) + _write_transcript(home, root, with_canary=False) + + code, _ = doctor.diagnose(root, home) + assert code == "STALE_EVOLVE_MD" + + +def test_unknown_when_no_transcripts(doctor, tmp_path): + root = tmp_path / "proj" + home = tmp_path / "home" + home.mkdir() + _make_project(root) + # No transcript written. 
+ + code, _ = doctor.diagnose(root, home) + assert code == "UNKNOWN" diff --git a/tests/platform_integrations/test_plugin_structure.py b/tests/platform_integrations/test_plugin_structure.py index 4a996bb6..84e4bf06 100644 --- a/tests/platform_integrations/test_plugin_structure.py +++ b/tests/platform_integrations/test_plugin_structure.py @@ -72,6 +72,7 @@ class TestSkillScripts: "skills/evolve-lite/learn/scripts/save_entities.py", "skills/evolve-lite/provenance/scripts/log_influence.py", "skills/evolve-lite/adapt-memory/scripts/adapt_memory.py", + "skills/evolve-lite/doctor/scripts/doctor.py", ], ) def test_script_exists(self, script_rel): From 75e582cc711fae137fbb15ddab90fe6d1493d70d Mon Sep 17 00:00:00 2001 From: Punleuk Oum <5661986+illeatmyhat@users.noreply.github.com> Date: Mon, 8 Jun 2026 13:11:22 -0700 Subject: [PATCH 04/12] refactor(platform-integrations): drop all Claude auto-firing hooks (fully hookless) The Claude plugin still shipped the old hooks (UserPromptSubmit recall-manifest, SessionStart sync, Stop save-trajectory + learn), which conflict with the new native-memory + CLAUDE.md @import design (double recall/save). Remove the Claude hooks.json entirely so native+import is the sole mechanism; recall is native, save is native, sync/learn/provenance become explicit skills. Skills are unchanged and remain invokable (evolve-lite is not a no-op). Only the hook wiring is removed. bob/codex/claw untouched. Tests updated to assert the Claude plugin ships no hooks. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../plugins/evolve-lite/hooks/hooks.json | 41 ------------------ plugin-source/_claude/hooks/hooks.json | 41 ------------------ .../test_plugin_structure.py | 43 +++++++------------ 3 files changed, 15 insertions(+), 110 deletions(-) delete mode 100644 platform-integrations/claude/plugins/evolve-lite/hooks/hooks.json delete mode 100644 plugin-source/_claude/hooks/hooks.json diff --git a/platform-integrations/claude/plugins/evolve-lite/hooks/hooks.json b/platform-integrations/claude/plugins/evolve-lite/hooks/hooks.json deleted file mode 100644 index 1d282a7e..00000000 --- a/platform-integrations/claude/plugins/evolve-lite/hooks/hooks.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "hooks": { - "UserPromptSubmit": [ - { - "matcher": "", - "hooks": [ - { - "type": "command", - "command": "python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/recall/scripts/retrieve_entities.py" - } - ] - } - ], - "SessionStart": [ - { - "matcher": "", - "hooks": [ - { - "type": "command", - "command": "python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/sync/scripts/sync.py --quiet" - } - ] - } - ], - "Stop": [ - { - "matcher": "", - "hooks": [ - { - "type": "command", - "command": "python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/save-trajectory/scripts/on_stop.py" - }, - { - "type": "command", - "command": "python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/learn/scripts/on_stop.py" - } - ] - } - ] - } -} diff --git a/plugin-source/_claude/hooks/hooks.json b/plugin-source/_claude/hooks/hooks.json deleted file mode 100644 index 1d282a7e..00000000 --- a/plugin-source/_claude/hooks/hooks.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "hooks": { - "UserPromptSubmit": [ - { - "matcher": "", - "hooks": [ - { - "type": "command", - "command": "python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/recall/scripts/retrieve_entities.py" - } - ] - } - ], - "SessionStart": [ - { - "matcher": "", - "hooks": [ - { - "type": "command", - "command": "python3 
${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/sync/scripts/sync.py --quiet" - } - ] - } - ], - "Stop": [ - { - "matcher": "", - "hooks": [ - { - "type": "command", - "command": "python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/save-trajectory/scripts/on_stop.py" - }, - { - "type": "command", - "command": "python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/learn/scripts/on_stop.py" - } - ] - } - ] - } -} diff --git a/tests/platform_integrations/test_plugin_structure.py b/tests/platform_integrations/test_plugin_structure.py index 84e4bf06..781b4141 100644 --- a/tests/platform_integrations/test_plugin_structure.py +++ b/tests/platform_integrations/test_plugin_structure.py @@ -28,34 +28,21 @@ def test_plugin_json_skills_path_exists(self): class TestHooksManifest: - def test_hooks_json_is_valid_json(self): - data = json.loads((_PLUGIN_ROOT / "hooks" / "hooks.json").read_text()) - assert isinstance(data, dict) - - def test_hooks_json_has_hooks_key(self): - data = json.loads((_PLUGIN_ROOT / "hooks" / "hooks.json").read_text()) - assert "hooks" in data - - def test_known_lifecycle_events_present(self): - data = json.loads((_PLUGIN_ROOT / "hooks" / "hooks.json").read_text()) - hooks = data["hooks"] - assert "UserPromptSubmit" in hooks - assert "SessionStart" in hooks - assert "Stop" in hooks - - def test_command_hook_scripts_exist(self): - data = json.loads((_PLUGIN_ROOT / "hooks" / "hooks.json").read_text()) - for event, groups in data["hooks"].items(): - for group in groups: - for hook in group.get("hooks", []): - if hook.get("type") == "command": - cmd = hook["command"] - resolved = cmd.replace("${CLAUDE_PLUGIN_ROOT}", str(_PLUGIN_ROOT)) - # Find the script token — commands may have trailing flags - script_tokens = [t for t in resolved.split() if t.endswith((".py", ".sh"))] - assert script_tokens, f"No script found in hook command: {cmd}" - script_path = Path(script_tokens[0]) - assert script_path.exists(), f"Hook script missing: {script_path} (event: {event})" + """The Claude 
plugin is fully hookless under the native-memory + CLAUDE.md + `@import` redesign. Recall is native and save is native, so the plugin must + register NO auto-firing hooks — otherwise recall/save fire twice. The skills + themselves stay invokable (see TestSkillScripts); only the hook WIRING is gone. + """ + + def test_no_hooks_json_shipped(self): + # No hooks/hooks.json under the rendered Claude plugin: the plugin + # registers no auto-firing lifecycle hooks at all. + assert not (_PLUGIN_ROOT / "hooks" / "hooks.json").exists() + + def test_no_hooks_directory(self): + # The render wipes and rewrites the plugin root from plugin-source/; + # with the source hooks.json removed, no hooks/ dir should remain. + assert not (_PLUGIN_ROOT / "hooks").exists() class TestSkillScripts: From 3f56631b7c02ee4136f2cfae09f8efc471661331 Mon Sep 17 00:00:00 2001 From: Punleuk Oum <5661986+illeatmyhat@users.noreply.github.com> Date: Mon, 8 Jun 2026 13:18:48 -0700 Subject: [PATCH 05/12] feat(platform-integrations): stable native-to-entity id linkage for provenance Closes the correlation-id gap that broke provenance on Claude. The adapter now derives the entity slug from the native memory's name field, so the entity id is a deterministic, derivable / on both the save and recall sides, and re-mirroring overwrites in place (idempotent, no -N suffix). The entity also stamps native_path as a back-reference. 
- entity_io.write_entity_file: optional filename/overwrite for deterministic in-place writes (default behavior unchanged for existing callers); native_path added to _FRONTMATTER_KEYS - adapt_memory.py: parse native name, write /.md, print the id - Claude EVOLVE.md recall-audit now logs the entity id / (not native paths), which provenance resolves to .evolve/entities//.md Co-Authored-By: Claude Opus 4.8 (1M context) --- .../evolve-lite/lib/evolve-lite/entity_io.py | 25 ++++- .../scripts/adapt_memory.py | 45 ++++++--- .../claude/plugins/evolve-lite/EVOLVE.md | 18 ++-- .../evolve-lite/lib/evolve-lite/entity_io.py | 25 ++++- .../adapt-memory/scripts/adapt_memory.py | 45 ++++++--- .../evolve-lite/lib/evolve-lite/entity_io.py | 25 ++++- .../adapt-memory/scripts/adapt_memory.py | 45 ++++++--- .../evolve-lite/lib/evolve-lite/entity_io.py | 25 ++++- .../adapt-memory/scripts/adapt_memory.py | 45 ++++++--- plugin-source/EVOLVE.md.j2 | 18 ++-- plugin-source/lib/entity_io.py | 25 ++++- .../adapt-memory/scripts/adapt_memory.py | 45 ++++++--- .../test_entity_io_core.py | 97 ++++++++++++++++++- 13 files changed, 372 insertions(+), 111 deletions(-) diff --git a/platform-integrations/bob/evolve-lite/lib/evolve-lite/entity_io.py b/platform-integrations/bob/evolve-lite/lib/evolve-lite/entity_io.py index 9b177718..8887caf0 100644 --- a/platform-integrations/bob/evolve-lite/lib/evolve-lite/entity_io.py +++ b/platform-integrations/bob/evolve-lite/lib/evolve-lite/entity_io.py @@ -154,7 +154,7 @@ def unique_filename(directory, slug): # Markdown <-> dict conversion # --------------------------------------------------------------------------- -_FRONTMATTER_KEYS = ("type", "trigger", "trajectory", "owner", "source", "visibility", "published_at") +_FRONTMATTER_KEYS = ("type", "trigger", "trajectory", "owner", "source", "native_path", "visibility", "published_at") def entity_to_markdown(entity): @@ -354,12 +354,24 @@ def load_all_entities(entities_dir): return entities -def 
write_entity_file(directory, entity): +def write_entity_file(directory, entity, filename=None, overwrite=False): """Write a single entity as a markdown file under *directory*. The file is placed in a ``{type}/`` subdirectory. Uses atomic write (write to ``.tmp``, then ``os.rename``). + Args: + directory: Entities root directory. + entity: The entity dict to serialize. + filename: Optional explicit slug for the target file (without the + ``.md`` suffix). When omitted, the slug is derived from the + entity content (the historical default). + overwrite: When True, the entity is written to a deterministic + ``{type}/{filename}.md`` path, overwriting any existing file in + place (stable id, idempotent re-mirroring). When False (the + default), the historical collision-avoiding behavior is kept — + a ``-2``/``-3`` suffix is appended on collision. + Returns: Path to the written file. """ @@ -370,7 +382,7 @@ def write_entity_file(directory, entity): type_dir = Path(directory) / entity_type type_dir.mkdir(parents=True, exist_ok=True) - slug = slugify(entity.get("content", "entity")) + slug = filename if filename else slugify(entity.get("content", "entity")) content = entity_to_markdown(entity) # Write to a unique temp file first (avoids predictable .tmp collisions) @@ -381,6 +393,13 @@ def write_entity_file(directory, entity): os.close(fd) fd = None + if overwrite: + # Deterministic target: overwrite any existing file in place so + # the entity id is stable across re-mirroring. 
+ target = type_dir / f"{slug}.md" + os.replace(tmp_path, target) + return target + # Atomically claim the target using O_EXCL; retry on race while True: target = unique_filename(type_dir, slug) diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite-adapt-memory/scripts/adapt_memory.py b/platform-integrations/bob/evolve-lite/skills/evolve-lite-adapt-memory/scripts/adapt_memory.py index fb90d93c..5bb8fb44 100644 --- a/platform-integrations/bob/evolve-lite/skills/evolve-lite-adapt-memory/scripts/adapt_memory.py +++ b/platform-integrations/bob/evolve-lite/skills/evolve-lite-adapt-memory/scripts/adapt_memory.py @@ -48,6 +48,7 @@ from entity_io import ( # noqa: E402 find_entities_dir, get_default_entities_dir, + slugify, write_entity_file, log as _log, ) @@ -58,14 +59,15 @@ def log(message): def parse_native_memory(text): - """Split a native memory file into (description, body). + """Split a native memory file into (name, description, body). Native frontmatter is simple ``key: value`` lines plus a nested - ``metadata:`` block; we only need ``description`` and the body, so we - parse the top-level ``description:`` line and treat everything after the - closing ``---`` as the body. Missing frontmatter is tolerated — the whole - text is then the body. + ``metadata:`` block; we parse the top-level ``name`` and ``description`` + lines and treat everything after the closing ``---`` as the body. The + ``name`` is the native slug we reuse as the stable entity id. Missing + frontmatter is tolerated — the whole text is then the body. """ + name = None description = None body = text if text.startswith("---"): @@ -74,16 +76,17 @@ def parse_native_memory(text): frontmatter, body = parts[1], parts[2] for line in frontmatter.splitlines(): # Only top-level keys (no leading indentation) — keeps the - # nested metadata.* keys out of the description match. + # nested metadata.* keys out of the top-level matches. 
if line[:1].isspace(): continue key, _, value = line.partition(":") - if key.strip() == "description": - value = value.strip() - if value: - description = value - break - return description, body.strip() + key = key.strip() + value = value.strip() + if key == "name" and value: + name = value + elif key == "description" and value: + description = value + return name, description, body.strip() def main(): @@ -112,7 +115,7 @@ def main(): print(f"Error: cannot read {memory_path} - {exc}", file=sys.stderr) sys.exit(1) - description, body = parse_native_memory(text) + name, description, body = parse_native_memory(text) if not body: print(f"Error: native memory {memory_path} has no body to mirror.", file=sys.stderr) sys.exit(1) @@ -123,11 +126,18 @@ def main(): if description and description not in body: content = f"{description}\n\n{body}" + # The native ``name`` becomes the stable, derivable entity slug so the + # entity id is ``/`` on both sides — provenance can map an + # audited native memory straight onto its mirror. Fall back to a + # content-derived slug only when the native frontmatter has no name. + slug = slugify(name) if name else slugify(content) + entity = { "type": args.type, "trigger": args.trigger, "content": content, "source": "native-memory", + "native_path": args.memory_path, } entities_dir = find_entities_dir() @@ -138,9 +148,14 @@ def main(): entities_dir = get_default_entities_dir() log(f"Created entities dir: {entities_dir}") - path = write_entity_file(entities_dir, entity) - log(f"Mirrored {memory_path} -> {path}") + # Deterministic, idempotent write: re-mirroring the same native memory + # (same name + type) overwrites /.md in place rather than + # creating -2.md, keeping the entity id stable. 
+ path = write_entity_file(entities_dir, entity, filename=slug, overwrite=True) + entity_id = f"{entity['type']}/{slug}" + log(f"Mirrored {memory_path} -> {path} (id: {entity_id})") print(f"Mirrored native memory into evolve store: {path}") + print(f"Entity id: {entity_id}") if __name__ == "__main__": diff --git a/platform-integrations/claude/plugins/evolve-lite/EVOLVE.md b/platform-integrations/claude/plugins/evolve-lite/EVOLVE.md index cf97f4e7..5b3ff387 100644 --- a/platform-integrations/claude/plugins/evolve-lite/EVOLVE.md +++ b/platform-integrations/claude/plugins/evolve-lite/EVOLVE.md @@ -15,15 +15,17 @@ once per memory you saved. ## Record what you consulted -After you read or consult memories this turn, log which ones you actually opened, -so the value of this memory can be measured over time. Run: +After you read or consult native memories this turn, log which ones you actually +opened, so the value of this memory can be measured over time. Run: ``` -python3 ~/.claude/evolve-lite/audit_recall.py [ ...] +python3 ~/.claude/evolve-lite/audit_recall.py <type>/<name> [<type>/<name> ...] ``` -Pass the memory files you read this turn (space-separated paths, relative to the -project root). Skip this step entirely if you consulted no memories. If the -command prints a line beginning `evolve-session:`, include that line once, -verbatim, somewhere in your reply — it lets later analysis tie this session to -what you recalled. +Pass the entity id `<type>/<name>` for each native memory you consulted, where +`<type>` is the memory's `metadata.type` and `<name>` is its top-level `name` +field — this is the id provenance resolves to `./.evolve/entities/<type>/<name>.md` +(the same id /evolve-lite:adapt-memory mirrors to). Skip this step entirely +if you consulted no memories. If the command prints a line beginning +`evolve-session:`, include that line once, verbatim, somewhere in your reply — it +lets later analysis tie this session to what you recalled. 
diff --git a/platform-integrations/claude/plugins/evolve-lite/lib/evolve-lite/entity_io.py b/platform-integrations/claude/plugins/evolve-lite/lib/evolve-lite/entity_io.py index 9b177718..8887caf0 100644 --- a/platform-integrations/claude/plugins/evolve-lite/lib/evolve-lite/entity_io.py +++ b/platform-integrations/claude/plugins/evolve-lite/lib/evolve-lite/entity_io.py @@ -154,7 +154,7 @@ def unique_filename(directory, slug): # Markdown <-> dict conversion # --------------------------------------------------------------------------- -_FRONTMATTER_KEYS = ("type", "trigger", "trajectory", "owner", "source", "visibility", "published_at") +_FRONTMATTER_KEYS = ("type", "trigger", "trajectory", "owner", "source", "native_path", "visibility", "published_at") def entity_to_markdown(entity): @@ -354,12 +354,24 @@ def load_all_entities(entities_dir): return entities -def write_entity_file(directory, entity): +def write_entity_file(directory, entity, filename=None, overwrite=False): """Write a single entity as a markdown file under *directory*. The file is placed in a ``{type}/`` subdirectory. Uses atomic write (write to ``.tmp``, then ``os.rename``). + Args: + directory: Entities root directory. + entity: The entity dict to serialize. + filename: Optional explicit slug for the target file (without the + ``.md`` suffix). When omitted, the slug is derived from the + entity content (the historical default). + overwrite: When True, the entity is written to a deterministic + ``{type}/{filename}.md`` path, overwriting any existing file in + place (stable id, idempotent re-mirroring). When False (the + default), the historical collision-avoiding behavior is kept — + a ``-2``/``-3`` suffix is appended on collision. + Returns: Path to the written file. 
""" @@ -370,7 +382,7 @@ def write_entity_file(directory, entity): type_dir = Path(directory) / entity_type type_dir.mkdir(parents=True, exist_ok=True) - slug = slugify(entity.get("content", "entity")) + slug = filename if filename else slugify(entity.get("content", "entity")) content = entity_to_markdown(entity) # Write to a unique temp file first (avoids predictable .tmp collisions) @@ -381,6 +393,13 @@ def write_entity_file(directory, entity): os.close(fd) fd = None + if overwrite: + # Deterministic target: overwrite any existing file in place so + # the entity id is stable across re-mirroring. + target = type_dir / f"{slug}.md" + os.replace(tmp_path, target) + return target + # Atomically claim the target using O_EXCL; retry on race while True: target = unique_filename(type_dir, slug) diff --git a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py index fb90d93c..5bb8fb44 100644 --- a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py +++ b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py @@ -48,6 +48,7 @@ from entity_io import ( # noqa: E402 find_entities_dir, get_default_entities_dir, + slugify, write_entity_file, log as _log, ) @@ -58,14 +59,15 @@ def log(message): def parse_native_memory(text): - """Split a native memory file into (description, body). + """Split a native memory file into (name, description, body). Native frontmatter is simple ``key: value`` lines plus a nested - ``metadata:`` block; we only need ``description`` and the body, so we - parse the top-level ``description:`` line and treat everything after the - closing ``---`` as the body. Missing frontmatter is tolerated — the whole - text is then the body. 
+ ``metadata:`` block; we parse the top-level ``name`` and ``description`` + lines and treat everything after the closing ``---`` as the body. The + ``name`` is the native slug we reuse as the stable entity id. Missing + frontmatter is tolerated — the whole text is then the body. """ + name = None description = None body = text if text.startswith("---"): @@ -74,16 +76,17 @@ def parse_native_memory(text): frontmatter, body = parts[1], parts[2] for line in frontmatter.splitlines(): # Only top-level keys (no leading indentation) — keeps the - # nested metadata.* keys out of the description match. + # nested metadata.* keys out of the top-level matches. if line[:1].isspace(): continue key, _, value = line.partition(":") - if key.strip() == "description": - value = value.strip() - if value: - description = value - break - return description, body.strip() + key = key.strip() + value = value.strip() + if key == "name" and value: + name = value + elif key == "description" and value: + description = value + return name, description, body.strip() def main(): @@ -112,7 +115,7 @@ def main(): print(f"Error: cannot read {memory_path} - {exc}", file=sys.stderr) sys.exit(1) - description, body = parse_native_memory(text) + name, description, body = parse_native_memory(text) if not body: print(f"Error: native memory {memory_path} has no body to mirror.", file=sys.stderr) sys.exit(1) @@ -123,11 +126,18 @@ def main(): if description and description not in body: content = f"{description}\n\n{body}" + # The native ``name`` becomes the stable, derivable entity slug so the + # entity id is ``<type>/<name>`` on both sides — provenance can map an + # audited native memory straight onto its mirror. Fall back to a + # content-derived slug only when the native frontmatter has no name. 
+ slug = slugify(name) if name else slugify(content) + entity = { "type": args.type, "trigger": args.trigger, "content": content, "source": "native-memory", + "native_path": args.memory_path, } entities_dir = find_entities_dir() @@ -138,9 +148,14 @@ def main(): entities_dir = get_default_entities_dir() log(f"Created entities dir: {entities_dir}") - path = write_entity_file(entities_dir, entity) - log(f"Mirrored {memory_path} -> {path}") + # Deterministic, idempotent write: re-mirroring the same native memory + # (same name + type) overwrites <type>/<name>.md in place rather than + # creating <name>-2.md, keeping the entity id stable. + path = write_entity_file(entities_dir, entity, filename=slug, overwrite=True) + entity_id = f"{entity['type']}/{slug}" + log(f"Mirrored {memory_path} -> {path} (id: {entity_id})") print(f"Mirrored native memory into evolve store: {path}") + print(f"Entity id: {entity_id}") if __name__ == "__main__": diff --git a/platform-integrations/claw-code/plugins/evolve-lite/lib/evolve-lite/entity_io.py b/platform-integrations/claw-code/plugins/evolve-lite/lib/evolve-lite/entity_io.py index 9b177718..8887caf0 100644 --- a/platform-integrations/claw-code/plugins/evolve-lite/lib/evolve-lite/entity_io.py +++ b/platform-integrations/claw-code/plugins/evolve-lite/lib/evolve-lite/entity_io.py @@ -154,7 +154,7 @@ def unique_filename(directory, slug): # Markdown <-> dict conversion # --------------------------------------------------------------------------- -_FRONTMATTER_KEYS = ("type", "trigger", "trajectory", "owner", "source", "visibility", "published_at") +_FRONTMATTER_KEYS = ("type", "trigger", "trajectory", "owner", "source", "native_path", "visibility", "published_at") def entity_to_markdown(entity): @@ -354,12 +354,24 @@ def load_all_entities(entities_dir): return entities -def write_entity_file(directory, entity): +def write_entity_file(directory, entity, filename=None, overwrite=False): """Write a single entity as a markdown file under *directory*. 
The file is placed in a ``{type}/`` subdirectory. Uses atomic write (write to ``.tmp``, then ``os.rename``). + Args: + directory: Entities root directory. + entity: The entity dict to serialize. + filename: Optional explicit slug for the target file (without the + ``.md`` suffix). When omitted, the slug is derived from the + entity content (the historical default). + overwrite: When True, the entity is written to a deterministic + ``{type}/{filename}.md`` path, overwriting any existing file in + place (stable id, idempotent re-mirroring). When False (the + default), the historical collision-avoiding behavior is kept — + a ``-2``/``-3`` suffix is appended on collision. + Returns: Path to the written file. """ @@ -370,7 +382,7 @@ def write_entity_file(directory, entity): type_dir = Path(directory) / entity_type type_dir.mkdir(parents=True, exist_ok=True) - slug = slugify(entity.get("content", "entity")) + slug = filename if filename else slugify(entity.get("content", "entity")) content = entity_to_markdown(entity) # Write to a unique temp file first (avoids predictable .tmp collisions) @@ -381,6 +393,13 @@ def write_entity_file(directory, entity): os.close(fd) fd = None + if overwrite: + # Deterministic target: overwrite any existing file in place so + # the entity id is stable across re-mirroring. 
+ target = type_dir / f"{slug}.md" + os.replace(tmp_path, target) + return target + # Atomically claim the target using O_EXCL; retry on race while True: target = unique_filename(type_dir, slug) diff --git a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py index fb90d93c..5bb8fb44 100644 --- a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py +++ b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py @@ -48,6 +48,7 @@ from entity_io import ( # noqa: E402 find_entities_dir, get_default_entities_dir, + slugify, write_entity_file, log as _log, ) @@ -58,14 +59,15 @@ def log(message): def parse_native_memory(text): - """Split a native memory file into (description, body). + """Split a native memory file into (name, description, body). Native frontmatter is simple ``key: value`` lines plus a nested - ``metadata:`` block; we only need ``description`` and the body, so we - parse the top-level ``description:`` line and treat everything after the - closing ``---`` as the body. Missing frontmatter is tolerated — the whole - text is then the body. + ``metadata:`` block; we parse the top-level ``name`` and ``description`` + lines and treat everything after the closing ``---`` as the body. The + ``name`` is the native slug we reuse as the stable entity id. Missing + frontmatter is tolerated — the whole text is then the body. """ + name = None description = None body = text if text.startswith("---"): @@ -74,16 +76,17 @@ def parse_native_memory(text): frontmatter, body = parts[1], parts[2] for line in frontmatter.splitlines(): # Only top-level keys (no leading indentation) — keeps the - # nested metadata.* keys out of the description match. + # nested metadata.* keys out of the top-level matches. 
if line[:1].isspace(): continue key, _, value = line.partition(":") - if key.strip() == "description": - value = value.strip() - if value: - description = value - break - return description, body.strip() + key = key.strip() + value = value.strip() + if key == "name" and value: + name = value + elif key == "description" and value: + description = value + return name, description, body.strip() def main(): @@ -112,7 +115,7 @@ def main(): print(f"Error: cannot read {memory_path} - {exc}", file=sys.stderr) sys.exit(1) - description, body = parse_native_memory(text) + name, description, body = parse_native_memory(text) if not body: print(f"Error: native memory {memory_path} has no body to mirror.", file=sys.stderr) sys.exit(1) @@ -123,11 +126,18 @@ def main(): if description and description not in body: content = f"{description}\n\n{body}" + # The native ``name`` becomes the stable, derivable entity slug so the + # entity id is ``<type>/<name>`` on both sides — provenance can map an + # audited native memory straight onto its mirror. Fall back to a + # content-derived slug only when the native frontmatter has no name. + slug = slugify(name) if name else slugify(content) + entity = { "type": args.type, "trigger": args.trigger, "content": content, "source": "native-memory", + "native_path": args.memory_path, } entities_dir = find_entities_dir() @@ -138,9 +148,14 @@ def main(): entities_dir = get_default_entities_dir() log(f"Created entities dir: {entities_dir}") - path = write_entity_file(entities_dir, entity) - log(f"Mirrored {memory_path} -> {path}") + # Deterministic, idempotent write: re-mirroring the same native memory + # (same name + type) overwrites <type>/<name>.md in place rather than + # creating <name>-2.md, keeping the entity id stable. 
+ path = write_entity_file(entities_dir, entity, filename=slug, overwrite=True) + entity_id = f"{entity['type']}/{slug}" + log(f"Mirrored {memory_path} -> {path} (id: {entity_id})") print(f"Mirrored native memory into evolve store: {path}") + print(f"Entity id: {entity_id}") if __name__ == "__main__": diff --git a/platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/entity_io.py b/platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/entity_io.py index 9b177718..8887caf0 100644 --- a/platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/entity_io.py +++ b/platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/entity_io.py @@ -154,7 +154,7 @@ def unique_filename(directory, slug): # Markdown <-> dict conversion # --------------------------------------------------------------------------- -_FRONTMATTER_KEYS = ("type", "trigger", "trajectory", "owner", "source", "visibility", "published_at") +_FRONTMATTER_KEYS = ("type", "trigger", "trajectory", "owner", "source", "native_path", "visibility", "published_at") def entity_to_markdown(entity): @@ -354,12 +354,24 @@ def load_all_entities(entities_dir): return entities -def write_entity_file(directory, entity): +def write_entity_file(directory, entity, filename=None, overwrite=False): """Write a single entity as a markdown file under *directory*. The file is placed in a ``{type}/`` subdirectory. Uses atomic write (write to ``.tmp``, then ``os.rename``). + Args: + directory: Entities root directory. + entity: The entity dict to serialize. + filename: Optional explicit slug for the target file (without the + ``.md`` suffix). When omitted, the slug is derived from the + entity content (the historical default). + overwrite: When True, the entity is written to a deterministic + ``{type}/{filename}.md`` path, overwriting any existing file in + place (stable id, idempotent re-mirroring). 
When False (the + default), the historical collision-avoiding behavior is kept — + a ``-2``/``-3`` suffix is appended on collision. + Returns: Path to the written file. """ @@ -370,7 +382,7 @@ def write_entity_file(directory, entity): type_dir = Path(directory) / entity_type type_dir.mkdir(parents=True, exist_ok=True) - slug = slugify(entity.get("content", "entity")) + slug = filename if filename else slugify(entity.get("content", "entity")) content = entity_to_markdown(entity) # Write to a unique temp file first (avoids predictable .tmp collisions) @@ -381,6 +393,13 @@ def write_entity_file(directory, entity): os.close(fd) fd = None + if overwrite: + # Deterministic target: overwrite any existing file in place so + # the entity id is stable across re-mirroring. + target = type_dir / f"{slug}.md" + os.replace(tmp_path, target) + return target + # Atomically claim the target using O_EXCL; retry on race while True: target = unique_filename(type_dir, slug) diff --git a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py index fb90d93c..5bb8fb44 100644 --- a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py +++ b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py @@ -48,6 +48,7 @@ from entity_io import ( # noqa: E402 find_entities_dir, get_default_entities_dir, + slugify, write_entity_file, log as _log, ) @@ -58,14 +59,15 @@ def log(message): def parse_native_memory(text): - """Split a native memory file into (description, body). + """Split a native memory file into (name, description, body). 
Native frontmatter is simple ``key: value`` lines plus a nested - ``metadata:`` block; we only need ``description`` and the body, so we - parse the top-level ``description:`` line and treat everything after the - closing ``---`` as the body. Missing frontmatter is tolerated — the whole - text is then the body. + ``metadata:`` block; we parse the top-level ``name`` and ``description`` + lines and treat everything after the closing ``---`` as the body. The + ``name`` is the native slug we reuse as the stable entity id. Missing + frontmatter is tolerated — the whole text is then the body. """ + name = None description = None body = text if text.startswith("---"): @@ -74,16 +76,17 @@ def parse_native_memory(text): frontmatter, body = parts[1], parts[2] for line in frontmatter.splitlines(): # Only top-level keys (no leading indentation) — keeps the - # nested metadata.* keys out of the description match. + # nested metadata.* keys out of the top-level matches. if line[:1].isspace(): continue key, _, value = line.partition(":") - if key.strip() == "description": - value = value.strip() - if value: - description = value - break - return description, body.strip() + key = key.strip() + value = value.strip() + if key == "name" and value: + name = value + elif key == "description" and value: + description = value + return name, description, body.strip() def main(): @@ -112,7 +115,7 @@ def main(): print(f"Error: cannot read {memory_path} - {exc}", file=sys.stderr) sys.exit(1) - description, body = parse_native_memory(text) + name, description, body = parse_native_memory(text) if not body: print(f"Error: native memory {memory_path} has no body to mirror.", file=sys.stderr) sys.exit(1) @@ -123,11 +126,18 @@ def main(): if description and description not in body: content = f"{description}\n\n{body}" + # The native ``name`` becomes the stable, derivable entity slug so the + # entity id is ``<type>/<name>`` on both sides — provenance can map an + # audited native memory straight onto its 
mirror. Fall back to a + # content-derived slug only when the native frontmatter has no name. + slug = slugify(name) if name else slugify(content) + entity = { "type": args.type, "trigger": args.trigger, "content": content, "source": "native-memory", + "native_path": args.memory_path, } entities_dir = find_entities_dir() @@ -138,9 +148,14 @@ def main(): entities_dir = get_default_entities_dir() log(f"Created entities dir: {entities_dir}") - path = write_entity_file(entities_dir, entity) - log(f"Mirrored {memory_path} -> {path}") + # Deterministic, idempotent write: re-mirroring the same native memory + # (same name + type) overwrites <type>/<name>.md in place rather than + # creating <name>-2.md, keeping the entity id stable. + path = write_entity_file(entities_dir, entity, filename=slug, overwrite=True) + entity_id = f"{entity['type']}/{slug}" + log(f"Mirrored {memory_path} -> {path} (id: {entity_id})") print(f"Mirrored native memory into evolve store: {path}") + print(f"Entity id: {entity_id}") if __name__ == "__main__": diff --git a/plugin-source/EVOLVE.md.j2 b/plugin-source/EVOLVE.md.j2 index 0930e2ea..b6d3b3a9 100644 --- a/plugin-source/EVOLVE.md.j2 +++ b/plugin-source/EVOLVE.md.j2 @@ -17,18 +17,20 @@ once per memory you saved. ## Record what you consulted -After you read or consult memories this turn, log which ones you actually opened, -so the value of this memory can be measured over time. Run: +After you read or consult native memories this turn, log which ones you actually +opened, so the value of this memory can be measured over time. Run: ``` -python3 {{ audit_script }} <memory-file> [<memory-file> ...] +python3 {{ audit_script }} <type>/<name> [<type>/<name> ...] ``` -Pass the memory files you read this turn (space-separated paths, relative to the -project root). Skip this step entirely if you consulted no memories. If the -command prints a line beginning `evolve-session:`, include that line once, -verbatim, somewhere in your reply — it lets later analysis tie this session to -what you recalled. 
+Pass the entity id `<type>/<name>` for each native memory you consulted, where +`<type>` is the memory's `metadata.type` and `<name>` is its top-level `name` +field — this is the id provenance resolves to `./.evolve/entities/<type>/<name>.md` +(the same id {{ skill_ref('adapt-memory') }} mirrors to). Skip this step entirely +if you consulted no memories. If the command prints a line beginning +`evolve-session:`, include that line once, verbatim, somewhere in your reply — it +lets later analysis tie this session to what you recalled. {%- else -%} # Evolve — self-directed memory diff --git a/plugin-source/lib/entity_io.py b/plugin-source/lib/entity_io.py index 9b177718..8887caf0 100644 --- a/plugin-source/lib/entity_io.py +++ b/plugin-source/lib/entity_io.py @@ -154,7 +154,7 @@ def unique_filename(directory, slug): # Markdown <-> dict conversion # --------------------------------------------------------------------------- -_FRONTMATTER_KEYS = ("type", "trigger", "trajectory", "owner", "source", "visibility", "published_at") +_FRONTMATTER_KEYS = ("type", "trigger", "trajectory", "owner", "source", "native_path", "visibility", "published_at") def entity_to_markdown(entity): @@ -354,12 +354,24 @@ def load_all_entities(entities_dir): return entities -def write_entity_file(directory, entity): +def write_entity_file(directory, entity, filename=None, overwrite=False): """Write a single entity as a markdown file under *directory*. The file is placed in a ``{type}/`` subdirectory. Uses atomic write (write to ``.tmp``, then ``os.rename``). + Args: + directory: Entities root directory. + entity: The entity dict to serialize. + filename: Optional explicit slug for the target file (without the + ``.md`` suffix). When omitted, the slug is derived from the + entity content (the historical default). + overwrite: When True, the entity is written to a deterministic + ``{type}/{filename}.md`` path, overwriting any existing file in + place (stable id, idempotent re-mirroring). 
When False (the + default), the historical collision-avoiding behavior is kept — + a ``-2``/``-3`` suffix is appended on collision. + Returns: Path to the written file. """ @@ -370,7 +382,7 @@ def write_entity_file(directory, entity): type_dir = Path(directory) / entity_type type_dir.mkdir(parents=True, exist_ok=True) - slug = slugify(entity.get("content", "entity")) + slug = filename if filename else slugify(entity.get("content", "entity")) content = entity_to_markdown(entity) # Write to a unique temp file first (avoids predictable .tmp collisions) @@ -381,6 +393,13 @@ def write_entity_file(directory, entity): os.close(fd) fd = None + if overwrite: + # Deterministic target: overwrite any existing file in place so + # the entity id is stable across re-mirroring. + target = type_dir / f"{slug}.md" + os.replace(tmp_path, target) + return target + # Atomically claim the target using O_EXCL; retry on race while True: target = unique_filename(type_dir, slug) diff --git a/plugin-source/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py b/plugin-source/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py index fb90d93c..5bb8fb44 100644 --- a/plugin-source/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py +++ b/plugin-source/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py @@ -48,6 +48,7 @@ from entity_io import ( # noqa: E402 find_entities_dir, get_default_entities_dir, + slugify, write_entity_file, log as _log, ) @@ -58,14 +59,15 @@ def log(message): def parse_native_memory(text): - """Split a native memory file into (description, body). + """Split a native memory file into (name, description, body). Native frontmatter is simple ``key: value`` lines plus a nested - ``metadata:`` block; we only need ``description`` and the body, so we - parse the top-level ``description:`` line and treat everything after the - closing ``---`` as the body. Missing frontmatter is tolerated — the whole - text is then the body. 
+ ``metadata:`` block; we parse the top-level ``name`` and ``description`` + lines and treat everything after the closing ``---`` as the body. The + ``name`` is the native slug we reuse as the stable entity id. Missing + frontmatter is tolerated — the whole text is then the body. """ + name = None description = None body = text if text.startswith("---"): @@ -74,16 +76,17 @@ def parse_native_memory(text): frontmatter, body = parts[1], parts[2] for line in frontmatter.splitlines(): # Only top-level keys (no leading indentation) — keeps the - # nested metadata.* keys out of the description match. + # nested metadata.* keys out of the top-level matches. if line[:1].isspace(): continue key, _, value = line.partition(":") - if key.strip() == "description": - value = value.strip() - if value: - description = value - break - return description, body.strip() + key = key.strip() + value = value.strip() + if key == "name" and value: + name = value + elif key == "description" and value: + description = value + return name, description, body.strip() def main(): @@ -112,7 +115,7 @@ def main(): print(f"Error: cannot read {memory_path} - {exc}", file=sys.stderr) sys.exit(1) - description, body = parse_native_memory(text) + name, description, body = parse_native_memory(text) if not body: print(f"Error: native memory {memory_path} has no body to mirror.", file=sys.stderr) sys.exit(1) @@ -123,11 +126,18 @@ def main(): if description and description not in body: content = f"{description}\n\n{body}" + # The native ``name`` becomes the stable, derivable entity slug so the + # entity id is ``<type>/<name>`` on both sides — provenance can map an + # audited native memory straight onto its mirror. Fall back to a + # content-derived slug only when the native frontmatter has no name. 
+ slug = slugify(name) if name else slugify(content) + entity = { "type": args.type, "trigger": args.trigger, "content": content, "source": "native-memory", + "native_path": args.memory_path, } entities_dir = find_entities_dir() @@ -138,9 +148,14 @@ def main(): entities_dir = get_default_entities_dir() log(f"Created entities dir: {entities_dir}") - path = write_entity_file(entities_dir, entity) - log(f"Mirrored {memory_path} -> {path}") + # Deterministic, idempotent write: re-mirroring the same native memory + # (same name + type) overwrites <type>/<name>.md in place rather than + # creating <name>-2.md, keeping the entity id stable. + path = write_entity_file(entities_dir, entity, filename=slug, overwrite=True) + entity_id = f"{entity['type']}/{slug}" + log(f"Mirrored {memory_path} -> {path} (id: {entity_id})") print(f"Mirrored native memory into evolve store: {path}") + print(f"Entity id: {entity_id}") if __name__ == "__main__": diff --git a/tests/platform_integrations/test_entity_io_core.py b/tests/platform_integrations/test_entity_io_core.py index 29586878..2bf467c6 100644 --- a/tests/platform_integrations/test_entity_io_core.py +++ b/tests/platform_integrations/test_entity_io_core.py @@ -4,20 +4,32 @@ covers the serialization and I/O functions needed by the sharing feature. """ +import importlib.util import sys from pathlib import Path import pytest -sys.path.insert( - 0, - str(Path(__file__).parent.parent.parent / "platform-integrations/claude/plugins/evolve-lite/lib/evolve-lite"), -) -import entity_io +_CLAUDE_PLUGIN = Path(__file__).parent.parent.parent / "platform-integrations/claude/plugins/evolve-lite" +sys.path.insert(0, str(_CLAUDE_PLUGIN / "lib/evolve-lite")) +import entity_io # noqa: E402 pytestmark = [pytest.mark.platform_integrations, pytest.mark.unit] +def _load_adapt_memory(): + """Load the rendered Claude adapt_memory.py as a module. 
+ + Its lib resolution only works in the rendered tree (it walks up to find + ``lib/evolve-lite/entity_io.py``), so we import the rendered copy. + """ + path = _CLAUDE_PLUGIN / "skills/evolve-lite/adapt-memory/scripts/adapt_memory.py" + spec = importlib.util.spec_from_file_location("adapt_memory_rendered", path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + class TestSlugify: def test_lowercases_and_replaces_spaces(self): assert entity_io.slugify("Hello World") == "hello-world" @@ -151,6 +163,81 @@ def test_no_collision_on_duplicate_slug(self, tmp_path): assert path1.exists() assert path2.exists() + def test_explicit_filename_default_mode_still_suffixes_on_collision(self, tmp_path): + # Default (overwrite=False) behavior is unchanged even with an + # explicit filename: a second write gets a -2 suffix. + entity = {"type": "feedback", "content": "First."} + path1 = entity_io.write_entity_file(tmp_path, entity, filename="my-slug") + path2 = entity_io.write_entity_file(tmp_path, {"type": "feedback", "content": "Second."}, filename="my-slug") + assert path1 == tmp_path / "feedback" / "my-slug.md" + assert path2 == tmp_path / "feedback" / "my-slug-2.md" + + def test_overwrite_mode_writes_deterministic_path_in_place(self, tmp_path): + path1 = entity_io.write_entity_file(tmp_path, {"type": "feedback", "content": "First."}, filename="my-slug", overwrite=True) + path2 = entity_io.write_entity_file(tmp_path, {"type": "feedback", "content": "Second."}, filename="my-slug", overwrite=True) + assert path1 == path2 == tmp_path / "feedback" / "my-slug.md" + assert "Second." 
in path2.read_text() + assert not (tmp_path / "feedback" / "my-slug-2.md").exists() + + +class TestAdaptMemory: + """Integration tests against the rendered Claude adapt_memory.py.""" + + def _write_native(self, tmp_path, name, mem_type, body, description=None): + lines = ["---"] + if name is not None: + lines.append(f"name: {name}") + if description is not None: + lines.append(f"description: {description}") + lines += ["metadata:", f" type: {mem_type}", "---", "", body, ""] + native = tmp_path / "memory.md" + native.write_text("\n".join(lines), encoding="utf-8") + return native + + def _run(self, adapt, native, mem_type, trigger, monkeypatch, tmp_path): + monkeypatch.chdir(tmp_path) + monkeypatch.setattr(sys, "argv", ["adapt_memory.py", str(native), "--type", mem_type, "--trigger", trigger]) + adapt.main() + + def test_id_is_type_slash_name_and_native_path_stamped(self, tmp_path, monkeypatch, capsys): + adapt = _load_adapt_memory() + native = self._write_native(tmp_path, "my-fact", "feedback", "Always rebase.", "A short hook") + self._run(adapt, native, "feedback", "when rebasing", monkeypatch, tmp_path) + + out = capsys.readouterr().out + assert "Entity id: feedback/my-fact" in out + + entity_file = tmp_path / ".evolve" / "entities" / "feedback" / "my-fact.md" + assert entity_file.exists() + parsed = entity_io.markdown_to_entity(entity_file) + assert parsed["native_path"] == str(native) + assert parsed["source"] == "native-memory" + assert parsed["type"] == "feedback" + + def test_deterministic_overwrite_on_same_name_and_type(self, tmp_path, monkeypatch, capsys): + adapt = _load_adapt_memory() + native = self._write_native(tmp_path, "my-fact", "feedback", "First version.") + self._run(adapt, native, "feedback", "trig", monkeypatch, tmp_path) + capsys.readouterr() + + native.write_text("---\nname: my-fact\nmetadata:\n type: feedback\n---\n\nSecond version.\n", encoding="utf-8") + self._run(adapt, native, "feedback", "trig", monkeypatch, tmp_path) + + feedback_dir = 
tmp_path / ".evolve" / "entities" / "feedback" + files = sorted(p.name for p in feedback_dir.glob("*.md")) + assert files == ["my-fact.md"] # no my-fact-2.md + assert "Second version." in (feedback_dir / "my-fact.md").read_text() + + def test_falls_back_to_content_slug_when_name_missing(self, tmp_path, monkeypatch, capsys): + adapt = _load_adapt_memory() + native = self._write_native(tmp_path, None, "project", "Use deterministic builds everywhere.") + self._run(adapt, native, "project", "when building", monkeypatch, tmp_path) + + out = capsys.readouterr().out + expected_slug = entity_io.slugify("Use deterministic builds everywhere.") + assert f"Entity id: project/{expected_slug}" in out + assert (tmp_path / ".evolve" / "entities" / "project" / f"{expected_slug}.md").exists() + class TestLoadAllEntities: def test_loads_from_nested_type_dirs(self, temp_project_dir): From 7fa1d0e6d5e81722669cf4fd599fa3cf04d6a7c2 Mon Sep 17 00:00:00 2001 From: Punleuk Oum <5661986+illeatmyhat@users.noreply.github.com> Date: Mon, 8 Jun 2026 13:30:48 -0700 Subject: [PATCH 06/12] feat(platform-integrations): automate provenance matching with native-transcript awareness Provenance was a fully-manual procedure with no deterministic plumbing, so the recall->entity->trajectory loop couldn't be closed or tested. Add provenance.py: - candidates: read audit recall rows, skip already-influenced pairs, resolve each entity id / to its file, locate the session trajectory, and emit JSONL judgment candidates (entities/trajectories that can't be found are emitted with a missing:[...] 
field, never silently dropped) - record: validate + persist an influence verdict via the existing log_influence writer (no duplicated write logic) - trajectory locator now also reads the NATIVE Claude transcript at ~/.claude/projects//.jsonl (slug logic shared with doctor), so provenance works in the hookless world where no .evolve/trajectories/ is written The semantic verdict (followed/contradicted/not_applicable) stays agent-driven; provenance.py does only the deterministic matching/resolution + recording. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../skills/evolve-lite-provenance/SKILL.md | 117 ++++-- .../scripts/provenance.py | 358 ++++++++++++++++++ .../skills/evolve-lite/provenance/SKILL.md | 117 ++++-- .../provenance/scripts/provenance.py | 358 ++++++++++++++++++ .../skills/evolve-lite/provenance/SKILL.md | 117 ++++-- .../provenance/scripts/provenance.py | 358 ++++++++++++++++++ .../skills/evolve-lite/provenance/SKILL.md | 117 ++++-- .../provenance/scripts/provenance.py | 358 ++++++++++++++++++ .../skills/evolve-lite/provenance/SKILL.md.j2 | 117 ++++-- .../provenance/scripts/provenance.py | 358 ++++++++++++++++++ .../platform_integrations/test_provenance.py | 222 +++++++++++ 11 files changed, 2427 insertions(+), 170 deletions(-) create mode 100644 platform-integrations/bob/evolve-lite/skills/evolve-lite-provenance/scripts/provenance.py create mode 100644 platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py create mode 100644 platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py create mode 100644 platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py create mode 100644 plugin-source/skills/evolve-lite/provenance/scripts/provenance.py create mode 100644 tests/platform_integrations/test_provenance.py diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite-provenance/SKILL.md 
b/platform-integrations/bob/evolve-lite/skills/evolve-lite-provenance/SKILL.md index 25ee891a..af6fdd3b 100644 --- a/platform-integrations/bob/evolve-lite/skills/evolve-lite-provenance/SKILL.md +++ b/platform-integrations/bob/evolve-lite/skills/evolve-lite-provenance/SKILL.md @@ -7,58 +7,107 @@ description: Analyze saved trajectories and recall audit events offline to recor ## Overview -This skill runs after one or more sessions have completed. It reads saved trajectories from `.evolve/trajectories/`, matches them to `recall` events in `.evolve/audit.log`, and records post-hoc `influence` events for recalled guidelines. +This skill runs after one or more sessions have completed. It reads `recall` +events from `.evolve/audit.log`, locates each session's trajectory, and records +post-hoc `influence` events for the recalled guidelines. -Use this skill when you want to compute usage provenance without coupling the work to the live learn step. +The mechanical work — reading recall rows, skipping already-assessed pairs, +resolving entity files, and locating trajectories — is done deterministically by +`provenance.py candidates`. Your job is the judgment: read each candidate and +decide whether the recalled guideline was `followed`, `contradicted`, or +`not_applicable`, then persist that verdict. -## Workflow - -### Step 1: Load Recall Events - -Read `.evolve/audit.log` as JSONL. Find entries where `event == "recall"` and `entities` is a non-empty list. - -Skip any recall event that already has `influence` entries for the same `session_id` and entity ids. Do not write duplicate influence records. - -### Step 2: Locate Saved Trajectories +Use this skill when you want to compute usage provenance without coupling the +work to the live learn step. -List `.evolve/trajectories/` and match each recall event to a trajectory by `session_id`. - -Matching strategy (in order): -1. `claude-transcript_.jsonl` - the stop-hook transcript dump; the session id is in the filename. -2. 
`trajectory__.json` - written by the evolve-lite:save-trajectory skill when a session id is available. Match on the `` slice of the filename. -3. `trajectory_.json` - open the file and match its top-level `session_id` field against the recall event. Only fall back to this step when the filename alone does not identify the session. - -If none of the above yields a confident match for a recall event, skip it. Do not guess. - -### Step 3: Read Recalled Entities +## Workflow -For each recalled entity id, open `.evolve/entities/.md`. The id is a path relative to `.evolve/entities/` without the `.md` suffix, such as `guideline/foo` or `subscribed/alice/guideline/foo`. +### Step 1: Get candidates -Read the entity content and trigger. Skip ids whose files are missing. +Run the candidate builder. It emits one JSON object per line (JSONL), one per +unresolved `(session_id, entity)` recall pair: -### Step 4: Assess Influence +```bash +python3 .bob/skills/evolve-lite-provenance/scripts/provenance.py candidates +``` -Compare each recalled entity with the matched trajectory. Pick exactly one verdict: +Each candidate looks like: -- `followed` - the agent's actual actions are consistent with the guideline. -- `contradicted` - the guideline applied, but the agent did the opposite or repeated the avoidable dead end. -- `not_applicable` - the guideline was recalled but did not apply to this session. +```json +{ + "session_id": "", + "entity_id": "/", + "entity_excerpt": "", + "trajectory_path": "/path/to/transcript.jsonl", + "trajectory_excerpt": "", + "missing": ["trajectory"] +} +``` -Keep `evidence` to one short sentence citing a concrete action, tool call, or absence in the trajectory. +Notes: + +- `entity_id` is the path relative to `.evolve/entities/` without the `.md` + suffix, e.g. `feedback/foo`, `guideline/bar`, or + `subscribed/alice/guideline/baz`. 
+- Pairs that already have an `influence` row are skipped for you — the builder + reuses the same dedup rule used when influence rows are written. You will + never be handed a duplicate. +- The trajectory locator checks `.evolve/trajectories/` first, then falls back + to the native Claude transcript at + `~/.claude/projects/<slug>/<session_id>.jsonl`. This means provenance works + even when no `.evolve/trajectories/` file was written. +- If an entity file or trajectory cannot be found, the candidate is still + emitted with a `missing: [...]` field so the gap is visible. When the + trajectory is missing you usually cannot judge the pair — skip it (do not + guess), unless the entity content alone makes `not_applicable` certain. + +### Step 2: Judge each candidate + +For each candidate, read `entity_excerpt` (and open `trajectory_path` for the +full transcript if the excerpt is not enough). Compare the recalled guideline +against the agent's actual actions in the trajectory and pick exactly one +verdict: + +- `followed` — the agent's actual actions are consistent with the guideline. +- `contradicted` — the guideline applied, but the agent did the opposite or + repeated the avoidable dead end. +- `not_applicable` — the guideline was recalled but did not apply to this + session. + +Keep `evidence` to one short sentence citing a concrete action, tool call, or +absence in the trajectory. This judgment is yours — there is no heuristic +fallback. + +### Step 3: Record verdicts + +Persist each verdict. Either pipe one verdict per call to `provenance.py +record`: -### Step 5: Write Influence Events +```bash +echo '{ + "session_id": "<session_id>", + "entity": "<type>/<name>", + "verdict": "followed", + "evidence": "Agent used the saved parser before trying shell fallbacks." 
+}' | python3 .bob/skills/evolve-lite-provenance/scripts/provenance.py record +``` -Pipe one JSON payload per assessed session to the helper: +…or, to batch many assessments for one session in a single call, pipe to the +underlying writer directly: ```bash echo '{ "session_id": "", "assessments": [ - {"entity": "guideline/", "verdict": "followed", "evidence": "Agent used the saved parser before trying shell fallbacks."} + {"entity": "feedback/foo", "verdict": "followed", "evidence": "Agent followed it."}, + {"entity": "guideline/bar", "verdict": "not_applicable", "evidence": "Did not apply."} ] }' | python3 .bob/skills/evolve-lite-provenance/scripts/log_influence.py ``` -The `entity` value must match exactly what appeared in the recall event, including any `subscribed//` prefix. +Both paths write the identical `influence` audit row and skip duplicates. The +`entity` value must match the candidate's `entity_id` exactly, including any +`subscribed//` prefix. -It is valid to emit an empty `assessments` list when recall events exist but no recalled guideline can be assessed. +It is valid to record nothing when recall events exist but no recalled guideline +can be assessed (e.g. every candidate is missing its trajectory). diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite-provenance/scripts/provenance.py b/platform-integrations/bob/evolve-lite/skills/evolve-lite-provenance/scripts/provenance.py new file mode 100644 index 00000000..21ed024e --- /dev/null +++ b/platform-integrations/bob/evolve-lite/skills/evolve-lite-provenance/scripts/provenance.py @@ -0,0 +1,358 @@ +#!/usr/bin/env python3 +"""Deterministic provenance plumbing for the evolve-lite provenance skill. 
+ +This script does the *mechanical* half of provenance — the part that can be +made deterministic and tested end to end: + + * read ``recall`` rows from ``.evolve/audit.log``, + * skip ``(session_id, entity)`` pairs that already have an ``influence`` row, + * resolve each recalled entity file and the session's trajectory transcript, + * assemble candidate dicts the agent can judge. + +The *semantic* half — deciding whether a recalled guideline was ``followed``, +``contradicted`` or ``not_applicable`` — is an LLM judgment and is NOT done +here. There is deliberately no heuristic verdict: this module never invents a +verdict. The agent reads each candidate, judges it, and pipes the verdict back +through ``record`` (which delegates to ``log_influence.py``'s writer so the +audit-log format is identical). + +Two modes: + + candidates (default) — emit one JSONL candidate per unresolved + (session_id, entity) recall pair to stdout. + record — read a verdict JSON from stdin and append an + ``influence`` row via log_influence.py's writer. +""" + +import json +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple +# plugins can coexist side by side (e.g. .bob/lib/evolve-lite/). +_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) + +# Provenance reuses log_influence.py's writer + dedup so the audit-log format and +# the duplicate-suppression rule live in exactly one place. log_influence.py sits +# next to this file in the same skill scripts/ directory. 
+sys.path.insert(0, str(_script.parent)) + +from entity_io import get_evolve_dir, log as _log # noqa: E402 +import log_influence # noqa: E402 + +_ALLOWED_VERDICTS = log_influence._ALLOWED_VERDICTS + +# How many characters of the entity file / trajectory to surface in a candidate. +_ENTITY_EXCERPT_CHARS = 4000 +_TRAJECTORY_EXCERPT_CHARS = 4000 + + +def log(message): + _log("provenance", message) + + +# --------------------------------------------------------------------------- +# Trajectory locator (Task B) +# --------------------------------------------------------------------------- + + +def _claude_transcript_slug(root): + """Claude derives a project's transcript dir name by replacing every + non-alphanumeric character in the absolute project path with ``-``. + + e.g. /Users/x/Documents/kaizen -> -Users-x-Documents-kaizen + + This mirrors ``_transcript_slug`` in the doctor skill + (skills/evolve-lite/doctor/scripts/doctor.py). The two are kept in sync by + hand because doctor and provenance ship as independent scripts that do not + import one another in the rendered tree; if you change one, change both. + """ + import re + + return re.sub(r"[^A-Za-z0-9]", "-", str(root)) + + +def locate_trajectory(session_id, evolve_dir, *, project_root=None, home=None): + """Locate the saved trajectory transcript for ``session_id``. + + Resolution order (best-effort, returns the first hit or ``None``): + + 1. Legacy ``.evolve/trajectories/`` files: + * ``claude-transcript_.jsonl`` — stop-hook transcript dump. + * ``trajectory__.json`` — save-trajectory skill output; the sid + is the filename slice after the timestamp. + * ``trajectory_.json`` — open and match the inner ``session_id``. + 2. NEW native Claude transcript: ``~/.claude/projects//.jsonl`` + where ```` is the project root path slugified the way Claude does + (every non-alphanumeric char -> ``-``; see ``_claude_transcript_slug``). 
+ + Native discovery makes provenance work in the hookless world where no + ``.evolve/trajectories/`` file is ever written. It is platform-neutral: + Bob/Codex keep their transcripts elsewhere, so the native step simply falls + through to ``None`` for them rather than misfiring. + """ + evolve_dir = Path(evolve_dir) + + # --- 1. Legacy .evolve/trajectories/ ------------------------------------ + traj_dir = evolve_dir / "trajectories" + if traj_dir.is_dir(): + direct = traj_dir / f"claude-transcript_{session_id}.jsonl" + if direct.is_file(): + return direct + + # trajectory__.json — match on the filename sid slice. + for path in sorted(traj_dir.glob("trajectory_*_*.json")): + stem = path.stem # trajectory__ + parts = stem.split("_", 2) + if len(parts) == 3 and parts[2] == session_id: + return path + + # trajectory_.json — open and match the inner session_id field. + for path in sorted(traj_dir.glob("trajectory_*.json")): + # Skip the _ shape already handled above. + if path.stem.count("_") >= 2: + continue + try: + data = json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + continue + if isinstance(data, dict) and data.get("session_id") == session_id: + return path + + # --- 2. Native Claude transcript ---------------------------------------- + # The project root is the parent of the .evolve dir; the home dir holds + # ~/.claude/projects//.jsonl. 
+ root = Path(project_root) if project_root is not None else evolve_dir.resolve().parent + base = Path(home) if home is not None else Path.home() + slug = _claude_transcript_slug(root) + native = base / ".claude" / "projects" / slug / f"{session_id}.jsonl" + if native.is_file(): + return native + + return None + + +# --------------------------------------------------------------------------- +# Recall row reading + entity resolution (Task A — candidates) +# --------------------------------------------------------------------------- + + +def read_recall_rows(evolve_dir): + """Yield ``(session_id, [entity_id, ...])`` for every ``recall`` audit row. + + Rows with no ``session_id`` or an empty ``entities`` list are skipped. + """ + audit_log = Path(evolve_dir) / "audit.log" + if not audit_log.is_file(): + return [] + + rows = [] + for line in audit_log.read_text(encoding="utf-8").splitlines(): + if not line.strip(): + continue + try: + event = json.loads(line) + except json.JSONDecodeError: + continue + if not isinstance(event, dict) or event.get("event") != "recall": + continue + session_id = event.get("session_id") + entities = event.get("entities") + if not isinstance(session_id, str) or not session_id: + continue + if not isinstance(entities, list) or not entities: + continue + clean = [e for e in entities if isinstance(e, str) and e] + if clean: + rows.append((session_id, clean)) + return rows + + +def _read_entity(evolve_dir, entity_id): + """Return ``(path, excerpt)`` for an entity file, or ``(path, None)`` if + the file is missing. ``entity_id`` is a ``/`` id relative to + ``entities/`` (without ``.md``). 
+ """ + entity_path = Path(evolve_dir) / "entities" / f"{entity_id}.md" + if not entity_path.is_file(): + return entity_path, None + try: + text = entity_path.read_text(encoding="utf-8") + except OSError: + return entity_path, None + return entity_path, text[:_ENTITY_EXCERPT_CHARS] + + +def _read_trajectory_excerpt(trajectory_path): + """Return a bounded text excerpt of the trajectory file, or ``None``.""" + if trajectory_path is None: + return None + try: + text = Path(trajectory_path).read_text(encoding="utf-8") + except OSError: + return None + return text[:_TRAJECTORY_EXCERPT_CHARS] + + +def build_candidates(evolve_dir, *, project_root=None, home=None): + """Assemble candidate dicts for every unresolved recall (session, entity). + + Returns a list of dicts shaped:: + + { + "session_id": ..., + "entity_id": "/", + "entity_excerpt": , + "trajectory_path": , + "trajectory_excerpt": , + "missing": ["entity"|"trajectory", ...], # only when non-empty + } + + ``(session_id, entity)`` pairs that already have an ``influence`` row are + skipped via ``log_influence.existing_influence_keys`` — the same dedup rule + used when influence rows are written. Candidates whose entity file or + trajectory cannot be found are still emitted with a ``missing`` list so the + gap is visible rather than silently dropped. 
+ """ + evolve_dir = Path(evolve_dir) + existing = log_influence.existing_influence_keys(evolve_dir) + + candidates = [] + for session_id, entities in read_recall_rows(evolve_dir): + trajectory_path = locate_trajectory(session_id, evolve_dir, project_root=project_root, home=home) + for entity_id in entities: + if (session_id, entity_id) in existing: + continue + entity_path, entity_excerpt = _read_entity(evolve_dir, entity_id) + trajectory_excerpt = _read_trajectory_excerpt(trajectory_path) + + missing = [] + if entity_excerpt is None: + missing.append("entity") + if trajectory_path is None: + missing.append("trajectory") + + candidate = { + "session_id": session_id, + "entity_id": entity_id, + "entity_excerpt": entity_excerpt, + "trajectory_path": str(trajectory_path) if trajectory_path else None, + "trajectory_excerpt": trajectory_excerpt, + } + if missing: + candidate["missing"] = missing + candidates.append(candidate) + return candidates + + +# --------------------------------------------------------------------------- +# record (Task A — record) +# --------------------------------------------------------------------------- + + +def record_verdict(payload, evolve_dir=None): + """Append a single ``influence`` row from an agent verdict. + + ``payload`` is ``{session_id, entity, verdict, evidence}``. The verdict must + be one of ``followed|contradicted|not_applicable`` (the *semantic* judgment + stays agent-driven — this only persists what the agent decided). Writing is + delegated to ``log_influence.py`` so the audit-log row format and the + duplicate-suppression rule are not duplicated here. + + Returns the number of rows written (0 or 1). Raises ``ValueError`` on an + invalid payload / verdict. 
+ """ + if not isinstance(payload, dict): + raise ValueError("verdict payload must be a JSON object") + + session_id = payload.get("session_id") + entity = payload.get("entity") + verdict = payload.get("verdict") + evidence = payload.get("evidence", "") + + if not isinstance(session_id, str) or not session_id: + raise ValueError("verdict payload must include a non-empty string session_id") + if not isinstance(entity, str) or not entity: + raise ValueError("verdict payload must include a non-empty string entity") + if verdict not in _ALLOWED_VERDICTS: + raise ValueError(f"verdict must be one of {sorted(_ALLOWED_VERDICTS)}, got {verdict!r}") + + if evolve_dir is None: + evolve_dir = get_evolve_dir().resolve() + evolve_dir = Path(evolve_dir) + + existing = log_influence.existing_influence_keys(evolve_dir) + if (session_id, entity) in existing: + log(f"Skipping duplicate influence verdict: session_id={session_id} entity={entity}") + return 0 + + if not isinstance(evidence, str): + evidence = str(evidence) + + log_influence.audit.append( + evolve_dir=str(evolve_dir), + event="influence", + session_id=session_id, + entity=entity, + verdict=verdict, + evidence=evidence, + ) + return 1 + + +# --------------------------------------------------------------------------- +# __main__ +# --------------------------------------------------------------------------- + + +def _run_candidates(): + evolve_dir = get_evolve_dir().resolve() + candidates = build_candidates(evolve_dir) + for candidate in candidates: + print(json.dumps(candidate)) + log(f"Emitted {len(candidates)} candidate(s) from {evolve_dir}") + + +def _run_record(): + try: + payload = json.load(sys.stdin) + except json.JSONDecodeError as exc: + log(f"Invalid JSON input: {exc}") + print(f"Error: invalid JSON input - {exc}", file=sys.stderr) + sys.exit(1) + + try: + written = record_verdict(payload) + except ValueError as exc: + log(f"Rejected verdict: {exc}") + print(f"Error: {exc}", file=sys.stderr) + sys.exit(1) + + 
log(f"Recorded {written} influence verdict(s).") + print(f"Recorded {written} influence verdict(s).") + + +def main(argv=None): + argv = list(sys.argv[1:] if argv is None else argv) + mode = argv[0] if argv else "candidates" + if mode == "candidates": + _run_candidates() + elif mode == "record": + _run_record() + else: + print(f"Error: unknown mode {mode!r}; expected 'candidates' or 'record'.", file=sys.stderr) + sys.exit(2) + + +if __name__ == "__main__": + main() diff --git a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/provenance/SKILL.md b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/provenance/SKILL.md index e6ff7825..32cd6d08 100644 --- a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/provenance/SKILL.md +++ b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/provenance/SKILL.md @@ -7,58 +7,107 @@ description: Analyze saved trajectories and recall audit events offline to recor ## Overview -This skill runs after one or more sessions have completed. It reads saved trajectories from `.evolve/trajectories/`, matches them to `recall` events in `.evolve/audit.log`, and records post-hoc `influence` events for recalled guidelines. +This skill runs after one or more sessions have completed. It reads `recall` +events from `.evolve/audit.log`, locates each session's trajectory, and records +post-hoc `influence` events for the recalled guidelines. -Use this skill when you want to compute usage provenance without coupling the work to the live learn step. +The mechanical work — reading recall rows, skipping already-assessed pairs, +resolving entity files, and locating trajectories — is done deterministically by +`provenance.py candidates`. Your job is the judgment: read each candidate and +decide whether the recalled guideline was `followed`, `contradicted`, or +`not_applicable`, then persist that verdict. -## Workflow - -### Step 1: Load Recall Events - -Read `.evolve/audit.log` as JSONL. 
Find entries where `event == "recall"` and `entities` is a non-empty list. - -Skip any recall event that already has `influence` entries for the same `session_id` and entity ids. Do not write duplicate influence records. - -### Step 2: Locate Saved Trajectories +Use this skill when you want to compute usage provenance without coupling the +work to the live learn step. -List `.evolve/trajectories/` and match each recall event to a trajectory by `session_id`. - -Matching strategy (in order): -1. `claude-transcript_.jsonl` - the stop-hook transcript dump; the session id is in the filename. -2. `trajectory__.json` - written by the /evolve-lite:save-trajectory skill when a session id is available. Match on the `` slice of the filename. -3. `trajectory_.json` - open the file and match its top-level `session_id` field against the recall event. Only fall back to this step when the filename alone does not identify the session. - -If none of the above yields a confident match for a recall event, skip it. Do not guess. - -### Step 3: Read Recalled Entities +## Workflow -For each recalled entity id, open `.evolve/entities/.md`. The id is a path relative to `.evolve/entities/` without the `.md` suffix, such as `guideline/foo` or `subscribed/alice/guideline/foo`. +### Step 1: Get candidates -Read the entity content and trigger. Skip ids whose files are missing. +Run the candidate builder. It emits one JSON object per line (JSONL), one per +unresolved `(session_id, entity)` recall pair: -### Step 4: Assess Influence +```bash +python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/provenance/scripts/provenance.py candidates +``` -Compare each recalled entity with the matched trajectory. Pick exactly one verdict: +Each candidate looks like: -- `followed` - the agent's actual actions are consistent with the guideline. -- `contradicted` - the guideline applied, but the agent did the opposite or repeated the avoidable dead end. 
-- `not_applicable` - the guideline was recalled but did not apply to this session. +```json +{ + "session_id": "", + "entity_id": "/", + "entity_excerpt": "", + "trajectory_path": "/path/to/transcript.jsonl", + "trajectory_excerpt": "", + "missing": ["trajectory"] +} +``` -Keep `evidence` to one short sentence citing a concrete action, tool call, or absence in the trajectory. +Notes: + +- `entity_id` is the path relative to `.evolve/entities/` without the `.md` + suffix, e.g. `feedback/foo`, `guideline/bar`, or + `subscribed/alice/guideline/baz`. +- Pairs that already have an `influence` row are skipped for you — the builder + reuses the same dedup rule used when influence rows are written. You will + never be handed a duplicate. +- The trajectory locator checks `.evolve/trajectories/` first, then falls back + to the native Claude transcript at + `~/.claude/projects//.jsonl`. This means provenance works + even when no `.evolve/trajectories/` file was written. +- If an entity file or trajectory cannot be found, the candidate is still + emitted with a `missing: [...]` field so the gap is visible. When the + trajectory is missing you usually cannot judge the pair — skip it (do not + guess), unless the entity content alone makes `not_applicable` certain. + +### Step 2: Judge each candidate + +For each candidate, read `entity_excerpt` (and open `trajectory_path` for the +full transcript if the excerpt is not enough). Compare the recalled guideline +against the agent's actual actions in the trajectory and pick exactly one +verdict: + +- `followed` — the agent's actual actions are consistent with the guideline. +- `contradicted` — the guideline applied, but the agent did the opposite or + repeated the avoidable dead end. +- `not_applicable` — the guideline was recalled but did not apply to this + session. + +Keep `evidence` to one short sentence citing a concrete action, tool call, or +absence in the trajectory. This judgment is yours — there is no heuristic +fallback. 
+ +### Step 3: Record verdicts + +Persist each verdict. Either pipe one verdict per call to `provenance.py +record`: -### Step 5: Write Influence Events +```bash +echo '{ + "session_id": "<session_id>", + "entity": "<type>/<name>", + "verdict": "followed", + "evidence": "Agent used the saved parser before trying shell fallbacks." +}' | python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/provenance/scripts/provenance.py record +``` -Pipe one JSON payload per assessed session to the helper: +…or, to batch many assessments for one session in a single call, pipe to the +underlying writer directly: ```bash echo '{ "session_id": "<session_id>", "assessments": [ - {"entity": "guideline/<name>", "verdict": "followed", "evidence": "Agent used the saved parser before trying shell fallbacks."} + {"entity": "feedback/foo", "verdict": "followed", "evidence": "Agent followed it."}, + {"entity": "guideline/bar", "verdict": "not_applicable", "evidence": "Did not apply."} ] }' | python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/provenance/scripts/log_influence.py ``` -The `entity` value must match exactly what appeared in the recall event, including any `subscribed/<owner>/` prefix. +Both paths write the identical `influence` audit row and skip duplicates. The +`entity` value must match the candidate's `entity_id` exactly, including any +`subscribed/<owner>/` prefix. -It is valid to emit an empty `assessments` list when recall events exist but no recalled guideline can be assessed. +It is valid to record nothing when recall events exist but no recalled guideline +can be assessed (e.g. every candidate is missing its trajectory). 
diff --git a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py new file mode 100644 index 00000000..21ed024e --- /dev/null +++ b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py @@ -0,0 +1,358 @@ +#!/usr/bin/env python3 +"""Deterministic provenance plumbing for the evolve-lite provenance skill. + +This script does the *mechanical* half of provenance — the part that can be +made deterministic and tested end to end: + + * read ``recall`` rows from ``.evolve/audit.log``, + * skip ``(session_id, entity)`` pairs that already have an ``influence`` row, + * resolve each recalled entity file and the session's trajectory transcript, + * assemble candidate dicts the agent can judge. + +The *semantic* half — deciding whether a recalled guideline was ``followed``, +``contradicted`` or ``not_applicable`` — is an LLM judgment and is NOT done +here. There is deliberately no heuristic verdict: this module never invents a +verdict. The agent reads each candidate, judges it, and pipes the verdict back +through ``record`` (which delegates to ``log_influence.py``'s writer so the +audit-log format is identical). + +Two modes: + + candidates (default) — emit one JSONL candidate per unresolved + (session_id, entity) recall pair to stdout. + record — read a verdict JSON from stdin and append an + ``influence`` row via log_influence.py's writer. +""" + +import json +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple +# plugins can coexist side by side (e.g. .bob/lib/evolve-lite/). 
+_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) + +# Provenance reuses log_influence.py's writer + dedup so the audit-log format and +# the duplicate-suppression rule live in exactly one place. log_influence.py sits +# next to this file in the same skill scripts/ directory. +sys.path.insert(0, str(_script.parent)) + +from entity_io import get_evolve_dir, log as _log # noqa: E402 +import log_influence # noqa: E402 + +_ALLOWED_VERDICTS = log_influence._ALLOWED_VERDICTS + +# How many characters of the entity file / trajectory to surface in a candidate. +_ENTITY_EXCERPT_CHARS = 4000 +_TRAJECTORY_EXCERPT_CHARS = 4000 + + +def log(message): + _log("provenance", message) + + +# --------------------------------------------------------------------------- +# Trajectory locator (Task B) +# --------------------------------------------------------------------------- + + +def _claude_transcript_slug(root): + """Claude derives a project's transcript dir name by replacing every + non-alphanumeric character in the absolute project path with ``-``. + + e.g. /Users/x/Documents/kaizen -> -Users-x-Documents-kaizen + + This mirrors ``_transcript_slug`` in the doctor skill + (skills/evolve-lite/doctor/scripts/doctor.py). The two are kept in sync by + hand because doctor and provenance ship as independent scripts that do not + import one another in the rendered tree; if you change one, change both. + """ + import re + + return re.sub(r"[^A-Za-z0-9]", "-", str(root)) + + +def locate_trajectory(session_id, evolve_dir, *, project_root=None, home=None): + """Locate the saved trajectory transcript for ``session_id``. + + Resolution order (best-effort, returns the first hit or ``None``): + + 1. 
Legacy ``.evolve/trajectories/`` files: + * ``claude-transcript_.jsonl`` — stop-hook transcript dump. + * ``trajectory__.json`` — save-trajectory skill output; the sid + is the filename slice after the timestamp. + * ``trajectory_.json`` — open and match the inner ``session_id``. + 2. NEW native Claude transcript: ``~/.claude/projects//.jsonl`` + where ```` is the project root path slugified the way Claude does + (every non-alphanumeric char -> ``-``; see ``_claude_transcript_slug``). + + Native discovery makes provenance work in the hookless world where no + ``.evolve/trajectories/`` file is ever written. It is platform-neutral: + Bob/Codex keep their transcripts elsewhere, so the native step simply falls + through to ``None`` for them rather than misfiring. + """ + evolve_dir = Path(evolve_dir) + + # --- 1. Legacy .evolve/trajectories/ ------------------------------------ + traj_dir = evolve_dir / "trajectories" + if traj_dir.is_dir(): + direct = traj_dir / f"claude-transcript_{session_id}.jsonl" + if direct.is_file(): + return direct + + # trajectory__.json — match on the filename sid slice. + for path in sorted(traj_dir.glob("trajectory_*_*.json")): + stem = path.stem # trajectory__ + parts = stem.split("_", 2) + if len(parts) == 3 and parts[2] == session_id: + return path + + # trajectory_.json — open and match the inner session_id field. + for path in sorted(traj_dir.glob("trajectory_*.json")): + # Skip the _ shape already handled above. + if path.stem.count("_") >= 2: + continue + try: + data = json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + continue + if isinstance(data, dict) and data.get("session_id") == session_id: + return path + + # --- 2. Native Claude transcript ---------------------------------------- + # The project root is the parent of the .evolve dir; the home dir holds + # ~/.claude/projects//.jsonl. 
+ root = Path(project_root) if project_root is not None else evolve_dir.resolve().parent + base = Path(home) if home is not None else Path.home() + slug = _claude_transcript_slug(root) + native = base / ".claude" / "projects" / slug / f"{session_id}.jsonl" + if native.is_file(): + return native + + return None + + +# --------------------------------------------------------------------------- +# Recall row reading + entity resolution (Task A — candidates) +# --------------------------------------------------------------------------- + + +def read_recall_rows(evolve_dir): + """Yield ``(session_id, [entity_id, ...])`` for every ``recall`` audit row. + + Rows with no ``session_id`` or an empty ``entities`` list are skipped. + """ + audit_log = Path(evolve_dir) / "audit.log" + if not audit_log.is_file(): + return [] + + rows = [] + for line in audit_log.read_text(encoding="utf-8").splitlines(): + if not line.strip(): + continue + try: + event = json.loads(line) + except json.JSONDecodeError: + continue + if not isinstance(event, dict) or event.get("event") != "recall": + continue + session_id = event.get("session_id") + entities = event.get("entities") + if not isinstance(session_id, str) or not session_id: + continue + if not isinstance(entities, list) or not entities: + continue + clean = [e for e in entities if isinstance(e, str) and e] + if clean: + rows.append((session_id, clean)) + return rows + + +def _read_entity(evolve_dir, entity_id): + """Return ``(path, excerpt)`` for an entity file, or ``(path, None)`` if + the file is missing. ``entity_id`` is a ``/`` id relative to + ``entities/`` (without ``.md``). 
+ """ + entity_path = Path(evolve_dir) / "entities" / f"{entity_id}.md" + if not entity_path.is_file(): + return entity_path, None + try: + text = entity_path.read_text(encoding="utf-8") + except OSError: + return entity_path, None + return entity_path, text[:_ENTITY_EXCERPT_CHARS] + + +def _read_trajectory_excerpt(trajectory_path): + """Return a bounded text excerpt of the trajectory file, or ``None``.""" + if trajectory_path is None: + return None + try: + text = Path(trajectory_path).read_text(encoding="utf-8") + except OSError: + return None + return text[:_TRAJECTORY_EXCERPT_CHARS] + + +def build_candidates(evolve_dir, *, project_root=None, home=None): + """Assemble candidate dicts for every unresolved recall (session, entity). + + Returns a list of dicts shaped:: + + { + "session_id": ..., + "entity_id": "/", + "entity_excerpt": , + "trajectory_path": , + "trajectory_excerpt": , + "missing": ["entity"|"trajectory", ...], # only when non-empty + } + + ``(session_id, entity)`` pairs that already have an ``influence`` row are + skipped via ``log_influence.existing_influence_keys`` — the same dedup rule + used when influence rows are written. Candidates whose entity file or + trajectory cannot be found are still emitted with a ``missing`` list so the + gap is visible rather than silently dropped. 
+ """ + evolve_dir = Path(evolve_dir) + existing = log_influence.existing_influence_keys(evolve_dir) + + candidates = [] + for session_id, entities in read_recall_rows(evolve_dir): + trajectory_path = locate_trajectory(session_id, evolve_dir, project_root=project_root, home=home) + for entity_id in entities: + if (session_id, entity_id) in existing: + continue + entity_path, entity_excerpt = _read_entity(evolve_dir, entity_id) + trajectory_excerpt = _read_trajectory_excerpt(trajectory_path) + + missing = [] + if entity_excerpt is None: + missing.append("entity") + if trajectory_path is None: + missing.append("trajectory") + + candidate = { + "session_id": session_id, + "entity_id": entity_id, + "entity_excerpt": entity_excerpt, + "trajectory_path": str(trajectory_path) if trajectory_path else None, + "trajectory_excerpt": trajectory_excerpt, + } + if missing: + candidate["missing"] = missing + candidates.append(candidate) + return candidates + + +# --------------------------------------------------------------------------- +# record (Task A — record) +# --------------------------------------------------------------------------- + + +def record_verdict(payload, evolve_dir=None): + """Append a single ``influence`` row from an agent verdict. + + ``payload`` is ``{session_id, entity, verdict, evidence}``. The verdict must + be one of ``followed|contradicted|not_applicable`` (the *semantic* judgment + stays agent-driven — this only persists what the agent decided). Writing is + delegated to ``log_influence.py`` so the audit-log row format and the + duplicate-suppression rule are not duplicated here. + + Returns the number of rows written (0 or 1). Raises ``ValueError`` on an + invalid payload / verdict. 
+ """ + if not isinstance(payload, dict): + raise ValueError("verdict payload must be a JSON object") + + session_id = payload.get("session_id") + entity = payload.get("entity") + verdict = payload.get("verdict") + evidence = payload.get("evidence", "") + + if not isinstance(session_id, str) or not session_id: + raise ValueError("verdict payload must include a non-empty string session_id") + if not isinstance(entity, str) or not entity: + raise ValueError("verdict payload must include a non-empty string entity") + if verdict not in _ALLOWED_VERDICTS: + raise ValueError(f"verdict must be one of {sorted(_ALLOWED_VERDICTS)}, got {verdict!r}") + + if evolve_dir is None: + evolve_dir = get_evolve_dir().resolve() + evolve_dir = Path(evolve_dir) + + existing = log_influence.existing_influence_keys(evolve_dir) + if (session_id, entity) in existing: + log(f"Skipping duplicate influence verdict: session_id={session_id} entity={entity}") + return 0 + + if not isinstance(evidence, str): + evidence = str(evidence) + + log_influence.audit.append( + evolve_dir=str(evolve_dir), + event="influence", + session_id=session_id, + entity=entity, + verdict=verdict, + evidence=evidence, + ) + return 1 + + +# --------------------------------------------------------------------------- +# __main__ +# --------------------------------------------------------------------------- + + +def _run_candidates(): + evolve_dir = get_evolve_dir().resolve() + candidates = build_candidates(evolve_dir) + for candidate in candidates: + print(json.dumps(candidate)) + log(f"Emitted {len(candidates)} candidate(s) from {evolve_dir}") + + +def _run_record(): + try: + payload = json.load(sys.stdin) + except json.JSONDecodeError as exc: + log(f"Invalid JSON input: {exc}") + print(f"Error: invalid JSON input - {exc}", file=sys.stderr) + sys.exit(1) + + try: + written = record_verdict(payload) + except ValueError as exc: + log(f"Rejected verdict: {exc}") + print(f"Error: {exc}", file=sys.stderr) + sys.exit(1) + + 
log(f"Recorded {written} influence verdict(s).") + print(f"Recorded {written} influence verdict(s).") + + +def main(argv=None): + argv = list(sys.argv[1:] if argv is None else argv) + mode = argv[0] if argv else "candidates" + if mode == "candidates": + _run_candidates() + elif mode == "record": + _run_record() + else: + print(f"Error: unknown mode {mode!r}; expected 'candidates' or 'record'.", file=sys.stderr) + sys.exit(2) + + +if __name__ == "__main__": + main() diff --git a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/provenance/SKILL.md b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/provenance/SKILL.md index de5023bb..14f152e0 100644 --- a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/provenance/SKILL.md +++ b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/provenance/SKILL.md @@ -7,58 +7,107 @@ description: Analyze saved trajectories and recall audit events offline to recor ## Overview -This skill runs after one or more sessions have completed. It reads saved trajectories from `.evolve/trajectories/`, matches them to `recall` events in `.evolve/audit.log`, and records post-hoc `influence` events for recalled guidelines. +This skill runs after one or more sessions have completed. It reads `recall` +events from `.evolve/audit.log`, locates each session's trajectory, and records +post-hoc `influence` events for the recalled guidelines. -Use this skill when you want to compute usage provenance without coupling the work to the live learn step. +The mechanical work — reading recall rows, skipping already-assessed pairs, +resolving entity files, and locating trajectories — is done deterministically by +`provenance.py candidates`. Your job is the judgment: read each candidate and +decide whether the recalled guideline was `followed`, `contradicted`, or +`not_applicable`, then persist that verdict. 
-## Workflow - -### Step 1: Load Recall Events - -Read `.evolve/audit.log` as JSONL. Find entries where `event == "recall"` and `entities` is a non-empty list. - -Skip any recall event that already has `influence` entries for the same `session_id` and entity ids. Do not write duplicate influence records. - -### Step 2: Locate Saved Trajectories +Use this skill when you want to compute usage provenance without coupling the +work to the live learn step. -List `.evolve/trajectories/` and match each recall event to a trajectory by `session_id`. - -Matching strategy (in order): -1. `claude-transcript_.jsonl` - the stop-hook transcript dump; the session id is in the filename. -2. `trajectory__.json` - written by the /evolve-lite:save-trajectory skill when a session id is available. Match on the `` slice of the filename. -3. `trajectory_.json` - open the file and match its top-level `session_id` field against the recall event. Only fall back to this step when the filename alone does not identify the session. - -If none of the above yields a confident match for a recall event, skip it. Do not guess. - -### Step 3: Read Recalled Entities +## Workflow -For each recalled entity id, open `.evolve/entities/.md`. The id is a path relative to `.evolve/entities/` without the `.md` suffix, such as `guideline/foo` or `subscribed/alice/guideline/foo`. +### Step 1: Get candidates -Read the entity content and trigger. Skip ids whose files are missing. +Run the candidate builder. 
It emits one JSON object per line (JSONL), one per +unresolved `(session_id, entity)` recall pair: -### Step 4: Assess Influence +```bash +sh -lc 'real_home="$(python3 -c "import os,pwd; print(pwd.getpwuid(os.getuid()).pw_dir)")"; config_home="${CLAW_CONFIG_HOME:-$real_home/.claw}"; script=".claw/skills/evolve-lite:provenance/scripts/provenance.py"; [ -f "$script" ] || script="$config_home/skills/evolve-lite:provenance/scripts/provenance.py"; python3 "$script" candidates' +``` -Compare each recalled entity with the matched trajectory. Pick exactly one verdict: +Each candidate looks like: -- `followed` - the agent's actual actions are consistent with the guideline. -- `contradicted` - the guideline applied, but the agent did the opposite or repeated the avoidable dead end. -- `not_applicable` - the guideline was recalled but did not apply to this session. +```json +{ + "session_id": "<session_id>", + "entity_id": "<type>/<name>", + "entity_excerpt": "<leading excerpt of the entity file>", + "trajectory_path": "/path/to/transcript.jsonl", + "trajectory_excerpt": "<leading excerpt of the transcript>", + "missing": ["trajectory"] +} +``` -Keep `evidence` to one short sentence citing a concrete action, tool call, or absence in the trajectory. +Notes: + +- `entity_id` is the path relative to `.evolve/entities/` without the `.md` + suffix, e.g. `feedback/foo`, `guideline/bar`, or + `subscribed/alice/guideline/baz`. +- Pairs that already have an `influence` row are skipped for you — the builder + reuses the same dedup rule used when influence rows are written. You will + never be handed a duplicate. +- The trajectory locator checks `.evolve/trajectories/` first, then falls back + to the native Claude transcript at + `~/.claude/projects/<slug>/<session_id>.jsonl`. This means provenance works + even when no `.evolve/trajectories/` file was written. +- If an entity file or trajectory cannot be found, the candidate is still + emitted with a `missing: [...]` field so the gap is visible.
When the + trajectory is missing you usually cannot judge the pair — skip it (do not + guess), unless the entity content alone makes `not_applicable` certain. + +### Step 2: Judge each candidate + +For each candidate, read `entity_excerpt` (and open `trajectory_path` for the +full transcript if the excerpt is not enough). Compare the recalled guideline +against the agent's actual actions in the trajectory and pick exactly one +verdict: + +- `followed` — the agent's actual actions are consistent with the guideline. +- `contradicted` — the guideline applied, but the agent did the opposite or + repeated the avoidable dead end. +- `not_applicable` — the guideline was recalled but did not apply to this + session. + +Keep `evidence` to one short sentence citing a concrete action, tool call, or +absence in the trajectory. This judgment is yours — there is no heuristic +fallback. + +### Step 3: Record verdicts + +Persist each verdict. Either pipe one verdict per call to `provenance.py +record`: -### Step 5: Write Influence Events +```bash +echo '{ + "session_id": "<session_id>", + "entity": "<type>/<name>", + "verdict": "followed", + "evidence": "Agent used the saved parser before trying shell fallbacks."
+}' | sh -lc 'real_home="$(python3 -c "import os,pwd; print(pwd.getpwuid(os.getuid()).pw_dir)")"; config_home="${CLAW_CONFIG_HOME:-$real_home/.claw}"; script=".claw/skills/evolve-lite:provenance/scripts/provenance.py"; [ -f "$script" ] || script="$config_home/skills/evolve-lite:provenance/scripts/provenance.py"; python3 "$script" record' +``` -Pipe one JSON payload per assessed session to the helper: +…or, to batch many assessments for one session in a single call, pipe to the +underlying writer directly: ```bash echo '{ "session_id": "", "assessments": [ - {"entity": "guideline/", "verdict": "followed", "evidence": "Agent used the saved parser before trying shell fallbacks."} + {"entity": "feedback/foo", "verdict": "followed", "evidence": "Agent followed it."}, + {"entity": "guideline/bar", "verdict": "not_applicable", "evidence": "Did not apply."} ] }' | sh -lc 'real_home="$(python3 -c "import os,pwd; print(pwd.getpwuid(os.getuid()).pw_dir)")"; config_home="${CLAW_CONFIG_HOME:-$real_home/.claw}"; script=".claw/skills/evolve-lite:provenance/scripts/log_influence.py"; [ -f "$script" ] || script="$config_home/skills/evolve-lite:provenance/scripts/log_influence.py"; python3 "$script"' ``` -The `entity` value must match exactly what appeared in the recall event, including any `subscribed//` prefix. +Both paths write the identical `influence` audit row and skip duplicates. The +`entity` value must match the candidate's `entity_id` exactly, including any +`subscribed//` prefix. -It is valid to emit an empty `assessments` list when recall events exist but no recalled guideline can be assessed. +It is valid to record nothing when recall events exist but no recalled guideline +can be assessed (e.g. every candidate is missing its trajectory). 
diff --git a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py new file mode 100644 index 00000000..21ed024e --- /dev/null +++ b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py @@ -0,0 +1,358 @@ +#!/usr/bin/env python3 +"""Deterministic provenance plumbing for the evolve-lite provenance skill. + +This script does the *mechanical* half of provenance — the part that can be +made deterministic and tested end to end: + + * read ``recall`` rows from ``.evolve/audit.log``, + * skip ``(session_id, entity)`` pairs that already have an ``influence`` row, + * resolve each recalled entity file and the session's trajectory transcript, + * assemble candidate dicts the agent can judge. + +The *semantic* half — deciding whether a recalled guideline was ``followed``, +``contradicted`` or ``not_applicable`` — is an LLM judgment and is NOT done +here. There is deliberately no heuristic verdict: this module never invents a +verdict. The agent reads each candidate, judges it, and pipes the verdict back +through ``record`` (which delegates to ``log_influence.py``'s writer so the +audit-log format is identical). + +Two modes: + + candidates (default) — emit one JSONL candidate per unresolved + (session_id, entity) recall pair to stdout. + record — read a verdict JSON from stdin and append an + ``influence`` row via log_influence.py's writer. +""" + +import json +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple +# plugins can coexist side by side (e.g. .bob/lib/evolve-lite/). 
+_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) + +# Provenance reuses log_influence.py's writer + dedup so the audit-log format and +# the duplicate-suppression rule live in exactly one place. log_influence.py sits +# next to this file in the same skill scripts/ directory. +sys.path.insert(0, str(_script.parent)) + +from entity_io import get_evolve_dir, log as _log # noqa: E402 +import log_influence # noqa: E402 + +_ALLOWED_VERDICTS = log_influence._ALLOWED_VERDICTS + +# How many characters of the entity file / trajectory to surface in a candidate. +_ENTITY_EXCERPT_CHARS = 4000 +_TRAJECTORY_EXCERPT_CHARS = 4000 + + +def log(message): + _log("provenance", message) + + +# --------------------------------------------------------------------------- +# Trajectory locator (Task B) +# --------------------------------------------------------------------------- + + +def _claude_transcript_slug(root): + """Claude derives a project's transcript dir name by replacing every + non-alphanumeric character in the absolute project path with ``-``. + + e.g. /Users/x/Documents/kaizen -> -Users-x-Documents-kaizen + + This mirrors ``_transcript_slug`` in the doctor skill + (skills/evolve-lite/doctor/scripts/doctor.py). The two are kept in sync by + hand because doctor and provenance ship as independent scripts that do not + import one another in the rendered tree; if you change one, change both. + """ + import re + + return re.sub(r"[^A-Za-z0-9]", "-", str(root)) + + +def locate_trajectory(session_id, evolve_dir, *, project_root=None, home=None): + """Locate the saved trajectory transcript for ``session_id``. + + Resolution order (best-effort, returns the first hit or ``None``): + + 1. 
Legacy ``.evolve/trajectories/`` files: + * ``claude-transcript_.jsonl`` — stop-hook transcript dump. + * ``trajectory__.json`` — save-trajectory skill output; the sid + is the filename slice after the timestamp. + * ``trajectory_.json`` — open and match the inner ``session_id``. + 2. NEW native Claude transcript: ``~/.claude/projects//.jsonl`` + where ```` is the project root path slugified the way Claude does + (every non-alphanumeric char -> ``-``; see ``_claude_transcript_slug``). + + Native discovery makes provenance work in the hookless world where no + ``.evolve/trajectories/`` file is ever written. It is platform-neutral: + Bob/Codex keep their transcripts elsewhere, so the native step simply falls + through to ``None`` for them rather than misfiring. + """ + evolve_dir = Path(evolve_dir) + + # --- 1. Legacy .evolve/trajectories/ ------------------------------------ + traj_dir = evolve_dir / "trajectories" + if traj_dir.is_dir(): + direct = traj_dir / f"claude-transcript_{session_id}.jsonl" + if direct.is_file(): + return direct + + # trajectory__.json — match on the filename sid slice. + for path in sorted(traj_dir.glob("trajectory_*_*.json")): + stem = path.stem # trajectory__ + parts = stem.split("_", 2) + if len(parts) == 3 and parts[2] == session_id: + return path + + # trajectory_.json — open and match the inner session_id field. + for path in sorted(traj_dir.glob("trajectory_*.json")): + # Skip the _ shape already handled above. + if path.stem.count("_") >= 2: + continue + try: + data = json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + continue + if isinstance(data, dict) and data.get("session_id") == session_id: + return path + + # --- 2. Native Claude transcript ---------------------------------------- + # The project root is the parent of the .evolve dir; the home dir holds + # ~/.claude/projects//.jsonl. 
+ root = Path(project_root) if project_root is not None else evolve_dir.resolve().parent + base = Path(home) if home is not None else Path.home() + slug = _claude_transcript_slug(root) + native = base / ".claude" / "projects" / slug / f"{session_id}.jsonl" + if native.is_file(): + return native + + return None + + +# --------------------------------------------------------------------------- +# Recall row reading + entity resolution (Task A — candidates) +# --------------------------------------------------------------------------- + + +def read_recall_rows(evolve_dir): + """Yield ``(session_id, [entity_id, ...])`` for every ``recall`` audit row. + + Rows with no ``session_id`` or an empty ``entities`` list are skipped. + """ + audit_log = Path(evolve_dir) / "audit.log" + if not audit_log.is_file(): + return [] + + rows = [] + for line in audit_log.read_text(encoding="utf-8").splitlines(): + if not line.strip(): + continue + try: + event = json.loads(line) + except json.JSONDecodeError: + continue + if not isinstance(event, dict) or event.get("event") != "recall": + continue + session_id = event.get("session_id") + entities = event.get("entities") + if not isinstance(session_id, str) or not session_id: + continue + if not isinstance(entities, list) or not entities: + continue + clean = [e for e in entities if isinstance(e, str) and e] + if clean: + rows.append((session_id, clean)) + return rows + + +def _read_entity(evolve_dir, entity_id): + """Return ``(path, excerpt)`` for an entity file, or ``(path, None)`` if + the file is missing. ``entity_id`` is a ``/`` id relative to + ``entities/`` (without ``.md``). 
+ """ + entity_path = Path(evolve_dir) / "entities" / f"{entity_id}.md" + if not entity_path.is_file(): + return entity_path, None + try: + text = entity_path.read_text(encoding="utf-8") + except OSError: + return entity_path, None + return entity_path, text[:_ENTITY_EXCERPT_CHARS] + + +def _read_trajectory_excerpt(trajectory_path): + """Return a bounded text excerpt of the trajectory file, or ``None``.""" + if trajectory_path is None: + return None + try: + text = Path(trajectory_path).read_text(encoding="utf-8") + except OSError: + return None + return text[:_TRAJECTORY_EXCERPT_CHARS] + + +def build_candidates(evolve_dir, *, project_root=None, home=None): + """Assemble candidate dicts for every unresolved recall (session, entity). + + Returns a list of dicts shaped:: + + { + "session_id": ..., + "entity_id": "/", + "entity_excerpt": , + "trajectory_path": , + "trajectory_excerpt": , + "missing": ["entity"|"trajectory", ...], # only when non-empty + } + + ``(session_id, entity)`` pairs that already have an ``influence`` row are + skipped via ``log_influence.existing_influence_keys`` — the same dedup rule + used when influence rows are written. Candidates whose entity file or + trajectory cannot be found are still emitted with a ``missing`` list so the + gap is visible rather than silently dropped. 
+ """ + evolve_dir = Path(evolve_dir) + existing = log_influence.existing_influence_keys(evolve_dir) + + candidates = [] + for session_id, entities in read_recall_rows(evolve_dir): + trajectory_path = locate_trajectory(session_id, evolve_dir, project_root=project_root, home=home) + for entity_id in entities: + if (session_id, entity_id) in existing: + continue + entity_path, entity_excerpt = _read_entity(evolve_dir, entity_id) + trajectory_excerpt = _read_trajectory_excerpt(trajectory_path) + + missing = [] + if entity_excerpt is None: + missing.append("entity") + if trajectory_path is None: + missing.append("trajectory") + + candidate = { + "session_id": session_id, + "entity_id": entity_id, + "entity_excerpt": entity_excerpt, + "trajectory_path": str(trajectory_path) if trajectory_path else None, + "trajectory_excerpt": trajectory_excerpt, + } + if missing: + candidate["missing"] = missing + candidates.append(candidate) + return candidates + + +# --------------------------------------------------------------------------- +# record (Task A — record) +# --------------------------------------------------------------------------- + + +def record_verdict(payload, evolve_dir=None): + """Append a single ``influence`` row from an agent verdict. + + ``payload`` is ``{session_id, entity, verdict, evidence}``. The verdict must + be one of ``followed|contradicted|not_applicable`` (the *semantic* judgment + stays agent-driven — this only persists what the agent decided). Writing is + delegated to ``log_influence.py`` so the audit-log row format and the + duplicate-suppression rule are not duplicated here. + + Returns the number of rows written (0 or 1). Raises ``ValueError`` on an + invalid payload / verdict. 
+ """ + if not isinstance(payload, dict): + raise ValueError("verdict payload must be a JSON object") + + session_id = payload.get("session_id") + entity = payload.get("entity") + verdict = payload.get("verdict") + evidence = payload.get("evidence", "") + + if not isinstance(session_id, str) or not session_id: + raise ValueError("verdict payload must include a non-empty string session_id") + if not isinstance(entity, str) or not entity: + raise ValueError("verdict payload must include a non-empty string entity") + if verdict not in _ALLOWED_VERDICTS: + raise ValueError(f"verdict must be one of {sorted(_ALLOWED_VERDICTS)}, got {verdict!r}") + + if evolve_dir is None: + evolve_dir = get_evolve_dir().resolve() + evolve_dir = Path(evolve_dir) + + existing = log_influence.existing_influence_keys(evolve_dir) + if (session_id, entity) in existing: + log(f"Skipping duplicate influence verdict: session_id={session_id} entity={entity}") + return 0 + + if not isinstance(evidence, str): + evidence = str(evidence) + + log_influence.audit.append( + evolve_dir=str(evolve_dir), + event="influence", + session_id=session_id, + entity=entity, + verdict=verdict, + evidence=evidence, + ) + return 1 + + +# --------------------------------------------------------------------------- +# __main__ +# --------------------------------------------------------------------------- + + +def _run_candidates(): + evolve_dir = get_evolve_dir().resolve() + candidates = build_candidates(evolve_dir) + for candidate in candidates: + print(json.dumps(candidate)) + log(f"Emitted {len(candidates)} candidate(s) from {evolve_dir}") + + +def _run_record(): + try: + payload = json.load(sys.stdin) + except json.JSONDecodeError as exc: + log(f"Invalid JSON input: {exc}") + print(f"Error: invalid JSON input - {exc}", file=sys.stderr) + sys.exit(1) + + try: + written = record_verdict(payload) + except ValueError as exc: + log(f"Rejected verdict: {exc}") + print(f"Error: {exc}", file=sys.stderr) + sys.exit(1) + + 
log(f"Recorded {written} influence verdict(s).") + print(f"Recorded {written} influence verdict(s).") + + +def main(argv=None): + argv = list(sys.argv[1:] if argv is None else argv) + mode = argv[0] if argv else "candidates" + if mode == "candidates": + _run_candidates() + elif mode == "record": + _run_record() + else: + print(f"Error: unknown mode {mode!r}; expected 'candidates' or 'record'.", file=sys.stderr) + sys.exit(2) + + +if __name__ == "__main__": + main() diff --git a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/provenance/SKILL.md b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/provenance/SKILL.md index 349ac090..d919b538 100644 --- a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/provenance/SKILL.md +++ b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/provenance/SKILL.md @@ -7,58 +7,107 @@ description: Analyze saved trajectories and recall audit events offline to recor ## Overview -This skill runs after one or more sessions have completed. It reads saved trajectories from `.evolve/trajectories/`, matches them to `recall` events in `.evolve/audit.log`, and records post-hoc `influence` events for recalled guidelines. +This skill runs after one or more sessions have completed. It reads `recall` +events from `.evolve/audit.log`, locates each session's trajectory, and records +post-hoc `influence` events for the recalled guidelines. -Use this skill when you want to compute usage provenance without coupling the work to the live learn step. +The mechanical work — reading recall rows, skipping already-assessed pairs, +resolving entity files, and locating trajectories — is done deterministically by +`provenance.py candidates`. Your job is the judgment: read each candidate and +decide whether the recalled guideline was `followed`, `contradicted`, or +`not_applicable`, then persist that verdict. -## Workflow - -### Step 1: Load Recall Events - -Read `.evolve/audit.log` as JSONL. 
Find entries where `event == "recall"` and `entities` is a non-empty list. - -Skip any recall event that already has `influence` entries for the same `session_id` and entity ids. Do not write duplicate influence records. - -### Step 2: Locate Saved Trajectories +Use this skill when you want to compute usage provenance without coupling the +work to the live learn step. -List `.evolve/trajectories/` and match each recall event to a trajectory by `session_id`. - -Matching strategy (in order): -1. `claude-transcript_.jsonl` - the stop-hook transcript dump; the session id is in the filename. -2. `trajectory__.json` - written by the evolve-lite:save-trajectory skill when a session id is available. Match on the `` slice of the filename. -3. `trajectory_.json` - open the file and match its top-level `session_id` field against the recall event. Only fall back to this step when the filename alone does not identify the session. - -If none of the above yields a confident match for a recall event, skip it. Do not guess. - -### Step 3: Read Recalled Entities +## Workflow -For each recalled entity id, open `.evolve/entities/.md`. The id is a path relative to `.evolve/entities/` without the `.md` suffix, such as `guideline/foo` or `subscribed/alice/guideline/foo`. +### Step 1: Get candidates -Read the entity content and trigger. Skip ids whose files are missing. +Run the candidate builder. It emits one JSON object per line (JSONL), one per +unresolved `(session_id, entity)` recall pair: -### Step 4: Assess Influence +```bash +python3 "$(git rev-parse --show-toplevel 2>/dev/null || pwd)/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py" candidates +``` -Compare each recalled entity with the matched trajectory. Pick exactly one verdict: +Each candidate looks like: -- `followed` - the agent's actual actions are consistent with the guideline. -- `contradicted` - the guideline applied, but the agent did the opposite or repeated the avoidable dead end. 
-- `not_applicable` - the guideline was recalled but did not apply to this session. +```json +{ + "session_id": "<session_id>", + "entity_id": "<type>/<name>", + "entity_excerpt": "<leading excerpt of the entity file>", + "trajectory_path": "/path/to/transcript.jsonl", + "trajectory_excerpt": "<leading excerpt of the transcript>", + "missing": ["trajectory"] +} +``` -Keep `evidence` to one short sentence citing a concrete action, tool call, or absence in the trajectory. +Notes: + +- `entity_id` is the path relative to `.evolve/entities/` without the `.md` + suffix, e.g. `feedback/foo`, `guideline/bar`, or + `subscribed/alice/guideline/baz`. +- Pairs that already have an `influence` row are skipped for you — the builder + reuses the same dedup rule used when influence rows are written. You will + never be handed a duplicate. +- The trajectory locator checks `.evolve/trajectories/` first, then falls back + to the native Claude transcript at + `~/.claude/projects/<slug>/<session_id>.jsonl`. This means provenance works + even when no `.evolve/trajectories/` file was written. +- If an entity file or trajectory cannot be found, the candidate is still + emitted with a `missing: [...]` field so the gap is visible. When the + trajectory is missing you usually cannot judge the pair — skip it (do not + guess), unless the entity content alone makes `not_applicable` certain. + +### Step 2: Judge each candidate + +For each candidate, read `entity_excerpt` (and open `trajectory_path` for the +full transcript if the excerpt is not enough). Compare the recalled guideline +against the agent's actual actions in the trajectory and pick exactly one +verdict: + +- `followed` — the agent's actual actions are consistent with the guideline. +- `contradicted` — the guideline applied, but the agent did the opposite or + repeated the avoidable dead end. +- `not_applicable` — the guideline was recalled but did not apply to this + session. + +Keep `evidence` to one short sentence citing a concrete action, tool call, or +absence in the trajectory. This judgment is yours — there is no heuristic +fallback.
+ +### Step 3: Record verdicts + +Persist each verdict. Either pipe one verdict per call to `provenance.py +record`: -### Step 5: Write Influence Events +```bash +echo '{ + "session_id": "", + "entity": "/", + "verdict": "followed", + "evidence": "Agent used the saved parser before trying shell fallbacks." +}' | python3 "$(git rev-parse --show-toplevel 2>/dev/null || pwd)/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py" record +``` -Pipe one JSON payload per assessed session to the helper: +…or, to batch many assessments for one session in a single call, pipe to the +underlying writer directly: ```bash echo '{ "session_id": "", "assessments": [ - {"entity": "guideline/", "verdict": "followed", "evidence": "Agent used the saved parser before trying shell fallbacks."} + {"entity": "feedback/foo", "verdict": "followed", "evidence": "Agent followed it."}, + {"entity": "guideline/bar", "verdict": "not_applicable", "evidence": "Did not apply."} ] }' | python3 "$(git rev-parse --show-toplevel 2>/dev/null || pwd)/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/log_influence.py" ``` -The `entity` value must match exactly what appeared in the recall event, including any `subscribed//` prefix. +Both paths write the identical `influence` audit row and skip duplicates. The +`entity` value must match the candidate's `entity_id` exactly, including any +`subscribed//` prefix. -It is valid to emit an empty `assessments` list when recall events exist but no recalled guideline can be assessed. +It is valid to record nothing when recall events exist but no recalled guideline +can be assessed (e.g. every candidate is missing its trajectory). 
diff --git a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py new file mode 100644 index 00000000..21ed024e --- /dev/null +++ b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py @@ -0,0 +1,358 @@ +#!/usr/bin/env python3 +"""Deterministic provenance plumbing for the evolve-lite provenance skill. + +This script does the *mechanical* half of provenance — the part that can be +made deterministic and tested end to end: + + * read ``recall`` rows from ``.evolve/audit.log``, + * skip ``(session_id, entity)`` pairs that already have an ``influence`` row, + * resolve each recalled entity file and the session's trajectory transcript, + * assemble candidate dicts the agent can judge. + +The *semantic* half — deciding whether a recalled guideline was ``followed``, +``contradicted`` or ``not_applicable`` — is an LLM judgment and is NOT done +here. There is deliberately no heuristic verdict: this module never invents a +verdict. The agent reads each candidate, judges it, and pipes the verdict back +through ``record`` (which delegates to ``log_influence.py``'s writer so the +audit-log format is identical). + +Two modes: + + candidates (default) — emit one JSONL candidate per unresolved + (session_id, entity) recall pair to stdout. + record — read a verdict JSON from stdin and append an + ``influence`` row via log_influence.py's writer. +""" + +import json +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple +# plugins can coexist side by side (e.g. .bob/lib/evolve-lite/). 
+_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) + +# Provenance reuses log_influence.py's writer + dedup so the audit-log format and +# the duplicate-suppression rule live in exactly one place. log_influence.py sits +# next to this file in the same skill scripts/ directory. +sys.path.insert(0, str(_script.parent)) + +from entity_io import get_evolve_dir, log as _log # noqa: E402 +import log_influence # noqa: E402 + +_ALLOWED_VERDICTS = log_influence._ALLOWED_VERDICTS + +# How many characters of the entity file / trajectory to surface in a candidate. +_ENTITY_EXCERPT_CHARS = 4000 +_TRAJECTORY_EXCERPT_CHARS = 4000 + + +def log(message): + _log("provenance", message) + + +# --------------------------------------------------------------------------- +# Trajectory locator (Task B) +# --------------------------------------------------------------------------- + + +def _claude_transcript_slug(root): + """Claude derives a project's transcript dir name by replacing every + non-alphanumeric character in the absolute project path with ``-``. + + e.g. /Users/x/Documents/kaizen -> -Users-x-Documents-kaizen + + This mirrors ``_transcript_slug`` in the doctor skill + (skills/evolve-lite/doctor/scripts/doctor.py). The two are kept in sync by + hand because doctor and provenance ship as independent scripts that do not + import one another in the rendered tree; if you change one, change both. + """ + import re + + return re.sub(r"[^A-Za-z0-9]", "-", str(root)) + + +def locate_trajectory(session_id, evolve_dir, *, project_root=None, home=None): + """Locate the saved trajectory transcript for ``session_id``. + + Resolution order (best-effort, returns the first hit or ``None``): + + 1. 
Legacy ``.evolve/trajectories/`` files: + * ``claude-transcript_.jsonl`` — stop-hook transcript dump. + * ``trajectory__.json`` — save-trajectory skill output; the sid + is the filename slice after the timestamp. + * ``trajectory_.json`` — open and match the inner ``session_id``. + 2. NEW native Claude transcript: ``~/.claude/projects//.jsonl`` + where ```` is the project root path slugified the way Claude does + (every non-alphanumeric char -> ``-``; see ``_claude_transcript_slug``). + + Native discovery makes provenance work in the hookless world where no + ``.evolve/trajectories/`` file is ever written. It is platform-neutral: + Bob/Codex keep their transcripts elsewhere, so the native step simply falls + through to ``None`` for them rather than misfiring. + """ + evolve_dir = Path(evolve_dir) + + # --- 1. Legacy .evolve/trajectories/ ------------------------------------ + traj_dir = evolve_dir / "trajectories" + if traj_dir.is_dir(): + direct = traj_dir / f"claude-transcript_{session_id}.jsonl" + if direct.is_file(): + return direct + + # trajectory__.json — match on the filename sid slice. + for path in sorted(traj_dir.glob("trajectory_*_*.json")): + stem = path.stem # trajectory__ + parts = stem.split("_", 2) + if len(parts) == 3 and parts[2] == session_id: + return path + + # trajectory_.json — open and match the inner session_id field. + for path in sorted(traj_dir.glob("trajectory_*.json")): + # Skip the _ shape already handled above. + if path.stem.count("_") >= 2: + continue + try: + data = json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + continue + if isinstance(data, dict) and data.get("session_id") == session_id: + return path + + # --- 2. Native Claude transcript ---------------------------------------- + # The project root is the parent of the .evolve dir; the home dir holds + # ~/.claude/projects//.jsonl. 
+ root = Path(project_root) if project_root is not None else evolve_dir.resolve().parent + base = Path(home) if home is not None else Path.home() + slug = _claude_transcript_slug(root) + native = base / ".claude" / "projects" / slug / f"{session_id}.jsonl" + if native.is_file(): + return native + + return None + + +# --------------------------------------------------------------------------- +# Recall row reading + entity resolution (Task A — candidates) +# --------------------------------------------------------------------------- + + +def read_recall_rows(evolve_dir): + """Yield ``(session_id, [entity_id, ...])`` for every ``recall`` audit row. + + Rows with no ``session_id`` or an empty ``entities`` list are skipped. + """ + audit_log = Path(evolve_dir) / "audit.log" + if not audit_log.is_file(): + return [] + + rows = [] + for line in audit_log.read_text(encoding="utf-8").splitlines(): + if not line.strip(): + continue + try: + event = json.loads(line) + except json.JSONDecodeError: + continue + if not isinstance(event, dict) or event.get("event") != "recall": + continue + session_id = event.get("session_id") + entities = event.get("entities") + if not isinstance(session_id, str) or not session_id: + continue + if not isinstance(entities, list) or not entities: + continue + clean = [e for e in entities if isinstance(e, str) and e] + if clean: + rows.append((session_id, clean)) + return rows + + +def _read_entity(evolve_dir, entity_id): + """Return ``(path, excerpt)`` for an entity file, or ``(path, None)`` if + the file is missing. ``entity_id`` is a ``/`` id relative to + ``entities/`` (without ``.md``). 
+ """ + entity_path = Path(evolve_dir) / "entities" / f"{entity_id}.md" + if not entity_path.is_file(): + return entity_path, None + try: + text = entity_path.read_text(encoding="utf-8") + except OSError: + return entity_path, None + return entity_path, text[:_ENTITY_EXCERPT_CHARS] + + +def _read_trajectory_excerpt(trajectory_path): + """Return a bounded text excerpt of the trajectory file, or ``None``.""" + if trajectory_path is None: + return None + try: + text = Path(trajectory_path).read_text(encoding="utf-8") + except OSError: + return None + return text[:_TRAJECTORY_EXCERPT_CHARS] + + +def build_candidates(evolve_dir, *, project_root=None, home=None): + """Assemble candidate dicts for every unresolved recall (session, entity). + + Returns a list of dicts shaped:: + + { + "session_id": ..., + "entity_id": "/", + "entity_excerpt": , + "trajectory_path": , + "trajectory_excerpt": , + "missing": ["entity"|"trajectory", ...], # only when non-empty + } + + ``(session_id, entity)`` pairs that already have an ``influence`` row are + skipped via ``log_influence.existing_influence_keys`` — the same dedup rule + used when influence rows are written. Candidates whose entity file or + trajectory cannot be found are still emitted with a ``missing`` list so the + gap is visible rather than silently dropped. 
+ """ + evolve_dir = Path(evolve_dir) + existing = log_influence.existing_influence_keys(evolve_dir) + + candidates = [] + for session_id, entities in read_recall_rows(evolve_dir): + trajectory_path = locate_trajectory(session_id, evolve_dir, project_root=project_root, home=home) + for entity_id in entities: + if (session_id, entity_id) in existing: + continue + entity_path, entity_excerpt = _read_entity(evolve_dir, entity_id) + trajectory_excerpt = _read_trajectory_excerpt(trajectory_path) + + missing = [] + if entity_excerpt is None: + missing.append("entity") + if trajectory_path is None: + missing.append("trajectory") + + candidate = { + "session_id": session_id, + "entity_id": entity_id, + "entity_excerpt": entity_excerpt, + "trajectory_path": str(trajectory_path) if trajectory_path else None, + "trajectory_excerpt": trajectory_excerpt, + } + if missing: + candidate["missing"] = missing + candidates.append(candidate) + return candidates + + +# --------------------------------------------------------------------------- +# record (Task A — record) +# --------------------------------------------------------------------------- + + +def record_verdict(payload, evolve_dir=None): + """Append a single ``influence`` row from an agent verdict. + + ``payload`` is ``{session_id, entity, verdict, evidence}``. The verdict must + be one of ``followed|contradicted|not_applicable`` (the *semantic* judgment + stays agent-driven — this only persists what the agent decided). Writing is + delegated to ``log_influence.py`` so the audit-log row format and the + duplicate-suppression rule are not duplicated here. + + Returns the number of rows written (0 or 1). Raises ``ValueError`` on an + invalid payload / verdict. 
+ """ + if not isinstance(payload, dict): + raise ValueError("verdict payload must be a JSON object") + + session_id = payload.get("session_id") + entity = payload.get("entity") + verdict = payload.get("verdict") + evidence = payload.get("evidence", "") + + if not isinstance(session_id, str) or not session_id: + raise ValueError("verdict payload must include a non-empty string session_id") + if not isinstance(entity, str) or not entity: + raise ValueError("verdict payload must include a non-empty string entity") + if verdict not in _ALLOWED_VERDICTS: + raise ValueError(f"verdict must be one of {sorted(_ALLOWED_VERDICTS)}, got {verdict!r}") + + if evolve_dir is None: + evolve_dir = get_evolve_dir().resolve() + evolve_dir = Path(evolve_dir) + + existing = log_influence.existing_influence_keys(evolve_dir) + if (session_id, entity) in existing: + log(f"Skipping duplicate influence verdict: session_id={session_id} entity={entity}") + return 0 + + if not isinstance(evidence, str): + evidence = str(evidence) + + log_influence.audit.append( + evolve_dir=str(evolve_dir), + event="influence", + session_id=session_id, + entity=entity, + verdict=verdict, + evidence=evidence, + ) + return 1 + + +# --------------------------------------------------------------------------- +# __main__ +# --------------------------------------------------------------------------- + + +def _run_candidates(): + evolve_dir = get_evolve_dir().resolve() + candidates = build_candidates(evolve_dir) + for candidate in candidates: + print(json.dumps(candidate)) + log(f"Emitted {len(candidates)} candidate(s) from {evolve_dir}") + + +def _run_record(): + try: + payload = json.load(sys.stdin) + except json.JSONDecodeError as exc: + log(f"Invalid JSON input: {exc}") + print(f"Error: invalid JSON input - {exc}", file=sys.stderr) + sys.exit(1) + + try: + written = record_verdict(payload) + except ValueError as exc: + log(f"Rejected verdict: {exc}") + print(f"Error: {exc}", file=sys.stderr) + sys.exit(1) + + 
log(f"Recorded {written} influence verdict(s).") + print(f"Recorded {written} influence verdict(s).") + + +def main(argv=None): + argv = list(sys.argv[1:] if argv is None else argv) + mode = argv[0] if argv else "candidates" + if mode == "candidates": + _run_candidates() + elif mode == "record": + _run_record() + else: + print(f"Error: unknown mode {mode!r}; expected 'candidates' or 'record'.", file=sys.stderr) + sys.exit(2) + + +if __name__ == "__main__": + main() diff --git a/plugin-source/skills/evolve-lite/provenance/SKILL.md.j2 b/plugin-source/skills/evolve-lite/provenance/SKILL.md.j2 index ee704616..9e8aa47e 100644 --- a/plugin-source/skills/evolve-lite/provenance/SKILL.md.j2 +++ b/plugin-source/skills/evolve-lite/provenance/SKILL.md.j2 @@ -8,58 +8,107 @@ description: Analyze saved trajectories and recall audit events offline to recor ## Overview -This skill runs after one or more sessions have completed. It reads saved trajectories from `.evolve/trajectories/`, matches them to `recall` events in `.evolve/audit.log`, and records post-hoc `influence` events for recalled guidelines. +This skill runs after one or more sessions have completed. It reads `recall` +events from `.evolve/audit.log`, locates each session's trajectory, and records +post-hoc `influence` events for the recalled guidelines. -Use this skill when you want to compute usage provenance without coupling the work to the live learn step. +The mechanical work — reading recall rows, skipping already-assessed pairs, +resolving entity files, and locating trajectories — is done deterministically by +`provenance.py candidates`. Your job is the judgment: read each candidate and +decide whether the recalled guideline was `followed`, `contradicted`, or +`not_applicable`, then persist that verdict. -## Workflow - -### Step 1: Load Recall Events - -Read `.evolve/audit.log` as JSONL. Find entries where `event == "recall"` and `entities` is a non-empty list. 
- -Skip any recall event that already has `influence` entries for the same `session_id` and entity ids. Do not write duplicate influence records. - -### Step 2: Locate Saved Trajectories +Use this skill when you want to compute usage provenance without coupling the +work to the live learn step. -List `.evolve/trajectories/` and match each recall event to a trajectory by `session_id`. - -Matching strategy (in order): -1. `claude-transcript_.jsonl` - the stop-hook transcript dump; the session id is in the filename. -2. `trajectory__.json` - written by the {{ skill_ref("save-trajectory") }} skill when a session id is available. Match on the `` slice of the filename. -3. `trajectory_.json` - open the file and match its top-level `session_id` field against the recall event. Only fall back to this step when the filename alone does not identify the session. - -If none of the above yields a confident match for a recall event, skip it. Do not guess. - -### Step 3: Read Recalled Entities +## Workflow -For each recalled entity id, open `.evolve/entities/.md`. The id is a path relative to `.evolve/entities/` without the `.md` suffix, such as `guideline/foo` or `subscribed/alice/guideline/foo`. +### Step 1: Get candidates -Read the entity content and trigger. Skip ids whose files are missing. +Run the candidate builder. It emits one JSON object per line (JSONL), one per +unresolved `(session_id, entity)` recall pair: -### Step 4: Assess Influence +```bash +{{ invoke("provenance", "provenance.py", "candidates") }} +``` -Compare each recalled entity with the matched trajectory. Pick exactly one verdict: +Each candidate looks like: -- `followed` - the agent's actual actions are consistent with the guideline. -- `contradicted` - the guideline applied, but the agent did the opposite or repeated the avoidable dead end. -- `not_applicable` - the guideline was recalled but did not apply to this session. 
+```json +{ + "session_id": "", + "entity_id": "/", + "entity_excerpt": "", + "trajectory_path": "/path/to/transcript.jsonl", + "trajectory_excerpt": "", + "missing": ["trajectory"] +} +``` -Keep `evidence` to one short sentence citing a concrete action, tool call, or absence in the trajectory. +Notes: + +- `entity_id` is the path relative to `.evolve/entities/` without the `.md` + suffix, e.g. `feedback/foo`, `guideline/bar`, or + `subscribed/alice/guideline/baz`. +- Pairs that already have an `influence` row are skipped for you — the builder + reuses the same dedup rule used when influence rows are written. You will + never be handed a duplicate. +- The trajectory locator checks `.evolve/trajectories/` first, then falls back + to the native Claude transcript at + `~/.claude/projects//.jsonl`. This means provenance works + even when no `.evolve/trajectories/` file was written. +- If an entity file or trajectory cannot be found, the candidate is still + emitted with a `missing: [...]` field so the gap is visible. When the + trajectory is missing you usually cannot judge the pair — skip it (do not + guess), unless the entity content alone makes `not_applicable` certain. + +### Step 2: Judge each candidate + +For each candidate, read `entity_excerpt` (and open `trajectory_path` for the +full transcript if the excerpt is not enough). Compare the recalled guideline +against the agent's actual actions in the trajectory and pick exactly one +verdict: + +- `followed` — the agent's actual actions are consistent with the guideline. +- `contradicted` — the guideline applied, but the agent did the opposite or + repeated the avoidable dead end. +- `not_applicable` — the guideline was recalled but did not apply to this + session. + +Keep `evidence` to one short sentence citing a concrete action, tool call, or +absence in the trajectory. This judgment is yours — there is no heuristic +fallback. + +### Step 3: Record verdicts + +Persist each verdict. 
Either pipe one verdict per call to `provenance.py +record`: -### Step 5: Write Influence Events +```bash +echo '{ + "session_id": "", + "entity": "/", + "verdict": "followed", + "evidence": "Agent used the saved parser before trying shell fallbacks." +}' | {{ invoke("provenance", "provenance.py", "record") }} +``` -Pipe one JSON payload per assessed session to the helper: +…or, to batch many assessments for one session in a single call, pipe to the +underlying writer directly: ```bash echo '{ "session_id": "", "assessments": [ - {"entity": "guideline/", "verdict": "followed", "evidence": "Agent used the saved parser before trying shell fallbacks."} + {"entity": "feedback/foo", "verdict": "followed", "evidence": "Agent followed it."}, + {"entity": "guideline/bar", "verdict": "not_applicable", "evidence": "Did not apply."} ] }' | {{ invoke("provenance", "log_influence.py") }} ``` -The `entity` value must match exactly what appeared in the recall event, including any `subscribed//` prefix. +Both paths write the identical `influence` audit row and skip duplicates. The +`entity` value must match the candidate's `entity_id` exactly, including any +`subscribed//` prefix. -It is valid to emit an empty `assessments` list when recall events exist but no recalled guideline can be assessed. +It is valid to record nothing when recall events exist but no recalled guideline +can be assessed (e.g. every candidate is missing its trajectory). diff --git a/plugin-source/skills/evolve-lite/provenance/scripts/provenance.py b/plugin-source/skills/evolve-lite/provenance/scripts/provenance.py new file mode 100644 index 00000000..21ed024e --- /dev/null +++ b/plugin-source/skills/evolve-lite/provenance/scripts/provenance.py @@ -0,0 +1,358 @@ +#!/usr/bin/env python3 +"""Deterministic provenance plumbing for the evolve-lite provenance skill. 
+ +This script does the *mechanical* half of provenance — the part that can be +made deterministic and tested end to end: + + * read ``recall`` rows from ``.evolve/audit.log``, + * skip ``(session_id, entity)`` pairs that already have an ``influence`` row, + * resolve each recalled entity file and the session's trajectory transcript, + * assemble candidate dicts the agent can judge. + +The *semantic* half — deciding whether a recalled guideline was ``followed``, +``contradicted`` or ``not_applicable`` — is an LLM judgment and is NOT done +here. There is deliberately no heuristic verdict: this module never invents a +verdict. The agent reads each candidate, judges it, and pipes the verdict back +through ``record`` (which delegates to ``log_influence.py``'s writer so the +audit-log format is identical). + +Two modes: + + candidates (default) — emit one JSONL candidate per unresolved + (session_id, entity) recall pair to stdout. + record — read a verdict JSON from stdin and append an + ``influence`` row via log_influence.py's writer. +""" + +import json +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple +# plugins can coexist side by side (e.g. .bob/lib/evolve-lite/). +_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) + +# Provenance reuses log_influence.py's writer + dedup so the audit-log format and +# the duplicate-suppression rule live in exactly one place. log_influence.py sits +# next to this file in the same skill scripts/ directory. 
+sys.path.insert(0, str(_script.parent)) + +from entity_io import get_evolve_dir, log as _log # noqa: E402 +import log_influence # noqa: E402 + +_ALLOWED_VERDICTS = log_influence._ALLOWED_VERDICTS + +# How many characters of the entity file / trajectory to surface in a candidate. +_ENTITY_EXCERPT_CHARS = 4000 +_TRAJECTORY_EXCERPT_CHARS = 4000 + + +def log(message): + _log("provenance", message) + + +# --------------------------------------------------------------------------- +# Trajectory locator (Task B) +# --------------------------------------------------------------------------- + + +def _claude_transcript_slug(root): + """Claude derives a project's transcript dir name by replacing every + non-alphanumeric character in the absolute project path with ``-``. + + e.g. /Users/x/Documents/kaizen -> -Users-x-Documents-kaizen + + This mirrors ``_transcript_slug`` in the doctor skill + (skills/evolve-lite/doctor/scripts/doctor.py). The two are kept in sync by + hand because doctor and provenance ship as independent scripts that do not + import one another in the rendered tree; if you change one, change both. + """ + import re + + return re.sub(r"[^A-Za-z0-9]", "-", str(root)) + + +def locate_trajectory(session_id, evolve_dir, *, project_root=None, home=None): + """Locate the saved trajectory transcript for ``session_id``. + + Resolution order (best-effort, returns the first hit or ``None``): + + 1. Legacy ``.evolve/trajectories/`` files: + * ``claude-transcript_.jsonl`` — stop-hook transcript dump. + * ``trajectory__.json`` — save-trajectory skill output; the sid + is the filename slice after the timestamp. + * ``trajectory_.json`` — open and match the inner ``session_id``. + 2. NEW native Claude transcript: ``~/.claude/projects//.jsonl`` + where ```` is the project root path slugified the way Claude does + (every non-alphanumeric char -> ``-``; see ``_claude_transcript_slug``). 
+ + Native discovery makes provenance work in the hookless world where no + ``.evolve/trajectories/`` file is ever written. It is platform-neutral: + Bob/Codex keep their transcripts elsewhere, so the native step simply falls + through to ``None`` for them rather than misfiring. + """ + evolve_dir = Path(evolve_dir) + + # --- 1. Legacy .evolve/trajectories/ ------------------------------------ + traj_dir = evolve_dir / "trajectories" + if traj_dir.is_dir(): + direct = traj_dir / f"claude-transcript_{session_id}.jsonl" + if direct.is_file(): + return direct + + # trajectory__.json — match on the filename sid slice. + for path in sorted(traj_dir.glob("trajectory_*_*.json")): + stem = path.stem # trajectory__ + parts = stem.split("_", 2) + if len(parts) == 3 and parts[2] == session_id: + return path + + # trajectory_.json — open and match the inner session_id field. + for path in sorted(traj_dir.glob("trajectory_*.json")): + # Skip the _ shape already handled above. + if path.stem.count("_") >= 2: + continue + try: + data = json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + continue + if isinstance(data, dict) and data.get("session_id") == session_id: + return path + + # --- 2. Native Claude transcript ---------------------------------------- + # The project root is the parent of the .evolve dir; the home dir holds + # ~/.claude/projects//.jsonl. 
+ root = Path(project_root) if project_root is not None else evolve_dir.resolve().parent + base = Path(home) if home is not None else Path.home() + slug = _claude_transcript_slug(root) + native = base / ".claude" / "projects" / slug / f"{session_id}.jsonl" + if native.is_file(): + return native + + return None + + +# --------------------------------------------------------------------------- +# Recall row reading + entity resolution (Task A — candidates) +# --------------------------------------------------------------------------- + + +def read_recall_rows(evolve_dir): + """Yield ``(session_id, [entity_id, ...])`` for every ``recall`` audit row. + + Rows with no ``session_id`` or an empty ``entities`` list are skipped. + """ + audit_log = Path(evolve_dir) / "audit.log" + if not audit_log.is_file(): + return [] + + rows = [] + for line in audit_log.read_text(encoding="utf-8").splitlines(): + if not line.strip(): + continue + try: + event = json.loads(line) + except json.JSONDecodeError: + continue + if not isinstance(event, dict) or event.get("event") != "recall": + continue + session_id = event.get("session_id") + entities = event.get("entities") + if not isinstance(session_id, str) or not session_id: + continue + if not isinstance(entities, list) or not entities: + continue + clean = [e for e in entities if isinstance(e, str) and e] + if clean: + rows.append((session_id, clean)) + return rows + + +def _read_entity(evolve_dir, entity_id): + """Return ``(path, excerpt)`` for an entity file, or ``(path, None)`` if + the file is missing. ``entity_id`` is a ``/`` id relative to + ``entities/`` (without ``.md``). 
+ """ + entity_path = Path(evolve_dir) / "entities" / f"{entity_id}.md" + if not entity_path.is_file(): + return entity_path, None + try: + text = entity_path.read_text(encoding="utf-8") + except OSError: + return entity_path, None + return entity_path, text[:_ENTITY_EXCERPT_CHARS] + + +def _read_trajectory_excerpt(trajectory_path): + """Return a bounded text excerpt of the trajectory file, or ``None``.""" + if trajectory_path is None: + return None + try: + text = Path(trajectory_path).read_text(encoding="utf-8") + except OSError: + return None + return text[:_TRAJECTORY_EXCERPT_CHARS] + + +def build_candidates(evolve_dir, *, project_root=None, home=None): + """Assemble candidate dicts for every unresolved recall (session, entity). + + Returns a list of dicts shaped:: + + { + "session_id": ..., + "entity_id": "/", + "entity_excerpt": , + "trajectory_path": , + "trajectory_excerpt": , + "missing": ["entity"|"trajectory", ...], # only when non-empty + } + + ``(session_id, entity)`` pairs that already have an ``influence`` row are + skipped via ``log_influence.existing_influence_keys`` — the same dedup rule + used when influence rows are written. Candidates whose entity file or + trajectory cannot be found are still emitted with a ``missing`` list so the + gap is visible rather than silently dropped. 
+ """ + evolve_dir = Path(evolve_dir) + existing = log_influence.existing_influence_keys(evolve_dir) + + candidates = [] + for session_id, entities in read_recall_rows(evolve_dir): + trajectory_path = locate_trajectory(session_id, evolve_dir, project_root=project_root, home=home) + for entity_id in entities: + if (session_id, entity_id) in existing: + continue + entity_path, entity_excerpt = _read_entity(evolve_dir, entity_id) + trajectory_excerpt = _read_trajectory_excerpt(trajectory_path) + + missing = [] + if entity_excerpt is None: + missing.append("entity") + if trajectory_path is None: + missing.append("trajectory") + + candidate = { + "session_id": session_id, + "entity_id": entity_id, + "entity_excerpt": entity_excerpt, + "trajectory_path": str(trajectory_path) if trajectory_path else None, + "trajectory_excerpt": trajectory_excerpt, + } + if missing: + candidate["missing"] = missing + candidates.append(candidate) + return candidates + + +# --------------------------------------------------------------------------- +# record (Task A — record) +# --------------------------------------------------------------------------- + + +def record_verdict(payload, evolve_dir=None): + """Append a single ``influence`` row from an agent verdict. + + ``payload`` is ``{session_id, entity, verdict, evidence}``. The verdict must + be one of ``followed|contradicted|not_applicable`` (the *semantic* judgment + stays agent-driven — this only persists what the agent decided). Writing is + delegated to ``log_influence.py`` so the audit-log row format and the + duplicate-suppression rule are not duplicated here. + + Returns the number of rows written (0 or 1). Raises ``ValueError`` on an + invalid payload / verdict. 
+ """ + if not isinstance(payload, dict): + raise ValueError("verdict payload must be a JSON object") + + session_id = payload.get("session_id") + entity = payload.get("entity") + verdict = payload.get("verdict") + evidence = payload.get("evidence", "") + + if not isinstance(session_id, str) or not session_id: + raise ValueError("verdict payload must include a non-empty string session_id") + if not isinstance(entity, str) or not entity: + raise ValueError("verdict payload must include a non-empty string entity") + if verdict not in _ALLOWED_VERDICTS: + raise ValueError(f"verdict must be one of {sorted(_ALLOWED_VERDICTS)}, got {verdict!r}") + + if evolve_dir is None: + evolve_dir = get_evolve_dir().resolve() + evolve_dir = Path(evolve_dir) + + existing = log_influence.existing_influence_keys(evolve_dir) + if (session_id, entity) in existing: + log(f"Skipping duplicate influence verdict: session_id={session_id} entity={entity}") + return 0 + + if not isinstance(evidence, str): + evidence = str(evidence) + + log_influence.audit.append( + evolve_dir=str(evolve_dir), + event="influence", + session_id=session_id, + entity=entity, + verdict=verdict, + evidence=evidence, + ) + return 1 + + +# --------------------------------------------------------------------------- +# __main__ +# --------------------------------------------------------------------------- + + +def _run_candidates(): + evolve_dir = get_evolve_dir().resolve() + candidates = build_candidates(evolve_dir) + for candidate in candidates: + print(json.dumps(candidate)) + log(f"Emitted {len(candidates)} candidate(s) from {evolve_dir}") + + +def _run_record(): + try: + payload = json.load(sys.stdin) + except json.JSONDecodeError as exc: + log(f"Invalid JSON input: {exc}") + print(f"Error: invalid JSON input - {exc}", file=sys.stderr) + sys.exit(1) + + try: + written = record_verdict(payload) + except ValueError as exc: + log(f"Rejected verdict: {exc}") + print(f"Error: {exc}", file=sys.stderr) + sys.exit(1) + + 
log(f"Recorded {written} influence verdict(s).") + print(f"Recorded {written} influence verdict(s).") + + +def main(argv=None): + argv = list(sys.argv[1:] if argv is None else argv) + mode = argv[0] if argv else "candidates" + if mode == "candidates": + _run_candidates() + elif mode == "record": + _run_record() + else: + print(f"Error: unknown mode {mode!r}; expected 'candidates' or 'record'.", file=sys.stderr) + sys.exit(2) + + +if __name__ == "__main__": + main() diff --git a/tests/platform_integrations/test_provenance.py b/tests/platform_integrations/test_provenance.py new file mode 100644 index 00000000..39762df2 --- /dev/null +++ b/tests/platform_integrations/test_provenance.py @@ -0,0 +1,222 @@ +"""Tests for skills/evolve-lite/provenance/scripts/provenance.py. + +These exercise the rendered Claude provenance.py end to end (lib resolution only +works in the rendered tree). They cover the deterministic plumbing — recall-row +reading, entity resolution, the trajectory locator (BOTH legacy +``.evolve/trajectories/`` and the native ``~/.claude/projects//`` paths), +dedup against existing influence rows, and the ``record`` writer. The semantic +verdict is agent-driven and is NOT tested here (there is no heuristic to test). 
+""" + +import json +import os +import re +import subprocess +import sys +from pathlib import Path + +import pytest + +pytestmark = [pytest.mark.platform_integrations, pytest.mark.e2e] + +_REPO_ROOT = Path(__file__).parent.parent.parent +PROVENANCE_SCRIPT = _REPO_ROOT / "platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py" + + +def _claude_slug(root: Path) -> str: + """Mirror provenance.py / doctor.py slugging: non-alphanumerics -> '-'.""" + return re.sub(r"[^A-Za-z0-9]", "-", str(root)) + + +def run_provenance(mode, *, evolve_dir, home=None, cwd=None, stdin=None): + env = {**os.environ} + env["EVOLVE_DIR"] = str(evolve_dir) + if home is not None: + env["HOME"] = str(home) + env["USERPROFILE"] = str(home) + return subprocess.run( + [sys.executable, str(PROVENANCE_SCRIPT), mode], + input=stdin, + capture_output=True, + text=True, + cwd=str(cwd) if cwd else None, + env=env, + check=False, + ) + + +def parse_jsonl(text): + return [json.loads(line) for line in text.splitlines() if line.strip()] + + +def read_audit(evolve_dir): + path = Path(evolve_dir) / "audit.log" + if not path.is_file(): + return [] + return [json.loads(line) for line in path.read_text(encoding="utf-8").splitlines() if line.strip()] + + +def write_audit(evolve_dir, rows): + path = Path(evolve_dir) / "audit.log" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text("".join(json.dumps(r) + "\n" for r in rows), encoding="utf-8") + + +def write_entity(evolve_dir, entity_id, body="Do the foo thing."): + path = Path(evolve_dir) / "entities" / f"{entity_id}.md" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(f"---\ntype: {entity_id.split('/')[0]}\ntrigger: when foo\n---\n\n{body}\n", encoding="utf-8") + return path + + +class TestCandidatesLegacyTrajectory: + def test_resolves_entity_and_legacy_trajectory(self, tmp_path): + evolve_dir = tmp_path / "proj" / ".evolve" + evolve_dir.mkdir(parents=True) + write_audit(evolve_dir, 
[{"event": "recall", "session_id": "sid-1", "entities": ["feedback/foo"]}]) + write_entity(evolve_dir, "feedback/foo") + traj = evolve_dir / "trajectories" / "claude-transcript_sid-1.jsonl" + traj.parent.mkdir(parents=True) + traj.write_text('{"type":"user","content":"hi"}\n', encoding="utf-8") + + result = run_provenance("candidates", evolve_dir=evolve_dir) + assert result.returncode == 0, result.stderr + candidates = parse_jsonl(result.stdout) + assert len(candidates) == 1 + cand = candidates[0] + assert cand["session_id"] == "sid-1" + assert cand["entity_id"] == "feedback/foo" + assert "Do the foo thing." in cand["entity_excerpt"] + assert cand["trajectory_path"] == str(traj) + assert "hi" in cand["trajectory_excerpt"] + assert "missing" not in cand + + +class TestCandidatesNativeTranscript: + def test_locates_native_claude_transcript(self, tmp_path): + # Sandbox a fake HOME and project root; the native locator builds + # ~/.claude/projects/<slug>/<session_id>.jsonl from the RESOLVED project root. + home = tmp_path / "home" + evolve_dir = tmp_path / "proj" / ".evolve" + evolve_dir.mkdir(parents=True) + write_audit(evolve_dir, [{"event": "recall", "session_id": "nat-1", "entities": ["feedback/bar"]}]) + write_entity(evolve_dir, "feedback/bar", body="bar guidance") + + project_root = evolve_dir.resolve().parent + slug = _claude_slug(project_root) + native = home / ".claude" / "projects" / slug / "nat-1.jsonl" + native.parent.mkdir(parents=True) + native.write_text('{"x":1}\n', encoding="utf-8") + + result = run_provenance("candidates", evolve_dir=evolve_dir, home=home) + assert result.returncode == 0, result.stderr + candidates = parse_jsonl(result.stdout) + assert len(candidates) == 1 + cand = candidates[0] + assert cand["entity_id"] == "feedback/bar" + assert cand["trajectory_path"] == str(native) + assert "missing" not in cand + + +class TestCandidatesMissing: + def test_missing_trajectory_still_emitted(self, tmp_path): + # Empty HOME -> no native transcript, no legacy dir ->
trajectory missing. + home = tmp_path / "home" + home.mkdir() + evolve_dir = tmp_path / "proj" / ".evolve" + evolve_dir.mkdir(parents=True) + write_audit(evolve_dir, [{"event": "recall", "session_id": "sid-x", "entities": ["feedback/foo"]}]) + write_entity(evolve_dir, "feedback/foo") + + result = run_provenance("candidates", evolve_dir=evolve_dir, home=home) + assert result.returncode == 0, result.stderr + candidates = parse_jsonl(result.stdout) + assert len(candidates) == 1 + assert candidates[0]["trajectory_path"] is None + assert candidates[0]["missing"] == ["trajectory"] + + def test_missing_entity_still_emitted(self, tmp_path): + home = tmp_path / "home" + evolve_dir = tmp_path / "proj" / ".evolve" + evolve_dir.mkdir(parents=True) + write_audit(evolve_dir, [{"event": "recall", "session_id": "sid-y", "entities": ["feedback/ghost"]}]) + traj = evolve_dir / "trajectories" / "claude-transcript_sid-y.jsonl" + traj.parent.mkdir(parents=True) + traj.write_text("{}\n", encoding="utf-8") + + result = run_provenance("candidates", evolve_dir=evolve_dir, home=home) + assert result.returncode == 0, result.stderr + candidates = parse_jsonl(result.stdout) + assert len(candidates) == 1 + assert candidates[0]["entity_excerpt"] is None + assert candidates[0]["missing"] == ["entity"] + + +class TestCandidatesDedup: + def test_skips_pairs_with_existing_influence_row(self, tmp_path): + evolve_dir = tmp_path / "proj" / ".evolve" + evolve_dir.mkdir(parents=True) + write_audit( + evolve_dir, + [ + {"event": "recall", "session_id": "sid-1", "entities": ["feedback/foo", "feedback/bar"]}, + {"event": "influence", "session_id": "sid-1", "entity": "feedback/foo", "verdict": "followed", "evidence": "x"}, + ], + ) + write_entity(evolve_dir, "feedback/foo") + write_entity(evolve_dir, "feedback/bar") + + result = run_provenance("candidates", evolve_dir=evolve_dir, home=tmp_path / "home") + assert result.returncode == 0, result.stderr + candidates = parse_jsonl(result.stdout) + ids = 
{c["entity_id"] for c in candidates} + # feedback/foo already assessed -> only feedback/bar remains. + assert ids == {"feedback/bar"} + + +class TestRecord: + def test_writes_valid_influence_row(self, tmp_path): + evolve_dir = tmp_path / "proj" / ".evolve" + evolve_dir.mkdir(parents=True) + payload = { + "session_id": "sid-1", + "entity": "feedback/foo", + "verdict": "followed", + "evidence": "Agent used the saved parser first.", + } + result = run_provenance("record", evolve_dir=evolve_dir, stdin=json.dumps(payload)) + assert result.returncode == 0, result.stderr + events = read_audit(evolve_dir) + assert len(events) == 1 + row = events[0] + assert row["event"] == "influence" + assert row["session_id"] == "sid-1" + assert row["entity"] == "feedback/foo" + assert row["verdict"] == "followed" + assert row["evidence"] == "Agent used the saved parser first." + assert "ts" in row + + def test_rejects_invalid_verdict(self, tmp_path): + evolve_dir = tmp_path / "proj" / ".evolve" + evolve_dir.mkdir(parents=True) + payload = {"session_id": "sid-1", "entity": "feedback/foo", "verdict": "bogus", "evidence": "no"} + result = run_provenance("record", evolve_dir=evolve_dir, stdin=json.dumps(payload)) + assert result.returncode == 1 + assert "verdict" in result.stderr.lower() + assert read_audit(evolve_dir) == [] + + def test_record_dedups_existing_pair(self, tmp_path): + evolve_dir = tmp_path / "proj" / ".evolve" + evolve_dir.mkdir(parents=True) + payload = {"session_id": "sid-1", "entity": "feedback/foo", "verdict": "followed", "evidence": "e"} + first = run_provenance("record", evolve_dir=evolve_dir, stdin=json.dumps(payload)) + second = run_provenance( + "record", + evolve_dir=evolve_dir, + stdin=json.dumps({**payload, "verdict": "contradicted", "evidence": "e2"}), + ) + assert first.returncode == 0, first.stderr + assert second.returncode == 0, second.stderr + events = read_audit(evolve_dir) + assert len(events) == 1 + assert events[0]["verdict"] == "followed" From 
e5110e42b33bed2cb35a1fdd27fd30d8e0247b84 Mon Sep 17 00:00:00 2001 From: Punleuk Oum <5661986+illeatmyhat@users.noreply.github.com> Date: Mon, 8 Jun 2026 13:40:24 -0700 Subject: [PATCH 07/12] test(platform-integrations): end-to-end chain test + run provenance/e2e guards in CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add test_end_to_end_claude.py: drives the REAL rendered Claude scripts as subprocesses in sequence (save->adapt->audit->provenance->record) and asserts the entity id stays identical across adapt_memory, audit_recall, and provenance, that the native-transcript locator resolves, and that record+dedup closes the loop. A second test asserts gaps are surfaced (missing entity/trajectory) not dropped. No production code needed — the chain closes as built. Also drop the e2e marker from test_provenance.py and the new chain test: CI runs pytest with the default '-m not llm and not e2e' filter, so e2e-marked tests never execute in CI. These are sandboxed and fast (no real CLI/network), so they belong in the default suite as guards — matching test_doctor/test_entity_io_core. Default suite: 242 -> 252. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../test_end_to_end_claude.py | 289 ++++++++++++++++++ .../platform_integrations/test_provenance.py | 2 +- 2 files changed, 290 insertions(+), 1 deletion(-) create mode 100644 tests/platform_integrations/test_end_to_end_claude.py diff --git a/tests/platform_integrations/test_end_to_end_claude.py b/tests/platform_integrations/test_end_to_end_claude.py new file mode 100644 index 00000000..2d68c3a0 --- /dev/null +++ b/tests/platform_integrations/test_end_to_end_claude.py @@ -0,0 +1,289 @@ +"""End-to-end data-flow test for the rendered Claude evolve-lite scripts. + +This is the ONE integration test that proves the correlation ids line up across +the whole chain on Claude — the integration that was broken in the pre-redesign +world (native transcript path vs. 
entity id) and the reason the hookless redesign +exists. It drives the REAL rendered Claude scripts as subprocesses, in sequence, +with nothing mocked in the data flow: + + adapt_memory.py -> mirrors a native memory into the evolve store, emitting + the entity id ``feedback/prefer-ripgrep``. + audit_recall.py -> records a ``recall`` row keyed by that exact entity id + and the host session id. + provenance.py -> reads the recall row, resolves the mirrored entity AND + the NATIVE Claude transcript, and emits exactly one + candidate whose ids line up end to end. + provenance.py -> records a ``followed`` verdict, then dedups the pair. + +Lib resolution (``lib/evolve-lite/entity_io.py``) only works in the rendered +tree, so we point at the rendered Claude copies under ``platform-integrations/``. + +The scripts are driven as real subprocesses (closest to actual agent usage); +nothing in the data flow is mocked. +""" + +import json +import os +import re +import subprocess +import sys +from pathlib import Path + +import pytest + +pytestmark = [pytest.mark.platform_integrations] + +_REPO_ROOT = Path(__file__).parent.parent.parent +_PLUGIN = _REPO_ROOT / "platform-integrations/claude/plugins/evolve-lite" +ADAPT_SCRIPT = _PLUGIN / "skills/evolve-lite/adapt-memory/scripts/adapt_memory.py" +AUDIT_SCRIPT = _PLUGIN / "scripts/audit_recall.py" +PROVENANCE_SCRIPT = _PLUGIN / "skills/evolve-lite/provenance/scripts/provenance.py" + +SID = "claude-e2e-session-0001" + +NATIVE_MEMORY = """\ +--- +name: prefer-ripgrep +description: use ripgrep over grep +metadata: + type: feedback +--- +Always reach for ripgrep (rg) instead of grep. +""" + + +def _claude_slug(root: Path) -> str: + """Mirror provenance.py / doctor.py slugging: non-alphanumerics -> '-'.""" + return re.sub(r"[^A-Za-z0-9]", "-", str(root)) + + +def _run(script: Path, *args, evolve_dir: Path, home: Path, cwd: Path, stdin=None, sid=None): + """Run a rendered Claude script as a real subprocess in the sandbox. 
+ + Every host path is sandboxed: ``$EVOLVE_DIR`` points at the temp store, + ``$HOME``/``$USERPROFILE`` at a sandboxed home, cwd at the temp project root, + and ``$CLAUDE_CODE_SESSION_ID`` at a known SID when supplied. + """ + env = {**os.environ} + env["EVOLVE_DIR"] = str(evolve_dir) + env["HOME"] = str(home) + env["USERPROFILE"] = str(home) + env.pop("HOMEDRIVE", None) + env.pop("HOMEPATH", None) + if sid is not None: + env["CLAUDE_CODE_SESSION_ID"] = sid + else: + env.pop("CLAUDE_CODE_SESSION_ID", None) + return subprocess.run( + [sys.executable, str(script), *args], + input=stdin, + capture_output=True, + text=True, + cwd=str(cwd), + env=env, + check=False, + ) + + +def _parse_jsonl(text: str): + return [json.loads(line) for line in text.splitlines() if line.strip()] + + +def _read_audit(evolve_dir: Path): + path = evolve_dir / "audit.log" + if not path.is_file(): + return [] + return _parse_jsonl(path.read_text(encoding="utf-8")) + + +@pytest.fixture +def sandbox(tmp_path, sandbox_home): + """Build the sandbox dirs the chain needs and return the salient paths. + + ``sandbox_home`` (autouse) already redirects ``$HOME``; we reuse it as the + home that holds the native Claude transcript tree. The project root lives + under tmp_path with its own ``.evolve`` store, kept separate from HOME so + the native-transcript slug (derived from the project root) is exercised for + real. + """ + project_root = tmp_path / "proj" + project_root.mkdir() + evolve_dir = project_root / ".evolve" + evolve_dir.mkdir() + return { + "home": sandbox_home, + "project_root": project_root, + "evolve_dir": evolve_dir, + } + + +def test_chain_closes_ids_line_up(sandbox): + """The whole chain closes: the entity adapt() creates is the entity audit() + records is the entity provenance() resolves against the native transcript. + + Steps (each runs the real rendered script as a subprocess): + 1. save — write the native Claude memory file. + 2. 
adapt — mirror it; assert entities/feedback/prefer-ripgrep.md exists and + the printed entity id is ``feedback/prefer-ripgrep``. + 3. audit — record a recall row for that exact entity id under the SID. + 4. native transcript — drop ~/.claude/projects/<slug>/<session_id>.jsonl. + 5. candidates — assert EXACTLY ONE candidate whose entity_id == + ``feedback/prefer-ripgrep``, whose excerpt holds the mirrored + content, whose trajectory_path is the native transcript, with + NO ``missing`` field (entity + trajectory both resolved). This + is the id-alignment assertion. + 6. record + dedup — pipe a ``followed`` verdict; assert an influence row is + appended; re-run candidates and assert it's now empty. + """ + home = sandbox["home"] + project_root = sandbox["project_root"] + evolve_dir = sandbox["evolve_dir"] + + # --- 1. save: native memory file (Claude format) ------------------------ + native_file = project_root / "native_memory.md" + native_file.write_text(NATIVE_MEMORY, encoding="utf-8") + + # --- 2. adapt: mirror native memory into the evolve store --------------- + adapt = _run( + ADAPT_SCRIPT, + str(native_file), + "--type", + "feedback", + "--trigger", + "when searching code, prefer ripgrep", + evolve_dir=evolve_dir, + home=home, + cwd=project_root, + ) + assert adapt.returncode == 0, adapt.stderr + + mirrored = evolve_dir / "entities" / "feedback" / "prefer-ripgrep.md" + assert mirrored.is_file(), f"adapt did not mirror the entity: {adapt.stdout}\n{adapt.stderr}" + + # Capture the entity id from adapt's stdout ("Entity id: <id>"). + id_lines = [ln for ln in adapt.stdout.splitlines() if ln.startswith("Entity id:")] + assert id_lines, f"adapt did not print an entity id:\n{adapt.stdout}" + adapted_entity_id = id_lines[0].split("Entity id:", 1)[1].strip() + assert adapted_entity_id == "feedback/prefer-ripgrep" + + # --- 3.
audit: record a recall row for that exact entity id ------------- + audit = _run( + AUDIT_SCRIPT, + adapted_entity_id, # exactly as EVOLVE.md instructs the agent to pass it + evolve_dir=evolve_dir, + home=home, + cwd=project_root, + sid=SID, + ) + assert audit.returncode == 0, audit.stderr + + recall_rows = [r for r in _read_audit(evolve_dir) if r.get("event") == "recall"] + assert len(recall_rows) == 1, _read_audit(evolve_dir) + assert recall_rows[0]["session_id"] == SID + assert recall_rows[0]["entities"] == ["feedback/prefer-ripgrep"] + + # --- 4. native transcript fixture --------------------------------------- + slug = _claude_slug(project_root) + native_transcript = home / ".claude" / "projects" / slug / f"{SID}.jsonl" + native_transcript.parent.mkdir(parents=True) + native_transcript.write_text( + '{"type":"user","message":{"role":"user","content":"search the repo for TODOs"}}\n' + '{"type":"assistant","message":{"role":"assistant","content":"Using rg to search."}}\n', + encoding="utf-8", + ) + + # --- 5. candidates: the id-alignment assertion -------------------------- + cand_result = _run( + PROVENANCE_SCRIPT, + "candidates", + evolve_dir=evolve_dir, + home=home, + cwd=project_root, + ) + assert cand_result.returncode == 0, cand_result.stderr + candidates = _parse_jsonl(cand_result.stdout) + assert len(candidates) == 1, f"expected exactly one candidate, got: {candidates}" + cand = candidates[0] + + # KEY ASSERTION: the entity adapt() created == the entity audit() recorded + # == the entity provenance() resolved, and the native transcript located by + # the resolved project-root slug lines up with the audited session id. + assert cand["session_id"] == SID + assert cand["entity_id"] == adapted_entity_id == "feedback/prefer-ripgrep" + assert "Always reach for ripgrep (rg) instead of grep." 
in cand["entity_excerpt"] + assert cand["trajectory_path"] == str(native_transcript) + assert "rg to search" in cand["trajectory_excerpt"] + assert "missing" not in cand, f"chain did not fully resolve: {cand}" + + # --- 6. record a verdict, then assert dedup ----------------------------- + verdict = { + "session_id": SID, + "entity": adapted_entity_id, + "verdict": "followed", + "evidence": "Assistant used rg (ripgrep) to search the repo.", + } + record = _run( + PROVENANCE_SCRIPT, + "record", + evolve_dir=evolve_dir, + home=home, + cwd=project_root, + stdin=json.dumps(verdict), + ) + assert record.returncode == 0, record.stderr + + influence_rows = [r for r in _read_audit(evolve_dir) if r.get("event") == "influence"] + assert len(influence_rows) == 1, _read_audit(evolve_dir) + assert influence_rows[0]["session_id"] == SID + assert influence_rows[0]["entity"] == "feedback/prefer-ripgrep" + assert influence_rows[0]["verdict"] == "followed" + + # Re-run candidates: the judged pair is deduped -> nothing left. + cand_again = _run( + PROVENANCE_SCRIPT, + "candidates", + evolve_dir=evolve_dir, + home=home, + cwd=project_root, + ) + assert cand_again.returncode == 0, cand_again.stderr + assert _parse_jsonl(cand_again.stdout) == [], cand_again.stdout + + +def test_candidates_surface_gaps_when_nothing_lines_up(sandbox): + """Negative/robustness: when the audited entity id was NEVER mirrored AND no + transcript exists, the candidate is still emitted with ``missing`` listing + BOTH ``entity`` and ``trajectory`` — the chain surfaces gaps instead of + silently dropping them. + """ + home = sandbox["home"] + project_root = sandbox["project_root"] + evolve_dir = sandbox["evolve_dir"] + + # Record a recall for an entity id that was never adapted/mirrored, with no + # native transcript on disk for the session. 
+ audit = _run( + AUDIT_SCRIPT, + "feedback/does-not-exist", + evolve_dir=evolve_dir, + home=home, + cwd=project_root, + sid="ghost-session-0002", + ) + assert audit.returncode == 0, audit.stderr + + cand_result = _run( + PROVENANCE_SCRIPT, + "candidates", + evolve_dir=evolve_dir, + home=home, + cwd=project_root, + ) + assert cand_result.returncode == 0, cand_result.stderr + candidates = _parse_jsonl(cand_result.stdout) + assert len(candidates) == 1, candidates + cand = candidates[0] + assert cand["entity_id"] == "feedback/does-not-exist" + assert cand["entity_excerpt"] is None + assert cand["trajectory_path"] is None + assert set(cand["missing"]) == {"entity", "trajectory"} diff --git a/tests/platform_integrations/test_provenance.py b/tests/platform_integrations/test_provenance.py index 39762df2..5636f584 100644 --- a/tests/platform_integrations/test_provenance.py +++ b/tests/platform_integrations/test_provenance.py @@ -17,7 +17,7 @@ import pytest -pytestmark = [pytest.mark.platform_integrations, pytest.mark.e2e] +pytestmark = [pytest.mark.platform_integrations] _REPO_ROOT = Path(__file__).parent.parent.parent PROVENANCE_SCRIPT = _REPO_ROOT / "platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py" From 37f7ba4ad5e639a78c7e7da209326c20668345d7 Mon Sep 17 00:00:00 2001 From: Punleuk Oum <5661986+illeatmyhat@users.noreply.github.com> Date: Tue, 9 Jun 2026 05:00:18 -0700 Subject: [PATCH 08/12] refactor(platform-integrations): ship audit_recall.py from lib/ not a top-level scripts/ dir audit_recall.py is a self-contained, model-invoked executable; it lived in a new top-level plugin-source/scripts/ dir that existed only for this one file. Move it next to the shared lib (plugin-source/lib/ -> lib/evolve-lite/ on every host) so it ships alongside entity_io/audit/config instead of carving out a parallel scripts/ tree. 
The installed, model-facing path is UNCHANGED: the installer still drops it at ~/.{claude,codex,bob}/evolve-lite/audit_recall.py (no lib/ segment) and EVOLVE.md still invokes it there. Only the rendered SOURCE location moved; installer source paths and three test path constants updated to match. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../{scripts => lib/evolve-lite}/audit_recall.py | 0 .../{scripts => lib/evolve-lite}/audit_recall.py | 0 .../{scripts => lib/evolve-lite}/audit_recall.py | 0 .../{scripts => lib/evolve-lite}/audit_recall.py | 0 platform-integrations/install.sh | 12 ++++++------ plugin-source/{scripts => lib}/audit_recall.py | 0 tests/platform_integrations/test_audit_recall.py | 2 +- tests/platform_integrations/test_codex.py | 5 +++-- .../platform_integrations/test_end_to_end_claude.py | 2 +- 9 files changed, 11 insertions(+), 10 deletions(-) rename platform-integrations/bob/evolve-lite/{scripts => lib/evolve-lite}/audit_recall.py (100%) rename platform-integrations/claude/plugins/evolve-lite/{scripts => lib/evolve-lite}/audit_recall.py (100%) rename platform-integrations/claw-code/plugins/evolve-lite/{scripts => lib/evolve-lite}/audit_recall.py (100%) rename platform-integrations/codex/plugins/evolve-lite/{scripts => lib/evolve-lite}/audit_recall.py (100%) rename plugin-source/{scripts => lib}/audit_recall.py (100%) diff --git a/platform-integrations/bob/evolve-lite/scripts/audit_recall.py b/platform-integrations/bob/evolve-lite/lib/evolve-lite/audit_recall.py similarity index 100% rename from platform-integrations/bob/evolve-lite/scripts/audit_recall.py rename to platform-integrations/bob/evolve-lite/lib/evolve-lite/audit_recall.py diff --git a/platform-integrations/claude/plugins/evolve-lite/scripts/audit_recall.py b/platform-integrations/claude/plugins/evolve-lite/lib/evolve-lite/audit_recall.py similarity index 100% rename from platform-integrations/claude/plugins/evolve-lite/scripts/audit_recall.py rename to 
platform-integrations/claude/plugins/evolve-lite/lib/evolve-lite/audit_recall.py diff --git a/platform-integrations/claw-code/plugins/evolve-lite/scripts/audit_recall.py b/platform-integrations/claw-code/plugins/evolve-lite/lib/evolve-lite/audit_recall.py similarity index 100% rename from platform-integrations/claw-code/plugins/evolve-lite/scripts/audit_recall.py rename to platform-integrations/claw-code/plugins/evolve-lite/lib/evolve-lite/audit_recall.py diff --git a/platform-integrations/codex/plugins/evolve-lite/scripts/audit_recall.py b/platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/audit_recall.py similarity index 100% rename from platform-integrations/codex/plugins/evolve-lite/scripts/audit_recall.py rename to platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/audit_recall.py diff --git a/platform-integrations/install.sh b/platform-integrations/install.sh index 69f3b29f..bc042a8b 100755 --- a/platform-integrations/install.sh +++ b/platform-integrations/install.sh @@ -816,9 +816,9 @@ class BobInstaller: # install the script once at that GLOBAL absolute path (matching # the always-global rules file). Prefer the rendered bob copy; # fall back to the shared plugin-source original. - audit_src = bob_source_lite / "scripts" / AUDIT_SCRIPT + audit_src = bob_source_lite / "lib" / "evolve-lite" / AUDIT_SCRIPT if not self.ops.is_dry_run and not audit_src.is_file(): - audit_src = Path(source_dir) / "plugin-source" / "scripts" / AUDIT_SCRIPT + audit_src = Path(source_dir) / "plugin-source" / "lib" / AUDIT_SCRIPT audit_file = self._audit_script_file() if not self.ops.is_dry_run: self.ops.atomic_write_text(audit_file, audit_src.read_text()) @@ -950,9 +950,9 @@ class ClaudeInstaller: # `~/.claude/evolve-lite/audit_recall.py`, so install it at that GLOBAL # absolute path (mirroring CodexInstaller). Prefer the rendered claude # copy; fall back to the shared plugin-source original. 
- audit_src = plugin_source / "scripts" / AUDIT_SCRIPT + audit_src = plugin_source / "lib" / "evolve-lite" / AUDIT_SCRIPT if not audit_src.is_file(): - audit_src = Path(source_dir) / "plugin-source" / "scripts" / AUDIT_SCRIPT + audit_src = Path(source_dir) / "plugin-source" / "lib" / AUDIT_SCRIPT audit_text = "" if self.ops.is_dry_run and not audit_src.is_file() else audit_src.read_text() audit_file = Path.home() / ".claude" / "evolve-lite" / AUDIT_SCRIPT self.ops.atomic_write_text(audit_file, audit_text) @@ -1177,9 +1177,9 @@ class CodexInstaller: # install the script at that GLOBAL absolute path (matching how the # always-on instructions live globally). Prefer the rendered codex # copy; fall back to the shared plugin-source original. - audit_src = plugin_source / "scripts" / AUDIT_SCRIPT + audit_src = plugin_source / "lib" / "evolve-lite" / AUDIT_SCRIPT if not audit_src.is_file(): - audit_src = Path(source_dir) / "plugin-source" / "scripts" / AUDIT_SCRIPT + audit_src = Path(source_dir) / "plugin-source" / "lib" / AUDIT_SCRIPT audit_text = "" if self.ops.is_dry_run and not audit_src.is_file() else audit_src.read_text() audit_file = Path.home() / ".codex" / "evolve-lite" / AUDIT_SCRIPT self.ops.atomic_write_text(audit_file, audit_text) diff --git a/plugin-source/scripts/audit_recall.py b/plugin-source/lib/audit_recall.py similarity index 100% rename from plugin-source/scripts/audit_recall.py rename to plugin-source/lib/audit_recall.py diff --git a/tests/platform_integrations/test_audit_recall.py b/tests/platform_integrations/test_audit_recall.py index c3bf73c4..66e23610 100644 --- a/tests/platform_integrations/test_audit_recall.py +++ b/tests/platform_integrations/test_audit_recall.py @@ -14,7 +14,7 @@ import pytest -_SCRIPT = Path(__file__).parent.parent.parent / "plugin-source" / "scripts" / "audit_recall.py" +_SCRIPT = Path(__file__).parent.parent.parent / "plugin-source" / "lib" / "audit_recall.py" def _run(cwd, args, env_overrides): diff --git 
a/tests/platform_integrations/test_codex.py b/tests/platform_integrations/test_codex.py index 47dec218..2f5a7440 100644 --- a/tests/platform_integrations/test_codex.py +++ b/tests/platform_integrations/test_codex.py @@ -70,8 +70,9 @@ def test_install_creates_expected_files( file_assertions.assert_file_exists(plugin_dir / "skills" / "evolve-lite" / "learn" / "scripts" / "save_entities.py") file_assertions.assert_file_exists(plugin_dir / "skills" / "evolve-lite" / "recall" / "scripts" / "retrieve_entities.py") file_assertions.assert_file_exists(plugin_dir / "lib" / "evolve-lite" / "entity_io.py") - # The recall-audit script ships in the plugin tree too (root-level scripts/). - file_assertions.assert_file_exists(plugin_dir / "scripts" / "audit_recall.py") + # The recall-audit script ships in the plugin tree too, alongside the + # shared lib (lib/evolve-lite/). + file_assertions.assert_file_exists(plugin_dir / "lib" / "evolve-lite" / "audit_recall.py") marketplace_path = temp_project_dir / ".agents" / "plugins" / "marketplace.json" file_assertions.assert_valid_json(marketplace_path) diff --git a/tests/platform_integrations/test_end_to_end_claude.py b/tests/platform_integrations/test_end_to_end_claude.py index 2d68c3a0..30f527a4 100644 --- a/tests/platform_integrations/test_end_to_end_claude.py +++ b/tests/platform_integrations/test_end_to_end_claude.py @@ -36,7 +36,7 @@ _REPO_ROOT = Path(__file__).parent.parent.parent _PLUGIN = _REPO_ROOT / "platform-integrations/claude/plugins/evolve-lite" ADAPT_SCRIPT = _PLUGIN / "skills/evolve-lite/adapt-memory/scripts/adapt_memory.py" -AUDIT_SCRIPT = _PLUGIN / "scripts/audit_recall.py" +AUDIT_SCRIPT = _PLUGIN / "lib/evolve-lite/audit_recall.py" PROVENANCE_SCRIPT = _PLUGIN / "skills/evolve-lite/provenance/scripts/provenance.py" SID = "claude-e2e-session-0001" From eebdfc0605b218d3427a0668112b6689c471ab24 Mon Sep 17 00:00:00 2001 From: Punleuk Oum <5661986+illeatmyhat@users.noreply.github.com> Date: Tue, 9 Jun 2026 05:16:11 -0700 
Subject: [PATCH 09/12] fix(platform-integrations): address CodeRabbit review on PR #266 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - entity_io.write_entity_file: sanitize the explicit `filename` arg through slugify() to harden against path traversal (.., /, \ collapse to a safe single segment); slugify is idempotent on already-slugged input so all existing callers/tests stay green. - provenance.read_recall_rows: fix docstring — it returns a list, not a generator, so "Yield ..." becomes "Return a list of ... tuples ...". - EVOLVE.md.j2: add `bash` language to the bare fenced audit_recall command blocks so all rendered platform EVOLVE.md files get a fenced language. Co-Authored-By: Claude Opus 4.8 (1M context) --- platform-integrations/bob/evolve-lite/EVOLVE.md | 2 +- .../bob/evolve-lite/lib/evolve-lite/entity_io.py | 2 +- .../skills/evolve-lite-provenance/scripts/provenance.py | 2 +- platform-integrations/claude/plugins/evolve-lite/EVOLVE.md | 2 +- .../claude/plugins/evolve-lite/lib/evolve-lite/entity_io.py | 2 +- .../skills/evolve-lite/provenance/scripts/provenance.py | 2 +- platform-integrations/claw-code/plugins/evolve-lite/EVOLVE.md | 2 +- .../plugins/evolve-lite/lib/evolve-lite/entity_io.py | 2 +- .../skills/evolve-lite/provenance/scripts/provenance.py | 2 +- platform-integrations/codex/plugins/evolve-lite/EVOLVE.md | 2 +- .../codex/plugins/evolve-lite/lib/evolve-lite/entity_io.py | 2 +- .../skills/evolve-lite/provenance/scripts/provenance.py | 2 +- plugin-source/EVOLVE.md.j2 | 4 ++-- plugin-source/lib/entity_io.py | 2 +- .../skills/evolve-lite/provenance/scripts/provenance.py | 2 +- 15 files changed, 16 insertions(+), 16 deletions(-) diff --git a/platform-integrations/bob/evolve-lite/EVOLVE.md b/platform-integrations/bob/evolve-lite/EVOLVE.md index 94073d4c..a85f2ed7 100644 --- a/platform-integrations/bob/evolve-lite/EVOLVE.md +++ b/platform-integrations/bob/evolve-lite/EVOLVE.md @@ -23,7 +23,7 @@ function, 
command, or flag, verify it still exists before relying on it. After recall, log which entries you actually opened, so the value of this memory can be measured over time. Run: -``` +```bash python3 ~/.bob/evolve-lite/audit_recall.py [ ...] ``` diff --git a/platform-integrations/bob/evolve-lite/lib/evolve-lite/entity_io.py b/platform-integrations/bob/evolve-lite/lib/evolve-lite/entity_io.py index 8887caf0..0d4ccace 100644 --- a/platform-integrations/bob/evolve-lite/lib/evolve-lite/entity_io.py +++ b/platform-integrations/bob/evolve-lite/lib/evolve-lite/entity_io.py @@ -382,7 +382,7 @@ def write_entity_file(directory, entity, filename=None, overwrite=False): type_dir = Path(directory) / entity_type type_dir.mkdir(parents=True, exist_ok=True) - slug = filename if filename else slugify(entity.get("content", "entity")) + slug = slugify(filename) if filename else slugify(entity.get("content", "entity")) content = entity_to_markdown(entity) # Write to a unique temp file first (avoids predictable .tmp collisions) diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite-provenance/scripts/provenance.py b/platform-integrations/bob/evolve-lite/skills/evolve-lite-provenance/scripts/provenance.py index 21ed024e..c2272501 100644 --- a/platform-integrations/bob/evolve-lite/skills/evolve-lite-provenance/scripts/provenance.py +++ b/platform-integrations/bob/evolve-lite/skills/evolve-lite-provenance/scripts/provenance.py @@ -148,7 +148,7 @@ def locate_trajectory(session_id, evolve_dir, *, project_root=None, home=None): def read_recall_rows(evolve_dir): - """Yield ``(session_id, [entity_id, ...])`` for every ``recall`` audit row. + """Return a list of ``(session_id, [entity_id, ...])`` tuples for every ``recall`` audit row. Rows with no ``session_id`` or an empty ``entities`` list are skipped. 
""" diff --git a/platform-integrations/claude/plugins/evolve-lite/EVOLVE.md b/platform-integrations/claude/plugins/evolve-lite/EVOLVE.md index 5b3ff387..fbc810fe 100644 --- a/platform-integrations/claude/plugins/evolve-lite/EVOLVE.md +++ b/platform-integrations/claude/plugins/evolve-lite/EVOLVE.md @@ -18,7 +18,7 @@ once per memory you saved. After you read or consult native memories this turn, log which ones you actually opened, so the value of this memory can be measured over time. Run: -``` +```bash python3 ~/.claude/evolve-lite/audit_recall.py [ ...] ``` diff --git a/platform-integrations/claude/plugins/evolve-lite/lib/evolve-lite/entity_io.py b/platform-integrations/claude/plugins/evolve-lite/lib/evolve-lite/entity_io.py index 8887caf0..0d4ccace 100644 --- a/platform-integrations/claude/plugins/evolve-lite/lib/evolve-lite/entity_io.py +++ b/platform-integrations/claude/plugins/evolve-lite/lib/evolve-lite/entity_io.py @@ -382,7 +382,7 @@ def write_entity_file(directory, entity, filename=None, overwrite=False): type_dir = Path(directory) / entity_type type_dir.mkdir(parents=True, exist_ok=True) - slug = filename if filename else slugify(entity.get("content", "entity")) + slug = slugify(filename) if filename else slugify(entity.get("content", "entity")) content = entity_to_markdown(entity) # Write to a unique temp file first (avoids predictable .tmp collisions) diff --git a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py index 21ed024e..c2272501 100644 --- a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py +++ b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py @@ -148,7 +148,7 @@ def locate_trajectory(session_id, evolve_dir, *, project_root=None, home=None): def read_recall_rows(evolve_dir): - """Yield 
``(session_id, [entity_id, ...])`` for every ``recall`` audit row. + """Return a list of ``(session_id, [entity_id, ...])`` tuples for every ``recall`` audit row. Rows with no ``session_id`` or an empty ``entities`` list are skipped. """ diff --git a/platform-integrations/claw-code/plugins/evolve-lite/EVOLVE.md b/platform-integrations/claw-code/plugins/evolve-lite/EVOLVE.md index 7b6417f1..3192ad3c 100644 --- a/platform-integrations/claw-code/plugins/evolve-lite/EVOLVE.md +++ b/platform-integrations/claw-code/plugins/evolve-lite/EVOLVE.md @@ -23,7 +23,7 @@ function, command, or flag, verify it still exists before relying on it. After recall, log which entries you actually opened, so the value of this memory can be measured over time. Run: -``` +```bash python3 ~/.claw/evolve-lite/audit_recall.py [ ...] ``` diff --git a/platform-integrations/claw-code/plugins/evolve-lite/lib/evolve-lite/entity_io.py b/platform-integrations/claw-code/plugins/evolve-lite/lib/evolve-lite/entity_io.py index 8887caf0..0d4ccace 100644 --- a/platform-integrations/claw-code/plugins/evolve-lite/lib/evolve-lite/entity_io.py +++ b/platform-integrations/claw-code/plugins/evolve-lite/lib/evolve-lite/entity_io.py @@ -382,7 +382,7 @@ def write_entity_file(directory, entity, filename=None, overwrite=False): type_dir = Path(directory) / entity_type type_dir.mkdir(parents=True, exist_ok=True) - slug = filename if filename else slugify(entity.get("content", "entity")) + slug = slugify(filename) if filename else slugify(entity.get("content", "entity")) content = entity_to_markdown(entity) # Write to a unique temp file first (avoids predictable .tmp collisions) diff --git a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py index 21ed024e..c2272501 100644 --- 
a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py +++ b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py @@ -148,7 +148,7 @@ def locate_trajectory(session_id, evolve_dir, *, project_root=None, home=None): def read_recall_rows(evolve_dir): - """Yield ``(session_id, [entity_id, ...])`` for every ``recall`` audit row. + """Return a list of ``(session_id, [entity_id, ...])`` tuples for every ``recall`` audit row. Rows with no ``session_id`` or an empty ``entities`` list are skipped. """ diff --git a/platform-integrations/codex/plugins/evolve-lite/EVOLVE.md b/platform-integrations/codex/plugins/evolve-lite/EVOLVE.md index c262f48f..0c6c99e4 100644 --- a/platform-integrations/codex/plugins/evolve-lite/EVOLVE.md +++ b/platform-integrations/codex/plugins/evolve-lite/EVOLVE.md @@ -23,7 +23,7 @@ function, command, or flag, verify it still exists before relying on it. After recall, log which entries you actually opened, so the value of this memory can be measured over time. Run: -``` +```bash python3 ~/.codex/evolve-lite/audit_recall.py [ ...] 
``` diff --git a/platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/entity_io.py b/platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/entity_io.py index 8887caf0..0d4ccace 100644 --- a/platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/entity_io.py +++ b/platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/entity_io.py @@ -382,7 +382,7 @@ def write_entity_file(directory, entity, filename=None, overwrite=False): type_dir = Path(directory) / entity_type type_dir.mkdir(parents=True, exist_ok=True) - slug = filename if filename else slugify(entity.get("content", "entity")) + slug = slugify(filename) if filename else slugify(entity.get("content", "entity")) content = entity_to_markdown(entity) # Write to a unique temp file first (avoids predictable .tmp collisions) diff --git a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py index 21ed024e..c2272501 100644 --- a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py +++ b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py @@ -148,7 +148,7 @@ def locate_trajectory(session_id, evolve_dir, *, project_root=None, home=None): def read_recall_rows(evolve_dir): - """Yield ``(session_id, [entity_id, ...])`` for every ``recall`` audit row. + """Return a list of ``(session_id, [entity_id, ...])`` tuples for every ``recall`` audit row. Rows with no ``session_id`` or an empty ``entities`` list are skipped. """ diff --git a/plugin-source/EVOLVE.md.j2 b/plugin-source/EVOLVE.md.j2 index b6d3b3a9..a7597948 100644 --- a/plugin-source/EVOLVE.md.j2 +++ b/plugin-source/EVOLVE.md.j2 @@ -20,7 +20,7 @@ once per memory you saved. 
After you read or consult native memories this turn, log which ones you actually opened, so the value of this memory can be measured over time. Run: -``` +```bash python3 {{ audit_script }} [ ...] ``` @@ -57,7 +57,7 @@ function, command, or flag, verify it still exists before relying on it. After recall, log which entries you actually opened, so the value of this memory can be measured over time. Run: -``` +```bash python3 {{ audit_script }} [ ...] ``` diff --git a/plugin-source/lib/entity_io.py b/plugin-source/lib/entity_io.py index 8887caf0..0d4ccace 100644 --- a/plugin-source/lib/entity_io.py +++ b/plugin-source/lib/entity_io.py @@ -382,7 +382,7 @@ def write_entity_file(directory, entity, filename=None, overwrite=False): type_dir = Path(directory) / entity_type type_dir.mkdir(parents=True, exist_ok=True) - slug = filename if filename else slugify(entity.get("content", "entity")) + slug = slugify(filename) if filename else slugify(entity.get("content", "entity")) content = entity_to_markdown(entity) # Write to a unique temp file first (avoids predictable .tmp collisions) diff --git a/plugin-source/skills/evolve-lite/provenance/scripts/provenance.py b/plugin-source/skills/evolve-lite/provenance/scripts/provenance.py index 21ed024e..c2272501 100644 --- a/plugin-source/skills/evolve-lite/provenance/scripts/provenance.py +++ b/plugin-source/skills/evolve-lite/provenance/scripts/provenance.py @@ -148,7 +148,7 @@ def locate_trajectory(session_id, evolve_dir, *, project_root=None, home=None): def read_recall_rows(evolve_dir): - """Yield ``(session_id, [entity_id, ...])`` for every ``recall`` audit row. + """Return a list of ``(session_id, [entity_id, ...])`` tuples for every ``recall`` audit row. Rows with no ``session_id`` or an empty ``entities`` list are skipped. 
""" From 63176c6afec832d1b4f76f1aa3a6fded671d1372 Mon Sep 17 00:00:00 2001 From: Punleuk Oum <5661986+illeatmyhat@users.noreply.github.com> Date: Tue, 9 Jun 2026 05:23:19 -0700 Subject: [PATCH 10/12] fix(platform-integrations): scope Claude-only doctor skill out of codex/bob plugins doctor's @import-canary diagnostic greps ~/.claude transcripts and is Claude-specific, so it's meaningless on codex/bob (Codex uses an ~/.codex/AGENTS.md pointer); exclude it from those plugins (and bob's auto-generated command), addressing CodeRabbit's Critical on PR #266. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../commands/evolve-lite-doctor.md | 4 - .../skills/evolve-lite-doctor/SKILL.md | 12 -- .../evolve-lite-doctor/scripts/doctor.py | 188 ------------------ .../skills/evolve-lite/doctor/SKILL.md | 12 -- .../evolve-lite/doctor/scripts/doctor.py | 188 ------------------ plugin-source/build_plugins.py | 22 +- .../test_build_pipeline.py | 19 +- 7 files changed, 33 insertions(+), 412 deletions(-) delete mode 100644 platform-integrations/bob/evolve-lite/commands/evolve-lite-doctor.md delete mode 100644 platform-integrations/bob/evolve-lite/skills/evolve-lite-doctor/SKILL.md delete mode 100644 platform-integrations/bob/evolve-lite/skills/evolve-lite-doctor/scripts/doctor.py delete mode 100644 platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md delete mode 100644 platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py diff --git a/platform-integrations/bob/evolve-lite/commands/evolve-lite-doctor.md b/platform-integrations/bob/evolve-lite/commands/evolve-lite-doctor.md deleted file mode 100644 index 2320c2ba..00000000 --- a/platform-integrations/bob/evolve-lite/commands/evolve-lite-doctor.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -description: Diagnose evolve health on Claude — verify the CLAUDE.md @import is actually loading the thin EVOLVE.md into sessions ---- -Use the `evolve-lite-doctor` skill on the current 
conversation. Follow the skill's instructions exactly. diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite-doctor/SKILL.md b/platform-integrations/bob/evolve-lite/skills/evolve-lite-doctor/SKILL.md deleted file mode 100644 index 4a29034e..00000000 --- a/platform-integrations/bob/evolve-lite/skills/evolve-lite-doctor/SKILL.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -name: evolve-lite:doctor -description: Diagnose evolve health on Claude — verify the CLAUDE.md @import is actually loading the thin EVOLVE.md into sessions ---- - -# Doctor - -This skill diagnoses Claude's `@import` delivery of evolve's thin EVOLVE.md. It -is specific to Claude (where evolve loads via a per-project import that can be -silently declined) and is a **no-op on this platform** — here EVOLVE.md is -always-on and there is no import-approval gate to check. Nothing to run. - diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite-doctor/scripts/doctor.py b/platform-integrations/bob/evolve-lite/skills/evolve-lite-doctor/scripts/doctor.py deleted file mode 100644 index 2c2a5382..00000000 --- a/platform-integrations/bob/evolve-lite/skills/evolve-lite-doctor/scripts/doctor.py +++ /dev/null @@ -1,188 +0,0 @@ -#!/usr/bin/env python3 -""" -Doctor Script (Claude-only diagnostic) - -On Claude, evolve is delivered by a single ``@.evolve/EVOLVE.md`` import line in -the repo's ``./CLAUDE.md``. That import requires a one-time, per-project -"allow external imports" approval. If the user declines it (even once, in a past -session) Claude silently disables the import forever — the thin EVOLVE.md never -loads and evolve becomes a no-op with NO error. - -Claude's internal approval flag is undocumented and unreliable to read, so this -script detects delivery *empirically*: the installed thin EVOLVE.md carries a -canary token (``EVOLVE_IMPORT_CANARY_``). When the import loads, that token -expands into the session transcript. 
The doctor extracts the token from the -installed copy (never hardcoding it twice) and greps the most recent Claude -project transcripts for it. - -Status codes (printed verbatim, always exit 0 — this is a diagnostic): - - OK — canary found in a recent transcript; import is loading. - IMPORT_DISABLED — import line present in CLAUDE.md but canary absent from - every recent transcript; the user likely declined the - external-import approval. - NOT_INSTALLED — the import line is missing from CLAUDE.md, or the installed - .evolve/EVOLVE.md is missing; run the installer. - STALE_EVOLVE_MD — installed EVOLVE.md has no canary; it predates this build, - re-run the installer. - UNKNOWN — no recent Claude transcripts for this project yet. - -Usage: - python3 doctor.py -""" - -import os -import re -import sys -from pathlib import Path - -# Walk up from the script location to find the installed plugin lib directory. -# Every host installs the shared lib under lib/evolve-lite/ so multiple plugins -# can coexist side by side. The doctor only needs the shared `log` helper, but -# resolving the lib the same way the other scripts do keeps the convention -# uniform (and only works in the rendered tree, same constraint as adapt_memory). -_script = Path(__file__).resolve() -_lib = None -for _ancestor in _script.parents: - _candidate = _ancestor / "lib" / "evolve-lite" - if (_candidate / "entity_io.py").is_file(): - _lib = _candidate - break -if _lib is None: - raise ImportError(f"Cannot find plugin lib directory above {_script}") -sys.path.insert(0, str(_lib)) -from entity_io import log as _log # noqa: E402 - - -def log(message): - _log("doctor", message) - - -# The line the installer injects into the repo's CLAUDE.md (see install.sh -# CLAUDE_IMPORT_LINE). Matching on this substring is the install-sanity check. 
-CLAUDE_IMPORT_LINE = "@.evolve/EVOLVE.md" - -# Pattern used to lift the canary token out of the installed EVOLVE.md so the -# exact token lives in exactly one place (the template), never duplicated here. -_CANARY_RE = re.compile(r"EVOLVE_IMPORT_CANARY_\S+") - -# How many of the most-recent transcripts to scan for the canary. -_RECENT_N = 3 - - -def _evolve_dir(root): - """Resolve the .evolve root: $EVOLVE_DIR if set, else /.evolve.""" - env_dir = os.environ.get("EVOLVE_DIR") - if env_dir: - return Path(env_dir) - return root / ".evolve" - - -def _transcript_slug(root): - """Claude derives a project's transcript dir name by replacing every - non-alphanumeric character in the absolute project path with ``-``. - - e.g. /Users/x/Documents/kaizen -> -Users-x-Documents-kaizen - """ - return re.sub(r"[^A-Za-z0-9]", "-", str(root)) - - -def _recent_transcripts(home, root, limit=_RECENT_N): - """The most recent N ``*.jsonl`` transcripts for this project, by mtime.""" - slug = _transcript_slug(root) - proj_dir = home / ".claude" / "projects" / slug - if not proj_dir.is_dir(): - return [] - jsonl = [p for p in proj_dir.glob("*.jsonl") if p.is_file()] - jsonl.sort(key=lambda p: p.stat().st_mtime, reverse=True) - return jsonl[:limit] - - -def _canary_in_transcripts(transcripts, token): - """True if `token` appears anywhere in any of the given transcript files.""" - for path in transcripts: - try: - text = path.read_text(encoding="utf-8", errors="replace") - except OSError: - continue - if token in text: - return True - return False - - -def diagnose(root, home): - """Core diagnosis. Returns ``(code, message)``; never raises on missing - files/dirs. `root` is the project root; `home` is the user home dir under - which Claude keeps ``~/.claude/projects//``. 
- """ - root = Path(root) - home = Path(home) - - # --- Install sanity ------------------------------------------------------ - claude_md = root / "CLAUDE.md" - has_import = False - if claude_md.is_file(): - try: - has_import = CLAUDE_IMPORT_LINE in claude_md.read_text(encoding="utf-8", errors="replace") - except OSError: - has_import = False - if not has_import: - return ( - "NOT_INSTALLED", - f"evolve import not wired into this repo's CLAUDE.md (expected a line `{CLAUDE_IMPORT_LINE}`); run the installer.", - ) - - evolve_md = _evolve_dir(root) / "EVOLVE.md" - if not evolve_md.is_file(): - return ( - "NOT_INSTALLED", - f"installed EVOLVE.md is missing at {evolve_md}; run the installer.", - ) - - # --- Extract the canary from the installed file -------------------------- - try: - evolve_text = evolve_md.read_text(encoding="utf-8", errors="replace") - except OSError as exc: - return ( - "NOT_INSTALLED", - f"cannot read installed EVOLVE.md at {evolve_md} - {exc}; run the installer.", - ) - match = _CANARY_RE.search(evolve_text) - if not match: - return ( - "STALE_EVOLVE_MD", - f"installed EVOLVE.md at {evolve_md} has no canary token (it predates this build); re-run the installer to refresh it.", - ) - token = match.group(0) - - # --- Transcript check ---------------------------------------------------- - transcripts = _recent_transcripts(home, root) - if not transcripts: - return ( - "UNKNOWN", - "no recent Claude transcripts for this project yet; open a session, then re-run.", - ) - if _canary_in_transcripts(transcripts, token): - return ("OK", "✓ evolve EVOLVE.md import is loading.") - - return ( - "IMPORT_DISABLED", - "⚠ The @import is present in CLAUDE.md but its content is NOT " - "reaching sessions — you likely declined Claude's external-import " - "approval. 
Re-enable by running `claude project purge " - f"{root}` then start a new session and Allow the import dialog.", - ) - - -def main(): - root = Path(os.getcwd()).resolve() - home = Path.home() - code, message = diagnose(root, home) - log(f"{code}: {message}") - print(f"evolve doctor [{code}] {message}") - # Diagnostic only — never fail the caller. - sys.exit(0) - - -if __name__ == "__main__": - main() diff --git a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md deleted file mode 100644 index 0641e810..00000000 --- a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -name: doctor -description: Diagnose evolve health on Claude — verify the CLAUDE.md @import is actually loading the thin EVOLVE.md into sessions ---- - -# Doctor - -This skill diagnoses Claude's `@import` delivery of evolve's thin EVOLVE.md. It -is specific to Claude (where evolve loads via a per-project import that can be -silently declined) and is a **no-op on this platform** — here EVOLVE.md is -always-on and there is no import-approval gate to check. Nothing to run. - diff --git a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py deleted file mode 100644 index 2c2a5382..00000000 --- a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py +++ /dev/null @@ -1,188 +0,0 @@ -#!/usr/bin/env python3 -""" -Doctor Script (Claude-only diagnostic) - -On Claude, evolve is delivered by a single ``@.evolve/EVOLVE.md`` import line in -the repo's ``./CLAUDE.md``. That import requires a one-time, per-project -"allow external imports" approval. 
If the user declines it (even once, in a past -session) Claude silently disables the import forever — the thin EVOLVE.md never -loads and evolve becomes a no-op with NO error. - -Claude's internal approval flag is undocumented and unreliable to read, so this -script detects delivery *empirically*: the installed thin EVOLVE.md carries a -canary token (``EVOLVE_IMPORT_CANARY_``). When the import loads, that token -expands into the session transcript. The doctor extracts the token from the -installed copy (never hardcoding it twice) and greps the most recent Claude -project transcripts for it. - -Status codes (printed verbatim, always exit 0 — this is a diagnostic): - - OK — canary found in a recent transcript; import is loading. - IMPORT_DISABLED — import line present in CLAUDE.md but canary absent from - every recent transcript; the user likely declined the - external-import approval. - NOT_INSTALLED — the import line is missing from CLAUDE.md, or the installed - .evolve/EVOLVE.md is missing; run the installer. - STALE_EVOLVE_MD — installed EVOLVE.md has no canary; it predates this build, - re-run the installer. - UNKNOWN — no recent Claude transcripts for this project yet. - -Usage: - python3 doctor.py -""" - -import os -import re -import sys -from pathlib import Path - -# Walk up from the script location to find the installed plugin lib directory. -# Every host installs the shared lib under lib/evolve-lite/ so multiple plugins -# can coexist side by side. The doctor only needs the shared `log` helper, but -# resolving the lib the same way the other scripts do keeps the convention -# uniform (and only works in the rendered tree, same constraint as adapt_memory). 
-_script = Path(__file__).resolve() -_lib = None -for _ancestor in _script.parents: - _candidate = _ancestor / "lib" / "evolve-lite" - if (_candidate / "entity_io.py").is_file(): - _lib = _candidate - break -if _lib is None: - raise ImportError(f"Cannot find plugin lib directory above {_script}") -sys.path.insert(0, str(_lib)) -from entity_io import log as _log # noqa: E402 - - -def log(message): - _log("doctor", message) - - -# The line the installer injects into the repo's CLAUDE.md (see install.sh -# CLAUDE_IMPORT_LINE). Matching on this substring is the install-sanity check. -CLAUDE_IMPORT_LINE = "@.evolve/EVOLVE.md" - -# Pattern used to lift the canary token out of the installed EVOLVE.md so the -# exact token lives in exactly one place (the template), never duplicated here. -_CANARY_RE = re.compile(r"EVOLVE_IMPORT_CANARY_\S+") - -# How many of the most-recent transcripts to scan for the canary. -_RECENT_N = 3 - - -def _evolve_dir(root): - """Resolve the .evolve root: $EVOLVE_DIR if set, else /.evolve.""" - env_dir = os.environ.get("EVOLVE_DIR") - if env_dir: - return Path(env_dir) - return root / ".evolve" - - -def _transcript_slug(root): - """Claude derives a project's transcript dir name by replacing every - non-alphanumeric character in the absolute project path with ``-``. - - e.g. 
/Users/x/Documents/kaizen -> -Users-x-Documents-kaizen - """ - return re.sub(r"[^A-Za-z0-9]", "-", str(root)) - - -def _recent_transcripts(home, root, limit=_RECENT_N): - """The most recent N ``*.jsonl`` transcripts for this project, by mtime.""" - slug = _transcript_slug(root) - proj_dir = home / ".claude" / "projects" / slug - if not proj_dir.is_dir(): - return [] - jsonl = [p for p in proj_dir.glob("*.jsonl") if p.is_file()] - jsonl.sort(key=lambda p: p.stat().st_mtime, reverse=True) - return jsonl[:limit] - - -def _canary_in_transcripts(transcripts, token): - """True if `token` appears anywhere in any of the given transcript files.""" - for path in transcripts: - try: - text = path.read_text(encoding="utf-8", errors="replace") - except OSError: - continue - if token in text: - return True - return False - - -def diagnose(root, home): - """Core diagnosis. Returns ``(code, message)``; never raises on missing - files/dirs. `root` is the project root; `home` is the user home dir under - which Claude keeps ``~/.claude/projects//``. 
- """ - root = Path(root) - home = Path(home) - - # --- Install sanity ------------------------------------------------------ - claude_md = root / "CLAUDE.md" - has_import = False - if claude_md.is_file(): - try: - has_import = CLAUDE_IMPORT_LINE in claude_md.read_text(encoding="utf-8", errors="replace") - except OSError: - has_import = False - if not has_import: - return ( - "NOT_INSTALLED", - f"evolve import not wired into this repo's CLAUDE.md (expected a line `{CLAUDE_IMPORT_LINE}`); run the installer.", - ) - - evolve_md = _evolve_dir(root) / "EVOLVE.md" - if not evolve_md.is_file(): - return ( - "NOT_INSTALLED", - f"installed EVOLVE.md is missing at {evolve_md}; run the installer.", - ) - - # --- Extract the canary from the installed file -------------------------- - try: - evolve_text = evolve_md.read_text(encoding="utf-8", errors="replace") - except OSError as exc: - return ( - "NOT_INSTALLED", - f"cannot read installed EVOLVE.md at {evolve_md} - {exc}; run the installer.", - ) - match = _CANARY_RE.search(evolve_text) - if not match: - return ( - "STALE_EVOLVE_MD", - f"installed EVOLVE.md at {evolve_md} has no canary token (it predates this build); re-run the installer to refresh it.", - ) - token = match.group(0) - - # --- Transcript check ---------------------------------------------------- - transcripts = _recent_transcripts(home, root) - if not transcripts: - return ( - "UNKNOWN", - "no recent Claude transcripts for this project yet; open a session, then re-run.", - ) - if _canary_in_transcripts(transcripts, token): - return ("OK", "✓ evolve EVOLVE.md import is loading.") - - return ( - "IMPORT_DISABLED", - "⚠ The @import is present in CLAUDE.md but its content is NOT " - "reaching sessions — you likely declined Claude's external-import " - "approval. 
Re-enable by running `claude project purge " - f"{root}` then start a new session and Allow the import dialog.", - ) - - -def main(): - root = Path(os.getcwd()).resolve() - home = Path.home() - code, message = diagnose(root, home) - log(f"{code}: {message}") - print(f"evolve doctor [{code}] {message}") - # Diagnostic only — never fail the caller. - sys.exit(0) - - -if __name__ == "__main__": - main() diff --git a/plugin-source/build_plugins.py b/plugin-source/build_plugins.py index d8350807..38c2ccf8 100644 --- a/plugin-source/build_plugins.py +++ b/plugin-source/build_plugins.py @@ -321,7 +321,10 @@ def _codex_plugin_json(meta: PluginMetadata) -> _CodexOut: "audit_script": "~/.codex/evolve-lite/audit_recall.py", }, "target_rewrites": [], - "target_excludes": [], + # The `doctor` skill diagnoses Claude's @import canary in + # ~/.claude transcripts; that mechanism doesn't exist on codex + # (codex uses an ~/.codex/AGENTS.md pointer), so exclude it. + "target_excludes": [r"^skills/evolve-lite/doctor/"], "metadata_target": ".codex-plugin/plugin.json", "metadata_emit": _codex_plugin_json, }, @@ -336,7 +339,11 @@ def _codex_plugin_json(meta: PluginMetadata) -> _CodexOut: # under .bob/skills/. Collapse the source skills/evolve-lite// # layout to skills/evolve-lite-/ for bob's render output. "target_rewrites": [(r"^skills/evolve-lite/([^/]+)/", r"skills/evolve-lite-\1/")], - "target_excludes": [], + # Exclude the Claude-only `doctor` skill (matches the source-side + # path, before the rewrite above flattens it to + # skills/evolve-lite-doctor/). Its @import-canary diagnostic is + # meaningless on bob, which has no ~/.claude transcript layout. + "target_excludes": [r"^skills/evolve-lite/doctor/"], # Bob has no plugin system, so no plugin.json is emitted. 
Bob's # commands/ directory is generated 1:1 from the skills walk by # _bob_command_targets(); no static command files exist in @@ -396,10 +403,19 @@ def _bob_command_bytes(skill_dir: Path) -> bytes: def _bob_command_targets() -> list[tuple[Path, Path, bytes]]: """Triples of (skill_source_for_drift_label, target_rel_to_repo_root, content) - for every bob command — one per skill — derived from the skills walk.""" + for every bob command — one per skill — derived from the skills walk. + + Skills excluded by bob's `target_excludes` get no command file: a skill + that isn't rendered into bob's skills/ must not leave a dangling slash + command pointing at it (e.g. the Claude-only `doctor` skill).""" bob_root_rel = Path(PLATFORMS["bob"]["plugin_root"]) + bob_excludes = [re.compile(pat) for pat in PLATFORMS["bob"].get("target_excludes", [])] out: list[tuple[Path, Path, bytes]] = [] for skill_dir in _discover_skills(): + # Match against the source-side path, mirroring PlatformConfig.excludes. + source_rel = f"skills/evolve-lite/{skill_dir.name}/" + if any(p.search(source_rel) for p in bob_excludes): + continue target_rel = bob_root_rel / "commands" / f"evolve-lite-{skill_dir.name}.md" out.append((skill_dir / "SKILL.md.j2", target_rel, _bob_command_bytes(skill_dir))) return out diff --git a/tests/platform_integrations/test_build_pipeline.py b/tests/platform_integrations/test_build_pipeline.py index 0ae18c29..e05859ff 100644 --- a/tests/platform_integrations/test_build_pipeline.py +++ b/tests/platform_integrations/test_build_pipeline.py @@ -205,9 +205,14 @@ def _bob_commands_dir(self, rendered_repo, build_module) -> Path: return _plugin_root(manifest, "bob") / "commands" def test_one_command_per_skill(self, rendered_repo, build_module): - skill_names = sorted(d.name for d in build_module._discover_skills()) + # Bob commands are 1:1 with the skills bob actually renders, which + # excludes skills filtered by bob's `target_excludes` (the Claude-only + # `doctor` skill). 
Derive the expected set from _bob_command_targets() + # so this stays in sync with the exclusion logic. + expected = sorted(target_rel.stem.removeprefix("evolve-lite-") for _, target_rel, _ in build_module._bob_command_targets()) commands = sorted(p.stem.removeprefix("evolve-lite-") for p in self._bob_commands_dir(rendered_repo, build_module).glob("*.md")) - assert commands == skill_names, "bob commands are not 1:1 with skills" + assert commands == expected, "bob commands are not 1:1 with bob-rendered skills" + assert "doctor" not in commands, "Claude-only `doctor` skill must not produce a bob command" def test_command_body_references_dash_form(self, rendered_repo, build_module): for cmd_file in self._bob_commands_dir(rendered_repo, build_module).glob("*.md"): @@ -217,9 +222,13 @@ def test_command_body_references_dash_form(self, rendered_repo, build_module): assert f"evolve-lite:{skill}" not in body, f"{cmd_file.name} body should not use the colon form (bob resolves by folder)" def test_command_description_comes_from_skill_frontmatter(self, rendered_repo, build_module): - for skill_dir in build_module._discover_skills(): - description = build_module._read_skill_description(skill_dir) - cmd_file = self._bob_commands_dir(rendered_repo, build_module) / f"evolve-lite-{skill_dir.name}.md" + # Only skills bob actually renders get a command file; iterate the + # command targets (which honor bob's `target_excludes`) rather than + # every discovered skill, so the Claude-only `doctor` skill — which + # bob doesn't render — isn't expected to have a command. 
+ for skill_src, target_rel, _ in build_module._bob_command_targets(): + description = build_module._read_skill_description(skill_src.parent) + cmd_file = self._bob_commands_dir(rendered_repo, build_module) / target_rel.name assert f"description: {description}\n" in cmd_file.read_text() def test_command_frontmatter_has_no_name_field(self, rendered_repo, build_module): From 48e2ae5a1e4ace6952e73e1b9aa00d8d8f539121 Mon Sep 17 00:00:00 2001 From: Punleuk Oum <5661986+illeatmyhat@users.noreply.github.com> Date: Tue, 9 Jun 2026 10:36:18 -0700 Subject: [PATCH 11/12] feat(platform-integrations): make uninstall reverse legacy pre-redesign artifacts uninstall is now migration-aware: it reverses the OLD pre-redesign install artifacts in addition to the new-design ones, so an upgrading user gets a true clean slate. - Codex (GLOBAL ~/.codex/, independent of --dir): strip legacy plugin tables [plugins."evolve-lite@"] from config.toml via line-surgery (new FileOps.remove_toml_tables), then tomllib-validate the result; remove legacy plugin caches plugins/cache//evolve-lite/ and rmdir the emptied marketplace parent. - Claude: add `claude plugin marketplace remove evolve-marketplace` (best-effort, tolerates non-zero exit / missing CLI, mirrors the existing uninstall call); remove orphan ~/.claude/plugins/data/evolve-lite-* data dirs; remove legacy plugin caches plugins/cache//evolve-lite/ (the orphaned OLD hooks/ bundle left behind by `claude plugin uninstall`, which could otherwise resurrect the old bundle on reinstall) and rmdir the emptied marketplace parent. - Bob: remove the legacy `install-evolve-lite` bootstrap mode (a bare YAML list item, not a sentinel block) via new FileOps.remove_yaml_custom_mode_by_slug. All new removals are defensive, idempotent, and dry-run-aware (routed through DryRunFileOps), so `uninstall --dry-run` prints the intended legacy removals and changes nothing on disk. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- platform-integrations/install.sh | 187 +++++++++++++ .../test_legacy_migration.py | 249 ++++++++++++++++++ 2 files changed, 436 insertions(+) create mode 100644 tests/platform_integrations/test_legacy_migration.py diff --git a/platform-integrations/install.sh b/platform-integrations/install.sh index bc042a8b..4608aef6 100755 --- a/platform-integrations/install.sh +++ b/platform-integrations/install.sh @@ -446,6 +446,52 @@ class FileOps: ) self.atomic_write_text(target_yaml_path, pattern.sub("", text)) + def remove_yaml_custom_mode_by_slug(self, target_yaml_path, slug): + """Remove a plain ``- slug: `` sequence item from a custom_modes file. + + The new-design modes are sentinel-wrapped (see remove_yaml_custom_mode), + but the legacy ``install-evolve-lite`` bootstrap mode was written as a + bare YAML list item with no sentinels. Drop the whole item: the + ``- slug: `` line plus every following line indented deeper than + the dash (the item body), stopping at the next sibling item or any + less-indented line. No-op when the file or the slug is absent.""" + target_yaml_path = str(target_yaml_path) + if not os.path.isfile(target_yaml_path): + return + with open(target_yaml_path) as f: + lines = f.read().splitlines(keepends=True) + + # A list item header for this slug: optional indent, `- `, then + # `slug: ` (quoted or bare), to end of line. + head_re = re.compile( + r"^(\s*)-\s+slug:\s*[\"']?" + re.escape(slug) + r"[\"']?\s*$" + ) + out = [] + i = 0 + removed = False + while i < len(lines): + m = head_re.match(lines[i]) + if not m: + out.append(lines[i]) + i += 1 + continue + removed = True + dash_indent = len(m.group(1)) + i += 1 + # Consume body lines: blank lines, or lines indented past the dash. 
+ while i < len(lines): + ln = lines[i] + if ln.strip() == "": + i += 1 + continue + indent = len(ln) - len(ln.lstrip()) + if indent <= dash_indent: + break + i += 1 + if removed: + self.atomic_write_text(target_yaml_path, "".join(out)) + debug(f"Removed YAML custom mode (slug '{slug}'): {target_yaml_path}") + # ── Sentinel-block helpers (generic always-on instruction files) ─────────── def inject_sentinel_block(self, path, slug, body): @@ -568,6 +614,50 @@ class FileOps: self.atomic_write_text(path, new_text) debug(f"Removed marker line ({marker}): {path}") + # ── TOML helpers (legacy codex config.toml migration) ────────────────────── + + def remove_toml_tables(self, path, header_pred): + """Remove every top-level TOML table whose header matches `header_pred`. + + `header_pred(header_name)` is called with the bare table name from a + `[name]` header line (e.g. `plugins."evolve-lite@evolve-marketplace"`); + when it returns True the header line plus all its body lines (up to the + next top-level `[` table header or EOF) are dropped. There is no toml + writer in the 3.11 stdlib, so this is line-surgery, mirroring the + marker/sentinel helpers. No-op when the file is absent. Returns True if + anything was removed. + """ + path = str(path) + if not os.path.isfile(path): + return False + with open(path) as f: + lines = f.read().splitlines(keepends=True) + + # A plain `[name]` table header; `[[name]]` array-of-tables and nested + # subtables of a removed table also start with `[`, so any line whose + # first non-space char is `[` ends the previous table's body. + header_re = re.compile(r"^\s*\[([^\[\]]+)\]\s*$") + is_table_line = re.compile(r"^\s*\[") + out = [] + skipping = False + removed = False + for ln in lines: + if is_table_line.match(ln): + m = header_re.match(ln) + # A new top-level table header decides whether we keep skipping. 
+ if m and header_pred(m.group(1).strip()): + skipping = True + removed = True + continue + skipping = False + if not skipping: + out.append(ln) + + if removed: + self.atomic_write_text(path, "".join(out)) + debug(f"Removed legacy TOML tables: {path}") + return removed + class DryRunFileOps(FileOps): """No-op variant: logs what would happen instead of writing anything.""" @@ -609,6 +699,9 @@ class DryRunFileOps(FileOps): def merge_yaml_custom_mode(self, source_yaml_path, target_yaml_path, slug): dryrun(f"merge YAML custom mode '{slug}' → {target_yaml_path}") + def remove_yaml_custom_mode_by_slug(self, target_yaml_path, slug): + dryrun(f"remove YAML custom mode (slug '{slug}') → {target_yaml_path}") + def inject_sentinel_block(self, path, slug, body): dryrun(f"inject sentinel block '{slug}' → {path}") @@ -621,6 +714,11 @@ class DryRunFileOps(FileOps): def remove_marker_line(self, path, marker): dryrun(f"remove marker line ({marker}) → {path}") + def remove_toml_tables(self, path, header_pred): + if os.path.isfile(str(path)): + dryrun(f"remove legacy TOML tables → {path}") + return True + # ── Platform detection ──────────────────────────────────────────────────────── @@ -864,8 +962,12 @@ class BobInstaller: # from a pre-redesign lite install is also swept up here. modes_files = {self._modes_file(bob_target), bob_target / "custom_modes.yaml"} for mf in modes_files: + # New-design modes are sentinel-wrapped blocks. self.ops.remove_yaml_custom_mode(mf, BOB_SLUG) self.ops.remove_yaml_custom_mode(mf, "Evolve") + # Legacy migration: the pre-redesign `install-evolve-lite` bootstrap + # mode was a bare YAML list item (no sentinels), so remove it by slug. 
+ self.ops.remove_yaml_custom_mode_by_slug(mf, "install-evolve-lite") for mcpf in {self._mcp_file(bob_target), bob_target / "mcp.json"}: self.ops.remove_json_key(mcpf, ["mcpServers", "evolve"]) @@ -1014,6 +1116,32 @@ class ClaudeInstaller: self.ops.remove_file(claude_evolve_dir / AUDIT_SCRIPT) self.ops.remove_dir_if_empty(claude_evolve_dir) + # Legacy migration: remove orphan plugin data dirs left by older installs + # (e.g. evolve-lite-inline, evolve-lite-evolve-marketplace). GLOBAL, only + # dirs whose name starts with `evolve-lite-` under plugins/data/. + data_dir = Path.home() / ".claude" / "plugins" / "data" + if data_dir.is_dir(): + for entry in sorted(data_dir.iterdir()): + if entry.is_dir() and entry.name.startswith("evolve-lite-"): + self.ops.remove_dir(entry) + + # Legacy migration: remove orphan plugin caches left by older installs at + # plugins/cache//evolve-lite/ (e.g. the OLD hooks/ bundle). + # `claude plugin uninstall` leaves these behind; because the plugin version + # isn't bumped, a stale cache can resurrect the OLD bundle on reinstall. + # Remove cache//evolve-lite/, then rmdir the marketplace parent + # if it is now empty. Only ever delete a dir whose final component is + # `evolve-lite` (or its emptied parent). GLOBAL, defensive, idempotent. 
+ cache_root = Path.home() / ".claude" / "plugins" / "cache" + if cache_root.is_dir(): + for marketplace_dir in sorted(cache_root.iterdir()): + if not marketplace_dir.is_dir(): + continue + evolve_cache = marketplace_dir / "evolve-lite" + if evolve_cache.is_dir(): + self.ops.remove_dir(evolve_cache) + self.ops.remove_dir_if_empty(marketplace_dir) + claude = shutil.which("claude") if not claude: warn("Could not uninstall Claude plugin automatically.") @@ -1027,6 +1155,15 @@ class ClaudeInstaller: warn(f"claude plugin uninstall exited with code {result.returncode}") warn(f"Run manually: claude plugin uninstall {CLAUDE_PLUGIN}") + # Legacy migration: install added the marketplace but uninstall never + # removed it. Tolerate non-zero exit / missing entry (mirrors the + # uninstall call above — best-effort, never fatal). + result = self.ops.run_subprocess([claude, "plugin", "marketplace", "remove", "evolve-marketplace"]) + if result.returncode == 0: + success("Removed claude marketplace 'evolve-marketplace'") + else: + warn(f"claude plugin marketplace remove exited with code {result.returncode} (ignored)") + def status(self, target_dir): print(f" Claude:") claude = shutil.which("claude") @@ -1131,6 +1268,52 @@ class CodexInstaller: plugins.append(copy.deepcopy(item)) self.ops.atomic_write_json(path, data) + # ── Legacy (pre-redesign) global migration ───────────────────────────────── + + def _purge_legacy_global(self): + """Reverse pre-redesign GLOBAL ~/.codex/ artifacts (migration cleanup). + + Old installs registered the plugin globally in ~/.codex/config.toml as + `[plugins."evolve-lite@"]` tables and left plugin caches at + ~/.codex/plugins/cache//evolve-lite/. The new design never + writes these, but an upgrading user still has them on disk — strip them + so uninstall is a true clean slate. GLOBAL regardless of --dir; defensive + and idempotent (no-op when absent).""" + codex_home = Path.home() / ".codex" + + # 1. 
config.toml: drop every `[plugins."evolve-lite@..."]` table. + config_toml = codex_home / "config.toml" + legacy_plugin_re = re.compile(r'^plugins\.\s*"evolve-lite@[^"]*"\s*$') + self.ops.remove_toml_tables( + config_toml, lambda header: bool(legacy_plugin_re.match(header)) + ) + # Post-condition (skipped in dry-run, which doesn't mutate the file): + # the result must still parse and carry no evolve-lite@* plugin key. + if not self.ops.is_dry_run and config_toml.is_file(): + try: + import tomllib + + with open(config_toml, "rb") as f: + parsed = tomllib.load(f) + stray = [k for k in parsed.get("plugins", {}) if k.startswith("evolve-lite@")] + if stray: + warn(f"Legacy codex plugin keys remain in {config_toml}: {stray}") + except Exception as e: # tomllib missing (<3.11) or unparseable + debug(f"Skipped config.toml validation: {e}") + + # 2. plugin caches: remove cache//evolve-lite/, then rmdir + # the marketplace parent if it is now empty. Only ever delete a dir + # whose final component is `evolve-lite` (or its emptied parent). + cache_root = codex_home / "plugins" / "cache" + if cache_root.is_dir(): + for marketplace_dir in sorted(cache_root.iterdir()): + if not marketplace_dir.is_dir(): + continue + evolve_cache = marketplace_dir / "evolve-lite" + if evolve_cache.is_dir(): + self.ops.remove_dir(evolve_cache) + self.ops.remove_dir_if_empty(marketplace_dir) + # ── Public interface ────────────────────────────────────────────────────── def install(self, target_dir): @@ -1204,6 +1387,10 @@ class CodexInstaller: self.ops.remove_file(evolve_dir / AUDIT_SCRIPT) self.ops.remove_dir_if_empty(evolve_dir) + # Reverse pre-redesign GLOBAL artifacts (config.toml plugin tables + + # plugin caches). GLOBAL migration, independent of --dir. 
+ self._purge_legacy_global() + success("Codex uninstall complete") def status(self, target_dir): diff --git a/tests/platform_integrations/test_legacy_migration.py b/tests/platform_integrations/test_legacy_migration.py new file mode 100644 index 00000000..209bdc6f --- /dev/null +++ b/tests/platform_integrations/test_legacy_migration.py @@ -0,0 +1,249 @@ +""" +Tests for the migration-aware ``uninstall`` path. + +An upgrading user still has PRE-REDESIGN ("legacy") artifacts on disk that the +new design never writes. ``uninstall`` must reverse them too, so the user lands +on a true clean slate: + + * Codex (GLOBAL ~/.codex/): legacy plugin registrations in ``config.toml`` + (``[plugins."evolve-lite@"]`` tables) and plugin caches + (``plugins/cache//evolve-lite/``). + * Claude (GLOBAL ~/.claude/): orphan plugin data dirs + (``plugins/data/evolve-lite-*``) and the ``evolve-marketplace`` registration. + * Bob: the legacy ``install-evolve-lite`` bootstrap custom mode (a bare YAML + list item, not a sentinel block). + +All removals are defensive, idempotent, and dry-run aware. These tests reuse the +``sandbox_home`` conftest seam (monkeypatches HOME → tmp dir, flows through to +the install.sh subprocess) so we never touch the developer's real home. 
+""" + +import tomllib + +import pytest + + +# ── Codex config.toml fixtures ───────────────────────────────────────────────── + +LEGACY_CONFIG_TOML = """\ +model = "gpt-5" + +[plugins."other@x"] +enabled = true + +[plugins."evolve-lite@evolve-marketplace"] +enabled = true +source = "evolve-marketplace" + +[plugins."evolve-lite@evolve-local"] +enabled = true +source = "evolve-local" + +[history] +persistence = "save-all" +""" + + +def _seed_legacy_codex(sandbox_home): + """Write a legacy ~/.codex/config.toml + plugin caches; return key paths.""" + codex = sandbox_home / ".codex" + config = codex / "config.toml" + config.parent.mkdir(parents=True, exist_ok=True) + config.write_text(LEGACY_CONFIG_TOML) + + cache = codex / "plugins" / "cache" / "evolve-marketplace" + (cache / "evolve-lite").mkdir(parents=True, exist_ok=True) + (cache / "evolve-lite" / "manifest.json").write_text("{}\n") + (cache / "other-plugin").mkdir(parents=True, exist_ok=True) + (cache / "other-plugin" / "manifest.json").write_text("{}\n") + return config, cache + + +@pytest.mark.platform_integrations +class TestCodexLegacyMigration: + def test_uninstall_strips_legacy_config_tables(self, sandbox_home, install_runner): + config, _ = _seed_legacy_codex(sandbox_home) + + install_runner.run("uninstall", platform="codex") + + text = config.read_text() + assert "evolve-lite@evolve-marketplace" not in text + assert "evolve-lite@evolve-local" not in text + # Unrelated tables and top-level keys are preserved. + assert "other@x" in text + assert 'model = "gpt-5"' in text + assert "[history]" in text + # Result is still valid TOML with no evolve-lite@* plugin key. 
+ parsed = tomllib.loads(text) + assert all(not k.startswith("evolve-lite@") for k in parsed.get("plugins", {})) + assert "other@x" in parsed["plugins"] + assert parsed["history"]["persistence"] == "save-all" + + def test_uninstall_removes_legacy_plugin_cache(self, sandbox_home, install_runner): + _, cache = _seed_legacy_codex(sandbox_home) + + install_runner.run("uninstall", platform="codex") + + # evolve-lite subdir gone; the marketplace parent would be rmdir'd if emptied, + # BUT the sibling other-plugin keeps it alive here. + assert not (cache / "evolve-lite").exists() + assert cache.exists(), "marketplace dir with surviving siblings must remain" + assert (cache / "other-plugin").exists(), "sibling plugin cache preserved" + + def test_uninstall_rmdirs_emptied_marketplace_parent(self, sandbox_home, install_runner): + codex = sandbox_home / ".codex" + cache = codex / "plugins" / "cache" / "evolve-local" + (cache / "evolve-lite").mkdir(parents=True, exist_ok=True) + (cache / "evolve-lite" / "x.json").write_text("{}\n") + + install_runner.run("uninstall", platform="codex") + + assert not (cache / "evolve-lite").exists() + assert not cache.exists(), "emptied marketplace parent should be rmdir'd" + + def test_uninstall_no_codex_config_is_noop(self, sandbox_home, install_runner): + """Absent legacy artifacts: uninstall must not error or create anything.""" + result = install_runner.run("uninstall", platform="codex") + assert result.returncode == 0 + assert not (sandbox_home / ".codex" / "config.toml").exists() + + def test_uninstall_codex_legacy_is_idempotent(self, sandbox_home, install_runner): + config, cache = _seed_legacy_codex(sandbox_home) + install_runner.run("uninstall", platform="codex") + first = config.read_text() + # Second run over the already-cleaned state is a clean no-op.
+ install_runner.run("uninstall", platform="codex") + assert config.read_text() == first + assert not (cache / "evolve-lite").exists() + assert (cache / "other-plugin").exists() + + +# ── Claude orphan data dirs + marketplace removal ────────────────────────────── + + +@pytest.mark.platform_integrations +class TestClaudeLegacyMigration: + def test_uninstall_removes_orphan_data_dirs(self, sandbox_home, install_runner, temp_project_dir): + data = sandbox_home / ".claude" / "plugins" / "data" + for name in ("evolve-lite-inline", "evolve-lite-evolve-marketplace", "other"): + (data / name).mkdir(parents=True, exist_ok=True) + (data / name / "store.json").write_text("{}\n") + + install_runner.run("uninstall", platform="claude") + + assert not (data / "evolve-lite-inline").exists() + assert not (data / "evolve-lite-evolve-marketplace").exists() + assert (data / "other").exists(), "unrelated plugin data dir preserved" + + def test_uninstall_invokes_marketplace_remove(self, sandbox_home, install_runner, tmp_path): + """The `claude plugin marketplace remove evolve-marketplace` shell-out is + + attempted. We don't require a real `claude` binary: drop a stub on PATH + that records its argv, then assert it was called with the remove verb. 
+ """ + bin_dir = tmp_path / "fakebin" + bin_dir.mkdir() + log = tmp_path / "claude_calls.log" + stub = bin_dir / "claude" + stub.write_text(f'#!/usr/bin/env bash\necho "$@" >> "{log}"\nexit 0\n') + stub.chmod(0o755) + + install_runner.run( + "uninstall", + platform="claude", + env={"PATH": f"{bin_dir}:/usr/bin:/bin"}, + ) + + calls = log.read_text() + assert "plugin uninstall evolve-lite" in calls + assert "plugin marketplace remove evolve-marketplace" in calls + + def test_uninstall_removes_legacy_plugin_cache(self, sandbox_home, install_runner, temp_project_dir): + cache = sandbox_home / ".claude" / "plugins" / "cache" / "evolve-marketplace" + (cache / "evolve-lite" / "1.1.0").mkdir(parents=True, exist_ok=True) + (cache / "evolve-lite" / "1.1.0" / "manifest.json").write_text("{}\n") + (cache / "other-plugin").mkdir(parents=True, exist_ok=True) + (cache / "other-plugin" / "manifest.json").write_text("{}\n") + + install_runner.run("uninstall", platform="claude") + + # evolve-lite cache subtree gone; its marketplace parent survives because + # an unrelated sibling plugin cache still lives there. + assert not (cache / "evolve-lite").exists() + assert cache.exists(), "marketplace dir with surviving siblings must remain" + assert (cache / "other-plugin").exists(), "sibling plugin cache preserved" + + +# ── Bob legacy install-evolve-lite mode ──────────────────────────────────────── + +LEGACY_BOB_MODES = """\ +customModes: + - slug: install-evolve-lite + name: Install Evolve Lite + roleDefinition: |- + Bootstrap mode. Mentions the sentinel literal # >>>evolve:evolve-lite<<< + inside its instructions, which must not confuse removal. + customInstructions: |- + Run the installer. + groups: + - read + - edit + - slug: my-mode + name: My Custom Mode + roleDefinition: |- + This is my own mode. 
+ groups: + - read +""" + + +@pytest.mark.platform_integrations +class TestBobLegacyMigration: + def test_uninstall_removes_legacy_bootstrap_mode(self, temp_project_dir, install_runner): + modes = temp_project_dir / ".bob" / "custom_modes.yaml" + modes.parent.mkdir(parents=True, exist_ok=True) + modes.write_text(LEGACY_BOB_MODES) + + install_runner.run("uninstall", platform="bob") + + text = modes.read_text() + assert "install-evolve-lite" not in text + assert "Bootstrap mode" not in text + # The unrelated user mode survives intact. + assert "slug: my-mode" in text + assert "This is my own mode." in text + + +# ── Dry-run must change nothing on disk ───────────────────────────────────────── + + +@pytest.mark.platform_integrations +class TestLegacyDryRun: + def test_dry_run_removes_nothing(self, sandbox_home, install_runner, temp_project_dir): + config, cache = _seed_legacy_codex(sandbox_home) + config_before = config.read_text() + + data = sandbox_home / ".claude" / "plugins" / "data" + (data / "evolve-lite-inline").mkdir(parents=True, exist_ok=True) + (data / "evolve-lite-inline" / "store.json").write_text("{}\n") + + claude_cache = sandbox_home / ".claude" / "plugins" / "cache" / "evolve-marketplace" + (claude_cache / "evolve-lite" / "1.1.0").mkdir(parents=True, exist_ok=True) + (claude_cache / "evolve-lite" / "1.1.0" / "manifest.json").write_text("{}\n") + + modes = temp_project_dir / ".bob" / "custom_modes.yaml" + modes.parent.mkdir(parents=True, exist_ok=True) + modes.write_text(LEGACY_BOB_MODES) + modes_before = modes.read_text() + + result = install_runner.run("uninstall", platform="all", dry_run=True) + + assert result.returncode == 0 + assert "DRY RUN" in result.stdout + # Nothing on disk changed. 
+ assert config.read_text() == config_before + assert (cache / "evolve-lite").exists() + assert (cache / "other-plugin").exists() + assert (data / "evolve-lite-inline").exists() + assert (claude_cache / "evolve-lite").exists() + assert modes.read_text() == modes_before From 0e82b5f64cb544767b85a45d44e45d5919b3935a Mon Sep 17 00:00:00 2001 From: Punleuk Oum <5661986+illeatmyhat@users.noreply.github.com> Date: Tue, 9 Jun 2026 12:19:50 -0700 Subject: [PATCH 12/12] feat(platform-integrations): ship adapt_memory to stable path + auto-allowlist evolve scripts (no permission prompts) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Claude plugins cannot self-declare tool permissions, env vars are not expanded in permission rules, and plugin install dirs are version-unstable, so the adapt-memory skill's Python invocation and its .evolve/ writes triggered a per-use permission prompt on every run. Fix (Claude-scoped; adapt-memory is functionally Claude-only — it is a no-op stub on bob/codex/claw): 1. Ship adapt_memory.py to the version-stable global path ~/.claude/evolve-lite/adapt_memory.py (mirroring the existing audit_recall.py delivery). Unlike audit_recall.py (self-contained), adapt_memory.py imports entity_io from the shared lib and resolves it by walking up its ancestors for lib/evolve-lite/entity_io.py, so the shared lib is shipped alongside at ~/.claude/evolve-lite/lib/evolve-lite/. The rendered adapt-memory SKILL.md now invokes that stable path instead of ${CLAUDE_PLUGIN_ROOT}/... (new adapt_memory_script render context + invoke() path_override). 2. The installer merges five allow-rules (the two stable script paths plus Read/Edit/Write on .evolve/**) into <repo>/.claude/settings.json on install and removes exactly those rules on uninstall, preserving any user-added rules/keys and cleaning up empties (allow key, then permissions, then file, then .claude dir).
New FileOps helpers merge_json_permission_rules / remove_json_permission_rules with DryRunFileOps overrides so --dry-run writes nothing. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../skills/evolve-lite/adapt-memory/SKILL.md | 2 +- platform-integrations/install.sh | 116 +++++++++- plugin-source/_macros.j2 | 21 +- plugin-source/build_plugins.py | 4 + .../evolve-lite/adapt-memory/SKILL.md.j2 | 2 +- tests/platform_integrations/conftest.py | 31 +++ tests/platform_integrations/test_claude.py | 217 ++++++++++++++++++ 7 files changed, 386 insertions(+), 7 deletions(-) diff --git a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md index 6bd4ee26..34d2fab6 100644 --- a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md +++ b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md @@ -37,7 +37,7 @@ For each native memory file you saved this turn: synthesized trigger: ```bash -python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py \ +python3 ~/.claude/evolve-lite/adapt_memory.py \ \ --type \ --trigger "" diff --git a/platform-integrations/install.sh b/platform-integrations/install.sh index 4608aef6..a0bafc7e 100755 --- a/platform-integrations/install.sh +++ b/platform-integrations/install.sh @@ -123,6 +123,7 @@ DRY_RUN = False BOB_SLUG = "evolve-lite" BOB_RULES_FILE = "00-evolve-lite.md" AUDIT_SCRIPT = "audit_recall.py" +ADAPT_SCRIPT = "adapt_memory.py" CLAUDE_PLUGIN = "evolve-lite" CLAW_CODE_PLUGIN = "evolve-lite" CODEX_PLUGIN = "evolve-lite" @@ -156,6 +157,24 @@ CLAUDE_EVOLVE_MD_REL = ".evolve/EVOLVE.md" CLAUDE_IMPORT_MARKER = CLAUDE_EVOLVE_MD_REL CLAUDE_IMPORT_LINE = "@" + CLAUDE_EVOLVE_MD_REL +# Claude plugins cannot self-declare tool permissions, env vars aren't expanded +# in permission rules, and plugin install dirs are 
version-unstable — so the +# only way to pre-authorize evolve's scripts/.evolve writes without a per-use +# prompt is to merge these allow-rules into the repo's project settings at +# /.claude/settings.json. The script paths use the GLOBAL stable paths the +# installer ships to (`~/.claude/evolve-lite/*.py`), which are allowlistable +# because they never move between plugin versions. The `~/` prefix and the +# trailing `:*` (match-any-args) suffix are both valid per the Claude Code +# settings docs. +CLAUDE_SETTINGS_REL = ".claude/settings.json" +CLAUDE_ALLOW_RULES = [ + "Bash(python3 ~/.claude/evolve-lite/" + ADAPT_SCRIPT + ":*)", + "Bash(python3 ~/.claude/evolve-lite/" + AUDIT_SCRIPT + ":*)", + "Read(.evolve/**)", + "Edit(.evolve/**)", + "Write(.evolve/**)", +] + # ── Colour helpers ──────────────────────────────────────────────────────────── IS_TTY = sys.stdout.isatty() @@ -364,6 +383,47 @@ class FileOps: data[array_key] = [item for item in data.get(array_key, []) if item.get(id_key) != id_val] self.atomic_write_json(path, data) + def merge_json_permission_rules(self, path, rules): + """Idempotently merge `rules` into a Claude settings file's + ``permissions.allow`` array, preserving every rule already present and + any other settings keys. Creates the file/parents if missing. No + duplicates on re-run (set-membership against the existing list).""" + data = read_json(path) + permissions = data.get("permissions") + if not isinstance(permissions, dict): + permissions = {} + data["permissions"] = permissions + allow = permissions.get("allow") + if not isinstance(allow, list): + allow = [] + permissions["allow"] = allow + for rule in rules: + if rule not in allow: + allow.append(rule) + self.atomic_write_json(path, data) + + def remove_json_permission_rules(self, path, rules): + """Remove exactly `rules` from ``permissions.allow`` in a Claude settings + file, leaving any user-added rules intact. 
Empties clean up: when + ``allow`` becomes empty drop the key; when ``permissions`` becomes empty + drop it too; when the whole file reduces to ``{}`` remove the file. No-op + when the file is absent.""" + if not os.path.isfile(str(path)): + return + data = read_json(path) + permissions = data.get("permissions") + if isinstance(permissions, dict) and isinstance(permissions.get("allow"), list): + drop = set(rules) + permissions["allow"] = [r for r in permissions["allow"] if r not in drop] + if not permissions["allow"]: + permissions.pop("allow", None) + if not permissions: + data.pop("permissions", None) + if not data: + self.remove_file(path) + else: + self.atomic_write_json(path, data) + # ── YAML helpers ────────────────────────────────────────────────────────── def merge_yaml_custom_mode(self, source_yaml_path, target_yaml_path, slug): @@ -696,6 +756,16 @@ class DryRunFileOps(FileOps): dryrun(f"run: {' '.join(cmd_list)}") return types.SimpleNamespace(returncode=0, stdout="", stderr="") + def merge_json_permission_rules(self, path, rules): + dryrun(f"merge {len(rules)} permission allow-rule(s) → {path}") + for rule in rules: + debug(f" + {rule}") + + def remove_json_permission_rules(self, path, rules): + dryrun(f"remove {len(rules)} permission allow-rule(s) → {path}") + for rule in rules: + debug(f" - {rule}") + def merge_yaml_custom_mode(self, source_yaml_path, target_yaml_path, slug): dryrun(f"merge YAML custom mode '{slug}' → {target_yaml_path}") @@ -1060,13 +1130,46 @@ class ClaudeInstaller: self.ops.atomic_write_text(audit_file, audit_text) success(f"Installed recall-audit script → {audit_file}") + # adapt-memory adapter script: the adapt-memory skill invokes + # `python3 ~/.claude/evolve-lite/adapt_memory.py` (a STABLE, version-proof + # path so it can be permission-allowlisted — the versioned plugin dir + # cannot). Ship it to that GLOBAL path, mirroring the audit script above. 
+ # Unlike audit_recall.py (self-contained), adapt_memory.py imports + # `entity_io` from the shared lib: it walks up its own ancestors looking + # for `lib/evolve-lite/entity_io.py`, so ship the shared lib alongside it + # at ~/.claude/evolve-lite/lib/evolve-lite/ (matching bob/codex, which + # also ship a sibling lib/ for their scripts). + claude_evolve_dir = Path.home() / ".claude" / "evolve-lite" + adapt_src = plugin_source / "skills" / "evolve-lite" / "adapt-memory" / "scripts" / ADAPT_SCRIPT + if not adapt_src.is_file(): + adapt_src = Path(source_dir) / "plugin-source" / "skills" / "evolve-lite" / "adapt-memory" / "scripts" / ADAPT_SCRIPT + adapt_text = "" if self.ops.is_dry_run and not adapt_src.is_file() else adapt_src.read_text() + adapt_file = claude_evolve_dir / ADAPT_SCRIPT + self.ops.atomic_write_text(adapt_file, adapt_text) + success(f"Installed adapt-memory script → {adapt_file}") + + lib_src = plugin_source / "lib" / "evolve-lite" + if not (lib_src / "entity_io.py").is_file(): + lib_src = Path(source_dir) / "plugin-source" / "lib" + lib_dst = claude_evolve_dir / "lib" / "evolve-lite" + self.ops.copy_tree(lib_src, lib_dst) + success(f"Installed shared lib → {lib_dst}") + def install(self, target_dir): info("Installing Claude plugin via marketplace") - # Deliver the per-repo EVOLVE.md + import pointer + global audit script - # regardless of whether the `claude` CLI is present below. + # Deliver the per-repo EVOLVE.md + import pointer + global audit/adapt + # scripts regardless of whether the `claude` CLI is present below. self._deliver_files(target_dir) + # Pre-authorize evolve's scripts + .evolve writes so they never trigger a + # per-use permission prompt. Plugins can't self-declare permissions, so + # merge the allow-rules into the repo's project settings (idempotent, + # preserves existing rules/keys). See CLAUDE_ALLOW_RULES for the rationale. 
+ settings_path = Path(target_dir) / CLAUDE_SETTINGS_REL + self.ops.merge_json_permission_rules(settings_path, CLAUDE_ALLOW_RULES) + success(f"Allowlisted evolve scripts + .evolve writes in {settings_path} (no per-use prompts)") + marketplace_dir = Path(SOURCE_DIR).resolve() if SOURCE_DIR else None has_local_marketplace = marketplace_dir is not None and (marketplace_dir / ".claude-plugin" / "marketplace.json").is_file() marketplace_source = str(marketplace_dir) if has_local_marketplace else EVOLVE_REPO @@ -1109,11 +1212,18 @@ class ClaudeInstaller: # Drop the single managed `@`-import pointer line from /CLAUDE.md, # remove the per-repo EVOLVE.md copy we placed (NOT the whole .evolve/ - # store), and remove the global recall-audit script (mirrors Codex). + # store), remove the project-settings allow-rules we merged in, and + # remove the global recall-audit + adapt-memory scripts and the shared + # lib we shipped alongside them (mirrors Codex). self.ops.remove_marker_line(Path(target_dir) / "CLAUDE.md", CLAUDE_IMPORT_MARKER) self.ops.remove_file(Path(target_dir) / CLAUDE_EVOLVE_MD_REL) + settings_path = Path(target_dir) / CLAUDE_SETTINGS_REL + self.ops.remove_json_permission_rules(settings_path, CLAUDE_ALLOW_RULES) + self.ops.remove_dir_if_empty(Path(target_dir) / ".claude") claude_evolve_dir = Path.home() / ".claude" / "evolve-lite" self.ops.remove_file(claude_evolve_dir / AUDIT_SCRIPT) + self.ops.remove_file(claude_evolve_dir / ADAPT_SCRIPT) + self.ops.remove_dir(claude_evolve_dir / "lib") self.ops.remove_dir_if_empty(claude_evolve_dir) # Legacy migration: remove orphan plugin data dirs left by older installs diff --git a/plugin-source/_macros.j2 b/plugin-source/_macros.j2 index a3bc0ab2..0ba282c4 100644 --- a/plugin-source/_macros.j2 +++ b/plugin-source/_macros.j2 @@ -10,6 +10,13 @@ other platforms stay single-line because the whole command is either wrapped in `sh -lc '...'` (claw-code) or invoked through a single python3 call (codex, bob). 
+ path_override — when set, the script is invoked from this exact path on + EVERY platform (e.g. "~/.claude/evolve-lite/adapt_memory.py"), and + the per-platform plugin-relative path resolution is bypassed. Used + for scripts the installer ships to a stable, version-proof global + path so they can be permission-allowlisted (mirrors how EVOLVE.md + invokes `python3 {{ audit_script }}`). The arg rendering still + follows the per-platform rules above. Path resolution per platform: claude — ${CLAUDE_PLUGIN_ROOT} expanded by the Claude plugin runtime. @@ -17,8 +24,18 @@ codex — git-rev-parse from any cwd inside the project clone. bob — project-rooted .bob/skills/evolve-lite-/ (post-rename). #} -{%- macro invoke(skill, script, args=None) -%} -{%- if platform == "claude" -%} +{%- macro invoke(skill, script, args=None, path_override=None) -%} +{%- if path_override is not none -%} +{#- Stable global path (installer-shipped, version-proof, allowlistable). Same + head on every platform; arg formatting follows the per-platform rules. 
-#} +python3 {{ path_override }} +{%- if args is none %}{# no args; nothing appended #} +{%- elif args is string %} {{ args }} +{%- elif platform == "claude" %} \ + {{ args | join(" \\\n ") }} +{%- else %} {{ args | join(" ") }} +{%- endif -%} +{%- elif platform == "claude" -%} python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/{{ skill }}/scripts/{{ script }} {%- if args is none %}{# no args; nothing appended #} {%- elif args is string %} {{ args }} diff --git a/plugin-source/build_plugins.py b/plugin-source/build_plugins.py index 38c2ccf8..07a26ed1 100644 --- a/plugin-source/build_plugins.py +++ b/plugin-source/build_plugins.py @@ -294,6 +294,7 @@ def _codex_plugin_json(meta: PluginMetadata) -> _CodexOut: "user_skills_dir": "~/.claude/skills", "save_example_script_root": "${CLAUDE_PLUGIN_ROOT}/skills", "audit_script": "~/.claude/evolve-lite/audit_recall.py", + "adapt_memory_script": "~/.claude/evolve-lite/adapt_memory.py", }, "target_rewrites": [], "target_excludes": [], @@ -306,6 +307,7 @@ def _codex_plugin_json(meta: PluginMetadata) -> _CodexOut: "user_skills_dir": "~/.claw/skills", "save_example_script_root": "~/.claw/skills", "audit_script": "~/.claw/evolve-lite/audit_recall.py", + "adapt_memory_script": "~/.claw/evolve-lite/adapt_memory.py", }, "target_rewrites": [], "target_excludes": [], @@ -319,6 +321,7 @@ def _codex_plugin_json(meta: PluginMetadata) -> _CodexOut: "user_skills_dir": "plugins/evolve-lite/skills", "save_example_script_root": "plugins/evolve-lite/skills", "audit_script": "~/.codex/evolve-lite/audit_recall.py", + "adapt_memory_script": "~/.codex/evolve-lite/adapt_memory.py", }, "target_rewrites": [], # The `doctor` skill diagnoses Claude's @import canary in @@ -334,6 +337,7 @@ def _codex_plugin_json(meta: PluginMetadata) -> _CodexOut: "user_skills_dir": ".bob/skills", "save_example_script_root": ".bob/skills", "audit_script": "~/.bob/evolve-lite/audit_recall.py", + "adapt_memory_script": "~/.bob/evolve-lite/adapt_memory.py", }, # Bob has no 
plugin-namespace concept; skill folders are flat # under .bob/skills/. Collapse the source skills/evolve-lite// diff --git a/plugin-source/skills/evolve-lite/adapt-memory/SKILL.md.j2 b/plugin-source/skills/evolve-lite/adapt-memory/SKILL.md.j2 index 39456f9d..02db92bc 100644 --- a/plugin-source/skills/evolve-lite/adapt-memory/SKILL.md.j2 +++ b/plugin-source/skills/evolve-lite/adapt-memory/SKILL.md.j2 @@ -40,7 +40,7 @@ For each native memory file you saved this turn: synthesized trigger: ```bash -{{ invoke("adapt-memory", "adapt_memory.py", ["", "--type ", "--trigger \"\""]) }} +{{ invoke("adapt-memory", "adapt_memory.py", ["", "--type ", "--trigger \"\""], path_override=adapt_memory_script) }} ``` The script parses the native frontmatter and body, builds the entity diff --git a/tests/platform_integrations/conftest.py b/tests/platform_integrations/conftest.py index ac95dcac..546061a1 100644 --- a/tests/platform_integrations/conftest.py +++ b/tests/platform_integrations/conftest.py @@ -118,6 +118,37 @@ def claude_audit_script(sandbox_home): return sandbox_home / ".claude" / "evolve-lite" / "audit_recall.py" +@pytest.fixture +def claude_adapt_script(sandbox_home): + """Path to the sandboxed Claude GLOBAL adapt-memory adapter script. + + The adapt-memory skill invokes ``python3 ~/.claude/evolve-lite/adapt_memory.py`` + (a stable, version-proof path that can be permission-allowlisted), so the + installer ships the script to that global absolute path alongside the audit + script.""" + return sandbox_home / ".claude" / "evolve-lite" / "adapt_memory.py" + + +@pytest.fixture +def claude_adapt_lib(sandbox_home): + """Path to the sandboxed shared lib shipped beside the global adapt script. 
+ + adapt_memory.py imports ``entity_io`` from the shared lib, resolving it by + walking up its own ancestors for ``lib/evolve-lite/entity_io.py``; the + installer ships the lib here so that walk succeeds from the global path.""" + return sandbox_home / ".claude" / "evolve-lite" / "lib" / "evolve-lite" / "entity_io.py" + + +@pytest.fixture +def claude_settings_file(temp_project_dir): + """Path to the PER-REPO project settings the Claude installer allowlists in. + + Claude plugins cannot self-declare permissions, so the installer pre-authorizes + the evolve scripts and ``.evolve/`` writes by merging allow-rules into the + repo's ``.claude/settings.json`` (idempotent; removed on uninstall).""" + return temp_project_dir / ".claude" / "settings.json" + + @pytest.fixture def temp_project_dir(tmp_path): """ diff --git a/tests/platform_integrations/test_claude.py b/tests/platform_integrations/test_claude.py index c125924e..5a3f6166 100644 --- a/tests/platform_integrations/test_claude.py +++ b/tests/platform_integrations/test_claude.py @@ -17,6 +17,12 @@ file delivery still runs in that case. """ +import json +import os +import subprocess +import sys +from pathlib import Path + import pytest @@ -30,6 +36,20 @@ # A distinctive string from the recall-audit script. AUDIT_SCRIPT_SENTENCE = "Append a recall-audit row" +# The exact set of allow-rules the installer merges into the project's .claude/settings.json. +EXPECTED_ALLOW_RULES = [ + "Bash(python3 ~/.claude/evolve-lite/adapt_memory.py:*)", + "Bash(python3 ~/.claude/evolve-lite/audit_recall.py:*)", + "Read(.evolve/**)", + "Edit(.evolve/**)", + "Write(.evolve/**)", +] + +_REPO_ROOT = Path(__file__).parent.parent.parent +# The rendered Claude adapt-memory skill — its invocation must point at the +# stable global path, not the version-unstable ${CLAUDE_PLUGIN_ROOT} dir.
+_RENDERED_ADAPT_SKILL = _REPO_ROOT / "platform-integrations/claude/plugins/evolve-lite" / "skills/evolve-lite/adapt-memory/SKILL.md" + + def _import_lines(text): """Return the lines in `text` that carry the managed @-import marker.""" @@ -140,3 +160,200 @@ def test_uninstall_removes_pointer_and_evolve_md_and_audit( # The placed per-repo EVOLVE.md and the global audit script are gone. file_assertions.assert_file_not_exists(claude_evolve_md) file_assertions.assert_file_not_exists(claude_audit_script) + + +@pytest.mark.platform_integrations +class TestClaudeRenderedAdaptSkill: + """The rendered adapt-memory skill must invoke the stable global path.""" + + def test_rendered_skill_uses_stable_path_not_plugin_root(self): + text = _RENDERED_ADAPT_SKILL.read_text() + # The version-unstable plugin-root form must be gone entirely. + assert "${CLAUDE_PLUGIN_ROOT}" not in text + # The stable, allowlistable global path must be the invocation target. + assert "python3 ~/.claude/evolve-lite/adapt_memory.py" in text + + +@pytest.mark.platform_integrations +@pytest.mark.e2e +class TestClaudeAdaptScriptDelivery: + """The adapt-memory adapter + its lib land at the stable global path.""" + + def test_install_ships_adapt_script_and_lib( + self, + install_runner, + file_assertions, + claude_adapt_script, + claude_adapt_lib, + ): + """adapt_memory.py and the shared lib (entity_io.py) land at the global path.""" + install_runner.run("install", platform="claude") + + file_assertions.assert_file_exists(claude_adapt_script) + # Content check: the shipped file is the real adapter, which references the shared `entity_io` lib. + assert "entity_io" in claude_adapt_script.read_text() + # The shared lib must ship alongside so adapt_memory's import-walk resolves. 
+ file_assertions.assert_file_exists(claude_adapt_lib) + + def test_installed_adapt_script_is_runnable_from_stable_path( + self, + install_runner, + temp_project_dir, + sandbox_home, + claude_adapt_script, + ): + """Run the GLOBALLY-installed adapt_memory.py: its `entity_io` import must + resolve from ~/.claude/evolve-lite/lib/evolve-lite/ and it must write the + mirrored entity into the project's .evolve store.""" + install_runner.run("install", platform="claude") + + native = temp_project_dir / "native_memory.md" + native.write_text( + "---\nname: prefer-ripgrep\ndescription: use ripgrep over grep\n" + "metadata:\n type: feedback\n---\nAlways reach for ripgrep (rg).\n" + ) + evolve_dir = temp_project_dir / ".evolve" + + env = { + **os.environ, + "HOME": str(sandbox_home), + "USERPROFILE": str(sandbox_home), + "EVOLVE_DIR": str(evolve_dir), + } + env.pop("HOMEDRIVE", None) + env.pop("HOMEPATH", None) + result = subprocess.run( + [sys.executable, str(claude_adapt_script), str(native), "--type", "feedback", "--trigger", "when searching files"], + capture_output=True, + text=True, + cwd=str(temp_project_dir), + env=env, + check=False, + ) + + assert result.returncode == 0, f"adapt_memory.py failed: {result.stderr}" + entity = evolve_dir / "entities" / "feedback" / "prefer-ripgrep.md" + assert entity.is_file(), f"entity not written; stdout={result.stdout} stderr={result.stderr}" + + def test_uninstall_removes_adapt_script_and_lib( + self, + install_runner, + file_assertions, + claude_adapt_script, + claude_adapt_lib, + ): + """Uninstall removes the global adapter script and the shipped lib.""" + install_runner.run("install", platform="claude") + file_assertions.assert_file_exists(claude_adapt_script) + file_assertions.assert_file_exists(claude_adapt_lib) + + install_runner.run("uninstall", platform="claude") + + file_assertions.assert_file_not_exists(claude_adapt_script) + file_assertions.assert_file_not_exists(claude_adapt_lib) + # The whole global evolve-lite dir 
(scripts + lib) is gone when emptied. + file_assertions.assert_dir_not_exists(claude_adapt_script.parent) + + def test_dry_run_writes_no_adapt_artifacts( + self, + install_runner, + claude_adapt_script, + claude_adapt_lib, + ): + result = install_runner.run("install", platform="claude", dry_run=True) + assert "DRY RUN" in result.stdout + assert not claude_adapt_script.exists() + assert not claude_adapt_lib.exists() + + +def _allow(settings_path): + """The permissions.allow list from a settings.json (empty list if absent).""" + if not settings_path.is_file(): + return [] + return json.loads(settings_path.read_text()).get("permissions", {}).get("allow", []) + + +@pytest.mark.platform_integrations +@pytest.mark.e2e +class TestClaudePermissionAllowlist: + """Install pre-authorizes the evolve scripts + .evolve writes in project settings.""" + + def test_install_merges_all_allow_rules(self, install_runner, claude_settings_file): + install_runner.run("install", platform="claude") + allow = _allow(claude_settings_file) + for rule in EXPECTED_ALLOW_RULES: + assert rule in allow, f"missing allow-rule {rule!r}; got {allow!r}" + + def test_reinstall_does_not_duplicate_rules(self, install_runner, claude_settings_file): + install_runner.run("install", platform="claude") + install_runner.run("install", platform="claude") + allow = _allow(claude_settings_file) + for rule in EXPECTED_ALLOW_RULES: + assert allow.count(rule) == 1, f"rule {rule!r} duplicated: {allow!r}" + + def test_install_preserves_existing_rules_and_keys(self, install_runner, claude_settings_file): + """A pre-existing unrelated allow-rule and other settings keys survive.""" + claude_settings_file.parent.mkdir(parents=True, exist_ok=True) + claude_settings_file.write_text( + json.dumps( + { + "model": "opus", + "permissions": { + "allow": ["Bash(ls:*)"], + "deny": ["Bash(rm:*)"], + }, + }, + indent=2, + ) + + "\n" + ) + + install_runner.run("install", platform="claude") + + data = 
json.loads(claude_settings_file.read_text()) + # Unrelated top-level key preserved. + assert data["model"] == "opus" + # Unrelated permissions sibling preserved. + assert data["permissions"]["deny"] == ["Bash(rm:*)"] + allow = data["permissions"]["allow"] + # Pre-existing rule preserved and our rules merged in (no duplicates). + assert "Bash(ls:*)" in allow + for rule in EXPECTED_ALLOW_RULES: + assert allow.count(rule) == 1 + + def test_uninstall_removes_only_evolve_rules(self, install_runner, claude_settings_file): + """Uninstall drops exactly the 5 evolve rules, leaving user rules + keys.""" + claude_settings_file.parent.mkdir(parents=True, exist_ok=True) + claude_settings_file.write_text( + json.dumps( + {"model": "opus", "permissions": {"allow": ["Bash(ls:*)"], "deny": ["Bash(rm:*)"]}}, + indent=2, + ) + + "\n" + ) + install_runner.run("install", platform="claude") + install_runner.run("uninstall", platform="claude") + + data = json.loads(claude_settings_file.read_text()) + assert data["model"] == "opus" + assert data["permissions"]["deny"] == ["Bash(rm:*)"] + assert data["permissions"]["allow"] == ["Bash(ls:*)"] + for rule in EXPECTED_ALLOW_RULES: + assert rule not in data["permissions"]["allow"] + + def test_uninstall_cleans_up_empties(self, install_runner, claude_settings_file, file_assertions): + """When only evolve rules existed, uninstall removes the empty allow key, + the settings file, and the .claude dir (if otherwise empty).""" + install_runner.run("install", platform="claude") + file_assertions.assert_file_exists(claude_settings_file) + + install_runner.run("uninstall", platform="claude") + + # Settings file removed (it reduced to {}), and .claude/ dir removed. 
+ file_assertions.assert_file_not_exists(claude_settings_file) + file_assertions.assert_dir_not_exists(claude_settings_file.parent) + + def test_dry_run_writes_no_settings(self, install_runner, claude_settings_file): + result = install_runner.run("install", platform="claude", dry_run=True) + assert "DRY RUN" in result.stdout + assert not claude_settings_file.exists()