diff --git a/platform-integrations/bob/evolve-lite/EVOLVE.md b/platform-integrations/bob/evolve-lite/EVOLVE.md new file mode 100644 index 00000000..a85f2ed7 --- /dev/null +++ b/platform-integrations/bob/evolve-lite/EVOLVE.md @@ -0,0 +1,82 @@ +# Evolve — self-directed memory + +You have a persistent, file-based memory for the current project, stored under +`./.evolve/memory/` (relative to the workspace/project root). You decide, on +your own judgment, when something is worth remembering — nothing forces a save, +and there is no step to "complete." Curate this memory like notes you'll thank +yourself for later: small, accurate, high-signal. + +## Recall — at the start of a non-trivial task + +Before substantive work (code changes, debugging, repo exploration, or +environment/tooling investigation), read your memory index at +`./.evolve/memory/MEMORY.md` if it exists. It holds one line per memory with a +short description. Open the individual memory files whose description looks +relevant to the task at hand, and let them inform what you do. If the index is +missing or nothing looks relevant, just proceed — that's normal. + +Memories reflect what was true when written. If a memory names a file, +function, command, or flag, verify it still exists before relying on it. + +## Record what you consulted + +After recall, log which entries you actually opened, so the value of this memory +can be measured over time. Run: + +```bash +python3 ~/.bob/evolve-lite/audit_recall.py <memory-file> [<memory-file> ...] +``` + +Pass the memory files you read this turn (space-separated paths, relative to the +project root). Skip this step entirely if you consulted no memories. If the +command prints a line beginning `evolve-session:`, include that line once, +verbatim, somewhere in your reply — it lets later analysis tie this session to +what you recalled. 
+ +## Save — only when you learn something durable + +Near the end of a task, if it produced a reusable fact that isn't already +obvious from the code or git history — and only then — write it to memory. +Saving nothing is the right outcome more often than not; never force a +low-value memory just to have saved one. + +Each memory is one file holding one fact, under `./.evolve/memory/` (create the +directory if it doesn't exist), with frontmatter: + +```markdown +--- +name: <slug> +description: <one-line summary, used to judge relevance during recall> +metadata: + type: user | feedback | project | reference +--- + +<the fact itself> +``` + +Types: +- **user** — who the user is: role, expertise, durable preferences. +- **feedback** — guidance on how you should work, both corrections and + confirmed approaches; always include the why. +- **project** — ongoing work, goals, or constraints not derivable from the code + or git history; convert relative dates ("next week") to absolute ones. +- **reference** — pointers to external resources (URLs, dashboards, tickets). + +In the body, link related memories with `[[name]]`, where `name` is another +memory's `name:` slug. Link liberally; a `[[name]]` with no file yet marks +something worth writing later, not an error. + +After writing the file, add a one-line pointer to `./.evolve/memory/MEMORY.md`: +`- [Title](file.md) — short hook`. MEMORY.md is the index you read during +recall — one line per memory, no frontmatter, never put memory content there. + +## When NOT to save, and housekeeping + +- Don't duplicate what the repo already records: code structure, git history, + READMEs, existing docs. If asked to remember one of those, ask what was + non-obvious about it and save that instead. +- Don't save what only matters to the current conversation. +- Before saving, check for an existing memory that already covers it — update + that file rather than creating a duplicate. +- Delete memories that turn out to be wrong. 
diff --git a/platform-integrations/bob/evolve-lite/commands/evolve-lite-adapt-memory.md b/platform-integrations/bob/evolve-lite/commands/evolve-lite-adapt-memory.md new file mode 100644 index 00000000..9d04f3cc --- /dev/null +++ b/platform-integrations/bob/evolve-lite/commands/evolve-lite-adapt-memory.md @@ -0,0 +1,4 @@ +--- +description: Mirror a just-saved native memory into the shared evolve store so it becomes shareable and auditable +--- +Use the `evolve-lite-adapt-memory` skill on the current conversation. Follow the skill's instructions exactly. diff --git a/platform-integrations/bob/evolve-lite/lib/evolve-lite/audit_recall.py b/platform-integrations/bob/evolve-lite/lib/evolve-lite/audit_recall.py new file mode 100644 index 00000000..42dc1104 --- /dev/null +++ b/platform-integrations/bob/evolve-lite/lib/evolve-lite/audit_recall.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +"""Append a recall-audit row to .evolve/audit.log. + +Self-contained (no third-party or evolve-lite lib imports) so it can be dropped +at a single path and run by a model-invoked shell command on any platform. + +Usage: + python3 audit_recall.py [ ...] + +Records which memory entries the model consulted this turn so the `provenance` +analysis can later judge whether they influenced the outcome. Session id is +resolved from the host's environment when available and falls back to a freshly +minted UUID (printed as `evolve-session: ` for the model to echo). 
+""" + +from __future__ import annotations + +import json +import os +import sys +import uuid +from datetime import datetime, timezone +from pathlib import Path + + +def _evolve_dir() -> Path: + env = os.environ.get("EVOLVE_DIR") + return Path(env) if env else Path.cwd() / ".evolve" + + +def _session_id() -> tuple[str, bool]: + """Return (session_id, self_minted).""" + for var in ("CLAUDE_CODE_SESSION_ID", "CODEX_THREAD_ID"): + val = os.environ.get(var) + if val: + return val, False + return str(uuid.uuid4()), True + + +def main(argv: list[str]) -> int: + entities = [a for a in argv if a.strip()] + if not entities: + return 0 + + session_id, minted = _session_id() + row = { + "event": "recall", + "session_id": session_id, + "entities": entities, + "ts": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ"), + } + + log = _evolve_dir() / "audit.log" + log.parent.mkdir(parents=True, exist_ok=True) + with log.open("a", encoding="utf-8") as fh: + fh.write(json.dumps(row) + "\n") + + if minted: + print(f"evolve-session: {session_id}") + count = len(entities) + print(f"Recorded recall of {count} memory entr{'y' if count == 1 else 'ies'}.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/platform-integrations/bob/evolve-lite/lib/evolve-lite/entity_io.py b/platform-integrations/bob/evolve-lite/lib/evolve-lite/entity_io.py index 63f77e2c..0d4ccace 100644 --- a/platform-integrations/bob/evolve-lite/lib/evolve-lite/entity_io.py +++ b/platform-integrations/bob/evolve-lite/lib/evolve-lite/entity_io.py @@ -118,6 +118,21 @@ def slugify(text, max_length=60): return text or "entity" +def sanitize_type(text): + """Sanitize an entity *type* into a filesystem-safe subdirectory name. + + Like :func:`slugify` but without truncation — a type is a short label, + not free-form content, and truncating it could silently merge distinct + types. 
Returns an empty string for input that contains no usable + characters, leaving the fallback decision to the caller. + """ + if not isinstance(text, str): + return "" + text = text.lower() + text = re.sub(r"[^a-z0-9]+", "-", text) + return text.strip("-") + + def unique_filename(directory, slug): """Return a Path that doesn't collide with existing files in *directory*. @@ -139,7 +154,7 @@ def unique_filename(directory, slug): # Markdown <-> dict conversion # --------------------------------------------------------------------------- -_FRONTMATTER_KEYS = ("type", "trigger", "trajectory", "owner", "source", "visibility", "published_at") +_FRONTMATTER_KEYS = ("type", "trigger", "trajectory", "owner", "source", "native_path", "visibility", "published_at") def entity_to_markdown(entity): @@ -339,24 +354,35 @@ def load_all_entities(entities_dir): return entities -def write_entity_file(directory, entity): +def write_entity_file(directory, entity, filename=None, overwrite=False): """Write a single entity as a markdown file under *directory*. The file is placed in a ``{type}/`` subdirectory. Uses atomic write (write to ``.tmp``, then ``os.rename``). + Args: + directory: Entities root directory. + entity: The entity dict to serialize. + filename: Optional explicit slug for the target file (without the + ``.md`` suffix). When omitted, the slug is derived from the + entity content (the historical default). + overwrite: When True, the entity is written to a deterministic + ``{type}/{filename}.md`` path, overwriting any existing file in + place (stable id, idempotent re-mirroring). When False (the + default), the historical collision-avoiding behavior is kept — + a ``-2``/``-3`` suffix is appended on collision. + Returns: Path to the written file. 
""" - _ALLOWED_TYPES = {"guideline", "preference"} - entity_type = entity.get("type", "guideline") - if not isinstance(entity_type, str) or entity_type not in _ALLOWED_TYPES: - entity_type = "guideline" + # Any non-empty type is accepted and used (sanitized) as the + # subdirectory. An empty/invalid type falls back to "guideline". + entity_type = sanitize_type(entity.get("type", "guideline")) or "guideline" entity["type"] = entity_type type_dir = Path(directory) / entity_type type_dir.mkdir(parents=True, exist_ok=True) - slug = slugify(entity.get("content", "entity")) + slug = slugify(filename) if filename else slugify(entity.get("content", "entity")) content = entity_to_markdown(entity) # Write to a unique temp file first (avoids predictable .tmp collisions) @@ -367,6 +393,13 @@ def write_entity_file(directory, entity): os.close(fd) fd = None + if overwrite: + # Deterministic target: overwrite any existing file in place so + # the entity id is stable across re-mirroring. + target = type_dir / f"{slug}.md" + os.replace(tmp_path, target) + return target + # Atomically claim the target using O_EXCL; retry on race while True: target = unique_filename(type_dir, slug) diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite-adapt-memory/SKILL.md b/platform-integrations/bob/evolve-lite/skills/evolve-lite-adapt-memory/SKILL.md new file mode 100644 index 00000000..97b9d4a9 --- /dev/null +++ b/platform-integrations/bob/evolve-lite/skills/evolve-lite-adapt-memory/SKILL.md @@ -0,0 +1,13 @@ +--- +name: evolve-lite:adapt-memory +description: Mirror a just-saved native memory into the shared evolve store so it becomes shareable and auditable +--- + +# Adapt Memory + +This skill mirrors a just-saved native memory into the shared evolve store. It +is specific to hosts with native self-directed memory and is a no-op on this +platform — there is no native memory store to mirror from. Use the +`evolve-lite:learn` +skill to capture reusable lessons here. 
+ diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite-adapt-memory/scripts/adapt_memory.py b/platform-integrations/bob/evolve-lite/skills/evolve-lite-adapt-memory/scripts/adapt_memory.py new file mode 100644 index 00000000..5bb8fb44 --- /dev/null +++ b/platform-integrations/bob/evolve-lite/skills/evolve-lite-adapt-memory/scripts/adapt_memory.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 +""" +Adapt Memory Script (Claude-only) + +Mirrors a single native Claude memory file into the in-repo evolve store at +``${EVOLVE_DIR:-.evolve}/entities//.md`` so the memory becomes +shareable and auditable like every other evolve entity. + +Native memory files live under ``~/.claude/projects//memory/`` and carry +frontmatter of the form:: + + --- + name: + description: + metadata: + type: user | feedback | project | reference + --- + + + +The agent passes the native ``--type`` through verbatim (native types map +straight onto the entity type — no remapping) and supplies a synthesized +``--trigger`` (the single most important field for future retrieval). The body +of the native file becomes the entity content; the native ``description`` is +carried into the body as a lead line when present. + +Usage: + python3 adapt_memory.py --type --trigger +""" + +import argparse +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple +# plugins can coexist side by side (e.g. .bob/lib/evolve-lite/). 
+_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) +from entity_io import ( # noqa: E402 + find_entities_dir, + get_default_entities_dir, + slugify, + write_entity_file, + log as _log, +) + + +def log(message): + _log("adapt-memory", message) + + +def parse_native_memory(text): + """Split a native memory file into (name, description, body). + + Native frontmatter is simple ``key: value`` lines plus a nested + ``metadata:`` block; we parse the top-level ``name`` and ``description`` + lines and treat everything after the closing ``---`` as the body. The + ``name`` is the native slug we reuse as the stable entity id. Missing + frontmatter is tolerated — the whole text is then the body. + """ + name = None + description = None + body = text + if text.startswith("---"): + parts = text.split("---", 2) + if len(parts) >= 3: + frontmatter, body = parts[1], parts[2] + for line in frontmatter.splitlines(): + # Only top-level keys (no leading indentation) — keeps the + # nested metadata.* keys out of the top-level matches. + if line[:1].isspace(): + continue + key, _, value = line.partition(":") + key = key.strip() + value = value.strip() + if key == "name" and value: + name = value + elif key == "description" and value: + description = value + return name, description, body.strip() + + +def main(): + parser = argparse.ArgumentParser(description="Mirror a native memory into the evolve store.") + parser.add_argument("memory_path", help="Path to the just-saved native memory file.") + parser.add_argument( + "--type", + required=True, + help="Native memory type, passed through as the entity type (e.g. 
user, feedback, project, reference).", + ) + parser.add_argument( + "--trigger", + required=True, + help="Synthesized one-sentence 'when to recall this' description.", + ) + args = parser.parse_args() + + memory_path = Path(args.memory_path).expanduser() + if not memory_path.is_file(): + print(f"Error: native memory file not found: {memory_path}", file=sys.stderr) + sys.exit(1) + + try: + text = memory_path.read_text(encoding="utf-8") + except OSError as exc: + print(f"Error: cannot read {memory_path} - {exc}", file=sys.stderr) + sys.exit(1) + + name, description, body = parse_native_memory(text) + if not body: + print(f"Error: native memory {memory_path} has no body to mirror.", file=sys.stderr) + sys.exit(1) + + # Carry the native description into the body as a lead line when it isn't + # already echoed there, so the mirrored entity is self-describing. + content = body + if description and description not in body: + content = f"{description}\n\n{body}" + + # The native ``name`` becomes the stable, derivable entity slug so the + # entity id is ``/`` on both sides — provenance can map an + # audited native memory straight onto its mirror. Fall back to a + # content-derived slug only when the native frontmatter has no name. + slug = slugify(name) if name else slugify(content) + + entity = { + "type": args.type, + "trigger": args.trigger, + "content": content, + "source": "native-memory", + "native_path": args.memory_path, + } + + entities_dir = find_entities_dir() + if entities_dir: + entities_dir = entities_dir.resolve() + log(f"Using existing entities dir: {entities_dir}") + else: + entities_dir = get_default_entities_dir() + log(f"Created entities dir: {entities_dir}") + + # Deterministic, idempotent write: re-mirroring the same native memory + # (same name + type) overwrites /.md in place rather than + # creating -2.md, keeping the entity id stable. 
+ path = write_entity_file(entities_dir, entity, filename=slug, overwrite=True) + entity_id = f"{entity['type']}/{slug}" + log(f"Mirrored {memory_path} -> {path} (id: {entity_id})") + print(f"Mirrored native memory into evolve store: {path}") + print(f"Entity id: {entity_id}") + + +if __name__ == "__main__": + main() diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite-provenance/SKILL.md b/platform-integrations/bob/evolve-lite/skills/evolve-lite-provenance/SKILL.md index 25ee891a..af6fdd3b 100644 --- a/platform-integrations/bob/evolve-lite/skills/evolve-lite-provenance/SKILL.md +++ b/platform-integrations/bob/evolve-lite/skills/evolve-lite-provenance/SKILL.md @@ -7,58 +7,107 @@ description: Analyze saved trajectories and recall audit events offline to recor ## Overview -This skill runs after one or more sessions have completed. It reads saved trajectories from `.evolve/trajectories/`, matches them to `recall` events in `.evolve/audit.log`, and records post-hoc `influence` events for recalled guidelines. +This skill runs after one or more sessions have completed. It reads `recall` +events from `.evolve/audit.log`, locates each session's trajectory, and records +post-hoc `influence` events for the recalled guidelines. -Use this skill when you want to compute usage provenance without coupling the work to the live learn step. +The mechanical work — reading recall rows, skipping already-assessed pairs, +resolving entity files, and locating trajectories — is done deterministically by +`provenance.py candidates`. Your job is the judgment: read each candidate and +decide whether the recalled guideline was `followed`, `contradicted`, or +`not_applicable`, then persist that verdict. -## Workflow - -### Step 1: Load Recall Events - -Read `.evolve/audit.log` as JSONL. Find entries where `event == "recall"` and `entities` is a non-empty list. - -Skip any recall event that already has `influence` entries for the same `session_id` and entity ids. 
Do not write duplicate influence records. - -### Step 2: Locate Saved Trajectories +Use this skill when you want to compute usage provenance without coupling the +work to the live learn step. -List `.evolve/trajectories/` and match each recall event to a trajectory by `session_id`. - -Matching strategy (in order): -1. `claude-transcript_.jsonl` - the stop-hook transcript dump; the session id is in the filename. -2. `trajectory__.json` - written by the evolve-lite:save-trajectory skill when a session id is available. Match on the `` slice of the filename. -3. `trajectory_.json` - open the file and match its top-level `session_id` field against the recall event. Only fall back to this step when the filename alone does not identify the session. - -If none of the above yields a confident match for a recall event, skip it. Do not guess. - -### Step 3: Read Recalled Entities +## Workflow -For each recalled entity id, open `.evolve/entities/.md`. The id is a path relative to `.evolve/entities/` without the `.md` suffix, such as `guideline/foo` or `subscribed/alice/guideline/foo`. +### Step 1: Get candidates -Read the entity content and trigger. Skip ids whose files are missing. +Run the candidate builder. It emits one JSON object per line (JSONL), one per +unresolved `(session_id, entity)` recall pair: -### Step 4: Assess Influence +```bash +python3 .bob/skills/evolve-lite-provenance/scripts/provenance.py candidates +``` -Compare each recalled entity with the matched trajectory. Pick exactly one verdict: +Each candidate looks like: -- `followed` - the agent's actual actions are consistent with the guideline. -- `contradicted` - the guideline applied, but the agent did the opposite or repeated the avoidable dead end. -- `not_applicable` - the guideline was recalled but did not apply to this session. 
+```json +{ + "session_id": "<session-id>", + "entity_id": "<type>/<slug>", + "entity_excerpt": "<up to 4000 characters of the entity file>", + "trajectory_path": "/path/to/transcript.jsonl", + "trajectory_excerpt": "<up to 4000 characters of the trajectory>", + "missing": ["trajectory"] +} +``` -Keep `evidence` to one short sentence citing a concrete action, tool call, or absence in the trajectory. +Notes: + +- `entity_id` is the path relative to `.evolve/entities/` without the `.md` + suffix, e.g. `feedback/foo`, `guideline/bar`, or + `subscribed/alice/guideline/baz`. +- Pairs that already have an `influence` row are skipped for you — the builder + reuses the same dedup rule used when influence rows are written. You will + never be handed a duplicate. +- The trajectory locator checks `.evolve/trajectories/` first, then falls back + to the native Claude transcript at + `~/.claude/projects/<slug>/<session-id>.jsonl`. This means provenance works + even when no `.evolve/trajectories/` file was written. +- If an entity file or trajectory cannot be found, the candidate is still + emitted with a `missing: [...]` field so the gap is visible. When the + trajectory is missing you usually cannot judge the pair — skip it (do not + guess), unless the entity content alone makes `not_applicable` certain. + +### Step 2: Judge each candidate + +For each candidate, read `entity_excerpt` (and open `trajectory_path` for the +full transcript if the excerpt is not enough). Compare the recalled guideline +against the agent's actual actions in the trajectory and pick exactly one +verdict: + +- `followed` — the agent's actual actions are consistent with the guideline. +- `contradicted` — the guideline applied, but the agent did the opposite or + repeated the avoidable dead end. +- `not_applicable` — the guideline was recalled but did not apply to this + session. + +Keep `evidence` to one short sentence citing a concrete action, tool call, or +absence in the trajectory. This judgment is yours — there is no heuristic +fallback. + +### Step 3: Record verdicts + +Persist each verdict. 
Either pipe one verdict per call to `provenance.py +record`: -### Step 5: Write Influence Events +```bash +echo '{ + "session_id": "", + "entity": "/", + "verdict": "followed", + "evidence": "Agent used the saved parser before trying shell fallbacks." +}' | python3 .bob/skills/evolve-lite-provenance/scripts/provenance.py record +``` -Pipe one JSON payload per assessed session to the helper: +…or, to batch many assessments for one session in a single call, pipe to the +underlying writer directly: ```bash echo '{ "session_id": "", "assessments": [ - {"entity": "guideline/", "verdict": "followed", "evidence": "Agent used the saved parser before trying shell fallbacks."} + {"entity": "feedback/foo", "verdict": "followed", "evidence": "Agent followed it."}, + {"entity": "guideline/bar", "verdict": "not_applicable", "evidence": "Did not apply."} ] }' | python3 .bob/skills/evolve-lite-provenance/scripts/log_influence.py ``` -The `entity` value must match exactly what appeared in the recall event, including any `subscribed//` prefix. +Both paths write the identical `influence` audit row and skip duplicates. The +`entity` value must match the candidate's `entity_id` exactly, including any +`subscribed//` prefix. -It is valid to emit an empty `assessments` list when recall events exist but no recalled guideline can be assessed. +It is valid to record nothing when recall events exist but no recalled guideline +can be assessed (e.g. every candidate is missing its trajectory). diff --git a/platform-integrations/bob/evolve-lite/skills/evolve-lite-provenance/scripts/provenance.py b/platform-integrations/bob/evolve-lite/skills/evolve-lite-provenance/scripts/provenance.py new file mode 100644 index 00000000..c2272501 --- /dev/null +++ b/platform-integrations/bob/evolve-lite/skills/evolve-lite-provenance/scripts/provenance.py @@ -0,0 +1,358 @@ +#!/usr/bin/env python3 +"""Deterministic provenance plumbing for the evolve-lite provenance skill. 
+ +This script does the *mechanical* half of provenance — the part that can be +made deterministic and tested end to end: + + * read ``recall`` rows from ``.evolve/audit.log``, + * skip ``(session_id, entity)`` pairs that already have an ``influence`` row, + * resolve each recalled entity file and the session's trajectory transcript, + * assemble candidate dicts the agent can judge. + +The *semantic* half — deciding whether a recalled guideline was ``followed``, +``contradicted`` or ``not_applicable`` — is an LLM judgment and is NOT done +here. There is deliberately no heuristic verdict: this module never invents a +verdict. The agent reads each candidate, judges it, and pipes the verdict back +through ``record`` (which delegates to ``log_influence.py``'s writer so the +audit-log format is identical). + +Two modes: + + candidates (default) — emit one JSONL candidate per unresolved + (session_id, entity) recall pair to stdout. + record — read a verdict JSON from stdin and append an + ``influence`` row via log_influence.py's writer. +""" + +import json +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple +# plugins can coexist side by side (e.g. .bob/lib/evolve-lite/). +_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) + +# Provenance reuses log_influence.py's writer + dedup so the audit-log format and +# the duplicate-suppression rule live in exactly one place. log_influence.py sits +# next to this file in the same skill scripts/ directory. 
+sys.path.insert(0, str(_script.parent)) + +from entity_io import get_evolve_dir, log as _log # noqa: E402 +import log_influence # noqa: E402 + +_ALLOWED_VERDICTS = log_influence._ALLOWED_VERDICTS + +# How many characters of the entity file / trajectory to surface in a candidate. +_ENTITY_EXCERPT_CHARS = 4000 +_TRAJECTORY_EXCERPT_CHARS = 4000 + + +def log(message): + _log("provenance", message) + + +# --------------------------------------------------------------------------- +# Trajectory locator (Task B) +# --------------------------------------------------------------------------- + + +def _claude_transcript_slug(root): + """Claude derives a project's transcript dir name by replacing every + non-alphanumeric character in the absolute project path with ``-``. + + e.g. /Users/x/Documents/kaizen -> -Users-x-Documents-kaizen + + This mirrors ``_transcript_slug`` in the doctor skill + (skills/evolve-lite/doctor/scripts/doctor.py). The two are kept in sync by + hand because doctor and provenance ship as independent scripts that do not + import one another in the rendered tree; if you change one, change both. + """ + import re + + return re.sub(r"[^A-Za-z0-9]", "-", str(root)) + + +def locate_trajectory(session_id, evolve_dir, *, project_root=None, home=None): + """Locate the saved trajectory transcript for ``session_id``. + + Resolution order (best-effort, returns the first hit or ``None``): + + 1. Legacy ``.evolve/trajectories/`` files: + * ``claude-transcript_.jsonl`` — stop-hook transcript dump. + * ``trajectory__.json`` — save-trajectory skill output; the sid + is the filename slice after the timestamp. + * ``trajectory_.json`` — open and match the inner ``session_id``. + 2. NEW native Claude transcript: ``~/.claude/projects//.jsonl`` + where ```` is the project root path slugified the way Claude does + (every non-alphanumeric char -> ``-``; see ``_claude_transcript_slug``). 
+ + Native discovery makes provenance work in the hookless world where no + ``.evolve/trajectories/`` file is ever written. It is platform-neutral: + Bob/Codex keep their transcripts elsewhere, so the native step simply falls + through to ``None`` for them rather than misfiring. + """ + evolve_dir = Path(evolve_dir) + + # --- 1. Legacy .evolve/trajectories/ ------------------------------------ + traj_dir = evolve_dir / "trajectories" + if traj_dir.is_dir(): + direct = traj_dir / f"claude-transcript_{session_id}.jsonl" + if direct.is_file(): + return direct + + # trajectory__.json — match on the filename sid slice. + for path in sorted(traj_dir.glob("trajectory_*_*.json")): + stem = path.stem # trajectory__ + parts = stem.split("_", 2) + if len(parts) == 3 and parts[2] == session_id: + return path + + # trajectory_.json — open and match the inner session_id field. + for path in sorted(traj_dir.glob("trajectory_*.json")): + # Skip the _ shape already handled above. + if path.stem.count("_") >= 2: + continue + try: + data = json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + continue + if isinstance(data, dict) and data.get("session_id") == session_id: + return path + + # --- 2. Native Claude transcript ---------------------------------------- + # The project root is the parent of the .evolve dir; the home dir holds + # ~/.claude/projects//.jsonl. 
+ root = Path(project_root) if project_root is not None else evolve_dir.resolve().parent + base = Path(home) if home is not None else Path.home() + slug = _claude_transcript_slug(root) + native = base / ".claude" / "projects" / slug / f"{session_id}.jsonl" + if native.is_file(): + return native + + return None + + +# --------------------------------------------------------------------------- +# Recall row reading + entity resolution (Task A — candidates) +# --------------------------------------------------------------------------- + + +def read_recall_rows(evolve_dir): + """Return a list of ``(session_id, [entity_id, ...])`` tuples for every ``recall`` audit row. + + Rows with no ``session_id`` or an empty ``entities`` list are skipped. + """ + audit_log = Path(evolve_dir) / "audit.log" + if not audit_log.is_file(): + return [] + + rows = [] + for line in audit_log.read_text(encoding="utf-8").splitlines(): + if not line.strip(): + continue + try: + event = json.loads(line) + except json.JSONDecodeError: + continue + if not isinstance(event, dict) or event.get("event") != "recall": + continue + session_id = event.get("session_id") + entities = event.get("entities") + if not isinstance(session_id, str) or not session_id: + continue + if not isinstance(entities, list) or not entities: + continue + clean = [e for e in entities if isinstance(e, str) and e] + if clean: + rows.append((session_id, clean)) + return rows + + +def _read_entity(evolve_dir, entity_id): + """Return ``(path, excerpt)`` for an entity file, or ``(path, None)`` if + the file is missing. ``entity_id`` is a ``/`` id relative to + ``entities/`` (without ``.md``). 
def build_candidates(evolve_dir, *, project_root=None, home=None):
    """Assemble candidate dicts for every unresolved recall (session, entity).

    Returns a list of dicts shaped::

        {
            "session_id": ...,
            "entity_id": ...,              # id relative to entities/, no .md
            "entity_excerpt": str | None,
            "trajectory_path": str | None,
            "trajectory_excerpt": str | None,
            "missing": ["entity"|"trajectory", ...],  # only when non-empty
        }

    ``(session_id, entity)`` pairs that already have an ``influence`` row are
    skipped via ``log_influence.existing_influence_keys``. A pair that shows
    up in several recall rows (recall is logged once per turn, so a session
    can repeat an entity) is emitted only once, at its first occurrence, so
    the caller is never handed a duplicate to judge.

    Candidates whose entity file or trajectory cannot be found or read are
    still emitted, with a ``missing`` list, so the gap is visible rather than
    silently dropped.

    Args:
        evolve_dir: Evolve root directory (path or string).
        project_root: Forwarded to ``locate_trajectory``.
        home: Forwarded to ``locate_trajectory``.
    """
    evolve_dir = Path(evolve_dir)
    existing = log_influence.existing_influence_keys(evolve_dir)

    # session_id -> (trajectory_path, trajectory_excerpt). Locating and reading
    # a transcript is the expensive part and does not depend on the entity, so
    # do it at most once per session instead of once per (row, entity).
    trajectories = {}
    seen = set()
    candidates = []

    for session_id, entities in read_recall_rows(evolve_dir):
        pending = []
        for entity_id in entities:
            key = (session_id, entity_id)
            if key in existing or key in seen:
                continue
            seen.add(key)
            pending.append(entity_id)
        if not pending:
            # Everything in this row is already assessed or already queued;
            # skip the trajectory lookup entirely.
            continue

        if session_id not in trajectories:
            located = locate_trajectory(session_id, evolve_dir, project_root=project_root, home=home)
            trajectories[session_id] = (located, _read_trajectory_excerpt(located))
        trajectory_path, trajectory_excerpt = trajectories[session_id]

        for entity_id in pending:
            _entity_path, entity_excerpt = _read_entity(evolve_dir, entity_id)

            missing = []
            if entity_excerpt is None:
                missing.append("entity")
            # A None excerpt covers both "not located" and "located but
            # unreadable", mirroring how an unreadable entity file is flagged.
            if trajectory_excerpt is None:
                missing.append("trajectory")

            candidate = {
                "session_id": session_id,
                "entity_id": entity_id,
                "entity_excerpt": entity_excerpt,
                "trajectory_path": str(trajectory_path) if trajectory_path else None,
                "trajectory_excerpt": trajectory_excerpt,
            }
            if missing:
                candidate["missing"] = missing
            candidates.append(candidate)
    return candidates
def _run_candidates():
    """``candidates`` mode: print one JSON object per unresolved recall pair.

    Resolves the evolve directory, builds the candidate list and writes each
    candidate to stdout as a single JSON line, then logs how many were emitted.
    """
    store = get_evolve_dir().resolve()
    pending = build_candidates(store)
    # map() stays lazy, so each candidate is serialized right before it is
    # printed, exactly as in a plain loop.
    for encoded in map(json.dumps, pending):
        print(encoded)
    log(f"Emitted {len(pending)} candidate(s) from {store}")
def main(argv=None):
    """Command-line entry point for the provenance helper.

    The first argument selects the mode: ``candidates`` (the default when no
    argument is given) or ``record``. Any other mode prints an error to stderr
    and exits with status 2.

    Args:
        argv: Argument list without the program name; ``sys.argv[1:]`` is used
            when ``None``.
    """
    args = list(sys.argv[1:] if argv is None else argv)
    mode = "candidates" if not args else args[0]

    if mode == "candidates":
        _run_candidates()
        return
    if mode == "record":
        _run_record()
        return

    print(f"Error: unknown mode {mode!r}; expected 'candidates' or 'record'.", file=sys.stderr)
    sys.exit(2)
If the command prints a line beginning +`evolve-session:`, include that line once, verbatim, somewhere in your reply — it +lets later analysis tie this session to what you recalled. diff --git a/platform-integrations/claude/plugins/evolve-lite/hooks/hooks.json b/platform-integrations/claude/plugins/evolve-lite/hooks/hooks.json deleted file mode 100644 index 1d282a7e..00000000 --- a/platform-integrations/claude/plugins/evolve-lite/hooks/hooks.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "hooks": { - "UserPromptSubmit": [ - { - "matcher": "", - "hooks": [ - { - "type": "command", - "command": "python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/recall/scripts/retrieve_entities.py" - } - ] - } - ], - "SessionStart": [ - { - "matcher": "", - "hooks": [ - { - "type": "command", - "command": "python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/sync/scripts/sync.py --quiet" - } - ] - } - ], - "Stop": [ - { - "matcher": "", - "hooks": [ - { - "type": "command", - "command": "python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/save-trajectory/scripts/on_stop.py" - }, - { - "type": "command", - "command": "python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/learn/scripts/on_stop.py" - } - ] - } - ] - } -} diff --git a/platform-integrations/claude/plugins/evolve-lite/lib/evolve-lite/audit_recall.py b/platform-integrations/claude/plugins/evolve-lite/lib/evolve-lite/audit_recall.py new file mode 100644 index 00000000..42dc1104 --- /dev/null +++ b/platform-integrations/claude/plugins/evolve-lite/lib/evolve-lite/audit_recall.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +"""Append a recall-audit row to .evolve/audit.log. + +Self-contained (no third-party or evolve-lite lib imports) so it can be dropped +at a single path and run by a model-invoked shell command on any platform. + +Usage: + python3 audit_recall.py [ ...] + +Records which memory entries the model consulted this turn so the `provenance` +analysis can later judge whether they influenced the outcome. 
def _evolve_dir() -> Path:
    """Return the evolve root: ``$EVOLVE_DIR`` when set and non-empty, else ``<cwd>/.evolve``."""
    env = os.environ.get("EVOLVE_DIR")
    return Path(env) if env else Path.cwd() / ".evolve"


def _session_id() -> tuple[str, bool]:
    """Return ``(session_id, self_minted)``.

    The first non-empty value among ``CLAUDE_CODE_SESSION_ID`` and
    ``CODEX_THREAD_ID`` is used as-is (``self_minted`` is ``False``);
    otherwise a fresh UUID4 string is generated (``self_minted`` is ``True``).
    """
    for var in ("CLAUDE_CODE_SESSION_ID", "CODEX_THREAD_ID"):
        val = os.environ.get(var)
        if val:
            return val, False
    return str(uuid.uuid4()), True


def main(argv: list[str]) -> int:
    """Append one ``recall`` row to ``<evolve_dir>/audit.log``.

    Args:
        argv: The consulted memory ids / paths. Each is stripped of
            surrounding whitespace; blank entries and repeats are dropped
            (first occurrence wins) so the stored ids resolve cleanly and a
            repeated argument does not become a duplicate recall.

    Returns:
        ``0`` on success or when there is nothing to record, ``1`` when the
        audit log cannot be written (the error is reported on stderr).
    """
    # dict.fromkeys de-duplicates while preserving argument order.
    entities = list(dict.fromkeys(arg.strip() for arg in argv if arg.strip()))
    if not entities:
        return 0

    session_id, minted = _session_id()
    row = {
        "event": "recall",
        "session_id": session_id,
        "entities": entities,
        "ts": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
    }

    log = _evolve_dir() / "audit.log"
    try:
        log.parent.mkdir(parents=True, exist_ok=True)
        with log.open("a", encoding="utf-8") as fh:
            fh.write(json.dumps(row) + "\n")
    except OSError as exc:
        # Report the failure as a short message and a non-zero exit status
        # instead of a traceback.
        print(f"Error: cannot write {log} - {exc}", file=sys.stderr)
        return 1

    if minted:
        # Only a self-minted id needs echoing by the caller.
        print(f"evolve-session: {session_id}")
    count = len(entities)
    print(f"Recorded recall of {count} memory entr{'y' if count == 1 else 'ies'}.")
    return 0
+ + Like :func:`slugify` but without truncation — a type is a short label, + not free-form content, and truncating it could silently merge distinct + types. Returns an empty string for input that contains no usable + characters, leaving the fallback decision to the caller. + """ + if not isinstance(text, str): + return "" + text = text.lower() + text = re.sub(r"[^a-z0-9]+", "-", text) + return text.strip("-") + + def unique_filename(directory, slug): """Return a Path that doesn't collide with existing files in *directory*. @@ -139,7 +154,7 @@ def unique_filename(directory, slug): # Markdown <-> dict conversion # --------------------------------------------------------------------------- -_FRONTMATTER_KEYS = ("type", "trigger", "trajectory", "owner", "source", "visibility", "published_at") +_FRONTMATTER_KEYS = ("type", "trigger", "trajectory", "owner", "source", "native_path", "visibility", "published_at") def entity_to_markdown(entity): @@ -339,24 +354,35 @@ def load_all_entities(entities_dir): return entities -def write_entity_file(directory, entity): +def write_entity_file(directory, entity, filename=None, overwrite=False): """Write a single entity as a markdown file under *directory*. The file is placed in a ``{type}/`` subdirectory. Uses atomic write (write to ``.tmp``, then ``os.rename``). + Args: + directory: Entities root directory. + entity: The entity dict to serialize. + filename: Optional explicit slug for the target file (without the + ``.md`` suffix). When omitted, the slug is derived from the + entity content (the historical default). + overwrite: When True, the entity is written to a deterministic + ``{type}/{filename}.md`` path, overwriting any existing file in + place (stable id, idempotent re-mirroring). When False (the + default), the historical collision-avoiding behavior is kept — + a ``-2``/``-3`` suffix is appended on collision. + Returns: Path to the written file. 
""" - _ALLOWED_TYPES = {"guideline", "preference"} - entity_type = entity.get("type", "guideline") - if not isinstance(entity_type, str) or entity_type not in _ALLOWED_TYPES: - entity_type = "guideline" + # Any non-empty type is accepted and used (sanitized) as the + # subdirectory. An empty/invalid type falls back to "guideline". + entity_type = sanitize_type(entity.get("type", "guideline")) or "guideline" entity["type"] = entity_type type_dir = Path(directory) / entity_type type_dir.mkdir(parents=True, exist_ok=True) - slug = slugify(entity.get("content", "entity")) + slug = slugify(filename) if filename else slugify(entity.get("content", "entity")) content = entity_to_markdown(entity) # Write to a unique temp file first (avoids predictable .tmp collisions) @@ -367,6 +393,13 @@ def write_entity_file(directory, entity): os.close(fd) fd = None + if overwrite: + # Deterministic target: overwrite any existing file in place so + # the entity id is stable across re-mirroring. + target = type_dir / f"{slug}.md" + os.replace(tmp_path, target) + return target + # Atomically claim the target using O_EXCL; retry on race while True: target = unique_filename(type_dir, slug) diff --git a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md new file mode 100644 index 00000000..34d2fab6 --- /dev/null +++ b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md @@ -0,0 +1,59 @@ +--- +name: adapt-memory +description: Mirror a just-saved native memory into the shared evolve store so it becomes shareable and auditable +context: fork +--- + +# Adapt Memory + +## Overview + +You have native, self-directed memory: you decide what to remember and write it +under `~/.claude/projects//memory/`. That store has no provenance or +sharing. 
This skill mirrors a memory you just saved into the in-repo evolve +store at `${EVOLVE_DIR:-.evolve}/entities/<type>/<name>.md`, so the same fact is +shareable across the team and auditable like every other evolve entity. + +Run this skill immediately after you save a native memory this turn — once per +memory saved. + +## Required Action + +For each native memory file you saved this turn: + +1. **Read the just-saved memory file** so you are mirroring its real content, + not a guess. Note its `metadata.type` (one of `user`, `feedback`, `project`, + `reference`) — this passes straight through as the entity type, with no + remapping. + +2. **Compose a high-quality `trigger`.** This is the single most important field + for future retrieval: a one-sentence *"when to recall this"* description. + Base it on what the memory actually says and the situations in which a future + agent would benefit from it — do **not** mechanically copy the memory's + `description`. Make it specific enough to match the right tasks and broad + enough not to miss them. + +3. **Run the adapter script**, passing the native file path, its type, and your + synthesized trigger: + +```bash +python3 ~/.claude/evolve-lite/adapt_memory.py \ + <memory_path> \ + --type <type> \ + --trigger "<trigger>" +``` + +The script parses the native frontmatter and body, builds the entity +(`type` = native type, `trigger` = your synthesized trigger, `content` = the +native body with its `description` carried in as a lead line), and persists it +via the shared entity writer. It is safe to run repeatedly. + +## Notes + +- One invocation per saved memory. If you saved several memories this turn, + invoke the script once for each, with a trigger tailored to each. +- The trigger quality directly determines whether the memory resurfaces when it + matters. Spend a moment on it. +- If you saved no native memory this turn, there is nothing to mirror — skip + this skill.
+ diff --git a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py new file mode 100644 index 00000000..5bb8fb44 --- /dev/null +++ b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 +""" +Adapt Memory Script (Claude-only) + +Mirrors a single native Claude memory file into the in-repo evolve store at +``${EVOLVE_DIR:-.evolve}/entities//.md`` so the memory becomes +shareable and auditable like every other evolve entity. + +Native memory files live under ``~/.claude/projects//memory/`` and carry +frontmatter of the form:: + + --- + name: + description: + metadata: + type: user | feedback | project | reference + --- + + + +The agent passes the native ``--type`` through verbatim (native types map +straight onto the entity type — no remapping) and supplies a synthesized +``--trigger`` (the single most important field for future retrieval). The body +of the native file becomes the entity content; the native ``description`` is +carried into the body as a lead line when present. + +Usage: + python3 adapt_memory.py --type --trigger +""" + +import argparse +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple +# plugins can coexist side by side (e.g. .bob/lib/evolve-lite/). 
def parse_native_memory(text):
    """Split a native memory file into ``(name, description, body)``.

    Frontmatter is recognized only when the first line is ``---`` and a later
    line is again exactly ``---`` (surrounding whitespace ignored). Delimiters
    are matched as whole lines, so a ``---`` that merely appears inside a
    frontmatter value (``description: a --- b``) or as a rule further down the
    body is not mistaken for the closing delimiter.

    Within the frontmatter only top-level ``key: value`` lines are read:
    indented lines (the nested ``metadata:`` block) are ignored, and only
    non-empty ``name`` and ``description`` values are captured. Values are
    taken verbatim after the first ``:`` with surrounding whitespace removed;
    no YAML unquoting is attempted.

    Args:
        text: Full contents of the native memory file.

    Returns:
        ``(name, description, body)``. ``name`` / ``description`` are ``None``
        when absent; ``body`` is everything after the closing delimiter,
        stripped. When no complete frontmatter block is present the whole
        text (stripped) is the body.
    """
    # keepends=True so the body is re-assembled byte-for-byte (line endings
    # included) before the final strip.
    lines = text.splitlines(keepends=True)
    if not lines or lines[0].strip() != "---":
        return None, None, text.strip()

    closing = next(
        (index for index, line in enumerate(lines[1:], start=1) if line.strip() == "---"),
        None,
    )
    if closing is None:
        # Opening delimiter without a closing one: not frontmatter after all.
        return None, None, text.strip()

    name = None
    description = None
    for line in lines[1:closing]:
        # Only top-level keys (no leading indentation) — keeps the nested
        # metadata.* keys out of the top-level matches.
        if line[:1].isspace():
            continue
        key, _, value = line.partition(":")
        key = key.strip()
        value = value.strip()
        if key == "name" and value:
            name = value
        elif key == "description" and value:
            description = value

    body = "".join(lines[closing + 1:])
    return name, description, body.strip()
user, feedback, project, reference).", + ) + parser.add_argument( + "--trigger", + required=True, + help="Synthesized one-sentence 'when to recall this' description.", + ) + args = parser.parse_args() + + memory_path = Path(args.memory_path).expanduser() + if not memory_path.is_file(): + print(f"Error: native memory file not found: {memory_path}", file=sys.stderr) + sys.exit(1) + + try: + text = memory_path.read_text(encoding="utf-8") + except OSError as exc: + print(f"Error: cannot read {memory_path} - {exc}", file=sys.stderr) + sys.exit(1) + + name, description, body = parse_native_memory(text) + if not body: + print(f"Error: native memory {memory_path} has no body to mirror.", file=sys.stderr) + sys.exit(1) + + # Carry the native description into the body as a lead line when it isn't + # already echoed there, so the mirrored entity is self-describing. + content = body + if description and description not in body: + content = f"{description}\n\n{body}" + + # The native ``name`` becomes the stable, derivable entity slug so the + # entity id is ``/`` on both sides — provenance can map an + # audited native memory straight onto its mirror. Fall back to a + # content-derived slug only when the native frontmatter has no name. + slug = slugify(name) if name else slugify(content) + + entity = { + "type": args.type, + "trigger": args.trigger, + "content": content, + "source": "native-memory", + "native_path": args.memory_path, + } + + entities_dir = find_entities_dir() + if entities_dir: + entities_dir = entities_dir.resolve() + log(f"Using existing entities dir: {entities_dir}") + else: + entities_dir = get_default_entities_dir() + log(f"Created entities dir: {entities_dir}") + + # Deterministic, idempotent write: re-mirroring the same native memory + # (same name + type) overwrites /.md in place rather than + # creating -2.md, keeping the entity id stable. 
+ path = write_entity_file(entities_dir, entity, filename=slug, overwrite=True) + entity_id = f"{entity['type']}/{slug}" + log(f"Mirrored {memory_path} -> {path} (id: {entity_id})") + print(f"Mirrored native memory into evolve store: {path}") + print(f"Entity id: {entity_id}") + + +if __name__ == "__main__": + main() diff --git a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md new file mode 100644 index 00000000..8b9ece19 --- /dev/null +++ b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md @@ -0,0 +1,43 @@ +--- +name: doctor +description: Diagnose evolve health on Claude — verify the CLAUDE.md @import is actually loading the thin EVOLVE.md into sessions +context: fork +--- + +# Doctor + +## Overview + +On Claude, evolve is delivered by a single `@.evolve/EVOLVE.md` import line in +this repo's `./CLAUDE.md`. That import requires a one-time, per-project "allow +external imports" approval. If you (or a teammate) declined it — even once, in a +past session — Claude silently disables the import forever, the thin EVOLVE.md +never loads, and evolve becomes a no-op with **no error**. + +This skill checks whether the import is actually reaching your sessions, by +looking for a canary token that the installed EVOLVE.md expands into the session +transcript when the import loads. + +## Required Action + +Run the doctor script from the repo root: + +```bash +python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/doctor/scripts/doctor.py +``` + +It is read-only and always exits 0. Read the status code it prints: + +- **OK** — the import is loading; nothing to do. +- **IMPORT_DISABLED** — the `@import` line is in `CLAUDE.md` but its content is + not reaching sessions (you likely declined the external-import approval). 
+ Follow the remediation the script prints: purge the project approval, start a + new session, and **Allow** the import dialog. +- **NOT_INSTALLED** — evolve isn't wired into this repo; re-run the installer. +- **STALE_EVOLVE_MD** — the installed `.evolve/EVOLVE.md` predates the canary; + re-run the installer to refresh it. +- **UNKNOWN** — no recent Claude transcripts for this project yet; open a + session, then re-run. + +Relay the status and any remediation to the user. + diff --git a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py new file mode 100644 index 00000000..2c2a5382 --- /dev/null +++ b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +""" +Doctor Script (Claude-only diagnostic) + +On Claude, evolve is delivered by a single ``@.evolve/EVOLVE.md`` import line in +the repo's ``./CLAUDE.md``. That import requires a one-time, per-project +"allow external imports" approval. If the user declines it (even once, in a past +session) Claude silently disables the import forever — the thin EVOLVE.md never +loads and evolve becomes a no-op with NO error. + +Claude's internal approval flag is undocumented and unreliable to read, so this +script detects delivery *empirically*: the installed thin EVOLVE.md carries a +canary token (``EVOLVE_IMPORT_CANARY_``). When the import loads, that token +expands into the session transcript. The doctor extracts the token from the +installed copy (never hardcoding it twice) and greps the most recent Claude +project transcripts for it. + +Status codes (printed verbatim, always exit 0 — this is a diagnostic): + + OK — canary found in a recent transcript; import is loading. 
+ IMPORT_DISABLED — import line present in CLAUDE.md but canary absent from + every recent transcript; the user likely declined the + external-import approval. + NOT_INSTALLED — the import line is missing from CLAUDE.md, or the installed + .evolve/EVOLVE.md is missing; run the installer. + STALE_EVOLVE_MD — installed EVOLVE.md has no canary; it predates this build, + re-run the installer. + UNKNOWN — no recent Claude transcripts for this project yet. + +Usage: + python3 doctor.py +""" + +import os +import re +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple plugins +# can coexist side by side. The doctor only needs the shared `log` helper, but +# resolving the lib the same way the other scripts do keeps the convention +# uniform (and only works in the rendered tree, same constraint as adapt_memory). +_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) +from entity_io import log as _log # noqa: E402 + + +def log(message): + _log("doctor", message) + + +# The line the installer injects into the repo's CLAUDE.md (see install.sh +# CLAUDE_IMPORT_LINE). Matching on this substring is the install-sanity check. +CLAUDE_IMPORT_LINE = "@.evolve/EVOLVE.md" + +# Pattern used to lift the canary token out of the installed EVOLVE.md so the +# exact token lives in exactly one place (the template), never duplicated here. +_CANARY_RE = re.compile(r"EVOLVE_IMPORT_CANARY_\S+") + +# How many of the most-recent transcripts to scan for the canary. 
_RECENT_N = 3  # default number of most-recent transcripts to scan


def _evolve_dir(root):
    """Resolve the .evolve root: ``$EVOLVE_DIR`` if set and non-empty, else ``<root>/.evolve``."""
    env_dir = os.environ.get("EVOLVE_DIR")
    if env_dir:
        return Path(env_dir)
    return root / ".evolve"


def _transcript_slug(root):
    """Return the transcript directory name for a project path.

    Every character of ``str(root)`` that is not an ASCII letter or digit is
    replaced with ``-``.

    e.g. /Users/x/Documents/kaizen -> -Users-x-Documents-kaizen
    """
    return re.sub(r"[^A-Za-z0-9]", "-", str(root))


def _recent_transcripts(home, root, limit=_RECENT_N):
    """Return up to ``limit`` ``*.jsonl`` transcripts for this project, newest first.

    Transcripts are looked up in ``<home>/.claude/projects/<slug>/`` where
    ``<slug>`` is ``_transcript_slug(root)``; only regular files are kept and
    they are ordered by modification time, most recent first.

    Never raises on filesystem trouble: a missing or unlistable project
    directory yields ``[]``, and a transcript that disappears or cannot be
    stat'ed between listing and sorting is skipped.
    """
    proj_dir = home / ".claude" / "projects" / _transcript_slug(root)
    if not proj_dir.is_dir():
        return []

    dated = []
    try:
        for path in proj_dir.glob("*.jsonl"):
            try:
                if path.is_file():
                    # Capture the mtime once, up front, so the sort below
                    # cannot hit a file that has since vanished.
                    dated.append((path.stat().st_mtime, path))
            except OSError:
                continue
    except OSError:
        # Directory became unreadable while being listed.
        return []

    dated.sort(key=lambda pair: pair[0], reverse=True)
    return [path for _mtime, path in dated[:limit]]


def _canary_in_transcripts(transcripts, token):
    """True if `token` appears anywhere in any of the given transcript files.

    Files are decoded as UTF-8 with undecodable bytes replaced; files that
    cannot be read are skipped.
    """
    for path in transcripts:
        try:
            text = path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue
        if token in text:
            return True
    return False
+ """ + root = Path(root) + home = Path(home) + + # --- Install sanity ------------------------------------------------------ + claude_md = root / "CLAUDE.md" + has_import = False + if claude_md.is_file(): + try: + has_import = CLAUDE_IMPORT_LINE in claude_md.read_text(encoding="utf-8", errors="replace") + except OSError: + has_import = False + if not has_import: + return ( + "NOT_INSTALLED", + f"evolve import not wired into this repo's CLAUDE.md (expected a line `{CLAUDE_IMPORT_LINE}`); run the installer.", + ) + + evolve_md = _evolve_dir(root) / "EVOLVE.md" + if not evolve_md.is_file(): + return ( + "NOT_INSTALLED", + f"installed EVOLVE.md is missing at {evolve_md}; run the installer.", + ) + + # --- Extract the canary from the installed file -------------------------- + try: + evolve_text = evolve_md.read_text(encoding="utf-8", errors="replace") + except OSError as exc: + return ( + "NOT_INSTALLED", + f"cannot read installed EVOLVE.md at {evolve_md} - {exc}; run the installer.", + ) + match = _CANARY_RE.search(evolve_text) + if not match: + return ( + "STALE_EVOLVE_MD", + f"installed EVOLVE.md at {evolve_md} has no canary token (it predates this build); re-run the installer to refresh it.", + ) + token = match.group(0) + + # --- Transcript check ---------------------------------------------------- + transcripts = _recent_transcripts(home, root) + if not transcripts: + return ( + "UNKNOWN", + "no recent Claude transcripts for this project yet; open a session, then re-run.", + ) + if _canary_in_transcripts(transcripts, token): + return ("OK", "✓ evolve EVOLVE.md import is loading.") + + return ( + "IMPORT_DISABLED", + "⚠ The @import is present in CLAUDE.md but its content is NOT " + "reaching sessions — you likely declined Claude's external-import " + "approval. 
def main():
    """Run the diagnosis for the current working directory and report it.

    The resolved working directory is the project root and ``Path.home()`` is
    the home directory passed to ``diagnose``. The resulting status code and
    message are logged and printed on one line, and the process always exits
    with status 0.
    """
    project_root = Path(os.getcwd()).resolve()
    user_home = Path.home()
    status, detail = diagnose(project_root, user_home)
    log(f"{status}: {detail}")
    print(f"evolve doctor [{status}] {detail}")
    # Diagnostic only — never fail the caller.
    sys.exit(0)
Find entries where `event == "recall"` and `entities` is a non-empty list. - -Skip any recall event that already has `influence` entries for the same `session_id` and entity ids. Do not write duplicate influence records. - -### Step 2: Locate Saved Trajectories +Use this skill when you want to compute usage provenance without coupling the +work to the live learn step. -List `.evolve/trajectories/` and match each recall event to a trajectory by `session_id`. - -Matching strategy (in order): -1. `claude-transcript_.jsonl` - the stop-hook transcript dump; the session id is in the filename. -2. `trajectory__.json` - written by the /evolve-lite:save-trajectory skill when a session id is available. Match on the `` slice of the filename. -3. `trajectory_.json` - open the file and match its top-level `session_id` field against the recall event. Only fall back to this step when the filename alone does not identify the session. - -If none of the above yields a confident match for a recall event, skip it. Do not guess. - -### Step 3: Read Recalled Entities +## Workflow -For each recalled entity id, open `.evolve/entities/.md`. The id is a path relative to `.evolve/entities/` without the `.md` suffix, such as `guideline/foo` or `subscribed/alice/guideline/foo`. +### Step 1: Get candidates -Read the entity content and trigger. Skip ids whose files are missing. +Run the candidate builder. It emits one JSON object per line (JSONL), one per +unresolved `(session_id, entity)` recall pair: -### Step 4: Assess Influence +```bash +python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/provenance/scripts/provenance.py candidates +``` -Compare each recalled entity with the matched trajectory. Pick exactly one verdict: +Each candidate looks like: -- `followed` - the agent's actual actions are consistent with the guideline. -- `contradicted` - the guideline applied, but the agent did the opposite or repeated the avoidable dead end. 
-- `not_applicable` - the guideline was recalled but did not apply to this session. +```json +{ + "session_id": "", + "entity_id": "/", + "entity_excerpt": "", + "trajectory_path": "/path/to/transcript.jsonl", + "trajectory_excerpt": "", + "missing": ["trajectory"] +} +``` -Keep `evidence` to one short sentence citing a concrete action, tool call, or absence in the trajectory. +Notes: + +- `entity_id` is the path relative to `.evolve/entities/` without the `.md` + suffix, e.g. `feedback/foo`, `guideline/bar`, or + `subscribed/alice/guideline/baz`. +- Pairs that already have an `influence` row are skipped for you — the builder + reuses the same dedup rule used when influence rows are written. You will + never be handed a duplicate. +- The trajectory locator checks `.evolve/trajectories/` first, then falls back + to the native Claude transcript at + `~/.claude/projects//.jsonl`. This means provenance works + even when no `.evolve/trajectories/` file was written. +- If an entity file or trajectory cannot be found, the candidate is still + emitted with a `missing: [...]` field so the gap is visible. When the + trajectory is missing you usually cannot judge the pair — skip it (do not + guess), unless the entity content alone makes `not_applicable` certain. + +### Step 2: Judge each candidate + +For each candidate, read `entity_excerpt` (and open `trajectory_path` for the +full transcript if the excerpt is not enough). Compare the recalled guideline +against the agent's actual actions in the trajectory and pick exactly one +verdict: + +- `followed` — the agent's actual actions are consistent with the guideline. +- `contradicted` — the guideline applied, but the agent did the opposite or + repeated the avoidable dead end. +- `not_applicable` — the guideline was recalled but did not apply to this + session. + +Keep `evidence` to one short sentence citing a concrete action, tool call, or +absence in the trajectory. This judgment is yours — there is no heuristic +fallback. 
+ +### Step 3: Record verdicts + +Persist each verdict. Either pipe one verdict per call to `provenance.py +record`: -### Step 5: Write Influence Events +```bash +echo '{ + "session_id": "", + "entity": "/", + "verdict": "followed", + "evidence": "Agent used the saved parser before trying shell fallbacks." +}' | python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/provenance/scripts/provenance.py record +``` -Pipe one JSON payload per assessed session to the helper: +…or, to batch many assessments for one session in a single call, pipe to the +underlying writer directly: ```bash echo '{ "session_id": "", "assessments": [ - {"entity": "guideline/", "verdict": "followed", "evidence": "Agent used the saved parser before trying shell fallbacks."} + {"entity": "feedback/foo", "verdict": "followed", "evidence": "Agent followed it."}, + {"entity": "guideline/bar", "verdict": "not_applicable", "evidence": "Did not apply."} ] }' | python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/provenance/scripts/log_influence.py ``` -The `entity` value must match exactly what appeared in the recall event, including any `subscribed//` prefix. +Both paths write the identical `influence` audit row and skip duplicates. The +`entity` value must match the candidate's `entity_id` exactly, including any +`subscribed//` prefix. -It is valid to emit an empty `assessments` list when recall events exist but no recalled guideline can be assessed. +It is valid to record nothing when recall events exist but no recalled guideline +can be assessed (e.g. every candidate is missing its trajectory). 
diff --git a/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py new file mode 100644 index 00000000..c2272501 --- /dev/null +++ b/platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py @@ -0,0 +1,358 @@ +#!/usr/bin/env python3 +"""Deterministic provenance plumbing for the evolve-lite provenance skill. + +This script does the *mechanical* half of provenance — the part that can be +made deterministic and tested end to end: + + * read ``recall`` rows from ``.evolve/audit.log``, + * skip ``(session_id, entity)`` pairs that already have an ``influence`` row, + * resolve each recalled entity file and the session's trajectory transcript, + * assemble candidate dicts the agent can judge. + +The *semantic* half — deciding whether a recalled guideline was ``followed``, +``contradicted`` or ``not_applicable`` — is an LLM judgment and is NOT done +here. There is deliberately no heuristic verdict: this module never invents a +verdict. The agent reads each candidate, judges it, and pipes the verdict back +through ``record`` (which delegates to ``log_influence.py``'s writer so the +audit-log format is identical). + +Two modes: + + candidates (default) — emit one JSONL candidate per unresolved + (session_id, entity) recall pair to stdout. + record — read a verdict JSON from stdin and append an + ``influence`` row via log_influence.py's writer. +""" + +import json +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple +# plugins can coexist side by side (e.g. .bob/lib/evolve-lite/). 
def _claude_transcript_slug(root):
    """Return the transcript directory name Claude uses for project ``root``.

    The slug is the string form of ``root`` with every character outside
    ``A-Z``, ``a-z`` and ``0-9`` replaced by ``-``.

    e.g. /Users/x/Documents/kaizen -> -Users-x-Documents-kaizen

    The doctor skill carries its own copy of this rule (``_transcript_slug`` in
    skills/evolve-lite/doctor/scripts/doctor.py). The two scripts ship
    independently and cannot import one another in the rendered tree, so the
    copies are synchronised by hand: change one, change both.
    """
    import re

    non_alphanumeric = re.compile(r"[^A-Za-z0-9]")
    project_path = str(root)
    return non_alphanumeric.sub("-", project_path)
Legacy ``.evolve/trajectories/`` files: + * ``claude-transcript_.jsonl`` — stop-hook transcript dump. + * ``trajectory__.json`` — save-trajectory skill output; the sid + is the filename slice after the timestamp. + * ``trajectory_.json`` — open and match the inner ``session_id``. + 2. NEW native Claude transcript: ``~/.claude/projects//.jsonl`` + where ```` is the project root path slugified the way Claude does + (every non-alphanumeric char -> ``-``; see ``_claude_transcript_slug``). + + Native discovery makes provenance work in the hookless world where no + ``.evolve/trajectories/`` file is ever written. It is platform-neutral: + Bob/Codex keep their transcripts elsewhere, so the native step simply falls + through to ``None`` for them rather than misfiring. + """ + evolve_dir = Path(evolve_dir) + + # --- 1. Legacy .evolve/trajectories/ ------------------------------------ + traj_dir = evolve_dir / "trajectories" + if traj_dir.is_dir(): + direct = traj_dir / f"claude-transcript_{session_id}.jsonl" + if direct.is_file(): + return direct + + # trajectory__.json — match on the filename sid slice. + for path in sorted(traj_dir.glob("trajectory_*_*.json")): + stem = path.stem # trajectory__ + parts = stem.split("_", 2) + if len(parts) == 3 and parts[2] == session_id: + return path + + # trajectory_.json — open and match the inner session_id field. + for path in sorted(traj_dir.glob("trajectory_*.json")): + # Skip the _ shape already handled above. + if path.stem.count("_") >= 2: + continue + try: + data = json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + continue + if isinstance(data, dict) and data.get("session_id") == session_id: + return path + + # --- 2. Native Claude transcript ---------------------------------------- + # The project root is the parent of the .evolve dir; the home dir holds + # ~/.claude/projects//.jsonl. 
def read_recall_rows(evolve_dir):
    """Return ``(session_id, [entity_id, ...])`` tuples for every ``recall`` audit row.

    The audit log is ``<evolve_dir>/audit.log``, one JSON object per line.
    Reading is best-effort on a per-line basis:

    * a missing or unreadable log yields an empty list;
    * blank lines, lines that are not valid JSON, and non-object rows are
      skipped;
    * rows whose ``event`` is not ``"recall"``, whose ``session_id`` is not a
      non-empty string, or whose ``entities`` is not a non-empty list are
      skipped;
    * non-string / empty entries inside ``entities`` are dropped, and a row
      left with no entities is skipped.

    Rows are returned in file order; duplicates are not collapsed here.
    """
    audit_log = Path(evolve_dir) / "audit.log"
    try:
        # errors="replace": a single undecodable byte must only spoil its own
        # line (which then fails JSON parsing or carries U+FFFD), not abort the
        # whole run with UnicodeDecodeError.
        text = audit_log.read_text(encoding="utf-8", errors="replace")
    except OSError:
        # Missing, a directory, or unreadable — nothing to report.
        return []

    rows = []
    for line in text.splitlines():
        if not line.strip():
            continue
        try:
            event = json.loads(line)
        except json.JSONDecodeError:
            continue
        if not isinstance(event, dict) or event.get("event") != "recall":
            continue
        session_id = event.get("session_id")
        entities = event.get("entities")
        if not isinstance(session_id, str) or not session_id:
            continue
        if not isinstance(entities, list) or not entities:
            continue
        clean = [e for e in entities if isinstance(e, str) and e]
        if clean:
            rows.append((session_id, clean))
    return rows
def _read_trajectory_excerpt(trajectory_path):
    """Return the leading characters of the trajectory file, or ``None``.

    At most ``_TRAJECTORY_EXCERPT_CHARS`` characters are read, so a large
    transcript is never loaded into memory in full just to be truncated.
    Bytes that are not valid UTF-8 are replaced with U+FFFD instead of
    raising, because the excerpt is only a preview for the judging agent.

    Returns ``None`` when ``trajectory_path`` is ``None`` or the file cannot
    be opened or read.
    """
    if trajectory_path is None:
        return None
    try:
        with Path(trajectory_path).open(encoding="utf-8", errors="replace") as handle:
            # read(n) in text mode is bounded in characters, matching the
            # slice the excerpt limit is expressed in.
            return handle.read(_TRAJECTORY_EXCERPT_CHARS)
    except OSError:
        return None
def record_verdict(payload, evolve_dir=None):
    """Persist one agent verdict as an ``influence`` audit row.

    ``payload`` is a dict with ``session_id``, ``entity``, ``verdict`` and an
    optional ``evidence``. The verdict must be a member of
    ``_ALLOWED_VERDICTS``; this function only stores what the agent decided
    and never derives a verdict itself. Duplicate detection and the row
    format both come from ``log_influence`` so they are defined in one place.

    When ``evolve_dir`` is omitted the directory is resolved through
    ``get_evolve_dir()``.

    Returns 1 when a row was appended and 0 when the ``(session_id, entity)``
    pair already had an influence row. Raises ``ValueError`` for a payload
    that is not a dict, lacks a non-empty string ``session_id`` / ``entity``,
    or carries an unknown verdict.
    """
    if not isinstance(payload, dict):
        raise ValueError("verdict payload must be a JSON object")

    session_id, entity, verdict = (payload.get(field) for field in ("session_id", "entity", "verdict"))
    evidence = payload.get("evidence", "")

    # Validation order matters: callers see the first failing field.
    if not (isinstance(session_id, str) and session_id):
        raise ValueError("verdict payload must include a non-empty string session_id")
    if not (isinstance(entity, str) and entity):
        raise ValueError("verdict payload must include a non-empty string entity")
    if verdict not in _ALLOWED_VERDICTS:
        raise ValueError(f"verdict must be one of {sorted(_ALLOWED_VERDICTS)}, got {verdict!r}")

    target_dir = Path(get_evolve_dir().resolve() if evolve_dir is None else evolve_dir)

    already_recorded = log_influence.existing_influence_keys(target_dir)
    if (session_id, entity) in already_recorded:
        log(f"Skipping duplicate influence verdict: session_id={session_id} entity={entity}")
        return 0

    # Evidence is free-form; coerce anything non-string so the row stays textual.
    evidence_text = evidence if isinstance(evidence, str) else str(evidence)
    log_influence.audit.append(
        evolve_dir=str(target_dir),
        event="influence",
        session_id=session_id,
        entity=entity,
        verdict=verdict,
        evidence=evidence_text,
    )
    return 1
def main(argv=None):
    """Dispatch to the ``candidates`` or ``record`` mode.

    ``argv`` defaults to ``sys.argv[1:]``. The first argument selects the
    mode; with no arguments the mode is ``candidates``. Any other mode prints
    an error to stderr and exits with status 2.
    """
    args = list(argv) if argv is not None else list(sys.argv[1:])
    mode = "candidates" if not args else args[0]

    # Reject unknown modes up front, then dispatch.
    if mode not in ("candidates", "record"):
        print(f"Error: unknown mode {mode!r}; expected 'candidates' or 'record'.", file=sys.stderr)
        sys.exit(2)

    if mode == "record":
        _run_record()
    else:
        _run_candidates()
Run: + +```bash +python3 ~/.claw/evolve-lite/audit_recall.py <memory-file> [<memory-file> ...] +``` + +Pass the memory files you read this turn (space-separated paths, relative to the +project root). Skip this step entirely if you consulted no memories. If the +command prints a line beginning `evolve-session:`, include that line once, +verbatim, somewhere in your reply — it lets later analysis tie this session to +what you recalled. + +## Save — only when you learn something durable + +Near the end of a task, if it produced a reusable fact that isn't already +obvious from the code or git history — and only then — write it to memory. +Saving nothing is the right outcome more often than not; never force a +low-value memory just to have saved one. + +Each memory is one file holding one fact, under `./.evolve/memory/` (create the +directory if it doesn't exist), with frontmatter: + +```markdown +--- +name: <kebab-case-slug> +description: <one-line summary used to judge relevance during recall> +metadata: + type: user | feedback | project | reference +--- + +<the fact itself, stated plainly> + +``` + +Types: +- **user** — who the user is: role, expertise, durable preferences. +- **feedback** — guidance on how you should work, both corrections and + confirmed approaches; always include the why. +- **project** — ongoing work, goals, or constraints not derivable from the code + or git history; convert relative dates ("next week") to absolute ones. +- **reference** — pointers to external resources (URLs, dashboards, tickets). + +In the body, link related memories with `[[name]]`, where `name` is another +memory's `name:` slug. Link liberally; a `[[name]]` with no file yet marks +something worth writing later, not an error. + +After writing the file, add a one-line pointer to `./.evolve/memory/MEMORY.md`: +`- [Title](file.md) — short hook`. MEMORY.md is the index you read during +recall — one line per memory, no frontmatter, never put memory content there. + +## When NOT to save, and housekeeping + +- Don't duplicate what the repo already records: code structure, git history, + READMEs, existing docs.
If asked to remember one of those, ask what was + non-obvious about it and save that instead. +- Don't save what only matters to the current conversation. +- Before saving, check for an existing memory that already covers it — update + that file rather than creating a duplicate. +- Delete memories that turn out to be wrong. diff --git a/platform-integrations/claw-code/plugins/evolve-lite/lib/evolve-lite/audit_recall.py b/platform-integrations/claw-code/plugins/evolve-lite/lib/evolve-lite/audit_recall.py new file mode 100644 index 00000000..42dc1104 --- /dev/null +++ b/platform-integrations/claw-code/plugins/evolve-lite/lib/evolve-lite/audit_recall.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +"""Append a recall-audit row to .evolve/audit.log. + +Self-contained (no third-party or evolve-lite lib imports) so it can be dropped +at a single path and run by a model-invoked shell command on any platform. + +Usage: + python3 audit_recall.py [ ...] + +Records which memory entries the model consulted this turn so the `provenance` +analysis can later judge whether they influenced the outcome. Session id is +resolved from the host's environment when available and falls back to a freshly +minted UUID (printed as `evolve-session: ` for the model to echo). 
def _evolve_dir() -> Path:
    """Return the evolve directory: ``$EVOLVE_DIR`` if set, else ``./.evolve``."""
    override = os.environ.get("EVOLVE_DIR")
    if override:
        return Path(override)
    return Path.cwd() / ".evolve"


def _session_id() -> tuple[str, bool]:
    """Return ``(session_id, self_minted)``.

    The first non-empty value among ``CLAUDE_CODE_SESSION_ID`` and
    ``CODEX_THREAD_ID`` is used as-is (``self_minted`` is ``False``). When
    neither is set a fresh UUID4 string is generated (``self_minted`` is
    ``True``).
    """
    host_values = (os.environ.get(name) for name in ("CLAUDE_CODE_SESSION_ID", "CODEX_THREAD_ID"))
    host_id = next((value for value in host_values if value), None)
    if host_id is None:
        return str(uuid.uuid4()), True
    return host_id, False


def main(argv: list[str]) -> int:
    """Append one ``recall`` row naming the consulted memory entries.

    Blank / whitespace-only arguments are ignored; if nothing remains, no row
    is written. Otherwise a JSON line with ``event``, ``session_id``,
    ``entities`` and a UTC ``ts`` is appended to ``audit.log`` inside the
    evolve directory (created on demand). A self-minted session id is echoed
    as ``evolve-session: <id>`` before the summary line. Always returns 0.
    """
    consulted = []
    for arg in argv:
        if arg.strip():
            consulted.append(arg)
    if not consulted:
        return 0

    session_id, self_minted = _session_id()
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
    record = {
        "event": "recall",
        "session_id": session_id,
        "entities": consulted,
        "ts": timestamp,
    }

    audit_path = _evolve_dir() / "audit.log"
    audit_path.parent.mkdir(parents=True, exist_ok=True)
    with audit_path.open("a", encoding="utf-8") as handle:
        handle.write(json.dumps(record) + "\n")

    if self_minted:
        print(f"evolve-session: {session_id}")
    total = len(consulted)
    suffix = "y" if total == 1 else "ies"
    print(f"Recorded recall of {total} memory entr{suffix}.")
    return 0
Returns an empty string for input that contains no usable + characters, leaving the fallback decision to the caller. + """ + if not isinstance(text, str): + return "" + text = text.lower() + text = re.sub(r"[^a-z0-9]+", "-", text) + return text.strip("-") + + def unique_filename(directory, slug): """Return a Path that doesn't collide with existing files in *directory*. @@ -139,7 +154,7 @@ def unique_filename(directory, slug): # Markdown <-> dict conversion # --------------------------------------------------------------------------- -_FRONTMATTER_KEYS = ("type", "trigger", "trajectory", "owner", "source", "visibility", "published_at") +_FRONTMATTER_KEYS = ("type", "trigger", "trajectory", "owner", "source", "native_path", "visibility", "published_at") def entity_to_markdown(entity): @@ -339,24 +354,35 @@ def load_all_entities(entities_dir): return entities -def write_entity_file(directory, entity): +def write_entity_file(directory, entity, filename=None, overwrite=False): """Write a single entity as a markdown file under *directory*. The file is placed in a ``{type}/`` subdirectory. Uses atomic write (write to ``.tmp``, then ``os.rename``). + Args: + directory: Entities root directory. + entity: The entity dict to serialize. + filename: Optional explicit slug for the target file (without the + ``.md`` suffix). When omitted, the slug is derived from the + entity content (the historical default). + overwrite: When True, the entity is written to a deterministic + ``{type}/{filename}.md`` path, overwriting any existing file in + place (stable id, idempotent re-mirroring). When False (the + default), the historical collision-avoiding behavior is kept — + a ``-2``/``-3`` suffix is appended on collision. + Returns: Path to the written file. 
""" - _ALLOWED_TYPES = {"guideline", "preference"} - entity_type = entity.get("type", "guideline") - if not isinstance(entity_type, str) or entity_type not in _ALLOWED_TYPES: - entity_type = "guideline" + # Any non-empty type is accepted and used (sanitized) as the + # subdirectory. An empty/invalid type falls back to "guideline". + entity_type = sanitize_type(entity.get("type", "guideline")) or "guideline" entity["type"] = entity_type type_dir = Path(directory) / entity_type type_dir.mkdir(parents=True, exist_ok=True) - slug = slugify(entity.get("content", "entity")) + slug = slugify(filename) if filename else slugify(entity.get("content", "entity")) content = entity_to_markdown(entity) # Write to a unique temp file first (avoids predictable .tmp collisions) @@ -367,6 +393,13 @@ def write_entity_file(directory, entity): os.close(fd) fd = None + if overwrite: + # Deterministic target: overwrite any existing file in place so + # the entity id is stable across re-mirroring. + target = type_dir / f"{slug}.md" + os.replace(tmp_path, target) + return target + # Atomically claim the target using O_EXCL; retry on race while True: target = unique_filename(type_dir, slug) diff --git a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md new file mode 100644 index 00000000..ccc0b831 --- /dev/null +++ b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md @@ -0,0 +1,13 @@ +--- +name: adapt-memory +description: Mirror a just-saved native memory into the shared evolve store so it becomes shareable and auditable +--- + +# Adapt Memory + +This skill mirrors a just-saved native memory into the shared evolve store. It +is specific to hosts with native self-directed memory and is a no-op on this +platform — there is no native memory store to mirror from. 
Use the +/evolve-lite:learn +skill to capture reusable lessons here. + diff --git a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py new file mode 100644 index 00000000..5bb8fb44 --- /dev/null +++ b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 +""" +Adapt Memory Script (Claude-only) + +Mirrors a single native Claude memory file into the in-repo evolve store at +``${EVOLVE_DIR:-.evolve}/entities//.md`` so the memory becomes +shareable and auditable like every other evolve entity. + +Native memory files live under ``~/.claude/projects//memory/`` and carry +frontmatter of the form:: + + --- + name: + description: + metadata: + type: user | feedback | project | reference + --- + + + +The agent passes the native ``--type`` through verbatim (native types map +straight onto the entity type — no remapping) and supplies a synthesized +``--trigger`` (the single most important field for future retrieval). The body +of the native file becomes the entity content; the native ``description`` is +carried into the body as a lead line when present. + +Usage: + python3 adapt_memory.py --type --trigger +""" + +import argparse +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple +# plugins can coexist side by side (e.g. .bob/lib/evolve-lite/). 
def parse_native_memory(text):
    """Split a native memory file into ``(name, description, body)``.

    Frontmatter is recognised only when the first line is a ``---`` fence and
    a later line is a closing ``---`` fence. Fences are matched as whole
    lines, so a ``---`` that merely appears inside a value (for example
    ``description: before --- after``) does not cut the frontmatter short.

    Inside the frontmatter only top-level ``key: value`` lines are read:
    indented lines (the nested ``metadata:`` block) are ignored, and only
    ``name`` and ``description`` with a non-empty value are kept. ``name`` is
    the native slug reused as the stable entity id.

    Everything after the closing fence is the body, stripped of surrounding
    whitespace. When there is no frontmatter, or the opening fence is never
    closed, the whole text (stripped) is returned as the body and both
    ``name`` and ``description`` are ``None``.
    """
    # keepends=True so the body is reassembled with its original line endings.
    lines = text.splitlines(keepends=True)
    if not lines or lines[0].strip() != "---":
        return None, None, text.strip()

    closing = next(
        (index for index, line in enumerate(lines[1:], start=1) if line.strip() == "---"),
        None,
    )
    if closing is None:
        # Unterminated frontmatter: treat the file as body-only.
        return None, None, text.strip()

    name = None
    description = None
    for line in lines[1:closing]:
        # Only top-level keys (no leading indentation) — keeps the nested
        # metadata.* keys out of the top-level matches.
        if line[:1].isspace():
            continue
        key, _, value = line.partition(":")
        key = key.strip()
        value = value.strip()
        if key == "name" and value:
            name = value
        elif key == "description" and value:
            description = value

    body = "".join(lines[closing + 1:])
    return name, description, body.strip()
user, feedback, project, reference).", + ) + parser.add_argument( + "--trigger", + required=True, + help="Synthesized one-sentence 'when to recall this' description.", + ) + args = parser.parse_args() + + memory_path = Path(args.memory_path).expanduser() + if not memory_path.is_file(): + print(f"Error: native memory file not found: {memory_path}", file=sys.stderr) + sys.exit(1) + + try: + text = memory_path.read_text(encoding="utf-8") + except OSError as exc: + print(f"Error: cannot read {memory_path} - {exc}", file=sys.stderr) + sys.exit(1) + + name, description, body = parse_native_memory(text) + if not body: + print(f"Error: native memory {memory_path} has no body to mirror.", file=sys.stderr) + sys.exit(1) + + # Carry the native description into the body as a lead line when it isn't + # already echoed there, so the mirrored entity is self-describing. + content = body + if description and description not in body: + content = f"{description}\n\n{body}" + + # The native ``name`` becomes the stable, derivable entity slug so the + # entity id is ``/`` on both sides — provenance can map an + # audited native memory straight onto its mirror. Fall back to a + # content-derived slug only when the native frontmatter has no name. + slug = slugify(name) if name else slugify(content) + + entity = { + "type": args.type, + "trigger": args.trigger, + "content": content, + "source": "native-memory", + "native_path": args.memory_path, + } + + entities_dir = find_entities_dir() + if entities_dir: + entities_dir = entities_dir.resolve() + log(f"Using existing entities dir: {entities_dir}") + else: + entities_dir = get_default_entities_dir() + log(f"Created entities dir: {entities_dir}") + + # Deterministic, idempotent write: re-mirroring the same native memory + # (same name + type) overwrites /.md in place rather than + # creating -2.md, keeping the entity id stable. 
+ path = write_entity_file(entities_dir, entity, filename=slug, overwrite=True) + entity_id = f"{entity['type']}/{slug}" + log(f"Mirrored {memory_path} -> {path} (id: {entity_id})") + print(f"Mirrored native memory into evolve store: {path}") + print(f"Entity id: {entity_id}") + + +if __name__ == "__main__": + main() diff --git a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md new file mode 100644 index 00000000..0641e810 --- /dev/null +++ b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/doctor/SKILL.md @@ -0,0 +1,12 @@ +--- +name: doctor +description: Diagnose evolve health on Claude — verify the CLAUDE.md @import is actually loading the thin EVOLVE.md into sessions +--- + +# Doctor + +This skill diagnoses Claude's `@import` delivery of evolve's thin EVOLVE.md. It +is specific to Claude (where evolve loads via a per-project import that can be +silently declined) and is a **no-op on this platform** — here EVOLVE.md is +always-on and there is no import-approval gate to check. Nothing to run. + diff --git a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py new file mode 100644 index 00000000..2c2a5382 --- /dev/null +++ b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/doctor/scripts/doctor.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +""" +Doctor Script (Claude-only diagnostic) + +On Claude, evolve is delivered by a single ``@.evolve/EVOLVE.md`` import line in +the repo's ``./CLAUDE.md``. That import requires a one-time, per-project +"allow external imports" approval. If the user declines it (even once, in a past +session) Claude silently disables the import forever — the thin EVOLVE.md never +loads and evolve becomes a no-op with NO error. 
+ +Claude's internal approval flag is undocumented and unreliable to read, so this +script detects delivery *empirically*: the installed thin EVOLVE.md carries a +canary token (``EVOLVE_IMPORT_CANARY_``). When the import loads, that token +expands into the session transcript. The doctor extracts the token from the +installed copy (never hardcoding it twice) and greps the most recent Claude +project transcripts for it. + +Status codes (printed verbatim, always exit 0 — this is a diagnostic): + + OK — canary found in a recent transcript; import is loading. + IMPORT_DISABLED — import line present in CLAUDE.md but canary absent from + every recent transcript; the user likely declined the + external-import approval. + NOT_INSTALLED — the import line is missing from CLAUDE.md, or the installed + .evolve/EVOLVE.md is missing; run the installer. + STALE_EVOLVE_MD — installed EVOLVE.md has no canary; it predates this build, + re-run the installer. + UNKNOWN — no recent Claude transcripts for this project yet. + +Usage: + python3 doctor.py +""" + +import os +import re +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple plugins +# can coexist side by side. The doctor only needs the shared `log` helper, but +# resolving the lib the same way the other scripts do keeps the convention +# uniform (and only works in the rendered tree, same constraint as adapt_memory). 
def log(message):
    """Forward ``message`` to the shared ``entity_io`` logger, tagged ``doctor``."""
    _log("doctor", message)


# The line the installer injects into the repo's CLAUDE.md (see install.sh
# CLAUDE_IMPORT_LINE). Matching on this substring is the install-sanity check.
CLAUDE_IMPORT_LINE = "@.evolve/EVOLVE.md"

# Pattern used to lift the canary token out of the installed EVOLVE.md so the
# exact token lives in exactly one place (the template), never duplicated here.
_CANARY_RE = re.compile(r"EVOLVE_IMPORT_CANARY_\S+")

# How many of the most-recent transcripts to scan for the canary.
_RECENT_N = 3


def _evolve_dir(root):
    """Resolve the .evolve root: ``$EVOLVE_DIR`` if set, else ``<root>/.evolve``."""
    env_dir = os.environ.get("EVOLVE_DIR")
    if env_dir:
        return Path(env_dir)
    return root / ".evolve"


def _transcript_slug(root):
    """Return the transcript directory name for the project at ``root``.

    Every non-alphanumeric character of ``str(root)`` is replaced with ``-``.

    e.g. /Users/x/Documents/kaizen -> -Users-x-Documents-kaizen
    """
    return re.sub(r"[^A-Za-z0-9]", "-", str(root))


def _recent_transcripts(home, root, limit=_RECENT_N):
    """Return the most recent ``limit`` ``*.jsonl`` transcripts, newest first.

    Looks in ``<home>/.claude/projects/<slug>/``. Returns ``[]`` when that
    directory does not exist. Files that disappear or become unreadable while
    being listed are skipped instead of raising, so the caller's
    "never raises" contract holds.
    """
    proj_dir = home / ".claude" / "projects" / _transcript_slug(root)
    if not proj_dir.is_dir():
        return []

    dated = []
    try:
        for path in proj_dir.glob("*.jsonl"):
            try:
                if path.is_file():
                    dated.append((path.stat().st_mtime, path))
            except OSError:
                # Vanished or became unreadable between the listing and the
                # stat — not a usable transcript.
                continue
    except OSError:
        # The directory itself became unreadable mid-listing; keep whatever
        # was collected so far.
        pass

    dated.sort(key=lambda pair: pair[0], reverse=True)
    return [path for _, path in dated[:limit]]


def _canary_in_transcripts(transcripts, token):
    """Return True if ``token`` appears in any of the given transcript files.

    Files are streamed line by line and scanning stops at the first hit, so a
    large transcript is never held in memory whole. This relies on ``token``
    containing no line break, which holds for tokens matched by
    ``_CANARY_RE`` (``\\S+``). Unreadable files are skipped.
    """
    for path in transcripts:
        try:
            with path.open(encoding="utf-8", errors="replace") as handle:
                if any(token in line for line in handle):
                    return True
        except OSError:
            continue
    return False


def diagnose(root, home):
    """Core diagnosis. Returns ``(code, message)``; never raises on missing
    files/dirs.

    Args:
        root: Project root; ``<root>/CLAUDE.md`` and the installed EVOLVE.md
            are looked up relative to it.
        home: User home dir under which transcripts are looked up at
            ``<home>/.claude/projects/<slug>/``.

    Returns:
        A ``(code, message)`` tuple whose code is one of ``NOT_INSTALLED``,
        ``STALE_EVOLVE_MD``, ``UNKNOWN``, ``OK`` or ``IMPORT_DISABLED``.
    """
    root = Path(root)
    home = Path(home)

    # --- Install sanity ------------------------------------------------------
    claude_md = root / "CLAUDE.md"
    has_import = False
    if claude_md.is_file():
        try:
            has_import = CLAUDE_IMPORT_LINE in claude_md.read_text(encoding="utf-8", errors="replace")
        except OSError:
            has_import = False
    if not has_import:
        return (
            "NOT_INSTALLED",
            f"evolve import not wired into this repo's CLAUDE.md (expected a line `{CLAUDE_IMPORT_LINE}`); run the installer.",
        )

    evolve_md = _evolve_dir(root) / "EVOLVE.md"
    if not evolve_md.is_file():
        return (
            "NOT_INSTALLED",
            f"installed EVOLVE.md is missing at {evolve_md}; run the installer.",
        )

    # --- Extract the canary from the installed file --------------------------
    try:
        evolve_text = evolve_md.read_text(encoding="utf-8", errors="replace")
    except OSError as exc:
        return (
            "NOT_INSTALLED",
            f"cannot read installed EVOLVE.md at {evolve_md} - {exc}; run the installer.",
        )
    match = _CANARY_RE.search(evolve_text)
    if not match:
        return (
            "STALE_EVOLVE_MD",
            f"installed EVOLVE.md at {evolve_md} has no canary token (it predates this build); re-run the installer to refresh it.",
        )
    token = match.group(0)

    # --- Transcript check ----------------------------------------------------
    transcripts = _recent_transcripts(home, root)
    if not transcripts:
        return (
            "UNKNOWN",
            "no recent Claude transcripts for this project yet; open a session, then re-run.",
        )
    if _canary_in_transcripts(transcripts, token):
        return ("OK", "✓ evolve EVOLVE.md import is loading.")

    return (
        "IMPORT_DISABLED",
        "⚠ The @import is present in CLAUDE.md but its content is NOT "
        "reaching sessions — you likely declined Claude's external-import "
        "approval. Re-enable by running `claude project purge "
        f"{root}` then start a new session and Allow the import dialog.",
    )


def main():
    """Diagnose the current working directory and print the verdict."""
    root = Path(os.getcwd()).resolve()
    home = Path.home()
    code, message = diagnose(root, home)
    log(f"{code}: {message}")
    print(f"evolve doctor [{code}] {message}")
    # Diagnostic only — never fail the caller.
    sys.exit(0)


if __name__ == "__main__":
    main()
Find entries where `event == "recall"` and `entities` is a non-empty list. - -Skip any recall event that already has `influence` entries for the same `session_id` and entity ids. Do not write duplicate influence records. - -### Step 2: Locate Saved Trajectories +Use this skill when you want to compute usage provenance without coupling the +work to the live learn step. -List `.evolve/trajectories/` and match each recall event to a trajectory by `session_id`. - -Matching strategy (in order): -1. `claude-transcript_.jsonl` - the stop-hook transcript dump; the session id is in the filename. -2. `trajectory__.json` - written by the /evolve-lite:save-trajectory skill when a session id is available. Match on the `` slice of the filename. -3. `trajectory_.json` - open the file and match its top-level `session_id` field against the recall event. Only fall back to this step when the filename alone does not identify the session. - -If none of the above yields a confident match for a recall event, skip it. Do not guess. - -### Step 3: Read Recalled Entities +## Workflow -For each recalled entity id, open `.evolve/entities/.md`. The id is a path relative to `.evolve/entities/` without the `.md` suffix, such as `guideline/foo` or `subscribed/alice/guideline/foo`. +### Step 1: Get candidates -Read the entity content and trigger. Skip ids whose files are missing. +Run the candidate builder. It emits one JSON object per line (JSONL), one per +unresolved `(session_id, entity)` recall pair: -### Step 4: Assess Influence +```bash +sh -lc 'real_home="$(python3 -c "import os,pwd; print(pwd.getpwuid(os.getuid()).pw_dir)")"; config_home="${CLAW_CONFIG_HOME:-$real_home/.claw}"; script=".claw/skills/evolve-lite:provenance/scripts/provenance.py"; [ -f "$script" ] || script="$config_home/skills/evolve-lite:provenance/scripts/provenance.py"; python3 "$script" candidates' +``` -Compare each recalled entity with the matched trajectory. 
Pick exactly one verdict: +Each candidate looks like: -- `followed` - the agent's actual actions are consistent with the guideline. -- `contradicted` - the guideline applied, but the agent did the opposite or repeated the avoidable dead end. -- `not_applicable` - the guideline was recalled but did not apply to this session. +```json +{ + "session_id": "<session-id>", + "entity_id": "<type>/<slug>", + "entity_excerpt": "<leading excerpt of the entity file>", + "trajectory_path": "/path/to/transcript.jsonl", + "trajectory_excerpt": "<leading excerpt of the transcript>", + "missing": ["trajectory"] +} +``` -Keep `evidence` to one short sentence citing a concrete action, tool call, or absence in the trajectory. +Notes: + +- `entity_id` is the path relative to `.evolve/entities/` without the `.md` + suffix, e.g. `feedback/foo`, `guideline/bar`, or + `subscribed/alice/guideline/baz`. +- Pairs that already have an `influence` row are skipped for you — the builder + reuses the same dedup rule used when influence rows are written. You will + never be handed a duplicate. +- The trajectory locator checks `.evolve/trajectories/` first, then falls back + to the native Claude transcript at + `~/.claude/projects/<slug>/<session_id>.jsonl`. This means provenance works + even when no `.evolve/trajectories/` file was written. +- If an entity file or trajectory cannot be found, the candidate is still + emitted with a `missing: [...]` field so the gap is visible. When the + trajectory is missing you usually cannot judge the pair — skip it (do not + guess), unless the entity content alone makes `not_applicable` certain. + +### Step 2: Judge each candidate + +For each candidate, read `entity_excerpt` (and open `trajectory_path` for the +full transcript if the excerpt is not enough). Compare the recalled guideline +against the agent's actual actions in the trajectory and pick exactly one +verdict: + +- `followed` — the agent's actual actions are consistent with the guideline. +- `contradicted` — the guideline applied, but the agent did the opposite or + repeated the avoidable dead end.
+- `not_applicable` — the guideline was recalled but did not apply to this + session. + +Keep `evidence` to one short sentence citing a concrete action, tool call, or +absence in the trajectory. This judgment is yours — there is no heuristic +fallback. + +### Step 3: Record verdicts + +Persist each verdict. Either pipe one verdict per call to `provenance.py +record`: -### Step 5: Write Influence Events +```bash +echo '{ + "session_id": "", + "entity": "/", + "verdict": "followed", + "evidence": "Agent used the saved parser before trying shell fallbacks." +}' | sh -lc 'real_home="$(python3 -c "import os,pwd; print(pwd.getpwuid(os.getuid()).pw_dir)")"; config_home="${CLAW_CONFIG_HOME:-$real_home/.claw}"; script=".claw/skills/evolve-lite:provenance/scripts/provenance.py"; [ -f "$script" ] || script="$config_home/skills/evolve-lite:provenance/scripts/provenance.py"; python3 "$script" record' +``` -Pipe one JSON payload per assessed session to the helper: +…or, to batch many assessments for one session in a single call, pipe to the +underlying writer directly: ```bash echo '{ "session_id": "", "assessments": [ - {"entity": "guideline/", "verdict": "followed", "evidence": "Agent used the saved parser before trying shell fallbacks."} + {"entity": "feedback/foo", "verdict": "followed", "evidence": "Agent followed it."}, + {"entity": "guideline/bar", "verdict": "not_applicable", "evidence": "Did not apply."} ] }' | sh -lc 'real_home="$(python3 -c "import os,pwd; print(pwd.getpwuid(os.getuid()).pw_dir)")"; config_home="${CLAW_CONFIG_HOME:-$real_home/.claw}"; script=".claw/skills/evolve-lite:provenance/scripts/log_influence.py"; [ -f "$script" ] || script="$config_home/skills/evolve-lite:provenance/scripts/log_influence.py"; python3 "$script"' ``` -The `entity` value must match exactly what appeared in the recall event, including any `subscribed//` prefix. +Both paths write the identical `influence` audit row and skip duplicates. 
The +`entity` value must match the candidate's `entity_id` exactly, including any +`subscribed//` prefix. -It is valid to emit an empty `assessments` list when recall events exist but no recalled guideline can be assessed. +It is valid to record nothing when recall events exist but no recalled guideline +can be assessed (e.g. every candidate is missing its trajectory). diff --git a/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py new file mode 100644 index 00000000..c2272501 --- /dev/null +++ b/platform-integrations/claw-code/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py @@ -0,0 +1,358 @@ +#!/usr/bin/env python3 +"""Deterministic provenance plumbing for the evolve-lite provenance skill. + +This script does the *mechanical* half of provenance — the part that can be +made deterministic and tested end to end: + + * read ``recall`` rows from ``.evolve/audit.log``, + * skip ``(session_id, entity)`` pairs that already have an ``influence`` row, + * resolve each recalled entity file and the session's trajectory transcript, + * assemble candidate dicts the agent can judge. + +The *semantic* half — deciding whether a recalled guideline was ``followed``, +``contradicted`` or ``not_applicable`` — is an LLM judgment and is NOT done +here. There is deliberately no heuristic verdict: this module never invents a +verdict. The agent reads each candidate, judges it, and pipes the verdict back +through ``record`` (which delegates to ``log_influence.py``'s writer so the +audit-log format is identical). + +Two modes: + + candidates (default) — emit one JSONL candidate per unresolved + (session_id, entity) recall pair to stdout. + record — read a verdict JSON from stdin and append an + ``influence`` row via log_influence.py's writer. 
# How many characters of the entity file / trajectory to surface in a candidate.
_ENTITY_EXCERPT_CHARS = 4000
_TRAJECTORY_EXCERPT_CHARS = 4000


def log(message):
    """Forward ``message`` to the shared ``entity_io`` logger, tagged ``provenance``."""
    _log("provenance", message)


# ---------------------------------------------------------------------------
# Trajectory locator (Task B)
# ---------------------------------------------------------------------------


def _claude_transcript_slug(root):
    """Return the transcript directory name for the project at ``root``.

    Every non-alphanumeric character of ``str(root)`` is replaced with ``-``.

    e.g. /Users/x/Documents/kaizen -> -Users-x-Documents-kaizen

    This mirrors ``_transcript_slug`` in the doctor skill
    (skills/evolve-lite/doctor/scripts/doctor.py). The two are kept in sync by
    hand because doctor and provenance ship as independent scripts that do not
    import one another in the rendered tree; if you change one, change both.
    """
    import re

    return re.sub(r"[^A-Za-z0-9]", "-", str(root))


def locate_trajectory(session_id, evolve_dir, *, project_root=None, home=None):
    """Locate the saved trajectory transcript for ``session_id``.

    Resolution order (best-effort, returns the first hit or ``None``):

    1. Legacy ``.evolve/trajectories/`` files:
       * ``claude-transcript_<sid>.jsonl`` — stop-hook transcript dump.
       * ``trajectory_<ts>_<sid>.json`` — save-trajectory skill output; the sid
         is the filename slice after the timestamp.
       * ``trajectory_<ts>.json`` — open and match the inner ``session_id``.
    2. Native Claude transcript: ``~/.claude/projects/<slug>/<sid>.jsonl``
       where ``<slug>`` is the project root path with every non-alphanumeric
       char replaced by ``-`` (see ``_claude_transcript_slug``).

    Native discovery makes provenance work in the hookless world where no
    ``.evolve/trajectories/`` file is ever written. It is platform-neutral:
    Bob/Codex keep their transcripts elsewhere, so the native step simply falls
    through to ``None`` for them rather than misfiring.

    Args:
        session_id: Session id taken from a ``recall`` audit row.
        evolve_dir: Path of the ``.evolve`` directory.
        project_root: Project root used for the native slug; defaults to the
            parent of the resolved ``evolve_dir``.
        home: Home directory holding ``.claude``; defaults to ``Path.home()``.

    Returns:
        The ``Path`` of the first matching file, or ``None``.
    """
    evolve_dir = Path(evolve_dir)

    # --- 1. Legacy .evolve/trajectories/ ------------------------------------
    traj_dir = evolve_dir / "trajectories"
    if traj_dir.is_dir():
        direct = traj_dir / f"claude-transcript_{session_id}.jsonl"
        if direct.is_file():
            return direct

        # trajectory_<ts>_<sid>.json — match on the filename sid slice.
        # NOTE(review): this assumes <ts> itself contains no underscore;
        # confirm against the save-trajectory skill's filename format.
        for path in sorted(traj_dir.glob("trajectory_*_*.json")):
            stem = path.stem  # trajectory_<ts>_<sid>
            parts = stem.split("_", 2)
            if len(parts) == 3 and parts[2] == session_id:
                return path

        # trajectory_<ts>.json — open and match the inner session_id field.
        for path in sorted(traj_dir.glob("trajectory_*.json")):
            # Skip the <ts>_<sid> shape already handled above.
            if path.stem.count("_") >= 2:
                continue
            try:
                data = json.loads(path.read_text(encoding="utf-8"))
            except (OSError, json.JSONDecodeError):
                continue
            if isinstance(data, dict) and data.get("session_id") == session_id:
                return path

    # --- 2. Native Claude transcript ----------------------------------------
    # The project root is the parent of the .evolve dir; the home dir holds
    # ~/.claude/projects/<slug>/<sid>.jsonl.
    root = Path(project_root) if project_root is not None else evolve_dir.resolve().parent
    base = Path(home) if home is not None else Path.home()
    slug = _claude_transcript_slug(root)
    native = base / ".claude" / "projects" / slug / f"{session_id}.jsonl"
    if native.is_file():
        return native

    return None


# ---------------------------------------------------------------------------
# Recall row reading + entity resolution (Task A — candidates)
# ---------------------------------------------------------------------------


def read_recall_rows(evolve_dir):
    """Return ``(session_id, [entity_id, ...])`` tuples for every ``recall`` audit row.

    Rows with no ``session_id`` or an empty ``entities`` list are skipped, as
    are blank lines, lines that are not valid JSON and non-string entity ids.
    Returns ``[]`` when ``audit.log`` does not exist.

    The log is iterated as a text file rather than split with
    ``str.splitlines()``: the latter also breaks on characters such as U+2028
    that may occur unescaped inside a JSON string, which would cut a valid row
    in two and silently drop it.
    """
    audit_log = Path(evolve_dir) / "audit.log"
    if not audit_log.is_file():
        return []

    rows = []
    with audit_log.open(encoding="utf-8", errors="replace") as handle:
        for line in handle:
            if not line.strip():
                continue
            try:
                event = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(event, dict) or event.get("event") != "recall":
                continue
            session_id = event.get("session_id")
            entities = event.get("entities")
            if not isinstance(session_id, str) or not session_id:
                continue
            if not isinstance(entities, list) or not entities:
                continue
            clean = [e for e in entities if isinstance(e, str) and e]
            if clean:
                rows.append((session_id, clean))
    return rows


def _read_excerpt(path, limit):
    """Return at most ``limit`` leading characters of ``path``, or ``None``.

    Only the prefix is read, so a large transcript is never loaded whole, and
    undecodable bytes are replaced rather than raising.
    """
    try:
        with Path(path).open(encoding="utf-8", errors="replace") as handle:
            return handle.read(limit)
    except OSError:
        return None


def _read_entity(evolve_dir, entity_id):
    """Return ``(path, excerpt)`` for an entity file, or ``(path, None)`` if
    the file is missing or unreadable. ``entity_id`` is a ``<type>/<slug>`` id
    relative to ``entities/`` (without ``.md``).
    """
    entity_path = Path(evolve_dir) / "entities" / f"{entity_id}.md"
    if not entity_path.is_file():
        return entity_path, None
    return entity_path, _read_excerpt(entity_path, _ENTITY_EXCERPT_CHARS)


def _read_trajectory_excerpt(trajectory_path):
    """Return a bounded text excerpt of the trajectory file, or ``None``."""
    if trajectory_path is None:
        return None
    return _read_excerpt(trajectory_path, _TRAJECTORY_EXCERPT_CHARS)


def build_candidates(evolve_dir, *, project_root=None, home=None):
    """Assemble candidate dicts for every unresolved recall (session, entity).

    Returns a list of dicts shaped::

        {
            "session_id": ...,
            "entity_id": "<type>/<slug>",
            "entity_excerpt": <str or None>,
            "trajectory_path": <str or None>,
            "trajectory_excerpt": <str or None>,
            "missing": ["entity"|"trajectory", ...],  # only when non-empty
        }

    ``(session_id, entity)`` pairs that already have an ``influence`` row are
    skipped via ``log_influence.existing_influence_keys`` — the same dedup rule
    used when influence rows are written. A pair that shows up in several
    recall rows is emitted once only. Candidates whose entity file or
    trajectory cannot be found are still emitted with a ``missing`` list so the
    gap is visible rather than silently dropped.
    """
    evolve_dir = Path(evolve_dir)
    existing = log_influence.existing_influence_keys(evolve_dir)

    candidates = []
    emitted = set()
    # session_id -> (trajectory_path, trajectory_excerpt); located and read at
    # most once per session instead of once per recalled entity.
    trajectories = {}
    for session_id, entities in read_recall_rows(evolve_dir):
        pending = []
        for entity_id in entities:
            key = (session_id, entity_id)
            if key in existing or key in emitted:
                continue
            emitted.add(key)
            pending.append(entity_id)
        if not pending:
            continue

        if session_id not in trajectories:
            located = locate_trajectory(session_id, evolve_dir, project_root=project_root, home=home)
            trajectories[session_id] = (located, _read_trajectory_excerpt(located))
        trajectory_path, trajectory_excerpt = trajectories[session_id]

        for entity_id in pending:
            _entity_path, entity_excerpt = _read_entity(evolve_dir, entity_id)

            missing = []
            if entity_excerpt is None:
                missing.append("entity")
            if trajectory_path is None:
                missing.append("trajectory")

            candidate = {
                "session_id": session_id,
                "entity_id": entity_id,
                "entity_excerpt": entity_excerpt,
                "trajectory_path": str(trajectory_path) if trajectory_path else None,
                "trajectory_excerpt": trajectory_excerpt,
            }
            if missing:
                candidate["missing"] = missing
            candidates.append(candidate)
    return candidates


# ---------------------------------------------------------------------------
# record (Task A — record)
# ---------------------------------------------------------------------------


def record_verdict(payload, evolve_dir=None):
    """Append a single ``influence`` row from an agent verdict.

    ``payload`` is ``{session_id, entity, verdict, evidence}``. The verdict must
    be one of ``followed|contradicted|not_applicable`` (the *semantic* judgment
    stays agent-driven — this only persists what the agent decided). Writing is
    delegated to ``log_influence.py`` so the audit-log row format and the
    duplicate-suppression rule are not duplicated here.

    Returns the number of rows written (0 or 1). Raises ``ValueError`` on an
    invalid payload / verdict.
    """
    if not isinstance(payload, dict):
        raise ValueError("verdict payload must be a JSON object")

    session_id = payload.get("session_id")
    entity = payload.get("entity")
    verdict = payload.get("verdict")
    evidence = payload.get("evidence", "")

    if not isinstance(session_id, str) or not session_id:
        raise ValueError("verdict payload must include a non-empty string session_id")
    if not isinstance(entity, str) or not entity:
        raise ValueError("verdict payload must include a non-empty string entity")
    if verdict not in _ALLOWED_VERDICTS:
        raise ValueError(f"verdict must be one of {sorted(_ALLOWED_VERDICTS)}, got {verdict!r}")

    if evolve_dir is None:
        evolve_dir = get_evolve_dir().resolve()
    evolve_dir = Path(evolve_dir)

    existing = log_influence.existing_influence_keys(evolve_dir)
    if (session_id, entity) in existing:
        log(f"Skipping duplicate influence verdict: session_id={session_id} entity={entity}")
        return 0

    if not isinstance(evidence, str):
        evidence = str(evidence)

    log_influence.audit.append(
        evolve_dir=str(evolve_dir),
        event="influence",
        session_id=session_id,
        entity=entity,
        verdict=verdict,
        evidence=evidence,
    )
    return 1


# ---------------------------------------------------------------------------
# __main__
# ---------------------------------------------------------------------------


def _run_candidates():
    """Print one JSON candidate per line for the resolved evolve dir."""
    evolve_dir = get_evolve_dir().resolve()
    candidates = build_candidates(evolve_dir)
    for candidate in candidates:
        print(json.dumps(candidate))
    log(f"Emitted {len(candidates)} candidate(s) from {evolve_dir}")


def _run_record():
    """Read one verdict JSON object from stdin and persist it.

    Exits with status 1 (message on stderr) on malformed JSON or a rejected
    payload.
    """
    try:
        payload = json.load(sys.stdin)
    except json.JSONDecodeError as exc:
        log(f"Invalid JSON input: {exc}")
        print(f"Error: invalid JSON input - {exc}", file=sys.stderr)
        sys.exit(1)

    try:
        written = record_verdict(payload)
    except ValueError as exc:
        log(f"Rejected verdict: {exc}")
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)

    log(f"Recorded {written} influence verdict(s).")
    print(f"Recorded {written} influence verdict(s).")


def main(argv=None):
    """Dispatch on the first argument: ``candidates`` (default) or ``record``.

    Any other mode prints an error to stderr and exits with status 2.
    """
    argv = list(sys.argv[1:] if argv is None else argv)
    mode = argv[0] if argv else "candidates"
    if mode == "candidates":
        _run_candidates()
    elif mode == "record":
        _run_record()
    else:
        print(f"Error: unknown mode {mode!r}; expected 'candidates' or 'record'.", file=sys.stderr)
        sys.exit(2)


if __name__ == "__main__":
    main()
If the index is +missing or nothing looks relevant, just proceed — that's normal. + +Memories reflect what was true when written. If a memory names a file, +function, command, or flag, verify it still exists before relying on it. + +## Record what you consulted + +After recall, log which entries you actually opened, so the value of this memory +can be measured over time. Run: + +```bash +python3 ~/.codex/evolve-lite/audit_recall.py <memory-file> [<memory-file> ...] +``` + +Pass the memory files you read this turn (space-separated paths, relative to the +project root). Skip this step entirely if you consulted no memories. If the +command prints a line beginning `evolve-session:`, include that line once, +verbatim, somewhere in your reply — it lets later analysis tie this session to +what you recalled. + +## Save — only when you learn something durable + +Near the end of a task, if it produced a reusable fact that isn't already +obvious from the code or git history — and only then — write it to memory. +Saving nothing is the right outcome more often than not; never force a +low-value memory just to have saved one. + +Each memory is one file holding one fact, under `./.evolve/memory/` (create the +directory if it doesn't exist), with frontmatter: + +```markdown +--- +name: <memory-name-slug> +description: <one-line description> +metadata: + type: user | feedback | project | reference +--- + +<memory body> +``` + +Types: +- **user** — who the user is: role, expertise, durable preferences. +- **feedback** — guidance on how you should work, both corrections and + confirmed approaches; always include the why. +- **project** — ongoing work, goals, or constraints not derivable from the code + or git history; convert relative dates ("next week") to absolute ones. +- **reference** — pointers to external resources (URLs, dashboards, tickets). + +In the body, link related memories with `[[name]]`, where `name` is another +memory's `name:` slug. Link liberally; a `[[name]]` with no file yet marks +something worth writing later, not an error. 
+ +After writing the file, add a one-line pointer to `./.evolve/memory/MEMORY.md`: +`- [Title](file.md) — short hook`. MEMORY.md is the index you read during +recall — one line per memory, no frontmatter, never put memory content there. + +## When NOT to save, and housekeeping + +- Don't duplicate what the repo already records: code structure, git history, + READMEs, existing docs. If asked to remember one of those, ask what was + non-obvious about it and save that instead. +- Don't save what only matters to the current conversation. +- Before saving, check for an existing memory that already covers it — update + that file rather than creating a duplicate. +- Delete memories that turn out to be wrong. diff --git a/platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/audit_recall.py b/platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/audit_recall.py new file mode 100644 index 00000000..42dc1104 --- /dev/null +++ b/platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/audit_recall.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +"""Append a recall-audit row to .evolve/audit.log. + +Self-contained (no third-party or evolve-lite lib imports) so it can be dropped +at a single path and run by a model-invoked shell command on any platform. + +Usage: + python3 audit_recall.py [ ...] + +Records which memory entries the model consulted this turn so the `provenance` +analysis can later judge whether they influenced the outcome. Session id is +resolved from the host's environment when available and falls back to a freshly +minted UUID (printed as `evolve-session: ` for the model to echo). 
+""" + +from __future__ import annotations + +import json +import os +import sys +import uuid +from datetime import datetime, timezone +from pathlib import Path + + +def _evolve_dir() -> Path: + env = os.environ.get("EVOLVE_DIR") + return Path(env) if env else Path.cwd() / ".evolve" + + +def _session_id() -> tuple[str, bool]: + """Return (session_id, self_minted).""" + for var in ("CLAUDE_CODE_SESSION_ID", "CODEX_THREAD_ID"): + val = os.environ.get(var) + if val: + return val, False + return str(uuid.uuid4()), True + + +def main(argv: list[str]) -> int: + entities = [a for a in argv if a.strip()] + if not entities: + return 0 + + session_id, minted = _session_id() + row = { + "event": "recall", + "session_id": session_id, + "entities": entities, + "ts": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ"), + } + + log = _evolve_dir() / "audit.log" + log.parent.mkdir(parents=True, exist_ok=True) + with log.open("a", encoding="utf-8") as fh: + fh.write(json.dumps(row) + "\n") + + if minted: + print(f"evolve-session: {session_id}") + count = len(entities) + print(f"Recorded recall of {count} memory entr{'y' if count == 1 else 'ies'}.") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/entity_io.py b/platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/entity_io.py index 63f77e2c..0d4ccace 100644 --- a/platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/entity_io.py +++ b/platform-integrations/codex/plugins/evolve-lite/lib/evolve-lite/entity_io.py @@ -118,6 +118,21 @@ def slugify(text, max_length=60): return text or "entity" +def sanitize_type(text): + """Sanitize an entity *type* into a filesystem-safe subdirectory name. + + Like :func:`slugify` but without truncation — a type is a short label, + not free-form content, and truncating it could silently merge distinct + types. 
Returns an empty string for input that contains no usable + characters, leaving the fallback decision to the caller. + """ + if not isinstance(text, str): + return "" + text = text.lower() + text = re.sub(r"[^a-z0-9]+", "-", text) + return text.strip("-") + + def unique_filename(directory, slug): """Return a Path that doesn't collide with existing files in *directory*. @@ -139,7 +154,7 @@ def unique_filename(directory, slug): # Markdown <-> dict conversion # --------------------------------------------------------------------------- -_FRONTMATTER_KEYS = ("type", "trigger", "trajectory", "owner", "source", "visibility", "published_at") +_FRONTMATTER_KEYS = ("type", "trigger", "trajectory", "owner", "source", "native_path", "visibility", "published_at") def entity_to_markdown(entity): @@ -339,24 +354,35 @@ def load_all_entities(entities_dir): return entities -def write_entity_file(directory, entity): +def write_entity_file(directory, entity, filename=None, overwrite=False): """Write a single entity as a markdown file under *directory*. The file is placed in a ``{type}/`` subdirectory. Uses atomic write (write to ``.tmp``, then ``os.rename``). + Args: + directory: Entities root directory. + entity: The entity dict to serialize. + filename: Optional explicit slug for the target file (without the + ``.md`` suffix). When omitted, the slug is derived from the + entity content (the historical default). + overwrite: When True, the entity is written to a deterministic + ``{type}/{filename}.md`` path, overwriting any existing file in + place (stable id, idempotent re-mirroring). When False (the + default), the historical collision-avoiding behavior is kept — + a ``-2``/``-3`` suffix is appended on collision. + Returns: Path to the written file. 
""" - _ALLOWED_TYPES = {"guideline", "preference"} - entity_type = entity.get("type", "guideline") - if not isinstance(entity_type, str) or entity_type not in _ALLOWED_TYPES: - entity_type = "guideline" + # Any non-empty type is accepted and used (sanitized) as the + # subdirectory. An empty/invalid type falls back to "guideline". + entity_type = sanitize_type(entity.get("type", "guideline")) or "guideline" entity["type"] = entity_type type_dir = Path(directory) / entity_type type_dir.mkdir(parents=True, exist_ok=True) - slug = slugify(entity.get("content", "entity")) + slug = slugify(filename) if filename else slugify(entity.get("content", "entity")) content = entity_to_markdown(entity) # Write to a unique temp file first (avoids predictable .tmp collisions) @@ -367,6 +393,13 @@ def write_entity_file(directory, entity): os.close(fd) fd = None + if overwrite: + # Deterministic target: overwrite any existing file in place so + # the entity id is stable across re-mirroring. + target = type_dir / f"{slug}.md" + os.replace(tmp_path, target) + return target + # Atomically claim the target using O_EXCL; retry on race while True: target = unique_filename(type_dir, slug) diff --git a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md new file mode 100644 index 00000000..ccc0b831 --- /dev/null +++ b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/adapt-memory/SKILL.md @@ -0,0 +1,13 @@ +--- +name: adapt-memory +description: Mirror a just-saved native memory into the shared evolve store so it becomes shareable and auditable +--- + +# Adapt Memory + +This skill mirrors a just-saved native memory into the shared evolve store. It +is specific to hosts with native self-directed memory and is a no-op on this +platform — there is no native memory store to mirror from. Use the +/evolve-lite:learn +skill to capture reusable lessons here. 
+ diff --git a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py new file mode 100644 index 00000000..5bb8fb44 --- /dev/null +++ b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 +""" +Adapt Memory Script (Claude-only) + +Mirrors a single native Claude memory file into the in-repo evolve store at +``${EVOLVE_DIR:-.evolve}/entities/<type>/<slug>.md`` so the memory becomes +shareable and auditable like every other evolve entity. + +Native memory files live under ``~/.claude/projects/<project-slug>/memory/`` and carry +frontmatter of the form:: + + --- + name: <slug> + description: <one-line description> + metadata: + type: user | feedback | project | reference + --- + + <memory body> + +The agent passes the native ``--type`` through verbatim (native types map +straight onto the entity type — no remapping) and supplies a synthesized +``--trigger`` (the single most important field for future retrieval). The body +of the native file becomes the entity content; the native ``description`` is +carried into the body as a lead line when present. + +Usage: + python3 adapt_memory.py <memory_path> --type <type> --trigger <trigger> +""" + +import argparse +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple +# plugins can coexist side by side (e.g. .bob/lib/evolve-lite/). 
+_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) +from entity_io import ( # noqa: E402 + find_entities_dir, + get_default_entities_dir, + slugify, + write_entity_file, + log as _log, +) + + +def log(message): + _log("adapt-memory", message) + + +def parse_native_memory(text): + """Split a native memory file into (name, description, body). + + Native frontmatter is simple ``key: value`` lines plus a nested + ``metadata:`` block; we parse the top-level ``name`` and ``description`` + lines and treat everything after the closing ``---`` as the body. The + ``name`` is the native slug we reuse as the stable entity id. Missing + frontmatter is tolerated — the whole text is then the body. + """ + name = None + description = None + body = text + if text.startswith("---"): + parts = text.split("---", 2) + if len(parts) >= 3: + frontmatter, body = parts[1], parts[2] + for line in frontmatter.splitlines(): + # Only top-level keys (no leading indentation) — keeps the + # nested metadata.* keys out of the top-level matches. + if line[:1].isspace(): + continue + key, _, value = line.partition(":") + key = key.strip() + value = value.strip() + if key == "name" and value: + name = value + elif key == "description" and value: + description = value + return name, description, body.strip() + + +def main(): + parser = argparse.ArgumentParser(description="Mirror a native memory into the evolve store.") + parser.add_argument("memory_path", help="Path to the just-saved native memory file.") + parser.add_argument( + "--type", + required=True, + help="Native memory type, passed through as the entity type (e.g. 
user, feedback, project, reference).", + ) + parser.add_argument( + "--trigger", + required=True, + help="Synthesized one-sentence 'when to recall this' description.", + ) + args = parser.parse_args() + + memory_path = Path(args.memory_path).expanduser() + if not memory_path.is_file(): + print(f"Error: native memory file not found: {memory_path}", file=sys.stderr) + sys.exit(1) + + try: + text = memory_path.read_text(encoding="utf-8") + except OSError as exc: + print(f"Error: cannot read {memory_path} - {exc}", file=sys.stderr) + sys.exit(1) + + name, description, body = parse_native_memory(text) + if not body: + print(f"Error: native memory {memory_path} has no body to mirror.", file=sys.stderr) + sys.exit(1) + + # Carry the native description into the body as a lead line when it isn't + # already echoed there, so the mirrored entity is self-describing. + content = body + if description and description not in body: + content = f"{description}\n\n{body}" + + # The native ``name`` becomes the stable, derivable entity slug so the + # entity id is ``/`` on both sides — provenance can map an + # audited native memory straight onto its mirror. Fall back to a + # content-derived slug only when the native frontmatter has no name. + slug = slugify(name) if name else slugify(content) + + entity = { + "type": args.type, + "trigger": args.trigger, + "content": content, + "source": "native-memory", + "native_path": args.memory_path, + } + + entities_dir = find_entities_dir() + if entities_dir: + entities_dir = entities_dir.resolve() + log(f"Using existing entities dir: {entities_dir}") + else: + entities_dir = get_default_entities_dir() + log(f"Created entities dir: {entities_dir}") + + # Deterministic, idempotent write: re-mirroring the same native memory + # (same name + type) overwrites /.md in place rather than + # creating -2.md, keeping the entity id stable. 
+ path = write_entity_file(entities_dir, entity, filename=slug, overwrite=True) + entity_id = f"{entity['type']}/{slug}" + log(f"Mirrored {memory_path} -> {path} (id: {entity_id})") + print(f"Mirrored native memory into evolve store: {path}") + print(f"Entity id: {entity_id}") + + +if __name__ == "__main__": + main() diff --git a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/provenance/SKILL.md b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/provenance/SKILL.md index 349ac090..d919b538 100644 --- a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/provenance/SKILL.md +++ b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/provenance/SKILL.md @@ -7,58 +7,107 @@ description: Analyze saved trajectories and recall audit events offline to recor ## Overview -This skill runs after one or more sessions have completed. It reads saved trajectories from `.evolve/trajectories/`, matches them to `recall` events in `.evolve/audit.log`, and records post-hoc `influence` events for recalled guidelines. +This skill runs after one or more sessions have completed. It reads `recall` +events from `.evolve/audit.log`, locates each session's trajectory, and records +post-hoc `influence` events for the recalled guidelines. -Use this skill when you want to compute usage provenance without coupling the work to the live learn step. +The mechanical work — reading recall rows, skipping already-assessed pairs, +resolving entity files, and locating trajectories — is done deterministically by +`provenance.py candidates`. Your job is the judgment: read each candidate and +decide whether the recalled guideline was `followed`, `contradicted`, or +`not_applicable`, then persist that verdict. -## Workflow - -### Step 1: Load Recall Events - -Read `.evolve/audit.log` as JSONL. Find entries where `event == "recall"` and `entities` is a non-empty list. 
- -Skip any recall event that already has `influence` entries for the same `session_id` and entity ids. Do not write duplicate influence records. - -### Step 2: Locate Saved Trajectories +Use this skill when you want to compute usage provenance without coupling the +work to the live learn step. -List `.evolve/trajectories/` and match each recall event to a trajectory by `session_id`. - -Matching strategy (in order): -1. `claude-transcript_.jsonl` - the stop-hook transcript dump; the session id is in the filename. -2. `trajectory__.json` - written by the evolve-lite:save-trajectory skill when a session id is available. Match on the `` slice of the filename. -3. `trajectory_.json` - open the file and match its top-level `session_id` field against the recall event. Only fall back to this step when the filename alone does not identify the session. - -If none of the above yields a confident match for a recall event, skip it. Do not guess. - -### Step 3: Read Recalled Entities +## Workflow -For each recalled entity id, open `.evolve/entities/.md`. The id is a path relative to `.evolve/entities/` without the `.md` suffix, such as `guideline/foo` or `subscribed/alice/guideline/foo`. +### Step 1: Get candidates -Read the entity content and trigger. Skip ids whose files are missing. +Run the candidate builder. It emits one JSON object per line (JSONL), one per +unresolved `(session_id, entity)` recall pair: -### Step 4: Assess Influence +```bash +python3 "$(git rev-parse --show-toplevel 2>/dev/null || pwd)/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py" candidates +``` -Compare each recalled entity with the matched trajectory. Pick exactly one verdict: +Each candidate looks like: -- `followed` - the agent's actual actions are consistent with the guideline. -- `contradicted` - the guideline applied, but the agent did the opposite or repeated the avoidable dead end. -- `not_applicable` - the guideline was recalled but did not apply to this session. 
+```json +{ + "session_id": "<session-id>", + "entity_id": "<type>/<slug>", + "entity_excerpt": "<leading text of the entity file>", + "trajectory_path": "/path/to/transcript.jsonl", + "trajectory_excerpt": "<leading text of the trajectory file>", + "missing": ["trajectory"] +} +``` -Keep `evidence` to one short sentence citing a concrete action, tool call, or absence in the trajectory. +Notes: + +- `entity_id` is the path relative to `.evolve/entities/` without the `.md` + suffix, e.g. `feedback/foo`, `guideline/bar`, or + `subscribed/alice/guideline/baz`. +- Pairs that already have an `influence` row are skipped for you — the builder + reuses the same dedup rule used when influence rows are written. You will + never be handed a duplicate. +- The trajectory locator checks `.evolve/trajectories/` first, then falls back + to the native Claude transcript at + `~/.claude/projects/<project-slug>/<session-id>.jsonl`. This means provenance works + even when no `.evolve/trajectories/` file was written. +- If an entity file or trajectory cannot be found, the candidate is still + emitted with a `missing: [...]` field so the gap is visible. When the + trajectory is missing you usually cannot judge the pair — skip it (do not + guess), unless the entity content alone makes `not_applicable` certain. + +### Step 2: Judge each candidate + +For each candidate, read `entity_excerpt` (and open `trajectory_path` for the +full transcript if the excerpt is not enough). Compare the recalled guideline +against the agent's actual actions in the trajectory and pick exactly one +verdict: + +- `followed` — the agent's actual actions are consistent with the guideline. +- `contradicted` — the guideline applied, but the agent did the opposite or + repeated the avoidable dead end. +- `not_applicable` — the guideline was recalled but did not apply to this + session. + +Keep `evidence` to one short sentence citing a concrete action, tool call, or +absence in the trajectory. This judgment is yours — there is no heuristic +fallback. + +### Step 3: Record verdicts + +Persist each verdict. 
Either pipe one verdict per call to `provenance.py +record`: -### Step 5: Write Influence Events +```bash +echo '{ + "session_id": "", + "entity": "/", + "verdict": "followed", + "evidence": "Agent used the saved parser before trying shell fallbacks." +}' | python3 "$(git rev-parse --show-toplevel 2>/dev/null || pwd)/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py" record +``` -Pipe one JSON payload per assessed session to the helper: +…or, to batch many assessments for one session in a single call, pipe to the +underlying writer directly: ```bash echo '{ "session_id": "", "assessments": [ - {"entity": "guideline/", "verdict": "followed", "evidence": "Agent used the saved parser before trying shell fallbacks."} + {"entity": "feedback/foo", "verdict": "followed", "evidence": "Agent followed it."}, + {"entity": "guideline/bar", "verdict": "not_applicable", "evidence": "Did not apply."} ] }' | python3 "$(git rev-parse --show-toplevel 2>/dev/null || pwd)/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/log_influence.py" ``` -The `entity` value must match exactly what appeared in the recall event, including any `subscribed//` prefix. +Both paths write the identical `influence` audit row and skip duplicates. The +`entity` value must match the candidate's `entity_id` exactly, including any +`subscribed//` prefix. -It is valid to emit an empty `assessments` list when recall events exist but no recalled guideline can be assessed. +It is valid to record nothing when recall events exist but no recalled guideline +can be assessed (e.g. every candidate is missing its trajectory). 
diff --git a/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py new file mode 100644 index 00000000..c2272501 --- /dev/null +++ b/platform-integrations/codex/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py @@ -0,0 +1,358 @@ +#!/usr/bin/env python3 +"""Deterministic provenance plumbing for the evolve-lite provenance skill. + +This script does the *mechanical* half of provenance — the part that can be +made deterministic and tested end to end: + + * read ``recall`` rows from ``.evolve/audit.log``, + * skip ``(session_id, entity)`` pairs that already have an ``influence`` row, + * resolve each recalled entity file and the session's trajectory transcript, + * assemble candidate dicts the agent can judge. + +The *semantic* half — deciding whether a recalled guideline was ``followed``, +``contradicted`` or ``not_applicable`` — is an LLM judgment and is NOT done +here. There is deliberately no heuristic verdict: this module never invents a +verdict. The agent reads each candidate, judges it, and pipes the verdict back +through ``record`` (which delegates to ``log_influence.py``'s writer so the +audit-log format is identical). + +Two modes: + + candidates (default) — emit one JSONL candidate per unresolved + (session_id, entity) recall pair to stdout. + record — read a verdict JSON from stdin and append an + ``influence`` row via log_influence.py's writer. +""" + +import json +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple +# plugins can coexist side by side (e.g. .bob/lib/evolve-lite/). 
+_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) + +# Provenance reuses log_influence.py's writer + dedup so the audit-log format and +# the duplicate-suppression rule live in exactly one place. log_influence.py sits +# next to this file in the same skill scripts/ directory. +sys.path.insert(0, str(_script.parent)) + +from entity_io import get_evolve_dir, log as _log # noqa: E402 +import log_influence # noqa: E402 + +_ALLOWED_VERDICTS = log_influence._ALLOWED_VERDICTS + +# How many characters of the entity file / trajectory to surface in a candidate. +_ENTITY_EXCERPT_CHARS = 4000 +_TRAJECTORY_EXCERPT_CHARS = 4000 + + +def log(message): + _log("provenance", message) + + +# --------------------------------------------------------------------------- +# Trajectory locator (Task B) +# --------------------------------------------------------------------------- + + +def _claude_transcript_slug(root): + """Claude derives a project's transcript dir name by replacing every + non-alphanumeric character in the absolute project path with ``-``. + + e.g. /Users/x/Documents/kaizen -> -Users-x-Documents-kaizen + + This mirrors ``_transcript_slug`` in the doctor skill + (skills/evolve-lite/doctor/scripts/doctor.py). The two are kept in sync by + hand because doctor and provenance ship as independent scripts that do not + import one another in the rendered tree; if you change one, change both. + """ + import re + + return re.sub(r"[^A-Za-z0-9]", "-", str(root)) + + +def locate_trajectory(session_id, evolve_dir, *, project_root=None, home=None): + """Locate the saved trajectory transcript for ``session_id``. + + Resolution order (best-effort, returns the first hit or ``None``): + + 1. 
Legacy ``.evolve/trajectories/`` files: + * ``claude-transcript_.jsonl`` — stop-hook transcript dump. + * ``trajectory__.json`` — save-trajectory skill output; the sid + is the filename slice after the timestamp. + * ``trajectory_.json`` — open and match the inner ``session_id``. + 2. NEW native Claude transcript: ``~/.claude/projects//.jsonl`` + where ```` is the project root path slugified the way Claude does + (every non-alphanumeric char -> ``-``; see ``_claude_transcript_slug``). + + Native discovery makes provenance work in the hookless world where no + ``.evolve/trajectories/`` file is ever written. It is platform-neutral: + Bob/Codex keep their transcripts elsewhere, so the native step simply falls + through to ``None`` for them rather than misfiring. + """ + evolve_dir = Path(evolve_dir) + + # --- 1. Legacy .evolve/trajectories/ ------------------------------------ + traj_dir = evolve_dir / "trajectories" + if traj_dir.is_dir(): + direct = traj_dir / f"claude-transcript_{session_id}.jsonl" + if direct.is_file(): + return direct + + # trajectory__.json — match on the filename sid slice. + for path in sorted(traj_dir.glob("trajectory_*_*.json")): + stem = path.stem # trajectory__ + parts = stem.split("_", 2) + if len(parts) == 3 and parts[2] == session_id: + return path + + # trajectory_.json — open and match the inner session_id field. + for path in sorted(traj_dir.glob("trajectory_*.json")): + # Skip the _ shape already handled above. + if path.stem.count("_") >= 2: + continue + try: + data = json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + continue + if isinstance(data, dict) and data.get("session_id") == session_id: + return path + + # --- 2. Native Claude transcript ---------------------------------------- + # The project root is the parent of the .evolve dir; the home dir holds + # ~/.claude/projects//.jsonl. 
+ root = Path(project_root) if project_root is not None else evolve_dir.resolve().parent + base = Path(home) if home is not None else Path.home() + slug = _claude_transcript_slug(root) + native = base / ".claude" / "projects" / slug / f"{session_id}.jsonl" + if native.is_file(): + return native + + return None + + +# --------------------------------------------------------------------------- +# Recall row reading + entity resolution (Task A — candidates) +# --------------------------------------------------------------------------- + + +def read_recall_rows(evolve_dir): + """Return a list of ``(session_id, [entity_id, ...])`` tuples for every ``recall`` audit row. + + Rows with no ``session_id`` or an empty ``entities`` list are skipped. + """ + audit_log = Path(evolve_dir) / "audit.log" + if not audit_log.is_file(): + return [] + + rows = [] + for line in audit_log.read_text(encoding="utf-8").splitlines(): + if not line.strip(): + continue + try: + event = json.loads(line) + except json.JSONDecodeError: + continue + if not isinstance(event, dict) or event.get("event") != "recall": + continue + session_id = event.get("session_id") + entities = event.get("entities") + if not isinstance(session_id, str) or not session_id: + continue + if not isinstance(entities, list) or not entities: + continue + clean = [e for e in entities if isinstance(e, str) and e] + if clean: + rows.append((session_id, clean)) + return rows + + +def _read_entity(evolve_dir, entity_id): + """Return ``(path, excerpt)`` for an entity file, or ``(path, None)`` if + the file is missing. ``entity_id`` is a ``/`` id relative to + ``entities/`` (without ``.md``). 
+ """ + entity_path = Path(evolve_dir) / "entities" / f"{entity_id}.md" + if not entity_path.is_file(): + return entity_path, None + try: + text = entity_path.read_text(encoding="utf-8") + except OSError: + return entity_path, None + return entity_path, text[:_ENTITY_EXCERPT_CHARS] + + +def _read_trajectory_excerpt(trajectory_path): + """Return a bounded text excerpt of the trajectory file, or ``None``.""" + if trajectory_path is None: + return None + try: + text = Path(trajectory_path).read_text(encoding="utf-8") + except OSError: + return None + return text[:_TRAJECTORY_EXCERPT_CHARS] + + +def build_candidates(evolve_dir, *, project_root=None, home=None): + """Assemble candidate dicts for every unresolved recall (session, entity). + + Returns a list of dicts shaped:: + + { + "session_id": ..., + "entity_id": "/", + "entity_excerpt": , + "trajectory_path": , + "trajectory_excerpt": , + "missing": ["entity"|"trajectory", ...], # only when non-empty + } + + ``(session_id, entity)`` pairs that already have an ``influence`` row are + skipped via ``log_influence.existing_influence_keys`` — the same dedup rule + used when influence rows are written. Candidates whose entity file or + trajectory cannot be found are still emitted with a ``missing`` list so the + gap is visible rather than silently dropped. 
def build_candidates(evolve_dir, *, project_root=None, home=None):
    """Assemble one candidate dict per unresolved recall ``(session, entity)``.

    Each candidate has the keys ``session_id``, ``entity_id``,
    ``entity_excerpt`` (str or None), ``trajectory_path`` (str or None) and
    ``trajectory_excerpt`` (str or None). A ``missing`` key, listing
    ``"entity"`` and/or ``"trajectory"``, is added only when the entity
    excerpt could not be read or no trajectory was located, so the gap stays
    visible instead of the candidate being dropped.

    Pairs that already have an ``influence`` row (as reported by
    ``log_influence.existing_influence_keys``) are skipped. A pair that shows
    up in several ``recall`` rows, or twice in one row, is emitted once only:
    a second copy could never be recorded anyway, because recording rejects
    duplicates of the same pair.

    The trajectory is located and its excerpt read at most once per session,
    and only when that session has at least one pending pair.
    """
    evolve_dir = Path(evolve_dir)
    # Copy so the caller-owned collection is never mutated; pairs emitted
    # during this run are added to it to suppress repeats.
    handled = set(log_influence.existing_influence_keys(evolve_dir))
    trajectories = {}  # session_id -> (trajectory_path, trajectory_excerpt)

    candidates = []
    for session_id, entities in read_recall_rows(evolve_dir):
        for entity_id in entities:
            key = (session_id, entity_id)
            if key in handled:
                continue
            handled.add(key)

            if session_id not in trajectories:
                located = locate_trajectory(
                    session_id, evolve_dir, project_root=project_root, home=home
                )
                trajectories[session_id] = (located, _read_trajectory_excerpt(located))
            trajectory_path, trajectory_excerpt = trajectories[session_id]

            _entity_path, entity_excerpt = _read_entity(evolve_dir, entity_id)

            missing = []
            if entity_excerpt is None:
                missing.append("entity")
            if trajectory_path is None:
                missing.append("trajectory")

            candidate = {
                "session_id": session_id,
                "entity_id": entity_id,
                "entity_excerpt": entity_excerpt,
                "trajectory_path": str(trajectory_path) if trajectory_path else None,
                "trajectory_excerpt": trajectory_excerpt,
            }
            if missing:
                candidate["missing"] = missing
            candidates.append(candidate)
    return candidates
def record_verdict(payload, evolve_dir=None):
    """Persist one agent verdict as an ``influence`` audit row.

    ``payload`` is a dict with ``session_id``, ``entity``, ``verdict`` and an
    optional ``evidence``. The verdict has to be a member of
    ``_ALLOWED_VERDICTS``; this function only stores the agent's decision and
    makes no judgment of its own. The row is written through
    ``log_influence`` so the row format and the duplicate rule live in one
    place.

    When *evolve_dir* is ``None`` the directory from ``get_evolve_dir()`` is
    used. Returns 1 when a row was appended and 0 when the
    ``(session_id, entity)`` pair already had one. Raises ``ValueError`` for a
    non-dict payload, a missing or empty ``session_id`` / ``entity``, or an
    unknown verdict.
    """
    if not isinstance(payload, dict):
        raise ValueError("verdict payload must be a JSON object")

    session_id, entity = payload.get("session_id"), payload.get("entity")
    if not (isinstance(session_id, str) and session_id):
        raise ValueError("verdict payload must include a non-empty string session_id")
    if not (isinstance(entity, str) and entity):
        raise ValueError("verdict payload must include a non-empty string entity")

    verdict = payload.get("verdict")
    if verdict not in _ALLOWED_VERDICTS:
        raise ValueError(f"verdict must be one of {sorted(_ALLOWED_VERDICTS)}, got {verdict!r}")

    target_dir = Path(get_evolve_dir().resolve() if evolve_dir is None else evolve_dir)

    if (session_id, entity) in log_influence.existing_influence_keys(target_dir):
        log(f"Skipping duplicate influence verdict: session_id={session_id} entity={entity}")
        return 0

    # Evidence is free-form; anything that is not already text is stringified.
    evidence = payload.get("evidence", "")
    log_influence.audit.append(
        evolve_dir=str(target_dir),
        event="influence",
        session_id=session_id,
        entity=entity,
        verdict=verdict,
        evidence=evidence if isinstance(evidence, str) else str(evidence),
    )
    return 1


# ---------------------------------------------------------------------------
# __main__
# ---------------------------------------------------------------------------


def _run_candidates():
    """Print every pending candidate as one JSON object per line, then log the count."""
    evolve_dir = get_evolve_dir().resolve()
    candidates = build_candidates(evolve_dir)
    for serialized in map(json.dumps, candidates):
        print(serialized)
    log(f"Emitted {len(candidates)} candidate(s) from {evolve_dir}")
def _run_record():
    """Read one verdict payload as JSON from stdin and record it.

    Exits with status 1 (after logging and printing the reason to stderr) when
    stdin is not valid JSON or the payload is rejected. Otherwise logs and
    prints how many rows were written.
    """

    def _abort(log_message, user_message):
        # Shared failure path: log, tell the user on stderr, exit 1.
        log(log_message)
        print(user_message, file=sys.stderr)
        sys.exit(1)

    try:
        payload = json.load(sys.stdin)
    except json.JSONDecodeError as exc:
        _abort(f"Invalid JSON input: {exc}", f"Error: invalid JSON input - {exc}")

    try:
        written = record_verdict(payload)
    except ValueError as exc:
        _abort(f"Rejected verdict: {exc}", f"Error: {exc}")

    log(f"Recorded {written} influence verdict(s).")
    print(f"Recorded {written} influence verdict(s).")


def main(argv=None):
    """Dispatch on the first argument: ``candidates`` (the default) or ``record``.

    *argv* defaults to ``sys.argv[1:]``. Any other mode prints an error to
    stderr and exits with status 2.
    """
    args = list(sys.argv[1:] if argv is None else argv)
    mode = args[0] if args else "candidates"
    if mode not in ("candidates", "record"):
        print(f"Error: unknown mode {mode!r}; expected 'candidates' or 'record'.", file=sys.stderr)
        sys.exit(2)
    runner = _run_candidates if mode == "candidates" else _run_record
    runner()


if __name__ == "__main__":
    main()
# So we drop a COPY of the thin EVOLVE.md at `.evolve/EVOLVE.md` under the repo
# root and inject a single native CLAUDE.md `@`-import line pointing at it. The
# path is repo-relative (it resolves from CLAUDE.md's directory, i.e. the repo
# root). The line is its own uninstall handle (the marker is a substring of the
# line), so no HTML comment is needed.
CLAUDE_EVOLVE_MD_REL = ".evolve/EVOLVE.md"
# Any CLAUDE.md line containing this substring is treated as the managed line.
CLAUDE_IMPORT_MARKER = CLAUDE_EVOLVE_MD_REL
CLAUDE_IMPORT_LINE = "@" + CLAUDE_EVOLVE_MD_REL

# Claude plugins cannot self-declare tool permissions, env vars aren't expanded
# in permission rules, and plugin install dirs are version-unstable. The only
# way to pre-authorize evolve's scripts and `.evolve` writes without a per-use
# prompt is therefore to merge these allow-rules into the repo's project
# settings at `.claude/settings.json` under the repo root. The script paths use
# the GLOBAL stable paths the installer ships to (`~/.claude/evolve-lite/*.py`),
# which are allowlistable because they never move between plugin versions. The
# `~/` prefix and the trailing `:*` (match-any-args) suffix are both valid per
# the Claude Code settings docs.
CLAUDE_SETTINGS_REL = ".claude/settings.json"
CLAUDE_ALLOW_RULES = [
    "Bash(python3 ~/.claude/evolve-lite/" + ADAPT_SCRIPT + ":*)",
    "Bash(python3 ~/.claude/evolve-lite/" + AUDIT_SCRIPT + ":*)",
    "Read(.evolve/**)",
    "Edit(.evolve/**)",
    "Write(.evolve/**)",
]
def remove_dir_if_empty(self, path):
    """Delete *path* only if it is an existing directory with no entries.

    Tidies a per-plugin directory once its last managed file is gone while
    leaving it alone when a user or another plugin keeps content there.
    Returns ``True`` when the directory was removed, ``False`` otherwise.
    """
    target = str(path)
    if not os.path.isdir(target):
        return False
    if os.listdir(target):
        return False
    os.rmdir(target)
    debug(f"Removed empty dir: {target}")
    return True


def merge_json_permission_rules(self, path, rules):
    """Add *rules* to ``permissions.allow`` in a Claude settings file.

    Rules already present are kept and never duplicated, so re-running is
    harmless; all other settings keys are preserved. A ``permissions`` value
    that is not a dict, or an ``allow`` value that is not a list, is replaced
    by an empty one. The result is always written back via
    ``atomic_write_json``.
    """
    settings = read_json(path)

    perms = settings.get("permissions")
    if not isinstance(perms, dict):
        perms = settings["permissions"] = {}

    allowed = perms.get("allow")
    if not isinstance(allowed, list):
        allowed = perms["allow"] = []

    for rule in rules:
        if rule in allowed:
            continue
        allowed.append(rule)

    self.atomic_write_json(path, settings)
def remove_json_permission_rules(self, path, rules):
    """Strip exactly *rules* from ``permissions.allow`` in a Claude settings file.

    Rules the user added themselves are left untouched. Emptied containers are
    cleaned up on the way out: an empty ``allow`` list is dropped, then an
    empty ``permissions`` dict, and if the whole document ends up empty the
    file itself is removed instead of being rewritten. Does nothing when the
    file does not exist.
    """
    if not os.path.isfile(str(path)):
        return

    settings = read_json(path)
    perms = settings.get("permissions")
    if isinstance(perms, dict) and isinstance(perms.get("allow"), list):
        unwanted = set(rules)
        survivors = [rule for rule in perms["allow"] if rule not in unwanted]
        if survivors:
            perms["allow"] = survivors
        else:
            perms.pop("allow", None)
        if not perms:
            settings.pop("permissions", None)

    if settings:
        self.atomic_write_json(path, settings)
    else:
        self.remove_file(path)
def remove_yaml_custom_mode_by_slug(self, target_yaml_path, slug):
    """Delete a bare ``- slug: NAME`` list item from a custom_modes YAML file.

    This handles list items written without sentinel comments. The whole item
    goes: its ``- slug:`` header line plus every following line that is blank
    or indented deeper than the dash. Removal stops at the first non-blank
    line indented at or left of the dash (the next sibling item or an outer
    key). The slug may be bare or quoted. The file is rewritten only when an
    item was actually removed; a missing file or an absent slug is a no-op.
    """
    target_yaml_path = str(target_yaml_path)
    if not os.path.isfile(target_yaml_path):
        return
    with open(target_yaml_path) as f:
        lines = f.read().splitlines(keepends=True)

    # Header of the item to drop: optional indent, dash, `slug:`, then the
    # slug (optionally quoted) and nothing else on the line.
    head_re = re.compile(
        r"^(\s*)-\s+slug:\s*[\"']?" + re.escape(slug) + r"[\"']?\s*$"
    )

    kept = []
    removed = False
    dropping_indent = None  # dash indent of the item being skipped, else None
    for line in lines:
        if dropping_indent is not None:
            # Still inside the dropped item: blank lines and deeper-indented
            # lines belong to its body.
            if line.strip() == "" or len(line) - len(line.lstrip()) > dropping_indent:
                continue
            dropping_indent = None  # body ended; handle this line normally
        header = head_re.match(line)
        if header is None:
            kept.append(line)
            continue
        removed = True
        dropping_indent = len(header.group(1))

    if removed:
        self.atomic_write_text(target_yaml_path, "".join(kept))
        debug(f"Removed YAML custom mode (slug '{slug}'): {target_yaml_path}")
The line is its + # own uninstall handle (marker is a substring of the line). + claude_md = Path(target_dir) / "CLAUDE.md" + self.ops.inject_marker_line(claude_md, CLAUDE_IMPORT_MARKER, CLAUDE_IMPORT_LINE) + success(f"Injected '{CLAUDE_PLUGIN}' import pointer into {claude_md}") + if self.ops.is_dry_run: + dryrun("Claude shows a one-time 'allow external imports' dialog on first session") + else: + warn( + "On the first Claude session in this repo, an 'allow external " + "imports' dialog will appear — you must Allow it, or the " + f"{CLAUDE_IMPORT_LINE} import is silently disabled." + ) + + # Recall-audit script: the thin EVOLVE.md instructs running + # `~/.claude/evolve-lite/audit_recall.py`, so install it at that GLOBAL + # absolute path (mirroring CodexInstaller). Prefer the rendered claude + # copy; fall back to the shared plugin-source original. + audit_src = plugin_source / "lib" / "evolve-lite" / AUDIT_SCRIPT + if not audit_src.is_file(): + audit_src = Path(source_dir) / "plugin-source" / "lib" / AUDIT_SCRIPT + audit_text = "" if self.ops.is_dry_run and not audit_src.is_file() else audit_src.read_text() + audit_file = Path.home() / ".claude" / "evolve-lite" / AUDIT_SCRIPT + self.ops.atomic_write_text(audit_file, audit_text) + success(f"Installed recall-audit script → {audit_file}") + + # adapt-memory adapter script: the adapt-memory skill invokes + # `python3 ~/.claude/evolve-lite/adapt_memory.py` (a STABLE, version-proof + # path so it can be permission-allowlisted — the versioned plugin dir + # cannot). Ship it to that GLOBAL path, mirroring the audit script above. + # Unlike audit_recall.py (self-contained), adapt_memory.py imports + # `entity_io` from the shared lib: it walks up its own ancestors looking + # for `lib/evolve-lite/entity_io.py`, so ship the shared lib alongside it + # at ~/.claude/evolve-lite/lib/evolve-lite/ (matching bob/codex, which + # also ship a sibling lib/ for their scripts). 
+ claude_evolve_dir = Path.home() / ".claude" / "evolve-lite" + adapt_src = plugin_source / "skills" / "evolve-lite" / "adapt-memory" / "scripts" / ADAPT_SCRIPT + if not adapt_src.is_file(): + adapt_src = Path(source_dir) / "plugin-source" / "skills" / "evolve-lite" / "adapt-memory" / "scripts" / ADAPT_SCRIPT + adapt_text = "" if self.ops.is_dry_run and not adapt_src.is_file() else adapt_src.read_text() + adapt_file = claude_evolve_dir / ADAPT_SCRIPT + self.ops.atomic_write_text(adapt_file, adapt_text) + success(f"Installed adapt-memory script → {adapt_file}") + + lib_src = plugin_source / "lib" / "evolve-lite" + if not (lib_src / "entity_io.py").is_file(): + lib_src = Path(source_dir) / "plugin-source" / "lib" + lib_dst = claude_evolve_dir / "lib" / "evolve-lite" + self.ops.copy_tree(lib_src, lib_dst) + success(f"Installed shared lib → {lib_dst}") + def install(self, target_dir): info("Installing Claude plugin via marketplace") + # Deliver the per-repo EVOLVE.md + import pointer + global audit/adapt + # scripts regardless of whether the `claude` CLI is present below. + self._deliver_files(target_dir) + + # Pre-authorize evolve's scripts + .evolve writes so they never trigger a + # per-use permission prompt. Plugins can't self-declare permissions, so + # merge the allow-rules into the repo's project settings (idempotent, + # preserves existing rules/keys). See CLAUDE_ALLOW_RULES for the rationale. 
+ settings_path = Path(target_dir) / CLAUDE_SETTINGS_REL + self.ops.merge_json_permission_rules(settings_path, CLAUDE_ALLOW_RULES) + success(f"Allowlisted evolve scripts + .evolve writes in {settings_path} (no per-use prompts)") + marketplace_dir = Path(SOURCE_DIR).resolve() if SOURCE_DIR else None has_local_marketplace = marketplace_dir is not None and (marketplace_dir / ".claude-plugin" / "marketplace.json").is_file() marketplace_source = str(marketplace_dir) if has_local_marketplace else EVOLVE_REPO @@ -715,6 +1209,49 @@ class ClaudeInstaller: def uninstall(self, target_dir): info("Uninstalling Claude plugin") + + # Drop the single managed `@`-import pointer line from /CLAUDE.md, + # remove the per-repo EVOLVE.md copy we placed (NOT the whole .evolve/ + # store), remove the project-settings allow-rules we merged in, and + # remove the global recall-audit + adapt-memory scripts and the shared + # lib we shipped alongside them (mirrors Codex). + self.ops.remove_marker_line(Path(target_dir) / "CLAUDE.md", CLAUDE_IMPORT_MARKER) + self.ops.remove_file(Path(target_dir) / CLAUDE_EVOLVE_MD_REL) + settings_path = Path(target_dir) / CLAUDE_SETTINGS_REL + self.ops.remove_json_permission_rules(settings_path, CLAUDE_ALLOW_RULES) + self.ops.remove_dir_if_empty(Path(target_dir) / ".claude") + claude_evolve_dir = Path.home() / ".claude" / "evolve-lite" + self.ops.remove_file(claude_evolve_dir / AUDIT_SCRIPT) + self.ops.remove_file(claude_evolve_dir / ADAPT_SCRIPT) + self.ops.remove_dir(claude_evolve_dir / "lib") + self.ops.remove_dir_if_empty(claude_evolve_dir) + + # Legacy migration: remove orphan plugin data dirs left by older installs + # (e.g. evolve-lite-inline, evolve-lite-evolve-marketplace). GLOBAL, only + # dirs whose name starts with `evolve-lite-` under plugins/data/. 
+ data_dir = Path.home() / ".claude" / "plugins" / "data" + if data_dir.is_dir(): + for entry in sorted(data_dir.iterdir()): + if entry.is_dir() and entry.name.startswith("evolve-lite-"): + self.ops.remove_dir(entry) + + # Legacy migration: remove orphan plugin caches left by older installs at + # plugins/cache//evolve-lite/ (e.g. the OLD hooks/ bundle). + # `claude plugin uninstall` leaves these behind; because the plugin version + # isn't bumped, a stale cache can resurrect the OLD bundle on reinstall. + # Remove cache//evolve-lite/, then rmdir the marketplace parent + # if it is now empty. Only ever delete a dir whose final component is + # `evolve-lite` (or its emptied parent). GLOBAL, defensive, idempotent. + cache_root = Path.home() / ".claude" / "plugins" / "cache" + if cache_root.is_dir(): + for marketplace_dir in sorted(cache_root.iterdir()): + if not marketplace_dir.is_dir(): + continue + evolve_cache = marketplace_dir / "evolve-lite" + if evolve_cache.is_dir(): + self.ops.remove_dir(evolve_cache) + self.ops.remove_dir_if_empty(marketplace_dir) + claude = shutil.which("claude") if not claude: warn("Could not uninstall Claude plugin automatically.") @@ -728,6 +1265,15 @@ class ClaudeInstaller: warn(f"claude plugin uninstall exited with code {result.returncode}") warn(f"Run manually: claude plugin uninstall {CLAUDE_PLUGIN}") + # Legacy migration: install added the marketplace but uninstall never + # removed it. Tolerate non-zero exit / missing entry (mirrors the + # uninstall call above — best-effort, never fatal). 
+ result = self.ops.run_subprocess([claude, "plugin", "marketplace", "remove", "evolve-marketplace"]) + if result.returncode == 0: + success("Removed claude marketplace 'evolve-marketplace'") + else: + warn(f"claude plugin marketplace remove exited with code {result.returncode} (ignored)") + def status(self, target_dir): print(f" Claude:") claude = shutil.which("claude") @@ -811,166 +1357,7 @@ class CodexInstaller: def __init__(self, ops: FileOps): self.ops = ops - # ── Codex hook/marketplace schema helpers ───────────────────────────────── - - @staticmethod - def _recall_hook_command(): - return ( - "sh -lc '" - 'd=\"$PWD\"; ' - "while :; do " - 'candidate=\"$d/plugins/evolve-lite/skills/evolve-lite/recall/scripts/retrieve_entities.py\"; ' - 'if [ -f \"$candidate\" ]; then EVOLVE_DIR=\"$d/.evolve\" exec python3 \"$candidate\"; fi; ' - '[ \"$d\" = \"/\" ] && break; ' - 'd=\"$(dirname \"$d\")\"; ' - "done; " - "exit 1'" - ) - - @staticmethod - def _is_recall_command(command): - return isinstance(command, str) and "plugins/evolve-lite/skills/evolve-lite/recall/scripts/retrieve_entities.py" in command - - @staticmethod - def _recall_hook(): - return { - "type": "command", - "command": CodexInstaller._recall_hook_command(), - "statusMessage": "Loading Evolve guidance", - } - - @staticmethod - def _recall_hook_group(): - return {"matcher": "", "hooks": [CodexInstaller._recall_hook()]} - - @staticmethod - def _sync_hook_command(): - return ( - "sh -lc '" - 'd=\"$PWD\"; ' - "while :; do " - 'candidate=\"$d/plugins/evolve-lite/skills/evolve-lite/sync/scripts/sync.py\"; ' - 'if [ -f \"$candidate\" ]; then EVOLVE_DIR=\"$d/.evolve\" exec python3 \"$candidate\" --quiet --session-start; fi; ' - '[ \"$d\" = \"/\" ] && break; ' - 'd=\"$(dirname \"$d\")\"; ' - "done; " - "exit 1'" - ) - - @staticmethod - def _is_sync_command(command): - return isinstance(command, str) and "plugins/evolve-lite/skills/evolve-lite/sync/scripts/sync.py" in command - - @staticmethod - def _sync_hook(): 
- return { - "type": "command", - "command": CodexInstaller._sync_hook_command(), - "statusMessage": "Syncing Evolve subscriptions", - } - - @staticmethod - def _sync_hook_group(): - return {"matcher": "startup|resume", "hooks": [CodexInstaller._sync_hook()]} - - @staticmethod - def _iter_group_hooks(group): - hooks = group.get("hooks", []) - if isinstance(hooks, list): return hooks - if isinstance(hooks, dict): return list(hooks.values()) - return [] - - @staticmethod - def _group_has_recall(group): - return any( - isinstance(h, dict) and CodexInstaller._is_recall_command(h.get("command")) - for h in CodexInstaller._iter_group_hooks(group) - ) - - @staticmethod - def _group_has_sync(group): - return any( - isinstance(h, dict) and CodexInstaller._is_sync_command(h.get("command")) - for h in CodexInstaller._iter_group_hooks(group) - ) - - @staticmethod - def _upsert_recall_into_group(group): - updated = copy.deepcopy(group) - recall = CodexInstaller._recall_hook() - hooks = updated.get("hooks") - if isinstance(hooks, list): - for i, h in enumerate(hooks): - if isinstance(h, dict) and CodexInstaller._is_recall_command(h.get("command")): - hooks[i] = merge_json_value(h, recall) - break - else: - hooks.append(copy.deepcopy(recall)) - elif isinstance(hooks, dict): - for key, h in hooks.items(): - if isinstance(h, dict) and CodexInstaller._is_recall_command(h.get("command")): - hooks[key] = merge_json_value(h, recall) - break - else: - hooks["evolve-lite"] = copy.deepcopy(recall) - else: - updated["hooks"] = [copy.deepcopy(recall)] - return updated - - @staticmethod - def _upsert_sync_into_group(group): - updated = copy.deepcopy(group) - sync = CodexInstaller._sync_hook() - hooks = updated.get("hooks") - if isinstance(hooks, list): - for i, h in enumerate(hooks): - if isinstance(h, dict) and CodexInstaller._is_sync_command(h.get("command")): - hooks[i] = merge_json_value(h, sync) - break - else: - hooks.append(copy.deepcopy(sync)) - elif isinstance(hooks, dict): - for 
key, h in hooks.items(): - if isinstance(h, dict) and CodexInstaller._is_sync_command(h.get("command")): - hooks[key] = merge_json_value(h, sync) - break - else: - hooks["evolve-lite"] = copy.deepcopy(sync) - else: - updated["hooks"] = [copy.deepcopy(sync)] - return updated - - @staticmethod - def _remove_recall_from_group(group): - updated = copy.deepcopy(group) - hooks = updated.get("hooks") - if isinstance(hooks, list): - updated["hooks"] = [ - h for h in hooks - if not (isinstance(h, dict) and CodexInstaller._is_recall_command(h.get("command"))) - ] - elif isinstance(hooks, dict): - updated["hooks"] = { - k: h for k, h in hooks.items() - if not (isinstance(h, dict) and CodexInstaller._is_recall_command(h.get("command"))) - } - return updated - - @staticmethod - def _remove_sync_from_group(group): - updated = copy.deepcopy(group) - hooks = updated.get("hooks") - if isinstance(hooks, list): - updated["hooks"] = [ - h for h in hooks - if not (isinstance(h, dict) and CodexInstaller._is_sync_command(h.get("command"))) - ] - elif isinstance(hooks, dict): - updated["hooks"] = { - k: h for k, h in hooks.items() - if not (isinstance(h, dict) and CodexInstaller._is_sync_command(h.get("command"))) - } - return updated + # ── Codex marketplace schema helpers ────────────────────────────────────── def _upsert_marketplace_entry(self, path, item): data = read_json(path) @@ -991,94 +1378,51 @@ class CodexInstaller: plugins.append(copy.deepcopy(item)) self.ops.atomic_write_json(path, data) - def _upsert_user_prompt_hook(self, path, group): - data = read_json(path) - if not data: - data = {"hooks": {}} - if not isinstance(data, dict): - raise ValueError(f"{path} must contain a JSON object.") - hooks = data.setdefault("hooks", {}) - if not isinstance(hooks, dict): - hooks = {} - data["hooks"] = hooks - groups = hooks.setdefault("UserPromptSubmit", []) - if not isinstance(groups, list): - groups = [] - hooks["UserPromptSubmit"] = groups - for i, existing in enumerate(groups): - if 
def _purge_legacy_global(self):
    """Reverse pre-redesign GLOBAL ~/.codex/ artifacts (migration cleanup).

    Old installs registered the plugin globally in ~/.codex/config.toml as
    `[plugins."evolve-lite@MARKETPLACE"]` tables and left plugin caches at
    ~/.codex/plugins/cache/MARKETPLACE/evolve-lite/. The new design never
    writes these, but an upgrading user still has them on disk, so they are
    stripped here to make uninstall a true clean slate.

    Everything is resolved from ``Path.home()``, so this is GLOBAL regardless
    of --dir. It is defensive and idempotent: each step is guarded by an
    existence check and is a no-op when the artifact is absent. All mutation
    goes through ``self.ops``.
    """
    codex_home = Path.home() / ".codex"

    # 1. config.toml: drop every `[plugins."evolve-lite@..."]` table.
    config_toml = codex_home / "config.toml"
    # The predicate below is matched against a header of the form
    # `plugins."evolve-lite@..."`, i.e. without the surrounding brackets.
    legacy_plugin_re = re.compile(r'^plugins\.\s*"evolve-lite@[^"]*"\s*$')
    self.ops.remove_toml_tables(
        config_toml, lambda header: bool(legacy_plugin_re.match(header))
    )
    # Post-condition (skipped in dry-run, which doesn't mutate the file):
    # the result must still parse and carry no evolve-lite@* plugin key.
    if not self.ops.is_dry_run and config_toml.is_file():
        try:
            # Imported lazily: tomllib only exists on Python 3.11+, and its
            # absence must not break uninstall.
            import tomllib

            with open(config_toml, "rb") as f:
                parsed = tomllib.load(f)
            stray = [k for k in parsed.get("plugins", {}) if k.startswith("evolve-lite@")]
            if stray:
                warn(f"Legacy codex plugin keys remain in {config_toml}: {stray}")
        except Exception as e:  # tomllib missing (Python older than 3.11) or unparseable file
            # Validation is best-effort only; never fail the uninstall on it.
            debug(f"Skipped config.toml validation: {e}")

    # 2. plugin caches: remove cache/MARKETPLACE/evolve-lite/, then rmdir
    #    the marketplace parent if it is now empty. Only ever delete a dir
    #    whose final component is `evolve-lite` (or its emptied parent).
    cache_root = codex_home / "plugins" / "cache"
    if cache_root.is_dir():
        # sorted() gives a deterministic order for logs and tests.
        for marketplace_dir in sorted(cache_root.iterdir()):
            if not marketplace_dir.is_dir():
                continue
            evolve_cache = marketplace_dir / "evolve-lite"
            if evolve_cache.is_dir():
                self.ops.remove_dir(evolve_cache)
                self.ops.remove_dir_if_empty(marketplace_dir)
info("If you do not want to enable Codex hooks, invoke the installed evolve-lite:recall skill manually.") + # Always-on instructions: Codex reads ~/.codex/AGENTS.md verbatim and + # does NOT support `@`-imports. So we drop a COPY of EVOLVE.md on disk + # and inject a single greppable pointer line into AGENTS.md telling the + # agent to read that file on demand. Prefer the rendered codex copy; + # fall back to the shared plugin-source original. + evolve_src = plugin_source / "EVOLVE.md" + if not evolve_src.is_file(): + evolve_src = Path(source_dir) / "plugin-source" / "EVOLVE.md" + evolve_text = "" if self.ops.is_dry_run and not evolve_src.is_file() else evolve_src.read_text() + evolve_dst = Path.home() / ".codex" / "evolve-lite" / "EVOLVE.md" + self.ops.atomic_write_text(evolve_dst, evolve_text) + success(f"Copied EVOLVE.md → {evolve_dst}") + + agents_file = Path.home() / ".codex" / "AGENTS.md" + self.ops.inject_marker_line(agents_file, MANAGED_MARKER, _codex_pointer_line()) + success(f"Injected '{CODEX_PLUGIN}' pointer into {agents_file}") + + # Recall-audit script: the injected AGENTS.md block tells the model to + # run `python3 ~/.codex/evolve-lite/audit_recall.py` after recall, so + # install the script at that GLOBAL absolute path (matching how the + # always-on instructions live globally). Prefer the rendered codex + # copy; fall back to the shared plugin-source original. 
+ audit_src = plugin_source / "lib" / "evolve-lite" / AUDIT_SCRIPT + if not audit_src.is_file(): + audit_src = Path(source_dir) / "plugin-source" / "lib" / AUDIT_SCRIPT + audit_text = "" if self.ops.is_dry_run and not audit_src.is_file() else audit_src.read_text() + audit_file = Path.home() / ".codex" / "evolve-lite" / AUDIT_SCRIPT + self.ops.atomic_write_text(audit_file, audit_text) + success(f"Installed recall-audit script → {audit_file}") success("Codex installation complete") @@ -1124,8 +1488,18 @@ class CodexInstaller: Path(target_dir) / ".agents" / "plugins" / "marketplace.json", "plugins", "name", CODEX_PLUGIN, ) - self._remove_user_prompt_hook(Path(target_dir) / ".codex" / "hooks.json") - self._remove_session_start_hook(Path(target_dir) / ".codex" / "hooks.json") + # Drop the single managed pointer line from the always-on instructions. + self.ops.remove_marker_line(Path.home() / ".codex" / "AGENTS.md", MANAGED_MARKER) + # Remove the on-disk EVOLVE.md copy and the recall-audit script, then the + # per-plugin dir if nothing else lives there. + evolve_dir = Path.home() / ".codex" / "evolve-lite" + self.ops.remove_file(evolve_dir / "EVOLVE.md") + self.ops.remove_file(evolve_dir / AUDIT_SCRIPT) + self.ops.remove_dir_if_empty(evolve_dir) + + # Reverse pre-redesign GLOBAL artifacts (config.toml plugin tables + + # plugin caches). GLOBAL migration, independent of --dir. 
+ self._purge_legacy_global() success("Codex uninstall complete") @@ -1144,19 +1518,18 @@ class CodexInstaller: ) print(f" marketplace.json entry : {'✓' if marketplace_present else '✗'}") - hooks_path = Path(target_dir) / ".codex" / "hooks.json" - hook_present = ( - any(isinstance(g, dict) and self._group_has_recall(g) - for g in read_json(hooks_path).get("hooks", {}).get("UserPromptSubmit", [])) - if hooks_path.is_file() else False - ) - session_hook_present = ( - any(isinstance(g, dict) and self._group_has_sync(g) - for g in read_json(hooks_path).get("hooks", {}).get("SessionStart", [])) - if hooks_path.is_file() else False + agents_path = Path.home() / ".codex" / "AGENTS.md" + pointer_present = ( + any(MANAGED_MARKER in ln for ln in agents_path.read_text().splitlines()) + if agents_path.is_file() else False ) - print(f" .codex/hooks.json entry : {'✓' if hook_present else '✗'}") - print(f" SessionStart sync hook : {'✓' if session_hook_present else '✗'}") + print(f" ~/.codex/AGENTS.md pointer : {'✓' if pointer_present else '✗'}") + + evolve_md = Path.home() / ".codex" / "evolve-lite" / "EVOLVE.md" + print(f" evolve-lite/EVOLVE.md : {'✓' if evolve_md.is_file() else '✗'}") + + audit_file = Path.home() / ".codex" / "evolve-lite" / AUDIT_SCRIPT + print(f" evolve-lite/{AUDIT_SCRIPT} : {'✓' if audit_file.is_file() else '✗'}") # ── Dispatch ────────────────────────────────────────────────────────────────── diff --git a/plugin-source/EVOLVE.md.j2 b/plugin-source/EVOLVE.md.j2 new file mode 100644 index 00000000..a7597948 --- /dev/null +++ b/plugin-source/EVOLVE.md.j2 @@ -0,0 +1,117 @@ +{%- from "_macros.j2" import skill_ref with context -%} +{% if platform == "claude" -%} + +# Evolve — shared, auditable memory + +You already have native, self-directed memory: you decide what to recall at the +start of a task and what to save at the end, on your own judgment. Evolve adds +only the two things native memory lacks — sharing and provenance — as two +lightweight lifecycle steps. 
It does not change how you recall or save. + +## After you save a memory + +When you save a native memory this turn, invoke {{ skill_ref('adapt-memory') }} +to mirror it into the shared evolve store (`./.evolve/entities/`) so the fact is +shareable with the team and auditable like every other evolve entity. Do this +once per memory you saved. + +## Record what you consulted + +After you read or consult native memories this turn, log which ones you actually +opened, so the value of this memory can be measured over time. Run: + +```bash +python3 {{ audit_script }} <type>/<name> [<type>/<name> ...] +``` + +Pass the entity id `<type>/<name>` for each native memory you consulted, where +`<type>` is the memory's `metadata.type` and `<name>` is its top-level `name` +field — this is the id provenance resolves to `./.evolve/entities/<type>/<name>.md` +(the same id {{ skill_ref('adapt-memory') }} mirrors to). Skip this step entirely +if you consulted no memories. If the command prints a line beginning +`evolve-session:`, include that line once, verbatim, somewhere in your reply — it +lets later analysis tie this session to what you recalled. +{%- else -%} +# Evolve — self-directed memory + +You have a persistent, file-based memory for the current project, stored under +`./.evolve/memory/` (relative to the workspace/project root). You decide, on +your own judgment, when something is worth remembering — nothing forces a save, +and there is no step to "complete." Curate this memory like notes you'll thank +yourself for later: small, accurate, high-signal. + +## Recall — at the start of a non-trivial task + +Before substantive work (code changes, debugging, repo exploration, or +environment/tooling investigation), read your memory index at +`./.evolve/memory/MEMORY.md` if it exists. It holds one line per memory with a +short description. Open the individual memory files whose description looks +relevant to the task at hand, and let them inform what you do. If the index is +missing or nothing looks relevant, just proceed — that's normal.
+ +Memories reflect what was true when written. If a memory names a file, +function, command, or flag, verify it still exists before relying on it. + +## Record what you consulted + +After recall, log which entries you actually opened, so the value of this memory +can be measured over time. Run: + +```bash +python3 {{ audit_script }} <memory-file> [<memory-file> ...] +``` + +Pass the memory files you read this turn (space-separated paths, relative to the +project root). Skip this step entirely if you consulted no memories. If the +command prints a line beginning `evolve-session:`, include that line once, +verbatim, somewhere in your reply — it lets later analysis tie this session to +what you recalled. + +## Save — only when you learn something durable + +Near the end of a task, if it produced a reusable fact that isn't already +obvious from the code or git history — and only then — write it to memory. +Saving nothing is the right outcome more often than not; never force a +low-value memory just to have saved one. + +Each memory is one file holding one fact, under `./.evolve/memory/` (create the +directory if it doesn't exist), with frontmatter: + +```markdown +--- +name: <slug> +description: <one-line description> +metadata: + type: user | feedback | project | reference +--- + +<memory body> +``` + +Types: +- **user** — who the user is: role, expertise, durable preferences. +- **feedback** — guidance on how you should work, both corrections and + confirmed approaches; always include the why. +- **project** — ongoing work, goals, or constraints not derivable from the code + or git history; convert relative dates ("next week") to absolute ones. +- **reference** — pointers to external resources (URLs, dashboards, tickets). + +In the body, link related memories with `[[name]]`, where `name` is another +memory's `name:` slug. Link liberally; a `[[name]]` with no file yet marks +something worth writing later, not an error.
+ +After writing the file, add a one-line pointer to `./.evolve/memory/MEMORY.md`: +`- [Title](file.md) — short hook`. MEMORY.md is the index you read during +recall — one line per memory, no frontmatter, never put memory content there. + +## When NOT to save, and housekeeping + +- Don't duplicate what the repo already records: code structure, git history, + READMEs, existing docs. If asked to remember one of those, ask what was + non-obvious about it and save that instead. +- Don't save what only matters to the current conversation. +- Before saving, check for an existing memory that already covers it — update + that file rather than creating a duplicate. +- Delete memories that turn out to be wrong. +{%- endif %} diff --git a/plugin-source/_claude/hooks/hooks.json b/plugin-source/_claude/hooks/hooks.json deleted file mode 100644 index 1d282a7e..00000000 --- a/plugin-source/_claude/hooks/hooks.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "hooks": { - "UserPromptSubmit": [ - { - "matcher": "", - "hooks": [ - { - "type": "command", - "command": "python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/recall/scripts/retrieve_entities.py" - } - ] - } - ], - "SessionStart": [ - { - "matcher": "", - "hooks": [ - { - "type": "command", - "command": "python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/sync/scripts/sync.py --quiet" - } - ] - } - ], - "Stop": [ - { - "matcher": "", - "hooks": [ - { - "type": "command", - "command": "python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/save-trajectory/scripts/on_stop.py" - }, - { - "type": "command", - "command": "python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/learn/scripts/on_stop.py" - } - ] - } - ] - } -} diff --git a/plugin-source/_macros.j2 b/plugin-source/_macros.j2 index a3bc0ab2..0ba282c4 100644 --- a/plugin-source/_macros.j2 +++ b/plugin-source/_macros.j2 @@ -10,6 +10,13 @@ other platforms stay single-line because the whole command is either wrapped in `sh -lc '...'` (claw-code) or invoked through a single python3 call (codex, 
bob). + path_override — when set, the script is invoked from this exact path on + EVERY platform (e.g. "~/.claude/evolve-lite/adapt_memory.py"), and + the per-platform plugin-relative path resolution is bypassed. Used + for scripts the installer ships to a stable, version-proof global + path so they can be permission-allowlisted (mirrors how EVOLVE.md + invokes `python3 {{ audit_script }}`). The arg rendering still + follows the per-platform rules above. Path resolution per platform: claude — ${CLAUDE_PLUGIN_ROOT} expanded by the Claude plugin runtime. @@ -17,8 +24,18 @@ codex — git-rev-parse from any cwd inside the project clone. bob — project-rooted .bob/skills/evolve-lite-/ (post-rename). #} -{%- macro invoke(skill, script, args=None) -%} -{%- if platform == "claude" -%} +{%- macro invoke(skill, script, args=None, path_override=None) -%} +{%- if path_override is not none -%} +{#- Stable global path (installer-shipped, version-proof, allowlistable). Same + head on every platform; arg formatting follows the per-platform rules. 
-#} +python3 {{ path_override }} +{%- if args is none %}{# no args; nothing appended #} +{%- elif args is string %} {{ args }} +{%- elif platform == "claude" %} \ + {{ args | join(" \\\n ") }} +{%- else %} {{ args | join(" ") }} +{%- endif -%} +{%- elif platform == "claude" -%} python3 ${CLAUDE_PLUGIN_ROOT}/skills/evolve-lite/{{ skill }}/scripts/{{ script }} {%- if args is none %}{# no args; nothing appended #} {%- elif args is string %} {{ args }} diff --git a/plugin-source/build_plugins.py b/plugin-source/build_plugins.py index 4906ce38..07a26ed1 100644 --- a/plugin-source/build_plugins.py +++ b/plugin-source/build_plugins.py @@ -293,6 +293,8 @@ def _codex_plugin_json(meta: PluginMetadata) -> _CodexOut: "forked_context": True, "user_skills_dir": "~/.claude/skills", "save_example_script_root": "${CLAUDE_PLUGIN_ROOT}/skills", + "audit_script": "~/.claude/evolve-lite/audit_recall.py", + "adapt_memory_script": "~/.claude/evolve-lite/adapt_memory.py", }, "target_rewrites": [], "target_excludes": [], @@ -304,6 +306,8 @@ def _codex_plugin_json(meta: PluginMetadata) -> _CodexOut: "context": { "user_skills_dir": "~/.claw/skills", "save_example_script_root": "~/.claw/skills", + "audit_script": "~/.claw/evolve-lite/audit_recall.py", + "adapt_memory_script": "~/.claw/evolve-lite/adapt_memory.py", }, "target_rewrites": [], "target_excludes": [], @@ -316,9 +320,14 @@ def _codex_plugin_json(meta: PluginMetadata) -> _CodexOut: "context": { "user_skills_dir": "plugins/evolve-lite/skills", "save_example_script_root": "plugins/evolve-lite/skills", + "audit_script": "~/.codex/evolve-lite/audit_recall.py", + "adapt_memory_script": "~/.codex/evolve-lite/adapt_memory.py", }, "target_rewrites": [], - "target_excludes": [], + # The `doctor` skill diagnoses Claude's @import canary in + # ~/.claude transcripts; that mechanism doesn't exist on codex + # (codex uses an ~/.codex/AGENTS.md pointer), so exclude it. 
+ "target_excludes": [r"^skills/evolve-lite/doctor/"], "metadata_target": ".codex-plugin/plugin.json", "metadata_emit": _codex_plugin_json, }, @@ -327,12 +336,18 @@ def _codex_plugin_json(meta: PluginMetadata) -> _CodexOut: "context": { "user_skills_dir": ".bob/skills", "save_example_script_root": ".bob/skills", + "audit_script": "~/.bob/evolve-lite/audit_recall.py", + "adapt_memory_script": "~/.bob/evolve-lite/adapt_memory.py", }, # Bob has no plugin-namespace concept; skill folders are flat # under .bob/skills/. Collapse the source skills/evolve-lite// # layout to skills/evolve-lite-/ for bob's render output. "target_rewrites": [(r"^skills/evolve-lite/([^/]+)/", r"skills/evolve-lite-\1/")], - "target_excludes": [], + # Exclude the Claude-only `doctor` skill (matches the source-side + # path, before the rewrite above flattens it to + # skills/evolve-lite-doctor/). Its @import-canary diagnostic is + # meaningless on bob, which has no ~/.claude transcript layout. + "target_excludes": [r"^skills/evolve-lite/doctor/"], # Bob has no plugin system, so no plugin.json is emitted. Bob's # commands/ directory is generated 1:1 from the skills walk by # _bob_command_targets(); no static command files exist in @@ -392,10 +407,19 @@ def _bob_command_bytes(skill_dir: Path) -> bytes: def _bob_command_targets() -> list[tuple[Path, Path, bytes]]: """Triples of (skill_source_for_drift_label, target_rel_to_repo_root, content) - for every bob command — one per skill — derived from the skills walk.""" + for every bob command — one per skill — derived from the skills walk. + + Skills excluded by bob's `target_excludes` get no command file: a skill + that isn't rendered into bob's skills/ must not leave a dangling slash + command pointing at it (e.g. 
the Claude-only `doctor` skill).""" bob_root_rel = Path(PLATFORMS["bob"]["plugin_root"]) + bob_excludes = [re.compile(pat) for pat in PLATFORMS["bob"].get("target_excludes", [])] out: list[tuple[Path, Path, bytes]] = [] for skill_dir in _discover_skills(): + # Match against the source-side path, mirroring PlatformConfig.excludes. + source_rel = f"skills/evolve-lite/{skill_dir.name}/" + if any(p.search(source_rel) for p in bob_excludes): + continue target_rel = bob_root_rel / "commands" / f"evolve-lite-{skill_dir.name}.md" out.append((skill_dir / "SKILL.md.j2", target_rel, _bob_command_bytes(skill_dir))) return out diff --git a/plugin-source/lib/audit_recall.py b/plugin-source/lib/audit_recall.py new file mode 100644 index 00000000..42dc1104 --- /dev/null +++ b/plugin-source/lib/audit_recall.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +"""Append a recall-audit row to .evolve/audit.log. + +Self-contained (no third-party or evolve-lite lib imports) so it can be dropped +at a single path and run by a model-invoked shell command on any platform. + +Usage: + python3 audit_recall.py [ ...] + +Records which memory entries the model consulted this turn so the `provenance` +analysis can later judge whether they influenced the outcome. Session id is +resolved from the host's environment when available and falls back to a freshly +minted UUID (printed as `evolve-session: ` for the model to echo). 
def _evolve_dir() -> Path:
    """Locate the evolve data directory for this invocation.

    A non-empty ``$EVOLVE_DIR`` takes precedence; otherwise the directory is
    ``.evolve`` under the current working directory.
    """
    override = os.environ.get("EVOLVE_DIR")
    if override:
        return Path(override)
    return Path.cwd() / ".evolve"


def _session_id() -> tuple[str, bool]:
    """Return ``(session_id, self_minted)``.

    The first non-empty host variable (``CLAUDE_CODE_SESSION_ID``, then
    ``CODEX_THREAD_ID``) supplies the id with ``self_minted=False``. When
    neither is set, a fresh UUID4 string is returned with ``self_minted=True``.
    """
    candidates = (os.environ.get(var) for var in ("CLAUDE_CODE_SESSION_ID", "CODEX_THREAD_ID"))
    host_id = next((value for value in candidates if value), None)
    if host_id is None:
        return str(uuid.uuid4()), True
    return host_id, False


def main(argv: list[str]) -> int:
    """Append one ``recall`` row to ``<evolve dir>/audit.log``.

    Args:
        argv: The consulted memory entries; blank/whitespace-only arguments
            are ignored.

    Returns:
        Always ``0``. Nothing is written or printed when no usable entry
        was passed.
    """
    entities = [arg for arg in argv if arg.strip()]
    if not entities:
        return 0

    session_id, minted = _session_id()
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
    row = {
        "event": "recall",
        "session_id": session_id,
        "entities": entities,
        "ts": stamp,
    }

    # One JSON object per line, appended so earlier rows are preserved.
    log = _evolve_dir() / "audit.log"
    log.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(row) + "\n"
    with log.open("a", encoding="utf-8") as sink:
        sink.write(serialized)

    # A self-minted id is printed so the caller can echo it back.
    if minted:
        print(f"evolve-session: {session_id}")
    count = len(entities)
    print(f"Recorded recall of {count} memory entr{'y' if count == 1 else 'ies'}.")
    return 0
+ """ + if not isinstance(text, str): + return "" + text = text.lower() + text = re.sub(r"[^a-z0-9]+", "-", text) + return text.strip("-") + + def unique_filename(directory, slug): """Return a Path that doesn't collide with existing files in *directory*. @@ -139,7 +154,7 @@ def unique_filename(directory, slug): # Markdown <-> dict conversion # --------------------------------------------------------------------------- -_FRONTMATTER_KEYS = ("type", "trigger", "trajectory", "owner", "source", "visibility", "published_at") +_FRONTMATTER_KEYS = ("type", "trigger", "trajectory", "owner", "source", "native_path", "visibility", "published_at") def entity_to_markdown(entity): @@ -339,24 +354,35 @@ def load_all_entities(entities_dir): return entities -def write_entity_file(directory, entity): +def write_entity_file(directory, entity, filename=None, overwrite=False): """Write a single entity as a markdown file under *directory*. The file is placed in a ``{type}/`` subdirectory. Uses atomic write (write to ``.tmp``, then ``os.rename``). + Args: + directory: Entities root directory. + entity: The entity dict to serialize. + filename: Optional explicit slug for the target file (without the + ``.md`` suffix). When omitted, the slug is derived from the + entity content (the historical default). + overwrite: When True, the entity is written to a deterministic + ``{type}/{filename}.md`` path, overwriting any existing file in + place (stable id, idempotent re-mirroring). When False (the + default), the historical collision-avoiding behavior is kept — + a ``-2``/``-3`` suffix is appended on collision. + Returns: Path to the written file. """ - _ALLOWED_TYPES = {"guideline", "preference"} - entity_type = entity.get("type", "guideline") - if not isinstance(entity_type, str) or entity_type not in _ALLOWED_TYPES: - entity_type = "guideline" + # Any non-empty type is accepted and used (sanitized) as the + # subdirectory. An empty/invalid type falls back to "guideline". 
+ entity_type = sanitize_type(entity.get("type", "guideline")) or "guideline" entity["type"] = entity_type type_dir = Path(directory) / entity_type type_dir.mkdir(parents=True, exist_ok=True) - slug = slugify(entity.get("content", "entity")) + slug = slugify(filename) if filename else slugify(entity.get("content", "entity")) content = entity_to_markdown(entity) # Write to a unique temp file first (avoids predictable .tmp collisions) @@ -367,6 +393,13 @@ def write_entity_file(directory, entity): os.close(fd) fd = None + if overwrite: + # Deterministic target: overwrite any existing file in place so + # the entity id is stable across re-mirroring. + target = type_dir / f"{slug}.md" + os.replace(tmp_path, target) + return target + # Atomically claim the target using O_EXCL; retry on race while True: target = unique_filename(type_dir, slug) diff --git a/plugin-source/skills/evolve-lite/adapt-memory/SKILL.md.j2 b/plugin-source/skills/evolve-lite/adapt-memory/SKILL.md.j2 new file mode 100644 index 00000000..02db92bc --- /dev/null +++ b/plugin-source/skills/evolve-lite/adapt-memory/SKILL.md.j2 @@ -0,0 +1,65 @@ +{%- from "_macros.j2" import invoke with context -%} +--- +name: {% if platform == "bob" %}evolve-lite:{% endif %}adapt-memory +description: Mirror a just-saved native memory into the shared evolve store so it becomes shareable and auditable +{% if platform == "claude" -%} +context: fork +{% endif -%} +--- + +# Adapt Memory +{% if platform == "claude" %} +## Overview + +You have native, self-directed memory: you decide what to remember and write it +under `~/.claude/projects//memory/`. That store has no provenance or +sharing. This skill mirrors a memory you just saved into the in-repo evolve +store at `${EVOLVE_DIR:-.evolve}/entities//.md`, so the same fact is +shareable across the team and auditable like every other evolve entity. + +Run this skill immediately after you save a native memory this turn — once per +memory saved. 
+ +## Required Action + +For each native memory file you saved this turn: + +1. **Read the just-saved memory file** so you are mirroring its real content, + not a guess. Note its `metadata.type` (one of `user`, `feedback`, `project`, + `reference`) — this passes straight through as the entity type, with no + remapping. + +2. **Compose a high-quality `trigger`.** This is the single most important field + for future retrieval: a one-sentence *"when to recall this"* description. + Base it on what the memory actually says and the situations in which a future + agent would benefit from it — do **not** mechanically copy the memory's + `description`. Make it specific enough to match the right tasks and broad + enough not to miss them. + +3. **Run the adapter script**, passing the native file path, its type, and your + synthesized trigger: + +```bash +{{ invoke("adapt-memory", "adapt_memory.py", ["<native-memory-path>", "--type <type>", "--trigger \"<trigger>\""], path_override=adapt_memory_script) }} +``` + +The script parses the native frontmatter and body, builds the entity +(`type` = native type, `trigger` = your synthesized trigger, `content` = the +native body with its `description` carried in as a lead line), and persists it +via the shared entity writer. It is safe to run repeatedly. + +## Notes + +- One invocation per saved memory. If you saved several memories this turn, + invoke the script once for each, with a trigger tailored to each. +- The trigger quality directly determines whether the memory resurfaces when it + matters. Spend a moment on it. +- If you saved no native memory this turn, there is nothing to mirror — skip + this skill. +{% else %} +This skill mirrors a just-saved native memory into the shared evolve store. It +is specific to hosts with native self-directed memory and is a no-op on this +platform — there is no native memory store to mirror from. Use the +{% if platform == "bob" %}`evolve-lite:learn`{% else %}/evolve-lite:learn{% endif %} +skill to capture reusable lessons here.
+{% endif %} diff --git a/plugin-source/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py b/plugin-source/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py new file mode 100644 index 00000000..5bb8fb44 --- /dev/null +++ b/plugin-source/skills/evolve-lite/adapt-memory/scripts/adapt_memory.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 +""" +Adapt Memory Script (Claude-only) + +Mirrors a single native Claude memory file into the in-repo evolve store at +``${EVOLVE_DIR:-.evolve}/entities//.md`` so the memory becomes +shareable and auditable like every other evolve entity. + +Native memory files live under ``~/.claude/projects//memory/`` and carry +frontmatter of the form:: + + --- + name: + description: + metadata: + type: user | feedback | project | reference + --- + + + +The agent passes the native ``--type`` through verbatim (native types map +straight onto the entity type — no remapping) and supplies a synthesized +``--trigger`` (the single most important field for future retrieval). The body +of the native file becomes the entity content; the native ``description`` is +carried into the body as a lead line when present. + +Usage: + python3 adapt_memory.py --type --trigger +""" + +import argparse +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple +# plugins can coexist side by side (e.g. .bob/lib/evolve-lite/). 
def parse_native_memory(text):
    """Split a native memory file into ``(name, description, body)``.

    Frontmatter is recognised only when the first line is exactly ``---``
    (trailing whitespace ignored) and a later line is again exactly ``---``.
    Anchoring the delimiters to whole lines means a ``---`` inside a value
    (e.g. ``description: a --- b``) no longer cuts the frontmatter short.

    Only top-level ``name`` and ``description`` keys are read; indented lines
    (the nested ``metadata:`` block) are skipped. Everything after the closing
    delimiter is the body. Missing or unterminated frontmatter is tolerated:
    the whole text is then returned as the body.

    Args:
        text: Full contents of the native memory file.

    Returns:
        ``(name, description, body)`` where ``name`` and ``description`` are
        ``None`` when absent or empty and ``body`` is stripped of surrounding
        whitespace.
    """
    # keepends=True so the body is re-joined with its original line endings.
    lines = text.splitlines(keepends=True)
    if not lines or lines[0].rstrip() != "---":
        return None, None, text.strip()

    closing = next(
        (idx for idx, line in enumerate(lines[1:], start=1) if line.rstrip() == "---"),
        None,
    )
    if closing is None:
        # Opening delimiter without a closing one: not frontmatter.
        return None, None, text.strip()

    name = None
    description = None
    for line in lines[1:closing]:
        # Only top-level keys (no leading indentation) — keeps the nested
        # metadata.* keys out of the top-level matches.
        if line[:1].isspace():
            continue
        key, _, value = line.partition(":")
        key = key.strip()
        value = value.strip()
        if key == "name" and value:
            name = value
        elif key == "description" and value:
            description = value

    body = "".join(lines[closing + 1:])
    return name, description, body.strip()
user, feedback, project, reference).", + ) + parser.add_argument( + "--trigger", + required=True, + help="Synthesized one-sentence 'when to recall this' description.", + ) + args = parser.parse_args() + + memory_path = Path(args.memory_path).expanduser() + if not memory_path.is_file(): + print(f"Error: native memory file not found: {memory_path}", file=sys.stderr) + sys.exit(1) + + try: + text = memory_path.read_text(encoding="utf-8") + except OSError as exc: + print(f"Error: cannot read {memory_path} - {exc}", file=sys.stderr) + sys.exit(1) + + name, description, body = parse_native_memory(text) + if not body: + print(f"Error: native memory {memory_path} has no body to mirror.", file=sys.stderr) + sys.exit(1) + + # Carry the native description into the body as a lead line when it isn't + # already echoed there, so the mirrored entity is self-describing. + content = body + if description and description not in body: + content = f"{description}\n\n{body}" + + # The native ``name`` becomes the stable, derivable entity slug so the + # entity id is ``/`` on both sides — provenance can map an + # audited native memory straight onto its mirror. Fall back to a + # content-derived slug only when the native frontmatter has no name. + slug = slugify(name) if name else slugify(content) + + entity = { + "type": args.type, + "trigger": args.trigger, + "content": content, + "source": "native-memory", + "native_path": args.memory_path, + } + + entities_dir = find_entities_dir() + if entities_dir: + entities_dir = entities_dir.resolve() + log(f"Using existing entities dir: {entities_dir}") + else: + entities_dir = get_default_entities_dir() + log(f"Created entities dir: {entities_dir}") + + # Deterministic, idempotent write: re-mirroring the same native memory + # (same name + type) overwrites /.md in place rather than + # creating -2.md, keeping the entity id stable. 
+ path = write_entity_file(entities_dir, entity, filename=slug, overwrite=True) + entity_id = f"{entity['type']}/{slug}" + log(f"Mirrored {memory_path} -> {path} (id: {entity_id})") + print(f"Mirrored native memory into evolve store: {path}") + print(f"Entity id: {entity_id}") + + +if __name__ == "__main__": + main() diff --git a/plugin-source/skills/evolve-lite/doctor/SKILL.md.j2 b/plugin-source/skills/evolve-lite/doctor/SKILL.md.j2 new file mode 100644 index 00000000..c2e24254 --- /dev/null +++ b/plugin-source/skills/evolve-lite/doctor/SKILL.md.j2 @@ -0,0 +1,51 @@ +{%- from "_macros.j2" import invoke with context -%} +--- +name: {% if platform == "bob" %}evolve-lite:{% endif %}doctor +description: Diagnose evolve health on Claude — verify the CLAUDE.md @import is actually loading the thin EVOLVE.md into sessions +{% if platform == "claude" -%} +context: fork +{% endif -%} +--- + +# Doctor +{% if platform == "claude" %} +## Overview + +On Claude, evolve is delivered by a single `@.evolve/EVOLVE.md` import line in +this repo's `./CLAUDE.md`. That import requires a one-time, per-project "allow +external imports" approval. If you (or a teammate) declined it — even once, in a +past session — Claude silently disables the import forever, the thin EVOLVE.md +never loads, and evolve becomes a no-op with **no error**. + +This skill checks whether the import is actually reaching your sessions, by +looking for a canary token that the installed EVOLVE.md expands into the session +transcript when the import loads. + +## Required Action + +Run the doctor script from the repo root: + +```bash +{{ invoke("doctor", "doctor.py") }} +``` + +It is read-only and always exits 0. Read the status code it prints: + +- **OK** — the import is loading; nothing to do. +- **IMPORT_DISABLED** — the `@import` line is in `CLAUDE.md` but its content is + not reaching sessions (you likely declined the external-import approval). 
+ Follow the remediation the script prints: purge the project approval, start a + new session, and **Allow** the import dialog. +- **NOT_INSTALLED** — evolve isn't wired into this repo; re-run the installer. +- **STALE_EVOLVE_MD** — the installed `.evolve/EVOLVE.md` predates the canary; + re-run the installer to refresh it. +- **UNKNOWN** — no recent Claude transcripts for this project yet; open a + session, then re-run. + +Relay the status and any remediation to the user. +{% else %} +This skill diagnoses Claude's `@import` delivery of evolve's thin EVOLVE.md. It +is specific to Claude (where evolve loads via a per-project import that can be +silently declined) and is a **no-op on this platform** — here EVOLVE.md is +always-on and there is no import-approval gate to check. Nothing to run. +{% endif %} diff --git a/plugin-source/skills/evolve-lite/doctor/scripts/doctor.py b/plugin-source/skills/evolve-lite/doctor/scripts/doctor.py new file mode 100644 index 00000000..2c2a5382 --- /dev/null +++ b/plugin-source/skills/evolve-lite/doctor/scripts/doctor.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +""" +Doctor Script (Claude-only diagnostic) + +On Claude, evolve is delivered by a single ``@.evolve/EVOLVE.md`` import line in +the repo's ``./CLAUDE.md``. That import requires a one-time, per-project +"allow external imports" approval. If the user declines it (even once, in a past +session) Claude silently disables the import forever — the thin EVOLVE.md never +loads and evolve becomes a no-op with NO error. + +Claude's internal approval flag is undocumented and unreliable to read, so this +script detects delivery *empirically*: the installed thin EVOLVE.md carries a +canary token (``EVOLVE_IMPORT_CANARY_``). When the import loads, that token +expands into the session transcript. The doctor extracts the token from the +installed copy (never hardcoding it twice) and greps the most recent Claude +project transcripts for it. 
+ +Status codes (printed verbatim, always exit 0 — this is a diagnostic): + + OK — canary found in a recent transcript; import is loading. + IMPORT_DISABLED — import line present in CLAUDE.md but canary absent from + every recent transcript; the user likely declined the + external-import approval. + NOT_INSTALLED — the import line is missing from CLAUDE.md, or the installed + .evolve/EVOLVE.md is missing; run the installer. + STALE_EVOLVE_MD — installed EVOLVE.md has no canary; it predates this build, + re-run the installer. + UNKNOWN — no recent Claude transcripts for this project yet. + +Usage: + python3 doctor.py +""" + +import os +import re +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple plugins +# can coexist side by side. The doctor only needs the shared `log` helper, but +# resolving the lib the same way the other scripts do keeps the convention +# uniform (and only works in the rendered tree, same constraint as adapt_memory). +_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) +from entity_io import log as _log # noqa: E402 + + +def log(message): + _log("doctor", message) + + +# The line the installer injects into the repo's CLAUDE.md (see install.sh +# CLAUDE_IMPORT_LINE). Matching on this substring is the install-sanity check. +CLAUDE_IMPORT_LINE = "@.evolve/EVOLVE.md" + +# Pattern used to lift the canary token out of the installed EVOLVE.md so the +# exact token lives in exactly one place (the template), never duplicated here. 
+_CANARY_RE = re.compile(r"EVOLVE_IMPORT_CANARY_\S+") + +# How many of the most-recent transcripts to scan for the canary. +_RECENT_N = 3 + + +def _evolve_dir(root): + """Resolve the .evolve root: $EVOLVE_DIR if set, else /.evolve.""" + env_dir = os.environ.get("EVOLVE_DIR") + if env_dir: + return Path(env_dir) + return root / ".evolve" + + +def _transcript_slug(root): + """Claude derives a project's transcript dir name by replacing every + non-alphanumeric character in the absolute project path with ``-``. + + e.g. /Users/x/Documents/kaizen -> -Users-x-Documents-kaizen + """ + return re.sub(r"[^A-Za-z0-9]", "-", str(root)) + + +def _recent_transcripts(home, root, limit=_RECENT_N): + """The most recent N ``*.jsonl`` transcripts for this project, by mtime.""" + slug = _transcript_slug(root) + proj_dir = home / ".claude" / "projects" / slug + if not proj_dir.is_dir(): + return [] + jsonl = [p for p in proj_dir.glob("*.jsonl") if p.is_file()] + jsonl.sort(key=lambda p: p.stat().st_mtime, reverse=True) + return jsonl[:limit] + + +def _canary_in_transcripts(transcripts, token): + """True if `token` appears anywhere in any of the given transcript files.""" + for path in transcripts: + try: + text = path.read_text(encoding="utf-8", errors="replace") + except OSError: + continue + if token in text: + return True + return False + + +def diagnose(root, home): + """Core diagnosis. Returns ``(code, message)``; never raises on missing + files/dirs. `root` is the project root; `home` is the user home dir under + which Claude keeps ``~/.claude/projects//``. 
+ """ + root = Path(root) + home = Path(home) + + # --- Install sanity ------------------------------------------------------ + claude_md = root / "CLAUDE.md" + has_import = False + if claude_md.is_file(): + try: + has_import = CLAUDE_IMPORT_LINE in claude_md.read_text(encoding="utf-8", errors="replace") + except OSError: + has_import = False + if not has_import: + return ( + "NOT_INSTALLED", + f"evolve import not wired into this repo's CLAUDE.md (expected a line `{CLAUDE_IMPORT_LINE}`); run the installer.", + ) + + evolve_md = _evolve_dir(root) / "EVOLVE.md" + if not evolve_md.is_file(): + return ( + "NOT_INSTALLED", + f"installed EVOLVE.md is missing at {evolve_md}; run the installer.", + ) + + # --- Extract the canary from the installed file -------------------------- + try: + evolve_text = evolve_md.read_text(encoding="utf-8", errors="replace") + except OSError as exc: + return ( + "NOT_INSTALLED", + f"cannot read installed EVOLVE.md at {evolve_md} - {exc}; run the installer.", + ) + match = _CANARY_RE.search(evolve_text) + if not match: + return ( + "STALE_EVOLVE_MD", + f"installed EVOLVE.md at {evolve_md} has no canary token (it predates this build); re-run the installer to refresh it.", + ) + token = match.group(0) + + # --- Transcript check ---------------------------------------------------- + transcripts = _recent_transcripts(home, root) + if not transcripts: + return ( + "UNKNOWN", + "no recent Claude transcripts for this project yet; open a session, then re-run.", + ) + if _canary_in_transcripts(transcripts, token): + return ("OK", "✓ evolve EVOLVE.md import is loading.") + + return ( + "IMPORT_DISABLED", + "⚠ The @import is present in CLAUDE.md but its content is NOT " + "reaching sessions — you likely declined Claude's external-import " + "approval. 
Re-enable by running `claude project purge " + f"{root}` then start a new session and Allow the import dialog.", + ) + + +def main(): + root = Path(os.getcwd()).resolve() + home = Path.home() + code, message = diagnose(root, home) + log(f"{code}: {message}") + print(f"evolve doctor [{code}] {message}") + # Diagnostic only — never fail the caller. + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/plugin-source/skills/evolve-lite/provenance/SKILL.md.j2 b/plugin-source/skills/evolve-lite/provenance/SKILL.md.j2 index ee704616..9e8aa47e 100644 --- a/plugin-source/skills/evolve-lite/provenance/SKILL.md.j2 +++ b/plugin-source/skills/evolve-lite/provenance/SKILL.md.j2 @@ -8,58 +8,107 @@ description: Analyze saved trajectories and recall audit events offline to recor ## Overview -This skill runs after one or more sessions have completed. It reads saved trajectories from `.evolve/trajectories/`, matches them to `recall` events in `.evolve/audit.log`, and records post-hoc `influence` events for recalled guidelines. +This skill runs after one or more sessions have completed. It reads `recall` +events from `.evolve/audit.log`, locates each session's trajectory, and records +post-hoc `influence` events for the recalled guidelines. -Use this skill when you want to compute usage provenance without coupling the work to the live learn step. +The mechanical work — reading recall rows, skipping already-assessed pairs, +resolving entity files, and locating trajectories — is done deterministically by +`provenance.py candidates`. Your job is the judgment: read each candidate and +decide whether the recalled guideline was `followed`, `contradicted`, or +`not_applicable`, then persist that verdict. -## Workflow - -### Step 1: Load Recall Events - -Read `.evolve/audit.log` as JSONL. Find entries where `event == "recall"` and `entities` is a non-empty list. - -Skip any recall event that already has `influence` entries for the same `session_id` and entity ids. 
Do not write duplicate influence records. - -### Step 2: Locate Saved Trajectories +Use this skill when you want to compute usage provenance without coupling the +work to the live learn step. -List `.evolve/trajectories/` and match each recall event to a trajectory by `session_id`. - -Matching strategy (in order): -1. `claude-transcript_<session_id>.jsonl` - the stop-hook transcript dump; the session id is in the filename. -2. `trajectory_<timestamp>_<session_id>.json` - written by the {{ skill_ref("save-trajectory") }} skill when a session id is available. Match on the `<session_id>` slice of the filename. -3. `trajectory_<timestamp>.json` - open the file and match its top-level `session_id` field against the recall event. Only fall back to this step when the filename alone does not identify the session. - -If none of the above yields a confident match for a recall event, skip it. Do not guess. - -### Step 3: Read Recalled Entities +## Workflow -For each recalled entity id, open `.evolve/entities/<id>.md`. The id is a path relative to `.evolve/entities/` without the `.md` suffix, such as `guideline/foo` or `subscribed/alice/guideline/foo`. +### Step 1: Get candidates -Read the entity content and trigger. Skip ids whose files are missing. +Run the candidate builder. It emits one JSON object per line (JSONL), one per +unresolved `(session_id, entity)` recall pair: -### Step 4: Assess Influence +```bash +{{ invoke("provenance", "provenance.py", "candidates") }} +``` -Compare each recalled entity with the matched trajectory. Pick exactly one verdict: +Each candidate looks like: -- `followed` - the agent's actual actions are consistent with the guideline. -- `contradicted` - the guideline applied, but the agent did the opposite or repeated the avoidable dead end. -- `not_applicable` - the guideline was recalled but did not apply to this session. 
+```json +{ + "session_id": "<session_id>", + "entity_id": "<type>/<slug>", + "entity_excerpt": "<leading excerpt of the entity file>", + "trajectory_path": "/path/to/transcript.jsonl", + "trajectory_excerpt": "<leading excerpt of the trajectory file>", + "missing": ["trajectory"] +} +``` -Keep `evidence` to one short sentence citing a concrete action, tool call, or absence in the trajectory. +Notes: + +- `entity_id` is the path relative to `.evolve/entities/` without the `.md` + suffix, e.g. `feedback/foo`, `guideline/bar`, or + `subscribed/alice/guideline/baz`. +- Pairs that already have an `influence` row are skipped for you — the builder + reuses the same dedup rule used when influence rows are written. You will + never be handed a duplicate. +- The trajectory locator checks `.evolve/trajectories/` first, then falls back + to the native Claude transcript at + `~/.claude/projects/<slug>/<session_id>.jsonl`. This means provenance works + even when no `.evolve/trajectories/` file was written. +- If an entity file or trajectory cannot be found, the candidate is still + emitted with a `missing: [...]` field so the gap is visible. When the + trajectory is missing you usually cannot judge the pair — skip it (do not + guess), unless the entity content alone makes `not_applicable` certain. + +### Step 2: Judge each candidate + +For each candidate, read `entity_excerpt` (and open `trajectory_path` for the +full transcript if the excerpt is not enough). Compare the recalled guideline +against the agent's actual actions in the trajectory and pick exactly one +verdict: + +- `followed` — the agent's actual actions are consistent with the guideline. +- `contradicted` — the guideline applied, but the agent did the opposite or + repeated the avoidable dead end. +- `not_applicable` — the guideline was recalled but did not apply to this + session. + +Keep `evidence` to one short sentence citing a concrete action, tool call, or +absence in the trajectory. 
This judgment is yours — there is no heuristic +fallback. + +### Step 3: Record verdicts + +Persist each verdict. 
Either pipe one verdict per call to `provenance.py +record`: -### Step 5: Write Influence Events +```bash +echo '{ + "session_id": "<session_id>", + "entity": "<type>/<slug>", + "verdict": "followed", + "evidence": "Agent used the saved parser before trying shell fallbacks." +}' | {{ invoke("provenance", "provenance.py", "record") }} +``` -Pipe one JSON payload per assessed session to the helper: +…or, to batch many assessments for one session in a single call, pipe to the +underlying writer directly: ```bash echo '{ "session_id": "<session_id>", "assessments": [ - {"entity": "guideline/<slug>", "verdict": "followed", "evidence": "Agent used the saved parser before trying shell fallbacks."} + {"entity": "feedback/foo", "verdict": "followed", "evidence": "Agent followed it."}, + {"entity": "guideline/bar", "verdict": "not_applicable", "evidence": "Did not apply."} ] }' | {{ invoke("provenance", "log_influence.py") }} ``` -The `entity` value must match exactly what appeared in the recall event, including any `subscribed/<user>/` prefix. +Both paths write the identical `influence` audit row and skip duplicates. The +`entity` value must match the candidate's `entity_id` exactly, including any +`subscribed/<user>/` prefix. -It is valid to emit an empty `assessments` list when recall events exist but no recalled guideline can be assessed. +It is valid to record nothing when recall events exist but no recalled guideline +can be assessed (e.g. every candidate is missing its trajectory). diff --git a/plugin-source/skills/evolve-lite/provenance/scripts/provenance.py b/plugin-source/skills/evolve-lite/provenance/scripts/provenance.py new file mode 100644 index 00000000..c2272501 --- /dev/null +++ b/plugin-source/skills/evolve-lite/provenance/scripts/provenance.py @@ -0,0 +1,358 @@ +#!/usr/bin/env python3 +"""Deterministic provenance plumbing for the evolve-lite provenance skill. 
+ +This script does the *mechanical* half of provenance — the part that can be +made deterministic and tested end to end: + + * read ``recall`` rows from ``.evolve/audit.log``, + * skip ``(session_id, entity)`` pairs that already have an ``influence`` row, + * resolve each recalled entity file and the session's trajectory transcript, + * assemble candidate dicts the agent can judge. + +The *semantic* half — deciding whether a recalled guideline was ``followed``, +``contradicted`` or ``not_applicable`` — is an LLM judgment and is NOT done +here. There is deliberately no heuristic verdict: this module never invents a +verdict. The agent reads each candidate, judges it, and pipes the verdict back +through ``record`` (which delegates to ``log_influence.py``'s writer so the +audit-log format is identical). + +Two modes: + + candidates (default) — emit one JSONL candidate per unresolved + (session_id, entity) recall pair to stdout. + record — read a verdict JSON from stdin and append an + ``influence`` row via log_influence.py's writer. +""" + +import json +import sys +from pathlib import Path + +# Walk up from the script location to find the installed plugin lib directory. +# Every host installs the shared lib under lib/evolve-lite/ so multiple +# plugins can coexist side by side (e.g. .bob/lib/evolve-lite/). +_script = Path(__file__).resolve() +_lib = None +for _ancestor in _script.parents: + _candidate = _ancestor / "lib" / "evolve-lite" + if (_candidate / "entity_io.py").is_file(): + _lib = _candidate + break +if _lib is None: + raise ImportError(f"Cannot find plugin lib directory above {_script}") +sys.path.insert(0, str(_lib)) + +# Provenance reuses log_influence.py's writer + dedup so the audit-log format and +# the duplicate-suppression rule live in exactly one place. log_influence.py sits +# next to this file in the same skill scripts/ directory. 
+sys.path.insert(0, str(_script.parent)) + +from entity_io import get_evolve_dir, log as _log # noqa: E402 +import log_influence # noqa: E402 + +_ALLOWED_VERDICTS = log_influence._ALLOWED_VERDICTS + +# How many characters of the entity file / trajectory to surface in a candidate. +_ENTITY_EXCERPT_CHARS = 4000 +_TRAJECTORY_EXCERPT_CHARS = 4000 + + +def log(message): + _log("provenance", message) + + +# --------------------------------------------------------------------------- +# Trajectory locator (Task B) +# --------------------------------------------------------------------------- + + +def _claude_transcript_slug(root): + """Claude derives a project's transcript dir name by replacing every + non-alphanumeric character in the absolute project path with ``-``. + + e.g. /Users/x/Documents/kaizen -> -Users-x-Documents-kaizen + + This mirrors ``_transcript_slug`` in the doctor skill + (skills/evolve-lite/doctor/scripts/doctor.py). The two are kept in sync by + hand because doctor and provenance ship as independent scripts that do not + import one another in the rendered tree; if you change one, change both. + """ + import re + + return re.sub(r"[^A-Za-z0-9]", "-", str(root)) + + +def locate_trajectory(session_id, evolve_dir, *, project_root=None, home=None): + """Locate the saved trajectory transcript for ``session_id``. + + Resolution order (best-effort, returns the first hit or ``None``): + + 1. Legacy ``.evolve/trajectories/`` files: + * ``claude-transcript_.jsonl`` — stop-hook transcript dump. + * ``trajectory__.json`` — save-trajectory skill output; the sid + is the filename slice after the timestamp. + * ``trajectory_.json`` — open and match the inner ``session_id``. + 2. NEW native Claude transcript: ``~/.claude/projects//.jsonl`` + where ```` is the project root path slugified the way Claude does + (every non-alphanumeric char -> ``-``; see ``_claude_transcript_slug``). 
+ + Native discovery makes provenance work in the hookless world where no + ``.evolve/trajectories/`` file is ever written. It is platform-neutral: + Bob/Codex keep their transcripts elsewhere, so the native step simply falls + through to ``None`` for them rather than misfiring. + """ + evolve_dir = Path(evolve_dir) + + # --- 1. Legacy .evolve/trajectories/ ------------------------------------ + traj_dir = evolve_dir / "trajectories" + if traj_dir.is_dir(): + direct = traj_dir / f"claude-transcript_{session_id}.jsonl" + if direct.is_file(): + return direct + + # trajectory__.json — match on the filename sid slice. + for path in sorted(traj_dir.glob("trajectory_*_*.json")): + stem = path.stem # trajectory__ + parts = stem.split("_", 2) + if len(parts) == 3 and parts[2] == session_id: + return path + + # trajectory_.json — open and match the inner session_id field. + for path in sorted(traj_dir.glob("trajectory_*.json")): + # Skip the _ shape already handled above. + if path.stem.count("_") >= 2: + continue + try: + data = json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + continue + if isinstance(data, dict) and data.get("session_id") == session_id: + return path + + # --- 2. Native Claude transcript ---------------------------------------- + # The project root is the parent of the .evolve dir; the home dir holds + # ~/.claude/projects//.jsonl. 
+ root = Path(project_root) if project_root is not None else evolve_dir.resolve().parent + base = Path(home) if home is not None else Path.home() + slug = _claude_transcript_slug(root) + native = base / ".claude" / "projects" / slug / f"{session_id}.jsonl" + if native.is_file(): + return native + + return None + + +# --------------------------------------------------------------------------- +# Recall row reading + entity resolution (Task A — candidates) +# --------------------------------------------------------------------------- + + +def read_recall_rows(evolve_dir): + """Return a list of ``(session_id, [entity_id, ...])`` tuples for every ``recall`` audit row. + + Rows with no ``session_id`` or an empty ``entities`` list are skipped. + """ + audit_log = Path(evolve_dir) / "audit.log" + if not audit_log.is_file(): + return [] + + rows = [] + for line in audit_log.read_text(encoding="utf-8").splitlines(): + if not line.strip(): + continue + try: + event = json.loads(line) + except json.JSONDecodeError: + continue + if not isinstance(event, dict) or event.get("event") != "recall": + continue + session_id = event.get("session_id") + entities = event.get("entities") + if not isinstance(session_id, str) or not session_id: + continue + if not isinstance(entities, list) or not entities: + continue + clean = [e for e in entities if isinstance(e, str) and e] + if clean: + rows.append((session_id, clean)) + return rows + + +def _read_entity(evolve_dir, entity_id): + """Return ``(path, excerpt)`` for an entity file, or ``(path, None)`` if + the file is missing. ``entity_id`` is a ``/`` id relative to + ``entities/`` (without ``.md``). 
+ """ + entity_path = Path(evolve_dir) / "entities" / f"{entity_id}.md" + if not entity_path.is_file(): + return entity_path, None + try: + text = entity_path.read_text(encoding="utf-8") + except OSError: + return entity_path, None + return entity_path, text[:_ENTITY_EXCERPT_CHARS] + + +def _read_trajectory_excerpt(trajectory_path): + """Return a bounded text excerpt of the trajectory file, or ``None``.""" + if trajectory_path is None: + return None + try: + text = Path(trajectory_path).read_text(encoding="utf-8") + except OSError: + return None + return text[:_TRAJECTORY_EXCERPT_CHARS] + + +def build_candidates(evolve_dir, *, project_root=None, home=None): + """Assemble candidate dicts for every unresolved recall (session, entity). + + Returns a list of dicts shaped:: + + { + "session_id": ..., + "entity_id": "/", + "entity_excerpt": , + "trajectory_path": , + "trajectory_excerpt": , + "missing": ["entity"|"trajectory", ...], # only when non-empty + } + + ``(session_id, entity)`` pairs that already have an ``influence`` row are + skipped via ``log_influence.existing_influence_keys`` — the same dedup rule + used when influence rows are written. Candidates whose entity file or + trajectory cannot be found are still emitted with a ``missing`` list so the + gap is visible rather than silently dropped. 
+ """ + evolve_dir = Path(evolve_dir) + existing = log_influence.existing_influence_keys(evolve_dir) + + candidates = [] + for session_id, entities in read_recall_rows(evolve_dir): + trajectory_path = locate_trajectory(session_id, evolve_dir, project_root=project_root, home=home) + for entity_id in entities: + if (session_id, entity_id) in existing: + continue + entity_path, entity_excerpt = _read_entity(evolve_dir, entity_id) + trajectory_excerpt = _read_trajectory_excerpt(trajectory_path) + + missing = [] + if entity_excerpt is None: + missing.append("entity") + if trajectory_path is None: + missing.append("trajectory") + + candidate = { + "session_id": session_id, + "entity_id": entity_id, + "entity_excerpt": entity_excerpt, + "trajectory_path": str(trajectory_path) if trajectory_path else None, + "trajectory_excerpt": trajectory_excerpt, + } + if missing: + candidate["missing"] = missing + candidates.append(candidate) + return candidates + + +# --------------------------------------------------------------------------- +# record (Task A — record) +# --------------------------------------------------------------------------- + + +def record_verdict(payload, evolve_dir=None): + """Append a single ``influence`` row from an agent verdict. + + ``payload`` is ``{session_id, entity, verdict, evidence}``. The verdict must + be one of ``followed|contradicted|not_applicable`` (the *semantic* judgment + stays agent-driven — this only persists what the agent decided). Writing is + delegated to ``log_influence.py`` so the audit-log row format and the + duplicate-suppression rule are not duplicated here. + + Returns the number of rows written (0 or 1). Raises ``ValueError`` on an + invalid payload / verdict. 
+ """ + if not isinstance(payload, dict): + raise ValueError("verdict payload must be a JSON object") + + session_id = payload.get("session_id") + entity = payload.get("entity") + verdict = payload.get("verdict") + evidence = payload.get("evidence", "") + + if not isinstance(session_id, str) or not session_id: + raise ValueError("verdict payload must include a non-empty string session_id") + if not isinstance(entity, str) or not entity: + raise ValueError("verdict payload must include a non-empty string entity") + if verdict not in _ALLOWED_VERDICTS: + raise ValueError(f"verdict must be one of {sorted(_ALLOWED_VERDICTS)}, got {verdict!r}") + + if evolve_dir is None: + evolve_dir = get_evolve_dir().resolve() + evolve_dir = Path(evolve_dir) + + existing = log_influence.existing_influence_keys(evolve_dir) + if (session_id, entity) in existing: + log(f"Skipping duplicate influence verdict: session_id={session_id} entity={entity}") + return 0 + + if not isinstance(evidence, str): + evidence = str(evidence) + + log_influence.audit.append( + evolve_dir=str(evolve_dir), + event="influence", + session_id=session_id, + entity=entity, + verdict=verdict, + evidence=evidence, + ) + return 1 + + +# --------------------------------------------------------------------------- +# __main__ +# --------------------------------------------------------------------------- + + +def _run_candidates(): + evolve_dir = get_evolve_dir().resolve() + candidates = build_candidates(evolve_dir) + for candidate in candidates: + print(json.dumps(candidate)) + log(f"Emitted {len(candidates)} candidate(s) from {evolve_dir}") + + +def _run_record(): + try: + payload = json.load(sys.stdin) + except json.JSONDecodeError as exc: + log(f"Invalid JSON input: {exc}") + print(f"Error: invalid JSON input - {exc}", file=sys.stderr) + sys.exit(1) + + try: + written = record_verdict(payload) + except ValueError as exc: + log(f"Rejected verdict: {exc}") + print(f"Error: {exc}", file=sys.stderr) + sys.exit(1) + + 
log(f"Recorded {written} influence verdict(s).") + print(f"Recorded {written} influence verdict(s).") + + +def main(argv=None): + argv = list(sys.argv[1:] if argv is None else argv) + mode = argv[0] if argv else "candidates" + if mode == "candidates": + _run_candidates() + elif mode == "record": + _run_record() + else: + print(f"Error: unknown mode {mode!r}; expected 'candidates' or 'record'.", file=sys.stderr) + sys.exit(2) + + +if __name__ == "__main__": + main() diff --git a/tests/platform_integrations/conftest.py b/tests/platform_integrations/conftest.py index 18ba7660..546061a1 100644 --- a/tests/platform_integrations/conftest.py +++ b/tests/platform_integrations/conftest.py @@ -20,6 +20,135 @@ def pytest_configure(config): config.addinivalue_line("markers", "integration: tests that require git and perform subprocess I/O") +@pytest.fixture(autouse=True) +def sandbox_home(tmp_path, monkeypatch): + """Redirect HOME to a temp dir for every platform-integrations test. + + install.sh resolves a handful of global paths via Python's ``Path.home()`` + (notably the Codex always-on instructions file ``~/.codex/AGENTS.md`` and the + global Bob target ``~/.bob``). Without sandboxing, simply running a codex + install in a test would inject the evolve block into the developer's REAL + ``~/.codex/AGENTS.md``. ``InstallRunner.run`` builds the subprocess env from + ``os.environ`` at call time, so monkeypatching HOME here flows through to the + install.sh child process. + + Returns the sandboxed home directory. + """ + home = tmp_path / "sandbox_home" + home.mkdir() + monkeypatch.setenv("HOME", str(home)) + # Windows/`Path.home()` also consults these; keep them aligned defensively. 
+ monkeypatch.setenv("USERPROFILE", str(home)) + monkeypatch.delenv("HOMEDRIVE", raising=False) + monkeypatch.delenv("HOMEPATH", raising=False) + return home + + +@pytest.fixture +def codex_agents_file(sandbox_home): + """Path to the sandboxed Codex always-on instructions file (~/.codex/AGENTS.md).""" + return sandbox_home / ".codex" / "AGENTS.md" + + +@pytest.fixture +def codex_evolve_md(sandbox_home): + """Path to the sandboxed on-disk COPY of EVOLVE.md (~/.codex/evolve-lite/EVOLVE.md). + + Codex no longer inlines EVOLVE.md into AGENTS.md; it drops a copy here and + points AGENTS.md at it via a single greppable managed line.""" + return sandbox_home / ".codex" / "evolve-lite" / "EVOLVE.md" + + +@pytest.fixture +def bob_rules_file(sandbox_home): + """Path to the sandboxed Bob GLOBAL custom-instructions rules file. + + Bob loads every ``~/.bob/rules/*.md`` into every session, globally and + mode-independent, as the user's custom instructions. The lite installer + owns ``00-evolve-lite.md`` entirely (always global, never a project file).""" + return sandbox_home / ".bob" / "rules" / "00-evolve-lite.md" + + +@pytest.fixture +def bob_audit_script(sandbox_home): + """Path to the sandboxed Bob GLOBAL recall-audit script. + + EVOLVE.md tells the model to run ``python3 ~/.bob/evolve-lite/audit_recall.py`` + after recall, so the lite installer drops the script once at that global + absolute path (matching the always-global rules file).""" + return sandbox_home / ".bob" / "evolve-lite" / "audit_recall.py" + + +@pytest.fixture +def codex_audit_script(sandbox_home): + """Path to the sandboxed Codex GLOBAL recall-audit script. 
+ + The injected ~/.codex/AGENTS.md block tells the model to run + ``python3 ~/.codex/evolve-lite/audit_recall.py`` after recall, so the + installer drops the script once at that global absolute path.""" + return sandbox_home / ".codex" / "evolve-lite" / "audit_recall.py" + + +@pytest.fixture +def claude_md_file(temp_project_dir): + """Path to the PER-REPO CLAUDE.md the Claude installer injects into. + + Claude installs the plugin via marketplace (copies nothing to disk) and does + NOT auto-load an ambient EVOLVE.md, so the installer injects a single native + ``@.evolve/EVOLVE.md`` import pointer line into the repo's CLAUDE.md.""" + return temp_project_dir / "CLAUDE.md" + + +@pytest.fixture +def claude_evolve_md(temp_project_dir): + """Path to the PER-REPO COPY of the thin EVOLVE.md (/.evolve/EVOLVE.md). + + The CLAUDE.md ``@``-import points here (path resolves relative to CLAUDE.md, + i.e. the repo root).""" + return temp_project_dir / ".evolve" / "EVOLVE.md" + + +@pytest.fixture +def claude_audit_script(sandbox_home): + """Path to the sandboxed Claude GLOBAL recall-audit script. + + The thin EVOLVE.md instructs running + ``~/.claude/evolve-lite/audit_recall.py`` after recall, so the installer + drops the script once at that global absolute path.""" + return sandbox_home / ".claude" / "evolve-lite" / "audit_recall.py" + + +@pytest.fixture +def claude_adapt_script(sandbox_home): + """Path to the sandboxed Claude GLOBAL adapt-memory adapter script. + + The adapt-memory skill invokes ``python3 ~/.claude/evolve-lite/adapt_memory.py`` + (a stable, version-proof path that can be permission-allowlisted), so the + installer ships the script to that global absolute path alongside the audit + script.""" + return sandbox_home / ".claude" / "evolve-lite" / "adapt_memory.py" + + +@pytest.fixture +def claude_adapt_lib(sandbox_home): + """Path to the sandboxed shared lib shipped beside the global adapt script. 
+ + adapt_memory.py imports ``entity_io`` from the shared lib, resolving it by + walking up its own ancestors for ``lib/evolve-lite/entity_io.py``; the + installer ships the lib here so that walk succeeds from the global path.""" + return sandbox_home / ".claude" / "evolve-lite" / "lib" / "evolve-lite" / "entity_io.py" + + +@pytest.fixture +def claude_settings_file(temp_project_dir): + """Path to the PER-REPO project settings the Claude installer allowlists in. + + Claude plugins cannot self-declare permissions, so the installer pre-authorizes + the evolve scripts and ``.evolve/`` writes by merging allow-rules into the + repo's ``/.claude/settings.json`` (idempotent; removed on uninstall).""" + return temp_project_dir / ".claude" / "settings.json" + + @pytest.fixture def temp_project_dir(tmp_path): """ @@ -227,6 +356,31 @@ def assert_sentinel_block_exists(path: Path, slug: str): assert start_sentinel in content, f"Start sentinel '{start_sentinel}' not found in {path}" assert end_sentinel in content, f"End sentinel '{end_sentinel}' not found in {path}" + @staticmethod + def assert_sentinel_block_count(path: Path, slug: str, expected: int): + """Assert the file contains exactly `expected` REAL sentinel blocks for `slug`. + + A "real" block is a start marker anchored at the beginning of a line followed + by a matching end marker also anchored at the beginning of a line — the same + shape install.sh's inject_sentinel_block treats as a block. This deliberately + ignores a sentinel literal quoted mid-line inside unrelated user prose, so the + helper measures actual injected blocks (an idempotent installer leaves one). 
+ """ + import re + + assert path.is_file(), f"File does not exist: {path}" + content = path.read_text() + start = f"# >>>evolve:{slug}<<<" + end = f"# << Path: return _plugin_root(manifest, "bob") / "commands" def test_one_command_per_skill(self, rendered_repo, build_module): - skill_names = sorted(d.name for d in build_module._discover_skills()) + # Bob commands are 1:1 with the skills bob actually renders, which + # excludes skills filtered by bob's `target_excludes` (the Claude-only + # `doctor` skill). Derive the expected set from _bob_command_targets() + # so this stays in sync with the exclusion logic. + expected = sorted(target_rel.stem.removeprefix("evolve-lite-") for _, target_rel, _ in build_module._bob_command_targets()) commands = sorted(p.stem.removeprefix("evolve-lite-") for p in self._bob_commands_dir(rendered_repo, build_module).glob("*.md")) - assert commands == skill_names, "bob commands are not 1:1 with skills" + assert commands == expected, "bob commands are not 1:1 with bob-rendered skills" + assert "doctor" not in commands, "Claude-only `doctor` skill must not produce a bob command" def test_command_body_references_dash_form(self, rendered_repo, build_module): for cmd_file in self._bob_commands_dir(rendered_repo, build_module).glob("*.md"): @@ -217,9 +222,13 @@ def test_command_body_references_dash_form(self, rendered_repo, build_module): assert f"evolve-lite:{skill}" not in body, f"{cmd_file.name} body should not use the colon form (bob resolves by folder)" def test_command_description_comes_from_skill_frontmatter(self, rendered_repo, build_module): - for skill_dir in build_module._discover_skills(): - description = build_module._read_skill_description(skill_dir) - cmd_file = self._bob_commands_dir(rendered_repo, build_module) / f"evolve-lite-{skill_dir.name}.md" + # Only skills bob actually renders get a command file; iterate the + # command targets (which honor bob's `target_excludes`) rather than + # every discovered skill, so the 
Claude-only `doctor` skill — which + # bob doesn't render — isn't expected to have a command. + for skill_src, target_rel, _ in build_module._bob_command_targets(): + description = build_module._read_skill_description(skill_src.parent) + cmd_file = self._bob_commands_dir(rendered_repo, build_module) / target_rel.name assert f"description: {description}\n" in cmd_file.read_text() def test_command_frontmatter_has_no_name_field(self, rendered_repo, build_module): diff --git a/tests/platform_integrations/test_claude.py b/tests/platform_integrations/test_claude.py index db253838..5a3f6166 100644 --- a/tests/platform_integrations/test_claude.py +++ b/tests/platform_integrations/test_claude.py @@ -1,17 +1,60 @@ """ Tests for the Claude platform integration installer behavior. -Claude install delegates entirely to the claude CLI via the marketplace workflow. -These tests control PATH to simulate the CLI being absent, which lets us verify -fallback output without needing the actual CLI installed. +Claude installs the plugin via marketplace (``claude plugin install``), which +delegates to the claude CLI and copies nothing to the repo. Separately — and +INDEPENDENTLY of whether the CLI is present — the installer performs a per-repo +file delivery so the thin EVOLVE.md actually reaches Claude's context every +session: + * a COPY of the thin EVOLVE.md at the PER-REPO path ``/.evolve/EVOLVE.md``, + * a SINGLE native ``@``-import pointer line (``@.evolve/EVOLVE.md``) injected + into the PER-REPO ``/CLAUDE.md`` (the line is its own uninstall handle), + * the self-contained recall-audit script at the GLOBAL (sandboxed) path + ``~/.claude/evolve-lite/audit_recall.py`` referenced by that EVOLVE.md. + +Some tests control PATH to simulate the CLI being absent, which lets us verify +the marketplace fallback output without needing the actual CLI installed; the +file delivery still runs in that case. 
""" +import json +import os +import subprocess +import sys +from pathlib import Path + import pytest # PATH that contains no claude binary — forces the "CLI not found" fallback path. _NO_CLAUDE_PATH = "/usr/bin:/bin" +# The single native CLAUDE.md import pointer line (its own uninstall handle). +IMPORT_LINE = "@.evolve/EVOLVE.md" +# A distinctive sentence from the thin EVOLVE.md body that must live in the copy. +EVOLVE_BODY_SENTENCE = "You already have native, self-directed memory" +# A distinctive string from the recall-audit script. +AUDIT_SCRIPT_SENTENCE = "Append a recall-audit row" + +# The exact set of allow-rules the installer merges into /.claude/settings.json. +EXPECTED_ALLOW_RULES = [ + "Bash(python3 ~/.claude/evolve-lite/adapt_memory.py:*)", + "Bash(python3 ~/.claude/evolve-lite/audit_recall.py:*)", + "Read(.evolve/**)", + "Edit(.evolve/**)", + "Write(.evolve/**)", +] + +_REPO_ROOT = Path(__file__).parent.parent.parent +# The rendered Claude adapt-memory skill — its invocation must point at the +# stable global path, not the version-unstable ${CLAUDE_PLUGIN_ROOT} dir. 
+_RENDERED_ADAPT_SKILL = _REPO_ROOT / "platform-integrations/claude/plugins/evolve-lite" / "skills/evolve-lite/adapt-memory/SKILL.md" + + +def _import_lines(text): + """Return the lines in `text` that carry the managed @-import marker.""" + return [ln for ln in text.splitlines() if IMPORT_LINE in ln] + @pytest.mark.platform_integrations class TestClaudeInstall: @@ -29,3 +72,288 @@ def test_cli_absent_exits_success(self, temp_project_dir, install_runner): result = install_runner.run("install", platform="claude", env={"PATH": _NO_CLAUDE_PATH}) assert result.returncode == 0 + + +@pytest.mark.platform_integrations +@pytest.mark.e2e +class TestClaudeFileDelivery: + """Test the per-repo EVOLVE.md import-pointer delivery (independent of the CLI).""" + + def test_install_delivers_pointer_evolve_md_and_audit_script( + self, + temp_project_dir, + install_runner, + file_assertions, + claude_md_file, + claude_evolve_md, + claude_audit_script, + ): + """Install injects one @-import line into CLAUDE.md, copies the thin EVOLVE.md, and installs the global audit script.""" + install_runner.run("install", platform="claude") + + # A SINGLE native @-import pointer line is injected into /CLAUDE.md. + file_assertions.assert_file_exists(claude_md_file) + import_lines = _import_lines(claude_md_file.read_text()) + assert len(import_lines) == 1, f"Expected exactly one import line, got {import_lines!r}" + assert import_lines[0].strip() == IMPORT_LINE + + # A COPY of the thin EVOLVE.md is dropped at /.evolve/EVOLVE.md. + file_assertions.assert_file_exists(claude_evolve_md) + assert EVOLVE_BODY_SENTENCE in claude_evolve_md.read_text() + + # The recall-audit script is installed at the GLOBAL sandboxed path. 
+ file_assertions.assert_file_exists(claude_audit_script) + assert AUDIT_SCRIPT_SENTENCE in claude_audit_script.read_text() + + def test_install_is_idempotent_no_duplicate_pointer(self, temp_project_dir, install_runner, claude_md_file): + """Running install twice must not duplicate the @-import line in CLAUDE.md.""" + install_runner.run("install", platform="claude") + install_runner.run("install", platform="claude") + + import_lines = _import_lines(claude_md_file.read_text()) + assert len(import_lines) == 1, f"Expected exactly one import line after two installs, got {import_lines!r}" + + def test_install_preserves_existing_claude_md_content(self, temp_project_dir, install_runner, claude_md_file): + """Injecting the import line must not clobber pre-existing CLAUDE.md content.""" + claude_md_file.write_text("# Project rules\n\nExisting guidance line.\n") + install_runner.run("install", platform="claude") + + text = claude_md_file.read_text() + assert "Existing guidance line." in text + assert len(_import_lines(text)) == 1 + + def test_claude_dry_run_does_not_write_files( + self, + temp_project_dir, + install_runner, + claude_md_file, + claude_evolve_md, + claude_audit_script, + ): + """Dry-run should report actions without writing any files.""" + result = install_runner.run("install", platform="claude", dry_run=True) + + assert "DRY RUN" in result.stdout + assert not claude_md_file.exists() + assert not claude_evolve_md.exists() + assert not claude_audit_script.exists() + + def test_uninstall_removes_pointer_and_evolve_md_and_audit( + self, + temp_project_dir, + install_runner, + file_assertions, + claude_md_file, + claude_evolve_md, + claude_audit_script, + ): + """Uninstall removes the @-import line, the per-repo EVOLVE.md copy, and the global audit script.""" + install_runner.run("install", platform="claude") + file_assertions.assert_file_exists(claude_evolve_md) + file_assertions.assert_file_exists(claude_audit_script) + assert 
len(_import_lines(claude_md_file.read_text())) == 1 + + install_runner.run("uninstall", platform="claude") + + # No @-import reference remains in CLAUDE.md. + assert IMPORT_LINE not in claude_md_file.read_text() + # The placed per-repo EVOLVE.md and the global audit script are gone. + file_assertions.assert_file_not_exists(claude_evolve_md) + file_assertions.assert_file_not_exists(claude_audit_script) + + +@pytest.mark.platform_integrations +class TestClaudeRenderedAdaptSkill: + """The rendered adapt-memory skill must invoke the stable global path.""" + + def test_rendered_skill_uses_stable_path_not_plugin_root(self): + text = _RENDERED_ADAPT_SKILL.read_text() + # The version-unstable plugin-root form must be gone entirely. + assert "${CLAUDE_PLUGIN_ROOT}" not in text + # The stable, allowlistable global path must be the invocation target. + assert "python3 ~/.claude/evolve-lite/adapt_memory.py" in text + + +@pytest.mark.platform_integrations +@pytest.mark.e2e +class TestClaudeAdaptScriptDelivery: + """The adapt-memory adapter + its lib land at the stable global path.""" + + def test_install_ships_adapt_script_and_lib( + self, + install_runner, + file_assertions, + claude_adapt_script, + claude_adapt_lib, + ): + """adapt_memory.py and the shared lib (entity_io.py) land at the global path.""" + install_runner.run("install", platform="claude") + + file_assertions.assert_file_exists(claude_adapt_script) + # The shipped script invokes itself from the stable path (no plugin root). + assert "entity_io" in claude_adapt_script.read_text() + # The shared lib must ship alongside so adapt_memory's import-walk resolves. 
+ file_assertions.assert_file_exists(claude_adapt_lib) + + def test_installed_adapt_script_is_runnable_from_stable_path( + self, + install_runner, + temp_project_dir, + sandbox_home, + claude_adapt_script, + ): + """Run the GLOBALLY-installed adapt_memory.py: its `entity_io` import must + resolve from ~/.claude/evolve-lite/lib/evolve-lite/ and it must write the + mirrored entity into the project's .evolve store.""" + install_runner.run("install", platform="claude") + + native = temp_project_dir / "native_memory.md" + native.write_text( + "---\nname: prefer-ripgrep\ndescription: use ripgrep over grep\n" + "metadata:\n type: feedback\n---\nAlways reach for ripgrep (rg).\n" + ) + evolve_dir = temp_project_dir / ".evolve" + + env = { + **os.environ, + "HOME": str(sandbox_home), + "USERPROFILE": str(sandbox_home), + "EVOLVE_DIR": str(evolve_dir), + } + env.pop("HOMEDRIVE", None) + env.pop("HOMEPATH", None) + result = subprocess.run( + [sys.executable, str(claude_adapt_script), str(native), "--type", "feedback", "--trigger", "when searching files"], + capture_output=True, + text=True, + cwd=str(temp_project_dir), + env=env, + check=False, + ) + + assert result.returncode == 0, f"adapt_memory.py failed: {result.stderr}" + entity = evolve_dir / "entities" / "feedback" / "prefer-ripgrep.md" + assert entity.is_file(), f"entity not written; stdout={result.stdout} stderr={result.stderr}" + + def test_uninstall_removes_adapt_script_and_lib( + self, + install_runner, + file_assertions, + claude_adapt_script, + claude_adapt_lib, + ): + """Uninstall removes the global adapter script and the shipped lib.""" + install_runner.run("install", platform="claude") + file_assertions.assert_file_exists(claude_adapt_script) + file_assertions.assert_file_exists(claude_adapt_lib) + + install_runner.run("uninstall", platform="claude") + + file_assertions.assert_file_not_exists(claude_adapt_script) + file_assertions.assert_file_not_exists(claude_adapt_lib) + # The whole global evolve-lite dir 
(scripts + lib) is gone when emptied. + file_assertions.assert_dir_not_exists(claude_adapt_script.parent) + + def test_dry_run_writes_no_adapt_artifacts( + self, + install_runner, + claude_adapt_script, + claude_adapt_lib, + ): + result = install_runner.run("install", platform="claude", dry_run=True) + assert "DRY RUN" in result.stdout + assert not claude_adapt_script.exists() + assert not claude_adapt_lib.exists() + + +def _allow(settings_path): + """The permissions.allow list from a settings.json (empty list if absent).""" + if not settings_path.is_file(): + return [] + return json.loads(settings_path.read_text()).get("permissions", {}).get("allow", []) + + +@pytest.mark.platform_integrations +@pytest.mark.e2e +class TestClaudePermissionAllowlist: + """Install pre-authorizes the evolve scripts + .evolve writes in project settings.""" + + def test_install_merges_all_allow_rules(self, install_runner, claude_settings_file): + install_runner.run("install", platform="claude") + allow = _allow(claude_settings_file) + for rule in EXPECTED_ALLOW_RULES: + assert rule in allow, f"missing allow-rule {rule!r}; got {allow!r}" + + def test_reinstall_does_not_duplicate_rules(self, install_runner, claude_settings_file): + install_runner.run("install", platform="claude") + install_runner.run("install", platform="claude") + allow = _allow(claude_settings_file) + for rule in EXPECTED_ALLOW_RULES: + assert allow.count(rule) == 1, f"rule {rule!r} duplicated: {allow!r}" + + def test_install_preserves_existing_rules_and_keys(self, install_runner, claude_settings_file): + """A pre-existing unrelated allow-rule and other settings keys survive.""" + claude_settings_file.parent.mkdir(parents=True, exist_ok=True) + claude_settings_file.write_text( + json.dumps( + { + "model": "opus", + "permissions": { + "allow": ["Bash(ls:*)"], + "deny": ["Bash(rm:*)"], + }, + }, + indent=2, + ) + + "\n" + ) + + install_runner.run("install", platform="claude") + + data = 
json.loads(claude_settings_file.read_text()) + # Unrelated top-level key preserved. + assert data["model"] == "opus" + # Unrelated permissions sibling preserved. + assert data["permissions"]["deny"] == ["Bash(rm:*)"] + allow = data["permissions"]["allow"] + # Pre-existing rule preserved and our rules merged in (no duplicates). + assert "Bash(ls:*)" in allow + for rule in EXPECTED_ALLOW_RULES: + assert allow.count(rule) == 1 + + def test_uninstall_removes_only_evolve_rules(self, install_runner, claude_settings_file): + """Uninstall drops exactly the 5 evolve rules, leaving user rules + keys.""" + claude_settings_file.parent.mkdir(parents=True, exist_ok=True) + claude_settings_file.write_text( + json.dumps( + {"model": "opus", "permissions": {"allow": ["Bash(ls:*)"], "deny": ["Bash(rm:*)"]}}, + indent=2, + ) + + "\n" + ) + install_runner.run("install", platform="claude") + install_runner.run("uninstall", platform="claude") + + data = json.loads(claude_settings_file.read_text()) + assert data["model"] == "opus" + assert data["permissions"]["deny"] == ["Bash(rm:*)"] + assert data["permissions"]["allow"] == ["Bash(ls:*)"] + for rule in EXPECTED_ALLOW_RULES: + assert rule not in data["permissions"]["allow"] + + def test_uninstall_cleans_up_empties(self, install_runner, claude_settings_file, file_assertions): + """When only evolve rules existed, uninstall removes the empty allow key, + the settings file, and the .claude dir (if otherwise empty).""" + install_runner.run("install", platform="claude") + file_assertions.assert_file_exists(claude_settings_file) + + install_runner.run("uninstall", platform="claude") + + # Settings file removed (it reduced to {}), and .claude/ dir removed. 
+ file_assertions.assert_file_not_exists(claude_settings_file) + file_assertions.assert_dir_not_exists(claude_settings_file.parent) + + def test_dry_run_writes_no_settings(self, install_runner, claude_settings_file): + result = install_runner.run("install", platform="claude", dry_run=True) + assert "DRY RUN" in result.stdout + assert not claude_settings_file.exists() diff --git a/tests/platform_integrations/test_codex.py b/tests/platform_integrations/test_codex.py index 6815dd7b..2f5a7440 100644 --- a/tests/platform_integrations/test_codex.py +++ b/tests/platform_integrations/test_codex.py @@ -1,5 +1,17 @@ """ Tests for the Codex platform integration installer behavior. + +The Codex redesign no longer registers UserPromptSubmit/SessionStart hooks, and +no longer INLINES the full EVOLVE.md into ``~/.codex/AGENTS.md``. Instead the +installer: + * copies the plugin tree + upserts the marketplace entry, and + * drops a COPY of EVOLVE.md at the GLOBAL path + ``~/.codex/evolve-lite/EVOLVE.md``, and + * injects a SINGLE greppable pointer line (carrying the + ```` marker) into the GLOBAL (sandboxed) + ``~/.codex/AGENTS.md`` telling the agent to read that file, and + * drops the self-contained recall-audit script at the GLOBAL path + ``~/.codex/evolve-lite/audit_recall.py`` referenced by that file. """ import json @@ -8,8 +20,12 @@ EVOLVE_PLUGIN = "evolve-lite" -EVOLVE_HOOK_SNIPPET = "plugins/evolve-lite/skills/evolve-lite/recall/scripts/retrieve_entities.py" -EVOLVE_SYNC_SNIPPET = "plugins/evolve-lite/skills/evolve-lite/sync/scripts/sync.py" +MANAGED_MARKER = "" +EVOLVE_MD_REF = "~/.codex/evolve-lite/EVOLVE.md" +AUDIT_PATH_REF = "~/.codex/evolve-lite/audit_recall.py" +# A distinctive sentence from the body of EVOLVE.md that must live in the copied +# file but must NOT be inlined into AGENTS.md anymore. 
+EVOLVE_BODY_SENTENCE = "You have a persistent, file-based memory for the current project" def _marketplace_has_evolve_plugin(path): @@ -17,33 +33,9 @@ def _marketplace_has_evolve_plugin(path): return any(entry.get("name") == EVOLVE_PLUGIN for entry in data.get("plugins", [])) -def _hooks_have_evolve_recall(path): - data = json.loads(path.read_text()) - groups = data.get("hooks", {}).get("UserPromptSubmit", []) - for group in groups: - for hook in _iter_group_hooks(group): - if EVOLVE_HOOK_SNIPPET in hook.get("command", ""): - return group.get("matcher") == "" - return False - - -def _hooks_have_evolve_sync(path): - data = json.loads(path.read_text()) - groups = data.get("hooks", {}).get("SessionStart", []) - for group in groups: - for hook in _iter_group_hooks(group): - if EVOLVE_SYNC_SNIPPET in hook.get("command", ""): - return group.get("matcher") == "startup|resume" - return False - - -def _iter_group_hooks(group): - hooks = group.get("hooks", []) - if isinstance(hooks, list): - return hooks - if isinstance(hooks, dict): - return list(hooks.values()) - return [] +def _marker_lines(text): + """Return the list of lines in `text` that carry the managed marker.""" + return [ln for ln in text.splitlines() if MANAGED_MARKER in ln] @pytest.mark.platform_integrations @@ -51,9 +43,17 @@ def _iter_group_hooks(group): class TestCodexInstall: """Test the Codex install flow.""" - def test_install_creates_expected_files(self, temp_project_dir, install_runner, file_assertions): - """Installing Codex should create the plugin tree, marketplace entry, and hook.""" - result = install_runner.run("install", platform="codex") + def test_install_creates_expected_files( + self, + temp_project_dir, + install_runner, + file_assertions, + codex_agents_file, + codex_evolve_md, + codex_audit_script, + ): + """Installing Codex creates the plugin tree, marketplace entry, AGENTS.md pointer, EVOLVE.md copy, and audit script.""" + install_runner.run("install", platform="codex") plugin_dir = 
temp_project_dir / "plugins" / EVOLVE_PLUGIN file_assertions.assert_dir_exists(plugin_dir) @@ -69,185 +69,83 @@ def test_install_creates_expected_files(self, temp_project_dir, install_runner, file_assertions.assert_dir_exists(plugin_dir / "skills" / "evolve-lite" / "sync") file_assertions.assert_file_exists(plugin_dir / "skills" / "evolve-lite" / "learn" / "scripts" / "save_entities.py") file_assertions.assert_file_exists(plugin_dir / "skills" / "evolve-lite" / "recall" / "scripts" / "retrieve_entities.py") - file_assertions.assert_file_exists(plugin_dir / "skills" / "evolve-lite" / "publish" / "scripts" / "publish.py") - file_assertions.assert_file_exists(plugin_dir / "skills" / "evolve-lite" / "provenance" / "scripts" / "log_influence.py") - file_assertions.assert_file_exists(plugin_dir / "skills" / "evolve-lite" / "save-trajectory" / "scripts" / "save_trajectory.py") - file_assertions.assert_file_exists(plugin_dir / "skills" / "evolve-lite" / "subscribe" / "scripts" / "subscribe.py") - file_assertions.assert_file_exists(plugin_dir / "skills" / "evolve-lite" / "unsubscribe" / "scripts" / "unsubscribe.py") - file_assertions.assert_file_exists(plugin_dir / "skills" / "evolve-lite" / "sync" / "scripts" / "sync.py") file_assertions.assert_file_exists(plugin_dir / "lib" / "evolve-lite" / "entity_io.py") + # The recall-audit script ships in the plugin tree too, alongside the + # shared lib (lib/evolve-lite/). 
+ file_assertions.assert_file_exists(plugin_dir / "lib" / "evolve-lite" / "audit_recall.py") marketplace_path = temp_project_dir / ".agents" / "plugins" / "marketplace.json" file_assertions.assert_valid_json(marketplace_path) assert _marketplace_has_evolve_plugin(marketplace_path), "Evolve plugin entry missing from marketplace.json" - hooks_path = temp_project_dir / ".codex" / "hooks.json" - file_assertions.assert_valid_json(hooks_path) - assert _hooks_have_evolve_recall(hooks_path), "Evolve recall hook missing from .codex/hooks.json" - assert _hooks_have_evolve_sync(hooks_path), "Evolve sync hook missing from .codex/hooks.json" - - hooks_data = json.loads(hooks_path.read_text()) - evolve_groups = [ - group - for group in hooks_data.get("hooks", {}).get("UserPromptSubmit", []) - if any(EVOLVE_HOOK_SNIPPET in hook.get("command", "") for hook in group.get("hooks", [])) - ] - assert evolve_groups[0]["matcher"] == "" - evolve_hook = next(hook for hook in evolve_groups[0]["hooks"] if EVOLVE_HOOK_SNIPPET in hook.get("command", "")) - expected_command = ( - "sh -lc '" - 'd="$PWD"; ' - "while :; do " - 'candidate="$d/plugins/evolve-lite/skills/evolve-lite/recall/scripts/retrieve_entities.py"; ' - 'if [ -f "$candidate" ]; then EVOLVE_DIR="$d/.evolve" exec python3 "$candidate"; fi; ' - '[ "$d" = "/" ] && break; ' - 'd="$(dirname "$d")"; ' - "done; " - "exit 1'" - ) - assert evolve_hook["command"] == expected_command - sync_groups = [ - group - for group in hooks_data.get("hooks", {}).get("SessionStart", []) - if any(EVOLVE_SYNC_SNIPPET in hook.get("command", "") for hook in group.get("hooks", [])) - ] - assert sync_groups[0]["matcher"] == "startup|resume" - sync_hook = next(hook for hook in sync_groups[0]["hooks"] if EVOLVE_SYNC_SNIPPET in hook.get("command", "")) - expected_sync_command = ( - "sh -lc '" - 'd="$PWD"; ' - "while :; do " - 'candidate="$d/plugins/evolve-lite/skills/evolve-lite/sync/scripts/sync.py"; ' - 'if [ -f "$candidate" ]; then EVOLVE_DIR="$d/.evolve" exec 
python3 "$candidate" --quiet --session-start; fi; ' - '[ "$d" = "/" ] && break; ' - 'd="$(dirname "$d")"; ' - "done; " - "exit 1'" - ) - assert sync_hook["command"] == expected_sync_command - assert "~/.codex/config.toml" in result.stdout - assert "codex_hooks = true" in result.stdout - assert "evolve-lite:recall" in result.stdout - - def test_install_preserves_matching_user_prompt_group(self, temp_project_dir, install_runner, codex_fixtures): - """Installing should merge the evolve hook into an existing matching list-based group.""" - hooks_path = codex_fixtures.create_existing_hooks_with_shared_evolve_group(temp_project_dir) - - install_runner.run("install", platform="codex") - - hooks_data = json.loads(hooks_path.read_text()) - prompt_groups = hooks_data["hooks"]["UserPromptSubmit"] - assert len(prompt_groups) == 1 - - merged_group = prompt_groups[0] - assert merged_group["matcher"] == "src/.*" - - custom_hooks = [ - hook for hook in _iter_group_hooks(merged_group) if hook.get("command") == "python3 ~/.codex/hooks/custom_prompt_memory.py" - ] - assert len(custom_hooks) == 1, "Custom prompt hook was removed from the shared group" - - evolve_hooks = [hook for hook in _iter_group_hooks(merged_group) if EVOLVE_HOOK_SNIPPET in hook.get("command", "")] - assert len(evolve_hooks) == 1, "Evolve hook was duplicated or removed from the shared group" - assert evolve_hooks[0]["statusMessage"] == "Loading Evolve guidance" - assert evolve_hooks[0]["delayMs"] == 250 - - def test_install_updates_dict_based_matching_group(self, temp_project_dir, install_runner, codex_fixtures): - """Installing should update a dict-based matching group without adding a replacement group.""" - hooks_path = codex_fixtures.create_existing_hooks_with_dict_evolve_group(temp_project_dir) - - install_runner.run("install", platform="codex") - - hooks_data = json.loads(hooks_path.read_text()) - prompt_groups = hooks_data["hooks"]["UserPromptSubmit"] - assert len(prompt_groups) == 1 - - merged_group = 
prompt_groups[0] - assert merged_group["matcher"] == "src/.*" - assert isinstance(merged_group["hooks"], dict) - assert "memory" in merged_group["hooks"] - assert "evolve-lite" in merged_group["hooks"] - - evolve_hook = merged_group["hooks"]["evolve-lite"] - assert EVOLVE_HOOK_SNIPPET in evolve_hook["command"] - assert evolve_hook["statusMessage"] == "Loading Evolve guidance" - assert evolve_hook["delayMs"] == 250 - - def test_install_adds_session_start_sync_hook(self, temp_project_dir, install_runner, codex_fixtures): - """Installing should preserve user SessionStart hooks and add the sync hook.""" - hooks_path = codex_fixtures.create_existing_hooks(temp_project_dir) - - install_runner.run("install", platform="codex") - - hooks_data = json.loads(hooks_path.read_text()) - session_groups = hooks_data["hooks"]["SessionStart"] - assert len(session_groups) == 2 - assert any( - any(hook.get("command") == "python3 ~/.codex/hooks/session_start.py" for hook in _iter_group_hooks(group)) - for group in session_groups - ) - assert any(any(EVOLVE_SYNC_SNIPPET in hook.get("command", "") for hook in _iter_group_hooks(group)) for group in session_groups) - - def test_uninstall_removes_only_evolve_hook_from_matching_group(self, temp_project_dir, install_runner, codex_fixtures): - """Uninstalling should remove only the evolve hook entry and preserve the shared group.""" - hooks_path = codex_fixtures.create_existing_hooks_with_dict_evolve_group(temp_project_dir) - - install_runner.run("uninstall", platform="codex") - - hooks_data = json.loads(hooks_path.read_text()) - prompt_groups = hooks_data["hooks"]["UserPromptSubmit"] - assert len(prompt_groups) == 1 - - remaining_group = prompt_groups[0] - assert remaining_group["matcher"] == "src/.*" - assert isinstance(remaining_group["hooks"], dict) - assert "memory" in remaining_group["hooks"] - assert "evolve-lite" not in remaining_group["hooks"] - assert all(EVOLVE_HOOK_SNIPPET not in hook.get("command", "") for hook in 
_iter_group_hooks(remaining_group)) - - def test_uninstall_removes_session_start_sync_hook_only(self, temp_project_dir, install_runner, codex_fixtures): - """Uninstalling should remove the Evolve SessionStart hook and preserve user hooks.""" - hooks_path = codex_fixtures.create_existing_hooks(temp_project_dir) - install_runner.run("install", platform="codex") - - install_runner.run("uninstall", platform="codex") - - hooks_data = json.loads(hooks_path.read_text()) - session_groups = hooks_data["hooks"]["SessionStart"] - assert len(session_groups) == 1 - assert any(hook.get("command") == "python3 ~/.codex/hooks/session_start.py" for hook in _iter_group_hooks(session_groups[0])) - assert all(EVOLVE_SYNC_SNIPPET not in hook.get("command", "") for group in session_groups for hook in _iter_group_hooks(group)) - - def test_uninstall_prunes_evolve_only_hook_groups(self, temp_project_dir, install_runner, file_assertions): - """Uninstalling after a clean install should remove empty Evolve-only hook groups.""" - install_runner.run("install", platform="codex") - - hooks_path = temp_project_dir / ".codex" / "hooks.json" - file_assertions.assert_valid_json(hooks_path) - - install_runner.run("uninstall", platform="codex") - - hooks_data = json.loads(hooks_path.read_text()) - hooks = hooks_data.get("hooks", {}) - assert "UserPromptSubmit" not in hooks - assert "SessionStart" not in hooks - - def test_codex_dry_run_does_not_write_files(self, temp_project_dir, install_runner): + # A SINGLE greppable pointer line is injected into the GLOBAL ~/.codex/AGENTS.md. + file_assertions.assert_file_exists(codex_agents_file) + agents_text = codex_agents_file.read_text() + marker_lines = _marker_lines(agents_text) + assert len(marker_lines) == 1, f"Expected exactly one managed line, got {marker_lines!r}" + pointer_line = marker_lines[0] + # The pointer references the on-disk EVOLVE.md copy. + assert EVOLVE_MD_REF in pointer_line + # AGENTS.md must NOT inline the full EVOLVE.md body anymore. 
+ assert EVOLVE_BODY_SENTENCE not in agents_text + # The audit-script path is no longer inlined into AGENTS.md (it lives in EVOLVE.md). + assert AUDIT_PATH_REF not in agents_text + + # A COPY of EVOLVE.md is dropped on disk and DOES contain the full body. + file_assertions.assert_file_exists(codex_evolve_md) + evolve_md_text = codex_evolve_md.read_text() + assert EVOLVE_BODY_SENTENCE in evolve_md_text + # EVOLVE.md is what tells the model to run the recall-audit script. + assert AUDIT_PATH_REF in evolve_md_text + + # The recall-audit script is installed alongside EVOLVE.md and is self-contained. + file_assertions.assert_file_exists(codex_audit_script) + assert codex_audit_script.parent == codex_evolve_md.parent + assert "Append a recall-audit row" in codex_audit_script.read_text() + + def test_codex_dry_run_does_not_write_files( + self, temp_project_dir, install_runner, codex_agents_file, codex_evolve_md, codex_audit_script + ): """Dry-run should report actions without writing files.""" result = install_runner.run("install", platform="codex", dry_run=True) assert "DRY RUN" in result.stdout assert not (temp_project_dir / "plugins" / EVOLVE_PLUGIN).exists() assert not (temp_project_dir / ".agents" / "plugins" / "marketplace.json").exists() - assert not (temp_project_dir / ".codex" / "hooks.json").exists() + assert not codex_agents_file.exists() + assert not codex_evolve_md.exists() + assert not codex_audit_script.exists() + + def test_uninstall_removes_pointer_and_files( + self, + temp_project_dir, + install_runner, + file_assertions, + codex_agents_file, + codex_evolve_md, + codex_audit_script, + ): + """Uninstall removes the AGENTS.md pointer line, the EVOLVE.md copy, and the audit script (and the empty dir).""" + install_runner.run("install", platform="codex") + file_assertions.assert_file_exists(codex_evolve_md) + file_assertions.assert_file_exists(codex_audit_script) + assert len(_marker_lines(codex_agents_file.read_text())) == 1 + + 
install_runner.run("uninstall", platform="codex") + + assert _marker_lines(codex_agents_file.read_text()) == [] + file_assertions.assert_file_not_exists(codex_evolve_md) + file_assertions.assert_file_not_exists(codex_audit_script) + file_assertions.assert_dir_not_exists(codex_evolve_md.parent) def test_status_reports_codex_installation(self, temp_project_dir, install_runner): - """Status should show the Codex installation state.""" + """Status should show the Codex installation state under the new contract.""" install_runner.run("install", platform="codex") result = install_runner.run("status") assert "Codex:" in result.stdout assert "plugins/evolve-lite" in result.stdout assert "marketplace.json entry" in result.stdout - assert ".codex/hooks.json entry" in result.stdout - assert "SessionStart sync hook" in result.stdout + assert "~/.codex/AGENTS.md pointer" in result.stdout + assert "EVOLVE.md" in result.stdout + assert "audit_recall.py" in result.stdout diff --git a/tests/platform_integrations/test_doctor.py b/tests/platform_integrations/test_doctor.py new file mode 100644 index 00000000..e468bde2 --- /dev/null +++ b/tests/platform_integrations/test_doctor.py @@ -0,0 +1,144 @@ +"""Unit tests for the evolve doctor diagnostic (doctor.py). + +The doctor checks whether Claude's ``@.evolve/EVOLVE.md`` import is actually +reaching sessions, by extracting the canary token from the installed EVOLVE.md +and grepping recent Claude project transcripts for it. + +We exercise the importable ``diagnose(root, home)`` core directly. doctor.py +resolves the shared lib by parent-walking to ``lib/evolve-lite/`` — that only +works in the rendered tree, so we import the RENDERED Claude copy (same +constraint adapt_memory.py has). 
+""" + +import importlib.util +import re +from pathlib import Path + +import pytest + +pytestmark = pytest.mark.platform_integrations + +_DOCTOR = ( + Path(__file__).parent.parent.parent / "platform-integrations/claude/plugins/evolve-lite" / "skills/evolve-lite/doctor/scripts/doctor.py" +) + +# The canary token the installed EVOLVE.md carries. Kept here ONLY for fixture +# construction; doctor.py itself extracts it from the file via regex. +_CANARY = "EVOLVE_IMPORT_CANARY_v1" +_IMPORT_LINE = "@.evolve/EVOLVE.md" + + +def _load_doctor(): + spec = importlib.util.spec_from_file_location("evolve_doctor", _DOCTOR) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod + + +@pytest.fixture(autouse=True) +def _clear_evolve_dir(monkeypatch): + """doctor.py honors $EVOLVE_DIR; clear it so tests resolve .evolve under the + temp project root, not a developer's ambient override.""" + monkeypatch.delenv("EVOLVE_DIR", raising=False) + + +@pytest.fixture +def doctor(): + return _load_doctor() + + +def _make_project(root, *, claude_md=True, evolve_md=True, canary=True): + """Build a fake project tree under `root`.""" + root.mkdir(parents=True, exist_ok=True) + if claude_md: + (root / "CLAUDE.md").write_text(f"# Project rules\n\n{_IMPORT_LINE}\n", encoding="utf-8") + else: + (root / "CLAUDE.md").write_text("# Project rules\n", encoding="utf-8") + if evolve_md: + evolve_dir = root / ".evolve" + evolve_dir.mkdir(parents=True, exist_ok=True) + body = "# Evolve\n" + if canary: + body = f"\n" + body + (evolve_dir / "EVOLVE.md").write_text(body, encoding="utf-8") + + +def _slug(root): + return re.sub(r"[^A-Za-z0-9]", "-", str(root)) + + +def _write_transcript(home, root, *, with_canary): + proj = home / ".claude" / "projects" / _slug(root) + proj.mkdir(parents=True, exist_ok=True) + content = '{"role":"user","content":"hello"}\n' + if with_canary: + content += '{"role":"system","content":"' + _CANARY + '"}\n' + (proj / 
"session.jsonl").write_text(content, encoding="utf-8") + + +def test_ok_when_canary_in_transcript(doctor, tmp_path): + root = tmp_path / "proj" + home = tmp_path / "home" + home.mkdir() + _make_project(root) + _write_transcript(home, root, with_canary=True) + + code, message = doctor.diagnose(root, home) + assert code == "OK", message + + +def test_import_disabled_when_transcript_lacks_canary(doctor, tmp_path): + root = tmp_path / "proj" + home = tmp_path / "home" + home.mkdir() + _make_project(root) + _write_transcript(home, root, with_canary=False) + + code, message = doctor.diagnose(root, home) + assert code == "IMPORT_DISABLED", message + # The exact project root must appear in the remediation. + assert str(root) in message + + +def test_not_installed_when_no_import_line(doctor, tmp_path): + root = tmp_path / "proj" + home = tmp_path / "home" + home.mkdir() + _make_project(root, claude_md=False) + _write_transcript(home, root, with_canary=True) + + code, _ = doctor.diagnose(root, home) + assert code == "NOT_INSTALLED" + + +def test_not_installed_when_evolve_md_missing(doctor, tmp_path): + root = tmp_path / "proj" + home = tmp_path / "home" + home.mkdir() + _make_project(root, evolve_md=False) + _write_transcript(home, root, with_canary=True) + + code, _ = doctor.diagnose(root, home) + assert code == "NOT_INSTALLED" + + +def test_stale_evolve_md_when_no_canary(doctor, tmp_path): + root = tmp_path / "proj" + home = tmp_path / "home" + home.mkdir() + _make_project(root, canary=False) + _write_transcript(home, root, with_canary=False) + + code, _ = doctor.diagnose(root, home) + assert code == "STALE_EVOLVE_MD" + + +def test_unknown_when_no_transcripts(doctor, tmp_path): + root = tmp_path / "proj" + home = tmp_path / "home" + home.mkdir() + _make_project(root) + # No transcript written. 
+ + code, _ = doctor.diagnose(root, home) + assert code == "UNKNOWN" diff --git a/tests/platform_integrations/test_dry_run.py b/tests/platform_integrations/test_dry_run.py index 3c866fad..a7fb0f6e 100644 --- a/tests/platform_integrations/test_dry_run.py +++ b/tests/platform_integrations/test_dry_run.py @@ -27,7 +27,12 @@ def test_all_platforms_dry_run_creates_no_files(self, temp_project_dir, install_ assert not (temp_project_dir / ".codex").exists() def test_bob_dry_run_mentions_expected_operations(self, temp_project_dir, install_runner, platform_integrations_dir): - """Bob dry-run output should name the skills it would copy.""" + """Bob lite dry-run should name the skills it would copy and the always-on instruction wiring. + + Lite no longer merges custom_modes.yaml, copies EVOLVE.md into .bob/, + or injects an AGENTS.md import. It writes the always-on instructions to + Bob's GLOBAL rules dir (~/.bob/rules/00-evolve-lite.md). + """ result = install_runner.run("install", platform="bob", mode="lite", dry_run=True) assert result.returncode == 0 @@ -36,7 +41,14 @@ def test_bob_dry_run_mentions_expected_operations(self, temp_project_dir, instal for skill_dir in skills_src.iterdir(): if skill_dir.is_dir(): assert skill_dir.name in result.stdout, f"Expected skill '{skill_dir.name}' to appear in dry-run output" - assert "custom_modes.yaml" in result.stdout + # New contract: the global rules file is written; no AGENTS.md / EVOLVE.md + # copy / sentinel-block wiring remains. + assert "00-evolve-lite.md" in result.stdout + assert "AGENTS.md" not in result.stdout + assert "inject sentinel block" not in result.stdout + assert "EVOLVE.md" not in result.stdout + # Lite no longer touches custom_modes.yaml. 
+ assert "custom_modes.yaml" not in result.stdout assert not (temp_project_dir / ".bob").exists() def test_codex_dry_run_creates_no_files(self, temp_project_dir, install_runner): diff --git a/tests/platform_integrations/test_end_to_end_claude.py b/tests/platform_integrations/test_end_to_end_claude.py new file mode 100644 index 00000000..30f527a4 --- /dev/null +++ b/tests/platform_integrations/test_end_to_end_claude.py @@ -0,0 +1,289 @@ +"""End-to-end data-flow test for the rendered Claude evolve-lite scripts. + +This is the ONE integration test that proves the correlation ids line up across +the whole chain on Claude — the integration that was broken in the pre-redesign +world (native transcript path vs. entity id) and the reason the hookless redesign +exists. It drives the REAL rendered Claude scripts as subprocesses, in sequence, +with nothing mocked in the data flow: + + adapt_memory.py -> mirrors a native memory into the evolve store, emitting + the entity id ``feedback/prefer-ripgrep``. + audit_recall.py -> records a ``recall`` row keyed by that exact entity id + and the host session id. + provenance.py -> reads the recall row, resolves the mirrored entity AND + the NATIVE Claude transcript, and emits exactly one + candidate whose ids line up end to end. + provenance.py -> records a ``followed`` verdict, then dedups the pair. + +Lib resolution (``lib/evolve-lite/entity_io.py``) only works in the rendered +tree, so we point at the rendered Claude copies under ``platform-integrations/``. + +The scripts are driven as real subprocesses (closest to actual agent usage); +nothing in the data flow is mocked. 
+""" + +import json +import os +import re +import subprocess +import sys +from pathlib import Path + +import pytest + +pytestmark = [pytest.mark.platform_integrations] + +_REPO_ROOT = Path(__file__).parent.parent.parent +_PLUGIN = _REPO_ROOT / "platform-integrations/claude/plugins/evolve-lite" +ADAPT_SCRIPT = _PLUGIN / "skills/evolve-lite/adapt-memory/scripts/adapt_memory.py" +AUDIT_SCRIPT = _PLUGIN / "lib/evolve-lite/audit_recall.py" +PROVENANCE_SCRIPT = _PLUGIN / "skills/evolve-lite/provenance/scripts/provenance.py" + +SID = "claude-e2e-session-0001" + +NATIVE_MEMORY = """\ +--- +name: prefer-ripgrep +description: use ripgrep over grep +metadata: + type: feedback +--- +Always reach for ripgrep (rg) instead of grep. +""" + + +def _claude_slug(root: Path) -> str: + """Mirror provenance.py / doctor.py slugging: non-alphanumerics -> '-'.""" + return re.sub(r"[^A-Za-z0-9]", "-", str(root)) + + +def _run(script: Path, *args, evolve_dir: Path, home: Path, cwd: Path, stdin=None, sid=None): + """Run a rendered Claude script as a real subprocess in the sandbox. + + Every host path is sandboxed: ``$EVOLVE_DIR`` points at the temp store, + ``$HOME``/``$USERPROFILE`` at a sandboxed home, cwd at the temp project root, + and ``$CLAUDE_CODE_SESSION_ID`` at a known SID when supplied. 
+ """ + env = {**os.environ} + env["EVOLVE_DIR"] = str(evolve_dir) + env["HOME"] = str(home) + env["USERPROFILE"] = str(home) + env.pop("HOMEDRIVE", None) + env.pop("HOMEPATH", None) + if sid is not None: + env["CLAUDE_CODE_SESSION_ID"] = sid + else: + env.pop("CLAUDE_CODE_SESSION_ID", None) + return subprocess.run( + [sys.executable, str(script), *args], + input=stdin, + capture_output=True, + text=True, + cwd=str(cwd), + env=env, + check=False, + ) + + +def _parse_jsonl(text: str): + return [json.loads(line) for line in text.splitlines() if line.strip()] + + +def _read_audit(evolve_dir: Path): + path = evolve_dir / "audit.log" + if not path.is_file(): + return [] + return _parse_jsonl(path.read_text(encoding="utf-8")) + + +@pytest.fixture +def sandbox(tmp_path, sandbox_home): + """Build the sandbox dirs the chain needs and return the salient paths. + + ``sandbox_home`` (autouse) already redirects ``$HOME``; we reuse it as the + home that holds the native Claude transcript tree. The project root lives + under tmp_path with its own ``.evolve`` store, kept separate from HOME so + the native-transcript slug (derived from the project root) is exercised for + real. + """ + project_root = tmp_path / "proj" + project_root.mkdir() + evolve_dir = project_root / ".evolve" + evolve_dir.mkdir() + return { + "home": sandbox_home, + "project_root": project_root, + "evolve_dir": evolve_dir, + } + + +def test_chain_closes_ids_line_up(sandbox): + """The whole chain closes: the entity adapt() creates is the entity audit() + records is the entity provenance() resolves against the native transcript. + + Steps (each runs the real rendered script as a subprocess): + 1. save — write the native Claude memory file. + 2. adapt — mirror it; assert entities/feedback/prefer-ripgrep.md exists and + the printed entity id is ``feedback/prefer-ripgrep``. + 3. audit — record a recall row for that exact entity id under the SID. + 4. native transcript — drop ~/.claude/projects//.jsonl. + 5. 
candidates — assert EXACTLY ONE candidate whose entity_id == + ``feedback/prefer-ripgrep``, whose excerpt holds the mirrored + content, whose trajectory_path is the native transcript, with + NO ``missing`` field (entity + trajectory both resolved). This + is the id-alignment assertion. + 6. record + dedup — pipe a ``followed`` verdict; assert an influence row is + appended; re-run candidates and assert it's now empty. + """ + home = sandbox["home"] + project_root = sandbox["project_root"] + evolve_dir = sandbox["evolve_dir"] + + # --- 1. save: native memory file (Claude format) ------------------------ + native_file = project_root / "native_memory.md" + native_file.write_text(NATIVE_MEMORY, encoding="utf-8") + + # --- 2. adapt: mirror native memory into the evolve store --------------- + adapt = _run( + ADAPT_SCRIPT, + str(native_file), + "--type", + "feedback", + "--trigger", + "when searching code, prefer ripgrep", + evolve_dir=evolve_dir, + home=home, + cwd=project_root, + ) + assert adapt.returncode == 0, adapt.stderr + + mirrored = evolve_dir / "entities" / "feedback" / "prefer-ripgrep.md" + assert mirrored.is_file(), f"adapt did not mirror the entity: {adapt.stdout}\n{adapt.stderr}" + + # Capture the entity id from adapt's stdout ("Entity id: "). + id_lines = [ln for ln in adapt.stdout.splitlines() if ln.startswith("Entity id:")] + assert id_lines, f"adapt did not print an entity id:\n{adapt.stdout}" + adapted_entity_id = id_lines[0].split("Entity id:", 1)[1].strip() + assert adapted_entity_id == "feedback/prefer-ripgrep" + + # --- 3. 
audit: record a recall row for that exact entity id ------------- + audit = _run( + AUDIT_SCRIPT, + adapted_entity_id, # exactly as EVOLVE.md instructs the agent to pass it + evolve_dir=evolve_dir, + home=home, + cwd=project_root, + sid=SID, + ) + assert audit.returncode == 0, audit.stderr + + recall_rows = [r for r in _read_audit(evolve_dir) if r.get("event") == "recall"] + assert len(recall_rows) == 1, _read_audit(evolve_dir) + assert recall_rows[0]["session_id"] == SID + assert recall_rows[0]["entities"] == ["feedback/prefer-ripgrep"] + + # --- 4. native transcript fixture --------------------------------------- + slug = _claude_slug(project_root) + native_transcript = home / ".claude" / "projects" / slug / f"{SID}.jsonl" + native_transcript.parent.mkdir(parents=True) + native_transcript.write_text( + '{"type":"user","message":{"role":"user","content":"search the repo for TODOs"}}\n' + '{"type":"assistant","message":{"role":"assistant","content":"Using rg to search."}}\n', + encoding="utf-8", + ) + + # --- 5. candidates: the id-alignment assertion -------------------------- + cand_result = _run( + PROVENANCE_SCRIPT, + "candidates", + evolve_dir=evolve_dir, + home=home, + cwd=project_root, + ) + assert cand_result.returncode == 0, cand_result.stderr + candidates = _parse_jsonl(cand_result.stdout) + assert len(candidates) == 1, f"expected exactly one candidate, got: {candidates}" + cand = candidates[0] + + # KEY ASSERTION: the entity adapt() created == the entity audit() recorded + # == the entity provenance() resolved, and the native transcript located by + # the resolved project-root slug lines up with the audited session id. + assert cand["session_id"] == SID + assert cand["entity_id"] == adapted_entity_id == "feedback/prefer-ripgrep" + assert "Always reach for ripgrep (rg) instead of grep." 
in cand["entity_excerpt"] + assert cand["trajectory_path"] == str(native_transcript) + assert "rg to search" in cand["trajectory_excerpt"] + assert "missing" not in cand, f"chain did not fully resolve: {cand}" + + # --- 6. record a verdict, then assert dedup ----------------------------- + verdict = { + "session_id": SID, + "entity": adapted_entity_id, + "verdict": "followed", + "evidence": "Assistant used rg (ripgrep) to search the repo.", + } + record = _run( + PROVENANCE_SCRIPT, + "record", + evolve_dir=evolve_dir, + home=home, + cwd=project_root, + stdin=json.dumps(verdict), + ) + assert record.returncode == 0, record.stderr + + influence_rows = [r for r in _read_audit(evolve_dir) if r.get("event") == "influence"] + assert len(influence_rows) == 1, _read_audit(evolve_dir) + assert influence_rows[0]["session_id"] == SID + assert influence_rows[0]["entity"] == "feedback/prefer-ripgrep" + assert influence_rows[0]["verdict"] == "followed" + + # Re-run candidates: the judged pair is deduped -> nothing left. + cand_again = _run( + PROVENANCE_SCRIPT, + "candidates", + evolve_dir=evolve_dir, + home=home, + cwd=project_root, + ) + assert cand_again.returncode == 0, cand_again.stderr + assert _parse_jsonl(cand_again.stdout) == [], cand_again.stdout + + +def test_candidates_surface_gaps_when_nothing_lines_up(sandbox): + """Negative/robustness: when the audited entity id was NEVER mirrored AND no + transcript exists, the candidate is still emitted with ``missing`` listing + BOTH ``entity`` and ``trajectory`` — the chain surfaces gaps instead of + silently dropping them. + """ + home = sandbox["home"] + project_root = sandbox["project_root"] + evolve_dir = sandbox["evolve_dir"] + + # Record a recall for an entity id that was never adapted/mirrored, with no + # native transcript on disk for the session. 
+ audit = _run( + AUDIT_SCRIPT, + "feedback/does-not-exist", + evolve_dir=evolve_dir, + home=home, + cwd=project_root, + sid="ghost-session-0002", + ) + assert audit.returncode == 0, audit.stderr + + cand_result = _run( + PROVENANCE_SCRIPT, + "candidates", + evolve_dir=evolve_dir, + home=home, + cwd=project_root, + ) + assert cand_result.returncode == 0, cand_result.stderr + candidates = _parse_jsonl(cand_result.stdout) + assert len(candidates) == 1, candidates + cand = candidates[0] + assert cand["entity_id"] == "feedback/does-not-exist" + assert cand["entity_excerpt"] is None + assert cand["trajectory_path"] is None + assert set(cand["missing"]) == {"entity", "trajectory"} diff --git a/tests/platform_integrations/test_entity_io_core.py b/tests/platform_integrations/test_entity_io_core.py index 30a68db3..2bf467c6 100644 --- a/tests/platform_integrations/test_entity_io_core.py +++ b/tests/platform_integrations/test_entity_io_core.py @@ -4,20 +4,32 @@ covers the serialization and I/O functions needed by the sharing feature. """ +import importlib.util import sys from pathlib import Path import pytest -sys.path.insert( - 0, - str(Path(__file__).parent.parent.parent / "platform-integrations/claude/plugins/evolve-lite/lib/evolve-lite"), -) -import entity_io +_CLAUDE_PLUGIN = Path(__file__).parent.parent.parent / "platform-integrations/claude/plugins/evolve-lite" +sys.path.insert(0, str(_CLAUDE_PLUGIN / "lib/evolve-lite")) +import entity_io # noqa: E402 pytestmark = [pytest.mark.platform_integrations, pytest.mark.unit] +def _load_adapt_memory(): + """Load the rendered Claude adapt_memory.py as a module. + + Its lib resolution only works in the rendered tree (it walks up to find + ``lib/evolve-lite/entity_io.py``), so we import the rendered copy. 
+ """ + path = _CLAUDE_PLUGIN / "skills/evolve-lite/adapt-memory/scripts/adapt_memory.py" + spec = importlib.util.spec_from_file_location("adapt_memory_rendered", path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + class TestSlugify: def test_lowercases_and_replaces_spaces(self): assert entity_io.slugify("Hello World") == "hello-world" @@ -120,10 +132,22 @@ def test_preference_type_goes_in_preference_dir(self, tmp_path): path = entity_io.write_entity_file(tmp_path, entity) assert path.parent == tmp_path / "preference" - def test_invalid_type_defaults_to_guideline(self, tmp_path): - entity = {"type": "badtype", "content": "Some content."} + def test_arbitrary_type_goes_in_its_own_dir(self, tmp_path): + entity = {"type": "feedback", "content": "Some content."} path = entity_io.write_entity_file(tmp_path, entity) - assert path.parent == tmp_path / "guideline" + assert path.parent == tmp_path / "feedback" + + def test_type_is_sanitized_for_filesystem_safety(self, tmp_path): + entity = {"type": "User Preference!", "content": "Some content."} + path = entity_io.write_entity_file(tmp_path, entity) + assert path.parent == tmp_path / "user-preference" + assert entity["type"] == "user-preference" + + def test_empty_or_invalid_type_defaults_to_guideline(self, tmp_path): + for bad_type in ("", " ", "!!!"): + entity = {"type": bad_type, "content": "Some content."} + path = entity_io.write_entity_file(tmp_path, entity) + assert path.parent == tmp_path / "guideline" def test_written_file_is_readable(self, tmp_path): entity = {"type": "guideline", "content": "Write clear commit messages."} @@ -139,6 +163,81 @@ def test_no_collision_on_duplicate_slug(self, tmp_path): assert path1.exists() assert path2.exists() + def test_explicit_filename_default_mode_still_suffixes_on_collision(self, tmp_path): + # Default (overwrite=False) behavior is unchanged even with an + # explicit filename: a second write gets a -2 suffix. 
+ entity = {"type": "feedback", "content": "First."} + path1 = entity_io.write_entity_file(tmp_path, entity, filename="my-slug") + path2 = entity_io.write_entity_file(tmp_path, {"type": "feedback", "content": "Second."}, filename="my-slug") + assert path1 == tmp_path / "feedback" / "my-slug.md" + assert path2 == tmp_path / "feedback" / "my-slug-2.md" + + def test_overwrite_mode_writes_deterministic_path_in_place(self, tmp_path): + path1 = entity_io.write_entity_file(tmp_path, {"type": "feedback", "content": "First."}, filename="my-slug", overwrite=True) + path2 = entity_io.write_entity_file(tmp_path, {"type": "feedback", "content": "Second."}, filename="my-slug", overwrite=True) + assert path1 == path2 == tmp_path / "feedback" / "my-slug.md" + assert "Second." in path2.read_text() + assert not (tmp_path / "feedback" / "my-slug-2.md").exists() + + +class TestAdaptMemory: + """Integration tests against the rendered Claude adapt_memory.py.""" + + def _write_native(self, tmp_path, name, mem_type, body, description=None): + lines = ["---"] + if name is not None: + lines.append(f"name: {name}") + if description is not None: + lines.append(f"description: {description}") + lines += ["metadata:", f" type: {mem_type}", "---", "", body, ""] + native = tmp_path / "memory.md" + native.write_text("\n".join(lines), encoding="utf-8") + return native + + def _run(self, adapt, native, mem_type, trigger, monkeypatch, tmp_path): + monkeypatch.chdir(tmp_path) + monkeypatch.setattr(sys, "argv", ["adapt_memory.py", str(native), "--type", mem_type, "--trigger", trigger]) + adapt.main() + + def test_id_is_type_slash_name_and_native_path_stamped(self, tmp_path, monkeypatch, capsys): + adapt = _load_adapt_memory() + native = self._write_native(tmp_path, "my-fact", "feedback", "Always rebase.", "A short hook") + self._run(adapt, native, "feedback", "when rebasing", monkeypatch, tmp_path) + + out = capsys.readouterr().out + assert "Entity id: feedback/my-fact" in out + + entity_file = tmp_path 
/ ".evolve" / "entities" / "feedback" / "my-fact.md" + assert entity_file.exists() + parsed = entity_io.markdown_to_entity(entity_file) + assert parsed["native_path"] == str(native) + assert parsed["source"] == "native-memory" + assert parsed["type"] == "feedback" + + def test_deterministic_overwrite_on_same_name_and_type(self, tmp_path, monkeypatch, capsys): + adapt = _load_adapt_memory() + native = self._write_native(tmp_path, "my-fact", "feedback", "First version.") + self._run(adapt, native, "feedback", "trig", monkeypatch, tmp_path) + capsys.readouterr() + + native.write_text("---\nname: my-fact\nmetadata:\n type: feedback\n---\n\nSecond version.\n", encoding="utf-8") + self._run(adapt, native, "feedback", "trig", monkeypatch, tmp_path) + + feedback_dir = tmp_path / ".evolve" / "entities" / "feedback" + files = sorted(p.name for p in feedback_dir.glob("*.md")) + assert files == ["my-fact.md"] # no my-fact-2.md + assert "Second version." in (feedback_dir / "my-fact.md").read_text() + + def test_falls_back_to_content_slug_when_name_missing(self, tmp_path, monkeypatch, capsys): + adapt = _load_adapt_memory() + native = self._write_native(tmp_path, None, "project", "Use deterministic builds everywhere.") + self._run(adapt, native, "project", "when building", monkeypatch, tmp_path) + + out = capsys.readouterr().out + expected_slug = entity_io.slugify("Use deterministic builds everywhere.") + assert f"Entity id: project/{expected_slug}" in out + assert (tmp_path / ".evolve" / "entities" / "project" / f"{expected_slug}.md").exists() + class TestLoadAllEntities: def test_loads_from_nested_type_dirs(self, temp_project_dir): diff --git a/tests/platform_integrations/test_idempotency.py b/tests/platform_integrations/test_idempotency.py index 02ebbb83..5b1c8d48 100644 --- a/tests/platform_integrations/test_idempotency.py +++ b/tests/platform_integrations/test_idempotency.py @@ -3,35 +3,59 @@ """ import json -import re import pytest +MANAGED_MARKER = "" + + 
@pytest.mark.platform_integrations class TestBobIdempotency: """Test that Bob installation is idempotent.""" - def test_multiple_lite_installs(self, temp_project_dir, install_runner, file_assertions): - """Running install twice for Bob lite mode should be safe.""" + def test_multiple_lite_installs(self, temp_project_dir, install_runner, file_assertions, bob_rules_file, bob_audit_script): + """Running install twice for Bob lite mode should be safe. + + Lite writes the always-on instructions to Bob's GLOBAL rules file + ``~/.bob/rules/00-evolve-lite.md`` and the recall-audit script to + ``~/.bob/evolve-lite/audit_recall.py``; a second install must leave + exactly one such file with identical content (no duplication) and must + not create any AGENTS.md or per-project EVOLVE.md copy. + """ # First install install_runner.run("install", platform="bob", mode="lite") - # Capture state after first install bob_dir = temp_project_dir / ".bob" - custom_modes_file = bob_dir / "custom_modes.yaml" - first_content = custom_modes_file.read_text() + file_assertions.assert_file_exists(bob_rules_file) + first_content = bob_rules_file.read_text() + # The rules file holds the full EVOLVE.md text. + assert "self-directed memory" in first_content + # The recall-audit script is installed at its global path, and the rules + # file references that exact path. + file_assertions.assert_file_exists(bob_audit_script) + assert "Append a recall-audit row" in bob_audit_script.read_text() + assert "~/.bob/evolve-lite/audit_recall.py" in first_content # Second install install_runner.run("install", platform="bob", mode="lite") - # Assert: Files are identical - second_content = custom_modes_file.read_text() - assert first_content == second_content, "Content changed after second install" + # Assert: the rules file is identical after the second install. 
+ second_content = bob_rules_file.read_text() + assert first_content == second_content, "rules/00-evolve-lite.md changed after second install" - # Assert: No duplicate sentinel blocks - assert first_content.count("# >>>evolve:evolve-lite<<<") == 1 - assert first_content.count("# <<>>evolve:evolve-lite<<<` in its customInstructions. A naive `if start in - existing` substring check treated that as an existing block, took the replace - branch, found no matching end sentinel, and silently dropped the merge while - still reporting success. The sentinel match must be line-anchored. - """ - bob_dir = temp_project_dir / ".bob" - modes_file = bob_dir / "custom_modes.yaml" - modes_file.parent.mkdir(parents=True, exist_ok=True) - # Reproduce the exact user failure: a 0-indent list (as yaml.safe_dump / - # Bob marketplace tooling writes it) whose quoted text mentions the - # sentinel literal. This trips BOTH the substring false-match and the - # 0-indent-vs-2-indent mismatch. - modes_file.write_text( - "customModes:\n" - "- slug: install-evolve-lite\n" - " name: Install Evolve Lite\n" - ' customInstructions: "Merged between # >>>evolve:evolve-lite<<< sentinel comments."\n' - " groups:\n" - " - read\n" - ) - - install_runner.run("install", platform="bob", mode="lite") - - content = modes_file.read_text() - # The evolve-lite mode was actually merged in (real sentinel block written). - assert "# >>>evolve:evolve-lite<<<" in content - - # All top-level list items share one indentation — a 0-indent/2-indent mix - # would be invalid YAML (the indentation-matching fix). - indents = set(re.findall(r"(?m)^([ \t]*)- slug:", content)) - assert len(indents) == 1, f"mixed custom-mode list indentation: {indents}" - - slugs = re.findall(r"(?m)^[ \t]*- slug:\s*(\S+)", content) - assert "evolve-lite" in slugs, f"evolve-lite mode not merged; slugs={slugs}" - # ...and the pre-existing mode is preserved. 
- assert "install-evolve-lite" in slugs - def test_install_preserves_user_content_during_legacy_purge(self, temp_project_dir, install_runner, bob_fixtures, file_assertions): """The legacy purge MUST NOT clobber non-evolve user skills/commands.""" bob_dir = temp_project_dir / ".bob" @@ -201,37 +206,47 @@ def test_install_preserves_user_content_during_legacy_purge(self, temp_project_d class TestCodexIdempotency: """Test that Codex installation is idempotent.""" - def test_multiple_installs(self, temp_project_dir, install_runner, file_assertions): - """Running install twice for Codex should be safe.""" + def test_multiple_installs( + self, temp_project_dir, install_runner, file_assertions, codex_agents_file, codex_evolve_md, codex_audit_script + ): + """Running install twice for Codex should be safe. + + Codex now drops a COPY of EVOLVE.md on disk and injects a SINGLE + greppable pointer line (carrying ````) into + the (sandboxed) ~/.codex/AGENTS.md instead of inlining the body. A + second install must not duplicate the marketplace entry or the pointer + line. + """ install_runner.run("install", platform="codex") marketplace_file = temp_project_dir / ".agents" / "plugins" / "marketplace.json" - hooks_file = temp_project_dir / ".codex" / "hooks.json" first_marketplace = json.loads(marketplace_file.read_text()) - first_hooks = json.loads(hooks_file.read_text()) + first_agents = codex_agents_file.read_text() + + # The recall-audit script and the EVOLVE.md copy live together on disk; + # the pointer line in AGENTS.md references the EVOLVE.md path. 
+ file_assertions.assert_file_exists(codex_evolve_md) + file_assertions.assert_file_exists(codex_audit_script) + assert "Append a recall-audit row" in codex_audit_script.read_text() + assert "~/.codex/evolve-lite/EVOLVE.md" in first_agents install_runner.run("install", platform="codex") second_marketplace = json.loads(marketplace_file.read_text()) - second_hooks = json.loads(hooks_file.read_text()) + second_agents = codex_agents_file.read_text() assert first_marketplace == second_marketplace, "marketplace.json changed after second install" - assert first_hooks == second_hooks, ".codex/hooks.json changed after second install" + assert first_agents == second_agents, "~/.codex/AGENTS.md changed after second install" evolve_plugins = [entry for entry in second_marketplace["plugins"] if entry["name"] == "evolve-lite"] assert len(evolve_plugins) == 1, "Duplicate evolve-lite marketplace entries found" - prompt_hooks = second_hooks["hooks"]["UserPromptSubmit"] - evolve_hook_groups = [ - group - for group in prompt_hooks - if any( - "plugins/evolve-lite/skills/evolve-lite/recall/scripts/retrieve_entities.py" in hook.get("command", "") - for hook in group.get("hooks", []) - ) - ] - assert len(evolve_hook_groups) == 1, "Duplicate Evolve UserPromptSubmit hooks found" - assert evolve_hook_groups[0].get("matcher") == "" + # Exactly one managed pointer line in the always-on instructions file. + marker_lines = [ln for ln in second_agents.splitlines() if MANAGED_MARKER in ln] + assert len(marker_lines) == 1, f"Expected exactly one managed line, got {marker_lines!r}" + # The EVOLVE.md copy and audit script are still present after reinstall. 
+ file_assertions.assert_file_exists(codex_evolve_md) + file_assertions.assert_file_exists(codex_audit_script) def test_install_after_partial_uninstall(self, temp_project_dir, install_runner, file_assertions): """Installing after deleting part of the Codex plugin should restore it.""" @@ -249,12 +264,44 @@ def test_install_after_partial_uninstall(self, temp_project_dir, install_runner, file_assertions.assert_file_exists(plugin_dir / "skills" / "evolve-lite" / "learn" / "SKILL.md") file_assertions.assert_file_exists(plugin_dir / "lib" / "evolve-lite" / "entity_io.py") + def test_install_appends_pointer_preserving_user_prose(self, temp_project_dir, install_runner, file_assertions, codex_agents_file): + """Injecting the pointer line must preserve a pre-existing, unrelated AGENTS.md. + + Codex now injects a SINGLE managed pointer line (carrying + ````) via FileOps.inject_marker_line. When + AGENTS.md already has user content but no managed line, the pointer is + APPENDED on its own line — separated from the existing content by a + blank line — and the user's prose is preserved verbatim. Re-running the + install REPLACES that one line in place rather than duplicating it. + """ + # The sandboxed ~/.codex/AGENTS.md, pre-seeded with unrelated user prose. + codex_agents_file.parent.mkdir(parents=True, exist_ok=True) + user_prose = "# My agent instructions\n\nAlways prefer ripgrep over grep, and never edit generated files by hand.\n" + codex_agents_file.write_text(user_prose) + + install_runner.run("install", platform="codex") + + content = codex_agents_file.read_text() + # The user's original prose is preserved verbatim. + assert user_prose.rstrip() in content + # Exactly one managed pointer line was appended, separated by a blank line. 
+ marker_lines = [ln for ln in content.splitlines() if MANAGED_MARKER in ln] + assert len(marker_lines) == 1, f"Expected exactly one managed line, got {marker_lines!r}" + assert content.startswith(user_prose.rstrip() + "\n\n") + + # A second install replaces the line in place — still exactly one. + install_runner.run("install", platform="codex") + content2 = codex_agents_file.read_text() + marker_lines2 = [ln for ln in content2.splitlines() if MANAGED_MARKER in ln] + assert len(marker_lines2) == 1, f"Expected exactly one managed line after reinstall, got {marker_lines2!r}" + assert user_prose.rstrip() in content2 + @pytest.mark.platform_integrations class TestUninstallInstallCycle: """Test that uninstall followed by install works correctly.""" - def test_bob_uninstall_install_cycle(self, temp_project_dir, install_runner, bob_fixtures, file_assertions): + def test_bob_uninstall_install_cycle(self, temp_project_dir, install_runner, bob_fixtures, file_assertions, bob_rules_file): """Uninstalling and reinstalling Bob should work correctly.""" # Create user content bob_fixtures.create_existing_skill(temp_project_dir) @@ -275,28 +322,56 @@ def test_bob_uninstall_install_cycle(self, temp_project_dir, install_runner, bob # Reinstall install_runner.run("install", platform="bob") - # Assert: Evolve content is back + # Assert: Evolve content is back. Lite wires always-on instructions via + # the GLOBAL rules file, not via custom_modes.yaml or any AGENTS.md. file_assertions.assert_all_bob_skills_installed(bob_dir) - file_assertions.assert_sentinel_block_exists(bob_dir / "custom_modes.yaml", "evolve-lite") + file_assertions.assert_file_exists(bob_rules_file) + file_assertions.assert_file_not_exists(temp_project_dir / "AGENTS.md") + file_assertions.assert_file_not_exists(bob_dir / "EVOLVE.md") - # Assert: User content still intact + # Assert: User content still intact — the user's custom_modes.yaml was never + # touched by the lite install, so their mode survives the full cycle. 
file_assertions.assert_dir_exists(bob_dir / "skills" / "my-custom-skill") custom_modes = (bob_dir / "custom_modes.yaml").read_text() assert "slug: my-mode" in custom_modes - def test_codex_uninstall_install_cycle(self, temp_project_dir, install_runner, codex_fixtures, file_assertions): - """Uninstalling and reinstalling Codex should work correctly.""" + def test_codex_uninstall_install_cycle( + self, + temp_project_dir, + install_runner, + codex_fixtures, + file_assertions, + codex_agents_file, + codex_evolve_md, + codex_audit_script, + ): + """Uninstalling and reinstalling Codex should work correctly. + + Codex now drops a COPY of EVOLVE.md on disk and injects a SINGLE managed + pointer line into the (sandboxed) ~/.codex/AGENTS.md instead of + registering hooks. The user's hooks.json is never touched, so it must + survive the cycle unchanged. + """ custom_plugin = codex_fixtures.create_existing_plugin(temp_project_dir) marketplace_file = codex_fixtures.create_existing_marketplace(temp_project_dir) hooks_file = codex_fixtures.create_existing_hooks(temp_project_dir) plugin_json = custom_plugin / ".codex-plugin" / "plugin.json" original_plugin_content = plugin_json.read_text() + original_hooks_content = hooks_file.read_text() install_runner.run("install", platform="codex") evolve_plugin_dir = temp_project_dir / "plugins" / "evolve-lite" file_assertions.assert_dir_exists(evolve_plugin_dir) + # Install injected exactly one managed pointer line into the always-on instructions. + marker_lines = [ln for ln in codex_agents_file.read_text().splitlines() if MANAGED_MARKER in ln] + assert len(marker_lines) == 1, f"Expected exactly one managed line, got {marker_lines!r}" + # Install dropped the EVOLVE.md copy and the recall-audit script at their global paths. + file_assertions.assert_file_exists(codex_evolve_md) + file_assertions.assert_file_exists(codex_audit_script) + # The user's hooks were left completely untouched. 
+ file_assertions.assert_file_unchanged(hooks_file, original_hooks_content) install_runner.run("uninstall", platform="codex") @@ -304,15 +379,14 @@ def test_codex_uninstall_install_cycle(self, temp_project_dir, install_runner, c current_marketplace = json.loads(marketplace_file.read_text()) assert all(entry["name"] != "evolve-lite" for entry in current_marketplace["plugins"]) - current_hooks = json.loads(hooks_file.read_text()) - prompt_hooks = current_hooks["hooks"].get("UserPromptSubmit", []) - evolve_hooks = [ - hook - for group in prompt_hooks - for hook in group.get("hooks", []) - if "plugins/evolve-lite/skills/evolve-lite/recall/scripts/retrieve_entities.py" in hook.get("command", "") - ] - assert not evolve_hooks, "Evolve hook still present after uninstall" + # The managed pointer line is gone from AGENTS.md after uninstall. + assert [ln for ln in codex_agents_file.read_text().splitlines() if MANAGED_MARKER in ln] == [] + # The EVOLVE.md copy, audit script, and now-empty dir are removed. + file_assertions.assert_file_not_exists(codex_evolve_md) + file_assertions.assert_file_not_exists(codex_audit_script) + file_assertions.assert_dir_not_exists(codex_evolve_md.parent) + # The user's hooks are still untouched. 
+ file_assertions.assert_file_unchanged(hooks_file, original_hooks_content) install_runner.run("install", platform="codex") @@ -323,14 +397,8 @@ def test_codex_uninstall_install_cycle(self, temp_project_dir, install_runner, c assert any(entry["name"] == "my-codex-plugin" for entry in reinstalled_marketplace["plugins"]) assert any(entry["name"] == "evolve-lite" for entry in reinstalled_marketplace["plugins"]) - reinstalled_hooks = json.loads(hooks_file.read_text()) - assert any( - hook.get("command") == "python3 ~/.codex/hooks/custom_prompt_memory.py" - for group in reinstalled_hooks["hooks"]["UserPromptSubmit"] - for hook in group.get("hooks", []) - ) - assert any( - "plugins/evolve-lite/skills/evolve-lite/recall/scripts/retrieve_entities.py" in hook.get("command", "") - for group in reinstalled_hooks["hooks"]["UserPromptSubmit"] - for hook in group.get("hooks", []) - ) + # Reinstall re-injects exactly one managed pointer line and still leaves user hooks alone. + reinstalled_markers = [ln for ln in codex_agents_file.read_text().splitlines() if MANAGED_MARKER in ln] + assert len(reinstalled_markers) == 1, f"Expected exactly one managed line, got {reinstalled_markers!r}" + file_assertions.assert_file_exists(codex_evolve_md) + file_assertions.assert_file_unchanged(hooks_file, original_hooks_content) diff --git a/tests/platform_integrations/test_legacy_migration.py b/tests/platform_integrations/test_legacy_migration.py new file mode 100644 index 00000000..209bdc6f --- /dev/null +++ b/tests/platform_integrations/test_legacy_migration.py @@ -0,0 +1,249 @@ +""" +Tests for the migration-aware ``uninstall`` path. + +An upgrading user still has PRE-REDESIGN ("legacy") artifacts on disk that the +new design never writes. ``uninstall`` must reverse them too, so the user lands +on a true clean slate: + + * Codex (GLOBAL ~/.codex/): legacy plugin registrations in ``config.toml`` + (``[plugins."evolve-lite@<marketplace>"]`` tables) and plugin caches + (``plugins/cache/<marketplace>/evolve-lite/``). 
+ * Claude (GLOBAL ~/.claude/): orphan plugin data dirs + (``plugins/data/evolve-lite-*``) and the ``evolve-marketplace`` registration. + * Bob: the legacy ``install-evolve-lite`` bootstrap custom mode (a bare YAML + list item, not a sentinel block). + +All removals are defensive, idempotent, and dry-run aware. These tests reuse the +``sandbox_home`` conftest seam (monkeypatches HOME → tmp dir, flows through to +the install.sh subprocess) so we never touch the developer's real home. +""" + +import tomllib + +import pytest + + +# ── Codex config.toml fixtures ───────────────────────────────────────────────── + +LEGACY_CONFIG_TOML = """\ +model = "gpt-5" + +[plugins."other@x"] +enabled = true + +[plugins."evolve-lite@evolve-marketplace"] +enabled = true +source = "evolve-marketplace" + +[plugins."evolve-lite@evolve-local"] +enabled = true +source = "evolve-local" + +[history] +persistence = "save-all" +""" + + +def _seed_legacy_codex(sandbox_home): + """Write a legacy ~/.codex/config.toml + plugin caches; return key paths.""" + codex = sandbox_home / ".codex" + config = codex / "config.toml" + config.parent.mkdir(parents=True, exist_ok=True) + config.write_text(LEGACY_CONFIG_TOML) + + cache = codex / "plugins" / "cache" / "evolve-marketplace" + (cache / "evolve-lite").mkdir(parents=True, exist_ok=True) + (cache / "evolve-lite" / "manifest.json").write_text("{}\n") + (cache / "other-plugin").mkdir(parents=True, exist_ok=True) + (cache / "other-plugin" / "manifest.json").write_text("{}\n") + return config, cache + + +@pytest.mark.platform_integrations +class TestCodexLegacyMigration: + def test_uninstall_strips_legacy_config_tables(self, sandbox_home, install_runner): + config, _ = _seed_legacy_codex(sandbox_home) + + install_runner.run("uninstall", platform="codex") + + text = config.read_text() + assert "evolve-lite@evolve-marketplace" not in text + assert "evolve-lite@evolve-local" not in text + # Unrelated tables and top-level keys are preserved. 
+ assert "other@x" in text + assert 'model = "gpt-5"' in text + assert "[history]" in text + # Result is still valid TOML with no evolve-lite@* plugin key. + parsed = tomllib.loads(text) + assert all(not k.startswith("evolve-lite@") for k in parsed.get("plugins", {})) + assert "other@x" in parsed["plugins"] + assert parsed["history"]["persistence"] == "save-all" + + def test_uninstall_removes_legacy_plugin_cache(self, sandbox_home, install_runner): + _, cache = _seed_legacy_codex(sandbox_home) + + install_runner.run("uninstall", platform="codex") + + # evolve-lite subdir gone; its marketplace parent survives (is not rmdir'd) + # because the sibling other-plugin cache keeps it non-empty here. + assert not (cache / "evolve-lite").exists() + assert cache.exists(), "marketplace dir with surviving siblings must remain" + assert (cache / "other-plugin").exists(), "sibling plugin cache preserved" + + def test_uninstall_rmdirs_emptied_marketplace_parent(self, sandbox_home, install_runner): + codex = sandbox_home / ".codex" + cache = codex / "plugins" / "cache" / "evolve-local" + (cache / "evolve-lite").mkdir(parents=True, exist_ok=True) + (cache / "evolve-lite" / "x.json").write_text("{}\n") + + install_runner.run("uninstall", platform="codex") + + assert not (cache / "evolve-lite").exists() + assert not cache.exists(), "emptied marketplace parent should be rmdir'd" + + def test_uninstall_no_codex_config_is_noop(self, sandbox_home, install_runner): + """Absent legacy artifacts: uninstall must not error or create anything.""" + result = install_runner.run("uninstall", platform="codex") + assert result.returncode == 0 + assert not (sandbox_home / ".codex" / "config.toml").exists() + + def test_uninstall_codex_legacy_is_idempotent(self, sandbox_home, install_runner): + config, cache = _seed_legacy_codex(sandbox_home) + install_runner.run("uninstall", platform="codex") + first = config.read_text() + # Second run over the already-cleaned state is a clean no-op. 
+ install_runner.run("uninstall", platform="codex") + assert config.read_text() == first + assert not (cache / "evolve-lite").exists() + assert (cache / "other-plugin").exists() + + +# ── Claude orphan data dirs + marketplace removal ────────────────────────────── + + +@pytest.mark.platform_integrations +class TestClaudeLegacyMigration: + def test_uninstall_removes_orphan_data_dirs(self, sandbox_home, install_runner, temp_project_dir): + data = sandbox_home / ".claude" / "plugins" / "data" + for name in ("evolve-lite-inline", "evolve-lite-evolve-marketplace", "other"): + (data / name).mkdir(parents=True, exist_ok=True) + (data / name / "store.json").write_text("{}\n") + + install_runner.run("uninstall", platform="claude") + + assert not (data / "evolve-lite-inline").exists() + assert not (data / "evolve-lite-evolve-marketplace").exists() + assert (data / "other").exists(), "unrelated plugin data dir preserved" + + def test_uninstall_invokes_marketplace_remove(self, sandbox_home, install_runner, tmp_path): + """The `claude plugin marketplace remove evolve-marketplace` shell-out is attempted. + + We don't require a real `claude` binary: drop a stub on PATH + that records its argv, then assert it was called with the remove verb. 
+ """ + bin_dir = tmp_path / "fakebin" + bin_dir.mkdir() + log = tmp_path / "claude_calls.log" + stub = bin_dir / "claude" + stub.write_text(f'#!/usr/bin/env bash\necho "$@" >> "{log}"\nexit 0\n') + stub.chmod(0o755) + + install_runner.run( + "uninstall", + platform="claude", + env={"PATH": f"{bin_dir}:/usr/bin:/bin"}, + ) + + calls = log.read_text() + assert "plugin uninstall evolve-lite" in calls + assert "plugin marketplace remove evolve-marketplace" in calls + + def test_uninstall_removes_legacy_plugin_cache(self, sandbox_home, install_runner, temp_project_dir): + cache = sandbox_home / ".claude" / "plugins" / "cache" / "evolve-marketplace" + (cache / "evolve-lite" / "1.1.0").mkdir(parents=True, exist_ok=True) + (cache / "evolve-lite" / "1.1.0" / "manifest.json").write_text("{}\n") + (cache / "other-plugin").mkdir(parents=True, exist_ok=True) + (cache / "other-plugin" / "manifest.json").write_text("{}\n") + + install_runner.run("uninstall", platform="claude") + + # evolve-lite cache subtree gone; its marketplace parent survives because + # an unrelated sibling plugin cache still lives there. + assert not (cache / "evolve-lite").exists() + assert cache.exists(), "marketplace dir with surviving siblings must remain" + assert (cache / "other-plugin").exists(), "sibling plugin cache preserved" + + +# ── Bob legacy install-evolve-lite mode ──────────────────────────────────────── + +LEGACY_BOB_MODES = """\ +customModes: + - slug: install-evolve-lite + name: Install Evolve Lite + roleDefinition: |- + Bootstrap mode. Mentions the sentinel literal # >>>evolve:evolve-lite<<< + inside its instructions, which must not confuse removal. + customInstructions: |- + Run the installer. + groups: + - read + - edit + - slug: my-mode + name: My Custom Mode + roleDefinition: |- + This is my own mode. 
+ groups: + - read +""" + + +@pytest.mark.platform_integrations +class TestBobLegacyMigration: + def test_uninstall_removes_legacy_bootstrap_mode(self, temp_project_dir, install_runner): + modes = temp_project_dir / ".bob" / "custom_modes.yaml" + modes.parent.mkdir(parents=True, exist_ok=True) + modes.write_text(LEGACY_BOB_MODES) + + install_runner.run("uninstall", platform="bob") + + text = modes.read_text() + assert "install-evolve-lite" not in text + assert "Bootstrap mode" not in text + # The unrelated user mode survives intact. + assert "slug: my-mode" in text + assert "This is my own mode." in text + + +# ── Dry-run must change nothing on disk ───────────────────────────────────────── + + +@pytest.mark.platform_integrations +class TestLegacyDryRun: + def test_dry_run_removes_nothing(self, sandbox_home, install_runner, temp_project_dir): + config, cache = _seed_legacy_codex(sandbox_home) + config_before = config.read_text() + + data = sandbox_home / ".claude" / "plugins" / "data" + (data / "evolve-lite-inline").mkdir(parents=True, exist_ok=True) + (data / "evolve-lite-inline" / "store.json").write_text("{}\n") + + claude_cache = sandbox_home / ".claude" / "plugins" / "cache" / "evolve-marketplace" + (claude_cache / "evolve-lite" / "1.1.0").mkdir(parents=True, exist_ok=True) + (claude_cache / "evolve-lite" / "1.1.0" / "manifest.json").write_text("{}\n") + + modes = temp_project_dir / ".bob" / "custom_modes.yaml" + modes.parent.mkdir(parents=True, exist_ok=True) + modes.write_text(LEGACY_BOB_MODES) + modes_before = modes.read_text() + + result = install_runner.run("uninstall", platform="all", dry_run=True) + + assert result.returncode == 0 + assert "DRY RUN" in result.stdout + # Nothing on disk changed. 
+ assert config.read_text() == config_before + assert (cache / "evolve-lite").exists() + assert (cache / "other-plugin").exists() + assert (data / "evolve-lite-inline").exists() + assert (claude_cache / "evolve-lite").exists() + assert modes.read_text() == modes_before diff --git a/tests/platform_integrations/test_marker_line.py b/tests/platform_integrations/test_marker_line.py new file mode 100644 index 00000000..11818565 --- /dev/null +++ b/tests/platform_integrations/test_marker_line.py @@ -0,0 +1,116 @@ +""" +Focused unit tests for FileOps.inject_marker_line / remove_marker_line. + +These two generic helpers manage a SINGLE greppable "managed" line in a text +file (the Codex installer uses them to point ~/.codex/AGENTS.md at the on-disk +EVOLVE.md copy; the Claude phase will reuse them). The FileOps class lives +inside the install.sh heredoc, so we extract and exec that Python source into a +throwaway namespace to test the methods in isolation, with no subprocess. +""" + +import re +from pathlib import Path + +import pytest + + +MARKER = "" +LINE = f"Read ~/.codex/evolve-lite/EVOLVE.md and follow it. {MARKER}" + + +@pytest.fixture(scope="module") +def file_ops(): + """Extract the embedded Python from install.sh and return a fresh FileOps().""" + repo_root = Path(__file__).parent.parent.parent + script = (repo_root / "platform-integrations" / "install.sh").read_text() + m = re.search(r"<<'PYEOF'\n(.*)\nPYEOF", script, re.DOTALL) + assert m, "Could not locate the embedded Python heredoc in install.sh" + ns = {} + # Give the module a benign argv so its top-level `sys.argv[1]` read succeeds. + code = "import sys\nsys.argv = ['install.sh', '', 'status']\n" + m.group(1) + # Strip the `if __name__ == '__main__': main()` trailer so exec doesn't run the CLI. 
+ code = code.replace('if __name__ == "__main__":\n main()', "") + exec(compile(code, "install.sh:PYEOF", "exec"), ns) + return ns["FileOps"]() + + +@pytest.mark.platform_integrations +class TestInjectMarkerLine: + def test_creates_file_and_parents_when_missing(self, file_ops, tmp_path): + path = tmp_path / "nested" / "AGENTS.md" + file_ops.inject_marker_line(path, MARKER, LINE) + assert path.read_text() == LINE + "\n" + + def test_appends_with_blank_line_when_content_present(self, file_ops, tmp_path): + path = tmp_path / "AGENTS.md" + path.write_text("# My instructions\n\nPrefer ripgrep.\n") + file_ops.inject_marker_line(path, MARKER, LINE) + text = path.read_text() + # Original content preserved, exactly one managed line, separated by a blank line. + assert text.startswith("# My instructions\n\nPrefer ripgrep.\n\n") + assert text.count(MARKER) == 1 + assert text.endswith(LINE + "\n") + + def test_replaces_existing_managed_line_in_place(self, file_ops, tmp_path): + path = tmp_path / "AGENTS.md" + old_line = f"Stale pointer to /old/path. {MARKER}" + path.write_text(f"# Top\n{old_line}\n# Bottom\n") + file_ops.inject_marker_line(path, MARKER, LINE) + text = path.read_text() + # The whole stale line is replaced; surrounding content untouched. + assert old_line not in text + assert text.count(MARKER) == 1 + assert LINE in text + assert "# Top" in text and "# Bottom" in text + # No line was added or removed (still 3 lines). 
+ assert text.splitlines() == ["# Top", LINE, "# Bottom"] + + def test_idempotent_across_repeats(self, file_ops, tmp_path): + path = tmp_path / "AGENTS.md" + path.write_text("# Existing\n") + file_ops.inject_marker_line(path, MARKER, LINE) + first = path.read_text() + file_ops.inject_marker_line(path, MARKER, LINE) + file_ops.inject_marker_line(path, MARKER, LINE) + assert path.read_text() == first + assert path.read_text().count(MARKER) == 1 + + def test_rejects_line_without_marker(self, file_ops, tmp_path): + path = tmp_path / "AGENTS.md" + with pytest.raises(ValueError): + file_ops.inject_marker_line(path, MARKER, "no marker here") + + +@pytest.mark.platform_integrations +class TestRemoveMarkerLine: + def test_no_op_when_file_missing(self, file_ops, tmp_path): + path = tmp_path / "missing.md" + file_ops.remove_marker_line(path, MARKER) # must not raise + assert not path.exists() + + def test_removes_managed_line_preserving_other_lines(self, file_ops, tmp_path): + path = tmp_path / "AGENTS.md" + path.write_text(f"# Top\n\n{LINE}\n\n# Bottom\n") + file_ops.remove_marker_line(path, MARKER) + text = path.read_text() + assert MARKER not in text + assert "# Top" in text and "# Bottom" in text + # No doubled blank-line gap left where the managed line used to be. + assert "\n\n\n" not in text + + def test_removes_only_marker_lines(self, file_ops, tmp_path): + path = tmp_path / "AGENTS.md" + path.write_text(f"keep me\n{LINE}\nkeep me too\n") + file_ops.remove_marker_line(path, MARKER) + assert path.read_text().splitlines() == ["keep me", "keep me too"] + + def test_inject_then_remove_round_trips(self, file_ops, tmp_path): + path = tmp_path / "AGENTS.md" + original = "# My instructions\n\nPrefer ripgrep.\n" + path.write_text(original) + file_ops.inject_marker_line(path, MARKER, LINE) + file_ops.remove_marker_line(path, MARKER) + text = path.read_text() + assert MARKER not in text + assert "# My instructions" in text and "Prefer ripgrep." 
in text + assert "\n\n\n" not in text diff --git a/tests/platform_integrations/test_plugin_structure.py b/tests/platform_integrations/test_plugin_structure.py index 062d7816..781b4141 100644 --- a/tests/platform_integrations/test_plugin_structure.py +++ b/tests/platform_integrations/test_plugin_structure.py @@ -28,34 +28,21 @@ def test_plugin_json_skills_path_exists(self): class TestHooksManifest: - def test_hooks_json_is_valid_json(self): - data = json.loads((_PLUGIN_ROOT / "hooks" / "hooks.json").read_text()) - assert isinstance(data, dict) + """The Claude plugin is fully hookless under the native-memory + CLAUDE.md + `@import` redesign. Recall is native and save is native, so the plugin must + register NO auto-firing hooks — otherwise recall/save fire twice. The skills + themselves stay invokable (see TestSkillScripts); only the hook WIRING is gone. + """ + + def test_no_hooks_json_shipped(self): + # No hooks/hooks.json under the rendered Claude plugin: the plugin + # registers no auto-firing lifecycle hooks at all. 
+ assert not (_PLUGIN_ROOT / "hooks" / "hooks.json").exists() - def test_hooks_json_has_hooks_key(self): - data = json.loads((_PLUGIN_ROOT / "hooks" / "hooks.json").read_text()) - assert "hooks" in data - - def test_known_lifecycle_events_present(self): - data = json.loads((_PLUGIN_ROOT / "hooks" / "hooks.json").read_text()) - hooks = data["hooks"] - assert "UserPromptSubmit" in hooks - assert "SessionStart" in hooks - assert "Stop" in hooks - - def test_command_hook_scripts_exist(self): - data = json.loads((_PLUGIN_ROOT / "hooks" / "hooks.json").read_text()) - for event, groups in data["hooks"].items(): - for group in groups: - for hook in group.get("hooks", []): - if hook.get("type") == "command": - cmd = hook["command"] - resolved = cmd.replace("${CLAUDE_PLUGIN_ROOT}", str(_PLUGIN_ROOT)) - # Find the script token — commands may have trailing flags - script_tokens = [t for t in resolved.split() if t.endswith((".py", ".sh"))] - assert script_tokens, f"No script found in hook command: {cmd}" - script_path = Path(script_tokens[0]) - assert script_path.exists(), f"Hook script missing: {script_path} (event: {event})" + def test_no_hooks_directory(self): + # The render wipes and rewrites the plugin root from plugin-source/; + # with the source hooks.json removed, no hooks/ dir should remain. 
+ assert not (_PLUGIN_ROOT / "hooks").exists() class TestSkillScripts: @@ -71,6 +58,8 @@ class TestSkillScripts: "skills/evolve-lite/recall/scripts/retrieve_entities.py", "skills/evolve-lite/learn/scripts/save_entities.py", "skills/evolve-lite/provenance/scripts/log_influence.py", + "skills/evolve-lite/adapt-memory/scripts/adapt_memory.py", + "skills/evolve-lite/doctor/scripts/doctor.py", ], ) def test_script_exists(self, script_rel): diff --git a/tests/platform_integrations/test_preservation.py b/tests/platform_integrations/test_preservation.py index cbf3092b..4e617207 100644 --- a/tests/platform_integrations/test_preservation.py +++ b/tests/platform_integrations/test_preservation.py @@ -47,25 +47,41 @@ def test_preserves_existing_commands(self, temp_project_dir, install_runner, bob bob_dir = temp_project_dir / ".bob" file_assertions.assert_all_bob_commands_installed(bob_dir) - def test_preserves_existing_custom_modes_yaml(self, temp_project_dir, install_runner, bob_fixtures, file_assertions): - """Install evolve when user has existing custom modes - they must be preserved.""" - # Setup: Create user's custom mode + def test_preserves_existing_custom_modes_and_user_rules( + self, temp_project_dir, install_runner, bob_fixtures, file_assertions, bob_rules_file + ): + """Lite install must leave the user's custom_modes.yaml and unrelated rules untouched. + + Lite no longer merges a mode into custom_modes.yaml, nor injects an + AGENTS.md import. The evolve always-on instructions live in Bob's GLOBAL + rules dir at ~/.bob/rules/00-evolve-lite.md. A pre-existing, unrelated + rules file (e.g. ~/.bob/rules/99-user.md) must be left intact, and no + AGENTS.md must be created. + """ + # Setup: user's custom mode, plus a pre-existing unrelated global rules file. 
custom_modes_file = bob_fixtures.create_existing_custom_modes(temp_project_dir) + original_modes_content = custom_modes_file.read_text() - # Action: Install evolve + user_rule = bob_rules_file.parent / "99-user.md" + user_rule.parent.mkdir(parents=True, exist_ok=True) + original_rule_content = "# My personal rules\n\nAlways prefer tabs.\n" + user_rule.write_text(original_rule_content) + + # Action: Install evolve (lite is the default mode) install_runner.run("install", platform="bob") - # Assert: User's custom mode is still present - current_content = custom_modes_file.read_text() - assert "slug: my-mode" in current_content, "User's custom mode was removed!" - assert "My Custom Mode" in current_content + # Assert: User's custom_modes.yaml is byte-for-byte unchanged. + file_assertions.assert_file_unchanged(custom_modes_file, original_modes_content) - # Assert: Evolve mode is added with sentinels - file_assertions.assert_sentinel_block_exists(custom_modes_file, "evolve-lite") - assert "slug: evolve-lite" in current_content + # Assert: User's unrelated rules file is byte-for-byte unchanged. + file_assertions.assert_file_unchanged(user_rule, original_rule_content) - # Assert: No duplicate user modes - assert current_content.count("slug: my-mode") == 1 + # Assert: The evolve instructions live in the global rules file, holding + # the full EVOLVE.md text; no AGENTS.md was created. 
+ file_assertions.assert_file_exists(bob_rules_file) + assert "self-directed memory" in bob_rules_file.read_text() + file_assertions.assert_file_not_exists(temp_project_dir / "AGENTS.md") + file_assertions.assert_file_not_exists((temp_project_dir / ".bob") / "AGENTS.md") def test_preserves_existing_mcp_servers(self, temp_project_dir, install_runner, bob_fixtures, file_assertions): """Install evolve full mode when user has existing MCP servers - they must be preserved.""" @@ -111,7 +127,7 @@ def test_refreshes_managed_evolve_mcp_server_fields_and_preserves_custom_fields( assert evolve_server["env"] == {"EVOLVE_PROFILE": "local"} assert evolve_server["metadata"] == {"managedBy": "user"} - def test_preserves_all_bob_content_together_lite(self, temp_project_dir, install_runner, bob_fixtures, file_assertions): + def test_preserves_all_bob_content_together_lite(self, temp_project_dir, install_runner, bob_fixtures, file_assertions, bob_rules_file): """Install evolve lite mode when user has all types of Bob content - all must be preserved.""" # Setup: Create all types of user content custom_skill = bob_fixtures.create_existing_skill(temp_project_dir) @@ -129,12 +145,19 @@ def test_preserves_all_bob_content_together_lite(self, temp_project_dir, install file_assertions.assert_file_unchanged(custom_skill / "SKILL.md", skill_content) file_assertions.assert_file_unchanged(custom_command, command_content) + # User's custom_modes.yaml is untouched by lite (it no longer merges modes). assert "slug: my-mode" in custom_modes.read_text() - # Assert: Evolve lite content is added + # Assert: Evolve lite content is added. Skills/commands/lib are copied, and the + # always-on instructions are wired via the GLOBAL rules file (not custom_modes.yaml). 
bob_dir = temp_project_dir / ".bob" file_assertions.assert_all_bob_skills_installed(bob_dir) - file_assertions.assert_sentinel_block_exists(custom_modes, "evolve-lite") + file_assertions.assert_all_bob_commands_installed(bob_dir) + file_assertions.assert_dir_exists(bob_dir / "lib" / "evolve-lite") + file_assertions.assert_file_exists(bob_rules_file) + # No AGENTS.md or per-project EVOLVE.md copy is created. + file_assertions.assert_file_not_exists(temp_project_dir / "AGENTS.md") + file_assertions.assert_file_not_exists(bob_dir / "EVOLVE.md") def test_preserves_all_bob_content_together_full(self, temp_project_dir, install_runner, bob_fixtures, file_assertions): """Install evolve full mode when user has all types of Bob content - all must be preserved.""" @@ -188,28 +211,44 @@ def test_preserves_existing_marketplace_entries(self, temp_project_dir, install_ evolve_plugins = [entry for entry in current_data["plugins"] if entry["name"] == "evolve-lite"] assert len(evolve_plugins) == 1, "Evolve plugin entry missing from marketplace.json" - def test_preserves_existing_hooks_and_plugin_files(self, temp_project_dir, install_runner, codex_fixtures, file_assertions): - """Install evolve when user already has hooks and plugins - they must be preserved.""" + def test_preserves_existing_hooks_and_plugin_files( + self, temp_project_dir, install_runner, codex_fixtures, file_assertions, codex_agents_file + ): + """Install evolve when user already has hooks and plugins - they must be preserved. + + Codex no longer registers any hooks; it drops a COPY of EVOLVE.md on disk + and injects a SINGLE managed pointer line into the (sandboxed) + ~/.codex/AGENTS.md. So the user's hooks.json must be left COMPLETELY + UNCHANGED (no Evolve sync/recall hook added), and the pointer line must + appear in AGENTS.md instead. 
+ """ custom_plugin = codex_fixtures.create_existing_plugin(temp_project_dir) plugin_json = custom_plugin / ".codex-plugin" / "plugin.json" original_plugin_content = plugin_json.read_text() hooks_file = codex_fixtures.create_existing_hooks(temp_project_dir) + original_hooks_content = hooks_file.read_text() install_runner.run("install", platform="codex") + # The user's plugin.json is untouched. file_assertions.assert_file_unchanged(plugin_json, original_plugin_content) + # The user's hooks.json is byte-for-byte unchanged: no Evolve hook is added. + file_assertions.assert_file_unchanged(hooks_file, original_hooks_content) + current_hooks = json.loads(hooks_file.read_text()) + # SessionStart count stays at the user's original (1) — no sync hook added. session_start_hooks = current_hooks["hooks"]["SessionStart"] - assert len(session_start_hooks) == 2, "Expected the user's SessionStart hook plus the Evolve sync hook." + assert len(session_start_hooks) == 1, "Codex install must not add a SessionStart hook anymore." assert any( any(hook.get("command") == "python3 ~/.codex/hooks/session_start.py" for hook in group.get("hooks", [])) for group in session_start_hooks ), "User's SessionStart hook was removed!" - assert any( - any("plugins/evolve-lite/skills/evolve-lite/sync/scripts/sync.py" in hook.get("command", "") for hook in group.get("hooks", [])) + assert all( + "plugins/evolve-lite/skills/evolve-lite/sync/scripts/sync.py" not in hook.get("command", "") for group in session_start_hooks - ), "Evolve SessionStart hook was not added!" + for hook in group.get("hooks", []) + ), "Codex install must no longer add an Evolve SessionStart hook." 
prompt_hooks = current_hooks["hooks"]["UserPromptSubmit"] custom_prompt_hooks = [ @@ -219,17 +258,17 @@ def test_preserves_existing_hooks_and_plugin_files(self, temp_project_dir, insta if hook.get("command") == "python3 ~/.codex/hooks/custom_prompt_memory.py" ] assert len(custom_prompt_hooks) == 1, "User's UserPromptSubmit hook was removed!" - - evolve_hooks = [ - group + assert all( + "plugins/evolve-lite/skills/evolve-lite/recall/scripts/retrieve_entities.py" not in hook.get("command", "") for group in prompt_hooks - if any( - "plugins/evolve-lite/skills/evolve-lite/recall/scripts/retrieve_entities.py" in hook.get("command", "") - for hook in group.get("hooks", []) - ) - ] - assert len(evolve_hooks) == 1, "Evolve UserPromptSubmit hook was not added!" - assert evolve_hooks[0].get("matcher") == "" + for hook in group.get("hooks", []) + ), "Codex install must no longer add an Evolve UserPromptSubmit hook." + + # The evolve always-on instructions now live behind a single managed + # pointer line in ~/.codex/AGENTS.md (sandboxed). + MANAGED_MARKER = "" + marker_lines = [ln for ln in codex_agents_file.read_text().splitlines() if MANAGED_MARKER in ln] + assert len(marker_lines) == 1, f"Expected exactly one managed line, got {marker_lines!r}" @pytest.mark.platform_integrations diff --git a/tests/platform_integrations/test_provenance.py b/tests/platform_integrations/test_provenance.py new file mode 100644 index 00000000..5636f584 --- /dev/null +++ b/tests/platform_integrations/test_provenance.py @@ -0,0 +1,222 @@ +"""Tests for skills/evolve-lite/provenance/scripts/provenance.py. + +These exercise the rendered Claude provenance.py end to end (lib resolution only +works in the rendered tree). They cover the deterministic plumbing — recall-row +reading, entity resolution, the trajectory locator (BOTH legacy +``.evolve/trajectories/`` and the native ``~/.claude/projects/<slug>/`` paths), +dedup against existing influence rows, and the ``record`` writer. 
The semantic +verdict is agent-driven and is NOT tested here (there is no heuristic to test). +""" + +import json +import os +import re +import subprocess +import sys +from pathlib import Path + +import pytest + +pytestmark = [pytest.mark.platform_integrations] + +_REPO_ROOT = Path(__file__).parent.parent.parent +PROVENANCE_SCRIPT = _REPO_ROOT / "platform-integrations/claude/plugins/evolve-lite/skills/evolve-lite/provenance/scripts/provenance.py" + + +def _claude_slug(root: Path) -> str: + """Mirror provenance.py / doctor.py slugging: non-alphanumerics -> '-'.""" + return re.sub(r"[^A-Za-z0-9]", "-", str(root)) + + +def run_provenance(mode, *, evolve_dir, home=None, cwd=None, stdin=None): + env = {**os.environ} + env["EVOLVE_DIR"] = str(evolve_dir) + if home is not None: + env["HOME"] = str(home) + env["USERPROFILE"] = str(home) + return subprocess.run( + [sys.executable, str(PROVENANCE_SCRIPT), mode], + input=stdin, + capture_output=True, + text=True, + cwd=str(cwd) if cwd else None, + env=env, + check=False, + ) + + +def parse_jsonl(text): + return [json.loads(line) for line in text.splitlines() if line.strip()] + + +def read_audit(evolve_dir): + path = Path(evolve_dir) / "audit.log" + if not path.is_file(): + return [] + return [json.loads(line) for line in path.read_text(encoding="utf-8").splitlines() if line.strip()] + + +def write_audit(evolve_dir, rows): + path = Path(evolve_dir) / "audit.log" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text("".join(json.dumps(r) + "\n" for r in rows), encoding="utf-8") + + +def write_entity(evolve_dir, entity_id, body="Do the foo thing."): + path = Path(evolve_dir) / "entities" / f"{entity_id}.md" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(f"---\ntype: {entity_id.split('/')[0]}\ntrigger: when foo\n---\n\n{body}\n", encoding="utf-8") + return path + + +class TestCandidatesLegacyTrajectory: + def test_resolves_entity_and_legacy_trajectory(self, tmp_path): + evolve_dir = tmp_path / 
"proj" / ".evolve" + evolve_dir.mkdir(parents=True) + write_audit(evolve_dir, [{"event": "recall", "session_id": "sid-1", "entities": ["feedback/foo"]}]) + write_entity(evolve_dir, "feedback/foo") + traj = evolve_dir / "trajectories" / "claude-transcript_sid-1.jsonl" + traj.parent.mkdir(parents=True) + traj.write_text('{"type":"user","content":"hi"}\n', encoding="utf-8") + + result = run_provenance("candidates", evolve_dir=evolve_dir) + assert result.returncode == 0, result.stderr + candidates = parse_jsonl(result.stdout) + assert len(candidates) == 1 + cand = candidates[0] + assert cand["session_id"] == "sid-1" + assert cand["entity_id"] == "feedback/foo" + assert "Do the foo thing." in cand["entity_excerpt"] + assert cand["trajectory_path"] == str(traj) + assert "hi" in cand["trajectory_excerpt"] + assert "missing" not in cand + + +class TestCandidatesNativeTranscript: + def test_locates_native_claude_transcript(self, tmp_path): + # Sandbox a fake HOME and project root; the native locator builds + # ~/.claude/projects/<slug>/<session_id>.jsonl from the RESOLVED project root. 
+ home = tmp_path / "home" + evolve_dir = tmp_path / "proj" / ".evolve" + evolve_dir.mkdir(parents=True) + write_audit(evolve_dir, [{"event": "recall", "session_id": "nat-1", "entities": ["feedback/bar"]}]) + write_entity(evolve_dir, "feedback/bar", body="bar guidance") + + project_root = evolve_dir.resolve().parent + slug = _claude_slug(project_root) + native = home / ".claude" / "projects" / slug / "nat-1.jsonl" + native.parent.mkdir(parents=True) + native.write_text('{"x":1}\n', encoding="utf-8") + + result = run_provenance("candidates", evolve_dir=evolve_dir, home=home) + assert result.returncode == 0, result.stderr + candidates = parse_jsonl(result.stdout) + assert len(candidates) == 1 + cand = candidates[0] + assert cand["entity_id"] == "feedback/bar" + assert cand["trajectory_path"] == str(native) + assert "missing" not in cand + + +class TestCandidatesMissing: + def test_missing_trajectory_still_emitted(self, tmp_path): + # Empty HOME -> no native transcript, no legacy dir -> trajectory missing. 
+ home = tmp_path / "home" + home.mkdir() + evolve_dir = tmp_path / "proj" / ".evolve" + evolve_dir.mkdir(parents=True) + write_audit(evolve_dir, [{"event": "recall", "session_id": "sid-x", "entities": ["feedback/foo"]}]) + write_entity(evolve_dir, "feedback/foo") + + result = run_provenance("candidates", evolve_dir=evolve_dir, home=home) + assert result.returncode == 0, result.stderr + candidates = parse_jsonl(result.stdout) + assert len(candidates) == 1 + assert candidates[0]["trajectory_path"] is None + assert candidates[0]["missing"] == ["trajectory"] + + def test_missing_entity_still_emitted(self, tmp_path): + home = tmp_path / "home" + evolve_dir = tmp_path / "proj" / ".evolve" + evolve_dir.mkdir(parents=True) + write_audit(evolve_dir, [{"event": "recall", "session_id": "sid-y", "entities": ["feedback/ghost"]}]) + traj = evolve_dir / "trajectories" / "claude-transcript_sid-y.jsonl" + traj.parent.mkdir(parents=True) + traj.write_text("{}\n", encoding="utf-8") + + result = run_provenance("candidates", evolve_dir=evolve_dir, home=home) + assert result.returncode == 0, result.stderr + candidates = parse_jsonl(result.stdout) + assert len(candidates) == 1 + assert candidates[0]["entity_excerpt"] is None + assert candidates[0]["missing"] == ["entity"] + + +class TestCandidatesDedup: + def test_skips_pairs_with_existing_influence_row(self, tmp_path): + evolve_dir = tmp_path / "proj" / ".evolve" + evolve_dir.mkdir(parents=True) + write_audit( + evolve_dir, + [ + {"event": "recall", "session_id": "sid-1", "entities": ["feedback/foo", "feedback/bar"]}, + {"event": "influence", "session_id": "sid-1", "entity": "feedback/foo", "verdict": "followed", "evidence": "x"}, + ], + ) + write_entity(evolve_dir, "feedback/foo") + write_entity(evolve_dir, "feedback/bar") + + result = run_provenance("candidates", evolve_dir=evolve_dir, home=tmp_path / "home") + assert result.returncode == 0, result.stderr + candidates = parse_jsonl(result.stdout) + ids = {c["entity_id"] for c in 
candidates} + # feedback/foo already assessed -> only feedback/bar remains. + assert ids == {"feedback/bar"} + + +class TestRecord: + def test_writes_valid_influence_row(self, tmp_path): + evolve_dir = tmp_path / "proj" / ".evolve" + evolve_dir.mkdir(parents=True) + payload = { + "session_id": "sid-1", + "entity": "feedback/foo", + "verdict": "followed", + "evidence": "Agent used the saved parser first.", + } + result = run_provenance("record", evolve_dir=evolve_dir, stdin=json.dumps(payload)) + assert result.returncode == 0, result.stderr + events = read_audit(evolve_dir) + assert len(events) == 1 + row = events[0] + assert row["event"] == "influence" + assert row["session_id"] == "sid-1" + assert row["entity"] == "feedback/foo" + assert row["verdict"] == "followed" + assert row["evidence"] == "Agent used the saved parser first." + assert "ts" in row + + def test_rejects_invalid_verdict(self, tmp_path): + evolve_dir = tmp_path / "proj" / ".evolve" + evolve_dir.mkdir(parents=True) + payload = {"session_id": "sid-1", "entity": "feedback/foo", "verdict": "bogus", "evidence": "no"} + result = run_provenance("record", evolve_dir=evolve_dir, stdin=json.dumps(payload)) + assert result.returncode == 1 + assert "verdict" in result.stderr.lower() + assert read_audit(evolve_dir) == [] + + def test_record_dedups_existing_pair(self, tmp_path): + evolve_dir = tmp_path / "proj" / ".evolve" + evolve_dir.mkdir(parents=True) + payload = {"session_id": "sid-1", "entity": "feedback/foo", "verdict": "followed", "evidence": "e"} + first = run_provenance("record", evolve_dir=evolve_dir, stdin=json.dumps(payload)) + second = run_provenance( + "record", + evolve_dir=evolve_dir, + stdin=json.dumps({**payload, "verdict": "contradicted", "evidence": "e2"}), + ) + assert first.returncode == 0, first.stderr + assert second.returncode == 0, second.stderr + events = read_audit(evolve_dir) + assert len(events) == 1 + assert events[0]["verdict"] == "followed"