From e61ad727e9b78a83101bc7570799e3288a768d36 Mon Sep 17 00:00:00 2001 From: Ashmit Biswas Date: Sat, 2 May 2026 18:00:16 +0530 Subject: [PATCH] =?UTF-8?q?feat(historian):=20M4=20=E2=80=94=20cross-sessi?= =?UTF-8?q?on=20feature=20memory?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per-feature persistent memory file at /.canopy/memory/.md that survives session boundaries. Auto-read by canopy switch so a fresh agent picks up where the last one (or another agent on a different day) left off — eliminating the "re-derive PR state, past decisions, file context every session" tax. Three sections, newest content first within each: - Resolutions log — per-comment outcomes (✓ resolved, ⊙ likely-resolved by classifier, ⊘ deferred). Never compacted. - PR context — one block per PR with rationale + chronological updates. Never compacted. - Sessions — per-session narrative entries (decisions, pauses, events). Trimmed by historian_compact. Pieces ------ - actions/historian.py (new): record_decision, record_pause, record_event, record_comment_read, record_comment_resolved, record_comment_deferred, record_classifier_resolved, record_pr_context, record_pr_update, read, format_for_agent, compact. Storage is line-delimited JSON behind the scenes; rendered .md kept alongside for grep / external tooling. Append writes use fcntl.flock for cross-worktree safety. Decision / comment_read / classifier_resolved are deduped per-session via CANOPY_SESSION_ID (defaults to UTC date for manual / test runs). - 5 new MCP tools: historian_decide, historian_pause, historian_defer_comment, feature_memory, historian_compact. All fall back to the canonical feature when feature= is omitted. - 2 new CLI commands: canopy historian show / compact (read-only + manual compaction). - Switch integration: switch_impl response gains a memory: field (empty string when no memory yet). Agent reads it immediately on switch — no extra MCP call. 
- commit --address: on success, mirrors the resolution into historian via record_comment_resolved (extends M3's flow). - github_get_pr_comments: when the alias resolves to a tracked feature, records each actionable thread (record_comment_read, deduped per-session by id) plus the temporal classifier's likely-resolved batch (record_classifier_resolved, once per session). - .canopy/memory/.gitignore auto-written on first use so per-feature memory files don't accidentally get committed. - using-canopy SKILL.md: new "Cross-session memory (Historian)" section teaching when to call each tool, the auto-capture surfaces, and the tail-fallback format for the (deferred) Stop-hook capture. Compaction (v1) --------------- Mechanical only — drops session entries beyond keep_sessions while preserving Resolutions log + PR context. The plan reserves an LLM summarization pass for a follow-up; storage shape is forward-compatible (JSONL + per-entry kind tags). Deferred (per the plan's contingency) -------------------------------------- Auto-capture for events / pauses / pr_context / pr_update via PostToolUse + Stop hooks ships when the autopilot hook infrastructure lands. Until then, decisions and pauses are explicit MCP calls (which the skill teaches), and comment-read / comment-resolved fire from in-canopy code paths rather than external hooks. Categories 1, 3, 5, 6 are live in v1; 2, 7, 8 wait on hooks. Tests ----- +22 new in tests/test_historian.py: paths, append+read round-trip, decision dedup (within-session + across-session), comment_read dedup, classifier dedup, comment_resolved + comment_deferred, PR context + updates, format_for_agent (empty + 3-section render + missing-section placeholders), .md mirror, compact (noop within limit + drops old sessions while preserving structural). Suite: 602 → 624 passing. Docs ---- - concepts.md: new §3.x "Cross-session memory (M4)" describing the three sections, append-only semantics, and auto-capture surfaces. 
- commands.md: historian show / compact. - mcp.md: 5 new historian tools with one-line descriptions. - agents.md / CLAUDE.md: tool count bumped to 54; package layout shows actions/historian.py. Status: feature-complete; flip frontmatter + archive on PR merge. --- CLAUDE.md | 5 +- docs/agents.md | 2 +- docs/commands.md | 2 + docs/concepts.md | 10 + docs/mcp.md | 5 + docs/plans/INDEX.md | 4 +- docs/plans/historian.md | 2 +- src/canopy/actions/commit.py | 15 + src/canopy/actions/historian.py | 594 ++++++++++++++++++ src/canopy/actions/reads.py | 54 ++ src/canopy/actions/switch.py | 9 + .../agent_setup/skills/using-canopy/SKILL.md | 21 + src/canopy/cli/main.py | 85 +++ src/canopy/mcp/server.py | 111 ++++ tests/test_historian.py | 240 +++++++ 15 files changed, 1153 insertions(+), 6 deletions(-) create mode 100644 src/canopy/actions/historian.py create mode 100644 tests/test_historian.py diff --git a/CLAUDE.md b/CLAUDE.md index cee55c5..1134d31 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -33,6 +33,7 @@ src/canopy/ │ ├── bot_resolutions.py # M3: persistent log of bot comments addressed via `commit --address` │ ├── bot_status.py # M3: per-feature bot-comment rollup │ ├── augments.py # M2: per-workspace augment resolver (preflight_cmd, review_bots, ...) 
+│ ├── historian.py # M4: cross-session feature memory at .canopy/memory/.md │ ├── preflight_state.py # records preflight result for state machine │ ├── reads.py # 4 alias-aware read primitives │ ├── realign.py # internal helper used by switch (deprecated from CLI/MCP in Wave 2.9) @@ -53,7 +54,7 @@ src/canopy/ │ ├── github.py # GitHub PR + comments (MCP or gh CLI fallback) │ └── precommit.py # detect + run pre-commit hooks └── mcp/ - ├── server.py # MCP server — 49 tools, stdio transport + ├── server.py # MCP server — 54 tools, stdio transport └── client.py # MCP client — stdio + HTTP+OAuth transports ``` @@ -98,7 +99,7 @@ For integration testing against real services, see `~/projects/canopy-test/` (me - **Action contract:** `actions/protocol.py` (planned) will formalize the per-repo `{status, before, after, reason?}` shape. For now, each action returns it ad-hoc. - **Skill bundling:** Bundled skills live at `src/canopy/agent_setup/skills//SKILL.md`. `canopy setup-agent` copies them to `~/.claude/skills//SKILL.md`. The default `using-canopy` skill always installs; opt-in extras (e.g. `augment-canopy`) install via `--skill ` (repeatable). Foreign skills with the same path are not overwritten without `--reinstall`. The `_SKILL_SOURCE` constant remains as a backward-compat alias pointing at `using-canopy`'s source. -## MCP Server (49 tools) +## MCP Server (54 tools) Grouped by topic. Run with `canopy-mcp` (entry point) or `python -m canopy.mcp.server`. diff --git a/docs/agents.md b/docs/agents.md index ab69b7e..310ae53 100644 --- a/docs/agents.md +++ b/docs/agents.md @@ -6,7 +6,7 @@ How AI coding agents (Claude Code primarily; others by analogy) integrate with c Three pieces, all installed in one step by `canopy init`: -1. **Canopy MCP server** (`canopy-mcp` binary) — 49 tools exposing every canopy operation. Registered in `/.mcp.json`. +1. **Canopy MCP server** (`canopy-mcp` binary) — 54 tools exposing every canopy operation. Registered in `/.mcp.json`. 2. 
**`using-canopy` skill** at `~/.claude/skills/using-canopy/SKILL.md` — tells the agent *when* to prefer canopy MCP over raw bash. 3. **Per-workspace MCP config** in `/.mcp.json` with `CANOPY_ROOT` set so the server scopes to the right workspace. diff --git a/docs/commands.md b/docs/commands.md index 02774e4..24a3e9f 100644 --- a/docs/commands.md +++ b/docs/commands.md @@ -54,6 +54,8 @@ Write actions and execution. | `canopy sync` | Pull default branch + rebase feature branches across repos. | | `canopy commit -m [--feature ] [--repo ] [--paths

] [--no-hooks] [--amend] [--address ]` | **Wave 2.3 + M3.** Commit across every repo in the canonical (or named) feature with a single message. Pre-flight refuses with `BlockerError(code='wrong_branch')` if any in-scope repo has drifted; per-repo hook failures don't cancel the others (status: `hooks_failed`). `--address ` (numeric id or GitHub URL) auto-suffixes the message with the bot comment's title + URL and records the resolution in `.canopy/state/bot_resolutions.json`. Non-bot comments raise `BlockerError(code='not_a_bot_comment')`. | | `canopy bot-status [--feature ] [--unresolved-only]` | **M3.** Per-feature rollup of bot review comments — total / resolved / unresolved per repo + an `all_resolved` flag. Bot vs human classification respects `[augments] review_bots` in canopy.toml. | +| `canopy historian show []` | **M4.** Print the rendered memory file for a feature (3 sections: resolutions log, PR context, sessions). Returns empty when no memory has been recorded yet. | +| `canopy historian compact [] [--keep-sessions ]` | **M4.** Trim the Sessions section to the most-recent N (default 5). Resolutions log + PR context are preserved regardless. v1 is mechanical (no LLM); future iterations will summarize. | | `canopy push [--feature ] [--repo ] [--set-upstream] [--force-with-lease] [--dry-run]` | **Wave 2.3.** Push the feature branch in every in-scope repo. Pre-flight raises `BlockerError(code='no_upstream')` if any repo lacks an upstream and `--set-upstream` was not passed; the fix-action carries the same args + `--set-upstream` so an agent retries mechanically. Per-repo statuses: `ok`, `up_to_date`, `rejected`, `failed`. | ## Verify diff --git a/docs/concepts.md b/docs/concepts.md index 6b06ae9..d83bde5 100644 --- a/docs/concepts.md +++ b/docs/concepts.md @@ -127,6 +127,16 @@ The dashboard's CTA is whichever node you're sitting on. 
Drift always wins — i For **worktree-backed** features, the drift detection runs against the worktree path (not main), so a worktree-backed feature is only `drifted` if someone manually `git checkout`'d to a different branch *inside the worktree*. The fix is `switch` (re-establishes the feature context), not `realign` (which would touch main and undo the protection worktrees were supposed to provide). +### Cross-session memory (M4) + +`canopy switch` returns a `memory: ` field rendered from `/.canopy/memory/.md` — a per-feature persistent log of decisions, comment activity, PR context, and session entries. Agents read it on switch instead of re-deriving "where was I, what's resolved, what's blocked." The memory is append-only (concurrent agents on the same feature flock-serialize), with three top-level sections: + +- **Resolutions log** — per-comment outcomes (✓ resolved, ⊙ likely-resolved by classifier, ⊘ deferred). Never compacted. +- **PR context** — one block per PR with rationale + chronological updates. Never compacted. +- **Sessions** — newest-first per-session entries (decisions, pauses, events). Trimmed by `historian_compact`. + +Auto-capture wires existing canopy actions: `commit --address` mirrors the bot resolution into memory; `github_get_pr_comments` records each actionable thread + the temporal classifier's likely-resolved batch (deduped per session). Explicit `historian_decide` / `historian_pause` cover the agent's narrative side. See [docs/plans/historian.md](plans/historian.md) for the full design. + ## 4. The canonical-slot model Every feature in canopy lives in exactly one of three states: diff --git a/docs/mcp.md b/docs/mcp.md index 8114561..c14e657 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -59,6 +59,11 @@ Grouped by topic. Every tool is alias-aware where it accepts a feature input. | `feature_link_linear` | Attach a Linear issue to a feature | | `feature_state` | **Dashboard backend.** Returns `{state, summary, next_actions, warnings}`. 
State ∈ `{drifted, needs_work, in_progress, ready_to_commit, ready_to_push, awaiting_bot_resolution, awaiting_review, approved, no_prs}`. The `summary` carries split `actionable_human_count` + `actionable_bot_count` (M3). See [concepts.md](concepts.md#3-the-9-state-machine). | | `bot_comments_status` | **M3.** Per-feature rollup of bot review comments — `{feature, repos: {: {pr_number, total, resolved, unresolved, threads}}, all_resolved, any_bot_comments}`. Resolutions come from the persistent log written by `commit --address`. | +| `historian_decide` | **M4.** Record one or more decisions in the feature's memory file. Accepts `decisions: [{title, rationale}, ...]`. Deduped per-session by title. | +| `historian_pause` | **M4.** Record why the agent stopped — what's blocked, what's needed next. | +| `historian_defer_comment` | **M4.** Mark a review comment as intentionally deferred with a reason. | +| `feature_memory` | **M4.** Read the rendered memory file as markdown — `{feature, memory: }`. | +| `historian_compact` | **M4.** Trim the Sessions section to the most-recent N (default 5). Resolutions log + PR context are always preserved. | #### Action (Wave 2) diff --git a/docs/plans/INDEX.md b/docs/plans/INDEX.md index 579bbb6..1625c6b 100644 --- a/docs/plans/INDEX.md +++ b/docs/plans/INDEX.md @@ -2,7 +2,7 @@ Live status of canopy's planned work. Update this file as milestones progress; each plan's frontmatter is the per-plan source of truth, this doc is the rolled-up dashboard. -**Last updated:** 2026-05-02 (M0, M1, M2, M3, M5 shipped) +**Last updated:** 2026-05-02 (M0, M1, M2, M3, M5 shipped; M4 in-progress) **Roadmap:** [roadmap.md](roadmap.md) — full architecture context, cross-cutting decisions, sequencing rationale ## Status legend @@ -28,7 +28,7 @@ Live status of canopy's planned work. Update this file as milestones progress; e Per-workspace `[augments]` block in canopy.toml + opt-in `augment-canopy` skill. 
Wires `preflight_cmd`; reserves `review_bots` (M3) and `test_cmd` (future). - [x] ✅ **M3 — Bot-comment tracking** — [archive/bot-tracking.md](archive/bot-tracking.md) · shipped 2026-05-02 Bot vs human comment classification, `commit --address `, `awaiting_bot_resolution` state, `bot-status` rollup. -- [ ] 🟦 **M4 — Historian** — [historian.md](historian.md) · P1 · ~5-6d · depends on M3 +- [ ] 🟨 **M4 — Historian** — [historian.md](historian.md) · P1 · ~5-6d · depends on M3 Cross-session feature memory at `.canopy/memory/.md`. Auto-read on `canopy switch`. - [x] ✅ **M5 — Issue-provider scaffold** — [archive/issue-providers.md](archive/issue-providers.md) · shipped 2026-04-27 Linear refactored into the contract; GitHub Issues backend. New `issue_get` / `issue_list_my_issues` MCP tools; old `linear_*` retained as deprecated aliases. Closes [#5](https://github.com/ashmitb95/canopy/issues/5). diff --git a/docs/plans/historian.md b/docs/plans/historian.md index 127566c..16086c0 100644 --- a/docs/plans/historian.md +++ b/docs/plans/historian.md @@ -1,5 +1,5 @@ --- -status: queued +status: in-progress priority: P1 effort: ~5-6d depends_on: ["bot-tracking.md"] diff --git a/src/canopy/actions/commit.py b/src/canopy/actions/commit.py index 10ec5e4..1d25904 100644 --- a/src/canopy/actions/commit.py +++ b/src/canopy/actions/commit.py @@ -309,6 +309,21 @@ def commit( comment_title=addressed_info["title"], comment_url=addressed_info["url"], ) + # Mirror the resolution into historian (M4) so the per-feature + # memory file's Resolutions log stays current. Non-fatal if + # the historian write fails — the canonical state is still in + # bot_resolutions.json. + try: + from . 
import historian + historian.record_comment_resolved( + workspace.config.root, feature_name, + comment_id=addressed_info["comment_id"], + commit_sha=sha, + gist=addressed_info["title"], + url=addressed_info["url"], + ) + except Exception: + pass addressed_info["sha"] = sha addressed_info["recorded"] = True else: diff --git a/src/canopy/actions/historian.py b/src/canopy/actions/historian.py new file mode 100644 index 0000000..b36b762 --- /dev/null +++ b/src/canopy/actions/historian.py @@ -0,0 +1,594 @@ +"""Cross-session feature memory (M4). + +Per-feature markdown file at ``/.canopy/memory/.md`` +that captures decisions, events, comment activity, and PR context across +agent sessions. Auto-read by ``canopy switch`` so a fresh agent picks up +where the last one left off. + +Three top-level sections (newest content first within each): + +1. **Resolutions log** — per-comment outcomes; ``✓`` resolved, ``⊙`` likely- + resolved by classifier, ``⚠`` unresolved, ``⊘`` deferred. Never + compacted (the always-current source of truth for review state). +2. **PR context** — one block per PR opened against the feature, plus + per-PR update entries. Never compacted. +3. **Sessions** — per-session narrative entries. The only section that + gets compacted on switch-away. + +API contract: every record function appends a structured entry; reads +return either raw structured entries (for tests / extensions) or rendered +markdown (for the agent / dashboard). Storage is line-delimited JSON +under the hood, rendered to markdown on demand. This keeps writes O(1) +and lets the rendering layer evolve without a data migration. + +File concurrency: writes use ``fcntl.flock`` with the same pattern as +``.canopy/state/heads.json`` so concurrent agents on the same feature +across worktrees don't corrupt the log. 
+""" +from __future__ import annotations + +import fcntl +import json +import os +import tempfile +from contextlib import contextmanager +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Iterable + + +_MEMORY_DIR = ".canopy/memory" + +# Storage is JSONL; the public surface is the rendered .md. We keep both +# alongside each other so external tools can grep the markdown while the +# write path stays append-only. +_STORE_SUFFIX = ".jsonl" +_RENDER_SUFFIX = ".md" + + +# ── Paths ──────────────────────────────────────────────────────────────── + + +def _memory_dir(workspace_root: Path) -> Path: + return workspace_root / _MEMORY_DIR + + +def store_path(workspace_root: Path, feature: str) -> Path: + """Append-only JSONL store for the feature's memory entries.""" + return _memory_dir(workspace_root) / f"{feature}{_STORE_SUFFIX}" + + +def render_path(workspace_root: Path, feature: str) -> Path: + """Rendered markdown view written alongside the store.""" + return _memory_dir(workspace_root) / f"{feature}{_RENDER_SUFFIX}" + + +def _now_iso() -> str: + return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + +# ── Locking + atomic write helpers ────────────────────────────────────── + + +@contextmanager +def _locked_append(path: Path): + """Append-mode file handle with an exclusive flock. + + Same pattern the post-checkout hook uses for heads.json — concurrent + agents writing to the same feature's memory queue safely. The first + write into the memory directory drops a ``.gitignore`` so the + per-feature memory files don't accidentally get committed. 
+ """ + path.parent.mkdir(parents=True, exist_ok=True) + _ensure_memory_gitignore(path.parent) + with open(path, "a", encoding="utf-8") as f: + try: + fcntl.flock(f.fileno(), fcntl.LOCK_EX) + yield f + finally: + fcntl.flock(f.fileno(), fcntl.LOCK_UN) + + +def _ensure_memory_gitignore(memory_dir: Path) -> None: + """Drop a ``.gitignore`` that ignores everything under .canopy/memory/. + + Memory files are local working state — useful to the agent on this + machine, not something to commit to the workspace's repos. The + .gitignore itself stays tracked so the policy is visible in the diff. + """ + gi = memory_dir / ".gitignore" + if gi.exists(): + return + gi.write_text("# Auto-written by canopy historian (M4).\n*\n!.gitignore\n") + + +def _atomic_write(path: Path, text: str) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + fd, tmp = tempfile.mkstemp( + prefix=f".{path.name}.", suffix=".tmp", dir=str(path.parent), + ) + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + f.write(text) + os.replace(tmp, path) + except Exception: + try: + os.unlink(tmp) + except FileNotFoundError: + pass + raise + + +# ── Append + load primitives ──────────────────────────────────────────── + + +def _append_entry(workspace_root: Path, feature: str, entry: dict[str, Any]) -> None: + entry.setdefault("at", _now_iso()) + line = json.dumps(entry, sort_keys=True, ensure_ascii=False) + with _locked_append(store_path(workspace_root, feature)) as f: + f.write(line + "\n") + # Re-render the markdown view so external readers see fresh state. 
+ _refresh_render(workspace_root, feature) + + +def _load_entries(workspace_root: Path, feature: str) -> list[dict[str, Any]]: + path = store_path(workspace_root, feature) + if not path.exists(): + return [] + out: list[dict[str, Any]] = [] + with open(path, "r", encoding="utf-8") as f: + for line in f: + line = line.rstrip("\n") + if not line: + continue + try: + out.append(json.loads(line)) + except json.JSONDecodeError: + continue + return out + + +# ── Public record API ─────────────────────────────────────────────────── + + +def record_decision( + workspace_root: Path, feature: str, *, + title: str, rationale: str = "", at: str | None = None, +) -> dict[str, Any]: + """Capture a decision the agent made (e.g. choosing one library over another). + + Decisions are deduplicated by ``title`` within the most-recent session + so the hybrid capture mechanism (explicit tool call + Stop-hook + tail-parse) doesn't double-log. + """ + entry = { + "kind": "decision", "title": title, "rationale": rationale, + "at": at or _now_iso(), "session": _current_session_id(), + } + if _decision_already_logged(workspace_root, feature, title, entry["session"]): + return {"action": "deduped", "title": title} + _append_entry(workspace_root, feature, entry) + return {"action": "recorded", "title": title} + + +def record_event( + workspace_root: Path, feature: str, *, + summary: str, kind: str = "event", at: str | None = None, +) -> dict[str, Any]: + """One-line summary of a tool invocation (Edit, Bash, preflight, etc.). + + The ``kind`` field lets later renderers group events by type + (e.g. "edited" vs "ran" vs "preflight"). Defaults to ``event``. 
+ """ + entry = { + "kind": kind, "summary": summary, + "at": at or _now_iso(), "session": _current_session_id(), + } + _append_entry(workspace_root, feature, entry) + return {"action": "recorded", "summary": summary} + + +def record_pause( + workspace_root: Path, feature: str, *, + reason: str, at: str | None = None, +) -> dict[str, Any]: + """Capture why the agent stopped — what's blocked, what's needed next.""" + entry = { + "kind": "pause", "reason": reason, + "at": at or _now_iso(), "session": _current_session_id(), + } + _append_entry(workspace_root, feature, entry) + return {"action": "recorded"} + + +def record_comment_read( + workspace_root: Path, feature: str, *, + comment_id: str | int, author: str, path: str, line: int = 0, + body_excerpt: str = "", url: str = "", at: str | None = None, +) -> dict[str, Any]: + """Log that the agent read a specific comment. Deduped per-session by id.""" + cid = str(comment_id) + if _comment_read_already_logged(workspace_root, feature, cid, _current_session_id()): + return {"action": "deduped", "comment_id": cid} + entry = { + "kind": "comment_read", "comment_id": cid, "author": author, + "path": path, "line": line, "body_excerpt": body_excerpt, "url": url, + "at": at or _now_iso(), "session": _current_session_id(), + } + _append_entry(workspace_root, feature, entry) + return {"action": "recorded", "comment_id": cid} + + +def record_comment_resolved( + workspace_root: Path, feature: str, *, + comment_id: str | int, author: str = "", path: str = "", line: int = 0, + commit_sha: str, gist: str = "", url: str = "", at: str | None = None, +) -> dict[str, Any]: + """Log that a comment was addressed by a specific commit.""" + entry = { + "kind": "comment_resolved", "comment_id": str(comment_id), + "author": author, "path": path, "line": line, + "commit_sha": commit_sha, "gist": gist, "url": url, + "at": at or _now_iso(), "session": _current_session_id(), + } + _append_entry(workspace_root, feature, entry) + return {"action": 
"recorded", "comment_id": str(comment_id)} + + +def record_comment_deferred( + workspace_root: Path, feature: str, *, + comment_id: str | int, reason: str, author: str = "", path: str = "", + line: int = 0, url: str = "", at: str | None = None, +) -> dict[str, Any]: + """Log a comment the user / agent intentionally deferred.""" + entry = { + "kind": "comment_deferred", "comment_id": str(comment_id), + "reason": reason, "author": author, "path": path, "line": line, + "url": url, "at": at or _now_iso(), "session": _current_session_id(), + } + _append_entry(workspace_root, feature, entry) + return {"action": "recorded", "comment_id": str(comment_id)} + + +def record_classifier_resolved( + workspace_root: Path, feature: str, *, + threads: list[dict], at: str | None = None, +) -> dict[str, Any]: + """Log the temporal classifier's likely-resolved set (one batch per session).""" + if not threads: + return {"action": "noop"} + if _classifier_already_logged(workspace_root, feature, _current_session_id()): + return {"action": "deduped"} + entry = { + "kind": "classifier_resolved", "threads": threads, + "at": at or _now_iso(), "session": _current_session_id(), + } + _append_entry(workspace_root, feature, entry) + return {"action": "recorded", "count": len(threads)} + + +def record_pr_context( + workspace_root: Path, feature: str, *, + pr_number: int, repo: str, title: str, base: str = "main", + rationale: str = "", url: str = "", at: str | None = None, +) -> dict[str, Any]: + """Log when a PR is opened for the feature.""" + entry = { + "kind": "pr_context", "pr_number": pr_number, "repo": repo, + "title": title, "base": base, "rationale": rationale, "url": url, + "at": at or _now_iso(), "session": _current_session_id(), + } + _append_entry(workspace_root, feature, entry) + return {"action": "recorded", "pr_number": pr_number} + + +def record_pr_update( + workspace_root: Path, feature: str, *, + pr_number: int, repo: str, summary: str, at: str | None = None, +) -> dict[str, 
Any]: + """Log an update pushed to an existing PR.""" + entry = { + "kind": "pr_update", "pr_number": pr_number, "repo": repo, + "summary": summary, + "at": at or _now_iso(), "session": _current_session_id(), + } + _append_entry(workspace_root, feature, entry) + return {"action": "recorded", "pr_number": pr_number} + + +# ── Read API ──────────────────────────────────────────────────────────── + + +def read(workspace_root: Path, feature: str) -> list[dict[str, Any]]: + """Return the raw entries (oldest → newest).""" + return _load_entries(workspace_root, feature) + + +def format_for_agent(workspace_root: Path, feature: str) -> str: + """Render the memory as markdown for inclusion in switch responses. + + Returns an empty string when no memory exists yet (so callers can + cheaply check truthiness before embedding). + """ + entries = _load_entries(workspace_root, feature) + if not entries: + return "" + return _render(feature, entries) + + +# ── Compaction ────────────────────────────────────────────────────────── + + +def compact( + workspace_root: Path, feature: str, *, keep_sessions: int = 5, +) -> dict[str, Any]: + """Trim the Sessions section to the most-recent ``keep_sessions``. + + v1 deliberately avoids an LLM call — it just drops session entries + older than the cutoff. The Resolutions log + PR context entries are + always preserved, regardless of session age. The plan reserves a + future LLM-based summarization pass; until then this keeps the file + bounded without losing structured state. 
+ """ + entries = _load_entries(workspace_root, feature) + if not entries: + return {"action": "noop", "reason": "no memory file"} + + sessions_seen: list[str] = [] + for e in reversed(entries): + s = e.get("session") + if s and s not in sessions_seen: + sessions_seen.append(s) + if len(sessions_seen) > keep_sessions: + break + + if len(sessions_seen) <= keep_sessions: + return {"action": "noop", "reason": "already within keep_sessions"} + + keep_ids = set(sessions_seen[:keep_sessions]) + structural_kinds = { + "comment_resolved", "comment_deferred", "classifier_resolved", + "pr_context", "pr_update", + } + kept = [ + e for e in entries + if e.get("kind") in structural_kinds + or e.get("session") in keep_ids + or e.get("session") is None # legacy entries without session + ] + dropped = len(entries) - len(kept) + + # Rewrite the JSONL store atomically. + text = "\n".join( + json.dumps(e, sort_keys=True, ensure_ascii=False) for e in kept + ) + if text: + text += "\n" + _atomic_write(store_path(workspace_root, feature), text) + _refresh_render(workspace_root, feature) + return {"action": "compacted", "kept": len(kept), "dropped": dropped} + + +# ── Internals ─────────────────────────────────────────────────────────── + + +def _current_session_id() -> str: + """Stable per-process id so dedup-per-session works. + + Defaults to ``CANOPY_SESSION_ID`` when set (autopilot / external + runners can pass a stable id across tool calls). Falls back to the + UTC date so manual CLI / test runs still cluster sensibly. 
+ """ + explicit = os.environ.get("CANOPY_SESSION_ID") + if explicit: + return explicit + return datetime.now(timezone.utc).strftime("%Y-%m-%d") + + +def _decision_already_logged( + workspace_root: Path, feature: str, title: str, session: str, +) -> bool: + for e in reversed(_load_entries(workspace_root, feature)): + if e.get("session") != session: + return False + if e.get("kind") == "decision" and e.get("title") == title: + return True + return False + + +def _comment_read_already_logged( + workspace_root: Path, feature: str, comment_id: str, session: str, +) -> bool: + for e in reversed(_load_entries(workspace_root, feature)): + if e.get("session") != session: + return False + if e.get("kind") == "comment_read" and e.get("comment_id") == comment_id: + return True + return False + + +def _classifier_already_logged( + workspace_root: Path, feature: str, session: str, +) -> bool: + for e in reversed(_load_entries(workspace_root, feature)): + if e.get("session") != session: + return False + if e.get("kind") == "classifier_resolved": + return True + return False + + +def _refresh_render(workspace_root: Path, feature: str) -> None: + entries = _load_entries(workspace_root, feature) + text = _render(feature, entries) if entries else "" + _atomic_write(render_path(workspace_root, feature), text) + + +# ── Markdown rendering ────────────────────────────────────────────────── + + +def _render(feature: str, entries: list[dict[str, Any]]) -> str: + resolutions = _render_resolutions(entries) + pr_context = _render_pr_context(entries) + sessions = _render_sessions(entries) + parts = [f"# Feature: {feature}\n"] + parts.append("## Resolutions log\n\n" + (resolutions or "_(no comment activity yet)_\n")) + parts.append("## PR context\n\n" + (pr_context or "_(no PRs opened yet)_\n")) + parts.append("## Sessions (newest first)\n\n" + (sessions or "_(no sessions logged yet)_\n")) + return "\n".join(parts) + + +def _render_resolutions(entries: list[dict[str, Any]]) -> str: + 
"""Per-comment outcomes — never compacted.""" + items: list[str] = [] + for e in entries: + kind = e.get("kind") + if kind == "comment_resolved": + sha = (e.get("commit_sha") or "")[:8] + cid = e.get("comment_id", "?") + author = e.get("author", "?") + file_loc = _file_loc(e) + gist = e.get("gist", "") + items.append(_resolution_line("✓", cid, author, file_loc, f"resolved by {sha}", gist)) + elif kind == "classifier_resolved": + for t in e.get("threads", []): + cid = t.get("id", t.get("comment_id", "?")) + author = t.get("author", "?") + file_loc = _thread_file_loc(t) + reason = t.get("reason", "file modified since") + items.append(_resolution_line("⊙", cid, author, file_loc, "likely-resolved by classifier", reason)) + elif kind == "comment_deferred": + cid = e.get("comment_id", "?") + author = e.get("author", "?") + file_loc = _file_loc(e) + items.append(_resolution_line("⊘", cid, author, file_loc, "DEFERRED", e.get("reason", ""))) + if not items: + return "" + # Newest first. + return "\n".join(reversed(items)) + "\n" + + +def _render_pr_context(entries: list[dict[str, Any]]) -> str: + """One block per PR + ordered updates.""" + by_pr: dict[tuple[str, int], dict[str, Any]] = {} + for e in entries: + if e.get("kind") == "pr_context": + key = (e.get("repo", ""), e.get("pr_number", 0)) + by_pr.setdefault(key, {"context": None, "updates": []}) + by_pr[key]["context"] = e + elif e.get("kind") == "pr_update": + key = (e.get("repo", ""), e.get("pr_number", 0)) + by_pr.setdefault(key, {"context": None, "updates": []}) + by_pr[key]["updates"].append(e) + if not by_pr: + return "" + + blocks: list[str] = [] + for (repo, pr_num), data in sorted(by_pr.items(), key=lambda kv: -kv[0][1]): + ctx = data["context"] or {} + title = ctx.get("title", "(no title recorded)") + opened = ctx.get("at", "")[:10] + base = ctx.get("base", "main") + url = ctx.get("url", "") + rationale = ctx.get("rationale", "") + header = f"### PR #{pr_num} — {repo} — {title}\n" + body_lines = 
[f"**Opened:** {opened} against `{base}`"] + if url: + body_lines.append(f"**URL:** {url}") + if rationale: + body_lines.append(f"**Rationale:** {rationale}") + if data["updates"]: + body_lines.append("") + body_lines.append("**Updates:**") + # Newest update first. + for u in reversed(data["updates"]): + body_lines.append(f"- {u.get('at', '')[:10]}: {u.get('summary', '')}") + blocks.append(header + "\n".join(body_lines) + "\n") + return "\n".join(blocks) + + +def _render_sessions(entries: list[dict[str, Any]]) -> str: + """Group by session id, newest session first, with a per-entry digest.""" + sessions: dict[str, list[dict[str, Any]]] = {} + order: list[str] = [] + for e in entries: + sid = e.get("session") or "_unsessioned" + if sid not in sessions: + sessions[sid] = [] + order.append(sid) + sessions[sid].append(e) + if not sessions: + return "" + + blocks: list[str] = [] + for sid in reversed(order): + block = [f"### {sid}"] + for e in sessions[sid]: + block.append(_session_line(e)) + blocks.append("\n".join(block) + "\n") + return "\n".join(blocks) + + +# ── Tiny render helpers ───────────────────────────────────────────────── + + +def _resolution_line( + glyph: str, cid: Any, author: str, file_loc: str, status: str, gist: str, +) -> str: + head = f"- {glyph} comment {cid} ({author}{file_loc}) {status}" + if gist: + return head + f"\n {gist}" + return head + + +def _file_loc(entry: dict[str, Any]) -> str: + path = entry.get("path", "") + line = entry.get("line", 0) + if not path: + return "" + if line: + return f", {path}:{line}" + return f", {path}" + + +def _thread_file_loc(thread: dict[str, Any]) -> str: + return _file_loc(thread) + + +def _session_line(entry: dict[str, Any]) -> str: + kind = entry.get("kind", "") + when = entry.get("at", "")[11:19] # HH:MM:SS slice of ISO + if kind == "decision": + title = entry.get("title", "") + rationale = entry.get("rationale", "") + if rationale: + return f"- [{when}] **decision:** {title} — {rationale}" + return f"- 
[{when}] **decision:** {title}" + if kind == "pause": + return f"- [{when}] **pause:** {entry.get('reason', '')}" + if kind == "comment_read": + cid = entry.get("comment_id", "?") + author = entry.get("author", "?") + path = entry.get("path", "") + line = entry.get("line", 0) + loc = f" {path}:{line}" if path else "" + excerpt = entry.get("body_excerpt", "") + suffix = f" — {excerpt}" if excerpt else "" + return f"- [{when}] read comment {cid} ({author}{loc}){suffix}" + if kind == "comment_resolved": + cid = entry.get("comment_id", "?") + sha = (entry.get("commit_sha") or "")[:8] + return f"- [{when}] resolved comment {cid} → {sha}" + if kind == "comment_deferred": + cid = entry.get("comment_id", "?") + return f"- [{when}] deferred comment {cid}: {entry.get('reason', '')}" + if kind == "classifier_resolved": + n = len(entry.get("threads", [])) + return f"- [{when}] classifier marked {n} thread(s) likely-resolved" + if kind == "pr_context": + return f"- [{when}] opened PR #{entry.get('pr_number', '?')} ({entry.get('repo', '')})" + if kind == "pr_update": + return f"- [{when}] PR #{entry.get('pr_number', '?')}: {entry.get('summary', '')}" + if kind == "event": + return f"- [{when}] {entry.get('summary', '')}" + return f"- [{when}] {kind}: {entry.get('summary', entry.get('title', ''))}" diff --git a/src/canopy/actions/reads.py b/src/canopy/actions/reads.py index 323b2b6..943e82f 100644 --- a/src/canopy/actions/reads.py +++ b/src/canopy/actions/reads.py @@ -161,6 +161,11 @@ def github_get_pr_comments(workspace: Workspace, alias: str) -> dict: ``actionable_threads`` / ``likely_resolved_threads`` / ``resolved_thread_count`` / ``latest_commit_at``), but accepts the full alias surface — feature alias, ``#``, or PR URL. + + M4 hook: when ``alias`` resolves to a tracked feature, each comment + seen here is logged into the feature's historian memory (deduped + per-session by id), and the temporal classifier's ``likely_resolved`` + set is logged once per session. 
""" from .review_filter import classify_threads @@ -194,6 +199,9 @@ def github_get_pr_comments(workspace: Workspace, alias: str) -> dict: **classification, } + # M4: mirror into historian when this alias maps to a tracked feature. + _historian_record_comments_read(workspace, alias, repos) + return { "alias": alias, "actionable_count": actionable_total, @@ -201,3 +209,49 @@ def github_get_pr_comments(workspace: Workspace, alias: str) -> dict: "resolved_thread_count": resolved_total, "repos": repos, } + + +def _historian_record_comments_read( + workspace: Workspace, alias: str, repos: dict[str, dict], +) -> None: + """Best-effort historian capture for `review_comments` reads (M4). + + Fails silently — the canonical comment data is the GitHub response; + historian is only a narrative layer. We only write when the alias + resolves cleanly to a feature in features.json. + """ + try: + from .aliases import resolve_feature + from . import historian + + feature_name = resolve_feature(workspace, alias) + except Exception: + return + + for repo_data in repos.values(): + for thread in repo_data.get("actionable_threads", []) or []: + cid = thread.get("id") + if cid is None: + continue + try: + historian.record_comment_read( + workspace.config.root, feature_name, + comment_id=cid, + author=thread.get("author", ""), + path=thread.get("path", ""), + line=thread.get("line", 0), + body_excerpt=(thread.get("body") or "").splitlines()[0][:120] + if thread.get("body") else "", + url=thread.get("url", ""), + ) + except Exception: + continue + # Classifier-resolved batch (one entry per session per call). 
+        likely = repo_data.get("likely_resolved_threads", []) or []
+        if likely:
+            try:
+                historian.record_classifier_resolved(
+                    workspace.config.root, feature_name, threads=likely,
+                )
+            except Exception:
+                pass
diff --git a/src/canopy/actions/switch.py b/src/canopy/actions/switch.py
index d8433e5..f9590bd 100644
--- a/src/canopy/actions/switch.py
+++ b/src/canopy/actions/switch.py
@@ -137,6 +137,15 @@ def switch(
         workspace, feature_name, new_canonical_paths, previously_canonical,
         out, release_current=release_current, per_repo_results=per_repo_results,
     )
+
+    # M4: include the new feature's persistent memory so the agent picks
+    # up cross-session context immediately. Empty string when no memory
+    # has been recorded yet — caller can ignore.
+    from . import historian
+    out["memory"] = historian.format_for_agent(
+        workspace.config.root, feature_name,
+    )
+
     return out
diff --git a/src/canopy/agent_setup/skills/using-canopy/SKILL.md b/src/canopy/agent_setup/skills/using-canopy/SKILL.md
index 7723c94..931f8b5 100644
--- a/src/canopy/agent_setup/skills/using-canopy/SKILL.md
+++ b/src/canopy/agent_setup/skills/using-canopy/SKILL.md
@@ -92,6 +92,27 @@ The `mcp__canopy__version` tool returns `{cli_version, mcp_version, schema_versi
 
 If the user wants canopy to behave differently here — *"use ruff for preflight"*, *"track CodeRabbit and Korbit as bots"*, *"the api repo runs `uv run pytest tests/fast` before commits"* — that's a **canopy.toml augment**. Suggest invoking the `augment-canopy` skill, which knows the schema and how to mutate the file safely. Install it with `canopy setup-agent --skill augment-canopy` if it isn't already.
 
+## Cross-session memory (Historian)
+
+Each feature has a persistent memory file at `<workspace>/.canopy/memory/<feature>.md` that survives session boundaries. When you call `mcp__canopy__switch(feature)`, the response includes a `memory: <markdown>` field — read it first before re-deriving anything.
+ +Three sections in the memory: **Resolutions log** (per-comment outcomes — never compacted), **PR context** (one block per PR), and **Sessions** (newest first; older sessions get trimmed by `historian_compact`). + +What to call when: + +- `mcp__canopy__historian_decide(feature, decisions=[{title, rationale}, ...])` — after picking an approach, after a pivot, before pausing. Decisions are deduped per-session by title, so it's safe to call repeatedly. +- `mcp__canopy__historian_pause(feature, reason)` — when stopping work mid-flow. The next session reads it on switch. +- `mcp__canopy__historian_defer_comment(feature, comment_id, reason)` — when intentionally skipping a review comment for a stated reason. +- `mcp__canopy__feature_memory(feature)` — re-read the memory at any point in the same session. +- `mcp__canopy__historian_compact(feature, keep_sessions=5)` — manual trim when the file grows long. Resolutions + PR context are never compacted. + +Auto-capture from canopy actions (no extra calls needed): + +- `mcp__canopy__commit(address=...)` records the resolution into memory automatically (mirrors `bot_resolutions.json`). +- `mcp__canopy__github_get_pr_comments(alias)` records `comment_read` for each actionable thread + `classifier_resolved` for the temporal-classifier output, deduped per-session. + +If you decided something but forgot to call `historian_decide`, end the turn with a `[{"title": "...", "rationale": "..."}, ...]` block. A future Stop hook (autopilot) will tail-parse it and persist (deduped against the explicit calls). + ## Bot review comments When `mcp__canopy__feature_state` returns state `awaiting_bot_resolution`, only bot nits (CodeRabbit, Korbit, Cubic, etc.) are blocking — humans haven't requested changes. The `summary` splits the actionable count into `actionable_bot_count` and `actionable_human_count` so you can tell which side needs attention. 
diff --git a/src/canopy/cli/main.py b/src/canopy/cli/main.py
index ab543c2..47dc7f2 100644
--- a/src/canopy/cli/main.py
+++ b/src/canopy/cli/main.py
@@ -2250,6 +2250,65 @@ def cmd_bot_status(args: argparse.Namespace) -> None:
     console.print()
 
 
+def _resolve_historian_feature(workspace, feature: str | None):
+    """Resolve (workspace_root, feature_name) for a historian CLI call."""
+    from ..actions import active_feature as af
+    from ..actions.aliases import resolve_feature
+    from ..actions.errors import BlockerError
+    if feature:
+        return workspace.config.root, resolve_feature(workspace, feature)
+    active = af.read_active(workspace)
+    if active is None:
+        raise BlockerError(
+            code="no_canonical_feature",
+            what="no active feature; pass <feature> or run `canopy switch <feature>` first",
+        )
+    return workspace.config.root, active.feature
+
+
+def cmd_historian(args: argparse.Namespace) -> None:
+    """Read or compact a feature's historian memory file (M4)."""
+    from ..actions import historian
+    from ..actions.errors import ActionError
+    from .render import render_blocker
+    from .ui import console
+
+    workspace = _load_workspace()
+    try:
+        root, name = _resolve_historian_feature(workspace, args.feature)
+    except ActionError as err:
+        if args.json:
+            _print_json(err.to_dict())
+        else:
+            render_blocker(err, action=f"historian {args.subcommand}")
+        sys.exit(1)
+
+    if args.subcommand == "show":
+        memory = historian.format_for_agent(root, name)
+        if args.json:
+            _print_json({"feature": name, "memory": memory})
+            return
+        if not memory:
+            console.print()
+            console.print(f" [muted]no memory recorded yet for [feature]{name}[/][/]")
+            console.print()
+            return
+        console.print(memory)
+        return
+
+    if args.subcommand == "compact":
+        result = historian.compact(root, name, keep_sessions=args.keep_sessions)
+        if args.json:
+            _print_json({"feature": name, **result})
+            return
+        console.print()
+        console.print(f" [feature]{name}[/] {result.get('action')}: "
+                      f"kept {result.get('kept', '?')} entries, "
+
f"dropped {result.get('dropped', 0)}") + console.print() + return + + def cmd_push(args: argparse.Namespace) -> None: """Feature-scoped multi-repo push (Wave 2.3).""" from ..actions.errors import ActionError @@ -2807,6 +2866,31 @@ def main() -> None: help="Only list unresolved threads") bot_status_p.add_argument("--json", action="store_true", help="Output as JSON") + # historian — cross-session feature memory (M4) + historian_p = subparsers.add_parser( + "historian", + help="Read or compact a feature's persistent memory file (M4)", + ) + historian_sub = historian_p.add_subparsers(dest="subcommand", required=True) + + historian_show = historian_sub.add_parser( + "show", help="Print the rendered memory file for the feature", + ) + historian_show.add_argument("feature", nargs="?", default=None, + help="Feature alias; defaults to canonical feature") + historian_show.add_argument("--json", action="store_true", help="Output as JSON") + + historian_compact = historian_sub.add_parser( + "compact", + help="Trim the Sessions section to the most recent N entries", + ) + historian_compact.add_argument("feature", nargs="?", default=None, + help="Feature alias; defaults to canonical feature") + historian_compact.add_argument("--keep-sessions", type=int, default=5, + dest="keep_sessions", + help="Number of most-recent sessions to keep (default 5)") + historian_compact.add_argument("--json", action="store_true", help="Output as JSON") + # push (feature-scoped multi-repo push — Wave 2.3) push_p = subparsers.add_parser( "push", @@ -3002,6 +3086,7 @@ def main() -> None: "switch": cmd_switch, "commit": cmd_commit, "bot-status": cmd_bot_status, + "historian": cmd_historian, "push": cmd_push, "triage": cmd_triage, "state": cmd_state, diff --git a/src/canopy/mcp/server.py b/src/canopy/mcp/server.py index 550290a..a5572b3 100644 --- a/src/canopy/mcp/server.py +++ b/src/canopy/mcp/server.py @@ -312,6 +312,117 @@ def bot_comments_status(feature: str | None = None) -> dict: return 
e.to_dict()
+
+
+# ── Historian (M4) ──────────────────────────────────────────────────────
+
+
+def _historian_feature(feature: str | None) -> tuple:
+    """Resolve (workspace_root, feature_name) for a historian call.
+
+    Falls back to the canonical feature when ``feature`` is omitted.
+    """
+    from ..actions import active_feature as af
+    from ..actions.aliases import resolve_feature
+    from ..actions.errors import BlockerError
+    ws = _get_workspace()
+    if feature:
+        return ws.config.root, resolve_feature(ws, feature)
+    active = af.read_active(ws)
+    if active is None:
+        raise BlockerError(
+            code="no_canonical_feature",
+            what="no active feature; pass `feature` or run `canopy switch <feature>` first",
+        )
+    return ws.config.root, active.feature
+
+
+@mcp.tool()
+def historian_decide(feature: str | None = None,
+                     decisions: list[dict] | None = None) -> dict:
+    """Record one or more agent decisions in the feature's memory file (M4).
+
+    ``decisions`` is a list of ``{"title": str, "rationale": str}`` dicts.
+    Decisions are deduped per-session by title — calling the tool twice
+    with the same title within a session is a no-op (the hybrid Stop-hook
+    backup mechanism relies on this).
+ """ + from ..actions import historian + from ..actions.errors import ActionError + try: + root, name = _historian_feature(feature) + except ActionError as e: + return e.to_dict() + out = [] + for d in (decisions or []): + out.append(historian.record_decision( + root, name, title=d.get("title", ""), rationale=d.get("rationale", ""), + )) + return {"feature": name, "results": out} + + +@mcp.tool() +def historian_pause(feature: str | None = None, reason: str = "") -> dict: + """Record a pause / blocker for the feature (M4).""" + from ..actions import historian + from ..actions.errors import ActionError + try: + root, name = _historian_feature(feature) + except ActionError as e: + return e.to_dict() + return {"feature": name, **historian.record_pause(root, name, reason=reason)} + + +@mcp.tool() +def historian_defer_comment(feature: str | None = None, + comment_id: str = "", reason: str = "") -> dict: + """Mark a review comment as intentionally deferred (M4).""" + from ..actions import historian + from ..actions.errors import ActionError + try: + root, name = _historian_feature(feature) + except ActionError as e: + return e.to_dict() + return {"feature": name, **historian.record_comment_deferred( + root, name, comment_id=comment_id, reason=reason, + )} + + +@mcp.tool() +def feature_memory(feature: str | None = None) -> dict: + """Read the rendered feature memory as markdown (M4). + + Returns ``{feature, memory: }`` — empty string when + no memory has been recorded yet. + """ + from ..actions import historian + from ..actions.errors import ActionError + try: + root, name = _historian_feature(feature) + except ActionError as e: + return e.to_dict() + return {"feature": name, "memory": historian.format_for_agent(root, name)} + + +@mcp.tool() +def historian_compact(feature: str | None = None, + keep_sessions: int = 5) -> dict: + """Trim the Sessions section to the most-recent ``keep_sessions`` (M4). 
+ + v1 is mechanical — it drops session entries beyond the cutoff while + preserving the Resolutions log + PR context entries. A future LLM + pass can replace this with summarized recaps; the storage shape is + forward-compatible. + """ + from ..actions import historian + from ..actions.errors import ActionError + try: + root, name = _historian_feature(feature) + except ActionError as e: + return e.to_dict() + return {"feature": name, **historian.compact( + root, name, keep_sessions=keep_sessions, + )} + + @mcp.tool() def push(feature: str | None = None, repos: list[str] | None = None, set_upstream: bool = False, force_with_lease: bool = False, diff --git a/tests/test_historian.py b/tests/test_historian.py new file mode 100644 index 0000000..c65b79e --- /dev/null +++ b/tests/test_historian.py @@ -0,0 +1,240 @@ +"""Tests for canopy.actions.historian — cross-session feature memory (M4).""" +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from canopy.actions import historian +from canopy.actions.historian import ( + compact, format_for_agent, read, record_classifier_resolved, + record_comment_deferred, record_comment_read, record_comment_resolved, + record_decision, record_event, record_pause, record_pr_context, + record_pr_update, render_path, store_path, +) + + +@pytest.fixture(autouse=True) +def fixed_session(monkeypatch): + """Pin CANOPY_SESSION_ID so dedup-per-session is predictable in tests.""" + monkeypatch.setenv("CANOPY_SESSION_ID", "session-A") + + +# ── Storage paths ──────────────────────────────────────────────────────── + + +def test_paths_under_canopy_memory(tmp_path): + assert store_path(tmp_path, "f").parent == tmp_path / ".canopy" / "memory" + assert store_path(tmp_path, "f").name == "f.jsonl" + assert render_path(tmp_path, "f").name == "f.md" + + +# ── Append + read round-trip ──────────────────────────────────────────── + + +def test_record_decision_persists(tmp_path): + record_decision(tmp_path, 
"feat-1", title="use jwt.decode", rationale="stdlib only") + entries = read(tmp_path, "feat-1") + assert len(entries) == 1 + assert entries[0]["kind"] == "decision" + assert entries[0]["title"] == "use jwt.decode" + assert entries[0]["rationale"] == "stdlib only" + assert entries[0]["session"] == "session-A" + assert entries[0]["at"] # timestamp populated + + +def test_record_event_persists(tmp_path): + record_event(tmp_path, "feat-1", summary="ran preflight (passed)") + entries = read(tmp_path, "feat-1") + assert entries[0]["kind"] == "event" + assert entries[0]["summary"] == "ran preflight (passed)" + + +def test_record_pause_persists(tmp_path): + record_pause(tmp_path, "feat-1", reason="blocked on design copy") + entries = read(tmp_path, "feat-1") + assert entries[0]["kind"] == "pause" + assert entries[0]["reason"] == "blocked on design copy" + + +def test_read_returns_empty_when_no_memory(tmp_path): + assert read(tmp_path, "ghost") == [] + + +# ── Decision dedup ─────────────────────────────────────────────────────── + + +def test_decision_deduped_within_session(tmp_path): + out1 = record_decision(tmp_path, "feat-1", title="same title") + out2 = record_decision(tmp_path, "feat-1", title="same title") + assert out1["action"] == "recorded" + assert out2["action"] == "deduped" + assert len(read(tmp_path, "feat-1")) == 1 + + +def test_decision_not_deduped_across_sessions(tmp_path, monkeypatch): + record_decision(tmp_path, "feat-1", title="cross-session") + monkeypatch.setenv("CANOPY_SESSION_ID", "session-B") + record_decision(tmp_path, "feat-1", title="cross-session") + assert len(read(tmp_path, "feat-1")) == 2 + + +# ── Comment read dedup ────────────────────────────────────────────────── + + +def test_comment_read_deduped_per_session(tmp_path): + record_comment_read(tmp_path, "feat-1", comment_id=42, author="bot", + path="x.py", line=1) + record_comment_read(tmp_path, "feat-1", comment_id=42, author="bot", + path="x.py", line=1) + assert len(read(tmp_path, 
"feat-1")) == 1 + + +def test_comment_read_int_or_str_id(tmp_path): + record_comment_read(tmp_path, "feat-1", comment_id=42, author="bot", + path="", line=0) + record_comment_read(tmp_path, "feat-1", comment_id="42", author="bot", + path="", line=0) + assert len(read(tmp_path, "feat-1")) == 1 + + +# ── Classifier dedup ──────────────────────────────────────────────────── + + +def test_classifier_resolved_logs_once_per_session(tmp_path): + threads = [{"id": 1, "author": "bot", "path": "a.py"}] + record_classifier_resolved(tmp_path, "feat-1", threads=threads) + record_classifier_resolved(tmp_path, "feat-1", threads=threads) + assert len(read(tmp_path, "feat-1")) == 1 + + +def test_classifier_resolved_noop_when_empty(tmp_path): + record_classifier_resolved(tmp_path, "feat-1", threads=[]) + assert read(tmp_path, "feat-1") == [] + + +# ── Comment resolved + deferred ───────────────────────────────────────── + + +def test_record_comment_resolved(tmp_path): + record_comment_resolved(tmp_path, "feat-1", comment_id=99, commit_sha="abc12345", + gist="renamed foo to bar", author="bot", + path="src/x.py", line=42) + e = read(tmp_path, "feat-1")[0] + assert e["kind"] == "comment_resolved" + assert e["comment_id"] == "99" + assert e["commit_sha"] == "abc12345" + assert e["gist"] == "renamed foo to bar" + + +def test_record_comment_deferred(tmp_path): + record_comment_deferred(tmp_path, "feat-1", comment_id=50, + reason="design discussion needed") + e = read(tmp_path, "feat-1")[0] + assert e["kind"] == "comment_deferred" + assert e["reason"] == "design discussion needed" + + +# ── PR context + updates ──────────────────────────────────────────────── + + +def test_record_pr_context(tmp_path): + record_pr_context(tmp_path, "feat-1", pr_number=142, repo="api", + title="cache stats", base="main", + rationale="closes 3 actionable threads", url="https://gh/p/142") + e = read(tmp_path, "feat-1")[0] + assert e["kind"] == "pr_context" + assert e["pr_number"] == 142 + assert e["title"] == 
"cache stats" + assert e["url"] == "https://gh/p/142" + + +def test_record_pr_update(tmp_path): + record_pr_update(tmp_path, "feat-1", pr_number=142, repo="api", + summary="addressed bot 789") + e = read(tmp_path, "feat-1")[0] + assert e["kind"] == "pr_update" + assert e["summary"] == "addressed bot 789" + + +# ── format_for_agent ───────────────────────────────────────────────────── + + +def test_format_for_agent_empty(tmp_path): + assert format_for_agent(tmp_path, "ghost") == "" + + +def test_format_for_agent_renders_three_sections(tmp_path): + record_decision(tmp_path, "feat-1", title="picked stdlib jwt") + record_comment_resolved(tmp_path, "feat-1", comment_id=1, commit_sha="abc12345", + gist="renamed", author="bot", path="x.py", line=1) + record_pr_context(tmp_path, "feat-1", pr_number=10, repo="api", title="t") + + md = format_for_agent(tmp_path, "feat-1") + assert "# Feature: feat-1" in md + assert "## Resolutions log" in md + assert "## PR context" in md + assert "## Sessions (newest first)" in md + # Resolution rendered with check glyph + sha shorthand + assert "✓ comment 1" in md + assert "abc12345" in md + # PR block rendered + assert "PR #10 — api" in md + # Decision rendered in Sessions + assert "decision:" in md + assert "picked stdlib jwt" in md + + +def test_format_for_agent_handles_missing_sections(tmp_path): + """Sections with no content show a placeholder rather than disappear.""" + record_decision(tmp_path, "feat-1", title="solo decision") + md = format_for_agent(tmp_path, "feat-1") + assert "_(no comment activity yet)_" in md + assert "_(no PRs opened yet)_" in md + + +def test_render_file_written_alongside_store(tmp_path): + record_decision(tmp_path, "feat-1", title="t") + assert store_path(tmp_path, "feat-1").exists() + assert render_path(tmp_path, "feat-1").exists() + md = render_path(tmp_path, "feat-1").read_text() + assert "# Feature: feat-1" in md + + +# ── Compaction ────────────────────────────────────────────────────────── + + +def 
test_compact_noop_when_within_limit(tmp_path, monkeypatch): + for i in range(3): + monkeypatch.setenv("CANOPY_SESSION_ID", f"s-{i}") + record_event(tmp_path, "feat-1", summary=f"event {i}") + out = compact(tmp_path, "feat-1", keep_sessions=5) + assert out["action"] == "noop" + + +def test_compact_drops_old_sessions_keeps_structural(tmp_path, monkeypatch): + # 7 sessions, each with one decision + one comment_resolved (structural). + for i in range(7): + monkeypatch.setenv("CANOPY_SESSION_ID", f"s-{i}") + record_decision(tmp_path, "feat-1", title=f"decision-{i}") + record_comment_resolved(tmp_path, "feat-1", comment_id=i, + commit_sha=f"sha{i}", gist=f"g-{i}") + pre_total = len(read(tmp_path, "feat-1")) + assert pre_total == 14 + + out = compact(tmp_path, "feat-1", keep_sessions=3) + assert out["action"] == "compacted" + assert out["kept"] < pre_total + entries = read(tmp_path, "feat-1") + # All 7 comment_resolved entries preserved (structural). + assert sum(1 for e in entries if e["kind"] == "comment_resolved") == 7 + # Only the last 3 sessions' decisions remain. + decisions = [e for e in entries if e["kind"] == "decision"] + assert len(decisions) == 3 + titles = {d["title"] for d in decisions} + assert titles == {"decision-4", "decision-5", "decision-6"} + + +def test_compact_noop_when_no_memory(tmp_path): + assert compact(tmp_path, "ghost")["action"] == "noop"