From b04bfddcf5dfdc9443cd2ab0e9b2151db49acb0e Mon Sep 17 00:00:00 2001 From: TensorTemplar Date: Wed, 24 Jun 2026 10:49:30 +0300 Subject: [PATCH 1/8] abstract hook protocol + multi-harness memory + freshness validator Folds prior unsigned WIP commits into a single signed commit. abstract hook protocol (issue #50): Storage and analyzers operate on a single canonical AbstractHookEvent; each harness owns its wire format via an adapter (ClaudeCodeAdapter, OpenCodeAdapter). EventType values are snake_case canonical (Migration015 translates legacy PascalCase). multi-harness memory extraction (issue #46): find_memories discovers and parses both Claude Code and OpenCode sessions. OpenCode's message//.json + part//.json tree is walked in chronological order and emitted in the same USER:/ASSISTANT:/[TOOL: ...] format as the Claude parser, so downstream LLM extraction is harness-agnostic. processed_memory_sessions.source column (Migration017) prevents cross-harness key collisions. freshness validator (4-action LLM judge): All open-ended judgment is deferred to the LLM. The dedupe threshold is data-driven per project: p75 of the existing memory bank's pairwise similarity distribution, clamped to [0.45, 0.95]. The judge picks one of keep_both / merge / supersede / dedupe. supersede links existing to new via MemoryEntry.superseded_by (Migration016) instead of flipping retained; merge rewrites the candidate; dedupe skips save. preflight: find_memories validates both the chat LLM and embedding endpoint via GET /v1/models before processing any session; aborts with click.ClickException listing all failures. No partial batch completion. --dry-run skips the check. Truncation defaults (tool_input=120, tool_output=120, tool_result=200) preserved as Pydantic-fielded defaults on TranscriptTruncationConfig so callers can override without hardcoding changes. 
Verified: 976 tests pass; end-to-end on /mnt/terradump/code/microagi/moelite with MicroAGI MiniMax-M3 endpoint from opencode.json (8 memories saved, freshness ran KEEP_BOTH at sim=0.59, project p75=0.57 -> derived threshold 0.57). Signed with 34A5902CFFFC0171037ABFE2F810974410C09E22 (Alex Korolev (HOT) ). --- .env.solo.example | 12 + .env.summoner.example | 19 +- README.md | 19 +- SPEC.md | 337 +++++++++++ pyproject.toml | 2 +- pyrightconfig.json | 7 + scripts/calibrate_freshness_threshold.py | 82 +++ src/slopometry/cli.py | 72 ++- src/slopometry/core/code_analyzer.py | 4 +- src/slopometry/core/database.py | 444 +++++++++++--- src/slopometry/core/hook_handler.py | 562 +++--------------- src/slopometry/core/migrations.py | 100 ++++ src/slopometry/core/models/__init__.py | 29 +- src/slopometry/core/models/baseline.py | 8 +- src/slopometry/core/models/experiment.py | 13 +- src/slopometry/core/models/hook.py | 198 +----- src/slopometry/core/models/memory.py | 50 ++ .../core/models/protocol/__init__.py | 18 + src/slopometry/core/models/protocol/events.py | 114 ++++ src/slopometry/core/models/session.py | 10 +- src/slopometry/core/models/smell.py | 4 +- src/slopometry/core/opencode_handler.py | 273 ++------- src/slopometry/core/plan_analyzer.py | 2 +- src/slopometry/core/protocol/__init__.py | 14 + .../core/protocol/adapters/__init__.py | 10 + src/slopometry/core/protocol/adapters/base.py | 53 ++ .../core/protocol/adapters/claude_code.py | 206 +++++++ .../core/protocol/adapters/opencode.py | 95 +++ src/slopometry/core/protocol/dispatch.py | 123 ++++ src/slopometry/core/protocol/session.py | 64 ++ src/slopometry/core/settings.py | 47 +- .../core/transcript_token_analyzer.py | 2 +- src/slopometry/display/formatters.py | 32 +- src/slopometry/solo/cli/commands.py | 545 +++++++++++++++-- src/slopometry/solo/cli/preflight.py | 65 ++ .../solo/services/embedding_service.py | 101 ++++ .../solo/services/memory_extractor.py | 400 +++++++++++++ .../solo/services/memory_freshness.py 
| 289 +++++++++ .../solo/services/memory_service.py | 125 ++++ .../solo/services/transcript_finder.py | 214 +++++++ src/slopometry/summoner/cli/commands.py | 4 +- .../summoner/services/llm_service.py | 14 +- .../summoner/services/llm_wrapper.py | 187 ++---- tests/test_baseline_service.py | 5 +- tests/test_claude_code_adapter.py | 415 +++++++++++++ tests/test_current_impact_service.py | 15 +- tests/test_database.py | 150 +++-- tests/test_embedding_service.py | 155 +++++ tests/test_git_tracker.py | 1 + tests/test_hook_handler.py | 301 ++-------- tests/test_implementation_comparator.py | 5 +- tests/test_list_sessions_performance.py | 13 +- tests/test_llm_integration.py | 106 +--- tests/test_memory_extractor.py | 191 ++++++ tests/test_memory_freshness.py | 353 +++++++++++ tests/test_memory_models.py | 98 +++ tests/test_memory_service.py | 201 +++++++ tests/test_models.py | 8 +- tests/test_notebookread_integration.py | 64 +- tests/test_opencode_adapter.py | 322 ++++++++++ tests/test_opencode_handler.py | 308 ++-------- tests/test_opencode_memory_integration.py | 249 ++++++++ tests/test_plan_analyzer.py | 2 +- tests/test_posttooluse_validation.py | 71 ++- tests/test_preflight.py | 119 ++++ tests/test_protocol_events.py | 266 +++++++++ tests/test_protocol_session.py | 136 +++++ tests/test_qpe_calculator.py | 23 +- tests/test_sessions_performance.py | 56 +- tests/test_settings.py | 2 +- tests/test_transcript_finder.py | 298 ++++++++++ tests/test_transcript_token_analyzer.py | 2 +- uv.lock | 2 +- 73 files changed, 6949 insertions(+), 1957 deletions(-) create mode 100644 SPEC.md create mode 100644 pyrightconfig.json create mode 100644 scripts/calibrate_freshness_threshold.py create mode 100644 src/slopometry/core/models/memory.py create mode 100644 src/slopometry/core/models/protocol/__init__.py create mode 100644 src/slopometry/core/models/protocol/events.py create mode 100644 src/slopometry/core/protocol/__init__.py create mode 100644 
src/slopometry/core/protocol/adapters/__init__.py create mode 100644 src/slopometry/core/protocol/adapters/base.py create mode 100644 src/slopometry/core/protocol/adapters/claude_code.py create mode 100644 src/slopometry/core/protocol/adapters/opencode.py create mode 100644 src/slopometry/core/protocol/dispatch.py create mode 100644 src/slopometry/core/protocol/session.py create mode 100644 src/slopometry/solo/cli/preflight.py create mode 100644 src/slopometry/solo/services/embedding_service.py create mode 100644 src/slopometry/solo/services/memory_extractor.py create mode 100644 src/slopometry/solo/services/memory_freshness.py create mode 100644 src/slopometry/solo/services/memory_service.py create mode 100644 src/slopometry/solo/services/transcript_finder.py create mode 100644 tests/test_claude_code_adapter.py create mode 100644 tests/test_embedding_service.py create mode 100644 tests/test_memory_extractor.py create mode 100644 tests/test_memory_freshness.py create mode 100644 tests/test_memory_models.py create mode 100644 tests/test_memory_service.py create mode 100644 tests/test_opencode_adapter.py create mode 100644 tests/test_opencode_memory_integration.py create mode 100644 tests/test_preflight.py create mode 100644 tests/test_protocol_events.py create mode 100644 tests/test_protocol_session.py create mode 100644 tests/test_transcript_finder.py diff --git a/.env.solo.example b/.env.solo.example index 8c9f651..f0a6330 100644 --- a/.env.solo.example +++ b/.env.solo.example @@ -18,3 +18,15 @@ SLOPOMETRY_ENABLE_COMPLEXITY_FEEDBACK=true # Include development guidelines from CLAUDE.md in feedback SLOPOMETRY_FEEDBACK_DEV_GUIDELINES=false + +# Memory Extraction (for solo find-memories and show-memories) +# LLM endpoint for extracting memories from transcripts +SLOPOMETRY_MEMORY_LLM_ENDPOINT=https://your-llm-endpoint.com/v1 +SLOPOMETRY_MEMORY_LLM_MODEL=your-model-name +SLOPOMETRY_MEMORY_LLM_API_KEY=your-api-key-here + +# Memory Embedding (for uniqueness scoring) +# 
Embedding endpoint for computing memory similarity +SLOPOMETRY_MEMORY_EMBEDDING_ENDPOINT=https://your-embedding-endpoint.com/v1 +SLOPOMETRY_MEMORY_EMBEDDING_MODEL=your-embedding-model +SLOPOMETRY_MEMORY_EMBEDDING_API_KEY=your-embedding-api-key-here diff --git a/.env.summoner.example b/.env.summoner.example index c07eb21..65e482c 100644 --- a/.env.summoner.example +++ b/.env.summoner.example @@ -11,20 +11,13 @@ SLOPOMETRY_ENABLE_COMPLEXITY_FEEDBACK=true SLOPOMETRY_FEEDBACK_DEV_GUIDELINES=false # LLM Integration (required for userstorify and AI features) -# Set offline_mode=false to enable external LLM requests +# Set offline_mode=false to enable external LLM requests. +# Single agent (MiniMax-M3, served via vLLM with the OpenAI-compatible API). +# The endpoint below is the public ingress for the in-cluster MXFP4 deployment. SLOPOMETRY_OFFLINE_MODE=false -SLOPOMETRY_LLM_PROXY_URL=https://your-proxy.example.com -SLOPOMETRY_LLM_PROXY_API_KEY=your-api-key -SLOPOMETRY_LLM_RESPONSES_URL=https://your-proxy.example.com/responses - -# User Story Generation -# Available agents: gpt_oss_120b, gemini, minimax -SLOPOMETRY_USER_STORY_AGENT=gpt_oss_120b - -# Anthropic Provider (e.g. sglang with MiniMax-M2.1) -# Provides access to MiniMax models via custom Anthropic-compatible endpoints -SLOPOMETRY_ANTHROPIC_URL=https://your-sglang-endpoint.example.com -SLOPOMETRY_ANTHROPIC_API_KEY=your-anthropic-api-key +SLOPOMETRY_LLM_PROXY_URL=https://llm2.droidcraft.org/minimax-m3-mxfp4-vllm/v1 +SLOPOMETRY_LLM_PROXY_API_KEY=your-vllm-api-key +SLOPOMETRY_LLM_MODEL_NAME=olka-fi/MiniMax-M3-MXFP4 # Interactive Rating for Dataset Quality Control # Prompts you to rate generated user stories (1-5) diff --git a/README.md b/README.md index ecde8fd..d7b6f01 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ Worst offenders and overall slop at a glance **See more examples and FAQ in details below**:
-### Q: I don't need to verify when my tests are passing, right? +### Q: I don't need to verify when my tests are passing, right? A: lmao @@ -53,9 +53,22 @@ What clevery ways you ask? Silent exception swallowing upstream ofc! Slopometry forces agents to state the purpose of swallowed exceptions and skipped tests, this is a simple LLM-as-judge call for your RL pipeline (you're welcome) -A handler only counts as *swallowed* if it does **no processing of any kind** — only `pass`/`continue`/`break`/`...`. Recovering a fallback value (`except ImportError: torch = None`) or counting the failure (`errors += 1`) is real handling and is not flagged. When a silent handler is genuinely correct, mark it `# slopometry: allow-silent` to acknowledge it — but slopometry counts those markers per file and **blocks on any increase**, so an agent can't reward-hack by mass-suppressing real swallows. +A handler only counts as *swallowed* if it does **no processing of any kind** — only `pass`/`continue`/`break`/`...`. Recovering a fallback value (`except ImportError: torch = None`) or counting the failure (`errors += 1`) is real handling and is not flagged. -Here is Opus 4.5, which is writing 90% of your production code by 2026: +#### Acknowledging Silent Handlers + +When a silent handler is genuinely correct (e.g., context manager cleanup that always succeeds), mark it with `# slopometry: allow-silent`: + +```python +try: + acquire_lock() +except Exception: + pass # slopometry: allow-silent - lock already released on context exit +``` + +Slopometry counts those markers per file and **blocks on any increase**, so an agent can't reward-hack by mass-suppressing real swallows. If you see a blocking increase, review the NEW markers and confirm each is justified. 
+ +Here is Opus 4.5, which is writing 90% of your production code by 2026: ![silent-errors](assets/force-review-silent-errors.png) ![silent-errors2](assets/force-review-silent-errors-2.png) diff --git a/SPEC.md b/SPEC.md new file mode 100644 index 0000000..e3a017d --- /dev/null +++ b/SPEC.md @@ -0,0 +1,337 @@ +# Slopometry Memory System - Specification + +## Overview + +Add a memory system to `slopometry solo` that discovers Claude Code transcripts, extracts meaningful memory candidates using an LLM, and provides interactive management of memories per-project. + +## Memory Types + +Based on the guideline, memories are categorized into four types: + +| Type | Description | Examples | +|------|-------------|----------| +| `user` | Who you are: role, expertise, stable preferences | "Sarah is a DevOps engineer specializing in k8s" | +| `feedback` | How I should work: corrections or confirmed approaches with the *why* | "Always check GPU availability before scheduling - learned from a 3am incident" | +| `project` | Ongoing work, goals, constraints not derivable from code/git | "We use dqlite voter topology for HA - not visible from files" | +| `reference` | Pointers to external resources | "Jira board: https://company.atlassian.net/board/TICKETS" | + +### What NOT to Save +- Anything the repo already records (code structure, past fixes, git history, CLAUDE.md content) +- Things that only matter to the current conversation +- Reconstructable facts from code + +### Hygiene Rules +- Convert relative dates to absolute +- Update existing memories rather than duplicate +- Delete memories that are wrong + +--- + +## Feature 1: `slopometry solo find-memories` + +### Purpose +Scan disk for Claude Code transcripts, filter to project-relevant ones, parse them to remove noise, generate memory candidates via LLM, and save to database. 
+ +### Transcript Discovery Paths + +**Claude Code default locations:** +- Linux: `~/.claude/projects/` +- macOS: `~/Library/Application Support/Claude/projects/` +- Windows: `%APPDATA%\Claude\projects\` + +**Slopometry saved transcripts:** +- `.slopometry/transcripts/` in project root + +**Structure under project dirs:** +``` +~/.claude/projects//sessions//transcript.jsonl +``` + +### CLI Interface + +```bash +slopometry solo find-memories [OPTIONS] + +OPTIONS: + --project-dir PATH Project directory (default: cwd) + --llm-endpoint URL LLM endpoint (default: from env or localhost) + --llm-model MODEL Model name (default: gpt-4o-mini) + --force Re-process already processed sessions + --dry-run Show what would be done without doing it + --min-importance SCORE Minimum importance 0.0-1.0 (default: 0.5) +``` + +### Processing Pipeline + +1. **Discovery Phase** + - Scan Claude project directories + - Scan `.slopometry/transcripts/` in project + - Build list of `(session_id, transcript_path, project_dir)` tuples + +2. **Filtering Phase** + - Skip sessions already processed (unless `--force`) + - Filter to sessions whose working directory matches `--project-dir` + +3. **Parsing Phase** + - Parse JSONL transcript + - Remove noise: empty turns, auto-accepted suggestions, tool outputs > threshold + - Extract conversation structure + +4. **Memory Candidate Generation** + - Send parsed conversation to LLM with the guideline prompt + - Request JSON array of memory candidates + +5. **Storage Phase** + - Save memory candidates to `memories` table + - Mark session as processed in `processed_memory_sessions` table + +### LLM Prompt for Memory Extraction + +``` +You are analyzing a Claude Code session transcript to identify durable facts +that should be remembered across sessions. 
+ +MEMORY TYPES: +- user: Facts about the human's identity, role, expertise, stable preferences +- feedback: Guidance on how to work, corrections, confirmed approaches (always with WHY) +- project: Work goals, constraints, topology not derivable from code/git +- reference: External resource pointers (URLs, dashboards, tickets) + +MEMORY CRITERIA: +A fact qualifies if "If I started fresh next session, would not knowing this +make me repeat a mistake, re-derive something hard, or act against preference?" +If YES → memory. If reconstructable from code or only relevant now → skip. + +HYGIENE: +- Convert relative dates (e.g. "19 days ago") to absolute dates +- One fact per memory +- Include importance_score 0.0-1.0 + +Return a JSON array of memories: +[ + { + "memory_type": "feedback", + "content": "Always verify GPU availability before job submission - learned from a 3am incident where a job sat queued for 2 hours", + "importance_score": 0.9, + "source_context": "mentioned during discussion about cluster scheduling" + } +] + +Transcript to analyze: +<transcript> +``` + +--- + +## Feature 2: `slopometry solo show-memories` + +### Purpose +List all memories for a project from the database and provide interactive management.
+ +### CLI Interface + +```bash +slopometry solo show-memories [OPTIONS] + +OPTIONS: + --project-dir PATH Project directory (default: cwd) + --type TYPE Filter by memory type (user|feedback|project|reference) + --min-importance SCORE Minimum importance score + --limit N Max results (default: 50) +``` + +### Interactive Mode + +When run without `--format`, enters interactive mode: + +``` +Slopometry Memories: ~/projects/myapp +┌─────────────────────────────────────────────────────────────────┐ +│ [1] user │ Importance: 0.85 │ +│ "Sarah is a DevOps engineer with 10 years experience in k8s" │ +│ Session: abc123 | Created: 2024-01-15 │ +├─────────────────────────────────────────────────────────────────┤ +│ [2] feedback │ Importance: 0.95 │ +│ "Check GPU availability before scheduling - 3am incident" │ +│ Session: def456 | Created: 2024-01-14 │ +└─────────────────────────────────────────────────────────────────┘ + +Actions: [r]etain [d]elete [e]dit [f]ilter [q]uit +> +``` + +**Interactive Commands:** +- `1`, `2`, etc. 
- Select a memory +- `r <n>` - Mark memory as retained (increases importance for similar memories) +- `d <n>` - Delete a memory +- `e <n>` - Edit memory content +- `f <type>` - Filter by type +- `q` - Quit + +--- + +## Database Schema + +### New Table: `memories` + +```sql +CREATE TABLE memories ( + id TEXT PRIMARY KEY, + session_id TEXT NOT NULL, + project_dir TEXT NOT NULL, + memory_type TEXT NOT NULL CHECK (memory_type IN ('user', 'feedback', 'project', 'reference')), + content TEXT NOT NULL, + importance_score REAL NOT NULL DEFAULT 0.5, + source_context TEXT, + created_at TEXT NOT NULL, + updated_at TEXT, + retained INTEGER NOT NULL DEFAULT 0, + metadata TEXT +); + +CREATE INDEX idx_memories_project_dir ON memories(project_dir); +CREATE INDEX idx_memories_session_id ON memories(session_id); +CREATE INDEX idx_memories_type ON memories(memory_type); +CREATE INDEX idx_memories_importance ON memories(importance_score); +``` + +### New Table: `processed_memory_sessions` + +```sql +CREATE TABLE processed_memory_sessions ( + session_id TEXT PRIMARY KEY, + project_dir TEXT NOT NULL, + processed_at TEXT NOT NULL, + memory_count INTEGER NOT NULL DEFAULT 0, + UNIQUE(session_id, project_dir) +); +``` + +--- + +## Pydantic Models + +```python +# models/memory.py + +from enum import Enum +from pydantic import BaseModel, Field +from datetime import datetime + +class MemoryType(str, Enum): + USER = "user" + FEEDBACK = "feedback" + PROJECT = "project" + REFERENCE = "reference" + +class MemoryEntry(BaseModel): + id: str + session_id: str + project_dir: str + memory_type: MemoryType + content: str + importance_score: float = Field(ge=0.0, le=1.0) + source_context: str | None = None + created_at: datetime + updated_at: datetime | None = None + retained: bool = False + metadata: dict | None = None + +class MemoryCandidate(BaseModel): + memory_type: MemoryType + content: str + importance_score: float = Field(ge=0.0, le=1.0, default=0.5) + source_context: str | None = None + +class
MemoryCreateRequest(BaseModel): + session_id: str + project_dir: str + candidates: list[MemoryCandidate] +``` + +--- + +## Settings Extension + +```python +# In settings.py + +class Settings(BaseSettings): + # ... existing fields ... + + # Memory system + memory_llm_endpoint: str = Field( + default="http://localhost:11434/v1", + description="LLM endpoint for memory extraction" + ) + memory_llm_model: str = Field( + default="gpt-4o-mini", + description="Model for memory extraction" + ) + memory_min_importance: float = Field( + default=0.5, + ge=0.0, + le=1.0 + ) + memory_retention_days: int = Field( + default=365, + description="Days to retain memories" + ) +``` + +--- + +## File Structure + +``` +src/slopometry/ + core/ + database.py # Add memories table creation + settings.py # Add memory settings + solo/ + services/ + memory_service.py # Memory CRUD operations + transcript_finder.py # Discovery logic + memory_extractor.py # LLM integration + commands/ + find_memories.py # find-memories command + show_memories.py # show-memories command + core/models/ + memory.py # Memory Pydantic models + +tests/ + test_memory_service.py + test_transcript_finder.py + test_memory_extractor.py +``` + +--- + +## Embedding Readiness + +To support future cross-project embedding queries: + +1. **Memory table includes `project_dir`** - enables project-scoped queries +2. **Importance score** - for filtering during retrieval +3. **Metadata column** - for storing embedding vectors later +4. **Memory type enum** - for faceted search + +Future embedding implementation would: +- Add `embedding` column to `memories` table +- Use `memory_type` + `importance_score` for filtering in vector search +- Enable "find similar memories across projects" queries + +--- + +## Implementation Order + +1. Add `MemoryType` enum and `MemoryEntry` model to `models/memory.py` +2. Add memory tables to `database.py` migration +3. Add memory settings to `settings.py` +4. 
Implement `MemoryService` in `solo/services/memory_service.py` +5. Implement `TranscriptFinder` in `solo/services/transcript_finder.py` +6. Implement `MemoryExtractor` in `solo/services/memory_extractor.py` +7. Add `find-memories` command to `solo/commands/` +8. Add `show-memories` command to `solo/commands/` +9. Add tests +10. Update `__init__.py` exports diff --git a/pyproject.toml b/pyproject.toml index ff619fd..971c06d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "slopometry" -version = "2026.4.15" +version = "2026.6.23" description = "Opinionated code quality metrics for code agents and humans" readme = "README.md" requires-python = ">=3.13" diff --git a/pyrightconfig.json b/pyrightconfig.json new file mode 100644 index 0000000..6b2d022 --- /dev/null +++ b/pyrightconfig.json @@ -0,0 +1,7 @@ +{ + "include": ["src/slopometry", "tests"], + "exclude": ["**/__pycache__"], + "extraPaths": [".venv/lib/python3.13/site-packages", "src/slopometry"], + "pythonVersion": "3.13", + "typeCheckingMode": "basic" +} diff --git a/scripts/calibrate_freshness_threshold.py b/scripts/calibrate_freshness_threshold.py new file mode 100644 index 0000000..7ab5b64 --- /dev/null +++ b/scripts/calibrate_freshness_threshold.py @@ -0,0 +1,82 @@ +"""Calibrate FLOOR/CEILING thresholds against real embeddings from the project's memory bank. 
# Clamp bounds applied to the project-derived p75 dedupe threshold.
_FLOOR_THRESHOLD = 0.45
_CEILING_THRESHOLD = 0.95

# Value shipped in the example env file; seeing it means nothing was configured.
_PLACEHOLDER_ENDPOINT = "https://your-embedding-endpoint.com/v1"


def cosine(a: list[float], b: list[float]) -> float:
    """Return the cosine similarity of two vectors.

    Vectors of different lengths, or with a zero norm, yield 0.0 instead of
    raising, so a single malformed embedding cannot abort a calibration run.
    """
    if len(a) != len(b):
        return 0.0
    dot = sum(x * y for x, y in zip(a, b))
    na = sum(x * x for x in a) ** 0.5
    nb = sum(x * x for x in b) ** 0.5
    if na == 0 or nb == 0:
        return 0.0
    return dot / (na * nb)


def _pairwise_similarities(vectors: list[list[float]]) -> list[float]:
    """Return the ascending cosine similarities of every unordered pair of vectors."""
    sims = [cosine(a, b) for i, a in enumerate(vectors) for b in vectors[i + 1 :]]
    sims.sort()
    return sims


def _quantile(sorted_values: list[float], p: float) -> float:
    """Return the element at index int(n * p), clamped, of a non-empty ascending list."""
    n = len(sorted_values)
    return sorted_values[max(0, min(n - 1, int(n * p)))]


def main() -> None:
    """Print the pairwise-similarity distribution of a project's memory bank.

    The project is taken from the PROJECT_DIR environment variable and falls
    back to the current working directory. Stored embeddings are reused; any
    memory without one is embedded through the configured endpoint. The
    percentiles and the clamped p75 threshold are written to stdout.
    """
    import os

    # Fall back to the cwd rather than a path that only exists on one machine.
    project_dir = os.environ.get("PROJECT_DIR") or os.getcwd()
    service = MemoryService()
    memories = service.get_memories(project_dir=project_dir, limit=10000)
    print(f"Loaded {len(memories)} existing memories")

    if not memories:
        print("No memories to calibrate against. Run `solo find-memories --force` first.")
        return

    endpoint = settings.memory_embedding_endpoint
    model = settings.memory_embedding_model
    api_key = settings.memory_embedding_api_key.get_secret_value()
    if endpoint == _PLACEHOLDER_ENDPOINT:
        print("Embedding endpoint not configured.")
        return

    client = OpenAI(base_url=endpoint, api_key=api_key)
    embeddings: list[tuple[str, list[float]]] = []
    for m in memories:
        if m.embedding:
            embeddings.append((m.content, m.embedding))
            continue
        try:
            resp = client.embeddings.create(model=model, input=m.content)
        except Exception as e:
            # Best effort: one failed request should not discard the whole run.
            print(f" embed failed for {m.id}: {e}")
        else:
            embeddings.append((m.content, resp.data[0].embedding))

    if not embeddings:
        print("No embeddings available.")
        return

    print(f"Got {len(embeddings)} embeddings (dim={len(embeddings[0][1])})")
    sims = _pairwise_similarities([vector for _, vector in embeddings])
    n = len(sims)
    print(f"n_pairs={n}")
    if not sims:
        # A single embedding has no pairs; fmean() and the quantile lookup
        # would both raise on the empty list.
        print("Need at least two embeddings to compute pairwise similarities.")
        return

    def q(p: float) -> float:
        return _quantile(sims, p)

    print(f"mean={fmean(sims):.4f}")
    print(
        f"p10={q(0.10):.4f} p25={q(0.25):.4f} p50={q(0.50):.4f} "
        f"p75={q(0.75):.4f} p90={q(0.90):.4f} p95={q(0.95):.4f} p99={q(0.99):.4f}"
    )
    print(f"max={sims[-1]:.4f} min={sims[0]:.4f}")
    print()
    print(
        f"If p75 < FLOOR_THRESHOLD ({_FLOOR_THRESHOLD}), threshold clamps to FLOOR. "
        f"If p75 > CEILING_THRESHOLD ({_CEILING_THRESHOLD}), threshold clamps to CEILING."
    )
    derived = max(_FLOOR_THRESHOLD, min(_CEILING_THRESHOLD, q(0.75)))
    print(
        f"Current derived_threshold would be p75={q(0.75):.4f} "
        f"clamped to [{_FLOOR_THRESHOLD}, {_CEILING_THRESHOLD}] -> {derived:.4f}"
    )


if __name__ == "__main__":
    main()
@cli.command("emit-event", hidden=True)
@click.option(
    "--source",
    required=True,
    type=click.Choice(["claude_code", "opencode"]),
    help="Which harness produced the event on stdin.",
)
@click.option(
    "--type",
    "event_type",
    required=False,
    type=click.Choice(
        [
            "tool_call_started",
            "tool_call_completed",
            "notification",
            "turn_completed",
            "subagent_completed",
            "todo_updated",
            "message_updated",
            "subagent_started",
        ]
    ),
    help="Override the event type (skips adapter inference).",
)
def emit_event(source: str, event_type: str | None) -> None:
    """Generic event ingestion entrypoint.

    Reads JSON from stdin and dispatches through the named harness adapter.
    New harnesses should write an adapter + register a source, then invoke
    this command from their hook glue.
    """
    # Imported inside the command, matching the other hook commands in this file.
    from slopometry.core.models.protocol.events import AbstractEventSource, AbstractEventType
    from slopometry.core.protocol.dispatch import emit_event_from_stdin

    harness = AbstractEventSource(source)

    # No --type given: pass None as the override.
    if event_type:
        forced_type = AbstractEventType(event_type)
    else:
        forced_type = None

    exit_code = emit_event_from_stdin(harness, event_type_override=forced_type)
    sys.exit(exit_code)
def _reconstruct_tool_call(
    tool_name: str | None,
    tool_type_value: str | None,
    row: sqlite3.Row,
) -> ToolCallPayload | None:
    """Rebuild a ToolCallPayload from one denormalized events row.

    The typed columns (duration_ms, exit_code, error_message) are read straight
    from the row. The tool input and output are looked up in the row's JSON
    `metadata` column: `tool_input` (falling back to `args`) for the input, and
    `tool_response` (falling back to `output`) for the output.

    Returns None when the row has no tool name.
    """
    if not tool_name:
        return None

    raw_metadata = row["metadata"]
    extras = json.loads(raw_metadata) if raw_metadata else {}

    # Anything that is not a dict is replaced by an empty dict.
    call_input = extras.get("tool_input") or extras.get("args") or {}
    if not isinstance(call_input, dict):
        call_input = {}

    # Key presence, not truthiness, decides the source: an explicit falsy
    # `tool_response` is kept rather than replaced by `output`.
    if "tool_response" in extras:
        call_output = extras.get("tool_response")
    else:
        call_output = extras.get("output")

    return ToolCallPayload(
        tool_name=tool_name,
        tool_type=tool_type_value,
        input=call_input,
        output=call_output,
        duration_ms=row["duration_ms"],
        exit_code=row["exit_code"],
        error_message=row["error_message"],
    )
processed_at TEXT NOT NULL, + memory_count INTEGER NOT NULL DEFAULT 0, + PRIMARY KEY (session_id, project_dir) + ) + """) + conn.commit() - def save_event(self, event: HookEvent) -> int: + def save_event(self, event: AbstractHookEvent) -> int: """Save a hook event to the database.""" + tool_name = event.tool_call.tool_name if event.tool_call else None + tool_type = event.tool_call.tool_type if event.tool_call else None + duration_ms = event.tool_call.duration_ms if event.tool_call else None + exit_code = event.tool_call.exit_code if event.tool_call else None + error_message = event.tool_call.error_message if event.tool_call else None + with self._get_db_connection() as conn: cursor = conn.execute( """ @@ -338,24 +421,24 @@ def save_event(self, event: HookEvent) -> int: event.event_type.value, event.timestamp.isoformat(), event.sequence_number, - event.tool_name, - event.tool_type.value if event.tool_type else None, + tool_name, + tool_type, json.dumps(event.metadata), - event.duration_ms, - event.exit_code, - event.error_message, + duration_ms, + exit_code, + error_message, event.git_state.model_dump_json() if event.git_state else None, event.working_directory, event.project.name if event.project else None, event.project.source.value if event.project else None, - event.transcript_path, + event.transcript_location, event.source.value, event.parent_session_id, ), ) return cursor.lastrowid or 0 - def get_session_events(self, session_id: str) -> list[HookEvent]: + def get_session_events(self, session_id: str) -> list[AbstractHookEvent]: """Get all events for a session.""" with self._get_db_connection() as conn: conn.row_factory = sqlite3.Row @@ -384,32 +467,27 @@ def get_session_events(self, session_id: str) -> list[HookEvent]: source=ProjectSource(row["project_source"]), ) - # Handle source column (may be NULL for pre-migration rows) - from slopometry.core.models.hook import EventSource - source_val = row["source"] if "source" in row.keys() else None - source = 
EventSource(source_val) if source_val else EventSource.CLAUDE_CODE + source = AbstractEventSource(source_val) if source_val else AbstractEventSource.CLAUDE_CODE parent_session_id = row["parent_session_id"] if "parent_session_id" in row.keys() else None + tool_name = row["tool_name"] + tool_type_value = row["tool_type"] events.append( - HookEvent( + AbstractHookEvent( id=row["id"], session_id=row["session_id"], - event_type=HookEventType(row["event_type"]), + event_type=AbstractEventType(row["event_type"]), timestamp=datetime.fromisoformat(row["timestamp"]), sequence_number=row["sequence_number"], - tool_name=row["tool_name"], - tool_type=ToolType(row["tool_type"]) if row["tool_type"] else None, - metadata=json.loads(row["metadata"]), - duration_ms=row["duration_ms"], - exit_code=row["exit_code"], - error_message=row["error_message"], + source=source, + parent_session_id=parent_session_id, + tool_call=_reconstruct_tool_call(tool_name, tool_type_value, row), + metadata=json.loads(row["metadata"]) if row["metadata"] else {}, git_state=git_state, working_directory=working_directory, project=project, - transcript_path=row["transcript_path"], - source=source, - parent_session_id=parent_session_id, + transcript_location=row["transcript_path"], ) ) return events @@ -440,10 +518,10 @@ def get_opencode_transcript(self, session_id: str) -> list[dict] | None: row = conn.execute( """ SELECT metadata FROM hook_events - WHERE session_id = ? AND event_type = 'Stop' AND source = 'opencode' + WHERE session_id = ? AND event_type = ? AND source = ? 
ORDER BY sequence_number DESC LIMIT 1 """, - (session_id,), + (session_id, AbstractEventType.TURN_COMPLETED.value, AbstractEventSource.OPENCODE.value), ).fetchone() if not row: return None @@ -563,7 +641,7 @@ def get_session_statistics(self, session_id: str) -> SessionStatistics | None: (session_id,), ).fetchall() - events_by_type = {HookEventType(row["event_type"]): row["count"] for row in event_type_rows} + events_by_type = {AbstractEventType(row["event_type"]): row["count"] for row in event_type_rows} tool_usage_rows = conn.execute( """ @@ -575,7 +653,7 @@ def get_session_statistics(self, session_id: str) -> SessionStatistics | None: (session_id,), ).fetchall() - tool_usage = {ToolType(row["tool_type"]): row["count"] for row in tool_usage_rows} + tool_usage = {row["tool_type"]: row["count"] for row in tool_usage_rows} first_git_row = conn.execute( """ @@ -834,29 +912,26 @@ def _calculate_plan_evolution(self, session_id: str) -> PlanEvolution: WHERE session_id = ? AND event_type IN (?, ?) ORDER BY sequence_number """, - (session_id, HookEventType.POST_TOOL_USE.value, HookEventType.TODO_UPDATED.value), + (session_id, AbstractEventType.TOOL_CALL_COMPLETED.value, AbstractEventType.TODO_UPDATED.value), ).fetchall() - # Check if this session has TODO_UPDATED events (OpenCode). - # If so, skip POST_TOOL_USE todowrite events to avoid duplicate analysis. 
- has_todo_updated = any(row["event_type"] == HookEventType.TODO_UPDATED.value for row in rows) + has_todo_updated = any(row["event_type"] == AbstractEventType.TODO_UPDATED.value for row in rows) for row in rows: timestamp = datetime.fromisoformat(row["timestamp"]) event_type = row["event_type"] tool_name = row["tool_name"] or "" - tool_type = ToolType(row["tool_type"]) if row["tool_type"] else None + tool_type = row["tool_type"] metadata = json.loads(row["metadata"]) if row["metadata"] else {} - if event_type == HookEventType.TODO_UPDATED.value: + if event_type == AbstractEventType.TODO_UPDATED.value: # OpenCode todo.updated bus event — canonical source for OpenCode todos todos = metadata.get("todos", []) if todos: analyzer.analyze_todo_write_event({"todos": todos}, timestamp) continue - # POST_TOOL_USE events below raw_input = metadata.get("tool_input") or metadata.get("args", {}) tool_input = raw_input if isinstance(raw_input, dict) else {} tool_name_lower = tool_name.lower() @@ -1047,37 +1122,57 @@ def cleanup_old_data(self, days: int, dry_run: bool = False) -> tuple[int, int]: cutoff_date = datetime.now() - timedelta(days=days) with self._get_db_connection() as conn: rows = conn.execute( - "SELECT DISTINCT session_id FROM hook_events WHERE timestamp < ?", + "SELECT DISTINCT session_id, source FROM hook_events WHERE timestamp < ?", (cutoff_date.isoformat(),), ).fetchall() session_ids_to_delete = [row[0] for row in rows] + if not dry_run and session_ids_to_delete: conn.execute("DELETE FROM hook_events WHERE timestamp < ?", (cutoff_date.isoformat(),)) files_deleted = 0 - state_dir = Path.home() / ".claude" / "slopometry" - if state_dir.exists(): - for session_id in session_ids_to_delete: - seq_file = state_dir / f"seq_{session_id}.txt" - if seq_file.exists(): - if not dry_run: - seq_file.unlink() - files_deleted += 1 + seen_dirs: set[Path] = set() + for session_id, source in rows: + sm = SessionManager(source=source or "claude_code") + seq_file = sm.state_dir / 
f"seq_{session_id}.txt" + seen_dirs.add(sm.state_dir) + if seq_file.exists(): + if not dry_run: + seq_file.unlink() + files_deleted += 1 + for legacy_dir in (Path.home() / ".claude" / "slopometry",): + if legacy_dir.exists(): + for seq_file in legacy_dir.glob("seq_*.txt"): + if seq_file.exists(): + if not dry_run: + seq_file.unlink() + files_deleted += 1 return len(session_ids_to_delete), files_deleted def cleanup_session(self, session_id: str) -> tuple[int, int]: """Clean up a specific session and its associated files.""" with self._get_db_connection() as conn: - result = conn.execute("SELECT COUNT(*) FROM hook_events WHERE session_id = ?", (session_id,)).fetchone() + source_row = conn.execute( + "SELECT source FROM hook_events WHERE session_id = ? LIMIT 1", + (session_id,), + ).fetchone() + source = (source_row[0] if source_row and source_row[0] else "claude_code") + result = conn.execute( + "SELECT COUNT(*) FROM hook_events WHERE session_id = ?", (session_id,) + ).fetchone() events_count = result[0] if result else 0 conn.execute("DELETE FROM hook_events WHERE session_id = ?", (session_id,)) files_deleted = 0 - state_dir = Path.home() / ".claude" / "slopometry" - seq_file = state_dir / f"seq_{session_id}.txt" + sm = SessionManager(source=source) + seq_file = sm.state_dir / f"seq_{session_id}.txt" if seq_file.exists(): seq_file.unlink() files_deleted = 1 + legacy_file = Path.home() / ".claude" / "slopometry" / f"seq_{session_id}.txt" + if legacy_file.exists(): + legacy_file.unlink() + files_deleted += 1 return events_count, files_deleted def cleanup_all_sessions(self) -> tuple[int, int, int]: @@ -1089,11 +1184,11 @@ def cleanup_all_sessions(self) -> tuple[int, int, int]: conn.execute("DELETE FROM hook_events") files_deleted = 0 - state_dir = Path.home() / ".claude" / "slopometry" - if state_dir.exists(): - for seq_file in state_dir.glob("seq_*.txt"): - seq_file.unlink() - files_deleted += 1 + for state_root in (Path.home() / ".slopometry" / "sessions", Path.home() 
/ ".claude" / "slopometry"): + if state_root.exists(): + for seq_file in state_root.glob("**/seq_*.txt"): + seq_file.unlink() + files_deleted += 1 return len(sessions), events_count, files_deleted def save_experiment_run(self, experiment: ExperimentRun) -> None: @@ -2078,25 +2173,232 @@ def clear_leaderboard(self) -> int: conn.execute("DELETE FROM qpe_leaderboard") return count + def save_memory(self, memory: MemoryEntry) -> None: + """Save a memory entry to the database.""" + with self._get_db_connection() as conn: + conn.execute( + """ + INSERT INTO memories ( + id, session_id, project_dir, memory_type, content, + source_context, created_at, updated_at, + retained, superseded_by, embedding, metadata + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + memory.id, + memory.session_id, + memory.project_dir, + memory.memory_type.value, + memory.content, + memory.source_context, + memory.created_at.isoformat(), + memory.updated_at.isoformat() if memory.updated_at else None, + int(memory.retained), + memory.superseded_by, + json.dumps(memory.embedding) if memory.embedding else None, + json.dumps(memory.metadata) if memory.metadata else None, + ), + ) + + def save_memories(self, memories: list["MemoryEntry"]) -> int: + """Save multiple memory entries. -class SessionManager: - """Manages sequence numbering for Claude Code sessions.""" + Returns: + Number of memories saved + """ + with self._get_db_connection() as conn: + for memory in memories: + conn.execute( + """ + INSERT INTO memories ( + id, session_id, project_dir, memory_type, content, + source_context, created_at, updated_at, + retained, superseded_by, embedding, metadata + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + memory.id, + memory.session_id, + memory.project_dir, + memory.memory_type.value, + memory.content, + memory.source_context, + memory.created_at.isoformat(), + memory.updated_at.isoformat() if memory.updated_at else None, + int(memory.retained), + memory.superseded_by, + json.dumps(memory.embedding) if memory.embedding else None, + json.dumps(memory.metadata) if memory.metadata else None, + ), + ) + return len(memories) - def __init__(self): - self.state_dir = Path.home() / ".claude" / "slopometry" - self.state_dir.mkdir(parents=True, exist_ok=True) + def get_memories( + self, + project_dir: str | None = None, + memory_type: str | None = None, + limit: int = 50, + ) -> list[MemoryEntry]: + """Get memories with optional filters.""" + with self._get_db_connection() as conn: + query = "SELECT * FROM memories WHERE 1=1" + params: list = [] + + if project_dir: + query += " AND project_dir = ?" + params.append(project_dir) + + if memory_type: + query += " AND memory_type = ?" + params.append(memory_type) + + query += " ORDER BY created_at DESC LIMIT ?" + params.append(limit) + + conn.row_factory = sqlite3.Row + rows = conn.execute(query, params).fetchall() + + memories = [] + for row in rows: + memories.append( + MemoryEntry( + id=row["id"], + session_id=row["session_id"], + project_dir=row["project_dir"], + memory_type=MemoryType(row["memory_type"]), + content=row["content"], + source_context=row["source_context"], + created_at=datetime.fromisoformat(row["created_at"]), + updated_at=datetime.fromisoformat(row["updated_at"]) if row["updated_at"] else None, + retained=bool(row["retained"]), + superseded_by=row["superseded_by"] if "superseded_by" in row.keys() else None, + embedding=json.loads(row["embedding"]) if row["embedding"] else None, + metadata=json.loads(row["metadata"]) if row["metadata"] else None, + ) + ) + return memories + + def delete_memory(self, memory_id: str) -> bool: + """Delete a memory by ID. 
+ + Returns: + True if a memory was deleted, False otherwise + """ + with self._get_db_connection() as conn: + cursor = conn.execute("DELETE FROM memories WHERE id = ?", (memory_id,)) + conn.commit() + return bool(cursor.rowcount and cursor.rowcount > 0) + + def delete_all_memories(self) -> int: + """Delete all memories and processed session records. + + Returns: + Number of memories deleted + """ + with self._get_db_connection() as conn: + cursor = conn.execute("DELETE FROM memories") + conn.execute("DELETE FROM processed_memory_sessions") + conn.commit() + return cursor.rowcount if cursor.rowcount else 0 + + def update_memory( + self, + memory_id: str, + content: str | None = None, + retained: bool | None = None, + superseded_by: str | None = None, + source_context: str | None = None, + embedding: list[float] | None = None, + ) -> bool: + """Update a memory entry. + + Returns: + True if a memory was updated, False otherwise + """ + updates: list[str] = [] + params: list = [] + + if content is not None: + updates.append("content = ?") + params.append(content) + + if embedding is not None: + updates.append("embedding = ?") + params.append(json.dumps(embedding)) + + if retained is not None: + updates.append("retained = ?") + params.append(int(retained)) + + if superseded_by is not None: + updates.append("superseded_by = ?") + params.append(superseded_by) + + if source_context is not None: + updates.append("source_context = ?") + params.append(source_context) + + if not updates: + return False + + updates.append("updated_at = ?") + params.append(datetime.now().isoformat()) + + params.append(memory_id) + + with self._get_db_connection() as conn: + cursor = conn.execute( + f"UPDATE memories SET {', '.join(updates)} WHERE id = ?", + params, + ) + conn.commit() + return bool(cursor.rowcount and cursor.rowcount > 0) + + def mark_session_processed( + self, session_id: str, project_dir: str, memory_count: int, source: str = "claude_code" + ) -> None: + """Mark a session as 
processed for memory extraction.""" + with self._get_db_connection() as conn: + conn.execute( + """ + INSERT OR REPLACE INTO processed_memory_sessions + (session_id, project_dir, source, processed_at, memory_count) + VALUES (?, ?, ?, ?, ?) + """, + (session_id, project_dir, source, datetime.now().isoformat(), memory_count), + ) + + def is_session_processed(self, session_id: str, project_dir: str, source: str = "claude_code") -> bool: + """Check if a session has already been processed for memories.""" + with self._get_db_connection() as conn: + cursor = conn.execute( + "SELECT 1 FROM processed_memory_sessions WHERE session_id = ? AND project_dir = ? AND source = ?", + (session_id, project_dir, source), + ) + return cursor.fetchone() is not None + + def get_memory_stats(self, project_dir: str | None = None) -> dict: + """Get statistics about stored memories.""" + with self._get_db_connection() as conn: + base_query = "SELECT memory_type, COUNT(*) as count FROM memories" + params: list = [] + + if project_dir: + base_query += " WHERE project_dir = ?" + params.append(project_dir) + + base_query += " GROUP BY memory_type" + + rows = conn.execute(base_query, params).fetchall() + type_distribution = {row[0]: row[1] for row in rows} + + total_query = "SELECT COUNT(*) FROM memories" + if project_dir: + total_query += " WHERE project_dir = ?" 
+ total = conn.execute(total_query, params).fetchone()[0] or 0 + + return { + "total": total, + "by_type": type_distribution, + } - def get_next_sequence_number(self, session_id: str) -> int: - """Get the next sequence number for a session.""" - seq_file = self.state_dir / f"seq_{session_id}.txt" - if seq_file.exists(): - try: - current_seq = int(seq_file.read_text().strip()) - next_seq = current_seq + 1 - except (ValueError, FileNotFoundError) as e: - logger.debug("Corrupt or missing sequence file for session %s, resetting to 1: %s", session_id, e) - next_seq = 1 - else: - next_seq = 1 - seq_file.write_text(str(next_seq)) - return next_seq diff --git a/src/slopometry/core/hook_handler.py b/src/slopometry/core/hook_handler.py index 3063bcc..de5002e 100644 --- a/src/slopometry/core/hook_handler.py +++ b/src/slopometry/core/hook_handler.py @@ -1,4 +1,13 @@ -"""Hook handler script invoked by Claude Code for each event.""" +"""Claude Code hook handler — receives Claude Code's stdin JSON, delegates to the +Claude-Code adapter for parsing, persists via the abstract protocol, and runs +the Claude-Code-specific stop-hook feedback pipeline (code smells, context +coverage, CLAUDE.md dev guidelines). + +This module is the Claude-Code-specific glue. Harness-agnostic types live in +`core.protocol.events`; the wire-format parser is in +`core.protocol.adapters.claude_code`. New harnesses should not add code here — +write a new adapter. 
+""" import json import logging @@ -8,25 +17,14 @@ import sys from pathlib import Path -from slopometry.core.database import EventDatabase, SessionManager +from slopometry.core.database import EventDatabase from slopometry.core.git_tracker import GitTracker -from slopometry.core.lock import SlopometryLock from slopometry.core.models.complexity import ComplexityDelta, ExtendedComplexityMetrics -from slopometry.core.models.hook import ( - FeedbackCacheState, - HookEvent, - HookEventType, - HookInputUnion, - NotificationInput, - PostToolUseInput, - PreToolUseInput, - StopInput, - SubagentStopInput, - ToolType, -) +from slopometry.core.models.hook import FeedbackCacheState +from slopometry.core.models.protocol.events import AbstractEventSource, AbstractEventType from slopometry.core.models.session import ContextCoverage from slopometry.core.models.smell import ScopedSmell -from slopometry.core.project_tracker import ProjectTracker +from slopometry.core.protocol.dispatch import dispatch_event from slopometry.core.settings import settings from slopometry.core.working_tree_state import WorkingTreeStateCalculator from slopometry.display.formatters import truncate_path @@ -34,113 +32,22 @@ logger = logging.getLogger(__name__) -def get_tool_type(tool_name: str) -> ToolType: - """Map tool name to ToolType enum.""" - tool_map = { - "bash": ToolType.BASH, - "read": ToolType.READ, - "write": ToolType.WRITE, - "edit": ToolType.EDIT, - "multiedit": ToolType.MULTI_EDIT, - "grep": ToolType.GREP, - "glob": ToolType.GLOB, - "ls": ToolType.LS, - "task": ToolType.TASK, - "todoread": ToolType.TODO_READ, - "todowrite": ToolType.TODO_WRITE, - "taskcreate": ToolType.TASK_CREATE, - "taskupdate": ToolType.TASK_UPDATE, - "tasklist": ToolType.TASK_LIST, - "taskget": ToolType.TASK_GET, - "webfetch": ToolType.WEB_FETCH, - "websearch": ToolType.WEB_SEARCH, - "notebookread": ToolType.NOTEBOOK_READ, - "notebookedit": ToolType.NOTEBOOK_EDIT, - "exit_plan_mode": ToolType.EXIT_PLAN_MODE, - 
"mcp__ide__getdiagnostics": ToolType.MCP_IDE_GET_DIAGNOSTICS, - "mcp__ide__executecode": ToolType.MCP_IDE_EXECUTE_CODE, - "mcp__ide__getworkspaceinfo": ToolType.MCP_IDE_GET_WORKSPACE_INFO, - "mcp__ide__getfilecontents": ToolType.MCP_IDE_GET_FILE_CONTENTS, - "mcp__ide__createfile": ToolType.MCP_IDE_CREATE_FILE, - "mcp__ide__deletefile": ToolType.MCP_IDE_DELETE_FILE, - "mcp__ide__renamefile": ToolType.MCP_IDE_RENAME_FILE, - "mcp__ide__searchfiles": ToolType.MCP_IDE_SEARCH_FILES, - "mcp__filesystem__read": ToolType.MCP_FILESYSTEM_READ, - "mcp__filesystem__write": ToolType.MCP_FILESYSTEM_WRITE, - "mcp__filesystem__list": ToolType.MCP_FILESYSTEM_LIST, - "mcp__database__query": ToolType.MCP_DATABASE_QUERY, - "mcp__database__schema": ToolType.MCP_DATABASE_SCHEMA, - "mcp__web__scrape": ToolType.MCP_WEB_SCRAPE, - "mcp__web__search": ToolType.MCP_WEB_SEARCH, - "mcp__github__getrepo": ToolType.MCP_GITHUB_GET_REPO, - "mcp__github__createissue": ToolType.MCP_GITHUB_CREATE_ISSUE, - "mcp__github__listissues": ToolType.MCP_GITHUB_LIST_ISSUES, - "mcp__slack__sendmessage": ToolType.MCP_SLACK_SEND_MESSAGE, - "mcp__slack__listchannels": ToolType.MCP_SLACK_LIST_CHANNELS, - } - - if tool_name.lower().startswith("mcp__") and tool_name.lower() not in tool_map: - return ToolType.MCP_OTHER - - return tool_map.get(tool_name.lower(), ToolType.OTHER) - - -def parse_hook_input(raw_data: dict) -> HookInputUnion: - """Parse and validate hook input using appropriate Pydantic model. - - Since Claude Code doesn't send explicit hook type info, we infer the type - from the data structure based on the documented schemas. 
- """ - - fields = set(raw_data.keys()) - - if "tool_name" in fields and "tool_input" in fields and "tool_response" not in fields: - return PreToolUseInput(**raw_data) - - elif "tool_name" in fields and "tool_input" in fields and "tool_response" in fields: - return PostToolUseInput(**raw_data) - - elif "message" in fields: - return NotificationInput(**raw_data) - - elif "stop_hook_active" in fields: - if raw_data.get("stop_hook_active"): - return SubagentStopInput(**raw_data) - return StopInput(**raw_data) - - elif "session_id" in fields and "transcript_path" in fields: - return StopInput(**raw_data) - - else: - raise ValueError(f"Unknown hook input schema with fields: {fields}") - - def _read_stdin_with_timeout(timeout_seconds: float = 5.0) -> str: - """Read stdin with a timeout to prevent hanging on unclosed pipes. - - Uses select() to check if stdin has data available before reading. - Returns empty string if stdin is not ready within the timeout. - - Args: - timeout_seconds: Maximum seconds to wait for stdin data. - - Returns: - Stripped stdin content, or empty string on timeout/error. - """ ready, _, _ = select.select([sys.stdin], [], [], timeout_seconds) if not ready: return "" return sys.stdin.read().strip() -def handle_hook(event_type_override: HookEventType | None = None) -> int: - """Main hook handler function. +def handle_hook(event_type_override: AbstractEventType | None = None) -> int: + """Main entry point for Claude Code hook invocations. - Reads and parses stdin BEFORE acquiring the lock to prevent hung pipes - from holding the lock and starving all other hook invocations. + Reads stdin, dispatches through the Claude-Code adapter, and runs the + Claude-Code-specific stop-hook feedback pipeline when applicable. 
Args: - event_type_override: Optional override for the event type, used when called via specific hook entrypoints + event_type_override: Force a specific event type (used by per-event CLI + subcommands: hook-pre-tool-use, hook-post-tool-use, etc.). """ try: stdin_input = _read_stdin_with_timeout() @@ -150,131 +57,54 @@ def handle_hook(event_type_override: HookEventType | None = None) -> int: return 0 try: - raw_data = json.loads(stdin_input) - parsed_input = parse_hook_input(raw_data) - except Exception as e: + raw_payload = json.loads(stdin_input) + except json.JSONDecodeError as e: if settings.debug_mode: print(f"Slopometry: Failed to parse hook input: {e}", file=sys.stderr) return 0 - lock = SlopometryLock(project_dir=os.getcwd()) - with lock.acquire() as acquired: - if not acquired: - print("Slopometry: Could not acquire lock, skipping hook execution.", file=sys.stderr) - return 0 - - return _handle_hook_internal(event_type_override, parsed_input, raw_data) - - -def _handle_hook_internal( - event_type_override: HookEventType | None, - parsed_input: HookInputUnion, - raw_data: dict, -) -> int: - """Internal hook handler logic (runs under lock with pre-parsed data). - - Args: - event_type_override: Optional override for the event type. - parsed_input: Pre-parsed and validated hook input. - raw_data: Raw JSON data from stdin (stored as event metadata). 
- """ try: - event_type = event_type_override if event_type_override else detect_event_type_from_parsed(parsed_input) - - session_id = parsed_input.session_id - - session_manager = SessionManager() - sequence_number = session_manager.get_next_sequence_number(session_id) - - git_tracker = GitTracker() - git_state = None - match (event_type, sequence_number): - case (HookEventType.PRE_TOOL_USE, 1) | (HookEventType.STOP, 1): - git_state = git_tracker.get_git_state() - case (HookEventType.STOP, _): - git_state = git_tracker.get_git_state() - - working_directory = os.getcwd() - project_tracker = ProjectTracker(working_dir=Path(working_directory)) - project = project_tracker.get_project() - - event = HookEvent( - session_id=session_id, - event_type=event_type, - sequence_number=sequence_number, - metadata=raw_data, - git_state=git_state, - working_directory=working_directory, - project=project, - transcript_path=parsed_input.transcript_path, + event = dispatch_event( + AbstractEventSource.CLAUDE_CODE, + raw_payload, + event_type_override=event_type_override, ) - - if isinstance(parsed_input, PreToolUseInput | PostToolUseInput): - event.tool_name = parsed_input.tool_name - event.tool_type = get_tool_type(parsed_input.tool_name) - - if isinstance(parsed_input, PostToolUseInput): - if isinstance(parsed_input.tool_response, dict): - event.duration_ms = parsed_input.tool_response.get("duration_ms") - event.exit_code = parsed_input.tool_response.get("exit_code") - event.error_message = parsed_input.tool_response.get("error") - else: - event.duration_ms = None - event.exit_code = None - event.error_message = None - - db = EventDatabase() - db.save_event(event) - - if settings.enable_complexity_analysis and isinstance(parsed_input, StopInput | SubagentStopInput): - return handle_stop_event(session_id, parsed_input) - + except Exception as e: if settings.debug_mode: - debug_info = { - "slopometry_event": { - "session_id": session_id, - "event_type": event_type.value, - 
"sequence_number": sequence_number, - "tool_name": event.tool_name, - "tool_type": event.tool_type.value if event.tool_type else None, - "timestamp": event.timestamp.isoformat(), - "parsed_input_type": type(parsed_input).__name__, - } - } - print(f"Slopometry captured: {json.dumps(debug_info, indent=2)}", file=sys.stderr) - + print(f"Slopometry hook error: {e}", file=sys.stderr) return 0 - except Exception as e: - import traceback - - error_msg = f"Slopometry hook error: {e}\n{traceback.format_exc()}" - - if settings.debug_mode: - print(error_msg, file=sys.stderr) + stop_hook_active = bool(raw_payload.get("stop_hook_active")) + if ( + settings.enable_complexity_analysis + and not stop_hook_active + and event.event_type in (AbstractEventType.TURN_COMPLETED, AbstractEventType.SUBAGENT_COMPLETED) + ): + return handle_stop_event(event.session_id, event.working_directory) + + if settings.debug_mode: + debug_info = { + "slopometry_event": { + "session_id": event.session_id, + "event_type": event.event_type.value, + "sequence_number": event.sequence_number, + "tool_name": event.tool_call.tool_name if event.tool_call else None, + "tool_type": event.tool_call.tool_type if event.tool_call else None, + "timestamp": event.timestamp.isoformat(), + } + } + print(f"Slopometry captured: {json.dumps(debug_info, indent=2)}", file=sys.stderr) - return 0 + return 0 def _get_feedback_cache_path(working_directory: str) -> Path: - """Get path to the feedback cache file for a working directory.""" cache_dir = Path(working_directory) / ".slopometry" cache_dir.mkdir(exist_ok=True) return cache_dir / "feedback_cache.json" def _get_current_commit_sha(working_directory: str) -> str | None: - """Get current commit SHA with a single git command. - - This is the cheapest possible git operation (~5ms) used to short-circuit - the expensive _compute_working_tree_cache_key on the cache-hit path. - - Args: - working_directory: Path to the git working directory. 
- - Returns: - Commit SHA string, or None if not a git repo or git fails. - """ try: result = subprocess.run( ["git", "rev-parse", "HEAD"], @@ -286,35 +116,16 @@ def _get_current_commit_sha(working_directory: str) -> str | None: if result.returncode == 0: return result.stdout.strip() except (subprocess.TimeoutExpired, subprocess.SubprocessError, OSError): - pass + pass # slopometry: allow-silent - return None below signals "git unavailable" to callers return None def _has_source_changes(working_directory: str) -> bool: - """Check whether the working tree has any in-scope source delta vs the committed state. - - Used by the fast-path: when the commit SHA is unchanged and this returns False, - the source content is provably identical to the last fire and the hook can stay - silent without recomputing the full content key. - - Covers two kinds of change so that the fast-path never suppresses a genuine fire: - 1. Tracked modifications — `git diff --quiet -- *.py *.rs` (staged + unstaged), - cheapest first. Only when git reports a diff do we enumerate via - `_get_modified_source_files_from_git` to drop false positives from ignored - dirs (`__pycache__/*.py`, `.venv/site-packages/*.py`, …) and submodules. - 2. Untracked source files — `git ls-files --others` (filtered). A brand-new - `.py`/`.rs` file is invisible to `git diff`, so without this check the - fast-path would silently swallow the fire for newly created source. - - Submodule contents are excluded everywhere (--ignore-submodules=all on diffs; - `git ls-files` never descends into submodules), so dirty submodules, HEAD pointer - moves, and user git configs (submodule.recurse, diff.submodule=log) cannot trip it. - - Args: - working_directory: Path to the git working directory. + """Cheap working-tree delta probe used by the feedback-cache fast path. - Returns: - True if any non-ignored source files are modified or newly added. + Two checks so the fast path never suppresses a genuine fire: + 1. 
Tracked modifications — `git diff --quiet -- *.py *.rs` (cheap first). + 2. Untracked source files — `git ls-files --others` filtered. """ wt = WorkingTreeStateCalculator(working_directory, languages=None) @@ -324,12 +135,7 @@ def _has_source_changes(working_directory: str) -> bool: ["git", "diff", "--cached", "--quiet", "--ignore-submodules=all", "--", "*.py", "*.rs"], ]: try: - result = subprocess.run( - diff_args, - cwd=working_directory, - capture_output=True, - timeout=10, - ) + result = subprocess.run(diff_args, cwd=working_directory, capture_output=True, timeout=10) if result.returncode != 0: any_diff = True break @@ -338,40 +144,18 @@ def _has_source_changes(working_directory: str) -> bool: if any_diff and wt._get_modified_source_files_from_git(): return True - return bool(wt.get_untracked_source_files()) def _compute_working_tree_cache_key(working_directory: str) -> str: - """Compute a commit-invariant cache key from working-tree source content. - - The key is a digest over the current content of every non-ignored, - non-submodule .py/.rs file in the working tree (tracked + untracked). It - deliberately does NOT include the commit SHA: committing already-written - code, switching branches, pulling, rebasing, or merging does not change - source *content* and must not re-fire the hook. The key changes iff source - bytes change — including the addition of a new untracked source file. - - Args: - working_directory: Path to the working directory - - Returns: - Cache key string (BLAKE2b hex digest) - """ wt_calculator = WorkingTreeStateCalculator(working_directory, languages=None) return wt_calculator.calculate_source_content_key() def _load_feedback_cache(working_directory: str) -> FeedbackCacheState | None: - """Load the feedback cache state from disk. 
- - Returns: - FeedbackCacheState if cache exists and is valid, None otherwise - """ cache_path = _get_feedback_cache_path(working_directory) if not cache_path.exists(): return None - try: return FeedbackCacheState.model_validate_json(cache_path.read_text()) except (json.JSONDecodeError, OSError, ValueError): @@ -381,14 +165,6 @@ def _load_feedback_cache(working_directory: str) -> FeedbackCacheState | None: def _save_feedback_cache( working_directory: str, cache_key: str, file_hashes: dict[str, str], commit_sha: str | None = None ) -> None: - """Save the feedback cache state with per-file content hashes. - - Args: - working_directory: Path to the working directory - cache_key: Working tree cache key - file_hashes: Per-file content hashes at the time of this cache save - commit_sha: Current commit SHA for cheap fast-path validation on next run - """ cache_path = _get_feedback_cache_path(working_directory) try: state = FeedbackCacheState(last_key=cache_key, file_hashes=file_hashes, commit_sha=commit_sha) @@ -398,36 +174,15 @@ def _save_feedback_cache( def _has_analyzable_source_files(working_directory: str) -> bool: - """Check if the working directory contains any Python or Rust source files. - - Delegates to GitTracker.has_analyzable_source_files() which owns all - git-file-listing logic. - - Args: - working_directory: Path to the working directory to check. - - Returns: - True if at least one .py or .rs file is found via git ls-files. - """ - tracker = GitTracker(Path(working_directory)) - return tracker.has_analyzable_source_files() + return GitTracker(Path(working_directory)).has_analyzable_source_files() def _resolve_working_directory(stored_wd: str | None) -> str | None: - """Resolve the effective working_directory for a stop event. - - `stored_wd` is the working_directory recorded on the FIRST event of the - session. 
If the user renamed or moved the repo since that event, the - stored path no longer points to the live repo — falling through to it - would read/write the cache at a stale location and every Stop would - invalidate against an absent cache. Fall back to `os.getcwd()` (the - hook subprocess inherits Claude Code's cwd, which is the live project - root) whenever the stored path is set but doesn't resolve to an - existing directory. - - `stored_wd is None` means the session has no recorded events at all - (unknown session_id); callers should bail in that case, so we - propagate the None rather than substituting cwd. + """Resolve the effective working directory for a stop event. + + Falls back to `os.getcwd()` (which Claude Code passes through to the hook + subprocess) when the stored working directory from the first event no + longer resolves to a live directory (e.g., repo was renamed or moved). """ if stored_wd is None: return None @@ -439,50 +194,23 @@ def _resolve_working_directory(stored_wd: str | None) -> str | None: return stored_wd -def handle_stop_event(session_id: str, parsed_input: "StopInput | SubagentStopInput") -> int: - """Handle Stop events with code smell feedback and optional complexity analysis. - - Code smells are always checked (independent of enable_complexity_feedback). - Complexity metrics are only shown when enable_complexity_feedback is True. - Dev guidelines are shown when feedback_dev_guidelines is True. - - Feedback is cached - if the same feedback would be shown twice without code changes, - the second invocation returns silently. - - The firing key is a commit-invariant digest of working-tree source content - (see _compute_working_tree_cache_key): it fires only when .py/.rs bytes change, - never on commits, branch switches, pulls, or non-source churn. - - Optimized execution order (cheapest checks first): - 1. stop_hook_active check (<1ms) - 2. get_session_working_directory (<1ms, single SQL) - 3. 
cheap cache fast-path (commit SHA hint + source-delta probe) - 4. analyzable source files gate (git ls-files) - 5. full content key computation (only on fast-path miss) - 6. get_session_statistics (only when needed) - 7. use stats.complexity_metrics (no redundant call) +def handle_stop_event(session_id: str, working_directory: str | None = None) -> int: + """Run the Claude-Code stop-hook feedback pipeline. Args: - session_id: The session ID - parsed_input: The stop event input + session_id: The session ID. + working_directory: The working directory to scope analysis to. If None, + resolved from the DB-recorded value with a cwd fallback. Returns: - Exit code (0 for success, 2 for blocking with feedback) + Exit code (0 for silent success, 2 for blocking with feedback). """ - if parsed_input.stop_hook_active: - return 0 - - db = EventDatabase() - working_directory = _resolve_working_directory(db.get_session_working_directory(session_id)) + if working_directory is None: + db = EventDatabase() + working_directory = _resolve_working_directory(db.get_session_working_directory(session_id)) if not working_directory: return 0 - # Fast-path cache check: when the commit SHA is unchanged AND the source tree - # has no delta (no tracked modifications, no new untracked source files), the - # content key is provably identical to the last fire — skip the full key - # computation. commit_sha here is only a cheap hint; it is NOT part of the key, - # so a bare commit of unchanged content takes the full-key path below and - # correctly matches last_key (silent) instead of re-firing. 
cached_state = _load_feedback_cache(working_directory) if cached_state is not None and cached_state.commit_sha is not None: current_sha = _get_current_commit_sha(working_directory) @@ -492,11 +220,11 @@ def handle_stop_event(session_id: str, parsed_input: "StopInput | SubagentStopIn if not _has_analyzable_source_files(working_directory): return 0 - # Full cache key — only reached when source files exist AND fast-path didn't match cache_key = _compute_working_tree_cache_key(working_directory) if cached_state is not None and cached_state.last_key == cache_key: return 0 + db = EventDatabase() stats = db.get_session_statistics(session_id) if not stats: return 0 @@ -504,41 +232,29 @@ def handle_stop_event(session_id: str, parsed_input: "StopInput | SubagentStopIn current_metrics = stats.complexity_metrics delta = stats.complexity_delta - # Determine which files changed since the last time feedback was shown. - # Uses per-file content hashes from the feedback cache to filter out - # pre-existing uncommitted changes that haven't changed. 
wt_calculator = WorkingTreeStateCalculator(working_directory, languages=None) - current_file_hashes = wt_calculator.get_source_file_content_hashes() if cached_state is not None: edited_files = wt_calculator.get_files_changed_since(cached_state.file_hashes) else: - # No cache yet (first run) — treat all modified source files as edited edited_files = wt_calculator.get_modified_source_file_paths() feedback_parts: list[str] = [] - # Smell feedback: split into code-based (stable) and context-derived (unstable) - # Context-derived smells (e.g., unread_related_tests) change with every transcript - # read and must NOT be included in the cache hash to avoid repeated triggers if current_metrics: scoped_smells = scope_smells_for_session( current_metrics, delta, edited_files, working_directory, stats.context_coverage ) - code_smells = [s for s in scoped_smells if s.name != "unread_related_tests"] context_smells = [s for s in scoped_smells if s.name == "unread_related_tests"] - code_feedback, has_code_smells, _ = format_code_smell_feedback(code_smells, session_id) if has_code_smells: feedback_parts.append(code_feedback) - context_smell_feedback, has_context_smells, _ = format_code_smell_feedback(context_smells, session_id) if has_context_smells: feedback_parts.append(context_smell_feedback) - # Context coverage - informational but NOT stable (changes with every Read/Glob/Grep) if settings.enable_complexity_feedback and stats.context_coverage and stats.context_coverage.has_gaps: context_feedback = format_context_coverage_feedback(stats.context_coverage) if context_feedback: @@ -549,38 +265,24 @@ def handle_stop_event(session_id: str, parsed_input: "StopInput | SubagentStopIn if dev_guidelines: feedback_parts.append(f"\n**Project Development Guidelines:**\n{dev_guidelines}") - # Save cache with current file hashes regardless of whether feedback is shown. - # This ensures the next stop event compares against this point in time. 
current_commit_sha = _get_current_commit_sha(working_directory) _save_feedback_cache(working_directory, cache_key, current_file_hashes, commit_sha=current_commit_sha) if feedback_parts: feedback = "\n\n".join(feedback_parts) - feedback += ( f"\n\n---\n**Session**: `{session_id}` | Details: `slopometry solo show {session_id} --smell-details`" ) - hook_output = {"decision": "block", "reason": feedback} print(json.dumps(hook_output)) return 2 - return 0 def format_context_coverage_feedback(coverage: ContextCoverage) -> str: - """Format context coverage information for Claude consumption. - - Args: - coverage: Context coverage metrics from the session - - Returns: - Formatted feedback string highlighting gaps in context reading - """ lines = [] lines.append("") lines.append("**Context Coverage**") - read_ratio = coverage.files_read_before_edit_ratio if read_ratio < 1.0: lines.append( @@ -588,15 +290,12 @@ def format_context_coverage_feedback(coverage: ContextCoverage) -> str: ) else: lines.append(f" • Read before edit: {read_ratio:.0%} ✓") - imports_cov = coverage.overall_imports_coverage if imports_cov < 100: lines.append(f" • Imports coverage: {imports_cov:.0f}%") - dependents_cov = coverage.overall_dependents_coverage if dependents_cov < 100: lines.append(f" • Dependents coverage: {dependents_cov:.0f}%") - if coverage.blind_spots: lines.append("") lines.append("**Blind spots** (related files not read):") @@ -604,95 +303,48 @@ def format_context_coverage_feedback(coverage: ContextCoverage) -> str: lines.append(f" • {truncate_path(blind_spot, max_width=65)}") if len(coverage.blind_spots) > 5: lines.append(f" ... and {len(coverage.blind_spots) - 5} more") - return "\n".join(lines) def extract_dev_guidelines_from_claude_md(working_directory: str) -> str: - """Extract '## Development guidelines' section from CLAUDE.md in the CWD. 
- - Args: - working_directory: The current working directory to search for CLAUDE.md - - Returns: - The extracted dev guidelines content, or empty string if not found + """Extract the `## Development guidelines` section from CLAUDE.md. - Raises: - OSError: If CLAUDE.md exists but cannot be read + CLAUDE.md is Claude Code's project-level instructions file convention. """ claude_md_path = Path(working_directory) / "CLAUDE.md" - if not claude_md_path.exists(): return "" - content = claude_md_path.read_text(encoding="utf-8") - lines = content.split("\n") in_section = False section_lines: list[str] = [] - for line in lines: if line.strip().startswith("## Development guidelines"): in_section = True continue - if in_section: if line.strip().startswith("## ") or line.strip().startswith("# "): break section_lines.append(line) - if not section_lines: return "" - return "\n".join(section_lines).strip() def _get_related_files_via_imports(edited_files: set[str], working_directory: str) -> set[str]: - """Build set of files related to edited files for blocking smell scoping. - - Only includes edited files and their test files. Does NOT include reverse - import graph dependents — those files weren't edited, so their pre-existing - smells are not actionable in the stop hook. 
- - Args: - edited_files: Set of files edited in this session - working_directory: Path to the working directory - - Returns: - Set of file paths related to edited files (includes edited_files themselves) - - Raises: - Exception: If import graph analysis fails (no silent fallback) - """ + """Build the set of files related to edited files for blocking smell scoping.""" from slopometry.core.context_coverage_analyzer import ContextCoverageAnalyzer related = set(edited_files) - analyzer = ContextCoverageAnalyzer(Path(working_directory)) analyzer._build_import_graph() - for edited_file in edited_files: test_files = analyzer._find_test_files(edited_file) related.update(test_files) - return related def _is_file_related_to_edits(smell_file: str, edited_files: set[str], related_files: set[str]) -> bool: - """Check if a smell file is related to the edited files. - - A file is related if: - - It is directly in edited_files - - It is in the related_files set (computed via import graph) - - Args: - smell_file: Path to a file containing a smell - edited_files: Set of files edited in this session - related_files: Set of related files via import graph (required) - - Returns: - True if the smell file is related to edited files - """ return smell_file in edited_files or smell_file in related_files @@ -707,16 +359,6 @@ def scope_smells_for_session( Extracts the scoping/classification logic that determines which smells are blocking vs informational and which files are actionable for this session. 
- - Args: - current_metrics: Current complexity metrics with code smell counts - delta: Optional complexity delta showing changes - edited_files: Set of files edited in this session - working_directory: Path to working directory for import graph analysis - context_coverage: Optional context coverage for detecting unread related tests - - Returns: - List of ScopedSmell instances classified for this session """ blocking_smell_names = {"test_skip", "swallowed_exception"} # REASON: acknowledged_silent_except is acceptable individually but blocks on INCREASE — an `# slopometry: allow-silent` marker moves a handler out of swallowed_exception, so a rising marker count is the anti-reward-hack signal that new suppressions need justifying. @@ -728,7 +370,6 @@ def scope_smells_for_session( result: list[ScopedSmell] = [] - # Synthetic blocking smell: unread related tests if context_coverage: unread_tests: list[str] = [] for file_cov in context_coverage.file_coverage: @@ -757,10 +398,14 @@ def scope_smells_for_session( change = smell_changes.get(smell.name, 0) guidance = smell.definition.guidance - is_blocking_smell = smell.name in blocking_smell_names or (smell.name in block_on_increase_names and change > 0) + is_blocking_smell = smell.name in blocking_smell_names or ( + smell.name in block_on_increase_names and change > 0 + ) if is_blocking_smell and edited_files: - related_files = [f for f in smell.files if _is_file_related_to_edits(f, edited_files, related_via_imports)] + related_files = [ + f for f in smell.files if _is_file_related_to_edits(f, edited_files, related_via_imports) + ] unrelated_files = [f for f in smell.files if f not in related_files] if related_files: @@ -816,14 +461,8 @@ def format_code_smell_feedback( ) -> tuple[str, bool, bool]: """Format pre-classified smell data into feedback output. 
- Args: - scoped_smells: Pre-classified smells from scope_smells_for_session - session_id: Session ID for generating the smell-details command - Returns: - Tuple of (formatted feedback string, has_smells, has_blocking_smells) - - has_smells: whether any code smells were detected - - has_blocking_smells: whether any BLOCKING smells in edited files were detected + Tuple of (formatted feedback string, has_smells, has_blocking_smells). """ blocking_smells = [s for s in scoped_smells if s.is_blocking] other_smells = [s for s in scoped_smells if not s.is_blocking] @@ -831,12 +470,10 @@ def format_code_smell_feedback( lines: list[str] = [] has_blocking = len(blocking_smells) > 0 - # Separate blocking smell increases from decreases blocking_increased = [s for s in blocking_smells if s.change > 0] blocking_decreased = [s for s in blocking_smells if s.change < 0] blocking_unchanged = [s for s in blocking_smells if s.change == 0] - # Show improvements (decreases) first - don't require action if blocking_decreased: lines.append("") lines.append("**Code Smell Improvements** (decreases - great work!):") @@ -846,7 +483,6 @@ def format_code_smell_feedback( lines.append(f" • **{smell.label}**: {smell.count} file(s){change_str}") lines.append("") - # Show unchanged and increased blocking smells (require action) blocking_requiring_action = blocking_unchanged + blocking_increased if blocking_requiring_action: if not blocking_decreased: @@ -864,7 +500,6 @@ def format_code_smell_feedback( lines.append(f" → {smell.guidance}") lines.append("") - # Separate increases (require review) from decreases (improvements - no review needed) smells_increased = [s for s in other_smells if s.change > 0] smells_decreased = [s for s in other_smells if s.change < 0] other_smells_with_changes = smells_increased + smells_decreased @@ -873,7 +508,6 @@ def format_code_smell_feedback( if not blocking_increased: lines.append("") - # Show improvements first (decreases) - these don't require review if 
smells_decreased: lines.append("**Code Smell Improvements** (decreases - great work!):") lines.append("") @@ -882,7 +516,6 @@ def format_code_smell_feedback( lines.append(f" • **{smell.label}**: {smell.count}{change_str}") lines.append("") - # Show increases - these require review if smells_increased: lines.append( "**Code Smells** (increases require review, irrespective of which session edited related files):" @@ -904,28 +537,11 @@ def format_code_smell_feedback( return "", False, False -def detect_event_type_from_parsed(parsed_input: HookInputUnion) -> HookEventType: - """Detect event type from parsed input model.""" - match parsed_input: - case PreToolUseInput(): - return HookEventType.PRE_TOOL_USE - case PostToolUseInput(): - return HookEventType.POST_TOOL_USE - case NotificationInput(): - return HookEventType.NOTIFICATION - case StopInput(): - return HookEventType.STOP - case SubagentStopInput(): - return HookEventType.SUBAGENT_STOP - case _: - raise ValueError(f"Unknown input type: {type(parsed_input)}") - - -def main() -> None: - """Entry point for hook handler.""" - exit_code = handle_hook() - sys.exit(exit_code) - - -if __name__ == "__main__": - main() +__all__ = [ + "handle_hook", + "handle_stop_event", + "format_context_coverage_feedback", + "extract_dev_guidelines_from_claude_md", + "scope_smells_for_session", + "format_code_smell_feedback", +] diff --git a/src/slopometry/core/migrations.py b/src/slopometry/core/migrations.py index 0c4182c..1856a51 100644 --- a/src/slopometry/core/migrations.py +++ b/src/slopometry/core/migrations.py @@ -508,6 +508,103 @@ def up(self, conn: sqlite3.Connection) -> None: ) +class Migration015AbstractEventTypeValues(Migration): + """Translate legacy PascalCase event_type values into the new snake_case + abstract protocol values. Existing rows are rewritten; the column type is + unchanged (TEXT), only the string content shifts. 
+ """ + + _RENAMES: dict[str, str] = { + "PreToolUse": "tool_call_started", + "PostToolUse": "tool_call_completed", + "Notification": "notification", + "Stop": "turn_completed", + "SubagentStop": "subagent_completed", + "TodoUpdated": "todo_updated", + "MessageUpdated": "message_updated", + "SubagentStart": "subagent_started", + } + + @property + def version(self) -> str: + return "015" + + @property + def description(self) -> str: + return "Translate event_type values from PascalCase (legacy Claude Code/OpenCode wire names) to snake_case (abstract protocol)" + + def up(self, conn: sqlite3.Connection) -> None: + cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='hook_events'") + if not cursor.fetchone(): + return + cursor = conn.execute("PRAGMA table_info(hook_events)") + if not any(row[1] == "event_type" for row in cursor.fetchall()): + return + for legacy, abstract in self._RENAMES.items(): + conn.execute( + "UPDATE hook_events SET event_type = ? WHERE event_type = ?", + (abstract, legacy), + ) + + +class Migration016AddMemorySupersededByColumn(Migration): + """Add superseded_by column to memories for lineage tracking. + + Distinct from `retained`, which is a user/system retention decision. + `superseded_by` records the id of the newer memory that an LLM judge + decided replaces this one. A separate cleanup pass can prune the chain. 
+ """ + + @property + def version(self) -> str: + return "016" + + @property + def description(self) -> str: + return "Add superseded_by column to memories for LLM-judged lineage tracking" + + def up(self, conn: sqlite3.Connection) -> None: + cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='memories'") + if not cursor.fetchone(): + return + cursor = conn.execute("PRAGMA table_info(memories)") + if any(row[1] == "superseded_by" for row in cursor.fetchall()): + return + conn.execute("ALTER TABLE memories ADD COLUMN superseded_by TEXT") + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_memories_superseded_by ON memories(superseded_by)" + ) + + +class Migration017AddProcessedMemorySourceColumn(Migration): + """Add source column to processed_memory_sessions for harness routing. + + Pre-MultiHarness rows had no source column; their session IDs were bare + (no ``<source>:`` prefix). New rows carry an explicit source + (``claude_code`` or ``opencode``) to avoid cross-harness collisions in the + ``(session_id, project_dir)`` primary key. 
+ """ + + @property + def version(self) -> str: + return "017" + + @property + def description(self) -> str: + return "Add source column to processed_memory_sessions; default existing rows to claude_code" + + def up(self, conn: sqlite3.Connection) -> None: + cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='processed_memory_sessions'") + if not cursor.fetchone(): + return + cursor = conn.execute("PRAGMA table_info(processed_memory_sessions)") + if any(row[1] == "source" for row in cursor.fetchall()): + return + conn.execute( + "ALTER TABLE processed_memory_sessions ADD COLUMN source TEXT NOT NULL DEFAULT 'claude_code'" + ) + + class MigrationRunner: """Manages database migrations.""" @@ -528,6 +625,9 @@ def __init__(self, db_path: Path): Migration012AddNFPObjectiveToExperimentRuns(), Migration013AddSourceAndParentSession(), Migration014AddBehavioralPatternHistory(), + Migration015AbstractEventTypeValues(), + Migration016AddMemorySupersededByColumn(), + Migration017AddProcessedMemorySourceColumn(), ] @contextmanager diff --git a/src/slopometry/core/models/__init__.py b/src/slopometry/core/models/__init__.py index 5699124..2d85c39 100644 --- a/src/slopometry/core/models/__init__.py +++ b/src/slopometry/core/models/__init__.py @@ -55,7 +55,6 @@ ExperimentRun, ExperimentStatus, FeatureBoundary, - MergeCommit, ) # Hook types @@ -63,10 +62,22 @@ AgentTool, AnalysisSource, GitState, - HookEventType, Project, ProjectLanguage, - ToolType, +) + +# Memory types +from slopometry.core.models.memory import ( + MemoryCandidate, + MemoryCreateRequest, + MemoryEntry, + MemoryType, +) +from slopometry.core.models.protocol import ( + AbstractEventSource, + AbstractEventType, + AbstractHookEvent, + ToolCallPayload, ) # Session types @@ -106,13 +117,15 @@ "ComplexityEvolution", "FileAnalysisResult", # Hook + "AbstractEventSource", + "AbstractEventType", + "AbstractHookEvent", + "ToolCallPayload", "AgentTool", "AnalysisSource", "GitState", - 
"HookEventType", "Project", "ProjectLanguage", - "ToolType", # Session "CompactEvent", "ContextCoverage", @@ -145,7 +158,6 @@ "ExperimentRun", "ExperimentStatus", "FeatureBoundary", - "MergeCommit", # Display "ExperimentDisplayData", "FeatureDisplayData", @@ -158,4 +170,9 @@ "UserStoryDisplayData", "UserStoryEntry", "UserStoryStatistics", + # Memory + "MemoryCandidate", + "MemoryCreateRequest", + "MemoryEntry", + "MemoryType", ] diff --git a/src/slopometry/core/models/baseline.py b/src/slopometry/core/models/baseline.py index ecef096..434c4e9 100644 --- a/src/slopometry/core/models/baseline.py +++ b/src/slopometry/core/models/baseline.py @@ -1,7 +1,7 @@ """Baseline computation, impact assessment, and statistical models.""" from datetime import datetime -from enum import Enum +from enum import StrEnum from pydantic import BaseModel, ConfigDict, Field, field_validator @@ -10,7 +10,7 @@ from slopometry.core.models.smell import SmellCounts -class BaselineStrategy(str, Enum): +class BaselineStrategy(StrEnum): """How to select commits for building the historic quality baseline. 
MERGE_ANCHORED: Follows first-parent (trunk) history, so each delta represents @@ -185,7 +185,7 @@ class RepoBaseline(BaseModel): ) -class ImpactCategory(str, Enum): +class ImpactCategory(StrEnum): """Categories for staged changes impact assessment.""" SIGNIFICANT_IMPROVEMENT = "significant_improvement" @@ -195,7 +195,7 @@ class ImpactCategory(str, Enum): SIGNIFICANT_DEGRADATION = "significant_degradation" -class ZScoreInterpretation(str, Enum): +class ZScoreInterpretation(StrEnum): """Human-readable interpretation of Z-score values.""" MUCH_BETTER = "much better than avg" diff --git a/src/slopometry/core/models/experiment.py b/src/slopometry/core/models/experiment.py index de73f14..7f18765 100644 --- a/src/slopometry/core/models/experiment.py +++ b/src/slopometry/core/models/experiment.py @@ -1,7 +1,7 @@ """Experiment tracking and progress models.""" from datetime import datetime -from enum import Enum +from enum import StrEnum from pathlib import Path from uuid import uuid4 @@ -11,7 +11,7 @@ from slopometry.core.models.user_story import NextFeaturePrediction -class ExperimentStatus(str, Enum): +class ExperimentStatus(StrEnum): """Status of an experiment run.""" PENDING = "pending" @@ -68,15 +68,6 @@ class ProgressDisplayData(BaseModel): maintainability_score: str = Field(description="Formatted maintainability score") -class MergeCommit(BaseModel): - """Information about a merge commit in git history.""" - - hash: str = Field(description="The commit hash") - parents: list[str] = Field(description="Parent commit hashes") - message: str = Field(description="Commit message") - feature_branch: str = Field(description="The feature branch commit (second parent)") - - class FeatureBoundary(BaseModel): """Represents a feature's boundary commits.""" diff --git a/src/slopometry/core/models/hook.py b/src/slopometry/core/models/hook.py index 16c84e6..3f67f7c 100644 --- a/src/slopometry/core/models/hook.py +++ b/src/slopometry/core/models/hook.py @@ -1,27 +1,33 @@ 
-"""Hook-related models for Claude Code integration.""" +"""Hook-related base models — non-protocol types shared across the system. -from datetime import datetime -from enum import Enum -from typing import Any +The protocol-layer types (AbstractHookEvent, AbstractEventType, AbstractEventSource, +ToolCallPayload) live in `core/models/protocol/events.py`. The Claude-Code-specific tool +vocabulary (ToolType enum) lives in `core/protocol/adapters/claude_code.py`. -from pydantic import BaseModel, Field +Models kept here are harness-agnostic: project identification, git state, language +guard, feedback cache, hook response shape. +""" +from enum import StrEnum -class AgentTool(str, Enum): +from pydantic import BaseModel, ConfigDict, Field + + +class AgentTool(StrEnum): """Agent tool that produced the session.""" CLAUDE_CODE = "claude_code" OPENCODE = "opencode" -class ProjectLanguage(str, Enum): +class ProjectLanguage(StrEnum): """Supported languages for complexity analysis.""" PYTHON = "python" RUST = "rust" -class ProjectSource(str, Enum): +class ProjectSource(StrEnum): """Source of project identification.""" GIT = "git" @@ -35,83 +41,6 @@ class Project(BaseModel): source: ProjectSource -class EventSource(str, Enum): - """Source agent tool that generated the event.""" - - CLAUDE_CODE = "claude_code" - OPENCODE = "opencode" - - -class HookEventType(str, Enum): - """Types of hook events from Claude Code and OpenCode.""" - - PRE_TOOL_USE = "PreToolUse" - POST_TOOL_USE = "PostToolUse" - NOTIFICATION = "Notification" - STOP = "Stop" - SUBAGENT_STOP = "SubagentStop" - # OpenCode-specific event types - TODO_UPDATED = "TodoUpdated" - MESSAGE_UPDATED = "MessageUpdated" - SUBAGENT_START = "SubagentStart" - - -class ToolType(str, Enum): - """Known tool types in Claude Code.""" - - BASH = "Bash" - READ = "Read" - WRITE = "Write" - EDIT = "Edit" - MULTI_EDIT = "MultiEdit" - GREP = "Grep" - GLOB = "Glob" - LS = "LS" - TASK = "Task" - TODO_READ = "TodoRead" - TODO_WRITE = "TodoWrite" -
TASK_CREATE = "TaskCreate" - TASK_UPDATE = "TaskUpdate" - TASK_LIST = "TaskList" - TASK_GET = "TaskGet" - WEB_FETCH = "WebFetch" - WEB_SEARCH = "WebSearch" - NOTEBOOK_READ = "NotebookRead" - NOTEBOOK_EDIT = "NotebookEdit" - EXIT_PLAN_MODE = "exit_plan_mode" - - MCP_IDE_GET_DIAGNOSTICS = "mcp__ide__getDiagnostics" - MCP_IDE_EXECUTE_CODE = "mcp__ide__executeCode" - MCP_IDE_GET_WORKSPACE_INFO = "mcp__ide__getWorkspaceInfo" - MCP_IDE_GET_FILE_CONTENTS = "mcp__ide__getFileContents" - MCP_IDE_CREATE_FILE = "mcp__ide__createFile" - MCP_IDE_DELETE_FILE = "mcp__ide__deleteFile" - MCP_IDE_RENAME_FILE = "mcp__ide__renameFile" - MCP_IDE_SEARCH_FILES = "mcp__ide__searchFiles" - MCP_FILESYSTEM_READ = "mcp__filesystem__read" - MCP_FILESYSTEM_WRITE = "mcp__filesystem__write" - MCP_FILESYSTEM_LIST = "mcp__filesystem__list" - MCP_DATABASE_QUERY = "mcp__database__query" - MCP_DATABASE_SCHEMA = "mcp__database__schema" - MCP_WEB_SCRAPE = "mcp__web__scrape" - MCP_WEB_SEARCH = "mcp__web__search" - MCP_GITHUB_GET_REPO = "mcp__github__getRepo" - MCP_GITHUB_CREATE_ISSUE = "mcp__github__createIssue" - MCP_GITHUB_LIST_ISSUES = "mcp__github__listIssues" - MCP_SLACK_SEND_MESSAGE = "mcp__slack__sendMessage" - MCP_SLACK_LIST_CHANNELS = "mcp__slack__listChannels" - MCP_OTHER = "mcp__other" - - OTHER = "Other" - - -class AnalysisSource(str, Enum): - """Source of the impact analysis.""" - - UNCOMMITTED_CHANGES = "uncommitted_changes" - PREVIOUS_COMMIT = "previous_commit" - - class GitState(BaseModel): """Represents git repository state at a point in time.""" @@ -122,6 +51,13 @@ class GitState(BaseModel): commit_sha: str | None = None +class AnalysisSource(StrEnum): + """Source of the impact analysis.""" + + UNCOMMITTED_CHANGES = "uncommitted_changes" + PREVIOUS_COMMIT = "previous_commit" + + class FeedbackCacheState(BaseModel): """Persisted state of the feedback cache for change-based firing. 
@@ -141,90 +77,13 @@ class FeedbackCacheState(BaseModel): ) -class HookEvent(BaseModel): - """Represents a single hook invocation event.""" - - id: int | None = None - session_id: str - event_type: HookEventType - timestamp: datetime = Field(default_factory=datetime.now) - sequence_number: int - tool_name: str | None = None - tool_type: ToolType | None = None - metadata: dict = Field(default_factory=dict) - duration_ms: int | None = None - exit_code: int | None = None - error_message: str | None = None - git_state: GitState | None = None - working_directory: str - project: Project | None = None - transcript_path: str | None = None - source: EventSource = Field(default=EventSource.CLAUDE_CODE, description="Agent tool that generated this event") - parent_session_id: str | None = Field(default=None, description="Parent session ID for subagent sessions") - - -class PreToolUseInput(BaseModel): - """Input structure for PreToolUse hooks based on Claude Code documentation.""" - - session_id: str - transcript_path: str - tool_name: str - tool_input: dict[str, Any] = Field(default_factory=dict) - - model_config = {"extra": "allow"} - - -class PostToolUseInput(BaseModel): - """Input structure for PostToolUse hooks based on Claude Code documentation.""" - - session_id: str - transcript_path: str - tool_name: str - tool_input: dict[str, Any] = Field(default_factory=dict) - tool_response: dict[str, Any] | str | list[Any] = Field( - default_factory=dict, - description="Tool response data. Can be dict (most tools), str (Bash output), or list (NotebookRead cells). 
Uses Any for list items since different tools return different cell structures.", - ) - - model_config = {"extra": "allow"} - - -class NotificationInput(BaseModel): - """Input structure for Notification hooks based on Claude Code documentation.""" - - session_id: str - transcript_path: str - message: str - title: str | None = None - - model_config = {"extra": "allow"} - - -class StopInput(BaseModel): - """Input structure for Stop hooks based on Claude Code documentation.""" - - session_id: str - transcript_path: str - stop_hook_active: bool = False - - model_config = {"extra": "allow"} - - -class SubagentStopInput(BaseModel): - """Input structure for SubagentStop hooks based on Claude Code documentation.""" - - session_id: str - transcript_path: str - stop_hook_active: bool = False - - model_config = {"extra": "allow"} - - -HookInputUnion = PreToolUseInput | PostToolUseInput | NotificationInput | StopInput | SubagentStopInput - - class HookOutput(BaseModel): - """Output structure for hook responses based on Claude Code documentation.""" + """Output structure for hook responses. + + Mirrors Claude Code's hook output schema. The `decision`/`reason` pair is + the canonical blocking feedback shape; `continue`/`stopReason`/`suppressOutput` + are Claude-Code-specific extensions tolerated for compatibility. 
+ """ continue_: bool | None = Field(None, alias="continue") stop_reason: str | None = Field(None, alias="stopReason") @@ -232,7 +91,7 @@ class HookOutput(BaseModel): decision: str | None = Field(default=None, description="Decision outcome: approve, block, or undefined") reason: str | None = None - model_config = {"extra": "allow", "populate_by_name": True} + model_config = ConfigDict(extra="allow", populate_by_name=True) class LanguageGuardResult(BaseModel): @@ -248,7 +107,6 @@ class LanguageGuardResult(BaseModel): ) def format_warning(self) -> str | None: - """Return warning message if unsupported languages found, else None.""" if not self.detected_unsupported: return None return f"Found {', '.join(sorted(self.detected_unsupported))} files but analysis not yet supported" diff --git a/src/slopometry/core/models/memory.py b/src/slopometry/core/models/memory.py new file mode 100644 index 0000000..4376cfd --- /dev/null +++ b/src/slopometry/core/models/memory.py @@ -0,0 +1,50 @@ +"""Memory models for tracking durable facts across sessions.""" + +from datetime import datetime +from enum import StrEnum + +from pydantic import BaseModel + + +class MemoryType(StrEnum): + """Types of memories that can be stored.""" + + USER = "user" + FEEDBACK = "feedback" + PROJECT = "project" + REFERENCE = "reference" + + +class MemoryEntry(BaseModel): + """Represents a stored memory entry.""" + + id: str + session_id: str + project_dir: str + memory_type: MemoryType + content: str + source_context: str | None = None + created_at: datetime + updated_at: datetime | None = None + retained: bool = False + superseded_by: str | None = None + embedding: list[float] | None = None + metadata: dict | None = None + + +class MemoryCandidate(BaseModel): + """A candidate memory extracted from a transcript, before saving.""" + + memory_type: MemoryType + content: str + source_context: str | None = None + embedding: list[float] | None = None + metadata: dict | None = None + + +class 
MemoryCreateRequest(BaseModel): + """Request to create multiple memory entries from a session.""" + + session_id: str + project_dir: str + candidates: list[MemoryCandidate] diff --git a/src/slopometry/core/models/protocol/__init__.py b/src/slopometry/core/models/protocol/__init__.py new file mode 100644 index 0000000..21c7754 --- /dev/null +++ b/src/slopometry/core/models/protocol/__init__.py @@ -0,0 +1,18 @@ +"""Abstract hook event protocol — data-only Pydantic models. + +The canonical, harness-agnostic event schema. Adapters translate wire payloads +into these; storage and analytics operate on them. + +Runtime logic (adapters, dispatch, session management) lives in +`slopometry.core.protocol`. Models live here so the `core.models` package +holds only pure Pydantic ADTs. +""" + +from slopometry.core.models.protocol.events import ( + AbstractEventSource, + AbstractEventType, + AbstractHookEvent, + ToolCallPayload, +) + +__all__ = ["AbstractEventSource", "AbstractEventType", "AbstractHookEvent", "ToolCallPayload"] diff --git a/src/slopometry/core/models/protocol/events.py b/src/slopometry/core/models/protocol/events.py new file mode 100644 index 0000000..a621a32 --- /dev/null +++ b/src/slopometry/core/models/protocol/events.py @@ -0,0 +1,114 @@ +"""Canonical, harness-agnostic hook event schema. + +The persisted event shape. Every adapter produces an AbstractHookEvent; every +analyzer and storage operation consumes one. No field here may reference a +specific agent's vocabulary or wire-format key names. 
+""" + +from datetime import datetime +from enum import StrEnum +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + +from slopometry.core.models.hook import GitState, Project + + +class AbstractEventSource(StrEnum): + """Identity of the agent harness or collector that produced the event.""" + + CLAUDE_CODE = "claude_code" + OPENCODE = "opencode" + + +class AbstractEventType(StrEnum): + """Canonical event taxonomy — shared across all harnesses. + + Mapping to wire names is the adapter's responsibility: + tool_call_started <- Claude Code "PreToolUse" + tool_call_completed <- Claude Code "PostToolUse" + notification <- Claude Code "Notification" + turn_completed <- Claude Code "Stop" + subagent_completed <- Claude Code "SubagentStop" + todo_updated <- OpenCode bus event + message_updated <- OpenCode bus event + subagent_started <- OpenCode bus event + """ + + TOOL_CALL_STARTED = "tool_call_started" + TOOL_CALL_COMPLETED = "tool_call_completed" + NOTIFICATION = "notification" + TURN_COMPLETED = "turn_completed" + SUBAGENT_COMPLETED = "subagent_completed" + TODO_UPDATED = "todo_updated" + MESSAGE_UPDATED = "message_updated" + SUBAGENT_STARTED = "subagent_started" + + +class ToolCallPayload(BaseModel): + """Harness-agnostic tool-call payload. + + `tool_name` is the raw name from the source harness; `tool_type` is a + normalized category assigned by the adapter's tool_type_map. `input` and + `output` preserve the original payload structure from whichever harness + produced them — adapters are responsible for any reshaping. 
+ """ + + tool_name: str + tool_type: str | None = Field( + default=None, + description="Normalized tool category from the adapter's tool_type_map; None if unknown", + ) + input: dict[str, Any] + output: Any | None = Field( + default=None, + description="Tool result: dict | str | list depending on the tool's wire shape", + ) + duration_ms: int | None = None + exit_code: int | None = Field( + default=None, + description="Process exit code; only meaningful for shell-style tools", + ) + error_message: str | None = None + + model_config = ConfigDict(extra="forbid") + + +class AbstractHookEvent(BaseModel): + """A single hook invocation — the canonical stored event. + + `metadata` carries the complete raw wire payload for forensic and + re-processing use; downstream analyzers should not read from it but go + through the typed fields instead. + """ + + id: int | None = Field( + default=None, + description="Database autoincrement id; None for in-memory events not yet persisted", + ) + session_id: str + parent_session_id: str | None = Field( + default=None, + description="Parent session ID for subagent/child sessions; None for top-level", + ) + event_type: AbstractEventType + source: AbstractEventSource + timestamp: datetime = Field(default_factory=datetime.now) + tool_call: ToolCallPayload | None = None + metadata: dict[str, Any] = Field( + default_factory=dict, + description="Full raw wire payload — preserves harness-specific shape for forensics", + ) + git_state: GitState | None = None + working_directory: str + project: Project | None = None + transcript_location: str | None = Field( + default=None, + description="Harness-specific path or hint for post-hoc transcript retrieval", + ) + sequence_number: int = Field( + default=0, + description="Monotonic per-session sequence assigned by SessionManager", + ) + + model_config = ConfigDict(extra="forbid") diff --git a/src/slopometry/core/models/session.py b/src/slopometry/core/models/session.py index e1c10c4..65d1869 100644 
--- a/src/slopometry/core/models/session.py +++ b/src/slopometry/core/models/session.py @@ -5,7 +5,8 @@ from pydantic import BaseModel, Field from slopometry.core.models.complexity import ComplexityDelta, ExtendedComplexityMetrics -from slopometry.core.models.hook import AgentTool, GitState, HookEventType, Project, ToolType +from slopometry.core.models.hook import AgentTool, GitState, Project +from slopometry.core.models.protocol.events import AbstractEventType class TodoItem(BaseModel): @@ -248,8 +249,11 @@ class SessionStatistics(BaseModel): end_time: datetime | None = None total_events: int = 0 working_directory: str - events_by_type: dict[HookEventType, int] = Field(default_factory=dict) - tool_usage: dict[ToolType, int] = Field(default_factory=dict) + events_by_type: dict[AbstractEventType, int] = Field(default_factory=dict) + tool_usage: dict[str, int] = Field( + default_factory=dict, + description="Tool usage counts keyed by PascalCase tool type string (e.g., 'Bash', 'Read')", + ) error_count: int = 0 total_duration_ms: int = 0 average_tool_duration_ms: float = 0.0 diff --git a/src/slopometry/core/models/smell.py b/src/slopometry/core/models/smell.py index 51c05cc..809bb23 100644 --- a/src/slopometry/core/models/smell.py +++ b/src/slopometry/core/models/smell.py @@ -1,12 +1,12 @@ """Code smell definitions, registry, and data models.""" -from enum import Enum +from enum import StrEnum from typing import Any from pydantic import BaseModel, ConfigDict, Field -class SmellCategory(str, Enum): +class SmellCategory(StrEnum): """Category of code smell for organization and filtering.""" GENERAL = "general" diff --git a/src/slopometry/core/opencode_handler.py b/src/slopometry/core/opencode_handler.py index b9e88d4..d165655 100644 --- a/src/slopometry/core/opencode_handler.py +++ b/src/slopometry/core/opencode_handler.py @@ -1,108 +1,48 @@ -"""OpenCode hook handler - processes events forwarded by the OpenCode TypeScript plugin. 
- -The OpenCode plugin (plugins/opencode/index.ts) captures in-process events and spawns: - slopometry hook-opencode --event-type -with JSON on stdin. This module parses the JSON, maps it to HookEvent, and stores it. +"""OpenCode hook handler — receives OpenCode plugin JSON, delegates to the +OpenCode adapter for parsing, persists via the abstract protocol, and runs +the Claude-Code stop-hook feedback pipeline (reused because OpenCode and +Claude Code produce equivalent working-tree statistics). + +This module is the OpenCode-specific glue. Harness-agnostic types live in +`core.models.protocol.events`; the wire-format parser is in +`core.protocol.adapters.opencode`. """ import json import logging -import os -import select import sys -from slopometry.core.database import EventDatabase, SessionManager -from slopometry.core.git_tracker import GitTracker -from slopometry.core.lock import SlopometryLock -from slopometry.core.models.hook import ( - EventSource, - HookEvent, - HookEventType, - ToolType, -) -from slopometry.core.models.opencode import ( - OpenCodeMessageEvent, - OpenCodeSessionEvent, - OpenCodeStopEvent, - OpenCodeTodoEvent, - OpenCodeToolEvent, -) -from slopometry.core.project_tracker import ProjectTracker +from slopometry.core.hook_handler import handle_stop_event +from slopometry.core.models.protocol.events import AbstractEventSource, AbstractEventType +from slopometry.core.protocol.dispatch import dispatch_event from slopometry.core.settings import settings logger = logging.getLogger(__name__) -# Map OpenCode event type strings to HookEventType -EVENT_TYPE_MAP: dict[str, HookEventType] = { - "pre_tool_use": HookEventType.PRE_TOOL_USE, - "post_tool_use": HookEventType.POST_TOOL_USE, - "stop": HookEventType.STOP, - "subagent_stop": HookEventType.SUBAGENT_STOP, - "subagent_start": HookEventType.SUBAGENT_START, - "todo_updated": HookEventType.TODO_UPDATED, - "message_updated": HookEventType.MESSAGE_UPDATED, -} - - -def get_tool_type(tool_name: str) -> ToolType: - 
"""Map OpenCode tool name to ToolType enum. - - OpenCode uses the same tool names as Claude Code (Bash, Read, Edit, etc.) - plus some OpenCode-specific ones. - """ - from slopometry.core.hook_handler import get_tool_type as cc_get_tool_type - - return cc_get_tool_type(tool_name) - - -def parse_opencode_event( - event_type: str, raw_data: dict -) -> OpenCodeToolEvent | OpenCodeTodoEvent | OpenCodeMessageEvent | OpenCodeSessionEvent | OpenCodeStopEvent: - """Parse raw JSON into the appropriate OpenCode event model. - - Args: - event_type: The event type string from --event-type CLI arg. - raw_data: Parsed JSON from stdin. - - Returns: - Typed event model instance. - - Raises: - ValueError: If event_type is unknown. - """ - match event_type: - case "pre_tool_use" | "post_tool_use": - return OpenCodeToolEvent(**raw_data) - case "todo_updated": - return OpenCodeTodoEvent(**raw_data) - case "message_updated": - return OpenCodeMessageEvent(**raw_data) - case "subagent_start": - return OpenCodeSessionEvent(**raw_data) - case "stop" | "subagent_stop": - return OpenCodeStopEvent(**raw_data) - case _: - raise ValueError(f"Unknown OpenCode event type: {event_type}") - def _read_stdin_with_timeout(timeout_seconds: float = 5.0) -> str: - """Read stdin with a timeout to prevent hanging on unclosed pipes.""" + import select + ready, _, _ = select.select([sys.stdin], [], [], timeout_seconds) if not ready: return "" return sys.stdin.read().strip() -def handle_opencode_hook(event_type: str) -> int: - """Main entry point for handling OpenCode events. +def _resolve_event_type(event_type: str) -> AbstractEventType: + """Translate OpenCode's CLI event-type string into the canonical enum.""" + from slopometry.core.protocol.adapters.opencode import resolve_opencode_event_type - Called from CLI: slopometry hook-opencode --event-type + return resolve_opencode_event_type(event_type) - Reads JSON from stdin, parses it, creates a HookEvent, and stores it. 
- For stop events, runs feedback analysis and prints feedback to stdout. + +def handle_opencode_hook(event_type: str) -> int: + """Main entry point for OpenCode plugin event invocations. + + Called from CLI: `slopometry hook-opencode --event-type `. Args: - event_type: Event type string (pre_tool_use, post_tool_use, stop, etc.) + event_type: OpenCode's event-type discriminator string. Returns: Exit code (0 for success, 2 for blocking feedback). @@ -115,155 +55,40 @@ def handle_opencode_hook(event_type: str) -> int: return 0 try: - raw_data = json.loads(stdin_input) - parsed_event = parse_opencode_event(event_type, raw_data) - except Exception as e: + raw_payload = json.loads(stdin_input) + except json.JSONDecodeError as e: if settings.debug_mode: print(f"Slopometry: Failed to parse OpenCode event: {e}", file=sys.stderr) return 0 - lock = SlopometryLock(project_dir=os.getcwd()) - with lock.acquire() as acquired: - if not acquired: - print("Slopometry: Could not acquire lock, skipping.", file=sys.stderr) - return 0 - - return _handle_opencode_internal(event_type, parsed_event, raw_data) - - -def _handle_opencode_internal( - event_type: str, - parsed_event: ( - OpenCodeToolEvent | OpenCodeTodoEvent | OpenCodeMessageEvent | OpenCodeSessionEvent | OpenCodeStopEvent - ), - raw_data: dict, -) -> int: - """Internal handler for OpenCode events (runs under lock). - - Maps OpenCode event data to the shared HookEvent model and stores it. - For stop events, runs the same feedback pipeline as Claude Code. - - Returns: - Exit code. 
- """ try: - hook_event_type = EVENT_TYPE_MAP.get(event_type) - if not hook_event_type: - if settings.debug_mode: - print(f"Slopometry: Unknown event type '{event_type}'", file=sys.stderr) - return 0 - - # Extract session_id from the parsed event - session_id = _get_session_id(parsed_event) - if not session_id: - return 0 - - session_manager = SessionManager() - sequence_number = session_manager.get_next_sequence_number(session_id) - - working_directory = os.getcwd() - project_tracker = ProjectTracker(working_dir=__import__("pathlib").Path(working_directory)) - project = project_tracker.get_project() - - # Get git state for first event or stop events - git_tracker = GitTracker() - git_state = None - if sequence_number == 1 or event_type in ("stop", "subagent_stop"): - git_state = git_tracker.get_git_state() - - # Build the HookEvent - event = HookEvent( - session_id=session_id, - event_type=hook_event_type, - sequence_number=sequence_number, - metadata=raw_data, - git_state=git_state, - working_directory=working_directory, - project=project, - source=EventSource.OPENCODE, - parent_session_id=_get_parent_id(parsed_event), + abstract_type = _resolve_event_type(event_type) + event = dispatch_event( + AbstractEventSource.OPENCODE, + raw_payload, + event_type_override=abstract_type, ) - - # Set tool-specific fields for tool events - if isinstance(parsed_event, OpenCodeToolEvent): - event.tool_name = parsed_event.tool - event.tool_type = get_tool_type(parsed_event.tool) - if event_type == "post_tool_use": - event.duration_ms = parsed_event.duration_ms - - db = EventDatabase() - db.save_event(event) - - # Handle stop events with feedback - if event_type in ("stop", "subagent_stop") and settings.enable_complexity_analysis: - return _handle_opencode_stop(session_id, parsed_event, event_type) - - if settings.debug_mode: - debug_info = { - "slopometry_opencode_event": { - "session_id": session_id, - "event_type": event_type, - "hook_event_type": hook_event_type.value, - 
"sequence_number": sequence_number, - "source": "opencode", - } - } - print(f"Slopometry captured: {json.dumps(debug_info, indent=2)}", file=sys.stderr) - - return 0 - except Exception as e: - import traceback - if settings.debug_mode: - print(f"Slopometry OpenCode hook error: {e}\n{traceback.format_exc()}", file=sys.stderr) - return 0 - - -def _handle_opencode_stop( - session_id: str, - parsed_event: OpenCodeStopEvent - | OpenCodeToolEvent - | OpenCodeTodoEvent - | OpenCodeMessageEvent - | OpenCodeSessionEvent, - event_type: str, -) -> int: - """Handle stop events from OpenCode with feedback generation. - - Reuses the same feedback pipeline as Claude Code (code smells, context coverage). - Output is printed to stdout so the OpenCode plugin can capture and inject it. - - Returns: - Exit code (0 or 2 for blocking feedback). - """ - if not isinstance(parsed_event, OpenCodeStopEvent): + print(f"Slopometry OpenCode hook error: {e}", file=sys.stderr) return 0 - from slopometry.core.hook_handler import handle_stop_event - from slopometry.core.models.hook import StopInput - - # Create a StopInput-compatible object for reuse of the existing feedback pipeline - stop_input = StopInput( - session_id=session_id, - transcript_path="", # OpenCode doesn't use file-based transcripts - stop_hook_active=False, - ) - - return handle_stop_event(session_id, stop_input) - - -def _get_session_id( - event: (OpenCodeToolEvent | OpenCodeTodoEvent | OpenCodeMessageEvent | OpenCodeSessionEvent | OpenCodeStopEvent), -) -> str | None: - """Extract session_id from any OpenCode event type.""" - return event.session_id - + if ( + settings.enable_complexity_analysis + and event.event_type in (AbstractEventType.TURN_COMPLETED, AbstractEventType.SUBAGENT_COMPLETED) + ): + return handle_stop_event(event.session_id, event.working_directory) + + if settings.debug_mode: + debug_info = { + "slopometry_opencode_event": { + "session_id": event.session_id, + "event_type": event_type, + "abstract_type": 
event.event_type.value, + "sequence_number": event.sequence_number, + "source": "opencode", + } + } + print(f"Slopometry captured: {json.dumps(debug_info, indent=2)}", file=sys.stderr) -def _get_parent_id( - event: (OpenCodeToolEvent | OpenCodeTodoEvent | OpenCodeMessageEvent | OpenCodeSessionEvent | OpenCodeStopEvent), -) -> str | None: - """Extract parent_id from events that have it (session/stop events).""" - if isinstance(event, OpenCodeSessionEvent | OpenCodeStopEvent): - return event.parent_id - return None + return 0 diff --git a/src/slopometry/core/plan_analyzer.py b/src/slopometry/core/plan_analyzer.py index e683298..b337978 100644 --- a/src/slopometry/core/plan_analyzer.py +++ b/src/slopometry/core/plan_analyzer.py @@ -5,8 +5,8 @@ from datetime import datetime from typing import Any -from slopometry.core.models.hook import ToolType from slopometry.core.models.session import PlanEvolution, PlanStep, TodoItem +from slopometry.core.protocol.adapters.claude_code import ToolType logger = logging.getLogger(__name__) diff --git a/src/slopometry/core/protocol/__init__.py b/src/slopometry/core/protocol/__init__.py new file mode 100644 index 0000000..3d0355a --- /dev/null +++ b/src/slopometry/core/protocol/__init__.py @@ -0,0 +1,14 @@ +"""Harness-agnostic hook event protocol — runtime logic layer. + +Adapters translate harness wire payloads into the canonical event schema in +`slopometry.core.models.protocol`. This package owns the dispatcher that +persists events and the per-source session manager. + +Data types (AbstractHookEvent, AbstractEventType, AbstractEventSource, +ToolCallPayload) live in `slopometry.core.models.protocol`. 
+""" + +from slopometry.core.protocol.dispatch import dispatch_event, emit_event_from_stdin +from slopometry.core.protocol.session import SessionManager + +__all__ = ["SessionManager", "dispatch_event", "emit_event_from_stdin"] diff --git a/src/slopometry/core/protocol/adapters/__init__.py b/src/slopometry/core/protocol/adapters/__init__.py new file mode 100644 index 0000000..9611920 --- /dev/null +++ b/src/slopometry/core/protocol/adapters/__init__.py @@ -0,0 +1,10 @@ +"""HookEventAdapter protocol and registry for harness-specific wire formats.""" + +from slopometry.core.protocol.adapters.base import ADAPTERS, HookEventAdapter, register_adapter +from slopometry.core.protocol.adapters.claude_code import ClaudeCodeAdapter +from slopometry.core.protocol.adapters.opencode import OpenCodeAdapter + +register_adapter(ClaudeCodeAdapter()) +register_adapter(OpenCodeAdapter()) + +__all__ = ["ADAPTERS", "HookEventAdapter", "register_adapter"] diff --git a/src/slopometry/core/protocol/adapters/base.py b/src/slopometry/core/protocol/adapters/base.py new file mode 100644 index 0000000..69ef60b --- /dev/null +++ b/src/slopometry/core/protocol/adapters/base.py @@ -0,0 +1,53 @@ +"""HookEventAdapter protocol — the contract every harness adapter implements. 
+ +An adapter owns: + - the wire-format field names of its source harness + - the tool-type vocabulary it recognizes + - the rule for inferring event_type from a raw payload (when the harness + doesn't provide an explicit discriminator) +""" + +from datetime import datetime +from typing import Any, Protocol + +from slopometry.core.models.protocol.events import AbstractEventSource, AbstractEventType, AbstractHookEvent + + +class HookEventAdapter(Protocol): + source: AbstractEventSource + tool_type_map: dict[str, str] + + def parse( + self, + raw_payload: dict[str, Any], + *, + working_directory: str, + timestamp: datetime | None = None, + event_type_override: AbstractEventType | None = None, + ) -> AbstractHookEvent: + """Translate a wire payload into a canonical AbstractHookEvent. + + Must populate: session_id, event_type, source, tool_call (when applicable), + metadata (raw_payload), working_directory, timestamp. + + Raises: + ValueError: If the payload is not a valid event from this harness. + """ + ... + + def detect_event_type(self, raw_payload: dict[str, Any]) -> AbstractEventType: + """Infer event_type from payload shape when the harness doesn't say. + + Used when the harness emits the same payload shape for multiple event + types (e.g., Claude Code's `session_id + transcript_path + stop_hook_active` + is ambiguous between Stop and SubagentStop). + """ + ... + + +ADAPTERS: dict[AbstractEventSource, HookEventAdapter] = {} + + +def register_adapter(adapter: HookEventAdapter) -> None: + """Register an adapter instance for a specific source.""" + ADAPTERS[adapter.source] = adapter diff --git a/src/slopometry/core/protocol/adapters/claude_code.py b/src/slopometry/core/protocol/adapters/claude_code.py new file mode 100644 index 0000000..dae6634 --- /dev/null +++ b/src/slopometry/core/protocol/adapters/claude_code.py @@ -0,0 +1,206 @@ +"""Claude Code hook adapter — translates Claude Code's stdin JSON schema into AbstractHookEvent. 
+ +Claude Code does not send an explicit hook-type discriminator; the adapter +infers it from field presence. See `docs/claude-hooks-doc.md` for the wire +schema. The adapter owns the Claude-Code + MCP tool vocabulary, exposed as +the `ToolType` enum for downstream typed access. +""" + +from datetime import datetime +from enum import StrEnum +from typing import Any + +from slopometry.core.models.protocol.events import ( + AbstractEventSource, + AbstractEventType, + AbstractHookEvent, + ToolCallPayload, +) + + +class ToolType(StrEnum): + """Known tool types in Claude Code and OpenCode (shared vocabulary).""" + + BASH = "Bash" + READ = "Read" + WRITE = "Write" + EDIT = "Edit" + MULTI_EDIT = "MultiEdit" + GREP = "Grep" + GLOB = "Glob" + LS = "LS" + TASK = "Task" + TODO_READ = "TodoRead" + TODO_WRITE = "TodoWrite" + TASK_CREATE = "TaskCreate" + TASK_UPDATE = "TaskUpdate" + TASK_LIST = "TaskList" + TASK_GET = "TaskGet" + WEB_FETCH = "WebFetch" + WEB_SEARCH = "WebSearch" + NOTEBOOK_READ = "NotebookRead" + NOTEBOOK_EDIT = "NotebookEdit" + EXIT_PLAN_MODE = "exit_plan_mode" + + MCP_IDE_GET_DIAGNOSTICS = "mcp__ide__getDiagnostics" + MCP_IDE_EXECUTE_CODE = "mcp__ide__executeCode" + MCP_IDE_GET_WORKSPACE_INFO = "mcp__ide__getWorkspaceInfo" + MCP_IDE_GET_FILE_CONTENTS = "mcp__ide__getFileContents" + MCP_IDE_CREATE_FILE = "mcp__ide__createFile" + MCP_IDE_DELETE_FILE = "mcp__ide__deleteFile" + MCP_IDE_RENAME_FILE = "mcp__ide__renameFile" + MCP_IDE_SEARCH_FILES = "mcp__ide__searchFiles" + MCP_FILESYSTEM_READ = "mcp__filesystem__read" + MCP_FILESYSTEM_WRITE = "mcp__filesystem__write" + MCP_FILESYSTEM_LIST = "mcp__filesystem__list" + MCP_DATABASE_QUERY = "mcp__database__query" + MCP_DATABASE_SCHEMA = "mcp__database__schema" + MCP_WEB_SCRAPE = "mcp__web__scrape" + MCP_WEB_SEARCH = "mcp__web__search" + MCP_GITHUB_GET_REPO = "mcp__github__getRepo" + MCP_GITHUB_CREATE_ISSUE = "mcp__github__createIssue" + MCP_GITHUB_LIST_ISSUES = "mcp__github__listIssues" + MCP_SLACK_SEND_MESSAGE = 
"mcp__slack__sendMessage" + MCP_SLACK_LIST_CHANNELS = "mcp__slack__listChannels" + MCP_OTHER = "mcp__other" + + OTHER = "Other" + + +_TOOL_NAME_TO_TYPE: dict[str, ToolType] = { + "bash": ToolType.BASH, + "read": ToolType.READ, + "write": ToolType.WRITE, + "edit": ToolType.EDIT, + "multiedit": ToolType.MULTI_EDIT, + "grep": ToolType.GREP, + "glob": ToolType.GLOB, + "ls": ToolType.LS, + "task": ToolType.TASK, + "todoread": ToolType.TODO_READ, + "todowrite": ToolType.TODO_WRITE, + "taskcreate": ToolType.TASK_CREATE, + "taskupdate": ToolType.TASK_UPDATE, + "tasklist": ToolType.TASK_LIST, + "taskget": ToolType.TASK_GET, + "webfetch": ToolType.WEB_FETCH, + "websearch": ToolType.WEB_SEARCH, + "notebookread": ToolType.NOTEBOOK_READ, + "notebookedit": ToolType.NOTEBOOK_EDIT, + "exit_plan_mode": ToolType.EXIT_PLAN_MODE, + "mcp__ide__getdiagnostics": ToolType.MCP_IDE_GET_DIAGNOSTICS, + "mcp__ide__executecode": ToolType.MCP_IDE_EXECUTE_CODE, + "mcp__ide__getworkspaceinfo": ToolType.MCP_IDE_GET_WORKSPACE_INFO, + "mcp__ide__getfilecontents": ToolType.MCP_IDE_GET_FILE_CONTENTS, + "mcp__ide__createfile": ToolType.MCP_IDE_CREATE_FILE, + "mcp__ide__deletefile": ToolType.MCP_IDE_DELETE_FILE, + "mcp__ide__renamefile": ToolType.MCP_IDE_RENAME_FILE, + "mcp__ide__searchfiles": ToolType.MCP_IDE_SEARCH_FILES, + "mcp__filesystem__read": ToolType.MCP_FILESYSTEM_READ, + "mcp__filesystem__write": ToolType.MCP_FILESYSTEM_WRITE, + "mcp__filesystem__list": ToolType.MCP_FILESYSTEM_LIST, + "mcp__database__query": ToolType.MCP_DATABASE_QUERY, + "mcp__database__schema": ToolType.MCP_DATABASE_SCHEMA, + "mcp__web__scrape": ToolType.MCP_WEB_SCRAPE, + "mcp__web__search": ToolType.MCP_WEB_SEARCH, + "mcp__github__getrepo": ToolType.MCP_GITHUB_GET_REPO, + "mcp__github__createissue": ToolType.MCP_GITHUB_CREATE_ISSUE, + "mcp__github__listissues": ToolType.MCP_GITHUB_LIST_ISSUES, + "mcp__slack__sendmessage": ToolType.MCP_SLACK_SEND_MESSAGE, + "mcp__slack__listchannels": ToolType.MCP_SLACK_LIST_CHANNELS, +} + + 
+def resolve_tool_type(tool_name: str) -> str: + """Map a tool name (any harness) to its canonical `ToolType` value string. + + Lookup is case-insensitive. Returns `"Other"` for unknown names and + `"mcp__other"` for unknown `mcp__`-prefixed names. The returned string + is suitable for direct storage in `tool_type` columns. + """ + lowered = tool_name.lower() + if lowered in _TOOL_NAME_TO_TYPE: + return _TOOL_NAME_TO_TYPE[lowered].value + if lowered.startswith("mcp__"): + return ToolType.MCP_OTHER.value + return ToolType.OTHER.value + + +def _extract_tool_response_fields(tool_response: Any) -> tuple[int | None, int | None, str | None]: + """Pull the three Bash-style fields from Claude Code's tool_response. + + tool_response shape varies by tool: dict (most), str (Bash), list (NotebookRead). + Only dicts carry the `duration_ms`/`exit_code`/`error` triple. + """ + if isinstance(tool_response, dict): + return ( + tool_response.get("duration_ms"), + tool_response.get("exit_code"), + tool_response.get("error"), + ) + return (None, None, None) + + +class ClaudeCodeAdapter: + source = AbstractEventSource.CLAUDE_CODE + tool_type_map: dict[str, str] = {name: enum.value for name, enum in _TOOL_NAME_TO_TYPE.items()} + + def detect_event_type(self, raw_payload: dict[str, Any]) -> AbstractEventType: + fields = set(raw_payload.keys()) + if "tool_name" in fields and "tool_input" in fields: + if "tool_response" in fields: + return AbstractEventType.TOOL_CALL_COMPLETED + return AbstractEventType.TOOL_CALL_STARTED + if "message" in fields: + return AbstractEventType.NOTIFICATION + if "stop_hook_active" in fields: + if raw_payload.get("stop_hook_active"): + return AbstractEventType.SUBAGENT_COMPLETED + return AbstractEventType.TURN_COMPLETED + if "session_id" in fields and "transcript_path" in fields: + return AbstractEventType.TURN_COMPLETED + raise ValueError(f"Unknown Claude Code hook payload shape: {sorted(fields)}") + + def parse( + self, + raw_payload: dict[str, Any], + *, + working_directory: str, + 
timestamp: datetime | None = None, + event_type_override: AbstractEventType | None = None, + ) -> AbstractHookEvent: + if "session_id" not in raw_payload: + raise ValueError("Claude Code payload missing required 'session_id' field") + + event_type = event_type_override or self.detect_event_type(raw_payload) + session_id = raw_payload["session_id"] + transcript_path = raw_payload.get("transcript_path") + + tool_call: ToolCallPayload | None = None + if event_type in (AbstractEventType.TOOL_CALL_STARTED, AbstractEventType.TOOL_CALL_COMPLETED): + tool_name = raw_payload.get("tool_name") + if not tool_name: + raise ValueError(f"Claude Code {event_type.value} payload missing 'tool_name'") + tool_input = raw_payload.get("tool_input") or {} + tool_response = raw_payload.get("tool_response") + duration_ms, exit_code, error_message = _extract_tool_response_fields(tool_response) + tool_call = ToolCallPayload( + tool_name=tool_name, + tool_type=resolve_tool_type(tool_name), + input=tool_input, + output=tool_response, + duration_ms=duration_ms, + exit_code=exit_code, + error_message=error_message, + ) + + return AbstractHookEvent( + session_id=session_id, + event_type=event_type, + source=self.source, + timestamp=timestamp or datetime.now(), + tool_call=tool_call, + metadata=dict(raw_payload), + working_directory=working_directory, + transcript_location=transcript_path, + ) diff --git a/src/slopometry/core/protocol/adapters/opencode.py b/src/slopometry/core/protocol/adapters/opencode.py new file mode 100644 index 0000000..5001f5b --- /dev/null +++ b/src/slopometry/core/protocol/adapters/opencode.py @@ -0,0 +1,95 @@ +"""OpenCode hook adapter — translates OpenCode plugin JSON into AbstractHookEvent. + +OpenCode's TypeScript plugin forwards events with an explicit `--event-type` +discriminator on the CLI; the adapter trusts that rather than inferring from +field shape (since the event-type discriminator is the authoritative source). 
# OpenCode plugin event-type strings and their canonical counterparts.
_OPENCODE_TYPE_TO_ABSTRACT: dict[str, AbstractEventType] = {
    "pre_tool_use": AbstractEventType.TOOL_CALL_STARTED,
    "post_tool_use": AbstractEventType.TOOL_CALL_COMPLETED,
    "stop": AbstractEventType.TURN_COMPLETED,
    "subagent_stop": AbstractEventType.SUBAGENT_COMPLETED,
    "subagent_start": AbstractEventType.SUBAGENT_STARTED,
    "todo_updated": AbstractEventType.TODO_UPDATED,
    "message_updated": AbstractEventType.MESSAGE_UPDATED,
}


def resolve_opencode_event_type(event_type: str) -> AbstractEventType:
    """Translate an OpenCode event-type string to its canonical event type.

    Raises:
        ValueError: If `event_type` is not a known OpenCode event type.
    """
    resolved = _OPENCODE_TYPE_TO_ABSTRACT.get(event_type)
    if resolved is None:
        raise ValueError(f"Unknown OpenCode event type: {event_type!r}")
    return resolved


class OpenCodeAdapter:
    """Translate OpenCode plugin JSON into the canonical AbstractHookEvent."""

    source = AbstractEventSource.OPENCODE
    tool_type_map: dict[str, str] = {}

    def detect_event_type(self, raw_payload: dict[str, Any]) -> AbstractEventType:
        """Read the explicit `event_type` discriminator from the payload.

        Raises:
            ValueError: If `event_type` is absent, not a string, or unknown.
        """
        declared = raw_payload.get("event_type")
        if isinstance(declared, str):
            return resolve_opencode_event_type(declared)
        raise ValueError("OpenCode payload missing string 'event_type' field")

    def parse(
        self,
        raw_payload: dict[str, Any],
        *,
        working_directory: str,
        timestamp: datetime | None = None,
        event_type_override: AbstractEventType | None = None,
    ) -> AbstractHookEvent:
        """Build an AbstractHookEvent from an OpenCode plugin payload.

        Field mapping: `tool` -> tool name, `args` -> input, `output` ->
        output, top-level `duration_ms` -> duration, `parent_id` -> parent
        session id. Exit code and error message are always None here.

        Raises:
            ValueError: If `session_id` is missing, if a tool-call event has
                no `tool`, or if the event type cannot be resolved.
        """
        if "session_id" not in raw_payload:
            raise ValueError("OpenCode payload missing required 'session_id' field")

        event_type = event_type_override or self.detect_event_type(raw_payload)
        session_id = raw_payload["session_id"]
        parent_session_id = raw_payload.get("parent_id")

        tool_events = (AbstractEventType.TOOL_CALL_STARTED, AbstractEventType.TOOL_CALL_COMPLETED)
        tool_call: ToolCallPayload | None = None
        if event_type in tool_events:
            name = raw_payload.get("tool")
            if not name:
                raise ValueError(f"OpenCode {event_type.value} payload missing 'tool' field")
            arguments = raw_payload.get("args") or {}
            result = raw_payload.get("output")
            tool_call = ToolCallPayload(
                tool_name=name,
                tool_type=resolve_tool_type(name),
                input=arguments,
                output=result,
                duration_ms=raw_payload.get("duration_ms"),
                exit_code=None,
                error_message=None,
            )

        event_time = timestamp or datetime.now()
        return AbstractHookEvent(
            session_id=session_id,
            parent_session_id=parent_session_id,
            event_type=event_type,
            source=self.source,
            timestamp=event_time,
            tool_call=tool_call,
            metadata=dict(raw_payload),
            working_directory=working_directory,
            transcript_location=None,
        )
+""" + +import json +import logging +import os +from datetime import datetime +from pathlib import Path + +from slopometry.core.git_tracker import GitTracker +from slopometry.core.lock import SlopometryLock +from slopometry.core.models.protocol.events import AbstractEventSource, AbstractEventType, AbstractHookEvent +from slopometry.core.project_tracker import ProjectTracker +from slopometry.core.protocol.adapters.base import ADAPTERS +from slopometry.core.protocol.session import SessionManager + +logger = logging.getLogger(__name__) + + +def _capture_git_state(event_type: AbstractEventType, sequence_number: int): + tracker = GitTracker() + match (event_type, sequence_number): + case (AbstractEventType.TOOL_CALL_STARTED, 1) | (AbstractEventType.TURN_COMPLETED, _): + return tracker.get_git_state() + case _: + return None + + +def _capture_project(working_directory: str): + return ProjectTracker(working_dir=Path(working_directory)).get_project() + + +def dispatch_event( + source: AbstractEventSource, + raw_payload: dict, + *, + working_directory: str | None = None, + timestamp: datetime | None = None, + event_type_override: AbstractEventType | None = None, +) -> AbstractHookEvent: + """Parse a wire payload through the adapter for `source`, enrich, persist. + + Args: + source: Which harness produced this payload. + raw_payload: The JSON the harness sent (already parsed). + working_directory: Override cwd; defaults to os.getcwd(). + timestamp: Override event timestamp; defaults to now(). + event_type_override: Skip adapter detection; force a specific event type. + + Returns: + The persisted AbstractHookEvent (with assigned id and sequence_number). + + Raises: + ValueError: If `source` has no registered adapter, or if the adapter + rejects the payload. 
+ """ + adapter = ADAPTERS.get(source) + if adapter is None: + raise ValueError(f"No adapter registered for source {source.value!r}") + + cwd = working_directory or os.getcwd() + event = adapter.parse( + raw_payload, + working_directory=cwd, + timestamp=timestamp, + event_type_override=event_type_override, + ) + + session_manager = SessionManager(source=source.value) + event.sequence_number = session_manager.get_next_sequence_number(event.session_id) + event.git_state = _capture_git_state(event.event_type, event.sequence_number) + event.project = _capture_project(event.working_directory) + + lock = SlopometryLock(project_dir=cwd) + with lock.acquire() as acquired: + if not acquired: + logger.debug("Could not acquire lock, skipping event persistence for %s", event.session_id) + return event + + from slopometry.core.database import EventDatabase + + EventDatabase().save_event(event) + + return event + + +def emit_event_from_stdin( + source: AbstractEventSource, + event_type_override: AbstractEventType | None = None, +) -> int: + """Read JSON from stdin, dispatch, return process exit code. + + Used by the `slopometry emit-event` CLI subcommand and by harness-specific + entry points after they have read their stdin. 
+ """ + import sys + + try: + stdin_input = sys.stdin.read().strip() + except Exception: + return 0 + if not stdin_input: + return 0 + + try: + raw_payload = json.loads(stdin_input) + except json.JSONDecodeError as e: + from slopometry.core.settings import settings + + if settings.debug_mode: + print(f"Slopometry: Failed to parse event JSON: {e}", file=sys.stderr) + return 0 + + dispatch_event(source, raw_payload, event_type_override=event_type_override) + return 0 diff --git a/src/slopometry/core/protocol/session.py b/src/slopometry/core/protocol/session.py new file mode 100644 index 0000000..696b997 --- /dev/null +++ b/src/slopometry/core/protocol/session.py @@ -0,0 +1,64 @@ +"""Per-session sequence numbering, abstracted from Claude-Code's ~/.claude layout. + +Each AbstractEventSource has its own state directory so concurrent harnesses +don't collide. Legacy state at ~/.claude/slopometry/seq_*.txt is relocated on +first access. +""" + +import logging +from pathlib import Path + +logger = logging.getLogger(__name__) + +_LEGACY_STATE_DIR = Path.home() / ".claude" / "slopometry" +_DEFAULT_STATE_ROOT = Path.home() / ".slopometry" / "sessions" + + +class SessionManager: + """Assigns monotonic sequence numbers per (source, session_id) pair. + + State lives at `//seq_.txt` by default. + The legacy path `~/.claude/slopometry/seq_.txt` is migrated + to `~/.slopometry/sessions/claude_code/seq_.txt` on first + construction. 
+ """ + + def __init__( + self, + source: str, + state_root: Path | None = None, + ) -> None: + self.source = source + self.state_root = state_root or _DEFAULT_STATE_ROOT + self.state_dir = self.state_root / source + self.state_dir.mkdir(parents=True, exist_ok=True) + self._migrate_legacy_files() + + def _migrate_legacy_files(self) -> None: + if self.source != "claude_code": + return + if not _LEGACY_STATE_DIR.exists(): + return + for seq_file in _LEGACY_STATE_DIR.glob("seq_*.txt"): + target = self.state_dir / seq_file.name + if target.exists(): + continue + try: + seq_file.rename(target) + logger.debug("Migrated session seq file %s -> %s", seq_file, target) + except OSError as e: + logger.debug("Could not migrate %s: %s", seq_file, e) + + def get_next_sequence_number(self, session_id: str) -> int: + seq_file = self.state_dir / f"seq_{session_id}.txt" + if seq_file.exists(): + try: + current_seq = int(seq_file.read_text().strip()) + next_seq = current_seq + 1 + except (ValueError, FileNotFoundError) as e: + logger.debug("Corrupt sequence file for %s, resetting: %s", session_id, e) + next_seq = 1 + else: + next_seq = 1 + seq_file.write_text(str(next_seq)) + return next_seq diff --git a/src/slopometry/core/settings.py b/src/slopometry/core/settings.py index 2cb0ae1..0975e6d 100644 --- a/src/slopometry/core/settings.py +++ b/src/slopometry/core/settings.py @@ -97,13 +97,15 @@ def _ensure_global_config_dir() -> None: description="Extract '## Development guidelines' from CLAUDE.md in stop hook feedback", ) - llm_proxy_url: str = "" + llm_proxy_url: str = Field( + default="", + description="OpenAI-compatible base URL for the MiniMax-M3 vLLM endpoint", + ) llm_proxy_api_key: str = "" - llm_responses_url: str = "" - anthropic_url: str = Field( - default="", description="Base URL for Anthropic-compatible API endpoint (e.g. 
sglang MiniMax endpoint)" + llm_model_name: str = Field( + default="olka-fi/MiniMax-M3-MXFP4", + description="Served model name on the MiniMax-M3 vLLM endpoint", ) - anthropic_api_key: SecretStr = Field(default=SecretStr(""), description="API key for Anthropic-compatible provider") interactive_rating_enabled: bool = False hf_token: str = "" @@ -114,11 +116,6 @@ def _ensure_global_config_dir() -> None: description="Disables all external LLM requests from slopometry. Set to False to enable AI features.", ) - user_story_agent: str = Field( - default="gpt_oss_120b", - description="Agent to use for user story generation. Options: gpt_oss_120b, gemini, minimax", - ) - enable_working_at_microsoft: bool = Field( default=False, description="Galen Rate feature flag - shows NGMI alert when below 1 Galen productivity target" ) @@ -204,6 +201,36 @@ def _ensure_global_config_dir() -> None: default=0.50, description="Weight for Maintainability Index in impact score calculation" ) + memory_llm_endpoint: str = Field( + default="https://your-llm-endpoint.com/v1", + description="LLM endpoint for memory extraction", + ) + memory_llm_model: str = Field( + default="your-model-name", + description="Model for memory extraction", + ) + memory_llm_api_key: SecretStr = Field( + default=SecretStr(""), + description="API key for memory LLM endpoint", + ) + memory_retention_days: int = Field( + default=365, + description="Days to retain memories", + ) + + memory_embedding_api_key: SecretStr = Field( + default=SecretStr(""), + description="API key for memory embedding endpoint", + ) + memory_embedding_endpoint: str = Field( + default="https://your-embedding-endpoint.com/v1", + description="Embedding model endpoint for memory similarity", + ) + memory_embedding_model: str = Field( + default="your-embedding-model", + description="Embedding model name", + ) + @field_validator("baseline_strategy", mode="before") @classmethod def validate_baseline_strategy(cls, v: str) -> str: diff --git 
a/src/slopometry/core/transcript_token_analyzer.py b/src/slopometry/core/transcript_token_analyzer.py index 0f0ebd5..d238897 100644 --- a/src/slopometry/core/transcript_token_analyzer.py +++ b/src/slopometry/core/transcript_token_analyzer.py @@ -7,9 +7,9 @@ from pydantic import BaseModel, Field -from slopometry.core.models.hook import ToolType from slopometry.core.models.session import TokenUsage from slopometry.core.plan_analyzer import PlanAnalyzer +from slopometry.core.protocol.adapters.claude_code import ToolType logger = logging.getLogger(__name__) diff --git a/src/slopometry/display/formatters.py b/src/slopometry/display/formatters.py index 6f2146a..5d523ff 100644 --- a/src/slopometry/display/formatters.py +++ b/src/slopometry/display/formatters.py @@ -21,7 +21,7 @@ SessionDisplayData, ) from slopometry.core.models.experiment import ProgressDisplayData -from slopometry.core.models.hook import HookEventType, ToolType +from slopometry.core.models.protocol.events import AbstractEventType from slopometry.core.models.session import ( BehavioralPatterns, BehavioralPatternTrends, @@ -316,7 +316,7 @@ def _display_behavioral_pattern_trends(trends: BehavioralPatternTrends) -> None: console.print(table) -def _display_events_by_type_table(events_by_type: dict[HookEventType, int]) -> None: +def _display_events_by_type_table(events_by_type: dict[AbstractEventType, int]) -> None: """Display events by type table.""" table = Table(title="Events by Type") table.add_column("Event Type", style="cyan") @@ -328,14 +328,14 @@ def _display_events_by_type_table(events_by_type: dict[HookEventType, int]) -> N console.print(table) -def _display_tool_usage_table(tool_usage: dict[ToolType, int]) -> None: +def _display_tool_usage_table(tool_usage: dict[str, int]) -> None: """Display tool usage table.""" table = Table(title="Tool Usage") table.add_column("Tool", style="green") table.add_column("Count", justify="right") for tool_type, count in sorted(tool_usage.items(), key=lambda x: x[1], 
reverse=True): - table.add_row(tool_type.value, str(count)) + table.add_row(tool_type, str(count)) console.print(table) @@ -576,60 +576,58 @@ def _display_complexity_delta( if not delta: return - has_baseline = baseline is not None and assessment is not None - title = "Complexity Delta (vs Session Start)" - if has_baseline: + if baseline is not None: title += f" - Baseline: {baseline.total_commits_analyzed} commits" console.print(f"\n[bold]{title}[/bold]") changes_table = Table() changes_table.add_column("Metric", style="cyan") changes_table.add_column("Change", justify="right") - if has_baseline: + if baseline is not None: changes_table.add_column("vs Baseline", justify="right") cc_color = _color_for_positive_negative(delta.avg_complexity_change) - cc_baseline = _format_baseline_cell(assessment.cc_z_score, invert=True) if has_baseline else None + cc_baseline = _format_baseline_cell(assessment.cc_z_score, invert=True) if assessment is not None else None changes_table.add_row( "Average Cyclomatic Complexity", f"[{cc_color}]{delta.avg_complexity_change:+.2f}[/{cc_color}]", - cc_baseline if has_baseline else None, + cc_baseline if baseline is not None else None, ) effort_color = _color_for_positive_negative(delta.avg_effort_change) - effort_baseline = _format_baseline_cell(assessment.effort_z_score, invert=True) if has_baseline else None + effort_baseline = _format_baseline_cell(assessment.effort_z_score, invert=True) if assessment is not None else None changes_table.add_row( "Average Effort", f"[{effort_color}]{delta.avg_effort_change:+.2f}[/{effort_color}]", - effort_baseline if has_baseline else None, + effort_baseline if baseline is not None else None, ) mi_color = _color_for_positive_negative(delta.avg_mi_change, invert=True) - mi_baseline = _format_baseline_cell(assessment.mi_z_score, invert=False) if has_baseline else None + mi_baseline = _format_baseline_cell(assessment.mi_z_score, invert=False) if assessment is not None else None changes_table.add_row( 
"Maintainability (file avg)", f"[{mi_color}]{delta.avg_mi_change:+.2f}[/{mi_color}]", - mi_baseline if has_baseline else None, + mi_baseline if baseline is not None else None, ) token_color = _color_for_positive_negative(delta.total_tokens_change, invert=True) changes_table.add_row( "Total Tokens", f"[{token_color}]{delta.total_tokens_change:+d}[/{token_color}]", - "" if has_baseline else None, + "" if baseline is not None else None, ) file_color = _color_for_positive_negative(delta.net_files_change) changes_table.add_row( "Files changed", f"[{file_color}]{delta.net_files_change:+d}[/{file_color}] ({len(delta.files_added)} added, {len(delta.files_removed)} removed)", - "" if has_baseline else None, + "" if baseline is not None else None, ) console.print(changes_table) - if has_baseline and assessment: + if assessment is not None: impact_color = _get_impact_color(assessment.impact_category) console.print( f"\n[bold]Overall Impact:[/bold] [{impact_color}]{assessment.impact_category.value.upper()}[/{impact_color}] " diff --git a/src/slopometry/solo/cli/commands.py b/src/slopometry/solo/cli/commands.py index 0b52924..8967747 100644 --- a/src/slopometry/solo/cli/commands.py +++ b/src/slopometry/solo/cli/commands.py @@ -513,57 +513,32 @@ def feedback(enable: bool | None) -> None: env_file = Path(".env") env_var = "SLOPOMETRY_ENABLE_STOP_FEEDBACK" + env_value = "true" if enable else "false" if enable: console.print("[green]Enabling[/green] complexity feedback on stop events") - console.print("") - console.print("To persist this setting, add to your .env file:") - console.print(f" {env_var}=true") - - if env_file.exists(): - content = env_file.read_text() - if env_var in content: - lines = content.split("\n") - new_lines: list[str] = [] - for line in lines: - if line.startswith(f"{env_var}="): - new_lines.append(f"{env_var}=true") - else: - new_lines.append(line) - env_file.write_text("\n".join(new_lines)) - else: - with env_file.open("a") as f: - 
@solo.command(name="find-memories")
@click.option(
    "--project-dir",
    type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
    default=None,
    help="Project directory (default: cwd)",
)
@click.option(
    "--llm-endpoint",
    type=str,
    default=None,
    help="LLM endpoint URL (default: from settings)",
)
@click.option(
    "--llm-model",
    type=str,
    default=None,
    help="Model name (default: from settings)",
)
@click.option(
    "--force",
    is_flag=True,
    help="Re-process already processed sessions",
)
@click.option(
    "--dry-run",
    is_flag=True,
    help="Show what would be done without doing it",
)
def find_memories(
    project_dir: Path | None,
    llm_endpoint: str | None,
    llm_model: str | None,
    force: bool,
    dry_run: bool,
) -> None:
    """Scan transcripts, extract memory candidates, and save to database.

    This command:
    1. Validates LLM + embedding endpoints are reachable (skipped with --dry-run)
    2. Discovers transcripts across all configured harnesses
    3. Filters to sessions not yet processed
    4. Parses and cleans conversation data
    5. Generates memory candidates via LLM
    6. Runs freshness validation against existing memories
    7. Saves memories to database

    Aborts explicitly (no partial completion) if either endpoint is
    unreachable. Use --dry-run to verify transcript discovery without
    touching the LLM.
    """
    from slopometry.core.settings import settings
    from slopometry.solo.services.memory_extractor import MemoryExtractor
    from slopometry.solo.services.memory_service import MemoryService
    from slopometry.solo.services.transcript_finder import TranscriptFinder

    if project_dir is None:
        project_dir = Path.cwd()

    # CLI options win over settings; the API key always comes from settings.
    endpoint = llm_endpoint or settings.memory_llm_endpoint
    model = llm_model or settings.memory_llm_model
    api_key = settings.memory_llm_api_key.get_secret_value()

    console.print("[bold]Slopometry Memory Extraction[/bold]")
    console.print(f"Project: {project_dir}")
    console.print(f"LLM: {endpoint} / {model}")
    console.print()

    if dry_run:
        console.print("[yellow]--dry-run mode, no changes will be made--[/yellow]\n")

    if settings.offline_mode and not llm_endpoint:
        raise click.ClickException(
            "Memory extraction requires external LLM calls, which are disabled (offline_mode=True). "
            "Set SLOPOMETRY_OFFLINE_MODE=false to enable."
        )

    from slopometry.solo.cli.preflight import preflight_endpoints

    if not dry_run:
        preflight_endpoints(
            chat_endpoint=endpoint,
            embedding_endpoint=settings.memory_embedding_endpoint,
            chat_api_key=api_key,
            embedding_api_key=settings.memory_embedding_api_key.get_secret_value(),
        )

    transcript_finder = TranscriptFinder()
    memory_service = MemoryService()
    memory_extractor = MemoryExtractor(endpoint, model, api_key)

    from slopometry.solo.services.embedding_service import EmbeddingService

    embedding_service = EmbeddingService(
        endpoint=settings.memory_embedding_endpoint,
        model=settings.memory_embedding_model,
        api_key=settings.memory_embedding_api_key.get_secret_value(),
    )

    console.print("[dim]Discovering transcripts...[/dim]")
    transcripts = transcript_finder.discover_transcripts(project_dir)

    if not transcripts:
        console.print("[yellow]No transcripts found for this project.[/yellow]")
        return

    source_counts: dict[str, int] = {}
    for t in transcripts:
        source_counts[t.source.value] = source_counts.get(t.source.value, 0) + 1
    source_breakdown = ", ".join(f"{src}={n}" for src, n in sorted(source_counts.items()))
    console.print(f"[green]Found {len(transcripts)} transcript(s)[/green] [dim]({source_breakdown})[/dim]\n")

    sessions_to_process: list = []
    for t in transcripts:
        if not force and memory_service.is_session_processed(t.session_id, str(t.project_dir), source=t.source.value):
            console.print(f"[dim]Skipping {t.source.value} {t.session_id}: already processed[/dim]")
            continue
        sessions_to_process.append(t)

    if not sessions_to_process:
        console.print("[yellow]No new sessions to process.[/yellow]")
        return

    console.print(f"[cyan]Processing {len(sessions_to_process)} session(s)...[/cyan]\n")

    total_memories = 0
    for t in sessions_to_process:
        console.print(f"[bold]Session: {t.session_id}[/bold] [dim]({t.source.value})[/dim]")
        console.print(f" Transcript: {t.transcript_path}")

        if dry_run:
            console.print(" [yellow]-- dry-run: would extract and save memories --[/yellow]")
            continue

        # Per-session boundary: any failure is reported and the next session
        # is attempted; a failed session is not marked as processed.
        try:
            from slopometry.core.models.protocol.events import AbstractEventSource

            if t.source == AbstractEventSource.OPENCODE:
                # Reuse the finder built above instead of constructing a new
                # one per session.
                storage_root = transcript_finder.find_opencode_storage_root()
                if storage_root is None:
                    console.print(" [yellow]OpenCode storage not found[/yellow]")
                    continue
                cleaned_transcript = memory_extractor.extract_memories_from_opencode_session(
                    t.session_id, storage_root
                )
            else:
                cleaned_transcript = memory_extractor.extract_memories_from_transcript(t.transcript_path)

            if not cleaned_transcript.strip():
                console.print(" [yellow]Warning: Empty transcript[/yellow]")
                continue

            console.print(f" [dim]Extracted {len(cleaned_transcript)} chars of conversation[/dim]")

            candidates = memory_extractor.generate_memory_candidates(cleaned_transcript)

            proj_dir_str = str(t.project_dir)

            if not candidates:
                console.print(" [yellow]No memory candidates generated[/yellow]")
                memory_service.mark_session_processed(t.session_id, proj_dir_str, 0, source=t.source.value)
                continue

            console.print(f" [green]Generated {len(candidates)} candidates[/green]")

            console.print(" [dim]Generating embeddings...[/dim]")
            for number, candidate in enumerate(candidates, 1):
                try:
                    embedding = embedding_service.get_embedding(candidate.content)
                    if embedding:
                        candidate.embedding = embedding
                    else:
                        console.print(
                            f" [yellow]Warning: Failed to get embedding for candidate {number}, continuing without[/yellow]"
                        )
                except Exception as e:
                    # Report which candidate failed, then let the
                    # session-level handler abandon this session.
                    console.print(f" [red]Embedding error for candidate {number}: {e}[/red]")
                    raise

            from slopometry.solo.services.memory_freshness import MemoryFreshnessValidator

            existing_memories = memory_service.get_memories(project_dir=proj_dir_str, limit=200)
            decisions: list = []
            if existing_memories:
                freshness_validator = MemoryFreshnessValidator(
                    llm_endpoint=endpoint,
                    llm_model=model,
                    api_key=api_key,
                )
                decisions, distribution = freshness_validator.validate(candidates, existing_memories)
                console.print(
                    f" [dim]Project similarity distribution: "
                    f"n={distribution.n_pairs} "
                    f"mean={distribution.mean:.2f} "
                    f"p50={distribution.p50:.2f} "
                    f"p75={distribution.p75:.2f} "
                    f"p90={distribution.p90:.2f} "
                    f"p95={distribution.p95:.2f}[/dim]"
                )
                console.print(
                    f" [dim]Derived dedupe threshold: {distribution.derived_threshold:.2f}[/dim]"
                )
                if decisions:
                    console.print(f" [yellow]Freshness: {len(decisions)} similar pair(s) reviewed:[/yellow]")
                    for decision in decisions:
                        action_color = {
                            "keep_both": "green",
                            "merge": "cyan",
                            "supersede": "yellow",
                            "dedupe": "magenta",
                        }.get(decision.action, "white")
                        console.print(
                            f" [{action_color}]{decision.action.upper()}[/{action_color}]"
                            f" (sim={decision.similarity:.2f}): "
                            f"[dim]new=[/dim]{decision.new_candidate.content[:80]!r} "
                            f"[dim]existing=[/dim]{decision.existing_memory.content[:80]!r}"
                        )
                        console.print(f" [dim]REASON:[/dim] {decision.reason}")

                # Apply each decision to its candidate and record why.
                for decision in decisions:
                    target = decision.new_candidate
                    if decision.action == "merge" and decision.merged_content:
                        target.content = decision.merged_content
                    if target.metadata is None:
                        target.metadata = {}
                    if decision.action == "dedupe":
                        target.metadata["deduped_against"] = decision.existing_memory.id
                    target.metadata["freshness_action"] = decision.action
                    target.metadata["freshness_reason"] = decision.reason
                    if decision.action != "keep_both":
                        target.metadata["freshness_pair_with"] = decision.existing_memory.id

            # A "dedupe" verdict means the candidate adds nothing new, so it
            # must not be saved. Matching is by object identity: this assumes
            # decision.new_candidate is the same object held in `candidates`,
            # which the merge rewrite above already relies on.
            deduped = {id(d.new_candidate) for d in decisions if d.action == "dedupe"}
            candidates_to_save = [c for c in candidates if id(c) not in deduped]
            skipped = len(candidates) - len(candidates_to_save)
            if skipped:
                console.print(f" [dim]Skipping {skipped} duplicate candidate(s)[/dim]")
            if not candidates_to_save:
                console.print(" [yellow]All candidates duplicate existing memories[/yellow]")
                memory_service.mark_session_processed(t.session_id, proj_dir_str, 0, source=t.source.value)
                continue

            from slopometry.core.models.memory import MemoryCreateRequest

            request = MemoryCreateRequest(
                session_id=t.session_id,
                project_dir=proj_dir_str,
                candidates=candidates_to_save,
            )

            saved = memory_service.save_memories(request)
            # Map candidate content -> saved id so supersede links can point
            # at the new entry. Presumably save_memories returns entries in
            # candidate order - verify against MemoryService.
            new_memory_ids: dict[str, str] = {}
            for candidate, entry in zip(candidates_to_save, saved):
                new_memory_ids[candidate.content] = entry.id
            for decision in decisions:
                if decision.action == "supersede":
                    new_id = new_memory_ids.get(decision.new_candidate.content)
                    if new_id is None:
                        continue
                    memory_service.update_memory(
                        decision.existing_memory.id, superseded_by=new_id
                    )
                    console.print(
                        f" [dim]Linked {decision.existing_memory.id} -> superseded_by={new_id}[/dim]"
                    )
            memory_service.mark_session_processed(
                t.session_id, proj_dir_str, len(saved), source=t.source.value
            )
            total_memories += len(saved)
            console.print(f" [green]Saved {len(saved)} memories[/green]")

        except Exception as e:
            console.print(f" [red]Error processing session: {e}[/red]")
            continue

    console.print()
    if not dry_run:
        console.print(
            f"[bold green]Done! Extracted {total_memories} memories from {len(sessions_to_process)} sessions.[/bold green]"
        )
    else:
        console.print(
            f"[bold yellow]Dry run complete. Would process {len(sessions_to_process)} sessions.[/bold yellow]"
        )
console.print(f"\n[bold]Slopometry Memories: {project_dir}[/bold]\n") + + memories = display_memories(current_type_filter) + + if not memories: + console.print("[yellow]No memories found.[/yellow]") + try: + user_input = console.input("\nPress Enter to continue or 'q' to quit... ").strip() + except (KeyboardInterrupt, EOFError): + console.print("\n[yellow]Cancelled[/yellow]") + return + if user_input.lower() == "q": + console.print("[green]Goodbye![/green]") + return + continue + + table = Table(show_header=True, header_style="bold") + table.add_column("ID", style="dim", width=8) + table.add_column("Type", width=10) + table.add_column("Uniqueness", width=10) + table.add_column("Content", max_width=60) + table.add_column("Session", style="dim") + + for idx, memory in enumerate(memories, 1): + content_preview = memory.content[:55] + "..." if len(memory.content) > 55 else memory.content + retained_marker = " [retained]" if memory.retained else "" + + uniqueness = "N/A" + if memory.embedding: + comparison_set = [m.embedding for j, m in enumerate(memories) if j != idx - 1 and m.embedding] + uniqueness_score = embedding_service.compute_uniqueness_score(memory.embedding, comparison_set) + uniqueness = f"{uniqueness_score:.2f}" + + table.add_row( + str(idx), + memory.memory_type.value, + uniqueness, + content_preview + retained_marker, + memory.session_id[:8], + ) + + console.print(table) + + console.print("\n[bold]Actions:[/bold]") + console.print(" (r)etain - Mark memory as retained") + console.print(" (d)elete - Delete a memory") + console.print(" (e)dit - Edit memory content") + console.print(" (f)ilter - Filter by type (user|feedback/project|reference)") + console.print(" (p)urge - Delete ALL memories (requires confirmation)") + console.print(" (q)uit - Quit") + + try: + user_input = console.input("\n> ").strip() + except (KeyboardInterrupt, EOFError): + console.print("\n[yellow]Cancelled[/yellow]") + return + + if not user_input: + continue + + parts = 
user_input.split() + cmd = parts[0].lower() + arg = parts[1] if len(parts) > 1 else None + + if cmd == "q": + console.print("[green]Goodbye![/green]") + return + + if cmd == "f": + if not arg: + console.print("[yellow]Usage: f (user|feedback|project|reference)[/yellow]") + continue + if arg not in ["user", "feedback", "project", "reference"]: + console.print(f"[red]Invalid type: {arg}[/red]") + continue + current_type_filter = arg + continue + + if cmd == "p": + console.print("\n[bold red]WARNING: This will delete ALL memories![/bold red]") + try: + confirm = console.input("Type 'yes' to confirm: ").strip() + except (KeyboardInterrupt, EOFError): + console.print("\n[yellow]Cancelled[/yellow]") + continue + + if confirm.lower() == "yes": + count = memory_service.delete_all_memories() + console.print(f"[green]Deleted {count} memories[/green]") + else: + console.print("[yellow]Purge cancelled[/yellow]") + continue + + if not arg or not arg.isdigit(): + console.print("[yellow]Usage: (e.g., d 1, r 2)[/yellow]") + continue + + idx = int(arg) - 1 + if idx < 0 or idx >= len(memories): + console.print(f"[red]Invalid ID: {arg} (must be 1-{len(memories)})[/red]") + continue + + memory = memories[idx] + + if cmd == "d": + if memory_service.delete_memory(memory.id): + console.print(f"[green]Deleted memory {arg}[/green]") + else: + console.print(f"[red]Failed to delete memory {arg}[/red]") + continue + + elif cmd == "r": + if memory_service.update_memory(memory.id, retained=True): + console.print(f"[green]Marked memory {arg} as retained[/green]") + else: + console.print(f"[red]Failed to update memory {arg}[/red]") + continue + + elif cmd == "e": + console.print("[dim]Current content:[/dim]") + console.print(f" {memory.content}") + console.print("[dim]Enter new content (or press Enter to cancel):[/dim]") + try: + new_content = console.input(" New content: ").strip() + except (KeyboardInterrupt, EOFError): + console.print("\n[yellow]Cancelled[/yellow]") + continue + + if 
new_content: + if memory_service.update_memory(memory.id, content=new_content): + console.print(f"[green]Updated memory {arg}[/green]") + else: + console.print(f"[red]Failed to update memory {arg}[/red]") + else: + console.print("[yellow]No changes made[/yellow]") + continue + + else: + console.print(f"[red]Unknown command: {cmd}[/red]") + console.print("Valid commands: r (retain), d (delete), e (edit), f (filter), p (purge), q (quit)") diff --git a/src/slopometry/solo/cli/preflight.py b/src/slopometry/solo/cli/preflight.py new file mode 100644 index 0000000..b7ecf46 --- /dev/null +++ b/src/slopometry/solo/cli/preflight.py @@ -0,0 +1,65 @@ +"""Pre-flight health checks for memory extraction CLI endpoints. + +Both the chat LLM (used for memory extraction) and the embedding endpoint +(used for freshness + uniqueness scoring) must be reachable before any +session is processed. If either is down, the whole batch aborts with an +explicit error rather than silently processing 12 sessions that all fail. +""" + +import logging + +import click + +logger = logging.getLogger(__name__) + + +def _check_endpoint( + label: str, + endpoint: str, + api_key: str, +) -> str | None: + """Return None if the endpoint is reachable, or an error string describing why not. + + Uses ``GET /v1/models`` (cheap list call, no token cost) to validate + reachability + auth + model availability in one shot. + """ + try: + from openai import OpenAI + except ImportError: + return f"{label}: openai package not installed" + + try: + client = OpenAI(base_url=endpoint, api_key=api_key) + client.models.list() + return None + except Exception as e: + return f"{label} ({endpoint}): {type(e).__name__}: {e}" + + +def preflight_endpoints( + chat_endpoint: str, + embedding_endpoint: str, + chat_api_key: str, + embedding_api_key: str, +) -> None: + """Validate that both endpoints are reachable before processing any session. 
class EmbeddingService:
    """Generates embeddings for memory content using an OpenAI-compatible API.

    Also provides the pure-Python similarity helpers used to score how unique
    a memory is relative to others.
    """

    def __init__(
        self,
        endpoint: str,
        model: str,
        api_key: str,
    ):
        self.endpoint = endpoint
        self.model = model
        self.api_key = api_key

    def get_embedding(self, text: str) -> list[float]:
        """Get embedding vector for a text.

        Args:
            text: Text to embed

        Returns:
            List of floats representing the embedding vector

        Raises:
            RuntimeError: If the openai package is missing, the request fails,
                or the endpoint returns no embedding data
        """
        try:
            from openai import OpenAI
        except ImportError as e:
            raise RuntimeError("openai package required for embeddings. Install with: pip install openai") from e

        try:
            client = OpenAI(base_url=self.endpoint, api_key=self.api_key)

            response = client.embeddings.create(
                model=self.model,
                input=text,
            )

            if response.data:
                return response.data[0].embedding
            raise RuntimeError("Empty response from embedding endpoint")

        except RuntimeError:
            # Already the documented exception type; do not double-wrap it.
            raise
        except Exception as e:
            raise RuntimeError(f"Failed to get embedding: {e}") from e

    def compute_similarity(self, embedding1: list[float], embedding2: list[float]) -> float:
        """Compute cosine similarity between two embedding vectors.

        Args:
            embedding1: First embedding vector
            embedding2: Second embedding vector

        Returns:
            Cosine similarity score between -1 and 1. Returns 0.0 when either
            vector is empty, has zero magnitude, or the two vectors differ in
            length (their similarity is undefined).
        """
        import math

        # zip() would silently truncate to the shorter vector while the norms
        # below use the full vectors, producing a meaningless score.
        if not embedding1 or len(embedding1) != len(embedding2):
            return 0.0

        dot_product = sum(a * b for a, b in zip(embedding1, embedding2))
        magnitude1 = math.sqrt(sum(a * a for a in embedding1))
        magnitude2 = math.sqrt(sum(b * b for b in embedding2))

        if magnitude1 == 0 or magnitude2 == 0:
            return 0.0

        return dot_product / (magnitude1 * magnitude2)

    def compute_uniqueness_score(
        self,
        embedding: list[float],
        existing_embeddings: list[list[float]],
    ) -> float:
        """Compute uniqueness score (1 - avg similarity to existing).

        Higher score = more unique compared to existing memories.
        Score of 1.0 = completely unique (no similarity to any existing).
        Score of 0.0 = identical to existing memories.

        Args:
            embedding: New embedding to score
            existing_embeddings: List of existing embeddings to compare against

        Returns:
            Uniqueness score between 0.0 and 1.0, rounded to two decimals
        """
        if not existing_embeddings:
            return 1.0

        similarities = [self.compute_similarity(embedding, existing) for existing in existing_embeddings]
        avg_similarity = sum(similarities) / len(similarities)

        # Clamp first: a negative average similarity must not push the score above 1.0.
        uniqueness = 1.0 - max(0.0, min(1.0, avg_similarity))
        return round(uniqueness, 2)
+ +MEMORY TYPES (ordered by retrieval value): + +1. **user** — HIGHEST VALUE. Facts about the human's identity, role, expertise, + and stable preferences. These directly shape how the agent should behave + next session. Examples worth extracting: + - "User works on slopometry, a Claude Code session tracker" + - "User prefers strict type checking with pyright in basic mode" + - "User maintains slopometry with uv tool install" + - "User dislikes emojis in commit messages and code comments" + - "User is a junior learning code review skills" + Extract when the user STATES a preference, role, or stable fact about + themselves. Don't extract one-off statements ("let me try X") or + task-specific decisions. + +2. **feedback** — Guidance on how to work, corrections, confirmed approaches. + ALWAYS include the WHY ("learned from incident X", "user confirmed Y + after we tried Z"). Without the why, the feedback is unsearchable later. + Examples: + - "Replace pytest.skip with assert when the test setup would have failed + anyway — the skip masks a real problem (learned reviewing PR)" + - "Use `git ls-files --cached --others` + `git hash-object`, NOT + `write-tree`, for source digest (write-tree breaks existing filter + tests)" + +3. **project** — Work goals, constraints, topology NOT derivable from the + current code or git. Examples: + - "Project uses uv for dependency management, not pip" + - "Database schema lives in `core/database.py` with raw SQL + migrations" + - "Stop-hook feedback is gated by `settings.enable_complexity_feedback`, + not always-on" + Skip facts already visible in the codebase (file structure, imports, + pyproject.toml contents, recent commits). Those are reconstructable. + +4. **reference** — External resource pointers (URLs, dashboards, tickets, + fork locations). 
Examples: + - "Rust code analysis fork: github.com/Droidcraft/rust-code-analysis — + install via `cargo install --git`" + - "VictoriaMetrics uses `metric_relabel_configs` (not `relabel_configs`) + for post-collection filtering" + +MEMORY CRITERIA — the Litmus Test: +"If I started fresh next session, would NOT knowing this make me repeat a +mistake, re-derive something hard, or act against user preference?" +If YES → memory. If NO → skip. + +HYGIENE: +- Convert relative dates ("19 days ago", "last week") to absolute dates + (ISO format) so the memory stays meaningful as time passes +- One fact per memory — split compound observations +- Skip reconstructable facts: code structure, imports, recent git history, + CLAUDE.md contents, package versions — these are visible in the repo +- Skip one-off task decisions ("we'll use Redis for this feature") that + don't generalize + +ANTI-STALENESS RULES (the failures we've seen): +- DO NOT extract a tool/dependency claim without a "current as of" qualifier + if you cannot verify it. Bad: "Project uses radon". Good: "Project uses + rust-code-analysis (radon was abandoned ~5 years ago per README history)". +- If the user mentions SWITCHING from one tool to another, prefer the + CURRENT tool. Bad: extract both "we used X" and "we now use Y" — just + extract "we now use Y, switched from X because Z". +- For facts that may have changed since the transcript (dependency + versions, framework choices, team structure), include the temporal + context: "as of 2026-06, X" — the freshness validator will surface + these for review. +- Avoid imperative instructions shaped as memories ("use X for Y") unless + the user explicitly endorsed the approach. Otherwise classify as + `project` or `feedback` with the why. + +Return a JSON array. 
class MemoryExtractor:
    """Extracts memory candidates from transcripts using LLM.

    Two parsers turn harness-specific session storage into the same
    ``USER:`` / ``ASSISTANT:`` text format; ``generate_memory_candidates``
    then sends that text to an OpenAI-compatible chat endpoint.
    """

    _VALID_MEMORY_TYPES = ("user", "feedback", "project", "reference")

    def __init__(self, llm_endpoint: str, llm_model: str, api_key: str = "dummy"):
        self.llm_endpoint = llm_endpoint
        self.llm_model = llm_model
        self.api_key = api_key

    @staticmethod
    def _render_claude_content(
        content: object,
        role: str,
        truncation: TranscriptTruncationConfig,
    ) -> list[str]:
        """Flatten one Claude Code ``message.content`` value into text pieces."""
        if isinstance(content, str):
            # A plain string is the whole message text; iterating it
            # block-by-block would walk its characters and drop all of them.
            return [content] if content.strip() else []
        if not isinstance(content, list):
            return []

        pieces: list[str] = []
        for block in content:
            if not isinstance(block, dict):
                continue
            block_type = block.get("type")
            if block_type == "text":
                pieces.append(str(block.get("text") or ""))
            elif role == "user" and block_type == "tool_result":
                result = block.get("content", "")
                if isinstance(result, list):
                    # Tool results may also arrive as a list of text blocks.
                    result = "".join(
                        str(item.get("text") or "")
                        for item in result
                        if isinstance(item, dict) and item.get("type") == "text"
                    )
                if isinstance(result, str):
                    pieces.append(f"[tool result: {result[:truncation.tool_result_chars]}]")
            elif role == "assistant" and block_type == "tool_use":
                pieces.append(f"[TOOL: {block.get('name', 'unknown')}]")
        return pieces

    @staticmethod
    def _read_json_object(path: Path) -> dict | None:
        """Load ``path`` as a JSON object, or return None if unreadable or not an object."""
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            return None
        return data if isinstance(data, dict) else None

    @staticmethod
    def _strip_llm_wrappers(content: str) -> str:
        """Remove a leading ``<think>`` block and Markdown code fences from an LLM reply."""
        text = content.strip()

        # NOTE(review): assumes reasoning models delimit their preamble with
        # <think>...</think>; adjust the markers if the endpoint uses others.
        start_marker, end_marker = "<think>", "</think>"
        if text.startswith(start_marker):
            end_idx = text.find(end_marker)
            if end_idx != -1:
                text = text[end_idx + len(end_marker) :].lstrip()

        if text.startswith("```json"):
            text = text[7:]
        elif text.startswith("```"):
            text = text[3:]
        if text.endswith("```"):
            text = text[:-3]
        return text.strip()

    def extract_memories_from_transcript(
        self,
        transcript_path: Path,
        truncation: TranscriptTruncationConfig | None = None,
    ) -> str:
        """Parse JSONL transcript and clean noise.

        Only ``user`` and ``assistant`` records are emitted. Other record
        types (including ``system``) and malformed lines are skipped.

        Args:
            transcript_path: Path to the Claude Code JSONL transcript
            truncation: Per-tool-part summary truncation limits

        Returns:
            Cleaned conversation text suitable for LLM analysis, or an empty
            string if the file cannot be read
        """
        truncation = truncation or TranscriptTruncationConfig()
        conversation_parts: list[str] = []
        try:
            with open(transcript_path, encoding="utf-8") as f:
                for line in f:
                    if not line.strip():
                        continue
                    try:
                        data = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    # A valid JSON line that is not an object carries no message.
                    if not isinstance(data, dict):
                        continue

                    role = data.get("type")
                    if role not in ("user", "assistant"):
                        continue
                    message = data.get("message")
                    if not isinstance(message, dict):
                        continue

                    text_parts = self._render_claude_content(message.get("content", []), role, truncation)
                    if text_parts:
                        prefix = "USER" if role == "user" else "ASSISTANT"
                        conversation_parts.append(f"{prefix}: {''.join(text_parts)}")
        except (OSError, UnicodeError) as e:
            logger.error(f"Failed to parse transcript {transcript_path}: {e}")
            return ""

        return "\n".join(conversation_parts)

    def extract_memories_from_opencode_session(
        self,
        session_id: str,
        storage_root: Path,
        truncation: TranscriptTruncationConfig | None = None,
    ) -> str:
        """Reconstruct conversation text from OpenCode's session/message/part layout.

        Reads every ``*.json`` file under ``storage_root/message/<session_id>``,
        orders the messages by their ``time.created`` value, and for each
        message reads the part files under ``storage_root/part/<message id>``.
        The output uses the same ``USER:`` / ``ASSISTANT:`` text format the
        Claude Code parser produces, so downstream LLM extraction works
        unchanged. Unreadable or malformed files are skipped individually.

        Args:
            session_id: OpenCode session identifier
            storage_root: OpenCode storage root
            truncation: Per-tool-part summary truncation limits

        Returns:
            Cleaned conversation text suitable for LLM analysis
        """
        truncation = truncation or TranscriptTruncationConfig()
        message_dir = storage_root / "message" / session_id
        if not message_dir.is_dir():
            logger.debug("No message directory for OpenCode session %s", session_id)
            return ""

        try:
            message_paths = sorted(message_dir.glob("*.json"))
        except OSError as e:
            logger.error("Failed to enumerate OpenCode messages for %s: %s", session_id, e)
            return ""

        # Parse each message once; one corrupt file must not discard the session.
        messages: list[tuple[float, dict]] = []
        for message_path in message_paths:
            message = self._read_json_object(message_path)
            if message is None:
                continue
            time_info = message.get("time")
            created = time_info.get("created", 0) if isinstance(time_info, dict) else 0
            if not isinstance(created, (int, float)):
                created = 0
            messages.append((created, message))
        messages.sort(key=lambda pair: pair[0])

        conversation_parts: list[str] = []
        for _, message in messages:
            role = message.get("role")
            if role not in ("user", "assistant"):
                continue

            message_id = message.get("id")
            if not message_id:
                continue

            part_dir = storage_root / "part" / message_id
            if not part_dir.is_dir():
                continue

            try:
                part_files = sorted(part_dir.glob("*.json"))
            except OSError:
                continue

            text_parts: list[str] = []
            for part_path in part_files:
                part = self._read_json_object(part_path)
                if part is None:
                    continue

                part_type = part.get("type")
                if part_type == "text":
                    text_parts.append(str(part.get("text") or ""))
                elif part_type == "tool":
                    tool_name = part.get("tool", "unknown")
                    state = part.get("state")
                    if not isinstance(state, dict):
                        state = {}
                    input_summary = json.dumps(state.get("input", {}))[: truncation.tool_input_chars]
                    # Output is sliced as text; coerce non-string payloads first.
                    output_summary = str(state.get("output") or "")[: truncation.tool_output_chars]
                    text_parts.append(
                        f"[TOOL: {tool_name} input={input_summary!r} output={output_summary!r}]"
                    )

            if text_parts:
                prefix = "USER" if role == "user" else "ASSISTANT"
                conversation_parts.append(f"{prefix}: {''.join(text_parts)}")

        return "\n".join(conversation_parts)

    def generate_memory_candidates(self, transcript_snippet: str) -> list[MemoryCandidate]:
        """Generate memory candidates from transcript using LLM.

        Args:
            transcript_snippet: Cleaned conversation text

        Returns:
            List of MemoryCandidate objects (empty for a blank snippet)

        Raises:
            LLMConnectionError: If LLM endpoint is unreachable or returns an error
            ValueError: If response cannot be parsed as memory candidates
        """
        if not transcript_snippet.strip():
            return []

        try:
            from openai import OpenAI
        except ImportError as e:
            raise LLMConnectionError(
                "openai package required for memory extraction. Install with: pip install openai"
            ) from e

        try:
            client = OpenAI(base_url=self.llm_endpoint, api_key=self.api_key)
        except Exception as e:
            raise LLMConnectionError(f"Failed to create OpenAI client: {e}") from e

        try:
            response = client.chat.completions.create(
                model=self.llm_model,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that extracts memory candidates."},
                    {"role": "user", "content": MEMORY_GUIDELINE_PROMPT + transcript_snippet[:15000]},
                ],
                temperature=0.3,
                max_tokens=2000,
            )
        except Exception as e:
            raise LLMConnectionError(f"Failed to connect to LLM endpoint {self.llm_endpoint}: {e}") from e

        content = response.choices[0].message.content if response.choices else None
        if not content:
            raise ValueError("LLM returned empty response")

        json_str = self._strip_llm_wrappers(content)

        try:
            data = json.loads(json_str)
        except json.JSONDecodeError as e:
            # Fall back to the outermost [...] span when the model wrapped the
            # array in extra prose; otherwise report the original parse error.
            start, end = json_str.find("["), json_str.rfind("]")
            if not 0 <= start < end:
                raise ValueError(f"LLM response is not valid JSON: {e}") from e
            try:
                data = json.loads(json_str[start : end + 1])
            except json.JSONDecodeError:
                raise ValueError(f"LLM response is not valid JSON: {e}") from e

        if not isinstance(data, list):
            raise ValueError(f"Expected JSON array, got {type(data).__name__}")

        candidates: list[MemoryCandidate] = []
        for item in data:
            if not isinstance(item, dict):
                logger.debug("Skipping non-object memory candidate: %r", item)
                continue

            memory_type_str = item.get("memory_type", "")
            if memory_type_str not in self._VALID_MEMORY_TYPES:
                continue

            content_text = item.get("content", "")
            if not isinstance(content_text, str) or not content_text.strip():
                # A memory without content carries no information.
                continue

            try:
                candidates.append(
                    MemoryCandidate(
                        memory_type=MemoryType(memory_type_str),
                        content=content_text,
                        source_context=item.get("source_context"),
                    )
                )
            except (KeyError, ValueError) as e:
                logger.debug(f"Skipping invalid memory candidate: {e}")
                continue

        return candidates
a/src/slopometry/solo/services/memory_freshness.py b/src/slopometry/solo/services/memory_freshness.py new file mode 100644 index 0000000..68f0088 --- /dev/null +++ b/src/slopometry/solo/services/memory_freshness.py @@ -0,0 +1,289 @@ +"""Freshness validation for newly-extracted memory candidates. + +After LLM extraction, each new candidate is paired with semantically similar +existing memories in the same project. Pairing is gated by a per-project +similarity threshold derived from the existing memories' own pairwise +similarity distribution (mean + quantiles), so the threshold is data-driven +rather than hand-tuned. + +Each above-threshold pair is then sent to an LLM judge that decides how to +reconcile the two. The judge has four actions and full authority over the +decision — there are no hardcoded length heuristics or contradiction rules, +only the statistical gate to control LLM call volume. + +Actions: +- keep_both: the two memories are different enough that both belong +- merge: synthesize a single updated version that supersedes both +- supersede: the new candidate wins, mark the old as outdated +- dedupe: they say the same thing; skip the new and confirm the old +""" + +import json +import logging +import statistics +from dataclasses import dataclass + +from slopometry.core.models.memory import MemoryCandidate, MemoryEntry + +logger = logging.getLogger(__name__) + +FLOOR_THRESHOLD = 0.45 +CEILING_THRESHOLD = 0.95 + +RECONCILIATION_PROMPT = """You are reconciling two memory candidates about the same subject. + +Two memories reconcile in exactly one of four ways: + +1. **keep_both** — they cover genuinely different aspects of the subject. + Example: "Project uses rust-code-analysis" + "Project supports Python 3.13". + Different tools/topics — both stay. + +2. **merge** — they cover the same aspect but the new one updates, supersedes, + or extends the old. Synthesize a single merged version that combines both + pieces of information. 
Example: "Project uses radon" + "Switched to + rust-code-analysis in 2026 because radon was abandoned". Merged: + "Project uses rust-code-analysis (switched from radon in 2026)". + +3. **supersede** — the new candidate is clearly the current truth and the + old is outdated. The new wins; the old should be flagged as outdated. + Example: old "Python 3.10" → new "Python 3.13". + +4. **dedupe** — they say the same thing in different words. Skip the new; + the existing memory already covers it. Example: old "user prefers pyright" + + new "user uses pyright type checker". + +NEW: +{new_content} + +EXISTING: +{existing_content} + +Reply with JSON only: +{{"action": "keep_both" | "merge" | "supersede" | "dedupe", "reason": "", "merged_content": ""}}""" + + +@dataclass(frozen=True) +class ProjectSimilarityDistribution: + """Per-project pairwise similarity statistics for existing memories.""" + + n_pairs: int + mean: float + p50: float + p75: float + p90: float + p95: float + + @property + def derived_threshold(self) -> float: + """Data-driven dedupe threshold from the project's own distribution. + + Uses p75 of pairwise similarity as the candidate-relevance threshold. + Falls back to FLOOR_THRESHOLD when the project has too few memories to + estimate a distribution. Capped at CEILING_THRESHOLD so that even in + projects with very similar memories, only genuinely redundant pairs + are sent to the LLM. 
def _cosine_similarity(a: list[float], b: list[float]) -> float:
    """Return the cosine similarity of two embedding vectors.

    Yields 0.0 when either vector is empty, when the lengths differ, or
    when either vector has zero magnitude.
    """
    if not (a and b) or len(a) != len(b):
        return 0.0

    squared_a = sum(value * value for value in a)
    squared_b = sum(value * value for value in b)
    length_a = squared_a ** 0.5
    length_b = squared_b ** 0.5
    if length_a == 0.0 or length_b == 0.0:
        return 0.0

    inner = sum(left * right for left, right in zip(a, b))
    return inner / (length_a * length_b)
def _judge_reconciliation(
    candidate: MemoryCandidate,
    existing: MemoryEntry,
    llm_endpoint: str,
    llm_model: str,
    api_key: str,
) -> FreshnessDecision:
    """Ask the LLM how to reconcile the pair.

    Any reply that cannot be interpreted (empty, not JSON, not a JSON object,
    unknown action, or a ``merge`` without merged text) falls back to a
    ``keep_both`` decision. Errors raised by the OpenAI client itself are not
    caught here and propagate to the caller.

    Args:
        candidate: Newly extracted memory candidate
        existing: Existing memory the candidate is compared with
        llm_endpoint: Base URL of the OpenAI-compatible chat endpoint
        llm_model: Model name to query
        api_key: API key for the endpoint

    Returns:
        A FreshnessDecision whose ``similarity`` is 0.0; this function does
        not compute similarity.
    """
    from openai import OpenAI

    def keep_both(reason: str) -> FreshnessDecision:
        return FreshnessDecision(
            new_candidate=candidate,
            existing_memory=existing,
            similarity=0.0,
            action="keep_both",
            reason=reason,
        )

    prompt = RECONCILIATION_PROMPT.format(
        new_content=candidate.content,
        existing_content=existing.content,
    )
    client = OpenAI(base_url=llm_endpoint, api_key=api_key)
    response = client.chat.completions.create(
        model=llm_model,
        messages=[
            {
                "role": "system",
                "content": "You reconcile memory pairs. Always reply with valid JSON containing action, reason, and (only when merging) merged_content.",
            },
            {"role": "user", "content": prompt},
        ],
        temperature=0.0,
        max_tokens=200,
    )
    if not response.choices:
        return keep_both("LLM returned no choices")

    content = response.choices[0].message.content or ""
    text = content.strip()

    # NOTE(review): assumes reasoning models delimit their preamble with
    # <think>...</think>; adjust the markers if the endpoint uses others.
    if text.startswith("<think>"):
        end_idx = text.find("</think>")
        if end_idx != -1:
            text = text[end_idx + len("</think>") :].strip()

    if text.startswith("```"):
        # Strips the fence backticks from both ends, then the language tag.
        text = text.strip("`").removeprefix("json").strip()

    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        logger.debug("Could not parse reconciliation response: %s", text[:80])
        return keep_both(f"Could not parse LLM response: {text[:80]}")

    if not isinstance(data, dict):
        # Valid JSON that is not an object has no action to read.
        logger.debug("Reconciliation response is not a JSON object: %s", text[:80])
        return keep_both(f"Could not parse LLM response: {text[:80]}")

    action = data.get("action", "keep_both")
    if action not in ("keep_both", "merge", "supersede", "dedupe"):
        action = "keep_both"
    reason = str(data.get("reason") or "")

    merged = None
    if action == "merge":
        merged_raw = data.get("merged_content")
        if isinstance(merged_raw, str) and merged_raw.strip():
            merged = merged_raw
        else:
            # Nothing to merge into: both memories are kept as they are.
            return keep_both(f"LLM chose merge without merged_content: {reason}")

    return FreshnessDecision(
        new_candidate=candidate,
        existing_memory=existing,
        similarity=0.0,
        action=action,
        reason=reason,
        merged_content=merged,
    )
+ """ + + def __init__( + self, + llm_endpoint: str, + llm_model: str, + api_key: str = "dummy", + ) -> None: + self.llm_endpoint = llm_endpoint + self.llm_model = llm_model + self.api_key = api_key + + def validate( + self, + candidates: list[MemoryCandidate], + existing: list[MemoryEntry], + ) -> tuple[list[FreshnessDecision], ProjectSimilarityDistribution]: + """Return reconciliation decisions plus the project's similarity distribution. + + Each candidate is paired with existing memories whose cosine similarity + to the candidate is >= the project's derived threshold. Each pair is + sent to the LLM for a reconciliation verdict. + """ + distribution = compute_project_distribution(existing) + threshold = distribution.derived_threshold + + decisions: list[FreshnessDecision] = [] + for candidate in candidates: + similar = _find_above_threshold(candidate, existing, threshold) + for memory, similarity in similar: + try: + decision = _judge_reconciliation( + candidate, memory, self.llm_endpoint, self.llm_model, self.api_key + ) + except Exception as e: + logger.debug("Reconciliation judge failed for candidate vs %s: %s", memory.id, e) + continue + decisions.append( + FreshnessDecision( + new_candidate=decision.new_candidate, + existing_memory=decision.existing_memory, + similarity=similarity, + action=decision.action, + reason=decision.reason, + merged_content=decision.merged_content, + ) + ) + return decisions, distribution diff --git a/src/slopometry/solo/services/memory_service.py b/src/slopometry/solo/services/memory_service.py new file mode 100644 index 0000000..9e908e8 --- /dev/null +++ b/src/slopometry/solo/services/memory_service.py @@ -0,0 +1,125 @@ +"""Memory management service for solo-leveler features.""" + +import uuid +from datetime import datetime + +from slopometry.core.database import EventDatabase +from slopometry.core.models.memory import MemoryCreateRequest, MemoryEntry, MemoryType + + +class MemoryService: + """Handles memory CRUD operations and 
class MemoryService:
    """Handles memory CRUD operations and session tracking."""

    def __init__(self, db: EventDatabase | None = None):
        """Use the given database, or open a default ``EventDatabase``."""
        self.db = db or EventDatabase()

    def save_memory(self, memory: MemoryEntry) -> None:
        """Save a single memory entry."""
        self.db.save_memory(memory)

    def save_memories(self, request: MemoryCreateRequest, source: str = "claude_code") -> list[MemoryEntry]:
        """Save multiple memory entries from a request and mark the session processed.

        Args:
            request: Session id, project directory and the candidates to persist.
            source: Harness the session came from. Forwarded to the
                processed-session record so it agrees with
                ``mark_session_processed`` / ``is_session_processed``, which
                are keyed by source. Defaults to ``"claude_code"``, the same
                default those methods use.

        Returns:
            List of saved MemoryEntry objects
        """
        # One timestamp for the whole batch so entries from a session sort together.
        created_at = datetime.now()
        saved_memories: list[MemoryEntry] = []

        for candidate in request.candidates:
            memory = MemoryEntry(
                id=str(uuid.uuid4()),
                session_id=request.session_id,
                project_dir=request.project_dir,
                memory_type=candidate.memory_type,
                content=candidate.content,
                source_context=candidate.source_context,
                embedding=candidate.embedding,
                metadata=candidate.metadata,
                created_at=created_at,
            )
            self.db.save_memory(memory)
            saved_memories.append(memory)

        self.db.mark_session_processed(
            request.session_id,
            request.project_dir,
            len(saved_memories),
            source=source,
        )

        return saved_memories

    def get_memories(
        self,
        project_dir: str | None = None,
        memory_type: MemoryType | None = None,
        limit: int = 50,
    ) -> list[MemoryEntry]:
        """Get memories with optional filters.

        Args:
            project_dir: Filter by project directory
            memory_type: Filter by memory type
            limit: Maximum number of results

        Returns:
            List of matching MemoryEntry objects
        """
        return self.db.get_memories(
            project_dir=project_dir,
            # The database layer filters on the enum's value, not the enum itself.
            memory_type=memory_type.value if memory_type else None,
            limit=limit,
        )

    def delete_memory(self, memory_id: str) -> bool:
        """Delete a memory by ID.

        Returns:
            True if deleted, False if not found
        """
        return self.db.delete_memory(memory_id)

    def delete_all_memories(self) -> int:
        """Delete all memories.

        Returns:
            Number of memories deleted
        """
        return self.db.delete_all_memories()

    def update_memory(
        self,
        memory_id: str,
        content: str | None = None,
        retained: bool | None = None,
        superseded_by: str | None = None,
        source_context: str | None = None,
        embedding: list[float] | None = None,
    ) -> bool:
        """Update a memory entry.

        All field arguments are forwarded unchanged to the database layer.

        Returns:
            True if updated, False if not found
        """
        return self.db.update_memory(
            memory_id,
            content=content,
            retained=retained,
            superseded_by=superseded_by,
            source_context=source_context,
            embedding=embedding,
        )

    def mark_session_processed(
        self, session_id: str, project_dir: str, memory_count: int, source: str = "claude_code"
    ) -> None:
        """Mark a session as processed for memory extraction."""
        self.db.mark_session_processed(session_id, project_dir, memory_count, source=source)

    def is_session_processed(self, session_id: str, project_dir: str, source: str = "claude_code") -> bool:
        """Check if a session has already been processed."""
        return self.db.is_session_processed(session_id, project_dir, source=source)

    def get_memory_stats(self, project_dir: str | None = None) -> dict:
        """Get statistics about stored memories.

        Returns:
            Dict with total count and breakdown by type
        """
        return self.db.get_memory_stats(project_dir=project_dir)
class TranscriptFinder:
    """Finds Claude Code and OpenCode transcripts for memory extraction."""

    def find_claude_project_dirs(self) -> list[Path]:
        """Find Claude Code project directories based on platform.

        The returned paths are candidates only; they are not guaranteed to
        exist. On platforms other than Windows and macOS,
        ``$XDG_DATA_HOME/claude/projects`` wins when it exists, otherwise
        ``~/.claude/projects`` is returned.
        """
        if sys.platform == "win32":
            base = Path(os.environ.get("LOCALAPPDATA", Path.home() / "AppData" / "Local"))
            return [base / "Claude" / "projects"]
        if sys.platform == "darwin":
            return [Path.home() / "Library" / "Application Support" / "Claude" / "projects"]

        xdg_data_home = os.environ.get("XDG_DATA_HOME")
        if xdg_data_home:
            claude_xdg = Path(xdg_data_home) / "claude" / "projects"
            if claude_xdg.exists():
                return [claude_xdg]
        return [Path.home() / ".claude" / "projects"]

    def _decode_claude_project_dir(self, dirname: str) -> Path | None:
        """Decode Claude project directory name back to working directory.

        Claude encodes paths like /mnt/terradump/code/slopometry as
        -mnt-terradump-code-slopometry (leading dash, slashes become dashes).

        The decoding is lossy: a dash that was part of the original path is
        indistinguishable from an encoded slash, so ``/home/u/my-project``
        decodes to ``/home/u/my/project``. Use ``_encode_claude_project_dir``
        to match such paths.

        Returns:
            The resolved decoded path, or None when ``dirname`` has no leading
            dash or the path cannot be resolved.
        """
        if not dirname.startswith("-"):
            return None
        decoded = "/" + dirname[1:].replace("-", "/")
        try:
            return Path(decoded).resolve()
        except (OSError, RuntimeError, ValueError):
            return None

    def _encode_claude_project_dir(self, project_dir: Path) -> str:
        """Encode a working directory the way Claude names its project folder.

        Applies the rule documented on ``_decode_claude_project_dir`` (slashes
        become dashes), which stays correct for paths that contain dashes.
        """
        return project_dir.as_posix().replace("/", "-")

    def find_slopometry_transcripts(self, project_dir: Path) -> list[DiscoveredTranscript]:
        """Find transcripts saved by slopometry in a project.

        Returns:
            List of DiscoveredTranscript with source=claude_code (slopometry's
            Claude Code harness produces these)
        """
        slop_dir = project_dir / ".slopometry"
        # is_dir (not exists) so a stray file named .slopometry cannot break iterdir.
        if not slop_dir.is_dir():
            return []

        results: list[DiscoveredTranscript] = []
        for session_dir in slop_dir.iterdir():
            if not session_dir.is_dir():
                continue
            transcript_path = session_dir / "transcript.jsonl"
            if transcript_path.exists():
                results.append(
                    DiscoveredTranscript(
                        session_id=session_dir.name,
                        transcript_path=transcript_path,
                        project_dir=project_dir,
                        source=AbstractEventSource.CLAUDE_CODE,
                    )
                )

        return results

    def find_opencode_storage_root(self) -> Path | None:
        """Find OpenCode's storage root directory.

        Layout: ``/project/.json``,
        ``/session//.json``,
        ``/message//.json``,
        ``/part//.json``.

        The returned path is not checked for existence.
        """
        if sys.platform == "win32":
            base = Path(os.environ.get("LOCALAPPDATA", Path.home() / "AppData" / "Local"))
            return base / "opencode" / "storage"
        xdg_data_home = os.environ.get("XDG_DATA_HOME")
        if xdg_data_home:
            return Path(xdg_data_home) / "opencode" / "storage"
        return Path.home() / ".local" / "share" / "opencode" / "storage"

    @staticmethod
    def _resolve_directory(directory: object) -> Path | None:
        """Resolve a directory string read from JSON; None when empty, non-string or unresolvable."""
        if not directory or not isinstance(directory, str):
            return None
        try:
            return Path(directory).resolve()
        except (OSError, RuntimeError, ValueError):
            return None

    @staticmethod
    def _load_json_object(path: Path) -> dict | None:
        """Read ``path`` as a JSON object; None when unreadable, malformed or not an object."""
        import json

        try:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            data = json.loads(path.read_text(encoding="utf-8"))
        except (ValueError, OSError):
            return None
        return data if isinstance(data, dict) else None

    @staticmethod
    def _worktree_contains(worktree: object, project_dir: Path) -> bool:
        """True if ``project_dir`` is the worktree itself or lies inside it."""
        resolved = TranscriptFinder._resolve_directory(worktree)
        return resolved is not None and project_dir.is_relative_to(resolved)

    @staticmethod
    def _session_within(directory: object, project_dir: Path) -> bool:
        """True if the session directory is ``project_dir`` or one of its descendants."""
        resolved = TranscriptFinder._resolve_directory(directory)
        return resolved is not None and resolved.is_relative_to(project_dir)

    def find_opencode_sessions(self, project_dir: Path) -> list[DiscoveredTranscript]:
        """Find OpenCode sessions whose working directory matches project_dir.

        A project file is considered when ``project_dir`` equals its
        ``worktree`` or lies inside it. Within such a project, a session
        matches when its ``directory`` equals ``project_dir`` or is a
        descendant of it. This covers the case where a session was opened
        from a subdirectory of a registered worktree, and keeps sessions
        opened in an ancestor directory from being attributed to the project.

        Returns:
            List of DiscoveredTranscript with source=opencode and
            transcript_path pointing at the session file (the extractor walks
            message/part directories from there).
        """
        storage_root = self.find_opencode_storage_root()
        if storage_root is None or not storage_root.is_dir():
            return []

        projects_root = storage_root / "project"
        if not projects_root.is_dir():
            return []

        target = project_dir.resolve()
        results: list[DiscoveredTranscript] = []

        for project_file in projects_root.glob("*.json"):
            project_meta = self._load_json_object(project_file)
            if project_meta is None:
                continue
            if not self._worktree_contains(project_meta.get("worktree"), target):
                continue

            project_id = project_meta.get("id")
            if not project_id or not isinstance(project_id, str):
                continue

            session_dir = storage_root / "session" / project_id
            if not session_dir.is_dir():
                continue

            for session_file in session_dir.glob("ses_*.json"):
                session_meta = self._load_json_object(session_file)
                if session_meta is None:
                    continue
                session_id = session_meta.get("id")
                if not session_id:
                    continue
                if not self._session_within(session_meta.get("directory"), target):
                    continue
                results.append(
                    DiscoveredTranscript(
                        session_id=session_id,
                        transcript_path=session_file,
                        project_dir=project_dir,
                        source=AbstractEventSource.OPENCODE,
                    )
                )

        return results

    def discover_transcripts(self, project_dir: Path) -> list[DiscoveredTranscript]:
        """Discover all transcripts for a project, across all harnesses.

        A Claude project folder matches when its name equals the encoded form
        of ``project_dir`` (robust to dashes in the path) or when its decoded
        name resolves to ``project_dir`` (the original rule, kept so paths
        reached through symlinks still match). The encoding is ambiguous by
        construction: ``/a/b-c`` and ``/a/b/c`` share one folder name.

        Returns:
            List of DiscoveredTranscript, one per session per harness
        """
        results: list[DiscoveredTranscript] = []
        project_dir = project_dir.resolve()
        encoded_name = self._encode_claude_project_dir(project_dir)

        for claude_projects_dir in self.find_claude_project_dirs():
            if not claude_projects_dir.is_dir():
                continue

            for project_subdir in claude_projects_dir.iterdir():
                if not project_subdir.is_dir():
                    continue

                if (
                    project_subdir.name != encoded_name
                    and self._decode_claude_project_dir(project_subdir.name) != project_dir
                ):
                    continue

                for transcript_path in project_subdir.glob("*.jsonl"):
                    results.append(
                        DiscoveredTranscript(
                            session_id=transcript_path.stem,
                            transcript_path=transcript_path,
                            project_dir=project_dir,
                            source=AbstractEventSource.CLAUDE_CODE,
                        )
                    )

        results.extend(self.find_slopometry_transcripts(project_dir))
        results.extend(self.find_opencode_sessions(project_dir))

        return results
console.print(f"Using agent: {llm_service.get_configured_agent()}") + console.print(f"Using model: {llm_service.get_configured_model()}") commit_info = llm_service.get_commit_info_for_display(base_commit, head_commit) diff --git a/src/slopometry/summoner/services/llm_service.py b/src/slopometry/summoner/services/llm_service.py index 768e47b..3f15219 100644 --- a/src/slopometry/summoner/services/llm_service.py +++ b/src/slopometry/summoner/services/llm_service.py @@ -17,7 +17,7 @@ def __init__(self, db: EventDatabase | None = None): def generate_user_stories_from_commits( self, repo_path: Path, base_commit: str, head_commit: str ) -> tuple[int, list[str]]: - """Generate user stories from commit diffs using configured AI agent. + """Generate user stories from commit diffs using the configured AI agent. Returns: Tuple of (successful_generations, error_messages) @@ -25,8 +25,8 @@ def generate_user_stories_from_commits( from slopometry.core.models import UserStoryEntry from slopometry.summoner.services.llm_wrapper import ( calculate_stride_size, + get_agent, get_commit_diff, - get_user_story_agent, get_user_story_prompt, resolve_commit_reference, ) @@ -49,7 +49,7 @@ def generate_user_stories_from_commits( prompt = get_user_story_prompt(diff) - agent = get_user_story_agent() + agent = get_agent() result = agent.run_sync(prompt) user_story_entry = UserStoryEntry( @@ -60,7 +60,7 @@ def generate_user_stories_from_commits( user_stories=result.output, rating=3, # Default neutral rating guidelines_for_improving="", - model_used=settings.user_story_agent, + model_used=settings.llm_model_name, prompt_template=prompt, repository_path=str(repo_path), ) @@ -139,6 +139,6 @@ def get_commit_info_for_display(self, base_commit: str, head_commit: str) -> dic "resolved_head": head_commit, } - def get_configured_agent(self) -> str: - """Get the configured user story agent name.""" - return settings.user_story_agent + def get_configured_model(self) -> str: + """Get the configured LLM model 
name.""" + return settings.llm_model_name diff --git a/src/slopometry/summoner/services/llm_wrapper.py b/src/slopometry/summoner/services/llm_wrapper.py index 9f2b0fc..4a937b2 100644 --- a/src/slopometry/summoner/services/llm_wrapper.py +++ b/src/slopometry/summoner/services/llm_wrapper.py @@ -2,17 +2,15 @@ import subprocess from pathlib import Path -logger = logging.getLogger(__name__) - from pydantic_ai import Agent -from pydantic_ai.models.anthropic import AnthropicModel -from pydantic_ai.models.openai import OpenAIChatModel, OpenAIResponsesModel, OpenAIResponsesModelSettings -from pydantic_ai.providers.anthropic import AnthropicProvider +from pydantic_ai.models.openai import OpenAIChatModel from pydantic_ai.providers.openai import OpenAIProvider -from slopometry.core.models.experiment import FeatureBoundary, MergeCommit +from slopometry.core.models.experiment import FeatureBoundary from slopometry.core.settings import settings +logger = logging.getLogger(__name__) + class OfflineModeError(Exception): """Raised when attempting to use LLM features while offline_mode is enabled.""" @@ -24,66 +22,24 @@ def __init__(self): ) -def _create_providers() -> tuple[OpenAIProvider, OpenAIProvider, AnthropicProvider]: - """Create LLM providers. Only called when offline_mode is disabled.""" - llm_gateway = OpenAIProvider(base_url=settings.llm_proxy_url, api_key=settings.llm_proxy_api_key) - responses_api_gateway = OpenAIProvider(base_url=settings.llm_responses_url, api_key=settings.llm_proxy_api_key) - anthropic_gateway = AnthropicProvider( - base_url=settings.anthropic_url, api_key=settings.anthropic_api_key.get_secret_value() - ) - return llm_gateway, responses_api_gateway, anthropic_gateway - - -def _create_agents() -> dict[str, Agent]: - """Create all available agents. 
def get_agent() -> Agent:
    """Return the agent used for all LLM-based tasks.

    The agent wraps an ``OpenAIChatModel`` for ``settings.llm_model_name``,
    served through an ``OpenAIProvider`` built from ``settings.llm_proxy_url``
    and ``settings.llm_proxy_api_key``. A fresh provider and agent are
    constructed on every call.

    NOTE(review): an earlier version of this docstring named a specific
    vLLM-hosted MiniMax-M3 (MXFP4) deployment at
    https://llm2.droidcraft.org/minimax-m3-mxfp4-vllm/v1 as the endpoint.
    The code only reads the settings above; confirm whether that is still
    the configured default.

    Raises:
        OfflineModeError: when ``settings.offline_mode`` is enabled.
    """
    if settings.offline_mode:
        raise OfflineModeError()

    provider = OpenAIProvider(
        base_url=settings.llm_proxy_url,
        api_key=settings.llm_proxy_api_key,
    )
    return Agent(
        name=settings.llm_model_name,
        model=OpenAIChatModel(model_name=settings.llm_model_name, provider=provider),
    )
def get_feature_boundaries(limit: int = 20) -> list[FeatureBoundary]:
    """Identify feature boundaries by finding merge commits and their base commits.

    Runs ``git`` in the current working directory. Only commits with at least
    two parents are considered; the second parent is treated as the tip of
    the merged feature branch.

    Args:
        limit: Maximum number of merge commits to analyze

    Returns:
        List of feature info with base commit, head commit, and description.
        Empty when the merge log cannot be read (not a repository, git
        failure, or no ``git`` executable). Merges whose merge-base or tip
        message cannot be resolved are skipped.
    """
    try:
        merge_log = subprocess.run(
            ["git", "log", "HEAD", "--merges", f"-{limit}", "--format=%H|%P|%s"],
            capture_output=True,
            text=True,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing git executable (FileNotFoundError), which
        # is the same "cannot inspect history" outcome as a failing command.
        logger.debug("Failed to find merge commits: %s", e)
        return []

    current_repo_path = Path.cwd()
    features: list[FeatureBoundary] = []

    for line in merge_log.stdout.strip().split("\n"):
        if not line:
            continue
        # maxsplit=2 keeps any "|" inside the commit subject intact.
        parts = line.split("|", 2)
        if len(parts) < 3:
            continue
        commit_hash, parents_raw, message = parts
        parents = parents_raw.split()
        if len(parents) < 2:
            continue
        feature_branch = parents[1]

        try:
            merge_base = subprocess.run(
                ["git", "merge-base", parents[0], feature_branch],
                capture_output=True,
                text=True,
                check=True,
            ).stdout.strip()
            feature_tip_message = subprocess.run(
                ["git", "log", "-1", "--format=%s", feature_branch],
                capture_output=True,
                text=True,
                check=True,
            ).stdout.strip()
        except (subprocess.CalledProcessError, OSError):
            continue

        features.append(
            FeatureBoundary(
                base_commit=merge_base,
                head_commit=feature_branch,
                merge_commit=commit_hash,
                merge_message=message,
                feature_message=feature_tip_message,
                repository_path=current_repo_path,
            )
        )

    return features
b/tests/test_baseline_service.py @@ -14,6 +14,7 @@ ResolvedBaselineStrategy, ) from slopometry.core.models.complexity import ExtendedComplexityMetrics +from slopometry.core.models.core import SmellCounts from slopometry.summoner.services.baseline_service import ( BaselineService, CommitInfo, @@ -191,7 +192,7 @@ def test_get_or_compute_baseline__returns_cached_when_head_unchanged(self, tmp_p mi_normalized=0.5, smell_penalty=0.1, adjusted_quality=0.45, - smell_counts={}, + smell_counts=SmellCounts(), ), strategy=ResolvedBaselineStrategy( requested=BaselineStrategy.AUTO, @@ -267,7 +268,7 @@ def test_get_or_compute_baseline__recomputes_when_weight_version_stale(self, tmp mi_normalized=0.5, smell_penalty=0.1, adjusted_quality=0.45, - smell_counts={}, + smell_counts=SmellCounts(), ), strategy=ResolvedBaselineStrategy( requested=BaselineStrategy.AUTO, diff --git a/tests/test_claude_code_adapter.py b/tests/test_claude_code_adapter.py new file mode 100644 index 0000000..0e8ae7b --- /dev/null +++ b/tests/test_claude_code_adapter.py @@ -0,0 +1,415 @@ +"""Tests for ClaudeCodeAdapter — translates Claude Code hook payloads into AbstractHookEvent. + +The adapter owns: + - the field-name mapping (tool_name, tool_input, tool_response, etc.) + - tool_type classification (PascalCase enum -> category string) + - event-type inference from payload shape (when stop_hook_active is present) + - transcript_location extraction (the harness-specific transcript path hint) + +Migrated from `test_hook_handler.py` (TestEventTypeDetection, parse_hook_input) +and `test_posttooluse_validation.py` and `test_notebookread_integration.py` — +those tests exercised the now-removed PreToolUseInput/PostToolUseInput wire +models, which the adapter handles via `extra="allow"` semantics internally. 
+""" + +import pytest + +from slopometry.core.models.protocol.events import ( + AbstractEventSource, + AbstractEventType, + AbstractHookEvent, +) +from slopometry.core.protocol.adapters.claude_code import ( + ClaudeCodeAdapter, + ToolType, + resolve_tool_type, +) + + +class TestDetectEventType: + """Tests for ClaudeCodeAdapter.detect_event_type — wire-shape inference.""" + + def test_detect_event_type__pre_tool_use_returns_tool_call_started(self): + """PreToolUse payload: tool_name + tool_input -> TOOL_CALL_STARTED.""" + adapter = ClaudeCodeAdapter() + payload = {"tool_name": "Bash", "tool_input": {"command": "ls"}} + assert adapter.detect_event_type(payload) == AbstractEventType.TOOL_CALL_STARTED + + def test_detect_event_type__post_tool_use_returns_tool_call_completed(self): + """PostToolUse payload: tool_name + tool_input + tool_response -> TOOL_CALL_COMPLETED.""" + adapter = ClaudeCodeAdapter() + payload = { + "tool_name": "Bash", + "tool_input": {"command": "ls"}, + "tool_response": "file1\nfile2", + } + assert adapter.detect_event_type(payload) == AbstractEventType.TOOL_CALL_COMPLETED + + def test_detect_event_type__notification_returns_notification(self): + """Notification payload: 'message' field present -> NOTIFICATION.""" + adapter = ClaudeCodeAdapter() + payload = {"message": "Test notification"} + assert adapter.detect_event_type(payload) == AbstractEventType.NOTIFICATION + + def test_detect_event_type__stop_with_stop_hook_active_false_returns_turn_completed(self): + """Stop payload: stop_hook_active=False -> TURN_COMPLETED (top-level turn ended).""" + adapter = ClaudeCodeAdapter() + payload = {"stop_hook_active": False} + assert adapter.detect_event_type(payload) == AbstractEventType.TURN_COMPLETED + + def test_detect_event_type__stop_with_stop_hook_active_true_returns_subagent_completed(self): + """Stop payload: stop_hook_active=True -> SUBAGENT_COMPLETED (subagent turn ended).""" + adapter = ClaudeCodeAdapter() + payload = {"stop_hook_active": True} 
+ assert adapter.detect_event_type(payload) == AbstractEventType.SUBAGENT_COMPLETED + + def test_detect_event_type__stop_without_stop_hook_active_returns_turn_completed(self): + """Legacy Stop payload (session_id+transcript_path, no stop_hook_active) -> TURN_COMPLETED. + + Older Claude Code versions omit stop_hook_active; the adapter falls back to + TURN_COMPLETED since session_id+transcript_path alone is the Stop signature. + """ + adapter = ClaudeCodeAdapter() + payload = {"session_id": "s1", "transcript_path": "/tmp/t.jsonl"} + assert adapter.detect_event_type(payload) == AbstractEventType.TURN_COMPLETED + + def test_detect_event_type__raises_for_unknown_shape(self): + """An unrecognized payload shape must fail loud, not silently default.""" + adapter = ClaudeCodeAdapter() + with pytest.raises(ValueError, match="Unknown Claude Code hook payload shape"): + adapter.detect_event_type({"bogus_field": "x"}) + + +class TestParseToolCallStarted: + """Tests for parse() with TOOL_CALL_STARTED payloads (PreToolUse).""" + + def test_parse_pre_tool_use__returns_event_with_tool_call_and_no_output(self): + """PreToolUse has no output yet — tool_call.output must be None.""" + adapter = ClaudeCodeAdapter() + payload = { + "session_id": "s1", + "transcript_path": "/tmp/t.jsonl", + "tool_name": "Bash", + "tool_input": {"command": "ls"}, + } + event = adapter.parse(payload, working_directory="/repo") + + assert isinstance(event, AbstractHookEvent) + assert event.session_id == "s1" + assert event.event_type == AbstractEventType.TOOL_CALL_STARTED + assert event.source == AbstractEventSource.CLAUDE_CODE + assert event.working_directory == "/repo" + assert event.transcript_location == "/tmp/t.jsonl" + assert event.tool_call is not None + assert event.tool_call.tool_name == "Bash" + assert event.tool_call.tool_type == ToolType.BASH.value + assert event.tool_call.input == {"command": "ls"} + assert event.tool_call.output is None + assert event.tool_call.duration_ms is None + assert 
event.tool_call.exit_code is None + assert event.tool_call.error_message is None + + def test_parse_pre_tool_use__metadata_contains_full_raw_payload(self): + """metadata preserves the raw payload for forensic / re-processing.""" + adapter = ClaudeCodeAdapter() + payload = { + "session_id": "s1", + "transcript_path": "/tmp/t.jsonl", + "tool_name": "Read", + "tool_input": {"file_path": "/x.py"}, + } + event = adapter.parse(payload, working_directory="/repo") + assert event.metadata == dict(payload) + + def test_parse_pre_tool_use__raises_when_session_id_missing(self): + """session_id is required on every event — adapter enforces it.""" + adapter = ClaudeCodeAdapter() + payload = {"tool_name": "Bash", "tool_input": {"command": "ls"}} + with pytest.raises(ValueError, match="missing required 'session_id'"): + adapter.parse(payload, working_directory="/repo") + + def test_parse_pre_tool_use__raises_when_tool_name_missing(self): + """A TOOL_CALL_STARTED payload must include a non-empty tool_name — adapter enforces it.""" + adapter = ClaudeCodeAdapter() + payload = {"session_id": "s1", "tool_name": None, "tool_input": {"command": "ls"}} + with pytest.raises(ValueError, match="missing 'tool_name'"): + adapter.parse(payload, working_directory="/repo") + + +class TestParseToolCallCompleted: + """Tests for parse() with TOOL_CALL_COMPLETED payloads (PostToolUse).""" + + def test_parse_post_tool_use__dict_response_preserves_shape(self): + """PostToolUse output for Read/Edit is a dict — preserved verbatim.""" + adapter = ClaudeCodeAdapter() + payload = { + "session_id": "s1", + "transcript_path": "/tmp/t.jsonl", + "tool_name": "Read", + "tool_input": {"file_path": "/x.py"}, + "tool_response": {"success": True, "content": "file content"}, + } + event = adapter.parse(payload, working_directory="/repo") + + assert event.event_type == AbstractEventType.TOOL_CALL_COMPLETED + assert event.tool_call is not None + assert event.tool_call.output == {"success": True, "content": "file 
content"} + + def test_parse_post_tool_use__str_response_preserved(self): + """PostToolUse output for Bash is a stdout string — preserved verbatim.""" + adapter = ClaudeCodeAdapter() + payload = { + "session_id": "s1", + "transcript_path": "/tmp/t.jsonl", + "tool_name": "Bash", + "tool_input": {"command": "ls"}, + "tool_response": "file1.txt\nfile2.txt\n", + } + event = adapter.parse(payload, working_directory="/repo") + + assert event.tool_call is not None + assert event.tool_call.output == "file1.txt\nfile2.txt\n" + + def test_parse_post_tool_use__list_response_preserves_cells(self): + """PostToolUse output for NotebookRead is a list of cells — preserved verbatim.""" + adapter = ClaudeCodeAdapter() + cells = [ + {"cellType": "markdown", "id": "c1", "source": "# Test"}, + {"cellType": "code", "id": "c2", "source": "print('hello')", "language": "python", "outputs": []}, + ] + payload = { + "session_id": "s1", + "transcript_path": "/tmp/t.jsonl", + "tool_name": "NotebookRead", + "tool_input": {"notebook_path": "/x.ipynb"}, + "tool_response": cells, + } + event = adapter.parse(payload, working_directory="/repo") + + assert event.tool_call is not None + assert event.tool_call.output == cells + assert isinstance(event.tool_call.output, list) + assert event.tool_call.output[0]["cellType"] == "markdown" + assert event.tool_call.output[1]["cellType"] == "code" + + def test_parse_post_tool_use__empty_list_response_preserved(self): + """Empty NotebookRead (no cells) must not crash.""" + adapter = ClaudeCodeAdapter() + payload = { + "session_id": "s1", + "transcript_path": "/tmp/t.jsonl", + "tool_name": "NotebookRead", + "tool_input": {"notebook_path": "/empty.ipynb"}, + "tool_response": [], + } + event = adapter.parse(payload, working_directory="/repo") + assert event.tool_call is not None + assert event.tool_call.output == [] + + def test_parse_post_tool_use__dict_response_extracts_duration_exit_code_error(self): + """Dict tool_response carries Bash-style metadata in three 
sibling fields.""" + adapter = ClaudeCodeAdapter() + payload = { + "session_id": "s1", + "transcript_path": "/tmp/t.jsonl", + "tool_name": "Bash", + "tool_input": {"command": "ls"}, + "tool_response": { + "interrupted": False, + "duration_ms": 123, + "exit_code": 0, + }, + } + event = adapter.parse(payload, working_directory="/repo") + assert event.tool_call is not None + assert event.tool_call.duration_ms == 123 + assert event.tool_call.exit_code == 0 + assert event.tool_call.error_message is None + + def test_parse_post_tool_use__dict_response_extracts_error_message(self): + """Error field on tool_response maps to error_message.""" + adapter = ClaudeCodeAdapter() + payload = { + "session_id": "s1", + "transcript_path": "/tmp/t.jsonl", + "tool_name": "Bash", + "tool_input": {"command": "false"}, + "tool_response": { + "interrupted": False, + "duration_ms": 5, + "exit_code": 1, + "error": "command failed", + }, + } + event = adapter.parse(payload, working_directory="/repo") + assert event.tool_call is not None + assert event.tool_call.exit_code == 1 + assert event.tool_call.error_message == "command failed" + + def test_parse_post_tool_use__str_response_has_no_duration_or_exit_code(self): + """Str tool_response (older Claude Code format) carries no duration/exit_code.""" + adapter = ClaudeCodeAdapter() + payload = { + "session_id": "s1", + "transcript_path": "/tmp/t.jsonl", + "tool_name": "Bash", + "tool_input": {"command": "ls"}, + "tool_response": "file1.txt\nfile2.txt", + } + event = adapter.parse(payload, working_directory="/repo") + assert event.tool_call is not None + assert event.tool_call.duration_ms is None + assert event.tool_call.exit_code is None + assert event.tool_call.error_message is None + + +class TestParseNotification: + """Tests for parse() with NOTIFICATION payloads.""" + + def test_parse_notification__returns_event_with_no_tool_call(self): + """Notifications have no tool_call — only metadata carries the message.""" + adapter = 
ClaudeCodeAdapter() + payload = { + "session_id": "s1", + "transcript_path": "/tmp/t.jsonl", + "message": "Test notification", + "title": "Test Title", + } + event = adapter.parse(payload, working_directory="/repo") + + assert event.event_type == AbstractEventType.NOTIFICATION + assert event.tool_call is None + assert event.metadata == dict(payload) + + +class TestParseStopEvents: + """Tests for parse() with Stop and SubagentStop payloads.""" + + def test_parse_stop_with_stop_hook_active_false__returns_turn_completed(self): + """Top-level Stop (stop_hook_active=False) -> TURN_COMPLETED, no tool_call.""" + adapter = ClaudeCodeAdapter() + payload = { + "session_id": "s1", + "transcript_path": "/tmp/t.jsonl", + "stop_hook_active": False, + } + event = adapter.parse(payload, working_directory="/repo") + + assert event.event_type == AbstractEventType.TURN_COMPLETED + assert event.tool_call is None + + def test_parse_stop_with_stop_hook_active_true__returns_subagent_completed(self): + """Subagent Stop (stop_hook_active=True) -> SUBAGENT_COMPLETED, no tool_call.""" + adapter = ClaudeCodeAdapter() + payload = { + "session_id": "child-1", + "transcript_path": "/tmp/t.jsonl", + "stop_hook_active": True, + } + event = adapter.parse(payload, working_directory="/repo") + + assert event.event_type == AbstractEventType.SUBAGENT_COMPLETED + assert event.tool_call is None + + def test_parse_stop_without_stop_hook_active__returns_turn_completed(self): + """Legacy Stop shape (session_id+transcript_path, no stop_hook_active) -> TURN_COMPLETED.""" + adapter = ClaudeCodeAdapter() + payload = {"session_id": "s1", "transcript_path": "/tmp/t.jsonl"} + event = adapter.parse(payload, working_directory="/repo") + + assert event.event_type == AbstractEventType.TURN_COMPLETED + assert event.tool_call is None + + +class TestParseTranscriptLocationExtraction: + """Tests for transcript_location extraction from the wire payload.""" + + def 
test_parse__transcript_location_is_transcript_path_field(self): + """transcript_path maps to transcript_location — same value, canonical name.""" + adapter = ClaudeCodeAdapter() + payload = { + "session_id": "s1", + "transcript_path": "/tmp/transcripts/s1.jsonl", + "tool_name": "Bash", + "tool_input": {"command": "ls"}, + } + event = adapter.parse(payload, working_directory="/repo") + assert event.transcript_location == "/tmp/transcripts/s1.jsonl" + + def test_parse__transcript_location_is_none_when_transcript_path_omitted(self): + """transcript_location is optional — None when not present in the payload.""" + adapter = ClaudeCodeAdapter() + payload = { + "session_id": "s1", + "tool_name": "Bash", + "tool_input": {"command": "ls"}, + } + event = adapter.parse(payload, working_directory="/repo") + assert event.transcript_location is None + + +class TestParseEventTypeOverride: + """Tests for event_type_override — bypasses detect_event_type.""" + + def test_parse_event_type_override__skips_shape_detection(self): + """override forces a specific event type even if shape suggests otherwise.""" + adapter = ClaudeCodeAdapter() + payload = {"session_id": "s1"} # bare session_id would be ambiguous + event = adapter.parse( + payload, + working_directory="/repo", + event_type_override=AbstractEventType.NOTIFICATION, + ) + assert event.event_type == AbstractEventType.NOTIFICATION + + +class TestToolNameMapping: + """Tests for the Claude-Code tool_name -> tool_type category mapping.""" + + def test_resolve_tool_type__maps_known_pascal_case(self): + """Known names map to their PascalCase category.""" + assert resolve_tool_type("Bash") == "Bash" + assert resolve_tool_type("Read") == "Read" + assert resolve_tool_type("Write") == "Write" + assert resolve_tool_type("Edit") == "Edit" + + def test_resolve_tool_type__case_insensitive(self): + """OpenCode sends lowercase names — resolution is case-insensitive.""" + assert resolve_tool_type("bash") == "Bash" + assert 
resolve_tool_type("read") == "Read" + assert resolve_tool_type("edit") == "Edit" + + def test_resolve_tool_type__unknown_lowercase_returns_other(self): + """Unknown lowercase names fall back to 'Other'.""" + assert resolve_tool_type("SomeFutureTool") == ToolType.OTHER.value + + def test_resolve_tool_type__unknown_mcp_prefix_returns_mcp_other(self): + """Unknown mcp__-prefixed names fall back to 'mcp__other' (MCP namespace).""" + assert resolve_tool_type("mcp__unknown__thing") == ToolType.MCP_OTHER.value + + def test_resolve_tool_type__maps_mcp_ide_names(self): + """Known MCP IDE tool names map to their specific categories.""" + assert resolve_tool_type("mcp__ide__getDiagnostics") == "mcp__ide__getDiagnostics" + assert resolve_tool_type("mcp__ide__executeCode") == "mcp__ide__executeCode" + + def test_parse__tool_type_is_populated_from_tool_name(self): + """End-to-end: tool_name='Bash' yields tool_type='Bash' on the event.""" + adapter = ClaudeCodeAdapter() + event = adapter.parse( + { + "session_id": "s1", + "tool_name": "Bash", + "tool_input": {"command": "ls"}, + }, + working_directory="/repo", + ) + assert event.tool_call is not None + assert event.tool_call.tool_type == "Bash" + + +class TestAdapterSource: + """Tests for the adapter's source identity.""" + + def test_adapter_source__is_claude_code(self): + """Every ClaudeCodeAdapter instance advertises source=claude_code.""" + assert ClaudeCodeAdapter().source == AbstractEventSource.CLAUDE_CODE diff --git a/tests/test_current_impact_service.py b/tests/test_current_impact_service.py index d07a404..66979e5 100644 --- a/tests/test_current_impact_service.py +++ b/tests/test_current_impact_service.py @@ -53,10 +53,9 @@ def real_baseline(self): return RepoBaseline( repository_path=str(source_repo), - last_commit_hash="HEAD", - analysis_timestamp=datetime.now(), - current_metrics=metrics, head_commit_sha="HEAD", + computed_at=datetime.now(), + current_metrics=metrics, total_commits_analyzed=1, 
cc_delta_stats=dummy_stats, effort_delta_stats=dummy_stats, @@ -352,10 +351,9 @@ def test_from_analysis__maps_all_fields(self): ) baseline = RepoBaseline( repository_path="/tmp/repo", - last_commit_hash="abc123", - analysis_timestamp=datetime.now(), - current_metrics=metrics, head_commit_sha="abc123", + computed_at=datetime.now(), + current_metrics=metrics, total_commits_analyzed=10, cc_delta_stats=dummy_stats, effort_delta_stats=dummy_stats, @@ -424,10 +422,9 @@ def test_from_analysis__serializes_to_json(self): ) baseline = RepoBaseline( repository_path="/tmp/repo", - last_commit_hash="abc123", - analysis_timestamp=datetime.now(), - current_metrics=metrics, head_commit_sha="abc123", + computed_at=datetime.now(), + current_metrics=metrics, total_commits_analyzed=10, cc_delta_stats=dummy_stats, effort_delta_stats=dummy_stats, diff --git a/tests/test_database.py b/tests/test_database.py index fd3d6e1..890d632 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -6,8 +6,15 @@ from slopometry.core.database import EventDatabase from slopometry.core.models.display import LeaderboardEntry, SessionDisplayData -from slopometry.core.models.hook import HookEvent, HookEventType, Project, ProjectSource, ToolType +from slopometry.core.models.hook import Project, ProjectSource +from slopometry.core.models.protocol.events import ( + AbstractEventSource, + AbstractEventType, + AbstractHookEvent, + ToolCallPayload, +) from slopometry.core.models.user_story import UserStoryEntry +from slopometry.core.protocol.adapters.claude_code import ToolType def test_user_story_export_functionality() -> None: @@ -242,37 +249,49 @@ def test_list_sessions_by_repository__filters_correctly() -> None: # Session 1 - in repo A db.save_event( - HookEvent( + AbstractHookEvent( session_id="session-repo-a", - event_type=HookEventType.PRE_TOOL_USE, + event_type=AbstractEventType.TOOL_CALL_STARTED, + source=AbstractEventSource.CLAUDE_CODE, sequence_number=1, working_directory="/path/to/repo-a", 
- tool_name="Read", - tool_type=ToolType.READ, + tool_call=ToolCallPayload( + tool_name="Read", + tool_type=ToolType.READ.value, + input={}, + ), ) ) # Session 2 - in repo B db.save_event( - HookEvent( + AbstractHookEvent( session_id="session-repo-b", - event_type=HookEventType.PRE_TOOL_USE, + event_type=AbstractEventType.TOOL_CALL_STARTED, + source=AbstractEventSource.CLAUDE_CODE, sequence_number=1, working_directory="/path/to/repo-b", - tool_name="Read", - tool_type=ToolType.READ, + tool_call=ToolCallPayload( + tool_name="Read", + tool_type=ToolType.READ.value, + input={}, + ), ) ) # Session 3 - also in repo A db.save_event( - HookEvent( + AbstractHookEvent( session_id="session-repo-a-2", - event_type=HookEventType.PRE_TOOL_USE, + event_type=AbstractEventType.TOOL_CALL_STARTED, + source=AbstractEventSource.CLAUDE_CODE, sequence_number=1, working_directory="/path/to/repo-a", - tool_name="Write", - tool_type=ToolType.WRITE, + tool_call=ToolCallPayload( + tool_name="Write", + tool_type=ToolType.WRITE.value, + input={}, + ), ) ) @@ -291,13 +310,17 @@ def test_list_sessions_by_repository__returns_empty_for_unknown_repo() -> None: # Create a session in a known repo db.save_event( - HookEvent( + AbstractHookEvent( session_id="session-known", - event_type=HookEventType.PRE_TOOL_USE, + event_type=AbstractEventType.TOOL_CALL_STARTED, + source=AbstractEventSource.CLAUDE_CODE, sequence_number=1, working_directory="/path/to/known-repo", - tool_name="Read", - tool_type=ToolType.READ, + tool_call=ToolCallPayload( + tool_name="Read", + tool_type=ToolType.READ.value, + input={}, + ), ) ) @@ -314,13 +337,17 @@ def test_list_sessions_by_repository__respects_limit() -> None: # Create 3 sessions in the same repo for i in range(3): db.save_event( - HookEvent( + AbstractHookEvent( session_id=f"session-{i}", - event_type=HookEventType.PRE_TOOL_USE, + event_type=AbstractEventType.TOOL_CALL_STARTED, + source=AbstractEventSource.CLAUDE_CODE, sequence_number=1, 
working_directory="/path/to/repo", - tool_name="Read", - tool_type=ToolType.READ, + tool_call=ToolCallPayload( + tool_name="Read", + tool_type=ToolType.READ.value, + input={}, + ), ) ) @@ -335,23 +362,31 @@ def test_get_session_basic_info__returns_minimal_info() -> None: db = EventDatabase(db_path=Path(tmp_dir) / "test.db") db.save_event( - HookEvent( + AbstractHookEvent( session_id="test-session", - event_type=HookEventType.PRE_TOOL_USE, + event_type=AbstractEventType.TOOL_CALL_STARTED, + source=AbstractEventSource.CLAUDE_CODE, sequence_number=1, working_directory="/path/to/repo", - tool_name="Read", - tool_type=ToolType.READ, + tool_call=ToolCallPayload( + tool_name="Read", + tool_type=ToolType.READ.value, + input={}, + ), ) ) db.save_event( - HookEvent( + AbstractHookEvent( session_id="test-session", - event_type=HookEventType.POST_TOOL_USE, + event_type=AbstractEventType.TOOL_CALL_COMPLETED, + source=AbstractEventSource.CLAUDE_CODE, sequence_number=2, working_directory="/path/to/repo", - tool_name="Write", - tool_type=ToolType.WRITE, + tool_call=ToolCallPayload( + tool_name="Write", + tool_type=ToolType.WRITE.value, + input={}, + ), ) ) @@ -379,23 +414,31 @@ def test_get_session_working_directory__returns_first_event_working_dir() -> Non db = EventDatabase(db_path=Path(tmp_dir) / "test.db") db.save_event( - HookEvent( + AbstractHookEvent( session_id="test-wd-session", - event_type=HookEventType.PRE_TOOL_USE, + event_type=AbstractEventType.TOOL_CALL_STARTED, + source=AbstractEventSource.CLAUDE_CODE, sequence_number=1, working_directory="/first/working/dir", - tool_name="Read", - tool_type=ToolType.READ, + tool_call=ToolCallPayload( + tool_name="Read", + tool_type=ToolType.READ.value, + input={}, + ), ) ) db.save_event( - HookEvent( + AbstractHookEvent( session_id="test-wd-session", - event_type=HookEventType.POST_TOOL_USE, + event_type=AbstractEventType.TOOL_CALL_COMPLETED, + source=AbstractEventSource.CLAUDE_CODE, sequence_number=2, 
working_directory="/second/working/dir", - tool_name="Write", - tool_type=ToolType.WRITE, + tool_call=ToolCallPayload( + tool_name="Write", + tool_type=ToolType.WRITE.value, + input={}, + ), ) ) @@ -420,15 +463,21 @@ def test_get_sessions_summary__returns_session_display_data() -> None: db = EventDatabase(db_path=Path(tmp_dir) / "test.db") for i in range(3): - event = HookEvent( + tool_name = "bash" if i < 2 else "read" + tool_type = ToolType.BASH if i < 2 else ToolType.READ + event = AbstractHookEvent( session_id="sess-abc", - event_type=HookEventType.PRE_TOOL_USE, + event_type=AbstractEventType.TOOL_CALL_STARTED, + source=AbstractEventSource.CLAUDE_CODE, timestamp=datetime(2025, 1, 1, 10, i), sequence_number=i + 1, working_directory="/repo", project=Project(name="my-project", source=ProjectSource.GIT), - tool_name="bash" if i < 2 else "read", - tool_type=ToolType.BASH if i < 2 else ToolType.READ, + tool_call=ToolCallPayload( + tool_name=tool_name, + tool_type=tool_type.value, + input={}, + ), ) db.save_event(event) @@ -449,14 +498,13 @@ def test_get_sessions_summary__handles_null_tool_type() -> None: with tempfile.TemporaryDirectory() as tmp_dir: db = EventDatabase(db_path=Path(tmp_dir) / "test.db") - event = HookEvent( + event = AbstractHookEvent( session_id="sess-null", - event_type=HookEventType.NOTIFICATION, + event_type=AbstractEventType.NOTIFICATION, + source=AbstractEventSource.CLAUDE_CODE, timestamp=datetime(2025, 1, 1, 12, 0), sequence_number=1, working_directory="/repo", - tool_name=None, - tool_type=None, ) db.save_event(event) @@ -472,14 +520,18 @@ def test_get_sessions_summary__respects_limit() -> None: db = EventDatabase(db_path=Path(tmp_dir) / "test.db") for i in range(5): - event = HookEvent( + event = AbstractHookEvent( session_id=f"sess-{i:03d}", - event_type=HookEventType.PRE_TOOL_USE, + event_type=AbstractEventType.TOOL_CALL_STARTED, + source=AbstractEventSource.CLAUDE_CODE, timestamp=datetime(2025, 1, 1, 10 + i, 0), sequence_number=1, 
working_directory="/repo", - tool_name="bash", - tool_type=ToolType.BASH, + tool_call=ToolCallPayload( + tool_name="bash", + tool_type=ToolType.BASH.value, + input={}, + ), ) db.save_event(event) diff --git a/tests/test_embedding_service.py b/tests/test_embedding_service.py new file mode 100644 index 0000000..6d81478 --- /dev/null +++ b/tests/test_embedding_service.py @@ -0,0 +1,155 @@ +"""Test embedding service.""" + +from unittest.mock import MagicMock, patch + +import pytest + +from slopometry.solo.services.embedding_service import EmbeddingService + + +def test_compute_similarity_same_vector() -> None: + """Similarity of vector to itself should be 1.0.""" + service = EmbeddingService( + endpoint="http://localhost:11434/v1", + model="embedding-model", + api_key="test-key", + ) + + vector = [0.1, 0.2, 0.3, 0.4] + similarity = service.compute_similarity(vector, vector) + + assert similarity == pytest.approx(1.0) + + +def test_compute_similarity_different_vectors() -> None: + """Different vectors should have similarity less than 1.0.""" + service = EmbeddingService( + endpoint="http://localhost:11434/v1", + model="embedding-model", + api_key="test-key", + ) + + vector1 = [0.1, 0.2, 0.3, 0.4] + vector2 = [0.4, 0.3, 0.2, 0.1] + similarity = service.compute_similarity(vector1, vector2) + + assert similarity < 1.0 + assert similarity > -1.0 + + +def test_compute_similarity_zero_magnitude() -> None: + """Should handle zero vectors gracefully.""" + service = EmbeddingService( + endpoint="http://localhost:11434/v1", + model="embedding-model", + api_key="test-key", + ) + + vector = [0.0, 0.0, 0.0, 0.0] + other = [0.1, 0.2, 0.3, 0.4] + + result = service.compute_similarity(vector, other) + assert result == 0.0 + + result2 = service.compute_similarity(other, vector) + assert result2 == 0.0 + + +def test_compute_uniqueness_score_no_existing() -> None: + """Should return 1.0 when no existing embeddings.""" + service = EmbeddingService( + endpoint="http://localhost:11434/v1", + 
model="embedding-model", + api_key="test-key", + ) + + vector = [0.1, 0.2, 0.3, 0.4] + score = service.compute_uniqueness_score(vector, []) + + assert score == 1.0 + + +def test_compute_uniqueness_score_with_existing() -> None: + """Should return lower score when similar embeddings exist.""" + service = EmbeddingService( + endpoint="http://localhost:11434/v1", + model="embedding-model", + api_key="test-key", + ) + + new_vector = [1.0, 0.0, 0.0, 0.0] + existing = [ + [0.5, 0.5, 0.5, 0.5], + [0.6, 0.4, 0.4, 0.4], + ] + + score = service.compute_uniqueness_score(new_vector, existing) + + assert score < 1.0 + assert score >= 0.0 + + +def test_get_embedding_raises_on_failure() -> None: + """Should raise RuntimeError on API failure.""" + service = EmbeddingService( + endpoint="http://localhost:11434/v1", + model="embedding-model", + api_key="test-key", + ) + + with patch("openai.OpenAI") as mock_openai: + mock_client = MagicMock() + mock_openai.return_value = mock_client + mock_client.embeddings.create.side_effect = Exception("Connection refused") + + with pytest.raises(RuntimeError, match="Failed to get embedding"): + service.get_embedding("test text") + + +def test_get_embedding_success() -> None: + """Test successful embedding retrieval.""" + service = EmbeddingService( + endpoint="http://localhost:11434/v1", + model="embedding-model", + api_key="test-key", + ) + + mock_response = MagicMock() + mock_response.data = [MagicMock(embedding=[0.1, 0.2, 0.3])] + + with patch("openai.OpenAI") as mock_openai: + mock_client = MagicMock() + mock_openai.return_value = mock_client + mock_client.embeddings.create.return_value = mock_response + + result = service.get_embedding("test text") + + assert result == [0.1, 0.2, 0.3] + mock_client.embeddings.create.assert_called_once_with( + model="embedding-model", + input="test text", + ) + + +def test_get_embedding_raises_on_missing_package() -> None: + """Should raise RuntimeError if openai package is not installed.""" + service = 
EmbeddingService( + endpoint="http://localhost:11434/v1", + model="embedding-model", + api_key="test-key", + ) + + original_import = __builtins__["__import__"] + + def failing_import(name, *args, **kwargs): + if name == "openai" or name.startswith("openai."): + raise ImportError(f"No module named '{name}'") + return original_import(name, *args, **kwargs) + + try: + with patch("builtins.__import__", side_effect=failing_import): + with pytest.raises(RuntimeError, match="openai package required"): + service.get_embedding("test text") + finally: + pass + diff --git a/tests/test_git_tracker.py b/tests/test_git_tracker.py index a3ee01f..80013c5 100644 --- a/tests/test_git_tracker.py +++ b/tests/test_git_tracker.py @@ -371,6 +371,7 @@ def test_extract_files_from_commit_ctx__cleans_up_on_exception(git_repo): raise ValueError("test error") # After exception, temp dir should still be cleaned up + assert temp_dir_path is not None assert not temp_dir_path.exists() diff --git a/tests/test_hook_handler.py b/tests/test_hook_handler.py index a99f9b4..e8aa9ce 100644 --- a/tests/test_hook_handler.py +++ b/tests/test_hook_handler.py @@ -1,4 +1,10 @@ -"""Tests for hook handler functionality.""" +"""Tests for hook handler functionality — feedback pipeline, smoke tests, working-tree probes. + +Pure handler-level tests. Detailed wire-format parsing/detection lives in +`test_claude_code_adapter.py` since the wire-validation models (PreToolUseInput, +PostToolUseInput, NotificationInput, StopInput, SubagentStopInput) have been +replaced by `ClaudeCodeAdapter.parse()` / `ClaudeCodeAdapter.detect_event_type()`. 
+""" import json import subprocess @@ -14,181 +20,22 @@ _has_analyzable_source_files, _has_source_changes, _resolve_working_directory, - detect_event_type_from_parsed, extract_dev_guidelines_from_claude_md, format_code_smell_feedback, format_context_coverage_feedback, handle_hook, handle_stop_event, - parse_hook_input, scope_smells_for_session, ) from slopometry.core.models.baseline import ImpactAssessment, ImpactCategory, ZScoreInterpretation from slopometry.core.models.complexity import ComplexityDelta, ExtendedComplexityMetrics -from slopometry.core.models.hook import ( - FeedbackCacheState, - HookEventType, - NotificationInput, - PostToolUseInput, - PreToolUseInput, - StopInput, - SubagentStopInput, -) +from slopometry.core.models.hook import FeedbackCacheState +from slopometry.core.models.protocol.events import AbstractEventType from slopometry.core.models.session import ContextCoverage, FileCoverageStatus from slopometry.core.models.smell import SmellField from slopometry.display.formatters import _interpret_z_score -class TestEventTypeDetection: - """Test the pattern match logic for detecting event types.""" - - def test_pre_tool_use_input_detection(self): - """Test that PreToolUseInput maps to PRE_TOOL_USE event type.""" - input_data = PreToolUseInput( - session_id="test-session", - transcript_path="/tmp/test.jsonl", - tool_name="Bash", - tool_input={"command": "ls"}, - ) - - result = detect_event_type_from_parsed(input_data) - - assert result == HookEventType.PRE_TOOL_USE - - def test_post_tool_use_input_detection(self): - """Test that PostToolUseInput maps to POST_TOOL_USE event type.""" - input_data = PostToolUseInput( - session_id="test-session", - transcript_path="/tmp/test.jsonl", - tool_name="Bash", - tool_input={"command": "ls"}, - tool_response={"success": True}, - ) - - result = detect_event_type_from_parsed(input_data) - - assert result == HookEventType.POST_TOOL_USE - - def test_notification_input_detection(self): - """Test that 
NotificationInput maps to NOTIFICATION event type.""" - input_data = NotificationInput( - session_id="test-session", - transcript_path="/tmp/test.jsonl", - message="Test notification", - title="Test Title", - ) - - result = detect_event_type_from_parsed(input_data) - - assert result == HookEventType.NOTIFICATION - - def test_stop_input_detection(self): - """Test that StopInput maps to STOP event type.""" - input_data = StopInput( - session_id="test-session", - transcript_path="/tmp/test.jsonl", - stop_hook_active=True, - ) - - result = detect_event_type_from_parsed(input_data) - - assert result == HookEventType.STOP - - def test_subagent_stop_input_detection(self): - """Test that SubagentStopInput maps to SUBAGENT_STOP event type.""" - input_data = SubagentStopInput( - session_id="test-session", - transcript_path="/tmp/test.jsonl", - stop_hook_active=True, - ) - - result = detect_event_type_from_parsed(input_data) - - assert result == HookEventType.SUBAGENT_STOP - - def test_all_input_types_are_handled(self): - """Test that all defined input types have corresponding pattern matches. - - This test ensures we don't forget to update the pattern match when adding new input types. 
- """ - input_types = [ - PreToolUseInput( - session_id="test", - transcript_path="/tmp/test.jsonl", - tool_name="Test", - ), - PostToolUseInput( - session_id="test", - transcript_path="/tmp/test.jsonl", - tool_name="Test", - tool_response="success", - ), - NotificationInput( - session_id="test", - transcript_path="/tmp/test.jsonl", - message="test", - ), - StopInput( - session_id="test", - transcript_path="/tmp/test.jsonl", - ), - SubagentStopInput( - session_id="test", - transcript_path="/tmp/test.jsonl", - ), - ] - - expected_types = [ - HookEventType.PRE_TOOL_USE, - HookEventType.POST_TOOL_USE, - HookEventType.NOTIFICATION, - HookEventType.STOP, - HookEventType.SUBAGENT_STOP, - ] - - for input_data, expected_type in zip(input_types, expected_types): - result = detect_event_type_from_parsed(input_data) - assert result == expected_type, f"Input {type(input_data).__name__} should map to {expected_type}" - - -def test_parse_hook_input__stop_hook_active_true_returns_subagent_stop(): - """Test that stop_hook_active=true is parsed as SubagentStopInput.""" - raw_data = { - "session_id": "test-session", - "transcript_path": "/tmp/test.jsonl", - "stop_hook_active": True, - } - - result = parse_hook_input(raw_data) - - assert isinstance(result, SubagentStopInput) - - -def test_parse_hook_input__stop_hook_active_false_returns_stop(): - """Test that stop_hook_active=false is parsed as StopInput.""" - raw_data = { - "session_id": "test-session", - "transcript_path": "/tmp/test.jsonl", - "stop_hook_active": False, - } - - result = parse_hook_input(raw_data) - - assert isinstance(result, StopInput) - - -def test_parse_hook_input__stop_hook_active_omitted_returns_stop(): - """Test that missing stop_hook_active is parsed as StopInput.""" - raw_data = { - "session_id": "test-session", - "transcript_path": "/tmp/test.jsonl", - } - - result = parse_hook_input(raw_data) - - assert isinstance(result, StopInput) - - class TestExtractDevGuidelines: """Tests for extracting dev 
guidelines from CLAUDE.md.""" @@ -234,20 +81,20 @@ class TestFormatCodeSmellFeedback: def _make_metrics(self, **kwargs) -> ExtendedComplexityMetrics: """Create metrics with sensible defaults.""" - defaults = dict( - total_complexity=0, - average_complexity=0, - total_volume=0, - total_effort=0, - total_difficulty=0, - average_volume=0, - average_effort=0, - average_difficulty=0, - total_mi=0, - average_mi=0, - ) + defaults: dict[str, object] = { + "total_complexity": 0, + "average_complexity": 0.0, + "total_volume": 0.0, + "total_effort": 0.0, + "total_difficulty": 0.0, + "average_volume": 0.0, + "average_effort": 0.0, + "average_difficulty": 0.0, + "total_mi": 0.0, + "average_mi": 0.0, + } defaults.update(kwargs) - return ExtendedComplexityMetrics(**defaults) + return ExtendedComplexityMetrics.model_validate(defaults) def test_format_code_smell_feedback__returns_empty_when_no_smells(self): """Test returns empty when no smells detected.""" @@ -574,20 +421,20 @@ class TestScopeSmellsForSession: def _make_metrics(self, **kwargs) -> ExtendedComplexityMetrics: """Create metrics with sensible defaults.""" - defaults = dict( - total_complexity=0, - average_complexity=0, - total_volume=0, - total_effort=0, - total_difficulty=0, - average_volume=0, - average_effort=0, - average_difficulty=0, - total_mi=0, - average_mi=0, - ) + defaults: dict[str, object] = { + "total_complexity": 0, + "average_complexity": 0.0, + "total_volume": 0.0, + "total_effort": 0.0, + "total_difficulty": 0.0, + "average_volume": 0.0, + "average_effort": 0.0, + "average_difficulty": 0.0, + "total_mi": 0.0, + "average_mi": 0.0, + } defaults.update(kwargs) - return ExtendedComplexityMetrics(**defaults) + return ExtendedComplexityMetrics.model_validate(defaults) def test_scope_smells_for_session__returns_empty_when_no_smells(self): """Test returns empty list when metrics have no smells.""" @@ -991,8 +838,8 @@ def _isolate_db(self, tmp_path): original_init = SessionManager.__init__ - def 
_isolated_init(self_inner): - original_init(self_inner) + def _isolated_init(self_inner, source: str, state_root=None): + original_init(self_inner, source, state_root=state_dir) self_inner.state_dir = state_dir with ( @@ -1034,7 +881,7 @@ def test_handle_hook__pre_tool_use_does_not_crash(self): } with patch("slopometry.core.hook_handler._read_stdin_with_timeout", return_value=json.dumps(input_data)): - result = handle_hook(event_type_override=HookEventType.PRE_TOOL_USE) + result = handle_hook(event_type_override=AbstractEventType.TOOL_CALL_STARTED) assert result == 0 @@ -1050,7 +897,7 @@ def test_handle_hook__post_tool_use_does_not_crash(self): } with patch("slopometry.core.hook_handler._read_stdin_with_timeout", return_value=json.dumps(input_data)): - result = handle_hook(event_type_override=HookEventType.POST_TOOL_USE) + result = handle_hook(event_type_override=AbstractEventType.TOOL_CALL_COMPLETED) assert result == 0 @@ -1064,7 +911,7 @@ def test_handle_hook__notification_does_not_crash(self): } with patch("slopometry.core.hook_handler._read_stdin_with_timeout", return_value=json.dumps(input_data)): - result = handle_hook(event_type_override=HookEventType.NOTIFICATION) + result = handle_hook(event_type_override=AbstractEventType.NOTIFICATION) assert result == 0 @@ -1088,7 +935,7 @@ def test_handle_hook__stop_does_not_crash(self): patch("slopometry.core.hook_handler._read_stdin_with_timeout", return_value=json.dumps(input_data)), patch("os.getcwd", return_value=str(tmppath)), ): - result = handle_hook(event_type_override=HookEventType.STOP) + result = handle_hook(event_type_override=AbstractEventType.TURN_COMPLETED) # Stop hook returns 0 (no feedback) or 2 (with feedback) - both are valid assert result in (0, 2) @@ -1103,7 +950,7 @@ def test_handle_hook__subagent_stop_does_not_crash(self): } with patch("slopometry.core.hook_handler._read_stdin_with_timeout", return_value=json.dumps(input_data)): - result = handle_hook(event_type_override=HookEventType.STOP) + 
result = handle_hook(event_type_override=AbstractEventType.TURN_COMPLETED) # Subagent stops should return 0 (no feedback for subagents) assert result == 0 @@ -1206,27 +1053,28 @@ def test_has_analyzable_source_files__ignores_submodule_only_sources(self, tmp_p class TestHandleStopEventEarlyExits: """Tests for handle_stop_event early exit paths.""" - def test_handle_stop_event__returns_zero_when_stop_hook_active(self): - """Subagent stops (stop_hook_active=True) should exit immediately.""" - parsed = SubagentStopInput( - session_id="test", - transcript_path="/tmp/t.jsonl", - stop_hook_active=True, - ) - assert handle_stop_event("test", parsed) == 0 + def test_handle_stop_event__returns_zero_when_no_session_data(self): + """Subagent stops (stop_hook_active=True) and any other stop event with no DB data exit 0. + + Original test passed SubagentStopInput; legacy early-exit is now folded into + handle_stop_event's "no working_directory" path. The semantic guarantee + preserved: when there's nothing to analyze, return 0 without expensive work. 
+ """ + with patch("slopometry.core.hook_handler.EventDatabase") as mock_db_cls: + mock_db = mock_db_cls.return_value + mock_db.get_session_working_directory.return_value = None + + assert handle_stop_event("test") == 0 + mock_db.get_session_working_directory.assert_called_once_with("test") + mock_db.get_session_statistics.assert_not_called() def test_handle_stop_event__returns_zero_when_no_working_directory(self): """Returns 0 when session has no events (no working directory found).""" - parsed = StopInput( - session_id="nonexistent-session-xyz", - transcript_path="/tmp/t.jsonl", - stop_hook_active=False, - ) with patch("slopometry.core.hook_handler.EventDatabase") as mock_db_cls: mock_db = mock_db_cls.return_value mock_db.get_session_working_directory.return_value = None - assert handle_stop_event("nonexistent-session-xyz", parsed) == 0 + assert handle_stop_event("nonexistent-session-xyz") == 0 mock_db.get_session_working_directory.assert_called_once_with("nonexistent-session-xyz") # get_session_statistics should NOT have been called mock_db.get_session_statistics.assert_not_called() @@ -1238,11 +1086,6 @@ def test_handle_stop_event__fast_path_cache_hit_skips_expensive_computation(self content key hashes every source file. The fast-path uses only a few git commands and bails before reading any file content. 
""" - parsed = StopInput( - session_id="test-fast-cache", - transcript_path="/tmp/t.jsonl", - stop_hook_active=False, - ) with ( patch("slopometry.core.hook_handler.EventDatabase") as mock_db_cls, patch("slopometry.core.hook_handler._load_feedback_cache") as mock_cache, @@ -1258,18 +1101,13 @@ def test_handle_stop_event__fast_path_cache_hit_skips_expensive_computation(self mock_sha.return_value = "abc123def" # Same commit mock_changes.return_value = False # No source delta (no mods, no new files) - assert handle_stop_event("test-fast-cache", parsed) == 0 + assert handle_stop_event("test-fast-cache") == 0 # The expensive full key computation should NOT have been called mock_full_key.assert_not_called() mock_db.get_session_statistics.assert_not_called() def test_handle_stop_event__falls_through_when_commit_sha_differs(self, tmp_path): """When commit SHA changed, fast-path doesn't match, falls to full check.""" - parsed = StopInput( - session_id="test-new-commit", - transcript_path="/tmp/t.jsonl", - stop_hook_active=False, - ) with ( patch("slopometry.core.hook_handler.EventDatabase") as mock_db_cls, patch("slopometry.core.hook_handler._load_feedback_cache") as mock_cache, @@ -1287,17 +1125,12 @@ def test_handle_stop_event__falls_through_when_commit_sha_differs(self, tmp_path # Make it bail at the source files check for simplicity mock_has_src.return_value = False - assert handle_stop_event("test-new-commit", parsed) == 0 + assert handle_stop_event("test-new-commit") == 0 # _has_source_changes should NOT be called (SHA mismatch short-circuits) mock_has_src.assert_called_once() def test_handle_stop_event__legacy_cache_without_commit_sha_falls_through(self, tmp_path): """Caches from before the commit_sha field skip the fast-path gracefully.""" - parsed = StopInput( - session_id="test-legacy-cache", - transcript_path="/tmp/t.jsonl", - stop_hook_active=False, - ) with ( patch("slopometry.core.hook_handler.EventDatabase") as mock_db_cls, 
patch("slopometry.core.hook_handler._load_feedback_cache") as mock_cache, @@ -1313,17 +1146,12 @@ def test_handle_stop_event__legacy_cache_without_commit_sha_falls_through(self, # Make it bail at source files check mock_has_src.return_value = False - assert handle_stop_event("test-legacy-cache", parsed) == 0 + assert handle_stop_event("test-legacy-cache") == 0 # Should fall through to _has_analyzable_source_files, not crash mock_has_src.assert_called_once() def test_handle_stop_event__full_cache_key_hit_after_fast_path_miss(self, tmp_path): """When fast-path misses (source modifications) but full key matches, still returns 0.""" - parsed = StopInput( - session_id="test-full-key-hit", - transcript_path="/tmp/t.jsonl", - stop_hook_active=False, - ) with ( patch("slopometry.core.hook_handler.EventDatabase") as mock_db_cls, patch("slopometry.core.hook_handler._load_feedback_cache") as mock_cache, @@ -1342,17 +1170,12 @@ def test_handle_stop_event__full_cache_key_hit_after_fast_path_miss(self, tmp_pa mock_has_src.return_value = True mock_full_key.return_value = "full_key_abc" # But full key matches - assert handle_stop_event("test-full-key-hit", parsed) == 0 + assert handle_stop_event("test-full-key-hit") == 0 mock_full_key.assert_called_once() mock_db.get_session_statistics.assert_not_called() def test_handle_stop_event__returns_zero_when_no_source_files(self, tmp_path): """Returns 0 without computing stats when repo has no analyzable source files.""" - parsed = StopInput( - session_id="test-no-source", - transcript_path="/tmp/t.jsonl", - stop_hook_active=False, - ) with ( patch("slopometry.core.hook_handler.EventDatabase") as mock_db_cls, patch("slopometry.core.hook_handler._load_feedback_cache") as mock_cache, @@ -1364,7 +1187,7 @@ def test_handle_stop_event__returns_zero_when_no_source_files(self, tmp_path): mock_cache.return_value = None # No cache (first run) mock_has_src.return_value = False # No Python/Rust files - assert handle_stop_event("test-no-source", 
parsed) == 0 + assert handle_stop_event("test-no-source") == 0 mock_db.get_session_statistics.assert_not_called() mock_has_src.assert_called_once_with(str(tmp_path)) diff --git a/tests/test_implementation_comparator.py b/tests/test_implementation_comparator.py index c075b1d..9f91fb0 100644 --- a/tests/test_implementation_comparator.py +++ b/tests/test_implementation_comparator.py @@ -11,6 +11,7 @@ from slopometry.core.models.baseline import QPEScore from slopometry.core.models.complexity import ExtendedComplexityMetrics +from slopometry.core.models.core import SmellCounts from slopometry.summoner.services.implementation_comparator import ( SubtreeExtractionError, _extract_subtree, @@ -163,14 +164,14 @@ def test_compare_subtrees__includes_smell_advantages(tmp_path: Path) -> None: mi_normalized=0.6, smell_penalty=0.2, adjusted_quality=0.5, - smell_counts={"swallowed_exception": 5}, + smell_counts=SmellCounts.model_validate({"swallowed_exception": 5}), ) qpe_b = QPEScore( qpe=0.7, mi_normalized=0.8, smell_penalty=0.05, adjusted_quality=0.7, - smell_counts={"swallowed_exception": 1}, + smell_counts=SmellCounts.model_validate({"swallowed_exception": 1}), ) with ( diff --git a/tests/test_list_sessions_performance.py b/tests/test_list_sessions_performance.py index a3767bd..5e8bba2 100644 --- a/tests/test_list_sessions_performance.py +++ b/tests/test_list_sessions_performance.py @@ -5,7 +5,8 @@ from tempfile import TemporaryDirectory from slopometry.core.database import EventDatabase -from slopometry.core.models.hook import HookEvent, HookEventType, Project, ProjectSource +from slopometry.core.models.hook import Project, ProjectSource +from slopometry.core.models.protocol.events import AbstractEventSource, AbstractEventType, AbstractHookEvent class TestListSessionsPerformance: @@ -25,9 +26,10 @@ def test_list_sessions__respects_limit_parameter(self): timestamp = base_time + timedelta(minutes=i) - event = HookEvent( + event = AbstractHookEvent( session_id=session_id, - 
event_type=HookEventType.PRE_TOOL_USE, + event_type=AbstractEventType.TOOL_CALL_STARTED, + source=AbstractEventSource.CLAUDE_CODE, timestamp=timestamp, sequence_number=1, working_directory="/test", @@ -57,9 +59,10 @@ def test_list_sessions__handles_no_limit_parameter(self): for i in range(3): session_id = f"session-{i}" - event = HookEvent( + event = AbstractHookEvent( session_id=session_id, - event_type=HookEventType.PRE_TOOL_USE, + event_type=AbstractEventType.TOOL_CALL_STARTED, + source=AbstractEventSource.CLAUDE_CODE, timestamp=datetime.now() + timedelta(minutes=i), sequence_number=1, working_directory="/test", diff --git a/tests/test_llm_integration.py b/tests/test_llm_integration.py index 9c74de9..d202ebe 100644 --- a/tests/test_llm_integration.py +++ b/tests/test_llm_integration.py @@ -1,4 +1,4 @@ -"""Integration tests for LLM agents. +"""Integration tests for the LLM agent. These tests make real API calls and require running LLM services. Skip by default - run with: SLOPOMETRY_RUN_INTEGRATION_TESTS=1 pytest tests/test_llm_integration.py -v @@ -19,31 +19,28 @@ @pytest.fixture -def agents(): - """Fixture providing the agents registry.""" - from slopometry.summoner.services.llm_wrapper import _get_agents +def agent(): + """Fixture providing the MiniMax-M3 agent.""" + from slopometry.summoner.services.llm_wrapper import get_agent - return _get_agents() + return get_agent() @skip_without_integration_flag -def test_gpt_oss_120b__returns_response_when_given_simple_prompt(agents): - """Test that gpt_oss_120b returns a response for a simple prompt.""" - agent = agents["gpt_oss_120b"] +def test_minimax_m3__returns_response_when_given_simple_prompt(agent): + """Test that MiniMax-M3 returns a response for a simple prompt.""" prompt = "What is 2 + 2? Reply with just the number." 
result = agent.run_sync(prompt) assert result is not None assert result.output is not None - assert len(result.output) > 0 assert "4" in result.output @skip_without_integration_flag -def test_gpt_oss_120b__handles_code_analysis_prompt(agents): - """Test that gpt_oss_120b can analyze a simple code diff.""" - agent = agents["gpt_oss_120b"] +def test_minimax_m3__handles_code_analysis_prompt(agent): + """Test that MiniMax-M3 can analyze a simple code diff.""" prompt = """Analyze this Python code change and describe what it does in one sentence: ```diff @@ -61,85 +58,6 @@ def test_gpt_oss_120b__handles_code_analysis_prompt(agents): @skip_without_integration_flag -def test_gemini__returns_response_when_given_simple_prompt(agents): - """Test that gemini agent returns a response.""" - agent = agents["gemini"] - prompt = "What is the capital of France? Reply with just the city name." - - result = agent.run_sync(prompt) - - assert result is not None - assert result.output is not None - assert "Paris" in result.output - - -@skip_without_integration_flag -def test_get_user_story_agent__returns_configured_agent(): - """Test that get_user_story_agent returns the agent configured in settings.""" - from slopometry.summoner.services.llm_wrapper import get_user_story_agent - - agent = get_user_story_agent() - - assert agent is not None - assert agent.name == settings.user_story_agent - - -@skip_without_integration_flag -def test_minimax__returns_response_when_given_simple_prompt(agents): - """Test that minimax agent returns a response for a simple prompt.""" - if "minimax" not in agents: - pytest.skip("minimax agent not configured") - - agent = agents["minimax"] - prompt = "What is 3 + 5? Reply with just the number." 
- - result = agent.run_sync(prompt) - - assert result is not None - assert result.output is not None - assert len(result.output) > 0 - assert "8" in result.output - - -@skip_without_integration_flag -def test_minimax__handles_code_analysis_prompt(agents): - """Test that minimax can analyze a simple code diff.""" - if "minimax" not in agents: - pytest.skip("minimax agent not configured") - - agent = agents["minimax"] - prompt = """Analyze this Python code change and describe what it does in one sentence: - -```diff -- def add(a, b): -- return a + b -+ def add(a: int, b: int) -> int: -+ return a + b -```""" - - result = agent.run_sync(prompt) - - assert result is not None - assert result.output is not None - assert len(result.output) > 5 - - -@skip_without_integration_flag -def test_minimax__returns_valid_usage_with_token_info(agents): - """Test that minimax returns a response with usage info (may be empty for some providers).""" - if "minimax" not in agents: - pytest.skip("minimax agent not configured") - - agent = agents["minimax"] - prompt = "Write a short one-sentence greeting." 
- - result = agent.run_sync(prompt) - - assert result is not None - assert result.output is not None - assert len(result.output) > 0 - - # Check that usage attribute is present (may be empty or have non-standard fields for some providers) - assert result.usage is not None, "Expected usage attribute to be present in response" - # Verify output indicates successful API call - assert "MiniMax" in result.output or len(result.output) > 5 +def test_minimax_m3__agent_name_matches_settings(agent): + """Test that the agent name matches the configured llm_model_name.""" + assert agent.name == settings.llm_model_name diff --git a/tests/test_memory_extractor.py b/tests/test_memory_extractor.py new file mode 100644 index 0000000..e7b5249 --- /dev/null +++ b/tests/test_memory_extractor.py @@ -0,0 +1,191 @@ +"""Tests for MemoryExtractor (Claude Code + OpenCode transcript parsing).""" + +import json +from pathlib import Path + +import pytest + +from slopometry.solo.services.memory_extractor import MemoryExtractor, TranscriptTruncationConfig + + +def _write_json(path: Path, payload: dict) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(payload), encoding="utf-8") + + +class TestExtractFromOpencodeSession: + def test_reconstructs_user_text_only(self, tmp_path: Path): + storage = tmp_path / "opencode_storage" + _write_json( + storage / "message" / "ses_a" / "msg_a1.json", + {"id": "msg_a1", "role": "user", "sessionID": "ses_a", "time": {"created": 1000}}, + ) + _write_json( + storage / "part" / "msg_a1" / "p1.json", + {"id": "p1", "type": "text", "text": "hello world", "messageID": "msg_a1", "sessionID": "ses_a"}, + ) + extractor = MemoryExtractor("https://llm.example/v1", "model-x") + out = extractor.extract_memories_from_opencode_session("ses_a", storage) + assert "USER:" in out + assert "hello world" in out + + def test_reconstructs_assistant_text(self, tmp_path: Path): + storage = tmp_path / "opencode_storage" + _write_json( + storage / 
"message" / "ses_b" / "msg_b1.json", + {"id": "msg_b1", "role": "assistant", "sessionID": "ses_b", "time": {"created": 2000}}, + ) + _write_json( + storage / "part" / "msg_b1" / "p1.json", + {"id": "p1", "type": "text", "text": "I will check that", "messageID": "msg_b1", "sessionID": "ses_b"}, + ) + extractor = MemoryExtractor("https://llm.example/v1", "model-x") + out = extractor.extract_memories_from_opencode_session("ses_b", storage) + assert "ASSISTANT:" in out + assert "I will check that" in out + + def test_tool_part_emits_tool_marker_with_input_output(self, tmp_path: Path): + storage = tmp_path / "opencode_storage" + _write_json( + storage / "message" / "ses_c" / "msg_c1.json", + {"id": "msg_c1", "role": "assistant", "sessionID": "ses_c", "time": {"created": 3000}}, + ) + _write_json( + storage / "part" / "msg_c1" / "p1.json", + { + "id": "p1", + "type": "tool", + "tool": "bash", + "callID": "call_xyz", + "state": { + "status": "completed", + "input": {"command": "ls -la"}, + "output": "total 12\ndrwxr-xr-x", + }, + "messageID": "msg_c1", + "sessionID": "ses_c", + }, + ) + extractor = MemoryExtractor("https://llm.example/v1", "model-x") + out = extractor.extract_memories_from_opencode_session("ses_c", storage) + assert "TOOL: bash" in out + assert "ls -la" in out + + def test_step_start_and_reasoning_parts_skipped(self, tmp_path: Path): + storage = tmp_path / "opencode_storage" + _write_json( + storage / "message" / "ses_d" / "msg_d1.json", + {"id": "msg_d1", "role": "assistant", "sessionID": "ses_d", "time": {"created": 4000}}, + ) + _write_json( + storage / "part" / "msg_d1" / "p_step.json", + {"id": "p_step", "type": "step-start", "messageID": "msg_d1", "sessionID": "ses_d"}, + ) + _write_json( + storage / "part" / "msg_d1" / "p_reason.json", + { + "id": "p_reason", + "type": "reasoning", + "text": "internal thoughts that should not surface", + "messageID": "msg_d1", + "sessionID": "ses_d", + }, + ) + _write_json( + storage / "part" / "msg_d1" / 
"p_text.json", + { + "id": "p_text", + "type": "text", + "text": "user-visible reply", + "messageID": "msg_d1", + "sessionID": "ses_d", + }, + ) + extractor = MemoryExtractor("https://llm.example/v1", "model-x") + out = extractor.extract_memories_from_opencode_session("ses_d", storage) + assert "user-visible reply" in out + assert "internal thoughts" not in out + + def test_missing_message_directory_returns_empty_string(self, tmp_path: Path): + extractor = MemoryExtractor("https://llm.example/v1", "model-x") + out = extractor.extract_memories_from_opencode_session("ses_none", tmp_path / "opencode_storage") + assert out == "" + + def test_messages_ordered_chronologically(self, tmp_path: Path): + storage = tmp_path / "opencode_storage" + _write_json( + storage / "message" / "ses_e" / "msg_e1.json", + {"id": "msg_e1", "role": "user", "sessionID": "ses_e", "time": {"created": 2000}}, + ) + _write_json( + storage / "part" / "msg_e1" / "p1.json", + {"id": "p1", "type": "text", "text": "second message", "messageID": "msg_e1", "sessionID": "ses_e"}, + ) + _write_json( + storage / "message" / "ses_e" / "msg_e0.json", + {"id": "msg_e0", "role": "user", "sessionID": "ses_e", "time": {"created": 1000}}, + ) + _write_json( + storage / "part" / "msg_e0" / "p1.json", + {"id": "p1", "type": "text", "text": "first message", "messageID": "msg_e0", "sessionID": "ses_e"}, + ) + extractor = MemoryExtractor("https://llm.example/v1", "model-x") + out = extractor.extract_memories_from_opencode_session("ses_e", storage) + first_idx = out.index("first message") + second_idx = out.index("second message") + assert first_idx < second_idx + + def test_unknown_role_message_skipped(self, tmp_path: Path): + storage = tmp_path / "opencode_storage" + _write_json( + storage / "message" / "ses_f" / "msg_f1.json", + {"id": "msg_f1", "role": "system", "sessionID": "ses_f", "time": {"created": 1000}}, + ) + _write_json( + storage / "part" / "msg_f1" / "p1.json", + {"id": "p1", "type": "text", "text": 
"should not appear", "messageID": "msg_f1", "sessionID": "ses_f"}, + ) + extractor = MemoryExtractor("https://llm.example/v1", "model-x") + out = extractor.extract_memories_from_opencode_session("ses_f", storage) + assert "should not appear" not in out + + def test_truncation_config_respected_for_tool_parts(self, tmp_path: Path): + storage = tmp_path / "opencode_storage" + _write_json( + storage / "message" / "ses_g" / "msg_g1.json", + {"id": "msg_g1", "role": "assistant", "sessionID": "ses_g", "time": {"created": 1}}, + ) + long_input = "x" * 500 + long_output = "y" * 500 + _write_json( + storage / "part" / "msg_g1" / "p1.json", + { + "id": "p1", + "type": "tool", + "tool": "bash", + "state": {"input": {"command": long_input}, "output": long_output}, + "messageID": "msg_g1", + "sessionID": "ses_g", + }, + ) + extractor = MemoryExtractor("https://llm.example/v1", "model-x") + out_default = extractor.extract_memories_from_opencode_session("ses_g", storage) + assert len(out_default) < 500 + 500 + 100 + out_short = extractor.extract_memories_from_opencode_session( + "ses_g", + storage, + truncation=TranscriptTruncationConfig(tool_input_chars=10, tool_output_chars=10), + ) + assert len(out_short) < len(out_default) + + +class TestTranscriptTruncationConfig: + def test_defaults_match_pre_refactor_behavior(self): + c = TranscriptTruncationConfig() + assert c.tool_input_chars == 120 + assert c.tool_output_chars == 120 + assert c.tool_result_chars == 200 + + def test_extra_fields_rejected(self): + with pytest.raises(Exception): + TranscriptTruncationConfig(unknown_field=42) diff --git a/tests/test_memory_freshness.py b/tests/test_memory_freshness.py new file mode 100644 index 0000000..0c12318 --- /dev/null +++ b/tests/test_memory_freshness.py @@ -0,0 +1,353 @@ +"""Tests for MemoryFreshnessValidator.""" + +from datetime import datetime +from unittest.mock import MagicMock, patch + +import pytest + +from slopometry.core.models.memory import MemoryCandidate, MemoryEntry, 
MemoryType +from slopometry.solo.services.memory_freshness import ( + CEILING_THRESHOLD, + FLOOR_THRESHOLD, + FreshnessDecision, + MemoryFreshnessValidator, + ProjectSimilarityDistribution, + _cosine_similarity, + _find_above_threshold, + _judge_reconciliation, + compute_project_distribution, +) + + +def _candidate(content: str, embedding: list[float] | None = None) -> MemoryCandidate: + return MemoryCandidate( + memory_type=MemoryType.PROJECT, + content=content, + embedding=embedding, + ) + + +def _memory( + content: str, + embedding: list[float] | None = None, + mem_id: str = "mem-1", +) -> MemoryEntry: + return MemoryEntry( + id=mem_id, + session_id="s1", + project_dir="/proj", + memory_type=MemoryType.PROJECT, + content=content, + embedding=embedding, + created_at=datetime.now(), + ) + + +class TestCosineSimilarity: + def test_identical_vectors_have_similarity_one(self): + v = [1.0, 0.0, 0.0] + assert _cosine_similarity(v, v) == pytest.approx(1.0) + + def test_orthogonal_vectors_have_similarity_zero(self): + assert _cosine_similarity([1.0, 0.0], [0.0, 1.0]) == pytest.approx(0.0) + + def test_empty_vectors_return_zero(self): + assert _cosine_similarity([], [1.0]) == 0.0 + + def test_mismatched_lengths_return_zero(self): + assert _cosine_similarity([1.0, 0.0], [1.0, 0.0, 0.0]) == 0.0 + + +class TestProjectSimilarityDistribution: + def test_zero_pairs_falls_back_to_floor(self): + d = ProjectSimilarityDistribution(0, 0.0, 0.0, 0.0, 0.0, 0.0) + assert d.derived_threshold == FLOOR_THRESHOLD + + def test_threshold_is_p75_clamped_to_floor(self): + d = ProjectSimilarityDistribution(10, 0.30, 0.30, 0.20, 0.10, 0.05) + assert d.derived_threshold == FLOOR_THRESHOLD + + def test_threshold_is_p75_when_above_floor(self): + d = ProjectSimilarityDistribution(100, 0.70, 0.65, 0.80, 0.90, 0.95) + assert d.derived_threshold == pytest.approx(0.80) + + def test_threshold_is_clamped_to_ceiling(self): + d = ProjectSimilarityDistribution(100, 0.95, 0.95, 0.99, 1.0, 1.0) + assert 
d.derived_threshold == CEILING_THRESHOLD + + +class TestComputeProjectDistribution: + def test_no_embeddings_returns_zero_distribution(self): + existing = [_memory("X", embedding=None), _memory("Y", embedding=None)] + d = compute_project_distribution(existing) + assert d.n_pairs == 0 + + def test_pairs_counted_correctly(self): + existing = [ + _memory("a", [1.0, 0.0], "m1"), + _memory("b", [0.0, 1.0], "m2"), + _memory("c", [1.0, 0.0], "m3"), + ] + d = compute_project_distribution(existing) + assert d.n_pairs == 3 + assert 0.0 <= d.mean <= 1.0 + assert 0.0 <= d.p50 <= 1.0 + assert 0.0 <= d.p75 <= 1.0 + + def test_quantiles_are_monotonic(self): + existing = [_memory(f"m{i}", [float(i) / 10, 1.0 - float(i) / 10], f"id{i}") for i in range(5)] + d = compute_project_distribution(existing) + assert d.p50 <= d.p75 <= d.p90 <= d.p95 + + +class TestFindAboveThreshold: + def test_returns_only_memories_above_threshold(self): + candidate = _candidate("X", [1.0, 0.0]) + existing = [ + _memory("identical", [1.0, 0.0], "m1"), + _memory("near", [0.95, 0.31], "m2"), + _memory("far", [0.0, 1.0], "m3"), + ] + matches = _find_above_threshold(candidate, existing, threshold=0.78) + ids = [m.id for m, _ in matches] + assert "m3" not in ids + assert "m1" in ids + assert "m2" in ids + + def test_candidate_without_embedding_returns_empty(self): + candidate = _candidate("X", embedding=None) + existing = [_memory("Y", [1.0, 0.0], "m1")] + assert _find_above_threshold(candidate, existing, threshold=0.5) == [] + + def test_returns_empty_when_no_matches_above_threshold(self): + candidate = _candidate("X", [1.0, 0.0]) + existing = [_memory("Y", [0.0, 1.0], "m1")] + assert _find_above_threshold(candidate, existing, threshold=0.78) == [] + + +class TestJudgeReconciliation: + def test_returns_keep_both_when_llm_says_so(self): + mock_response = MagicMock() + mock_response.choices = [ + MagicMock(message=MagicMock(content='{"action": "keep_both", "reason": "different topics"}')) + ] + with 
patch("openai.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = mock_response + decision = _judge_reconciliation( + _candidate("uses rust-code-analysis"), + _memory("user prefers dark mode"), + "https://llm.example/v1", + "model-x", + "key", + ) + assert decision.action == "keep_both" + assert "topics" in decision.reason or "different" in decision.reason + + def test_returns_merge_with_merged_content(self): + mock_response = MagicMock() + mock_response.choices = [ + MagicMock( + message=MagicMock( + content='{"action": "merge", "reason": "old was outdated", "merged_content": "uses rust-code-analysis since 2026"}' + ) + ) + ] + with patch("openai.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = mock_response + decision = _judge_reconciliation( + _candidate("uses rust-code-analysis"), + _memory("uses radon"), + "https://llm.example/v1", + "model-x", + "key", + ) + assert decision.action == "merge" + assert decision.merged_content == "uses rust-code-analysis since 2026" + + def test_returns_supersede_when_llm_says_so(self): + mock_response = MagicMock() + mock_response.choices = [ + MagicMock(message=MagicMock(content='{"action": "supersede", "reason": "newer version"}')) + ] + with patch("openai.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = mock_response + decision = _judge_reconciliation( + _candidate("Python 3.13"), + _memory("Python 3.10"), + "https://llm.example/v1", + "model-x", + "key", + ) + assert decision.action == "supersede" + + def test_returns_dedupe_when_llm_says_so(self): + mock_response = MagicMock() + mock_response.choices = [ + MagicMock(message=MagicMock(content='{"action": "dedupe", "reason": "same info"}')) + ] + with patch("openai.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = mock_response + decision = _judge_reconciliation( + _candidate("user uses pyright type checker"), + 
_memory("user prefers pyright"), + "https://llm.example/v1", + "model-x", + "key", + ) + assert decision.action == "dedupe" + + def test_strips_markdown_fences(self): + mock_response = MagicMock() + mock_response.choices = [ + MagicMock( + message=MagicMock(content='```json\n{"action": "merge", "reason": "old outdated", "merged_content": "merged"}\n```') + ) + ] + with patch("openai.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = mock_response + decision = _judge_reconciliation( + _candidate("X"), + _memory("Y"), + "https://llm.example/v1", + "model-x", + "key", + ) + assert decision.action == "merge" + assert decision.merged_content == "merged" + + def test_falls_back_to_keep_both_on_invalid_action(self): + mock_response = MagicMock() + mock_response.choices = [ + MagicMock(message=MagicMock(content='{"action": "maybe", "reason": "unsure"}')) + ] + with patch("openai.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = mock_response + decision = _judge_reconciliation( + _candidate("X"), + _memory("Y"), + "https://llm.example/v1", + "model-x", + "key", + ) + assert decision.action == "keep_both" + + +class TestMemoryFreshnessValidator: + def test_no_existing_memories_returns_empty_decisions_and_floor_distribution(self): + validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") + decisions, distribution = validator.validate([_candidate("X", [1.0, 0.0])], []) + assert decisions == [] + assert distribution.n_pairs == 0 + assert distribution.derived_threshold == FLOOR_THRESHOLD + + def test_no_above_threshold_matches_skips_llm_call(self): + validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") + candidates = [_candidate("X", [1.0, 0.0])] + existing = [_memory("orthogonal", [0.0, 1.0], "m1")] + + with patch("openai.OpenAI") as mock_openai: + decisions, _ = validator.validate(candidates, existing) + mock_openai.assert_not_called() + + assert decisions == 
[] + + def test_similar_match_triggers_llm_judge_with_action(self): + mock_response = MagicMock() + mock_response.choices = [ + MagicMock( + message=MagicMock( + content='{"action": "merge", "reason": "update", "merged_content": "merged"}' + ) + ) + ] + validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") + candidates = [_candidate("uses rust-code-analysis", [1.0, 0.0])] + existing = [_memory("uses radon", [0.99, 0.14], "m1")] + + with patch("openai.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = mock_response + decisions, distribution = validator.validate(candidates, existing) + + assert len(decisions) == 1 + decision = decisions[0] + assert isinstance(decision, FreshnessDecision) + assert decision.action == "merge" + assert decision.merged_content == "merged" + assert decision.similarity > distribution.derived_threshold + + def test_keep_both_action_does_not_merge_or_supersede(self): + mock_response = MagicMock() + mock_response.choices = [ + MagicMock(message=MagicMock(content='{"action": "keep_both", "reason": "different aspects"}')) + ] + validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") + candidates = [_candidate("uses rust-code-analysis for complexity", [1.0, 0.0])] + existing = [_memory("user prefers dark mode", [0.99, 0.14], "m1")] + + with patch("openai.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = mock_response + decisions, _ = validator.validate(candidates, existing) + + assert len(decisions) == 1 + assert decisions[0].action == "keep_both" + + def test_multiple_candidates_with_different_actions(self): + mock_response_a = MagicMock() + mock_response_a.choices = [ + MagicMock( + message=MagicMock( + content='{"action": "merge", "reason": "update", "merged_content": "merged-a"}' + ) + ) + ] + mock_response_b = MagicMock() + mock_response_b.choices = [ + MagicMock( + message=MagicMock( + content='{"action": "dedupe", 
"reason": "same info"}' + ) + ) + ] + validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") + candidates = [ + _candidate("uses rust-code-analysis for complexity", [1.0, 0.0]), + _candidate("user prefers dark mode in editors", [0.0, 1.0]), + ] + existing = [ + _memory("uses radon for complexity", [0.99, 0.14], "m1"), + _memory("user prefers light mode in editors", [0.14, 0.99], "m2"), + ] + + with patch("openai.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.side_effect = [ + mock_response_a, + mock_response_b, + ] + decisions, _ = validator.validate(candidates, existing) + + assert len(decisions) == 2 + actions = {d.action for d in decisions} + assert "merge" in actions + assert "dedupe" in actions + + def test_data_driven_threshold_for_low_similarity_project_is_low(self): + validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") + candidates = [_candidate("X", [1.0, 0.0])] + existing = [ + _memory("a", [1.0, 0.0], "m1"), + _memory("b", [0.0, 1.0], "m2"), + ] + with patch("openai.OpenAI"): + _, distribution = validator.validate(candidates, existing) + assert distribution.derived_threshold == FLOOR_THRESHOLD + + def test_failed_llm_call_skipped_silently(self): + validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") + candidates = [_candidate("X", [1.0, 0.0])] + existing = [_memory("Y", [0.99, 0.14], "m1")] + + with patch("openai.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.side_effect = RuntimeError("llm down") + decisions, _ = validator.validate(candidates, existing) + + assert decisions == [] diff --git a/tests/test_memory_models.py b/tests/test_memory_models.py new file mode 100644 index 0000000..7dfb0f9 --- /dev/null +++ b/tests/test_memory_models.py @@ -0,0 +1,98 @@ +"""Test memory models.""" + +from datetime import datetime + +from slopometry.core.models.memory import MemoryCandidate, MemoryCreateRequest, MemoryEntry, MemoryType + + +def 
test_memory_entry_model() -> None: + """Validates MemoryEntry creation.""" + entry = MemoryEntry( + id="test-id-123", + session_id="session-abc", + project_dir="/path/to/project", + memory_type=MemoryType.USER, + content="User prefers dark mode", + source_context="From conversation about UI settings", + created_at=datetime.now(), + embedding=[0.1, 0.2, 0.3], + retained=False, + ) + + assert entry.id == "test-id-123" + assert entry.session_id == "session-abc" + assert entry.project_dir == "/path/to/project" + assert entry.memory_type == MemoryType.USER + assert entry.content == "User prefers dark mode" + assert entry.source_context == "From conversation about UI settings" + assert entry.embedding == [0.1, 0.2, 0.3] + assert entry.retained is False + assert entry.metadata is None + + +def test_memory_candidate_model() -> None: + """Validates MemoryCandidate creation.""" + candidate = MemoryCandidate( + memory_type=MemoryType.PROJECT, + content="Project uses pytest for testing", + source_context="From analysis of project structure", + embedding=[0.5, 0.6, 0.7], + ) + + assert candidate.memory_type == MemoryType.PROJECT + assert candidate.content == "Project uses pytest for testing" + assert candidate.source_context == "From analysis of project structure" + assert candidate.embedding == [0.5, 0.6, 0.7] + + +def test_memory_create_request() -> None: + """Validates MemoryCreateRequest.""" + candidates = [ + MemoryCandidate( + memory_type=MemoryType.USER, + content="User likes tab indentation", + ), + MemoryCandidate( + memory_type=MemoryType.FEEDBACK, + content="Previous code was too complex", + embedding=[0.1, 0.2, 0.3], + ), + ] + + request = MemoryCreateRequest( + session_id="session-xyz", + project_dir="/path/to/project", + candidates=candidates, + ) + + assert request.session_id == "session-xyz" + assert request.project_dir == "/path/to/project" + assert len(request.candidates) == 2 + assert request.candidates[0].memory_type == MemoryType.USER + assert 
request.candidates[1].memory_type == MemoryType.FEEDBACK + + +def test_memory_entry_defaults() -> None: + """Test MemoryEntry default values.""" + entry = MemoryEntry( + id="id-1", + session_id="session-1", + project_dir="/project", + memory_type=MemoryType.REFERENCE, + content="Some content", + created_at=datetime.now(), + ) + + assert entry.retained is False + assert entry.source_context is None + assert entry.embedding is None + assert entry.metadata is None + assert entry.updated_at is None + + +def test_memory_type_enum_values() -> None: + """Test MemoryType enum values.""" + assert MemoryType.USER == "user" + assert MemoryType.FEEDBACK == "feedback" + assert MemoryType.PROJECT == "project" + assert MemoryType.REFERENCE == "reference" diff --git a/tests/test_memory_service.py b/tests/test_memory_service.py new file mode 100644 index 0000000..47fe150 --- /dev/null +++ b/tests/test_memory_service.py @@ -0,0 +1,201 @@ +"""Test memory service.""" + +import tempfile +from collections.abc import Iterator +from datetime import datetime +from pathlib import Path + +import pytest + +from slopometry.core.database import EventDatabase +from slopometry.core.models.memory import MemoryCandidate, MemoryCreateRequest, MemoryEntry, MemoryType +from slopometry.solo.services.memory_service import MemoryService + + +@pytest.fixture +def temp_db() -> Iterator[EventDatabase]: + """Create a temporary database for testing.""" + with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: + db_path = Path(f.name) + try: + db = EventDatabase(db_path) + yield db + finally: + if db_path.exists(): + db_path.unlink() + + +@pytest.fixture +def memory_service(temp_db: EventDatabase) -> MemoryService: + """Create a MemoryService with temporary database.""" + return MemoryService(db=temp_db) + + +def test_save_memory(memory_service: MemoryService) -> None: + """Saves a memory entry.""" + memory = MemoryEntry( + id="mem-001", + session_id="session-abc", + project_dir="/test/project", + 
memory_type=MemoryType.USER, + content="Test memory content", + created_at=datetime.now(), + ) + + memory_service.save_memory(memory) + + memories = memory_service.get_memories() + assert len(memories) == 1 + assert memories[0].id == "mem-001" + assert memories[0].content == "Test memory content" + + +def test_save_memories(memory_service: MemoryService) -> None: + """Saves multiple candidates.""" + request = MemoryCreateRequest( + session_id="session-xyz", + project_dir="/test/project", + candidates=[ + MemoryCandidate( + memory_type=MemoryType.USER, + content="First memory", + ), + MemoryCandidate( + memory_type=MemoryType.PROJECT, + content="Second memory", + ), + ], + ) + + saved = memory_service.save_memories(request) + + assert len(saved) == 2 + assert saved[0].content == "First memory" + assert saved[1].content == "Second memory" + assert saved[0].session_id == "session-xyz" + + +def test_get_memories(memory_service: MemoryService) -> None: + """Retrieves memories.""" + memory_service.save_memory( + MemoryEntry( + id="mem-001", + session_id="session-1", + project_dir="/project1", + memory_type=MemoryType.USER, + content="User memory", + created_at=datetime.now(), + ) + ) + memory_service.save_memory( + MemoryEntry( + id="mem-002", + session_id="session-2", + project_dir="/project2", + memory_type=MemoryType.PROJECT, + content="Project memory", + created_at=datetime.now(), + ) + ) + + memories = memory_service.get_memories() + assert len(memories) == 2 + + project_memories = memory_service.get_memories(project_dir="/project2") + assert len(project_memories) == 1 + assert project_memories[0].memory_type == MemoryType.PROJECT + + type_memories = memory_service.get_memories(memory_type=MemoryType.USER) + assert len(type_memories) == 1 + assert type_memories[0].id == "mem-001" + + +def test_delete_memory(memory_service: MemoryService) -> None: + """Deletes a memory by ID.""" + memory = MemoryEntry( + id="mem-to-delete", + session_id="session-1", + 
project_dir="/test/project", + memory_type=MemoryType.REFERENCE, + content="Memory to delete", + created_at=datetime.now(), + ) + memory_service.save_memory(memory) + + result = memory_service.delete_memory("mem-to-delete") + assert result is True + + memories = memory_service.get_memories() + assert len(memories) == 0 + + result = memory_service.delete_memory("non-existent") + assert result is False + + +def test_delete_all_memories(memory_service: MemoryService) -> None: + """Clears all memories and processed_sessions.""" + memory_service.save_memory( + MemoryEntry( + id="mem-1", + session_id="session-1", + project_dir="/project1", + memory_type=MemoryType.USER, + content="Memory 1", + created_at=datetime.now(), + ) + ) + memory_service.save_memory( + MemoryEntry( + id="mem-2", + session_id="session-2", + project_dir="/project2", + memory_type=MemoryType.PROJECT, + content="Memory 2", + created_at=datetime.now(), + ) + ) + memory_service.mark_session_processed("session-1", "/project1", 1) + + assert memory_service.is_session_processed("session-1", "/project1") is True + + count = memory_service.delete_all_memories() + assert count == 2 + + memories = memory_service.get_memories() + assert len(memories) == 0 + + assert memory_service.is_session_processed("session-1", "/project1") is False + + +def test_mark_session_processed(memory_service: MemoryService) -> None: + """Marks session as processed.""" + memory_service.mark_session_processed("session-test", "/test/project", 5) + + assert memory_service.is_session_processed("session-test", "/test/project") is True + + +def test_is_session_processed(memory_service: MemoryService) -> None: + """Checks if session was processed.""" + assert memory_service.is_session_processed("unprocessed-session", "/any/project") is False + + memory_service.mark_session_processed("processed-session", "/any/project", 3) + + assert memory_service.is_session_processed("processed-session", "/any/project") is True + + +def 
test_get_memories_limit(memory_service: MemoryService) -> None: + """Test that get_memories respects limit parameter.""" + for i in range(10): + memory_service.save_memory( + MemoryEntry( + id=f"mem-{i}", + session_id="session-1", + project_dir="/test/project", + memory_type=MemoryType.USER, + content=f"Memory {i}", + created_at=datetime.now(), + ) + ) + + memories = memory_service.get_memories(limit=5) + assert len(memories) == 5 diff --git a/tests/test_models.py b/tests/test_models.py index f931f3d..f9b590d 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -21,7 +21,7 @@ class TestExtendedComplexityMetrics: def test_model_creation_without_required_fields__raises_validation_error(self) -> None: """Test that ValidationError is raised when required Halstead fields are missing.""" with pytest.raises(ValidationError) as exc_info: - ExtendedComplexityMetrics() # pyrefly: ignore[missing-argument] + ExtendedComplexityMetrics() # pyrefly: ignore[missing-argument] # pyright: ignore[reportCallIssue] errors = exc_info.value.errors() missing_fields = {e["loc"][0] for e in errors} @@ -115,8 +115,6 @@ def test_context_coverage_has_gaps__returns_false_when_perfect(): FileCoverageStatus( file_path="src/foo.py", was_read_before_edit=True, - imports_coverage=100.0, - dependents_coverage=100.0, ) ], blind_spots=[], @@ -134,8 +132,6 @@ def test_context_coverage_has_gaps__returns_true_when_read_ratio_low(): FileCoverageStatus( file_path="src/foo.py", was_read_before_edit=False, - imports_coverage=100.0, - dependents_coverage=100.0, ) ], blind_spots=[], @@ -153,8 +149,6 @@ def test_context_coverage_has_gaps__returns_true_when_blind_spots(): FileCoverageStatus( file_path="src/foo.py", was_read_before_edit=True, - imports_coverage=100.0, - dependents_coverage=100.0, ) ], blind_spots=["src/bar.py"], diff --git a/tests/test_notebookread_integration.py b/tests/test_notebookread_integration.py index 731953f..a75d9ba 100644 --- a/tests/test_notebookread_integration.py +++ 
b/tests/test_notebookread_integration.py @@ -1,15 +1,21 @@ -"""Integration test for NotebookRead hook handling.""" +"""Integration test for NotebookRead hook handling via ClaudeCodeAdapter. -from slopometry.core.hook_handler import parse_hook_input -from slopometry.core.models.hook import PostToolUseInput +Migrated from `parse_hook_input(raw)` + `PostToolUseInput` validation. The +adapter handles list-typed tool_response internally — these tests verify +end-to-end that a NotebookRead payload (list-shaped tool_response) round-trips +through parse() with the cells preserved. +""" + +from slopometry.core.models.protocol.events import AbstractEventType +from slopometry.core.protocol.adapters.claude_code import ClaudeCodeAdapter class TestNotebookReadIntegration: - """Test NotebookRead integration with hook handler.""" + """Test NotebookRead integration with the Claude Code adapter.""" - def test_parse_hook_input_with_notebookread_response__handles_list_correctly(self): - """Test that parse_hook_input can handle NotebookRead responses with lists.""" - # This simulates the exact data structure that was causing the validation error + def test_parse_notebookread_response__preserves_cells_correctly(self): + """Adapter.parse() preserves the full NotebookRead list response.""" + adapter = ClaudeCodeAdapter() raw_hook_data = { "session_id": "test_session_123", "transcript_path": "/path/to/transcript.jsonl", @@ -27,21 +33,21 @@ def test_parse_hook_input_with_notebookread_response__handles_list_correctly(sel ], } - # This should not raise a ValidationError anymore - parsed_input = parse_hook_input(raw_hook_data) - - # PostToolUse data should parse to PostToolUseInput - assert isinstance(parsed_input, PostToolUseInput) - assert parsed_input.session_id == "test_session_123" - assert parsed_input.tool_name == "NotebookRead" - assert isinstance(parsed_input.tool_response, list) - assert len(parsed_input.tool_response) == 2 - assert parsed_input.tool_response[0]["cellType"] == 
"markdown" - assert parsed_input.tool_response[1]["cellType"] == "code" - assert "python" in parsed_input.tool_response[1]["language"] - - def test_parse_hook_input_with_notebookread_empty_response__handles_empty_list(self): - """Test that parse_hook_input can handle empty NotebookRead responses.""" + event = adapter.parse(raw_hook_data, working_directory="/repo") + + assert event.session_id == "test_session_123" + assert event.event_type == AbstractEventType.TOOL_CALL_COMPLETED + assert event.tool_call is not None + assert event.tool_call.tool_name == "NotebookRead" + assert isinstance(event.tool_call.output, list) + assert len(event.tool_call.output) == 2 + assert event.tool_call.output[0]["cellType"] == "markdown" + assert event.tool_call.output[1]["cellType"] == "code" + assert "python" in event.tool_call.output[1]["language"] + + def test_parse_notebookread_empty_response__preserves_empty_list(self): + """Adapter.parse() handles empty NotebookRead responses (no cells).""" + adapter = ClaudeCodeAdapter() raw_hook_data = { "session_id": "test_session_456", "transcript_path": "/path/to/transcript.jsonl", @@ -50,12 +56,10 @@ def test_parse_hook_input_with_notebookread_empty_response__handles_empty_list(s "tool_response": [], } - # This should not raise a ValidationError - parsed_input = parse_hook_input(raw_hook_data) + event = adapter.parse(raw_hook_data, working_directory="/repo") - # PostToolUse data should parse to PostToolUseInput - assert isinstance(parsed_input, PostToolUseInput) - assert parsed_input.session_id == "test_session_456" - assert parsed_input.tool_name == "NotebookRead" - assert isinstance(parsed_input.tool_response, list) - assert len(parsed_input.tool_response) == 0 + assert event.session_id == "test_session_456" + assert event.tool_call is not None + assert event.tool_call.tool_name == "NotebookRead" + assert isinstance(event.tool_call.output, list) + assert len(event.tool_call.output) == 0 diff --git a/tests/test_opencode_adapter.py 
b/tests/test_opencode_adapter.py new file mode 100644 index 0000000..45cdd0a --- /dev/null +++ b/tests/test_opencode_adapter.py @@ -0,0 +1,322 @@ +"""Tests for OpenCodeAdapter — translates OpenCode plugin JSON into AbstractHookEvent. + +The adapter owns: + - the field-name mapping (tool -> tool_name, args -> input, output -> output) + - event-type resolution from the CLI discriminator string (pre_tool_use, etc.) + - parent_id -> parent_session_id mapping for subagent sessions + - duration_ms extraction from a top-level field (Claude Code nests it in tool_response) + +Migrated from `test_opencode_handler.py` (TestEventTypeMap, TestParseOpenCodeEvent, +TestGetToolType, TestGetSessionId, TestGetParentId, TestHandleOpenCodeStop). +""" + +import pytest + +from slopometry.core.models.protocol.events import ( + AbstractEventSource, + AbstractEventType, + AbstractHookEvent, +) +from slopometry.core.protocol.adapters.opencode import ( + OpenCodeAdapter, + resolve_opencode_event_type, +) + + +class TestResolveOpenCodeEventType: + """Tests for the CLI event-type discriminator -> canonical enum mapping.""" + + def test_resolve_opencode_event_type__pre_tool_use(self): + assert resolve_opencode_event_type("pre_tool_use") == AbstractEventType.TOOL_CALL_STARTED + + def test_resolve_opencode_event_type__post_tool_use(self): + assert resolve_opencode_event_type("post_tool_use") == AbstractEventType.TOOL_CALL_COMPLETED + + def test_resolve_opencode_event_type__stop(self): + assert resolve_opencode_event_type("stop") == AbstractEventType.TURN_COMPLETED + + def test_resolve_opencode_event_type__subagent_stop(self): + assert resolve_opencode_event_type("subagent_stop") == AbstractEventType.SUBAGENT_COMPLETED + + def test_resolve_opencode_event_type__subagent_start(self): + assert resolve_opencode_event_type("subagent_start") == AbstractEventType.SUBAGENT_STARTED + + def test_resolve_opencode_event_type__todo_updated(self): + assert resolve_opencode_event_type("todo_updated") == 
AbstractEventType.TODO_UPDATED + + def test_resolve_opencode_event_type__message_updated(self): + assert resolve_opencode_event_type("message_updated") == AbstractEventType.MESSAGE_UPDATED + + def test_resolve_opencode_event_type__covers_all_event_types(self): + """All OpenCode event types must map to a canonical enum.""" + expected = { + "pre_tool_use", + "post_tool_use", + "stop", + "subagent_stop", + "subagent_start", + "todo_updated", + "message_updated", + } + for et in expected: + result = resolve_opencode_event_type(et) + assert isinstance(result, AbstractEventType) + + def test_resolve_opencode_event_type__unknown_raises(self): + """Unknown CLI event type fails loud — the dispatcher must not silently default.""" + with pytest.raises(ValueError, match="Unknown OpenCode event type"): + resolve_opencode_event_type("totally_unknown") + + +class TestDetectEventType: + """Tests for OpenCodeAdapter.detect_event_type — discriminator-based.""" + + def test_detect_event_type__requires_event_type_field(self): + """OpenCode requires an explicit event_type discriminator (unlike Claude Code).""" + adapter = OpenCodeAdapter() + with pytest.raises(ValueError, match="missing string 'event_type' field"): + adapter.detect_event_type({"session_id": "s1"}) + + def test_detect_event_type__rejects_non_string_event_type(self): + """A non-string event_type (e.g., accidental bool/int) must fail validation.""" + adapter = OpenCodeAdapter() + with pytest.raises(ValueError, match="missing string 'event_type' field"): + adapter.detect_event_type({"event_type": 123}) + + def test_detect_event_type__delegates_to_resolve(self): + """Adapter delegates string-to-enum resolution to the module function.""" + adapter = OpenCodeAdapter() + assert adapter.detect_event_type({"event_type": "pre_tool_use"}) == AbstractEventType.TOOL_CALL_STARTED + + +class TestParseToolCallEvents: + """Tests for parse() with pre_tool_use and post_tool_use payloads.""" + + def 
test_parse_pre_tool_use__returns_tool_call_started_with_args(self): + """pre_tool_use: 'tool' -> tool_name, 'args' -> input, no output yet.""" + adapter = OpenCodeAdapter() + payload = {"event_type": "pre_tool_use", "tool": "Bash", "session_id": "s1", "args": {"command": "ls"}} + event = adapter.parse(payload, working_directory="/repo") + + assert isinstance(event, AbstractHookEvent) + assert event.event_type == AbstractEventType.TOOL_CALL_STARTED + assert event.source == AbstractEventSource.OPENCODE + assert event.session_id == "s1" + assert event.tool_call is not None + assert event.tool_call.tool_name == "Bash" + assert event.tool_call.input == {"command": "ls"} + assert event.tool_call.output is None + + def test_parse_post_tool_use__returns_tool_call_completed_with_output(self): + """post_tool_use: 'tool' -> tool_name, 'args' -> input, 'output' -> output.""" + adapter = OpenCodeAdapter() + payload = { + "event_type": "post_tool_use", + "tool": "Read", + "session_id": "s1", + "args": {"file_path": "/tmp/f.py"}, + "output": "file content", + "duration_ms": 42, + } + event = adapter.parse(payload, working_directory="/repo") + + assert event.event_type == AbstractEventType.TOOL_CALL_COMPLETED + assert event.tool_call is not None + assert event.tool_call.tool_name == "Read" + assert event.tool_call.output == "file content" + assert event.tool_call.duration_ms == 42 + assert event.tool_call.exit_code is None + assert event.tool_call.error_message is None + + def test_parse_pre_tool_use__lowercase_tool_name_normalized(self): + """OpenCode sends lowercase tool names — adapter normalizes via resolve_tool_type.""" + adapter = OpenCodeAdapter() + payload = {"event_type": "pre_tool_use", "tool": "bash", "session_id": "s1", "args": {"command": "ls"}} + event = adapter.parse(payload, working_directory="/repo") + assert event.tool_call is not None + assert event.tool_call.tool_name == "bash" + assert event.tool_call.tool_type == "Bash" + + +class TestParseSubagentEvents: + 
"""Tests for parse() with subagent_start and subagent_stop payloads.""" + + def test_parse_subagent_start__parent_id_maps_to_parent_session_id(self): + """OpenCode 'parent_id' -> canonical 'parent_session_id'.""" + adapter = OpenCodeAdapter() + payload = { + "event_type": "subagent_start", + "session_id": "child-1", + "parent_id": "parent-1", + "agent": "explore", + } + event = adapter.parse(payload, working_directory="/repo") + + assert event.event_type == AbstractEventType.SUBAGENT_STARTED + assert event.session_id == "child-1" + assert event.parent_session_id == "parent-1" + assert event.tool_call is None + + def test_parse_subagent_start__no_parent_id_yields_none(self): + """Top-level sessions have no parent — parent_session_id is None.""" + adapter = OpenCodeAdapter() + payload = {"event_type": "subagent_start", "session_id": "main", "agent": "general"} + event = adapter.parse(payload, working_directory="/repo") + assert event.parent_session_id is None + + def test_parse_subagent_stop__parent_id_maps_to_parent_session_id(self): + """subagent_stop: parent_id -> parent_session_id, no tool_call.""" + adapter = OpenCodeAdapter() + payload = { + "event_type": "subagent_stop", + "session_id": "child-1", + "parent_id": "parent-1", + "agent": "explore", + } + event = adapter.parse(payload, working_directory="/repo") + + assert event.event_type == AbstractEventType.SUBAGENT_COMPLETED + assert event.parent_session_id == "parent-1" + assert event.tool_call is None + + +class TestParseTodoAndMessageEvents: + """Tests for parse() with todo_updated and message_updated payloads.""" + + def test_parse_todo_updated__no_tool_call(self): + """todo_updated events have no tool_call — todos live in metadata.""" + adapter = OpenCodeAdapter() + payload = { + "event_type": "todo_updated", + "session_id": "s1", + "todos": [{"content": "Fix bug", "status": "pending", "priority": "high"}], + } + event = adapter.parse(payload, working_directory="/repo") + + assert event.event_type == 
AbstractEventType.TODO_UPDATED + assert event.tool_call is None + assert event.metadata == dict(payload) + + def test_parse_message_updated__no_tool_call(self): + """message_updated events have no tool_call — message metadata only.""" + adapter = OpenCodeAdapter() + payload = { + "event_type": "message_updated", + "session_id": "s1", + "message_id": "m1", + "model_id": "claude-3-opus", + "agent": "general", + } + event = adapter.parse(payload, working_directory="/repo") + + assert event.event_type == AbstractEventType.MESSAGE_UPDATED + assert event.tool_call is None + + +class TestParseStopEvent: + """Tests for parse() with stop payloads.""" + + def test_parse_stop__no_tool_call(self): + """stop events have no tool_call — they mark the end of a turn.""" + adapter = OpenCodeAdapter() + payload = {"event_type": "stop", "session_id": "s1", "agent": "general"} + event = adapter.parse(payload, working_directory="/repo") + + assert event.event_type == AbstractEventType.TURN_COMPLETED + assert event.tool_call is None + + +class TestParseFieldMapping: + """Tests for the OpenCode-specific field-name remapping.""" + + def test_parse__tool_maps_to_tool_name(self): + """OpenCode 'tool' field -> canonical 'tool_name'.""" + adapter = OpenCodeAdapter() + event = adapter.parse( + {"event_type": "pre_tool_use", "tool": "Edit", "session_id": "s1", "args": {"file_path": "/x.py"}}, + working_directory="/repo", + ) + assert event.tool_call is not None + assert event.tool_call.tool_name == "Edit" + + def test_parse__args_maps_to_input(self): + """OpenCode 'args' field -> canonical 'input'.""" + adapter = OpenCodeAdapter() + event = adapter.parse( + {"event_type": "pre_tool_use", "tool": "Bash", "session_id": "s1", "args": {"command": "ls"}}, + working_directory="/repo", + ) + assert event.tool_call is not None + assert event.tool_call.input == {"command": "ls"} + + def test_parse__output_maps_to_output(self): + """OpenCode 'output' field -> canonical 'output'.""" + adapter = 
OpenCodeAdapter() + event = adapter.parse( + { + "event_type": "post_tool_use", + "tool": "Read", + "session_id": "s1", + "args": {"file_path": "/x"}, + "output": "file content", + }, + working_directory="/repo", + ) + assert event.tool_call is not None + assert event.tool_call.output == "file content" + + def test_parse__duration_ms_is_top_level(self): + """OpenCode carries duration_ms at the top level (Claude Code nests it in tool_response).""" + adapter = OpenCodeAdapter() + event = adapter.parse( + { + "event_type": "post_tool_use", + "tool": "Read", + "session_id": "s1", + "args": {}, + "output": "x", + "duration_ms": 99, + }, + working_directory="/repo", + ) + assert event.tool_call is not None + assert event.tool_call.duration_ms == 99 + + def test_parse__transcript_location_is_none(self): + """OpenCode stores transcripts in metadata.transcript, not as a path — transcript_location stays None.""" + adapter = OpenCodeAdapter() + event = adapter.parse( + {"event_type": "stop", "session_id": "s1"}, + working_directory="/repo", + ) + assert event.transcript_location is None + + def test_parse__raises_when_session_id_missing(self): + """session_id is required on every event — adapter enforces it.""" + adapter = OpenCodeAdapter() + with pytest.raises(ValueError, match="missing required 'session_id'"): + adapter.parse({"event_type": "stop"}, working_directory="/repo") + + def test_parse__raises_when_tool_missing_for_tool_event(self): + """tool_call events must include 'tool' field — adapter enforces it.""" + adapter = OpenCodeAdapter() + with pytest.raises(ValueError, match="missing 'tool' field"): + adapter.parse( + {"event_type": "pre_tool_use", "session_id": "s1", "args": {}}, + working_directory="/repo", + ) + + def test_parse__metadata_contains_full_raw_payload(self): + """metadata preserves the raw OpenCode payload for forensic / re-processing.""" + adapter = OpenCodeAdapter() + payload = {"event_type": "stop", "session_id": "s1", "agent": "general", 
"transcript": [{"role": "user"}]} + event = adapter.parse(payload, working_directory="/repo") + assert event.metadata == dict(payload) + + +class TestAdapterSource: + """Tests for the adapter's source identity.""" + + def test_adapter_source__is_opencode(self): + """Every OpenCodeAdapter instance advertises source=opencode.""" + assert OpenCodeAdapter().source == AbstractEventSource.OPENCODE diff --git a/tests/test_opencode_handler.py b/tests/test_opencode_handler.py index 1b331c3..02c2887 100644 --- a/tests/test_opencode_handler.py +++ b/tests/test_opencode_handler.py @@ -1,4 +1,10 @@ -"""Tests for OpenCode hook handler functionality.""" +"""Smoke tests for the OpenCode hook handler entrypoint. + +Pure plumbing tests: verify the entrypoint reads stdin, dispatches through the +OpenCode adapter, and never crashes on malformed or unknown input. Detailed +adapter semantics (parse, detect_event_type, tool-type mapping) live in +`test_opencode_adapter.py`. +""" import json import subprocess @@ -9,221 +15,52 @@ import pytest from slopometry.core.database import SessionManager -from slopometry.core.models.hook import HookEventType, ToolType -from slopometry.core.models.opencode import ( - OpenCodeMessageEvent, - OpenCodeSessionEvent, - OpenCodeStopEvent, - OpenCodeTodoEvent, - OpenCodeToolEvent, -) -from slopometry.core.opencode_handler import ( - EVENT_TYPE_MAP, - _get_parent_id, - _get_session_id, - _handle_opencode_stop, - get_tool_type, - handle_opencode_hook, - parse_opencode_event, -) - - -class TestEventTypeMap: - def test_event_type_map__pre_tool_use_maps_correctly(self): - assert EVENT_TYPE_MAP["pre_tool_use"] == HookEventType.PRE_TOOL_USE - - def test_event_type_map__post_tool_use_maps_correctly(self): - assert EVENT_TYPE_MAP["post_tool_use"] == HookEventType.POST_TOOL_USE - - def test_event_type_map__stop_maps_correctly(self): - assert EVENT_TYPE_MAP["stop"] == HookEventType.STOP - - def test_event_type_map__subagent_stop_maps_correctly(self): - assert 
EVENT_TYPE_MAP["subagent_stop"] == HookEventType.SUBAGENT_STOP - - def test_event_type_map__subagent_start_maps_correctly(self): - assert EVENT_TYPE_MAP["subagent_start"] == HookEventType.SUBAGENT_START - - def test_event_type_map__todo_updated_maps_correctly(self): - assert EVENT_TYPE_MAP["todo_updated"] == HookEventType.TODO_UPDATED - - def test_event_type_map__message_updated_maps_correctly(self): - assert EVENT_TYPE_MAP["message_updated"] == HookEventType.MESSAGE_UPDATED - - def test_event_type_map__covers_all_opencode_event_types(self): - expected_keys = { - "pre_tool_use", - "post_tool_use", - "stop", - "subagent_stop", - "subagent_start", - "todo_updated", - "message_updated", - } - assert set(EVENT_TYPE_MAP.keys()) == expected_keys - - -class TestParseOpenCodeEvent: - def test_parse_opencode_event__pre_tool_use_returns_tool_event(self): - raw = {"tool": "Bash", "session_id": "s1", "call_id": "c1", "args": {"command": "ls"}} - result = parse_opencode_event("pre_tool_use", raw) - assert isinstance(result, OpenCodeToolEvent) - assert result.tool == "Bash" - assert result.session_id == "s1" - - def test_parse_opencode_event__post_tool_use_returns_tool_event_with_output(self): - raw = { - "tool": "Read", - "session_id": "s1", - "call_id": "c2", - "args": {"file_path": "/tmp/f.py"}, - "output": "file content", - "duration_ms": 42, - } - result = parse_opencode_event("post_tool_use", raw) - assert isinstance(result, OpenCodeToolEvent) - assert result.output == "file content" - assert result.duration_ms == 42 - - def test_parse_opencode_event__todo_updated_returns_todo_event(self): - raw = { - "session_id": "s1", - "todos": [{"content": "Fix bug", "status": "pending", "priority": "high"}], - } - result = parse_opencode_event("todo_updated", raw) - assert isinstance(result, OpenCodeTodoEvent) - assert len(result.todos) == 1 - assert result.todos[0].content == "Fix bug" - - def test_parse_opencode_event__message_updated_returns_message_event(self): - raw = { - 
"session_id": "s1", - "message_id": "m1", - "model_id": "claude-3-opus", - "agent": "general", - "tokens": {"input": 100, "output": 50, "reasoning": 0, "cache_read": 0, "cache_write": 0}, - "cost": 0.01, - } - result = parse_opencode_event("message_updated", raw) - assert isinstance(result, OpenCodeMessageEvent) - assert result.agent == "general" - assert result.tokens.input == 100 - - def test_parse_opencode_event__subagent_start_returns_session_event(self): - raw = {"session_id": "child-1", "parent_id": "parent-1", "agent": "explore"} - result = parse_opencode_event("subagent_start", raw) - assert isinstance(result, OpenCodeSessionEvent) - assert result.parent_id == "parent-1" - - def test_parse_opencode_event__stop_returns_stop_event(self): - raw = {"session_id": "s1", "agent": "general", "model_id": "claude-3-opus"} - result = parse_opencode_event("stop", raw) - assert isinstance(result, OpenCodeStopEvent) - - def test_parse_opencode_event__subagent_stop_returns_stop_event(self): - raw = {"session_id": "child-1", "parent_id": "parent-1", "agent": "explore"} - result = parse_opencode_event("subagent_stop", raw) - assert isinstance(result, OpenCodeStopEvent) - assert result.parent_id == "parent-1" - - def test_parse_opencode_event__unknown_type_raises_value_error(self): - with pytest.raises(ValueError, match="Unknown OpenCode event type"): - parse_opencode_event("invalid_type", {"session_id": "s1"}) - - -class TestGetToolType: - def test_get_tool_type__maps_known_tool(self): - assert get_tool_type("Bash") == ToolType.BASH - - def test_get_tool_type__maps_read_tool(self): - assert get_tool_type("Read") == ToolType.READ - - def test_get_tool_type__maps_edit_tool(self): - assert get_tool_type("Edit") == ToolType.EDIT - - def test_get_tool_type__unknown_tool_returns_other(self): - assert get_tool_type("SomeFutureTool") == ToolType.OTHER - -class TestGetSessionId: - def test_get_session_id__from_tool_event(self): - event = OpenCodeToolEvent(tool="Bash", 
session_id="s1", call_id="c1") - assert _get_session_id(event) == "s1" - def test_get_session_id__from_stop_event(self): - event = OpenCodeStopEvent(session_id="s2") - assert _get_session_id(event) == "s2" - - def test_get_session_id__from_todo_event(self): - event = OpenCodeTodoEvent(session_id="s3") - assert _get_session_id(event) == "s3" - - def test_get_session_id__from_message_event(self): - event = OpenCodeMessageEvent(session_id="s4", message_id="m1") - assert _get_session_id(event) == "s4" - - def test_get_session_id__from_session_event(self): - event = OpenCodeSessionEvent(session_id="s5") - assert _get_session_id(event) == "s5" - - -class TestGetParentId: - def test_get_parent_id__from_session_event_with_parent(self): - event = OpenCodeSessionEvent(session_id="child", parent_id="parent") - assert _get_parent_id(event) == "parent" - - def test_get_parent_id__from_session_event_without_parent(self): - event = OpenCodeSessionEvent(session_id="main") - assert _get_parent_id(event) is None - - def test_get_parent_id__from_stop_event_with_parent(self): - event = OpenCodeStopEvent(session_id="child", parent_id="parent") - assert _get_parent_id(event) == "parent" - - def test_get_parent_id__from_tool_event_returns_none(self): - event = OpenCodeToolEvent(tool="Bash", session_id="s1", call_id="c1") - assert _get_parent_id(event) is None - - def test_get_parent_id__from_todo_event_returns_none(self): - event = OpenCodeTodoEvent(session_id="s1") - assert _get_parent_id(event) is None - - def test_get_parent_id__from_message_event_returns_none(self): - event = OpenCodeMessageEvent(session_id="s1", message_id="m1") - assert _get_parent_id(event) is None - - -class TestHandleOpenCodeStop: - def test_handle_opencode_stop__non_stop_event_returns_zero(self): - """Non-OpenCodeStopEvent types should return 0 immediately.""" - tool_event = OpenCodeToolEvent(tool="Bash", session_id="s1", call_id="c1") - result = _handle_opencode_stop("s1", tool_event, "stop") - assert result 
== 0 - - def test_handle_opencode_stop__stop_event_calls_handle_stop_event(self): - """Stop events should delegate to the shared handle_stop_event pipeline.""" - stop_event = OpenCodeStopEvent(session_id="s1", agent="general") - - with patch("slopometry.core.hook_handler.handle_stop_event", return_value=0) as mock_handle: - result = _handle_opencode_stop("s1", stop_event, "stop") - - assert result == 0 - mock_handle.assert_called_once() - call_args = mock_handle.call_args - assert call_args[0][0] == "s1" - stop_input = call_args[0][1] - assert stop_input.session_id == "s1" - assert stop_input.stop_hook_active is False - - def test_handle_opencode_stop__subagent_stop_also_calls_handle_stop_event(self): - """Subagent stop events should also go through the feedback pipeline.""" - stop_event = OpenCodeStopEvent(session_id="child-1", parent_id="parent-1", agent="explore") - - with patch("slopometry.core.hook_handler.handle_stop_event", return_value=2) as mock_handle: - result = _handle_opencode_stop("child-1", stop_event, "subagent_stop") - - assert result == 2 - mock_handle.assert_called_once() +@pytest.fixture(autouse=True) +def _isolate_db(tmp_path): + """Redirect database and session state to temp directories so smoke tests don't pollute the real DB.""" + db_path = tmp_path / "test.db" + state_dir = tmp_path / "state" + state_dir.mkdir() + + original_init = SessionManager.__init__ + + def _isolated_init(self_inner, source: str = "opencode"): + original_init(self_inner, source=source) + self_inner.state_dir = state_dir + + with ( + patch("slopometry.core.settings.settings.database_path", db_path), + patch.object(SessionManager, "__init__", _isolated_init), + ): + yield + + +def _init_git_repo(path: Path) -> None: + """Initialize a git repo for testing.""" + subprocess.run(["git", "init"], cwd=path, capture_output=True, check=True) + subprocess.run( + ["git", "config", "--local", "user.email", "test@example.com"], + cwd=path, + capture_output=True, + check=True, + ) + 
subprocess.run( + ["git", "config", "--local", "user.name", "Test"], + cwd=path, + capture_output=True, + check=True, + ) + subprocess.run( + ["git", "config", "--local", "commit.gpgsign", "false"], + cwd=path, + capture_output=True, + check=True, + ) + + +from slopometry.core.opencode_handler import handle_opencode_hook class TestHandleOpenCodeHookSmoke: @@ -233,47 +70,6 @@ class TestHandleOpenCodeHookSmoke: the real implementation uses select.select() which requires a real file descriptor. """ - @pytest.fixture(autouse=True) - def _isolate_db(self, tmp_path): - """Redirect database and session state to temp directories so smoke tests don't pollute the real DB.""" - db_path = tmp_path / "test.db" - state_dir = tmp_path / "state" - state_dir.mkdir() - - original_init = SessionManager.__init__ - - def _isolated_init(self_inner): - original_init(self_inner) - self_inner.state_dir = state_dir - - with ( - patch("slopometry.core.settings.settings.database_path", db_path), - patch.object(SessionManager, "__init__", _isolated_init), - ): - yield - - def _init_git_repo(self, path: Path) -> None: - """Initialize a git repo for testing.""" - subprocess.run(["git", "init"], cwd=path, capture_output=True, check=True) - subprocess.run( - ["git", "config", "--local", "user.email", "test@example.com"], - cwd=path, - capture_output=True, - check=True, - ) - subprocess.run( - ["git", "config", "--local", "user.name", "Test"], - cwd=path, - capture_output=True, - check=True, - ) - subprocess.run( - ["git", "config", "--local", "commit.gpgsign", "false"], - cwd=path, - capture_output=True, - check=True, - ) - def test_handle_opencode_hook__pre_tool_use_does_not_crash(self): """Smoke test: pre_tool_use event should not crash.""" @@ -335,7 +131,7 @@ def test_handle_opencode_hook__stop_does_not_crash(self): with tempfile.TemporaryDirectory() as tmpdir: tmppath = Path(tmpdir) - self._init_git_repo(tmppath) + _init_git_repo(tmppath) (tmppath / "test.py").write_text("x = 1") 
subprocess.run(["git", "add", "."], cwd=tmppath, capture_output=True) subprocess.run(["git", "commit", "-m", "init"], cwd=tmppath, capture_output=True) diff --git a/tests/test_opencode_memory_integration.py b/tests/test_opencode_memory_integration.py new file mode 100644 index 0000000..f9f6a87 --- /dev/null +++ b/tests/test_opencode_memory_integration.py @@ -0,0 +1,249 @@ +"""Integration test: real OpenCode session → memory extraction → freshness → save. + +Reads the actual OpenCode storage tree on the test host (if present), +extracts conversation text via MemoryExtractor.extract_memories_from_opencode_session, +then runs the full freshness + save pipeline with stubbed LLM candidates to +verify the end-to-end wiring of CLI/logic without depending on a live chat LLM. +""" + +import json +import os +from datetime import datetime +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from slopometry.core.database import EventDatabase +from slopometry.core.models.memory import MemoryCandidate, MemoryEntry, MemoryType +from slopometry.solo.services.memory_extractor import MemoryExtractor +from slopometry.solo.services.memory_freshness import MemoryFreshnessValidator +from slopometry.solo.services.memory_service import MemoryService + + +OPENCODE_STORAGE = Path(os.environ.get("OPENCODE_STORAGE", "/home/tensor-templar/.local/share/opencode/storage")) + + +@pytest.fixture +def real_opencode_session() -> tuple[str, Path]: + """Pick the first OpenCode session that has a message directory, or skip.""" + if not OPENCODE_STORAGE.is_dir(): + pytest.skip(f"OpenCode storage not found at {OPENCODE_STORAGE}") + message_root = OPENCODE_STORAGE / "message" + if not message_root.is_dir(): + pytest.skip(f"No message directory at {message_root}") + sessions = sorted(p.name for p in message_root.iterdir() if p.is_dir()) + if not sessions: + pytest.skip("No OpenCode sessions with messages found") + return sessions[0], OPENCODE_STORAGE + + 
+@pytest.fixture +def fresh_memory_service(tmp_path: Path) -> MemoryService: + """MemoryService backed by a fresh tmp database, isolated from the global one.""" + db = EventDatabase(db_path=tmp_path / "test.db") + return MemoryService(db=db) + + +class TestRealOpenCodeSessionExtraction: + def test_extract_produces_non_empty_conversation(self, real_opencode_session: tuple[str, Path]): + session_id, storage_root = real_opencode_session + extractor = MemoryExtractor("https://llm.example/v1", "model-x") + text = extractor.extract_memories_from_opencode_session(session_id, storage_root) + assert text.strip() + assert "USER:" in text or "ASSISTANT:" in text, ( + f"Expected USER:/ASSISTANT: markers in reconstructed text from {session_id}, " + f"got first 200 chars: {text[:200]!r}" + ) + + def test_extracted_text_contains_some_tool_markers(self, real_opencode_session: tuple[str, Path]): + session_id, storage_root = real_opencode_session + extractor = MemoryExtractor("https://llm.example/v1", "model-x") + text = extractor.extract_memories_from_opencode_session(session_id, storage_root) + has_tool = "TOOL:" in text + if not has_tool: + pytest.skip(f"Session {session_id} has no tool parts (text-only conversation)") + + +class TestEndToEndFreshnessPipeline: + """Full pipeline: stubbed LLM candidates → freshness validator → save + superseded_by.""" + + def _stub_judge(self, action: str, merged_content: str | None = None) -> MagicMock: + payload = {"action": action, "reason": "stub"} + if merged_content: + payload["merged_content"] = merged_content + mock = MagicMock() + mock.choices = [MagicMock(message=MagicMock(content=json.dumps(payload)))] + return mock + + def test_supersede_links_old_to_new_via_superseded_by(self, fresh_memory_service: MemoryService): + existing = MemoryEntry( + id="old-1", + session_id="claude_code:s_prev", + project_dir="/test/proj", + memory_type=MemoryType.PROJECT, + content="Project uses radon for Python complexity metrics", + embedding=[1.0, 0.0, 
0.0], + created_at=datetime.now(), + ) + fresh_memory_service.save_memory(existing) + + new_candidate = MemoryCandidate( + memory_type=MemoryType.PROJECT, + content="Project uses rust-code-analysis (switched from radon in 2026)", + embedding=[0.99, 0.14, 0.0], + ) + + validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") + with patch("openai.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = self._stub_judge("supersede") + decisions, _ = validator.validate([new_candidate], [existing]) + + assert len(decisions) == 1 + assert decisions[0].action == "supersede" + + from slopometry.core.models.memory import MemoryCreateRequest + + saved = fresh_memory_service.save_memories( + MemoryCreateRequest( + session_id="claude_code:test", + project_dir="/test/proj", + candidates=[new_candidate], + ) + ) + new_id = saved[0].id + + fresh_memory_service.update_memory(existing.id, superseded_by=new_id) + + all_memories = fresh_memory_service.get_memories(project_dir="/test/proj", limit=100) + old_updated = next(m for m in all_memories if m.id == existing.id) + new_loaded = next(m for m in all_memories if m.id == new_id) + assert old_updated.superseded_by == new_id + assert new_loaded.superseded_by is None + + def test_merge_action_rewrites_candidate_content(self, fresh_memory_service: MemoryService): + existing = MemoryEntry( + id="old-2", + session_id="claude_code:s_prev2", + project_dir="/test/proj", + memory_type=MemoryType.PROJECT, + content="Project uses radon", + embedding=[1.0, 0.0, 0.0], + created_at=datetime.now(), + ) + fresh_memory_service.save_memory(existing) + + new_candidate = MemoryCandidate( + memory_type=MemoryType.PROJECT, + content="Project switched to rust-code-analysis in 2026", + embedding=[0.99, 0.14, 0.0], + ) + + merged_text = "Project uses rust-code-analysis (switched from radon in 2026)" + + validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") + with patch("openai.OpenAI") as 
mock_openai: + mock_openai.return_value.chat.completions.create.return_value = self._stub_judge( + "merge", merged_content=merged_text + ) + decisions, _ = validator.validate([new_candidate], [existing]) + + assert decisions[0].action == "merge" + assert decisions[0].merged_content == merged_text + + decisions[0].new_candidate.content = decisions[0].merged_content + + from slopometry.core.models.memory import MemoryCreateRequest + + saved = fresh_memory_service.save_memories( + MemoryCreateRequest( + session_id="claude_code:test", + project_dir="/test/proj", + candidates=[decisions[0].new_candidate], + ) + ) + assert saved[0].content == merged_text + all_memories = fresh_memory_service.get_memories(project_dir="/test/proj", limit=100) + old_loaded = next(m for m in all_memories if m.id == existing.id) + assert old_loaded.superseded_by is None + + def test_dedupe_action_skips_new_save(self, fresh_memory_service: MemoryService): + existing = MemoryEntry( + id="old-3", + session_id="claude_code:s_prev3", + project_dir="/test/proj", + memory_type=MemoryType.PROJECT, + content="User prefers pyright", + embedding=[1.0, 0.0, 0.0], + created_at=datetime.now(), + ) + fresh_memory_service.save_memory(existing) + + new_candidate = MemoryCandidate( + memory_type=MemoryType.PROJECT, + content="User uses pyright type checker", + embedding=[0.99, 0.14, 0.0], + ) + + validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") + with patch("openai.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = self._stub_judge("dedupe") + decisions, _ = validator.validate([new_candidate], [existing]) + + assert len(decisions) == 1 + assert decisions[0].action == "dedupe" + + deduped_candidates: list[MemoryCandidate] = [] + for d in decisions: + if d.action == "dedupe": + if d.new_candidate.metadata is None: + d.new_candidate.metadata = {} + d.new_candidate.metadata["deduped_against"] = d.existing_memory.id + continue + 
deduped_candidates.append(d.new_candidate) + + assert deduped_candidates == [] + all_memories = fresh_memory_service.get_memories(project_dir="/test/proj", limit=100) + assert len(all_memories) == 1 + assert all_memories[0].id == existing.id + + def test_keep_both_saves_both_independently(self, fresh_memory_service: MemoryService): + existing = MemoryEntry( + id="old-4", + session_id="claude_code:s_prev4", + project_dir="/test/proj", + memory_type=MemoryType.PROJECT, + content="Project uses rust-code-analysis", + embedding=[1.0, 0.0, 0.0], + created_at=datetime.now(), + ) + fresh_memory_service.save_memory(existing) + + new_candidate = MemoryCandidate( + memory_type=MemoryType.PROJECT, + content="User prefers dark mode", + embedding=[0.95, 0.31, 0.0], + ) + + validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") + with patch("openai.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = self._stub_judge("keep_both") + decisions, _ = validator.validate([new_candidate], [existing]) + + assert len(decisions) == 1 + assert decisions[0].action == "keep_both" + + from slopometry.core.models.memory import MemoryCreateRequest + + fresh_memory_service.save_memories( + MemoryCreateRequest( + session_id="claude_code:test", + project_dir="/test/proj", + candidates=[decisions[0].new_candidate], + ) + ) + + all_memories = fresh_memory_service.get_memories(project_dir="/test/proj", limit=100) + assert len(all_memories) == 2 + old_loaded = next(m for m in all_memories if m.id == existing.id) + assert old_loaded.superseded_by is None diff --git a/tests/test_plan_analyzer.py b/tests/test_plan_analyzer.py index b5ecf6e..92d95af 100644 --- a/tests/test_plan_analyzer.py +++ b/tests/test_plan_analyzer.py @@ -1,7 +1,7 @@ from datetime import datetime -from slopometry.core.models.hook import ToolType from slopometry.core.plan_analyzer import PlanAnalyzer +from slopometry.core.protocol.adapters.claude_code import ToolType def 
test_increment_event_count__task_explore_increments_search_metrics() -> None: diff --git a/tests/test_posttooluse_validation.py b/tests/test_posttooluse_validation.py index 8ed6709..8ea4f76 100644 --- a/tests/test_posttooluse_validation.py +++ b/tests/test_posttooluse_validation.py @@ -1,13 +1,20 @@ -"""Tests for PostToolUseInput validation fix.""" +"""Tests for ClaudeCodeAdapter.parse() across the three PostToolUse tool_response shapes. -from slopometry.core.models.hook import PostToolUseInput +Migrated from tests of the removed `PostToolUseInput` model. The adapter now +handles wire-format shape variability internally via `extra="allow"` semantics — +these tests verify the parser preserves dict / str / list shapes verbatim and +handles the empty-list edge case for NotebookRead. +""" +from slopometry.core.protocol.adapters.claude_code import ClaudeCodeAdapter -class TestPostToolUseInputValidation: - """Test PostToolUseInput model validation.""" - def test_posttooluse_with_dict_response__validates_correctly(self): - """Test that PostToolUseInput accepts dictionary responses.""" +class TestPostToolUseAdapterValidation: + """Test ClaudeCodeAdapter.parse() for PostToolUse payload shapes.""" + + def test_posttooluse_with_dict_response__preserves_dict(self): + """Read/Edit tool_response is a dict — preserved verbatim.""" + adapter = ClaudeCodeAdapter() data = { "session_id": "test_session", "transcript_path": "/path/to/transcript", @@ -16,11 +23,14 @@ def test_posttooluse_with_dict_response__validates_correctly(self): "tool_response": {"success": True, "content": "file content"}, } - input_model = PostToolUseInput(**data) - assert input_model.tool_response == {"success": True, "content": "file content"} + event = adapter.parse(data, working_directory="/repo") + + assert event.tool_call is not None + assert event.tool_call.output == {"success": True, "content": "file content"} - def test_posttooluse_with_str_response__validates_correctly(self): - """Test that 
PostToolUseInput accepts string responses.""" + def test_posttooluse_with_str_response__preserves_str(self): + """Bash tool_response is a stdout string — preserved verbatim.""" + adapter = ClaudeCodeAdapter() data = { "session_id": "test_session", "transcript_path": "/path/to/transcript", @@ -29,12 +39,18 @@ def test_posttooluse_with_str_response__validates_correctly(self): "tool_response": "file1.txt\nfile2.txt\n", } - input_model = PostToolUseInput(**data) - assert input_model.tool_response == "file1.txt\nfile2.txt\n" + event = adapter.parse(data, working_directory="/repo") + + assert event.tool_call is not None + assert event.tool_call.output == "file1.txt\nfile2.txt\n" - def test_posttooluse_with_list_response__validates_correctly(self): - """Test that PostToolUseInput accepts list responses (like NotebookRead).""" - # This simulates the actual NotebookRead response that was causing the error + def test_posttooluse_with_list_response__preserves_cells(self): + """NotebookRead tool_response is a list of cells — preserved verbatim. + + This was the original bug: the old PostToolUseInput pydantic model + rejected list-typed tool_response. The adapter now handles any shape. 
+ """ + adapter = ClaudeCodeAdapter() notebook_cells = [ { "cellType": "markdown", @@ -58,17 +74,20 @@ def test_posttooluse_with_list_response__validates_correctly(self): "tool_response": notebook_cells, } - input_model = PostToolUseInput(**data) - assert input_model.tool_response == notebook_cells - assert isinstance(input_model.tool_response, list) - assert len(input_model.tool_response) == 2 - cell0 = input_model.tool_response[0] - cell1 = input_model.tool_response[1] + event = adapter.parse(data, working_directory="/repo") + + assert event.tool_call is not None + assert event.tool_call.output == notebook_cells + assert isinstance(event.tool_call.output, list) + assert len(event.tool_call.output) == 2 + cell0 = event.tool_call.output[0] + cell1 = event.tool_call.output[1] assert isinstance(cell0, dict) and cell0["cellType"] == "markdown" assert isinstance(cell1, dict) and cell1["cellType"] == "code" - def test_posttooluse_with_empty_list_response__validates_correctly(self): - """Test that PostToolUseInput accepts empty list responses.""" + def test_posttooluse_with_empty_list_response__preserves_empty_list(self): + """Empty NotebookRead (no cells) — preserved as empty list, no crash.""" + adapter = ClaudeCodeAdapter() data = { "session_id": "test_session", "transcript_path": "/path/to/transcript", @@ -77,5 +96,7 @@ def test_posttooluse_with_empty_list_response__validates_correctly(self): "tool_response": [], } - input_model = PostToolUseInput(**data) - assert input_model.tool_response == [] + event = adapter.parse(data, working_directory="/repo") + + assert event.tool_call is not None + assert event.tool_call.output == [] diff --git a/tests/test_preflight.py b/tests/test_preflight.py new file mode 100644 index 0000000..c8835fe --- /dev/null +++ b/tests/test_preflight.py @@ -0,0 +1,119 @@ +"""Tests for the preflight endpoint health check.""" + +from unittest.mock import MagicMock, patch + +import pytest +from click.testing import CliRunner + +from slopometry.cli 
import cli +from slopometry.solo.cli.preflight import preflight_endpoints + + +class TestPreflightEndpoints: + def test_raises_click_exception_when_chat_endpoint_down(self): + chat_err = "chat LLM (https://chat.example/v1): APIConnectionError: no available server" + embed_ok = None + with patch("slopometry.solo.cli.preflight._check_endpoint", side_effect=[chat_err, embed_ok]): + with pytest.raises(Exception) as exc_info: + preflight_endpoints( + chat_endpoint="https://chat.example/v1", + embedding_endpoint="https://embed.example/v1", + chat_api_key="k1", + embedding_api_key="k2", + ) + assert "chat.example" in str(exc_info.value) + assert "embed.example" not in str(exc_info.value) + + def test_raises_click_exception_when_embedding_endpoint_down(self): + chat_ok = None + embed_err = "embedding (https://embed.example/v1): APIConnectionError: refused" + with patch("slopometry.solo.cli.preflight._check_endpoint", side_effect=[chat_ok, embed_err]): + with pytest.raises(Exception) as exc_info: + preflight_endpoints( + chat_endpoint="https://chat.example/v1", + embedding_endpoint="https://embed.example/v1", + chat_api_key="k1", + embedding_api_key="k2", + ) + assert "embed.example" in str(exc_info.value) + + def test_raises_with_both_errors_listed(self): + chat_err = "chat LLM: down" + embed_err = "embedding: down" + with patch("slopometry.solo.cli.preflight._check_endpoint", side_effect=[chat_err, embed_err]): + with pytest.raises(Exception) as exc_info: + preflight_endpoints( + chat_endpoint="https://chat.example/v1", + embedding_endpoint="https://embed.example/v1", + chat_api_key="k1", + embedding_api_key="k2", + ) + msg = str(exc_info.value) + assert "chat LLM: down" in msg + assert "embedding: down" in msg + + def test_passes_silently_when_both_endpoints_reachable(self): + with patch("slopometry.solo.cli.preflight._check_endpoint", return_value=None): + preflight_endpoints( + chat_endpoint="https://chat.example/v1", + embedding_endpoint="https://embed.example/v1", + 
chat_api_key="k1", + embedding_api_key="k2", + ) + + +class TestCheckEndpoint: + def test_returns_none_when_models_list_succeeds(self): + mock_client = MagicMock() + mock_client.models.list.return_value = MagicMock() + with patch("openai.OpenAI", return_value=mock_client) as mock_openai: + from slopometry.solo.cli.preflight import _check_endpoint + + result = _check_endpoint("test", "https://x/v1", "key") + assert result is None + mock_openai.assert_called_once_with(base_url="https://x/v1", api_key="key") + + def test_returns_error_string_on_exception(self): + mock_client = MagicMock() + mock_client.models.list.side_effect = RuntimeError("refused") + with patch("openai.OpenAI", return_value=mock_client): + from slopometry.solo.cli.preflight import _check_endpoint + + result = _check_endpoint("test", "https://x/v1", "key") + assert result is not None + assert "test" in result + assert "https://x/v1" in result + assert "RuntimeError" in result + + +class TestFindMemoriesPreflightIntegration: + def test_dry_run_skips_preflight(self, monkeypatch: pytest.MonkeyPatch): + from slopometry.core.settings import settings + + monkeypatch.setattr(settings, "offline_mode", True) + runner = CliRunner() + result = runner.invoke( + cli, + [ + "solo", + "find-memories", + "--project-dir", + "/tmp", + "--dry-run", + "--llm-endpoint", + "https://chat.example/v1", + ], + ) + assert "offline_mode" not in result.output or result.exit_code == 0 + + def test_offline_mode_blocks_before_preflight(self, monkeypatch: pytest.MonkeyPatch): + from slopometry.core.settings import settings + + monkeypatch.setattr(settings, "offline_mode", True) + runner = CliRunner() + result = runner.invoke( + cli, + ["solo", "find-memories", "--project-dir", "/tmp"], + ) + assert result.exit_code != 0 + assert "offline_mode" in result.output.lower() or "offline" in result.output.lower() diff --git a/tests/test_protocol_events.py b/tests/test_protocol_events.py new file mode 100644 index 0000000..8128129 --- 
/dev/null +++ b/tests/test_protocol_events.py @@ -0,0 +1,266 @@ +"""Tests for the canonical protocol events defined in `slopometry.core.models.protocol.events`. + +Tests both happy-path validation (field defaults, type coercion) and the +`extra="forbid"` contract: downstream analyzers and storage must reject unknown +fields rather than silently ignore them, so wire-format drift is caught early. +""" + +from datetime import datetime + +import pytest +from pydantic import ValidationError + +from slopometry.core.models.hook import Project, ProjectSource +from slopometry.core.models.protocol.events import ( + AbstractEventSource, + AbstractEventType, + AbstractHookEvent, + ToolCallPayload, +) + + +class TestAbstractEventSource: + """Tests for the AbstractEventSource enum.""" + + def test_event_source__claude_code_value(self): + """Claude Code is the canonical first source.""" + assert AbstractEventSource.CLAUDE_CODE == "claude_code" + + def test_event_source__opencode_value(self): + """OpenCode is the second canonical source.""" + assert AbstractEventSource.OPENCODE == "opencode" + + +class TestAbstractEventType: + """Tests for the AbstractEventType enum.""" + + def test_event_type__all_event_types_are_snake_case(self): + """Abstract event-type values must be snake_case for stable DB column strings.""" + for et in AbstractEventType: + assert "_" in et.value or et.value.islower(), f"{et.name} should be snake_case, got {et.value!r}" + + def test_event_type__tool_call_started(self): + assert AbstractEventType.TOOL_CALL_STARTED == "tool_call_started" + + def test_event_type__tool_call_completed(self): + assert AbstractEventType.TOOL_CALL_COMPLETED == "tool_call_completed" + + def test_event_type__notification(self): + assert AbstractEventType.NOTIFICATION == "notification" + + def test_event_type__turn_completed(self): + assert AbstractEventType.TURN_COMPLETED == "turn_completed" + + def test_event_type__subagent_completed(self): + assert AbstractEventType.SUBAGENT_COMPLETED == 
"subagent_completed" + + def test_event_type__todo_updated(self): + assert AbstractEventType.TODO_UPDATED == "todo_updated" + + def test_event_type__message_updated(self): + assert AbstractEventType.MESSAGE_UPDATED == "message_updated" + + def test_event_type__subagent_started(self): + assert AbstractEventType.SUBAGENT_STARTED == "subagent_started" + + +class TestToolCallPayloadValidation: + """Tests for ToolCallPayload validation and field defaults.""" + + def test_tool_call_payload__requires_tool_name_and_input(self): + """tool_name and input are required — others are optional.""" + payload = ToolCallPayload(tool_name="Read", input={"file_path": "/x.py"}) + assert payload.tool_name == "Read" + assert payload.input == {"file_path": "/x.py"} + assert payload.tool_type is None + assert payload.output is None + assert payload.duration_ms is None + assert payload.exit_code is None + assert payload.error_message is None + + def test_tool_call_payload__all_optional_fields_default_to_none(self): + """Optional fields must default to None (not absent) so DB writes are consistent.""" + payload = ToolCallPayload(tool_name="Bash", input={"command": "ls"}) + for field in ("tool_type", "output", "duration_ms", "exit_code", "error_message"): + assert getattr(payload, field) is None + + def test_tool_call_payload__accepts_dict_input(self): + """Input can be an arbitrary dict — tool-specific shape, no schema enforcement.""" + payload = ToolCallPayload(tool_name="Read", input={"file_path": "/x.py", "limit": 100}) + assert payload.input == {"file_path": "/x.py", "limit": 100} + + def test_tool_call_payload__output_accepts_arbitrary_shape(self): + """Output can be dict, str, list — the wire shape varies by tool.""" + for output in ( + {"success": True, "content": "x"}, + "stdout line 1\nstdout line 2", + [{"cellType": "code", "source": "print('hi')"}], + None, + ): + payload = ToolCallPayload(tool_name="T", input={}, output=output) + assert payload.output == output + + def 
test_tool_call_payload__rejects_unknown_field(self): + """extra='forbid' — unknown fields surface as ValidationError, not silent drop.""" + with pytest.raises(ValidationError): + ToolCallPayload(tool_name="Read", input={}, unknown_field="bogus") # pyright: ignore[reportCallIssue] + + +class TestAbstractHookEventValidation: + """Tests for AbstractHookEvent validation and field defaults.""" + + def test_hook_event__required_fields(self): + """session_id, event_type, source, working_directory are required.""" + event = AbstractHookEvent( + session_id="s1", + event_type=AbstractEventType.NOTIFICATION, + source=AbstractEventSource.CLAUDE_CODE, + working_directory="/repo", + ) + assert event.session_id == "s1" + assert event.event_type == AbstractEventType.NOTIFICATION + assert event.source == AbstractEventSource.CLAUDE_CODE + assert event.working_directory == "/repo" + + def test_hook_event__timestamp_defaults_to_now(self): + """timestamp defaults to datetime.now() at construction time.""" + before = datetime.now() + event = AbstractHookEvent( + session_id="s1", + event_type=AbstractEventType.NOTIFICATION, + source=AbstractEventSource.CLAUDE_CODE, + working_directory="/repo", + ) + after = datetime.now() + assert before <= event.timestamp <= after + + def test_hook_event__sequence_number_defaults_to_zero(self): + """SessionManager assigns sequence numbers; default 0 lets pre-assignment construction work.""" + event = AbstractHookEvent( + session_id="s1", + event_type=AbstractEventType.NOTIFICATION, + source=AbstractEventSource.CLAUDE_CODE, + working_directory="/repo", + ) + assert event.sequence_number == 0 + + def test_hook_event__metadata_defaults_to_empty_dict(self): + """metadata defaults to {} — DB column accepts empty JSON safely.""" + event = AbstractHookEvent( + session_id="s1", + event_type=AbstractEventType.NOTIFICATION, + source=AbstractEventSource.CLAUDE_CODE, + working_directory="/repo", + ) + assert event.metadata == {} + + def 
test_hook_event__parent_session_id_defaults_to_none(self): + """Top-level sessions have no parent; subagent sessions populate this.""" + event = AbstractHookEvent( + session_id="s1", + event_type=AbstractEventType.NOTIFICATION, + source=AbstractEventSource.CLAUDE_CODE, + working_directory="/repo", + ) + assert event.parent_session_id is None + + def test_hook_event__transcript_location_defaults_to_none(self): + """transcript_location is harness-specific; absent for OpenCode.""" + event = AbstractHookEvent( + session_id="s1", + event_type=AbstractEventType.NOTIFICATION, + source=AbstractEventSource.OPENCODE, + working_directory="/repo", + ) + assert event.transcript_location is None + + def test_hook_event__tool_call_defaults_to_none(self): + """Tool calls are nullable — Notification and Stop events have no tool_call.""" + event = AbstractHookEvent( + session_id="s1", + event_type=AbstractEventType.NOTIFICATION, + source=AbstractEventSource.CLAUDE_CODE, + working_directory="/repo", + ) + assert event.tool_call is None + + def test_hook_event__project_defaults_to_none(self): + """Project attribution is optional — fire-and-forget hooks may lack it.""" + event = AbstractHookEvent( + session_id="s1", + event_type=AbstractEventType.NOTIFICATION, + source=AbstractEventSource.CLAUDE_CODE, + working_directory="/repo", + ) + assert event.project is None + + def test_hook_event__git_state_defaults_to_none(self): + """Git state is captured selectively (first event + turn complete), not always.""" + event = AbstractHookEvent( + session_id="s1", + event_type=AbstractEventType.NOTIFICATION, + source=AbstractEventSource.CLAUDE_CODE, + working_directory="/repo", + ) + assert event.git_state is None + + def test_hook_event__accepts_full_event_with_tool_call_and_project(self): + """A complete tool-call event with all enrichments validates cleanly.""" + event = AbstractHookEvent( + session_id="s1", + parent_session_id=None, + event_type=AbstractEventType.TOOL_CALL_COMPLETED, + 
source=AbstractEventSource.CLAUDE_CODE, + timestamp=datetime(2025, 1, 1, 12, 0), + tool_call=ToolCallPayload( + tool_name="Read", + tool_type="Read", + input={"file_path": "/x.py"}, + output={"success": True}, + duration_ms=42, + ), + metadata={"transcript_path": "/tmp/t.jsonl", "tool_response": {"success": True}}, + working_directory="/repo", + project=Project(name="my-project", source=ProjectSource.GIT), + transcript_location="/tmp/t.jsonl", + sequence_number=5, + ) + assert event.parent_session_id is None + assert event.tool_call is not None + assert event.tool_call.tool_name == "Read" + assert event.tool_call.duration_ms == 42 + assert event.project is not None + assert event.project.name == "my-project" + assert event.transcript_location == "/tmp/t.jsonl" + assert event.sequence_number == 5 + + def test_hook_event__rejects_unknown_field(self): + """extra='forbid' on AbstractHookEvent — wire-format drift surfaces immediately.""" + with pytest.raises(ValidationError): + AbstractHookEvent( + session_id="s1", + event_type=AbstractEventType.NOTIFICATION, + source=AbstractEventSource.CLAUDE_CODE, + working_directory="/repo", + legacy_field="ignored", # pyright: ignore[reportCallIssue] + ) + + def test_hook_event__source_accepts_only_known_enum_values(self): + """AbstractEventSource is a closed enum — free-form strings fail validation.""" + with pytest.raises(ValidationError): + AbstractHookEvent( + session_id="s1", + event_type=AbstractEventType.NOTIFICATION, + source="some_other_agent", # type: ignore[arg-type] + working_directory="/repo", + ) + + def test_hook_event__event_type_accepts_only_known_enum_values(self): + """AbstractEventType is a closed enum — wire-format drift fails validation.""" + with pytest.raises(ValidationError): + AbstractHookEvent( + session_id="s1", + event_type="custom_event", # type: ignore[arg-type] + source=AbstractEventSource.CLAUDE_CODE, + working_directory="/repo", + ) diff --git a/tests/test_protocol_session.py 
b/tests/test_protocol_session.py new file mode 100644 index 0000000..d2f8e36 --- /dev/null +++ b/tests/test_protocol_session.py @@ -0,0 +1,136 @@ +"""Tests for SessionManager — per-source sequence numbering and legacy-file migration. + +SessionManager is constructed with an explicit `source` so concurrent harnesses +don't share sequence state, and its `state_root` can be redirected to a tmp dir +for testing. On first construction for source='claude_code', it migrates any +legacy files at `~/.claude/slopometry/seq_*.txt` to +`~/.slopometry/sessions/claude_code/seq_*.txt`. + +The legacy/default state dirs are module-level constants computed at import +time from `Path.home()`, so the tests patch those constants directly rather +than `Path.home()`. Each sequence-numbering test uses its own tmp_path to +avoid cross-test contamination. +""" + +from slopometry.core.protocol import session as session_module +from slopometry.core.protocol.session import SessionManager + + +class TestSessionManagerSequenceNumbering: + """Tests for monotonic per-session sequence numbering.""" + + def test_get_next_sequence_number__first_call_returns_one(self, tmp_path): + """First event for a fresh session_id gets sequence_number=1.""" + sm = SessionManager(source="claude_code", state_root=tmp_path) + assert sm.get_next_sequence_number("fresh-session") == 1 + + def test_get_next_sequence_number__increments_monotonically(self, tmp_path): + """Subsequent events get sequence_number 2, 3, 4, ... 
for the same session.""" + sm = SessionManager(source="claude_code", state_root=tmp_path) + assert sm.get_next_sequence_number("s") == 1 + assert sm.get_next_sequence_number("s") == 2 + assert sm.get_next_sequence_number("s") == 3 + + def test_get_next_sequence_number__distinct_sessions_have_independent_counters(self, tmp_path): + """Sequence numbers reset per session — different sessions start from 1.""" + sm = SessionManager(source="claude_code", state_root=tmp_path) + assert sm.get_next_sequence_number("session-a") == 1 + assert sm.get_next_sequence_number("session-a") == 2 + assert sm.get_next_sequence_number("session-b") == 1 + assert sm.get_next_sequence_number("session-a") == 3 + + def test_get_next_sequence_number__survives_corrupt_sequence_file(self, tmp_path): + """A corrupt sequence file resets to 1 (logs but does not raise).""" + sm = SessionManager(source="claude_code", state_root=tmp_path) + seq_file = sm.state_dir / "seq_corrupt.txt" + seq_file.write_text("not-a-number") + assert sm.get_next_sequence_number("corrupt") == 1 + + def test_session_manager__state_dir_created_on_construction(self, tmp_path): + """The source-scoped state directory is created when SessionManager is built.""" + sm = SessionManager(source="claude_code", state_root=tmp_path) + assert sm.state_dir.exists() + assert sm.state_dir == tmp_path / "claude_code" + + def test_session_manager__source_isolates_state(self, tmp_path): + """claude_code and opencode state dirs are separate under the same root.""" + sm_cc = SessionManager(source="claude_code", state_root=tmp_path) + sm_oc = SessionManager(source="opencode", state_root=tmp_path) + assert sm_cc.state_dir != sm_oc.state_dir + assert sm_cc.state_dir.name == "claude_code" + assert sm_oc.state_dir.name == "opencode" + + def test_session_manager__sequence_persists_across_instances(self, tmp_path): + """The sequence number on disk is the source of truth — a new SessionManager reads it back.""" + sm1 = 
SessionManager(source="claude_code", state_root=tmp_path) + sm1.get_next_sequence_number("persist-s") + sm1.get_next_sequence_number("persist-s") + sm2 = SessionManager(source="claude_code", state_root=tmp_path) + assert sm2.get_next_sequence_number("persist-s") == 3 + + +class TestSessionManagerLegacyMigration: + """Tests for migration of `~/.claude/slopometry/seq_*.txt` legacy files. + + `_LEGACY_STATE_DIR` is a module-level constant resolved at import time from + `Path.home()`. We patch it directly to point at a tmp dir so the test is + hermetic and doesn't touch the real user's `~/.claude` directory. + """ + + def test_legacy_migration__moves_seq_files_to_new_state_dir(self, tmp_path, monkeypatch): + """A seq_*.txt file in the legacy dir is moved to state_root/claude_code/.""" + legacy_dir = tmp_path / "legacy_claude_slopometry" + legacy_dir.mkdir() + (legacy_dir / "seq_legacy-session.txt").write_text("5") + + new_root = tmp_path / "new_root" + monkeypatch.setattr(session_module, "_LEGACY_STATE_DIR", legacy_dir) + + SessionManager(source="claude_code", state_root=new_root) + + migrated = new_root / "claude_code" / "seq_legacy-session.txt" + assert migrated.exists(), "legacy file should be migrated to new state dir" + assert migrated.read_text() == "5" + assert not (legacy_dir / "seq_legacy-session.txt").exists(), "legacy file should be moved, not copied" + + def test_legacy_migration__does_not_move_files_for_other_sources(self, tmp_path, monkeypatch): + """opencode sessions do not migrate files from ~/.claude/slopometry/.""" + legacy_dir = tmp_path / "legacy_claude_slopometry_oc" + legacy_dir.mkdir() + (legacy_dir / "seq_oc-session.txt").write_text("2") + + new_root = tmp_path / "new_root_oc" + monkeypatch.setattr(session_module, "_LEGACY_STATE_DIR", legacy_dir) + + sm = SessionManager(source="opencode", state_root=new_root) + + assert sm.state_dir == new_root / "opencode" + assert not (new_root / "opencode" / "seq_oc-session.txt").exists() + assert (legacy_dir / 
"seq_oc-session.txt").exists(), "legacy file must not be moved for non-claude_code source" + + def test_legacy_migration__skips_when_legacy_dir_missing(self, tmp_path, monkeypatch): + """No legacy dir -> no migration, no error.""" + missing_legacy = tmp_path / "legacy_dir_does_not_exist" + new_root = tmp_path / "new_root_no_legacy" + monkeypatch.setattr(session_module, "_LEGACY_STATE_DIR", missing_legacy) + + sm = SessionManager(source="claude_code", state_root=new_root) + + assert sm.state_dir == new_root / "claude_code" + + def test_legacy_migration__skips_when_target_already_exists(self, tmp_path, monkeypatch): + """A pre-existing target file is preserved — the migration is idempotent.""" + legacy_dir = tmp_path / "legacy_claude_slopometry_idem" + legacy_dir.mkdir() + (legacy_dir / "seq_idem.txt").write_text("legacy-value") + + new_root = tmp_path / "new_root_idem" + target = new_root / "claude_code" / "seq_idem.txt" + target.parent.mkdir(parents=True) + target.write_text("newer-value") + + monkeypatch.setattr(session_module, "_LEGACY_STATE_DIR", legacy_dir) + + SessionManager(source="claude_code", state_root=new_root) + + assert target.read_text() == "newer-value", "existing target must not be overwritten" diff --git a/tests/test_qpe_calculator.py b/tests/test_qpe_calculator.py index 5e4632b..f090321 100644 --- a/tests/test_qpe_calculator.py +++ b/tests/test_qpe_calculator.py @@ -9,6 +9,7 @@ from slopometry.core.models.baseline import QPEScore from slopometry.core.models.complexity import ExtendedComplexityMetrics +from slopometry.core.models.core import SmellCounts from slopometry.summoner.services.qpe_calculator import ( calculate_qpe, compare_project_metrics, @@ -306,14 +307,14 @@ def test_smell_advantage__negative_delta_when_candidate_reduces_smells() -> None mi_normalized=0.6, smell_penalty=0.2, adjusted_quality=0.5, - smell_counts={"swallowed_exception": 5}, + smell_counts=SmellCounts.model_validate({"swallowed_exception": 5}), ) candidate = QPEScore( 
qpe=0.6, mi_normalized=0.7, smell_penalty=0.1, adjusted_quality=0.6, - smell_counts={"swallowed_exception": 2}, + smell_counts=SmellCounts.model_validate({"swallowed_exception": 2}), ) result = smell_advantage(baseline, candidate) @@ -332,14 +333,14 @@ def test_smell_advantage__positive_delta_when_candidate_adds_smells() -> None: mi_normalized=0.7, smell_penalty=0.1, adjusted_quality=0.6, - smell_counts={"hasattr_getattr": 2}, + smell_counts=SmellCounts.model_validate({"hasattr_getattr": 2}), ) candidate = QPEScore( qpe=0.5, mi_normalized=0.6, smell_penalty=0.2, adjusted_quality=0.5, - smell_counts={"hasattr_getattr": 7}, + smell_counts=SmellCounts.model_validate({"hasattr_getattr": 7}), ) result = smell_advantage(baseline, candidate) @@ -355,14 +356,14 @@ def test_smell_advantage__handles_asymmetric_smell_sets() -> None: mi_normalized=0.6, smell_penalty=0.2, adjusted_quality=0.5, - smell_counts={"swallowed_exception": 3}, + smell_counts=SmellCounts.model_validate({"swallowed_exception": 3}), ) candidate = QPEScore( qpe=0.6, mi_normalized=0.7, smell_penalty=0.1, adjusted_quality=0.6, - smell_counts={"hasattr_getattr": 2}, + smell_counts=SmellCounts.model_validate({"hasattr_getattr": 2}), ) result = smell_advantage(baseline, candidate) @@ -388,14 +389,14 @@ def test_smell_advantage__sorted_by_impact_magnitude() -> None: mi_normalized=0.6, smell_penalty=0.2, adjusted_quality=0.5, - smell_counts={"swallowed_exception": 10, "orphan_comment": 5, "hasattr_getattr": 3}, + smell_counts=SmellCounts.model_validate({"swallowed_exception": 10, "orphan_comment": 5, "hasattr_getattr": 3}), ) candidate = QPEScore( qpe=0.6, mi_normalized=0.7, smell_penalty=0.1, adjusted_quality=0.6, - smell_counts={"swallowed_exception": 2, "orphan_comment": 4, "hasattr_getattr": 3}, + smell_counts=SmellCounts.model_validate({"swallowed_exception": 2, "orphan_comment": 4, "hasattr_getattr": 3}), ) result = smell_advantage(baseline, candidate) @@ -412,14 +413,14 @@ def 
test_smell_advantage__uses_correct_weights_from_registry() -> None: mi_normalized=0.6, smell_penalty=0.2, adjusted_quality=0.5, - smell_counts={"swallowed_exception": 1}, + smell_counts=SmellCounts.model_validate({"swallowed_exception": 1}), ) candidate = QPEScore( qpe=0.6, mi_normalized=0.7, smell_penalty=0.1, adjusted_quality=0.6, - smell_counts={"swallowed_exception": 2}, + smell_counts=SmellCounts.model_validate({"swallowed_exception": 2}), ) result = smell_advantage(baseline, candidate) @@ -601,7 +602,7 @@ def test_qpe_score_model__serializes_to_json(self) -> None: mi_normalized=0.7, smell_penalty=0.1, adjusted_quality=0.63, - smell_counts={"hasattr_getattr": 5, "type_ignore": 3}, + smell_counts=SmellCounts.model_validate({"hasattr_getattr": 5, "type_ignore": 3}), ) json_output = qpe_score.model_dump_json() diff --git a/tests/test_sessions_performance.py b/tests/test_sessions_performance.py index 86d9aee..cdd3d59 100644 --- a/tests/test_sessions_performance.py +++ b/tests/test_sessions_performance.py @@ -5,7 +5,14 @@ from tempfile import TemporaryDirectory from slopometry.core.database import EventDatabase -from slopometry.core.models.hook import HookEvent, HookEventType, Project, ProjectSource, ToolType +from slopometry.core.models.hook import Project, ProjectSource +from slopometry.core.models.protocol.events import ( + AbstractEventSource, + AbstractEventType, + AbstractHookEvent, + ToolCallPayload, +) +from slopometry.core.protocol.adapters.claude_code import ToolType from slopometry.solo.services.session_service import SessionService @@ -21,28 +28,40 @@ def test_get_sessions_for_display__uses_single_query(self): base_time = datetime.now() for i in range(5): - event = HookEvent( + tool_name = "bash" if i < 3 else ("read" if i == 3 else "write") + tool_type = ToolType.BASH if i < 3 else (ToolType.READ if i == 3 else ToolType.WRITE) + event = AbstractHookEvent( session_id="session-001", - event_type=HookEventType.PRE_TOOL_USE, + 
event_type=AbstractEventType.TOOL_CALL_STARTED, + source=AbstractEventSource.CLAUDE_CODE, timestamp=base_time + timedelta(minutes=i), sequence_number=i + 1, working_directory="/test", project=Project(name="project-a", source=ProjectSource.GIT), - tool_name="bash" if i < 3 else ("read" if i == 3 else "write"), - tool_type=ToolType.BASH if i < 3 else (ToolType.READ if i == 3 else ToolType.WRITE), + tool_call=ToolCallPayload( + tool_name=tool_name, + tool_type=tool_type.value, + input={}, + ), ) db.save_event(event) for i in range(3): - event = HookEvent( + tool_name = "grep" if i < 2 else "ls" + tool_type = ToolType.GREP if i < 2 else ToolType.LS + event = AbstractHookEvent( session_id="session-002", - event_type=HookEventType.PRE_TOOL_USE, + event_type=AbstractEventType.TOOL_CALL_STARTED, + source=AbstractEventSource.CLAUDE_CODE, timestamp=base_time + timedelta(hours=1, minutes=i), sequence_number=i + 1, working_directory="/test2", project=Project(name="project-b", source=ProjectSource.PYPROJECT), - tool_name="grep" if i < 2 else "ls", - tool_type=ToolType.GREP if i < 2 else ToolType.LS, + tool_call=ToolCallPayload( + tool_name=tool_name, + tool_type=tool_type.value, + input={}, + ), ) db.save_event(event) @@ -77,14 +96,18 @@ def test_get_sessions_for_display__respects_limit(self): base_time = datetime.now() for session_num in range(5): - event = HookEvent( + event = AbstractHookEvent( session_id=f"session-{session_num:03d}", - event_type=HookEventType.PRE_TOOL_USE, + event_type=AbstractEventType.TOOL_CALL_STARTED, + source=AbstractEventSource.CLAUDE_CODE, timestamp=base_time + timedelta(minutes=session_num), sequence_number=1, working_directory="/test", - tool_name="bash", - tool_type=ToolType.BASH, + tool_call=ToolCallPayload( + tool_name="bash", + tool_type=ToolType.BASH.value, + input={}, + ), ) db.save_event(event) @@ -117,14 +140,13 @@ def test_get_sessions_for_display__handles_null_tool_types(self): db_path = Path(temp_dir) / "test.db" db = 
EventDatabase(db_path) - event = HookEvent( + event = AbstractHookEvent( session_id="session-001", - event_type=HookEventType.NOTIFICATION, + event_type=AbstractEventType.NOTIFICATION, + source=AbstractEventSource.CLAUDE_CODE, timestamp=datetime.now(), sequence_number=1, working_directory="/test", - tool_name=None, - tool_type=None, ) db.save_event(event) diff --git a/tests/test_settings.py b/tests/test_settings.py index e999c19..5ba1a21 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -39,11 +39,11 @@ class TestSettings(BaseSettings): enable_complexity_feedback: bool = False llm_proxy_url: str = "" llm_proxy_api_key: str = "" + llm_model_name: str = "olka-fi/MiniMax-M3-MXFP4" interactive_rating_enabled: bool = False hf_token: str = "" hf_default_repo: str = "" offline_mode: bool = True - user_story_agent: str = "gemini" return TestSettings diff --git a/tests/test_transcript_finder.py b/tests/test_transcript_finder.py new file mode 100644 index 0000000..5154a5b --- /dev/null +++ b/tests/test_transcript_finder.py @@ -0,0 +1,298 @@ +"""Tests for TranscriptFinder (Claude Code + OpenCode discovery).""" + +import json +from pathlib import Path + +import pytest + +from slopometry.core.models.protocol.events import AbstractEventSource +from slopometry.solo.services.transcript_finder import ( + DiscoveredTranscript, + TranscriptFinder, +) + + +def _write_json(path: Path, payload: dict) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(payload), encoding="utf-8") + + +def _make_opencode_storage( + root: Path, + project_worktree: Path, + *, + session_ids: list[str], + message_layout: dict[str, list[dict]], + project_id: str | None = None, +) -> tuple[str, list[str]]: + """Build a minimal OpenCode storage tree under ``root``. + + Returns (project_id, session_ids). 
+ """ + project_id = project_id or f"proj_{abs(hash(str(project_worktree)))}" + _write_json( + root / "project" / f"{project_id}.json", + {"id": project_id, "worktree": str(project_worktree), "vcs": "git", "time": {"created": 0}}, + ) + session_dir = root / "session" / project_id + session_dir.mkdir(parents=True, exist_ok=True) + for sid in session_ids: + _write_json(session_dir / f"{sid}.json", {"id": sid, "projectID": project_id, "directory": str(project_worktree)}) + msg_dir = root / "message" / sid + msg_dir.mkdir(parents=True, exist_ok=True) + for i, msg in enumerate(message_layout.get(sid, [])): + mid = msg["id"] + _write_json(msg_dir / f"{mid}.json", {**msg, "sessionID": sid}) + part_dir = root / "part" / mid + part_dir.mkdir(parents=True, exist_ok=True) + for j, part in enumerate(msg["parts"]): + _write_json(part_dir / f"part_{i}_{j}.json", {**part, "messageID": mid, "sessionID": sid}) + return project_id, session_ids + + +@pytest.fixture +def storage_finder(monkeypatch: pytest.MonkeyPatch): + """TranscriptFinder whose find_opencode_storage_root is monkey-patchable per test.""" + return TranscriptFinder() + + +def test_opencode_session_emits_opencode_source( + storage_finder: TranscriptFinder, + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +): + project_dir = tmp_path / "myproject" + project_dir.mkdir() + storage = tmp_path / "opencode_storage" + _make_opencode_storage( + storage, + project_dir, + session_ids=["ses_a", "ses_b"], + message_layout={ + "ses_a": [ + { + "id": "msg_a1", + "role": "user", + "time": {"created": 1000}, + "parts": [{"type": "text", "text": "hi"}], + } + ], + "ses_b": [ + { + "id": "msg_b1", + "role": "assistant", + "time": {"created": 2000}, + "parts": [{"type": "text", "text": "hello"}], + } + ], + }, + ) + monkeypatch.setattr(storage_finder, "find_opencode_storage_root", lambda: storage) + + results = storage_finder.discover_transcripts(project_dir) + + opencode_results = [r for r in results if r.source == 
AbstractEventSource.OPENCODE] + assert len(opencode_results) == 2 + assert {r.session_id for r in opencode_results} == {"ses_a", "ses_b"} + assert all(isinstance(r, DiscoveredTranscript) for r in opencode_results) + + +def test_non_matching_worktree_excluded( + storage_finder: TranscriptFinder, + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +): + project_dir = tmp_path / "myproject" + project_dir.mkdir() + other_project = tmp_path / "other" + other_project.mkdir() + storage = tmp_path / "opencode_storage" + _make_opencode_storage( + storage, + other_project, + session_ids=["ses_x"], + message_layout={ + "ses_x": [ + { + "id": "msg_x1", + "role": "user", + "time": {"created": 0}, + "parts": [{"type": "text", "text": "x"}], + } + ] + }, + ) + monkeypatch.setattr(storage_finder, "find_opencode_storage_root", lambda: storage) + + results = storage_finder.discover_transcripts(project_dir) + assert results == [] + + +def test_missing_opencode_storage_root_returns_only_claude( + storage_finder: TranscriptFinder, + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +): + project_dir = tmp_path / "myproject" + project_dir.mkdir() + monkeypatch.setattr(storage_finder, "find_opencode_storage_root", lambda: None) + results = storage_finder.discover_transcripts(project_dir) + assert all(r.source == AbstractEventSource.CLAUDE_CODE for r in results) + + +def test_slopometry_transcript_marked_as_claude_code(tmp_path: Path): + project_dir = tmp_path / "myproject" + slop_dir = project_dir / ".slopometry" / "ses_abc" + slop_dir.mkdir(parents=True) + (slop_dir / "transcript.jsonl").write_text('{"type":"user"}\n', encoding="utf-8") + results = TranscriptFinder().discover_transcripts(project_dir) + assert any( + r.session_id == "ses_abc" and r.source == AbstractEventSource.CLAUDE_CODE for r in results + ) + + +def test_only_matching_worktree_included( + storage_finder: TranscriptFinder, + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +): + project_dir = tmp_path / "match" + 
project_dir.mkdir() + storage = tmp_path / "opencode_storage" + _make_opencode_storage( + storage, + project_dir, + session_ids=["ses_keep"], + message_layout={ + "ses_keep": [ + { + "id": "msg_keep", + "role": "user", + "time": {"created": 1}, + "parts": [{"type": "text", "text": "keep"}], + } + ] + }, + ) + _make_opencode_storage( + storage, + tmp_path / "nomatch", + session_ids=["ses_drop"], + message_layout={ + "ses_drop": [ + { + "id": "msg_drop", + "role": "user", + "time": {"created": 1}, + "parts": [{"type": "text", "text": "drop"}], + } + ] + }, + ) + monkeypatch.setattr(storage_finder, "find_opencode_storage_root", lambda: storage) + results = storage_finder.find_opencode_sessions(project_dir) + assert {r.session_id for r in results} == {"ses_keep"} + + +def test_find_opencode_storage_root_returns_none_when_no_xdg( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +): + monkeypatch.delenv("XDG_DATA_HOME", raising=False) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + finder = TranscriptFinder() + assert finder.find_opencode_storage_root() == tmp_path / ".local" / "share" / "opencode" / "storage" + + +def test_find_opencode_storage_root_uses_xdg_when_set(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setenv("XDG_DATA_HOME", "/custom/xdg") + finder = TranscriptFinder() + assert finder.find_opencode_storage_root() == Path("/custom/xdg/opencode/storage") + + +def test_opencode_session_in_subdirectory_of_worktree_included( + storage_finder: TranscriptFinder, + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +): + project_root = tmp_path / "root" + subdir = project_root / "packages" / "core" + subdir.mkdir(parents=True) + storage = tmp_path / "opencode_storage" + _make_opencode_storage( + storage, + project_root, + session_ids=["ses_subdir"], + message_layout={ + "ses_subdir": [ + { + "id": "msg_subdir", + "role": "user", + "time": {"created": 1}, + "parts": [{"type": "text", "text": "from subdir"}], + } + ] + }, + project_id="proj_subdir", + ) + 
session_meta_path = storage / "session" / "proj_subdir" / "ses_subdir.json" + session_meta_path.write_text( + json.dumps( + { + "id": "ses_subdir", + "projectID": "proj_subdir", + "directory": str(subdir), + } + ), + encoding="utf-8", + ) + + monkeypatch.setattr(storage_finder, "find_opencode_storage_root", lambda: storage) + results_subdir = storage_finder.find_opencode_sessions(subdir) + assert {r.session_id for r in results_subdir} == {"ses_subdir"} + + results_root = storage_finder.find_opencode_sessions(project_root) + assert results_root == [] + + +def test_opencode_session_outside_worktree_excluded( + storage_finder: TranscriptFinder, + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +): + worktree = tmp_path / "worktree" + other = tmp_path / "unrelated" + worktree.mkdir() + other.mkdir() + storage = tmp_path / "opencode_storage" + _make_opencode_storage( + storage, + worktree, + session_ids=["ses_unrelated"], + message_layout={ + "ses_unrelated": [ + { + "id": "msg_unrelated", + "role": "user", + "time": {"created": 1}, + "parts": [{"type": "text", "text": "x"}], + } + ] + }, + project_id="proj_unrelated", + ) + session_meta_path = storage / "session" / "proj_unrelated" / "ses_unrelated.json" + session_meta_path.write_text( + json.dumps( + { + "id": "ses_unrelated", + "projectID": "proj_unrelated", + "directory": str(other), + } + ), + encoding="utf-8", + ) + + monkeypatch.setattr(storage_finder, "find_opencode_storage_root", lambda: storage) + results = storage_finder.find_opencode_sessions(worktree) + assert results == [] diff --git a/tests/test_transcript_token_analyzer.py b/tests/test_transcript_token_analyzer.py index 438e9d6..f8d0ea8 100644 --- a/tests/test_transcript_token_analyzer.py +++ b/tests/test_transcript_token_analyzer.py @@ -514,7 +514,7 @@ def test_agent_explore__ignores_tool_names(self): assert usage.implementation_input_tokens == 0 def test_lowercase_tools__classified_correctly_without_agent(self): - """Lowercase tool names (OpenCode 
format) should match ToolType enum case-insensitively.""" + """Lowercase tool names (OpenCode format) should match ToolType enum case-insensitively (slopometry.core.protocol.adapters.claude_code).""" transcript = [ self._make_oc_msg("assistant", 50_000, 5_000, tools=["grep", "read", "glob"]), ] diff --git a/uv.lock b/uv.lock index e66c5ed..4767139 100644 --- a/uv.lock +++ b/uv.lock @@ -2749,7 +2749,7 @@ wheels = [ [[package]] name = "slopometry" -version = "2026.4.15" +version = "2026.6.23" source = { editable = "." } dependencies = [ { name = "click" }, From 2953302dee8d001bb8d171f4f8c2e57689b8b9f4 Mon Sep 17 00:00:00 2001 From: TensorTemplar Date: Wed, 24 Jun 2026 11:12:44 +0300 Subject: [PATCH 2/8] README: clarify justification for closed models support drop --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index d7b6f01..ec010c9 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,8 @@ A tool that lurks in the shadows, tracks and analyzes Claude Code sessions provi **NEWS:** +* **Jun 2026: Dropping support for closed-source models for all Summoner features**: Since there is now a precedent for silent sabotage by providers, based on flavor of the week media posture - we can no longer rely on closed systems for features that require meta-reasoning or need to run reliably. We appreciate Anthropic being up-front about this in the model card though! + * **April 2026: Behavioral pattern detection.** Sessions are now scanned for ownership dodging ("pre-existing", "not introduced by") and simple workaround ("simplest", "for now", "quick fix") phrases in assistant output, reported as per-minute rates. Rates are persisted per-repo and `current-impact` shows rolling average trends. Display reordered: plans, token impact, and behavioral patterns now appear first. Also: newly written files no longer incorrectly flagged as blind spots, and single-method class detection skips data classes with only `@property` methods. 
* **February 2026: OpenCode 1.2.10+ now supported for solo features, including stop hook feedback! See [plugin doc](plugins/opencode/README.md).** From a828b8f2bb579bc7e4602911f4fc0620cf80a715 Mon Sep 17 00:00:00 2001 From: TensorTemplar Date: Wed, 24 Jun 2026 11:24:55 +0300 Subject: [PATCH 3/8] README: separate user install from dev install, add # Development section User-facing # Installation no longer smuggles dev-only advice ('After making code changes, reinstall to update the global tool') that confused the flow for first-time installers. Promoted ### Development Installation (git clone + uv sync + uv run pytest) from a buried sub-section under # Configuration to its own top-level # Development section. The dev reinstall command now lives there next to the clone instructions. Also wrapped the shell-source lines in a bash fence (the snippet was rendering as raw markdown lines before the next heading). --- README.md | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index ec010c9..47d865a 100644 --- a/README.md +++ b/README.md @@ -144,11 +144,9 @@ uv tool update-shell ``` # Restart your terminal or run: +```bash source ~/.zshrc # for zsh # or: source ~/.bashrc # for bash - -# After making code changes, reinstall to update the global tool -uv tool install . 
--reinstall --find-links "https://github.com/Droidcraft/rust-code-analysis/releases/expanded_assets/python-2026.1.31" ``` ## Quick Start @@ -233,15 +231,6 @@ curl -o ~/.config/slopometry/.env https://raw.githubusercontent.com/TensorTempla ``` -### Development Installation - -```bash -git clone https://github.com/TensorTemplar/slopometry -cd slopometry -uv sync --extra dev -uv run pytest -``` - Customize via `.env` file or environment variables: - `SLOPOMETRY_DATABASE_PATH`: Custom database location (optional) @@ -252,6 +241,23 @@ Customize via `.env` file or environment variables: - `SLOPOMETRY_ENABLE_COMPLEXITY_ANALYSIS`: Collect complexity metrics (default: `true`) - `SLOPOMETRY_ENABLE_COMPLEXITY_FEEDBACK`: Provide feedback to Claude (default: `false`) +# Development + +For working on slopometry itself (not just installing it): + +```bash +git clone https://github.com/TensorTemplar/slopometry +cd slopometry +uv sync --extra dev +uv run pytest +``` + +After making code changes, reinstall to update the global tool: + +```bash +uv tool install . --reinstall --find-links "https://github.com/Droidcraft/rust-code-analysis/releases/expanded_assets/python-2026.1.31" +``` + # Cite ``` From 027c866ca297c565df45afc59e67798d2c812db1 Mon Sep 17 00:00:00 2001 From: TensorTemplar Date: Wed, 24 Jun 2026 15:50:24 +0300 Subject: [PATCH 4/8] stop hook: add concrete marker comment hint to swallowed-exception feedback After ACTION REQUIRED lists the swallowed_exception / acknowledged_silent_except files, append a concrete example showing the exact comment format ('\# slopometry: allow-silent - ') so a human reviewer can copy-paste it to acknowledge the handler after review and stop it from blocking next time. 
The hint appears only when a swallow-related smell is blocking (so it doesn't clutter feedback for unrelated smells), and is symmetric: both the swallowed (initial review) and acknowledged (mass-suppression review) branches point at the same comment format and the inverse operation (remove the marker to revert). Also expanded the SmellDefinition.guidance text and the SmellCounts Field descriptions for swallowed_exception, acknowledged_silent_except, and test_skip to document the marker comment format + how to revert, so the schema/docs stay in sync with the runtime hint. 3 new tests (swallow_hint_shown_when_swallowed_blocking, swallow_hint_shown_when_acknowledged_increased, swallow_hint_absent_when_no_swallow_smell) verify the hint is emitted exactly when expected. --- src/slopometry/core/hook_handler.py | 31 ++++++++++++ src/slopometry/core/models/core.py | 18 ++++++- src/slopometry/core/models/smell.py | 30 ++++++++++-- tests/test_hook_handler.py | 75 +++++++++++++++++++++++++++++ 4 files changed, 149 insertions(+), 5 deletions(-) diff --git a/src/slopometry/core/hook_handler.py b/src/slopometry/core/hook_handler.py index de5002e..d270028 100644 --- a/src/slopometry/core/hook_handler.py +++ b/src/slopometry/core/hook_handler.py @@ -455,6 +455,35 @@ def scope_smells_for_session( return result +def _any_swallow_smell_in_blocking(blocking_smells: list[ScopedSmell]) -> bool: + """True if any blocking smell is swallowed_exception or acknowledged_silent_except.""" + return any(s.name in ("swallowed_exception", "acknowledged_silent_except") for s in blocking_smells) + + +def _swallow_marker_hint_lines() -> list[str]: + """Concrete example of how to mark a silent handler as acknowledged. + + Shown after the ACTION REQUIRED block when a swallow-related smell is + blocking, so a human reviewer can copy the exact comment format to + exclude the detection on the next pass. 
+ """ + return [ + "**To acknowledge after review** (so the handler stops blocking next time):", + "", + "```python", + "try:", + " acquire_lock()", + "except Exception:", + " pass # slopometry: allow-silent - lock already released on context exit", + "```", + "", + "Place `# slopometry: allow-silent - ` on the same line as the", + "suppressing statement (`pass`/`continue`). To revert back to blocking review,", + "remove the marker comment.", + "", + ] + + def format_code_smell_feedback( scoped_smells: list[ScopedSmell], session_id: str | None = None, @@ -499,6 +528,8 @@ def format_code_smell_feedback( if smell.guidance: lines.append(f" → {smell.guidance}") lines.append("") + if _any_swallow_smell_in_blocking(blocking_requiring_action): + lines.extend(_swallow_marker_hint_lines()) smells_increased = [s for s in other_smells if s.change > 0] smells_decreased = [s for s in other_smells if s.change < 0] diff --git a/src/slopometry/core/models/core.py b/src/slopometry/core/models/core.py index 2dc0abc..9b70b6d 100644 --- a/src/slopometry/core/models/core.py +++ b/src/slopometry/core/models/core.py @@ -211,11 +211,25 @@ class ExtendedComplexityMetrics(ComplexityMetrics): ) swallowed_exception_count: int = Field( default=0, - description="BLOCKING: You MUST present a table with columns [Location | Purpose | Justification ] for each and ask user to confirm silent failure is acceptable", + description=( + "BLOCKING: You MUST present a table with columns " + "[Location | Purpose | Justification] for each and ask user to confirm " + "silent failure is acceptable. To acknowledge a handler as intentional " + "after user review (so it stops blocking next time), add " + "`# slopometry: allow-silent - ` on the same line as the " + "suppressing statement. This moves the handler out of " + "`swallowed_exception` into `acknowledged_silent_except`." 
+ ), ) acknowledged_silent_except_count: int = Field( default=0, - description="Silent except handlers explicitly marked `# slopometry: allow-silent`. An increase is blocking — confirm newly-suppressed handlers are justified.", + description=( + "Silent except handlers explicitly marked `# slopometry: allow-silent`. " + "An increase is blocking — confirm newly-suppressed handlers are " + "justified. To revert a handler back to `swallowed_exception` (force " + "review again next time), remove the `# slopometry: allow-silent` " + "comment from the same line." + ), ) type_ignore_count: int = Field( default=0, diff --git a/src/slopometry/core/models/smell.py b/src/slopometry/core/models/smell.py index 809bb23..e3d433f 100644 --- a/src/slopometry/core/models/smell.py +++ b/src/slopometry/core/models/smell.py @@ -52,7 +52,16 @@ class SmellDefinition(BaseModel): label="Swallowed Exceptions", category=SmellCategory.GENERAL, weight=0.15, - guidance="BLOCKING: You MUST present a table with columns [Location | Purpose | Justification ] for each and ask user to confirm silent failure is acceptable", + guidance=( + "BLOCKING: You MUST present a table with columns " + "[Location | Purpose | Justification] for each and ask user to confirm " + "silent failure is acceptable. To acknowledge a handler as intentional " + "after user review (so it stops blocking next time), add " + "`# slopometry: allow-silent - ` on the same line as the " + "suppressing statement (e.g., `# slopometry: allow-silent - " + "lock already released on context exit`). This moves the handler out of " + "`swallowed_exception` into `acknowledged_silent_except`." + ), count_field="swallowed_exception_count", files_field="swallowed_exception_files", ), @@ -61,7 +70,16 @@ class SmellDefinition(BaseModel): label="Acknowledged Silent Excepts", category=SmellCategory.GENERAL, weight=0.05, - guidance="BLOCKING (on increase): These silent except handlers are marked `# slopometry: allow-silent`. 
An individual marker is fine, but a rise means new silent handlers were suppressed this session. Present a table [Location | Purpose | Justification] for the NEW ones and confirm each genuinely needs no logging/handling — this prevents mass-suppression of real swallowed exceptions", + guidance=( + "BLOCKING (on increase): These silent except handlers are marked " + "`# slopometry: allow-silent`. An individual marker is fine, but a rise " + "means new silent handlers were suppressed this session. Present a table " + "[Location | Purpose | Justification] for the NEW ones and confirm each " + "genuinely needs no logging/handling — this prevents mass-suppression of " + "real swallowed exceptions. To revert a handler back to " + "`swallowed_exception` (force review again next time), remove the " + "`# slopometry: allow-silent` comment from the same line." + ), count_field="acknowledged_silent_except_count", files_field="acknowledged_silent_except_files", ), @@ -70,7 +88,13 @@ class SmellDefinition(BaseModel): label="Test Skips", category=SmellCategory.GENERAL, weight=0.10, - guidance="BLOCKING: You MUST present a table with columns [Test Name | Intent] for each skip and ask user to confirm skipping is acceptable", + guidance=( + "BLOCKING: You MUST present a table with columns [Test Name | Intent] " + "for each skip and ask user to confirm skipping is acceptable. If the " + "skip is conditional on missing external data (no API key, missing host " + "session, etc.), document the precondition in the skip message so the " + "intent is obvious." 
+ ), count_field="test_skip_count", files_field="test_skip_files", ), diff --git a/tests/test_hook_handler.py b/tests/test_hook_handler.py index e8aa9ce..4bb4d86 100644 --- a/tests/test_hook_handler.py +++ b/tests/test_hook_handler.py @@ -188,6 +188,81 @@ def test_format_code_smell_feedback__includes_actionable_guidance(self): assert "BLOCKING" in feedback assert "table" in feedback + def test_format_code_smell_feedback__swallow_hint_shown_when_swallowed_blocking(self): + """Concrete marker comment format appears after ACTION REQUIRED when swallow-related smell blocks.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + subprocess.run(["git", "init"], cwd=tmppath, capture_output=True) + subprocess.run(["git", "config", "user.email", "test@test.com"], cwd=tmppath, capture_output=True) + subprocess.run(["git", "config", "user.name", "Test"], cwd=tmppath, capture_output=True) + + src_dir = tmppath / "src" + src_dir.mkdir() + (src_dir / "bar.py").write_text("def bar(): pass") + subprocess.run(["git", "add", "."], cwd=tmppath, capture_output=True) + subprocess.run(["git", "commit", "-m", "init"], cwd=tmppath, capture_output=True) + + metrics = self._make_metrics( + swallowed_exception_count=1, + swallowed_exception_files=["src/bar.py"], + ) + scoped = scope_smells_for_session(metrics, None, {"src/bar.py"}, str(tmppath)) + feedback, _, _ = format_code_smell_feedback(scoped) + + assert "# slopometry: allow-silent" in feedback + assert "lock already released on context exit" in feedback + assert "**To acknowledge after review**" in feedback + assert "Place `# slopometry: allow-silent - `" in feedback + + def test_format_code_smell_feedback__swallow_hint_shown_when_acknowledged_increased(self): + """Hint also appears when acknowledged_silent_except increases (potential mass-suppression).""" + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + subprocess.run(["git", "init"], cwd=tmppath, capture_output=True) + 
subprocess.run(["git", "config", "user.email", "test@test.com"], cwd=tmppath, capture_output=True) + subprocess.run(["git", "config", "user.name", "Test"], cwd=tmppath, capture_output=True) + + src_dir = tmppath / "src" + src_dir.mkdir() + (src_dir / "bar.py").write_text("def bar(): pass") + subprocess.run(["git", "add", "."], cwd=tmppath, capture_output=True) + subprocess.run(["git", "commit", "-m", "init"], cwd=tmppath, capture_output=True) + + metrics = self._make_metrics( + acknowledged_silent_except_count=3, + acknowledged_silent_except_files=["src/bar.py"], + ) + delta = ComplexityDelta(acknowledged_silent_except_change=2) + scoped = scope_smells_for_session(metrics, delta, {"src/bar.py"}, str(tmppath)) + feedback, _, _ = format_code_smell_feedback(scoped) + + assert "# slopometry: allow-silent" in feedback + assert "lock already released on context exit" in feedback + + def test_format_code_smell_feedback__swallow_hint_absent_when_no_swallow_smell(self): + """Hint does NOT appear when a non-swallow smell is blocking.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + subprocess.run(["git", "init"], cwd=tmppath, capture_output=True) + subprocess.run(["git", "config", "user.email", "test@test.com"], cwd=tmppath, capture_output=True) + subprocess.run(["git", "config", "user.name", "Test"], cwd=tmppath, capture_output=True) + + src_dir = tmppath / "src" + src_dir.mkdir() + (src_dir / "bar.py").write_text("def bar(): pass") + subprocess.run(["git", "add", "."], cwd=tmppath, capture_output=True) + subprocess.run(["git", "commit", "-m", "init"], cwd=tmppath, capture_output=True) + + metrics = self._make_metrics( + test_skip_count=1, + test_skip_files=["src/bar.py"], + ) + scoped = scope_smells_for_session(metrics, None, {"src/bar.py"}, str(tmppath)) + feedback, _, _ = format_code_smell_feedback(scoped) + + assert "**To acknowledge after review**" not in feedback + assert "lock already released on context exit" not in feedback + def 
test_format_code_smell_feedback__test_skips_are_blocking(self): """Test that test skips are marked as blocking when related file edited.""" with tempfile.TemporaryDirectory() as tmpdir: From aba5b665d09403e0ec7c5565666e72ba77ae52c3 Mon Sep 17 00:00:00 2001 From: TensorTemplar Date: Sun, 28 Jun 2026 15:07:55 +0300 Subject: [PATCH 5/8] review fixes: explicit config, test naming, missing tests, dead code cleanup - Add stdin_timeout_seconds to Settings; remove 5.0s magic number from dispatch.py - Remove migrations 016-018: tables are new in this branch, _create_tables has final schema - Add include_superseded param to get_memories (filters superseded_by IS NULL by default) - Remove redundant mark_session_processed from save_memories (caller owns that) - Make source param required on mark_session_processed/is_session_processed - Add FreshnessAction StrEnum with .color property - Add FreshnessVerdict + LLMMemoryCandidate pydantic models for structured LLM JSON parsing - Create llm_text.py with strip_llm_wrappers() + parse_llm_json() shared helpers - Convert MemoryFreshnessValidator class to validate_freshness() function - Make freshness thresholds configurable via Settings (floor/ceiling) - Remove api_key='dummy' defaults from MemoryExtractor/MemoryFreshnessValidator - Extract platform path helpers to settings.py (get_claude_projects_dirs, get_opencode_storage_root) - Simplify EmbeddingService: move openai import inside try block - Use FreshnessAction enum comparisons instead of string literals in commands.py and tests - Move inline imports to module top-level (Counter/defaultdict, parse_llm_json, settings) - Add return type annotations to _capture_git_state/_capture_project in dispatch.py - Rename all tests to test___ convention - Add tests: superseded filtering, save_memories not marking session, preflight unreachable endpoint - ruff: 0 errors; pyright: 0 errors; pytest: 979 passed, 5 skipped, 3 pre-existing failures --- src/slopometry/core/database.py | 45 ++++-- 
src/slopometry/core/hook_handler.py | 120 +++++++------- src/slopometry/core/migrations.py | 60 ------- src/slopometry/core/models/memory.py | 51 +++++- .../core/protocol/adapters/claude_code.py | 4 + .../core/protocol/adapters/opencode.py | 8 +- src/slopometry/core/protocol/dispatch.py | 47 ++++-- src/slopometry/core/protocol/session.py | 4 +- src/slopometry/core/settings.py | 57 ++++++- src/slopometry/solo/cli/commands.py | 64 ++++---- .../solo/services/embedding_service.py | 5 - src/slopometry/solo/services/llm_text.py | 47 ++++++ .../solo/services/memory_extractor.py | 45 ++---- .../solo/services/memory_freshness.py | 152 ++++++++---------- .../solo/services/memory_service.py | 14 +- .../solo/services/transcript_finder.py | 80 +++++---- tests/test_embedding_service.py | 27 ++-- tests/test_memory_extractor.py | 36 ++--- tests/test_memory_freshness.py | 148 +++++++++-------- tests/test_memory_service.py | 142 +++++++++++----- tests/test_opencode_memory_integration.py | 61 ++++--- tests/test_preflight.py | 31 +++- tests/test_transcript_finder.py | 23 +-- 23 files changed, 731 insertions(+), 540 deletions(-) create mode 100644 src/slopometry/solo/services/llm_text.py diff --git a/src/slopometry/core/database.py b/src/slopometry/core/database.py index 3997c4f..138051d 100644 --- a/src/slopometry/core/database.py +++ b/src/slopometry/core/database.py @@ -391,7 +391,7 @@ def _create_tables(self) -> None: source TEXT NOT NULL DEFAULT 'claude_code', processed_at TEXT NOT NULL, memory_count INTEGER NOT NULL DEFAULT 0, - PRIMARY KEY (session_id, project_dir) + PRIMARY KEY (session_id, project_dir, source) ) """) @@ -1193,13 +1193,15 @@ def cleanup_all_sessions(self) -> tuple[int, int, int]: def save_experiment_run(self, experiment: ExperimentRun) -> None: """Save an experiment run to the database.""" + nfp_id = experiment.nfp_objective.id if experiment.nfp_objective else None with self._get_db_connection() as conn: conn.execute( """ INSERT OR REPLACE INTO experiment_runs 
( id, repository_path, start_commit, target_commit, - process_id, worktree_path, start_time, end_time, status - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + process_id, worktree_path, start_time, end_time, status, + nfp_objective_id + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( experiment.id, @@ -1211,23 +1213,26 @@ def save_experiment_run(self, experiment: ExperimentRun) -> None: experiment.start_time.isoformat(), experiment.end_time.isoformat() if experiment.end_time else None, experiment.status.value, + nfp_id, ), ) conn.commit() def update_experiment_run(self, experiment: ExperimentRun) -> None: """Update an existing experiment run.""" + nfp_id = experiment.nfp_objective.id if experiment.nfp_objective else None with self._get_db_connection() as conn: conn.execute( """ UPDATE experiment_runs - SET status = ?, end_time = ?, worktree_path = ? + SET status = ?, end_time = ?, worktree_path = ?, nfp_objective_id = ? WHERE id = ? """, ( experiment.status.value, experiment.end_time.isoformat() if experiment.end_time else None, str(experiment.worktree_path) if experiment.worktree_path else None, + nfp_id, experiment.id, ), ) @@ -2178,7 +2183,7 @@ def save_memory(self, memory: MemoryEntry) -> None: with self._get_db_connection() as conn: conn.execute( """ - INSERT INTO memories ( + INSERT OR REPLACE INTO memories ( id, session_id, project_dir, memory_type, content, source_context, created_at, updated_at, retained, superseded_by, embedding, metadata @@ -2238,12 +2243,24 @@ def get_memories( project_dir: str | None = None, memory_type: str | None = None, limit: int = 50, + include_superseded: bool = False, ) -> list[MemoryEntry]: - """Get memories with optional filters.""" + """Get memories with optional filters. + + Args: + include_superseded: When False (default), exclude memories that + have been linked to a newer replacement via ``superseded_by``. 
+ The freshness validator must pass False so it compares new + candidates only against the current truth, not stale chain + predecessors. + """ with self._get_db_connection() as conn: query = "SELECT * FROM memories WHERE 1=1" params: list = [] + if not include_superseded: + query += " AND superseded_by IS NULL" + if project_dir: query += " AND project_dir = ?" params.append(project_dir) @@ -2271,7 +2288,7 @@ def get_memories( created_at=datetime.fromisoformat(row["created_at"]), updated_at=datetime.fromisoformat(row["updated_at"]) if row["updated_at"] else None, retained=bool(row["retained"]), - superseded_by=row["superseded_by"] if "superseded_by" in row.keys() else None, + superseded_by=row["superseded_by"], embedding=json.loads(row["embedding"]) if row["embedding"] else None, metadata=json.loads(row["metadata"]) if row["metadata"] else None, ) @@ -2355,7 +2372,7 @@ def update_memory( return bool(cursor.rowcount and cursor.rowcount > 0) def mark_session_processed( - self, session_id: str, project_dir: str, memory_count: int, source: str = "claude_code" + self, session_id: str, project_dir: str, memory_count: int, source: str ) -> None: """Mark a session as processed for memory extraction.""" with self._get_db_connection() as conn: @@ -2368,7 +2385,7 @@ def mark_session_processed( (session_id, project_dir, source, datetime.now().isoformat(), memory_count), ) - def is_session_processed(self, session_id: str, project_dir: str, source: str = "claude_code") -> bool: + def is_session_processed(self, session_id: str, project_dir: str, source: str) -> bool: """Check if a session has already been processed for memories.""" with self._get_db_connection() as conn: cursor = conn.execute( @@ -2378,13 +2395,13 @@ def is_session_processed(self, session_id: str, project_dir: str, source: str = return cursor.fetchone() is not None def get_memory_stats(self, project_dir: str | None = None) -> dict: - """Get statistics about stored memories.""" + """Get statistics about stored 
memories (excludes superseded).""" with self._get_db_connection() as conn: - base_query = "SELECT memory_type, COUNT(*) as count FROM memories" + base_query = "SELECT memory_type, COUNT(*) as count FROM memories WHERE superseded_by IS NULL" params: list = [] if project_dir: - base_query += " WHERE project_dir = ?" + base_query += " AND project_dir = ?" params.append(project_dir) base_query += " GROUP BY memory_type" @@ -2392,9 +2409,9 @@ def get_memory_stats(self, project_dir: str | None = None) -> dict: rows = conn.execute(base_query, params).fetchall() type_distribution = {row[0]: row[1] for row in rows} - total_query = "SELECT COUNT(*) FROM memories" + total_query = "SELECT COUNT(*) FROM memories WHERE superseded_by IS NULL" if project_dir: - total_query += " WHERE project_dir = ?" + total_query += " AND project_dir = ?" total = conn.execute(total_query, params).fetchone()[0] or 0 return { diff --git a/src/slopometry/core/hook_handler.py b/src/slopometry/core/hook_handler.py index d270028..2b64895 100644 --- a/src/slopometry/core/hook_handler.py +++ b/src/slopometry/core/hook_handler.py @@ -19,6 +19,7 @@ from slopometry.core.database import EventDatabase from slopometry.core.git_tracker import GitTracker +from slopometry.core.lock import SlopometryLock from slopometry.core.models.complexity import ComplexityDelta, ExtendedComplexityMetrics from slopometry.core.models.hook import FeedbackCacheState from slopometry.core.models.protocol.events import AbstractEventSource, AbstractEventType @@ -211,72 +212,77 @@ def handle_stop_event(session_id: str, working_directory: str | None = None) -> if not working_directory: return 0 - cached_state = _load_feedback_cache(working_directory) - if cached_state is not None and cached_state.commit_sha is not None: - current_sha = _get_current_commit_sha(working_directory) - if current_sha == cached_state.commit_sha and not _has_source_changes(working_directory): + lock = SlopometryLock(project_dir=working_directory) + with 
lock.acquire() as acquired: + if not acquired: return 0 - if not _has_analyzable_source_files(working_directory): - return 0 + cached_state = _load_feedback_cache(working_directory) + if cached_state is not None and cached_state.commit_sha is not None: + current_sha = _get_current_commit_sha(working_directory) + if current_sha == cached_state.commit_sha and not _has_source_changes(working_directory): + return 0 - cache_key = _compute_working_tree_cache_key(working_directory) - if cached_state is not None and cached_state.last_key == cache_key: - return 0 + if not _has_analyzable_source_files(working_directory): + return 0 - db = EventDatabase() - stats = db.get_session_statistics(session_id) - if not stats: - return 0 + cache_key = _compute_working_tree_cache_key(working_directory) + if cached_state is not None and cached_state.last_key == cache_key: + return 0 - current_metrics = stats.complexity_metrics - delta = stats.complexity_delta + db = EventDatabase() + stats = db.get_session_statistics(session_id) + if not stats: + return 0 - wt_calculator = WorkingTreeStateCalculator(working_directory, languages=None) - current_file_hashes = wt_calculator.get_source_file_content_hashes() + current_metrics = stats.complexity_metrics + delta = stats.complexity_delta - if cached_state is not None: - edited_files = wt_calculator.get_files_changed_since(cached_state.file_hashes) - else: - edited_files = wt_calculator.get_modified_source_file_paths() + wt_calculator = WorkingTreeStateCalculator(working_directory, languages=None) + current_file_hashes = wt_calculator.get_source_file_content_hashes() - feedback_parts: list[str] = [] + if cached_state is not None: + edited_files = wt_calculator.get_files_changed_since(cached_state.file_hashes) + else: + edited_files = wt_calculator.get_modified_source_file_paths() - if current_metrics: - scoped_smells = scope_smells_for_session( - current_metrics, delta, edited_files, working_directory, stats.context_coverage - ) - code_smells = 
[s for s in scoped_smells if s.name != "unread_related_tests"] - context_smells = [s for s in scoped_smells if s.name == "unread_related_tests"] - code_feedback, has_code_smells, _ = format_code_smell_feedback(code_smells, session_id) - if has_code_smells: - feedback_parts.append(code_feedback) - context_smell_feedback, has_context_smells, _ = format_code_smell_feedback(context_smells, session_id) - if has_context_smells: - feedback_parts.append(context_smell_feedback) - - if settings.enable_complexity_feedback and stats.context_coverage and stats.context_coverage.has_gaps: - context_feedback = format_context_coverage_feedback(stats.context_coverage) - if context_feedback: - feedback_parts.append(context_feedback) - - if settings.feedback_dev_guidelines: - dev_guidelines = extract_dev_guidelines_from_claude_md(working_directory) - if dev_guidelines: - feedback_parts.append(f"\n**Project Development Guidelines:**\n{dev_guidelines}") - - current_commit_sha = _get_current_commit_sha(working_directory) - _save_feedback_cache(working_directory, cache_key, current_file_hashes, commit_sha=current_commit_sha) - - if feedback_parts: - feedback = "\n\n".join(feedback_parts) - feedback += ( - f"\n\n---\n**Session**: `{session_id}` | Details: `slopometry solo show {session_id} --smell-details`" - ) - hook_output = {"decision": "block", "reason": feedback} - print(json.dumps(hook_output)) - return 2 - return 0 + feedback_parts: list[str] = [] + + if current_metrics: + scoped_smells = scope_smells_for_session( + current_metrics, delta, edited_files, working_directory, stats.context_coverage + ) + code_smells = [s for s in scoped_smells if s.name != "unread_related_tests"] + context_smells = [s for s in scoped_smells if s.name == "unread_related_tests"] + code_feedback, has_code_smells, _ = format_code_smell_feedback(code_smells, session_id) + if has_code_smells: + feedback_parts.append(code_feedback) + context_smell_feedback, has_context_smells, _ = 
format_code_smell_feedback(context_smells, session_id) + if has_context_smells: + feedback_parts.append(context_smell_feedback) + + if settings.enable_complexity_feedback and stats.context_coverage and stats.context_coverage.has_gaps: + context_feedback = format_context_coverage_feedback(stats.context_coverage) + if context_feedback: + feedback_parts.append(context_feedback) + + if settings.feedback_dev_guidelines: + dev_guidelines = extract_dev_guidelines_from_claude_md(working_directory) + if dev_guidelines: + feedback_parts.append(f"\n**Project Development Guidelines:**\n{dev_guidelines}") + + current_commit_sha = _get_current_commit_sha(working_directory) + _save_feedback_cache(working_directory, cache_key, current_file_hashes, commit_sha=current_commit_sha) + + if feedback_parts: + feedback = "\n\n".join(feedback_parts) + feedback += ( + f"\n\n---\n**Session**: `{session_id}` | Details: `slopometry solo show {session_id} --smell-details`" + ) + hook_output = {"decision": "block", "reason": feedback} + print(json.dumps(hook_output)) + return 2 + return 0 def format_context_coverage_feedback(coverage: ContextCoverage) -> str: diff --git a/src/slopometry/core/migrations.py b/src/slopometry/core/migrations.py index 1856a51..917615b 100644 --- a/src/slopometry/core/migrations.py +++ b/src/slopometry/core/migrations.py @@ -547,64 +547,6 @@ def up(self, conn: sqlite3.Connection) -> None: ) -class Migration016AddMemorySupersededByColumn(Migration): - """Add superseded_by column to memories for lineage tracking. - - Distinct from `retained`, which is a user/system retention decision. - `superseded_by` records the id of the newer memory that an LLM judge - decided replaces this one. A separate cleanup pass can prune the chain. 
- """ - - @property - def version(self) -> str: - return "016" - - @property - def description(self) -> str: - return "Add superseded_by column to memories for LLM-judged lineage tracking" - - def up(self, conn: sqlite3.Connection) -> None: - cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='memories'") - if not cursor.fetchone(): - return - cursor = conn.execute("PRAGMA table_info(memories)") - if any(row[1] == "superseded_by" for row in cursor.fetchall()): - return - conn.execute("ALTER TABLE memories ADD COLUMN superseded_by TEXT") - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_memories_superseded_by ON memories(superseded_by)" - ) - - -class Migration017AddProcessedMemorySourceColumn(Migration): - """Add source column to processed_memory_sessions for harness routing. - - Pre-MultiHarness rows had no source column; their session IDs were bare - (no ``:`` prefix). New rows carry an explicit source - (``claude_code`` or ``opencode``) to avoid cross-harness collisions in the - ``(session_id, project_dir)`` primary key. 
- """ - - @property - def version(self) -> str: - return "017" - - @property - def description(self) -> str: - return "Add source column to processed_memory_sessions; default existing rows to claude_code" - - def up(self, conn: sqlite3.Connection) -> None: - cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='processed_memory_sessions'") - if not cursor.fetchone(): - return - cursor = conn.execute("PRAGMA table_info(processed_memory_sessions)") - if any(row[1] == "source" for row in cursor.fetchall()): - return - conn.execute( - "ALTER TABLE processed_memory_sessions ADD COLUMN source TEXT NOT NULL DEFAULT 'claude_code'" - ) - - class MigrationRunner: """Manages database migrations.""" @@ -626,8 +568,6 @@ def __init__(self, db_path: Path): Migration013AddSourceAndParentSession(), Migration014AddBehavioralPatternHistory(), Migration015AbstractEventTypeValues(), - Migration016AddMemorySupersededByColumn(), - Migration017AddProcessedMemorySourceColumn(), ] @contextmanager diff --git a/src/slopometry/core/models/memory.py b/src/slopometry/core/models/memory.py index 4376cfd..b9303d5 100644 --- a/src/slopometry/core/models/memory.py +++ b/src/slopometry/core/models/memory.py @@ -3,7 +3,7 @@ from datetime import datetime from enum import StrEnum -from pydantic import BaseModel +from pydantic import BaseModel, Field class MemoryType(StrEnum): @@ -15,6 +15,28 @@ class MemoryType(StrEnum): REFERENCE = "reference" +class FreshnessAction(StrEnum): + """The four reconciliation verdicts an LLM judge can return for a memory pair.""" + + KEEP_BOTH = "keep_both" + MERGE = "merge" + SUPERSEDE = "supersede" + DEDUPE = "dedupe" + + @property + def color(self) -> str: + """Rich console color for display.""" + match self: + case FreshnessAction.KEEP_BOTH: + return "green" + case FreshnessAction.MERGE: + return "cyan" + case FreshnessAction.SUPERSEDE: + return "yellow" + case FreshnessAction.DEDUPE: + return "magenta" + + class MemoryEntry(BaseModel): 
"""Represents a stored memory entry.""" @@ -48,3 +70,30 @@ class MemoryCreateRequest(BaseModel): session_id: str project_dir: str candidates: list[MemoryCandidate] + + +class LLMMemoryCandidate(BaseModel): + """Raw LLM-extracted memory candidate before enrichment. + + The extraction LLM returns a JSON array of these. ``memory_type`` is + validated against the canonical ``MemoryType`` enum — invalid types + cause the candidate to be skipped rather than raising. + """ + + memory_type: MemoryType + content: str + source_context: str | None = None + + +class FreshnessVerdict(BaseModel): + """Structured LLM judge response for a single memory reconciliation pair. + + ``merged_content`` is only present when ``action == merge``. + """ + + action: FreshnessAction + reason: str = "" + merged_content: str | None = Field( + default=None, + description="Only present when action == merge", + ) diff --git a/src/slopometry/core/protocol/adapters/claude_code.py b/src/slopometry/core/protocol/adapters/claude_code.py index dae6634..450bb03 100644 --- a/src/slopometry/core/protocol/adapters/claude_code.py +++ b/src/slopometry/core/protocol/adapters/claude_code.py @@ -145,6 +145,10 @@ class ClaudeCodeAdapter: source = AbstractEventSource.CLAUDE_CODE tool_type_map: dict[str, str] = {name: enum.value for name, enum in _TOOL_NAME_TO_TYPE.items()} + @classmethod + def map_tool_name(cls, tool_name: str) -> str: + return resolve_tool_type(tool_name) + def detect_event_type(self, raw_payload: dict[str, Any]) -> AbstractEventType: fields = set(raw_payload.keys()) if "tool_name" in fields and "tool_input" in fields: diff --git a/src/slopometry/core/protocol/adapters/opencode.py b/src/slopometry/core/protocol/adapters/opencode.py index 5001f5b..9ea748b 100644 --- a/src/slopometry/core/protocol/adapters/opencode.py +++ b/src/slopometry/core/protocol/adapters/opencode.py @@ -21,7 +21,7 @@ AbstractHookEvent, ToolCallPayload, ) -from slopometry.core.protocol.adapters.claude_code import 
resolve_tool_type +from slopometry.core.protocol.adapters.claude_code import _TOOL_NAME_TO_TYPE, resolve_tool_type _OPENCODE_TYPE_TO_ABSTRACT: dict[str, AbstractEventType] = { "pre_tool_use": AbstractEventType.TOOL_CALL_STARTED, @@ -42,7 +42,11 @@ def resolve_opencode_event_type(event_type: str) -> AbstractEventType: class OpenCodeAdapter: source = AbstractEventSource.OPENCODE - tool_type_map: dict[str, str] = {} + tool_type_map: dict[str, str] = {name: enum.value for name, enum in _TOOL_NAME_TO_TYPE.items()} + + @classmethod + def map_tool_name(cls, tool_name: str) -> str: + return resolve_tool_type(tool_name) def detect_event_type(self, raw_payload: dict[str, Any]) -> AbstractEventType: event_type = raw_payload.get("event_type") diff --git a/src/slopometry/core/protocol/dispatch.py b/src/slopometry/core/protocol/dispatch.py index 515eb08..8c9e50a 100644 --- a/src/slopometry/core/protocol/dispatch.py +++ b/src/slopometry/core/protocol/dispatch.py @@ -12,21 +12,33 @@ import json import logging import os +import select +import sys from datetime import datetime from pathlib import Path from slopometry.core.git_tracker import GitTracker from slopometry.core.lock import SlopometryLock +from slopometry.core.models.hook import GitState, Project from slopometry.core.models.protocol.events import AbstractEventSource, AbstractEventType, AbstractHookEvent from slopometry.core.project_tracker import ProjectTracker from slopometry.core.protocol.adapters.base import ADAPTERS from slopometry.core.protocol.session import SessionManager +from slopometry.core.settings import settings logger = logging.getLogger(__name__) +_SESSION_MANAGERS: dict[str, SessionManager] = {} -def _capture_git_state(event_type: AbstractEventType, sequence_number: int): - tracker = GitTracker() + +def _session_manager(source: str) -> SessionManager: + if source not in _SESSION_MANAGERS: + _SESSION_MANAGERS[source] = SessionManager(source=source) + return _SESSION_MANAGERS[source] + + +def 
_capture_git_state(event_type: AbstractEventType, sequence_number: int, working_directory: str) -> GitState | None: + tracker = GitTracker(Path(working_directory)) match (event_type, sequence_number): case (AbstractEventType.TOOL_CALL_STARTED, 1) | (AbstractEventType.TURN_COMPLETED, _): return tracker.get_git_state() @@ -34,7 +46,7 @@ def _capture_git_state(event_type: AbstractEventType, sequence_number: int): return None -def _capture_project(working_directory: str): +def _capture_project(working_directory: str) -> Project | None: return ProjectTracker(working_dir=Path(working_directory)).get_project() @@ -74,20 +86,18 @@ def dispatch_event( event_type_override=event_type_override, ) - session_manager = SessionManager(source=source.value) - event.sequence_number = session_manager.get_next_sequence_number(event.session_id) - event.git_state = _capture_git_state(event.event_type, event.sequence_number) + event.sequence_number = _session_manager(source.value).get_next_sequence_number(event.session_id) + event.git_state = _capture_git_state(event.event_type, event.sequence_number, cwd) event.project = _capture_project(event.working_directory) lock = SlopometryLock(project_dir=cwd) with lock.acquire() as acquired: if not acquired: - logger.debug("Could not acquire lock, skipping event persistence for %s", event.session_id) - return event + logger.warning("Could not acquire lock, event not persisted for %s", event.session_id) + else: + from slopometry.core.database import EventDatabase - from slopometry.core.database import EventDatabase - - EventDatabase().save_event(event) + EventDatabase().save_event(event) return event @@ -101,9 +111,10 @@ def emit_event_from_stdin( Used by the `slopometry emit-event` CLI subcommand and by harness-specific entry points after they have read their stdin. 
""" - import sys - try: + ready, _, _ = select.select([sys.stdin], [], [], settings.stdin_timeout_seconds) + if not ready: + return 0 stdin_input = sys.stdin.read().strip() except Exception: return 0 @@ -113,11 +124,15 @@ def emit_event_from_stdin( try: raw_payload = json.loads(stdin_input) except json.JSONDecodeError as e: - from slopometry.core.settings import settings - if settings.debug_mode: print(f"Slopometry: Failed to parse event JSON: {e}", file=sys.stderr) return 0 - dispatch_event(source, raw_payload, event_type_override=event_type_override) + try: + dispatch_event(source, raw_payload, event_type_override=event_type_override) + except Exception as e: + if settings.debug_mode: + print(f"Slopometry dispatch error: {e}", file=sys.stderr) + return 0 + return 0 diff --git a/src/slopometry/core/protocol/session.py b/src/slopometry/core/protocol/session.py index 696b997..4eac948 100644 --- a/src/slopometry/core/protocol/session.py +++ b/src/slopometry/core/protocol/session.py @@ -8,6 +8,8 @@ import logging from pathlib import Path +from slopometry.core.models.protocol.events import AbstractEventSource + logger = logging.getLogger(__name__) _LEGACY_STATE_DIR = Path.home() / ".claude" / "slopometry" @@ -35,7 +37,7 @@ def __init__( self._migrate_legacy_files() def _migrate_legacy_files(self) -> None: - if self.source != "claude_code": + if self.source != AbstractEventSource.CLAUDE_CODE.value: return if not _LEGACY_STATE_DIR.exists(): return diff --git a/src/slopometry/core/settings.py b/src/slopometry/core/settings.py index 0975e6d..dfe3778 100644 --- a/src/slopometry/core/settings.py +++ b/src/slopometry/core/settings.py @@ -43,6 +43,33 @@ def get_default_config_dir() -> Path: return Path.home() / ".config" / app_name +def get_claude_projects_dirs() -> list[Path]: + """Platform-specific directories where Claude Code stores project transcripts.""" + if sys.platform == "win32": + base = Path(os.environ["LOCALAPPDATA"]) if os.environ.get("LOCALAPPDATA") else 
Path.home() / "AppData" / "Local" + return [base / "Claude" / "projects"] + elif sys.platform == "darwin": + return [Path.home() / "Library" / "Application Support" / "Claude" / "projects"] + else: + xdg_data_home = os.environ.get("XDG_DATA_HOME") + claude_xdg = Path(xdg_data_home) / "claude" / "projects" if xdg_data_home else None + default_claude = Path.home() / ".claude" / "projects" + if claude_xdg and claude_xdg.exists(): + return [claude_xdg] + return [default_claude] + + +def get_opencode_storage_root() -> Path: + """Platform-specific path to OpenCode's storage root directory.""" + if sys.platform == "win32": + base = Path(os.environ["LOCALAPPDATA"]) if os.environ.get("LOCALAPPDATA") else Path.home() / "AppData" / "Local" + return base / "opencode" / "storage" + xdg_data_home = os.environ.get("XDG_DATA_HOME") + if xdg_data_home: + return Path(xdg_data_home) / "opencode" / "storage" + return Path.home() / ".local" / "share" / "opencode" / "storage" + + class Settings(BaseSettings): """Application settings with support for .env files.""" @@ -202,12 +229,12 @@ def _ensure_global_config_dir() -> None: ) memory_llm_endpoint: str = Field( - default="https://your-llm-endpoint.com/v1", - description="LLM endpoint for memory extraction", + default="", + description="LLM endpoint for memory extraction; must be set explicitly", ) memory_llm_model: str = Field( - default="your-model-name", - description="Model for memory extraction", + default="", + description="Model for memory extraction; must be set explicitly", ) memory_llm_api_key: SecretStr = Field( default=SecretStr(""), @@ -223,12 +250,26 @@ def _ensure_global_config_dir() -> None: description="API key for memory embedding endpoint", ) memory_embedding_endpoint: str = Field( - default="https://your-embedding-endpoint.com/v1", - description="Embedding model endpoint for memory similarity", + default="", + description="Embedding model endpoint for memory similarity; must be set explicitly", ) 
memory_embedding_model: str = Field( - default="your-embedding-model", - description="Embedding model name", + default="", + description="Embedding model name; must be set explicitly", + ) + + freshness_threshold_floor: float = Field( + default=0.45, + description="Minimum dedupe similarity threshold; derived threshold never goes below this", + ) + freshness_threshold_ceiling: float = Field( + default=0.95, + description="Maximum dedupe similarity threshold; derived threshold never goes above this", + ) + + stdin_timeout_seconds: float = Field( + default=5.0, + description="Seconds to wait for stdin input in hook dispatch before giving up", ) @field_validator("baseline_strategy", mode="before") diff --git a/src/slopometry/solo/cli/commands.py b/src/slopometry/solo/cli/commands.py index 8967747..e921a2e 100644 --- a/src/slopometry/solo/cli/commands.py +++ b/src/slopometry/solo/cli/commands.py @@ -1,11 +1,13 @@ """CLI commands for solo-leveler features.""" import logging +from collections import Counter, defaultdict from pathlib import Path from typing import TYPE_CHECKING import click +from slopometry.core.models.memory import FreshnessAction from slopometry.display.console import console, styled_pager if TYPE_CHECKING: @@ -888,9 +890,7 @@ def find_memories( console.print("[yellow]No transcripts found for this project.[/yellow]") return - source_counts: dict[str, int] = {} - for t in transcripts: - source_counts[t.source.value] = source_counts.get(t.source.value, 0) + 1 + source_counts: Counter[str] = Counter(t.source.value for t in transcripts) source_breakdown = ", ".join(f"{src}={n}" for src, n in sorted(source_counts.items())) console.print(f"[green]Found {len(transcripts)} transcript(s)[/green] [dim]({source_breakdown})[/dim]\n") @@ -923,7 +923,7 @@ def find_memories( from slopometry.solo.services.transcript_finder import TranscriptFinder storage_root = TranscriptFinder().find_opencode_storage_root() - if storage_root is None: + if not storage_root.is_dir(): 
console.print(" [yellow]OpenCode storage not found[/yellow]") continue cleaned_transcript = memory_extractor.extract_memories_from_opencode_session( @@ -961,17 +961,20 @@ def find_memories( console.print(f" [red]Embedding error for candidate {i+1}: {e}[/red]") raise - from slopometry.solo.services.memory_freshness import MemoryFreshnessValidator + from slopometry.solo.services.memory_freshness import validate_freshness existing_memories = memory_service.get_memories(project_dir=proj_dir_str, limit=200) decisions: list = [] if existing_memories: - freshness_validator = MemoryFreshnessValidator( + decisions, distribution = validate_freshness( + candidates, + existing_memories, llm_endpoint=endpoint, llm_model=model, api_key=api_key, + floor_threshold=settings.freshness_threshold_floor, + ceiling_threshold=settings.freshness_threshold_ceiling, ) - decisions, distribution = freshness_validator.validate(candidates, existing_memories) console.print( f" [dim]Project similarity distribution: " f"n={distribution.n_pairs} " @@ -987,33 +990,32 @@ def find_memories( if decisions: console.print(f" [yellow]Freshness: {len(decisions)} similar pair(s) reviewed:[/yellow]") for decision in decisions: - action_color = { - "keep_both": "green", - "merge": "cyan", - "supersede": "yellow", - "dedupe": "magenta", - }.get(decision.action, "white") + action_color = decision.action.color console.print( - f" [{action_color}]{decision.action.upper()}[/{action_color}]" + f" [{action_color}]{decision.action.value.upper()}[/{action_color}]" f" (sim={decision.similarity:.2f}): " f"[dim]new=[/dim]{decision.new_candidate.content[:80]!r} " f"[dim]existing=[/dim]{decision.existing_memory.content[:80]!r}" ) console.print(f" [dim]REASON:[/dim] {decision.reason}") + decisions_by_candidate = defaultdict(list) for decision in decisions: - if decision.action == "merge" and decision.merged_content: - decision.new_candidate.content = decision.merged_content - elif decision.action == "dedupe": - if 
decision.new_candidate.metadata is None: - decision.new_candidate.metadata = {} - decision.new_candidate.metadata["deduped_against"] = decision.existing_memory.id - for decision in decisions: - if decision.new_candidate.metadata is None: - decision.new_candidate.metadata = {} - decision.new_candidate.metadata["freshness_action"] = decision.action - decision.new_candidate.metadata["freshness_reason"] = decision.reason - if decision.action != "keep_both": - decision.new_candidate.metadata["freshness_pair_with"] = decision.existing_memory.id + decisions_by_candidate[id(decision.new_candidate)].append(decision) + for group in decisions_by_candidate.values(): + candidate = group[0].new_candidate + for decision in group: + if decision.action == FreshnessAction.MERGE and decision.merged_content: + candidate.content = decision.merged_content + elif decision.action == FreshnessAction.DEDUPE: + if candidate.metadata is None: + candidate.metadata = {} + candidate.metadata["deduped_against"] = decision.existing_memory.id + if candidate.metadata is None: + candidate.metadata = {} + candidate.metadata["freshness_action"] = group[0].action + candidate.metadata["freshness_reason"] = group[0].reason + if group[0].action != FreshnessAction.KEEP_BOTH: + candidate.metadata["freshness_pair_with"] = group[0].existing_memory.id from slopometry.core.models.memory import MemoryCreateRequest @@ -1024,12 +1026,10 @@ def find_memories( ) saved = memory_service.save_memories(request) - new_memory_ids: dict[str, str] = {} - for candidate, entry in zip(candidates, saved): - new_memory_ids[candidate.content] = entry.id + candidate_id_map: dict[int, str] = {id(c): e.id for c, e in zip(candidates, saved)} for decision in decisions: - if decision.action == "supersede": - new_id = new_memory_ids.get(decision.new_candidate.content) + if decision.action == FreshnessAction.SUPERSEDE: + new_id = candidate_id_map.get(id(decision.new_candidate)) if new_id is None: continue memory_service.update_memory( 
diff --git a/src/slopometry/solo/services/embedding_service.py b/src/slopometry/solo/services/embedding_service.py index 02ab464..3f4c2a8 100644 --- a/src/slopometry/solo/services/embedding_service.py +++ b/src/slopometry/solo/services/embedding_service.py @@ -32,10 +32,7 @@ def get_embedding(self, text: str) -> list[float]: """ try: from openai import OpenAI - except ImportError: - raise RuntimeError("openai package required for embeddings. Install with: pip install openai") - try: client = OpenAI(base_url=self.endpoint, api_key=self.api_key) response = client.embeddings.create( @@ -47,8 +44,6 @@ def get_embedding(self, text: str) -> list[float]: return response.data[0].embedding raise RuntimeError("Empty response from embedding endpoint") - except RuntimeError: - raise except Exception as e: raise RuntimeError(f"Failed to get embedding: {e}") from e diff --git a/src/slopometry/solo/services/llm_text.py b/src/slopometry/solo/services/llm_text.py new file mode 100644 index 0000000..ff7ce7d --- /dev/null +++ b/src/slopometry/solo/services/llm_text.py @@ -0,0 +1,47 @@ +"""Shared helpers for cleaning LLM response text before JSON parsing.""" + +import json + + +def strip_llm_wrappers(text: str) -> str: + """Remove ```` blocks, markdown code fences, and leading whitespace. + + LLMs frequently wrap JSON responses in ```` ```json ```` fences or emit + reasoning inside ``...`` tags before the + actual payload. This normalizer strips both so the caller can feed the + result directly to ``json.loads``. + + Raises: + TypeError: If ``text`` is not a string (e.g. a raw MagicMock from + an unconfigured mock). 
+ """ + if not isinstance(text, str): + raise TypeError(f"strip_llm_wrappers expected str, got {type(text).__name__}") + + result = text.strip() + + if result.startswith(""): + end_marker = "" + end_idx = result.find(end_marker) + if end_idx != -1: + result = result[end_idx + len(end_marker) :] + while result.startswith("\n"): + result = result[1:] + + if result.startswith("```json"): + result = result[7:] + elif result.startswith("```"): + result = result[3:] + if result.endswith("```"): + result = result[:-3] + + return result.strip() + + +def parse_llm_json(text: str) -> object: + """Strip wrappers and parse the LLM response as JSON. + + Raises: + json.JSONDecodeError: If the cleaned text is not valid JSON. + """ + return json.loads(strip_llm_wrappers(text)) diff --git a/src/slopometry/solo/services/memory_extractor.py b/src/slopometry/solo/services/memory_extractor.py index c9a17d4..a6d57a4 100644 --- a/src/slopometry/solo/services/memory_extractor.py +++ b/src/slopometry/solo/services/memory_extractor.py @@ -6,7 +6,8 @@ from pydantic import BaseModel, ConfigDict, Field -from slopometry.core.models.memory import MemoryCandidate, MemoryType +from slopometry.core.models.memory import LLMMemoryCandidate, MemoryCandidate +from slopometry.solo.services.llm_text import strip_llm_wrappers logger = logging.getLogger(__name__) @@ -143,7 +144,7 @@ class LLMConnectionError(Exception): class MemoryExtractor: """Extracts memory candidates from transcripts using LLM.""" - def __init__(self, llm_endpoint: str, llm_model: str, api_key: str = "dummy"): + def __init__(self, llm_endpoint: str, llm_model: str, api_key: str): self.llm_endpoint = llm_endpoint self.llm_model = llm_model self.api_key = api_key @@ -211,14 +212,14 @@ def extract_memories_from_transcript( content = message.get("content", []) for block in content: if isinstance(block, dict) and block.get("type") == "text": - text_parts.append(block.get("text", "")) + conversation_parts.append(f"SYSTEM: {block.get('text', 
'')}") except (json.JSONDecodeError, KeyError): continue return "\n".join(conversation_parts) - except Exception as e: + except Exception as e: # slopometry: allow-silent - one malformed transcript must not abort the batch logger.error(f"Failed to parse transcript {transcript_path}: {e}") return "" @@ -353,23 +354,7 @@ def generate_memory_candidates(self, transcript_snippet: str) -> list[MemoryCand if not content: raise ValueError("LLM returned empty response") - json_str = content.strip() - - if json_str.startswith(""): - end_marker = "" - end_idx = json_str.find(end_marker) - if end_idx != -1: - json_str = json_str[end_idx + len(end_marker) :] - while json_str.startswith("\n"): - json_str = json_str[1:] - - if json_str.startswith("```json"): - json_str = json_str[7:] - elif json_str.startswith("```"): - json_str = json_str[3:] - if json_str.endswith("```"): - json_str = json_str[:-3] - json_str = json_str.strip() + json_str = strip_llm_wrappers(content) try: data = json.loads(json_str) @@ -379,22 +364,22 @@ def generate_memory_candidates(self, transcript_snippet: str) -> list[MemoryCand if not isinstance(data, list): raise ValueError(f"Expected JSON array, got {type(data).__name__}") + from pydantic import TypeAdapter + + adapter = TypeAdapter(LLMMemoryCandidate) candidates: list[MemoryCandidate] = [] for item in data: try: - memory_type_str = item.get("memory_type", "") - if memory_type_str not in ["user", "feedback", "project", "reference"]: - continue - + llm_candidate = adapter.validate_python(item) candidates.append( MemoryCandidate( - memory_type=MemoryType(memory_type_str), - content=item.get("content", ""), - source_context=item.get("source_context"), + memory_type=llm_candidate.memory_type, + content=llm_candidate.content, + source_context=llm_candidate.source_context, ) ) - except (KeyError, ValueError) as e: - logger.debug(f"Skipping invalid memory candidate: {e}") + except (ValueError, TypeError) as e: + logger.debug("Skipping invalid memory 
candidate: %s", e) continue return candidates diff --git a/src/slopometry/solo/services/memory_freshness.py b/src/slopometry/solo/services/memory_freshness.py index 68f0088..b91a18d 100644 --- a/src/slopometry/solo/services/memory_freshness.py +++ b/src/slopometry/solo/services/memory_freshness.py @@ -23,12 +23,13 @@ import statistics from dataclasses import dataclass -from slopometry.core.models.memory import MemoryCandidate, MemoryEntry +from slopometry.core.models.memory import FreshnessAction, FreshnessVerdict, MemoryCandidate, MemoryEntry +from slopometry.solo.services.llm_text import parse_llm_json logger = logging.getLogger(__name__) -FLOOR_THRESHOLD = 0.45 -CEILING_THRESHOLD = 0.95 +DEFAULT_FLOOR_THRESHOLD = 0.45 +DEFAULT_CEILING_THRESHOLD = 0.95 RECONCILIATION_PROMPT = """You are reconciling two memory candidates about the same subject. @@ -72,20 +73,22 @@ class ProjectSimilarityDistribution: p75: float p90: float p95: float + floor_threshold: float = DEFAULT_FLOOR_THRESHOLD + ceiling_threshold: float = DEFAULT_CEILING_THRESHOLD @property def derived_threshold(self) -> float: """Data-driven dedupe threshold from the project's own distribution. Uses p75 of pairwise similarity as the candidate-relevance threshold. - Falls back to FLOOR_THRESHOLD when the project has too few memories to - estimate a distribution. Capped at CEILING_THRESHOLD so that even in - projects with very similar memories, only genuinely redundant pairs - are sent to the LLM. + Falls back to ``floor_threshold`` when the project has too few + memories to estimate a distribution. Capped at ``ceiling_threshold`` + so that even in projects with very similar memories, only genuinely + redundant pairs are sent to the LLM. 
""" if self.n_pairs == 0: - return FLOOR_THRESHOLD - return max(min(self.p75, CEILING_THRESHOLD), FLOOR_THRESHOLD) + return self.floor_threshold + return max(min(self.p75, self.ceiling_threshold), self.floor_threshold) @dataclass(frozen=True) @@ -95,7 +98,7 @@ class FreshnessDecision: new_candidate: MemoryCandidate existing_memory: MemoryEntry similarity: float - action: str + action: FreshnessAction reason: str merged_content: str | None = None @@ -125,7 +128,11 @@ def _project_pairwise_similarities(existing: list[MemoryEntry]) -> list[float]: return sims -def compute_project_distribution(existing: list[MemoryEntry]) -> ProjectSimilarityDistribution: +def compute_project_distribution( + existing: list[MemoryEntry], + floor_threshold: float = DEFAULT_FLOOR_THRESHOLD, + ceiling_threshold: float = DEFAULT_CEILING_THRESHOLD, +) -> ProjectSimilarityDistribution: """Compute similarity distribution statistics for the project's memory bank. Used to derive a data-informed threshold for which new candidate / existing @@ -133,7 +140,11 @@ def compute_project_distribution(existing: list[MemoryEntry]) -> ProjectSimilari """ sims = _project_pairwise_similarities(existing) if not sims: - return ProjectSimilarityDistribution(0, 0.0, 0.0, 0.0, 0.0, 0.0) + return ProjectSimilarityDistribution( + 0, 0.0, 0.0, 0.0, 0.0, 0.0, + floor_threshold=floor_threshold, + ceiling_threshold=ceiling_threshold, + ) sims_sorted = sorted(sims) n = len(sims_sorted) @@ -148,6 +159,8 @@ def quantile(q: float) -> float: p75=quantile(0.75), p90=quantile(0.90), p95=quantile(0.95), + floor_threshold=floor_threshold, + ceiling_threshold=ceiling_threshold, ) @@ -176,6 +189,7 @@ def _judge_reconciliation( llm_endpoint: str, llm_model: str, api_key: str, + similarity: float, ) -> FreshnessDecision: """Ask the LLM how to reconcile the pair. 
Always returns a decision.""" from openai import OpenAI @@ -198,92 +212,66 @@ def _judge_reconciliation( max_tokens=200, ) content = response.choices[0].message.content or "" - text = content.strip() - if text.startswith("```"): - text = text.strip("`").removeprefix("json").strip() - if text.endswith("```"): - text = text[:-3].strip() try: - data = json.loads(text) - except json.JSONDecodeError: - logger.debug("Could not parse reconciliation response: %s", text[:80]) + data = parse_llm_json(content) + verdict = FreshnessVerdict.model_validate(data) + except (json.JSONDecodeError, ValueError, TypeError): + logger.debug("Could not parse reconciliation response: %s", content[:80]) return FreshnessDecision( new_candidate=candidate, existing_memory=existing, - similarity=0.0, - action="keep_both", - reason=f"Could not parse LLM response: {text[:80]}", + similarity=similarity, + action=FreshnessAction.KEEP_BOTH, + reason=f"Could not parse LLM response: {content[:80]}", ) - action = data.get("action", "keep_both") - if action not in ("keep_both", "merge", "supersede", "dedupe"): - action = "keep_both" - reason = data.get("reason", "") - merged = data.get("merged_content") if action == "merge" else None + merged = verdict.merged_content if verdict.action == FreshnessAction.MERGE else None return FreshnessDecision( new_candidate=candidate, existing_memory=existing, - similarity=0.0, - action=action, - reason=reason, + similarity=similarity, + action=verdict.action, + reason=verdict.reason, merged_content=merged, ) -class MemoryFreshnessValidator: - """Reconciles newly-extracted candidates against existing project memories. 
+def validate_freshness( + candidates: list[MemoryCandidate], + existing: list[MemoryEntry], + llm_endpoint: str, + llm_model: str, + api_key: str, + floor_threshold: float = DEFAULT_FLOOR_THRESHOLD, + ceiling_threshold: float = DEFAULT_CEILING_THRESHOLD, +) -> tuple[list[FreshnessDecision], ProjectSimilarityDistribution]: + """Reconcile newly-extracted candidates against existing project memories. For each project, computes the existing memory bank's pairwise similarity distribution and derives a threshold from it (p75 of pairwise similarity, - clamped between FLOOR_THRESHOLD and CEILING_THRESHOLD). Each new candidate - is paired with existing memories above this threshold and sent to the LLM - for a reconciliation verdict (keep_both / merge / supersede / dedupe). + clamped between ``floor_threshold`` and ``ceiling_threshold``). Each new + candidate is paired with existing memories above this threshold and sent + to the LLM for a reconciliation verdict (keep_both / merge / supersede / + dedupe). """ - - def __init__( - self, - llm_endpoint: str, - llm_model: str, - api_key: str = "dummy", - ) -> None: - self.llm_endpoint = llm_endpoint - self.llm_model = llm_model - self.api_key = api_key - - def validate( - self, - candidates: list[MemoryCandidate], - existing: list[MemoryEntry], - ) -> tuple[list[FreshnessDecision], ProjectSimilarityDistribution]: - """Return reconciliation decisions plus the project's similarity distribution. - - Each candidate is paired with existing memories whose cosine similarity - to the candidate is >= the project's derived threshold. Each pair is - sent to the LLM for a reconciliation verdict. 
- """ - distribution = compute_project_distribution(existing) - threshold = distribution.derived_threshold - - decisions: list[FreshnessDecision] = [] - for candidate in candidates: - similar = _find_above_threshold(candidate, existing, threshold) - for memory, similarity in similar: - try: - decision = _judge_reconciliation( - candidate, memory, self.llm_endpoint, self.llm_model, self.api_key - ) - except Exception as e: - logger.debug("Reconciliation judge failed for candidate vs %s: %s", memory.id, e) - continue - decisions.append( - FreshnessDecision( - new_candidate=decision.new_candidate, - existing_memory=decision.existing_memory, - similarity=similarity, - action=decision.action, - reason=decision.reason, - merged_content=decision.merged_content, - ) + distribution = compute_project_distribution( + existing, + floor_threshold=floor_threshold, + ceiling_threshold=ceiling_threshold, + ) + threshold = distribution.derived_threshold + + decisions: list[FreshnessDecision] = [] + for candidate in candidates: + similar = _find_above_threshold(candidate, existing, threshold) + for memory, similarity in similar: + try: + decision = _judge_reconciliation( + candidate, memory, llm_endpoint, llm_model, api_key, similarity ) - return decisions, distribution + except Exception as e: + logger.debug("Reconciliation judge failed for candidate vs %s: %s", memory.id, e) + continue + decisions.append(decision) + return decisions, distribution diff --git a/src/slopometry/solo/services/memory_service.py b/src/slopometry/solo/services/memory_service.py index 9e908e8..def52e9 100644 --- a/src/slopometry/solo/services/memory_service.py +++ b/src/slopometry/solo/services/memory_service.py @@ -20,6 +20,10 @@ def save_memory(self, memory: MemoryEntry) -> None: def save_memories(self, request: MemoryCreateRequest) -> list[MemoryEntry]: """Save multiple memory entries from a request. 
+ Does not mark the session as processed — the caller owns that + decision (and must pass the correct ``source`` to avoid + cross-harness key collisions in ``processed_memory_sessions``). + Returns: List of saved MemoryEntry objects """ @@ -41,8 +45,6 @@ def save_memories(self, request: MemoryCreateRequest) -> list[MemoryEntry]: self.db.save_memory(memory) saved_memories.append(memory) - self.db.mark_session_processed(request.session_id, request.project_dir, len(saved_memories)) - return saved_memories def get_memories( @@ -50,6 +52,7 @@ def get_memories( project_dir: str | None = None, memory_type: MemoryType | None = None, limit: int = 50, + include_superseded: bool = False, ) -> list[MemoryEntry]: """Get memories with optional filters. @@ -57,6 +60,8 @@ def get_memories( project_dir: Filter by project directory memory_type: Filter by memory type limit: Maximum number of results + include_superseded: When False (default), exclude memories that + have been superseded by a newer replacement. 
Returns: List of matching MemoryEntry objects @@ -65,6 +70,7 @@ def get_memories( project_dir=project_dir, memory_type=memory_type.value if memory_type else None, limit=limit, + include_superseded=include_superseded, ) def delete_memory(self, memory_id: str) -> bool: @@ -107,12 +113,12 @@ def update_memory( ) def mark_session_processed( - self, session_id: str, project_dir: str, memory_count: int, source: str = "claude_code" + self, session_id: str, project_dir: str, memory_count: int, source: str ) -> None: """Mark a session as processed for memory extraction.""" self.db.mark_session_processed(session_id, project_dir, memory_count, source=source) - def is_session_processed(self, session_id: str, project_dir: str, source: str = "claude_code") -> bool: + def is_session_processed(self, session_id: str, project_dir: str, source: str) -> bool: """Check if a session has already been processed.""" return self.db.is_session_processed(session_id, project_dir, source=source) diff --git a/src/slopometry/solo/services/transcript_finder.py b/src/slopometry/solo/services/transcript_finder.py index fc28109..4f8f5e7 100644 --- a/src/slopometry/solo/services/transcript_finder.py +++ b/src/slopometry/solo/services/transcript_finder.py @@ -1,11 +1,10 @@ """Transcript discovery for memory extraction.""" -import os -import sys from dataclasses import dataclass from pathlib import Path from slopometry.core.models.protocol.events import AbstractEventSource +from slopometry.core.settings import get_claude_projects_dirs, get_opencode_storage_root @dataclass(frozen=True) @@ -23,33 +22,32 @@ class TranscriptFinder: def find_claude_project_dirs(self) -> list[Path]: """Find Claude Code project directories based on platform.""" - if sys.platform == "win32": - base = Path(os.environ.get("LOCALAPPDATA", Path.home() / "AppData" / "Local")) - return [base / "Claude" / "projects"] - elif sys.platform == "darwin": - return [Path.home() / "Library" / "Application Support" / "Claude" / "projects"] - 
else: - xdg_data_home = os.environ.get("XDG_DATA_HOME") - claude_xdg = Path(xdg_data_home) / "claude" / "projects" if xdg_data_home else None - default_claude = Path.home() / ".claude" / "projects" - if claude_xdg and claude_xdg.exists(): - return [claude_xdg] - if default_claude.exists(): - return [default_claude] - return [default_claude] + return get_claude_projects_dirs() def _decode_claude_project_dir(self, dirname: str) -> Path | None: """Decode Claude project directory name back to working directory. - Claude encodes paths like /mnt/terradump/code/slopometry as - -mnt-terradump-code-slopometry (leading dash, slashes become dashes) + Claude encodes path separators as hyphens, so e.g. + -mnt-terradump-code-slopometry -> /mnt/terradump/code/slopometry. + Hyphens in directory names are double-escaped as ``--``. """ if not dirname.startswith("-"): return None - decoded = "/" + dirname[1:].replace("-", "/") + raw = dirname[1:] + parts = raw.split("-") + decoded_parts: list[str] = [] + i = 0 + while i < len(parts): + if not parts[i] and i + 1 < len(parts): + decoded_parts.append("-") + i += 2 + else: + decoded_parts.append(parts[i]) + i += 1 + decoded = "/" + "/".join(decoded_parts) try: return Path(decoded).resolve() - except Exception: + except Exception: # slopometry: allow-silent - corrupt symlink or permissions in project dir; skip gracefully return None def find_slopometry_transcripts(self, project_dir: Path) -> list[DiscoveredTranscript]: @@ -80,7 +78,7 @@ def find_slopometry_transcripts(self, project_dir: Path) -> list[DiscoveredTrans return results - def find_opencode_storage_root(self) -> Path | None: + def find_opencode_storage_root(self) -> Path: """Find OpenCode's storage root directory. Layout: ``/project/.json``, @@ -88,13 +86,7 @@ def find_opencode_storage_root(self) -> Path | None: ``/message//.json``, ``/part//.json``. 
""" - if sys.platform == "win32": - base = Path(os.environ.get("LOCALAPPDATA", Path.home() / "AppData" / "Local")) - return base / "opencode" / "storage" - xdg_data_home = os.environ.get("XDG_DATA_HOME") - if xdg_data_home: - return Path(xdg_data_home) / "opencode" / "storage" - return Path.home() / ".local" / "share" / "opencode" / "storage" + return get_opencode_storage_root() def find_opencode_sessions(self, project_dir: Path) -> list[DiscoveredTranscript]: """Find OpenCode sessions whose working directory matches project_dir. @@ -112,7 +104,7 @@ def find_opencode_sessions(self, project_dir: Path) -> list[DiscoveredTranscript import json storage_root = self.find_opencode_storage_root() - if storage_root is None or not storage_root.is_dir(): + if not storage_root.is_dir(): return [] project_dir_resolved = project_dir.resolve() @@ -123,7 +115,7 @@ def find_opencode_sessions(self, project_dir: Path) -> list[DiscoveredTranscript return results def _directory_matches_project(directory_str: str | None) -> bool: - """True if ``directory_str`` equals or contains ``project_dir_resolved``.""" + """True if ``directory_str`` equals ``project_dir_resolved`` or either is inside the other.""" if not directory_str: return False try: @@ -132,6 +124,11 @@ def _directory_matches_project(directory_str: str | None) -> bool: return False if directory_resolved == project_dir_resolved: return True + try: + directory_resolved.relative_to(project_dir_resolved) + return True + except ValueError: + pass try: project_dir_resolved.relative_to(directory_resolved) return True @@ -180,9 +177,11 @@ def discover_transcripts(self, project_dir: Path) -> list[DiscoveredTranscript]: """Discover all transcripts for a project, across all harnesses. Returns: - List of DiscoveredTranscript, one per session per harness + List of DiscoveredTranscript, one per session per harness, deduplicated + when the same session appears across multiple discovery sources. 
""" results: list[DiscoveredTranscript] = [] + seen: set[tuple[str, str]] = set() project_dir = project_dir.resolve() for claude_projects_dir in self.find_claude_project_dirs(): @@ -199,6 +198,10 @@ def discover_transcripts(self, project_dir: Path) -> list[DiscoveredTranscript]: for transcript_path in project_subdir.glob("*.jsonl"): session_id = transcript_path.stem + key = (session_id, AbstractEventSource.CLAUDE_CODE.value) + if key in seen: + continue + seen.add(key) results.append( DiscoveredTranscript( session_id=session_id, @@ -208,7 +211,18 @@ def discover_transcripts(self, project_dir: Path) -> list[DiscoveredTranscript]: ) ) - results.extend(self.find_slopometry_transcripts(project_dir)) - results.extend(self.find_opencode_sessions(project_dir)) + for t in self.find_slopometry_transcripts(project_dir): + key = (t.session_id, t.source.value) + if key in seen: + continue + seen.add(key) + results.append(t) + + for t in self.find_opencode_sessions(project_dir): + key = (t.session_id, t.source.value) + if key in seen: + continue + seen.add(key) + results.append(t) return results diff --git a/tests/test_embedding_service.py b/tests/test_embedding_service.py index 6d81478..a6eac44 100644 --- a/tests/test_embedding_service.py +++ b/tests/test_embedding_service.py @@ -7,8 +7,7 @@ from slopometry.solo.services.embedding_service import EmbeddingService -def test_compute_similarity_same_vector() -> None: - """Similarity of vector to itself should be 1.0.""" +def test_compute_similarity__returns_one_for_identical_vectors() -> None: service = EmbeddingService( endpoint="http://localhost:11434/v1", model="embedding-model", @@ -21,8 +20,7 @@ def test_compute_similarity_same_vector() -> None: assert similarity == pytest.approx(1.0) -def test_compute_similarity_different_vectors() -> None: - """Different vectors should have similarity less than 1.0.""" +def test_compute_similarity__returns_less_than_one_for_different_vectors() -> None: service = EmbeddingService( 
endpoint="http://localhost:11434/v1", model="embedding-model", @@ -37,8 +35,7 @@ def test_compute_similarity_different_vectors() -> None: assert similarity > -1.0 -def test_compute_similarity_zero_magnitude() -> None: - """Should handle zero vectors gracefully.""" +def test_compute_similarity__returns_zero_for_zero_magnitude_vectors() -> None: service = EmbeddingService( endpoint="http://localhost:11434/v1", model="embedding-model", @@ -55,8 +52,7 @@ def test_compute_similarity_zero_magnitude() -> None: assert result2 == 0.0 -def test_compute_uniqueness_score_no_existing() -> None: - """Should return 1.0 when no existing embeddings.""" +def test_compute_uniqueness_score__returns_one_when_no_existing_embeddings() -> None: service = EmbeddingService( endpoint="http://localhost:11434/v1", model="embedding-model", @@ -69,8 +65,7 @@ def test_compute_uniqueness_score_no_existing() -> None: assert score == 1.0 -def test_compute_uniqueness_score_with_existing() -> None: - """Should return lower score when similar embeddings exist.""" +def test_compute_uniqueness_score__returns_lower_score_when_similar_embeddings_exist() -> None: service = EmbeddingService( endpoint="http://localhost:11434/v1", model="embedding-model", @@ -89,8 +84,7 @@ def test_compute_uniqueness_score_with_existing() -> None: assert score >= 0.0 -def test_get_embedding_raises_on_failure() -> None: - """Should raise RuntimeError on API failure.""" +def test_get_embedding__raises_runtime_error_on_api_failure() -> None: service = EmbeddingService( endpoint="http://localhost:11434/v1", model="embedding-model", @@ -106,8 +100,7 @@ def test_get_embedding_raises_on_failure() -> None: service.get_embedding("test text") -def test_get_embedding_success() -> None: - """Test successful embedding retrieval.""" +def test_get_embedding__returns_vector_on_success() -> None: service = EmbeddingService( endpoint="http://localhost:11434/v1", model="embedding-model", @@ -131,8 +124,7 @@ def test_get_embedding_success() -> 
None: ) -def test_get_embedding_raises_on_missing_package() -> None: - """Should raise RuntimeError if openai package is not installed.""" +def test_get_embedding__raises_runtime_error_when_openai_not_installed() -> None: service = EmbeddingService( endpoint="http://localhost:11434/v1", model="embedding-model", @@ -148,8 +140,7 @@ def failing_import(name, *args, **kwargs): try: with patch("builtins.__import__", side_effect=failing_import): - with pytest.raises(RuntimeError, match="openai package required"): + with pytest.raises(RuntimeError, match="Failed to get embedding"): service.get_embedding("test text") finally: pass - diff --git a/tests/test_memory_extractor.py b/tests/test_memory_extractor.py index e7b5249..f15a567 100644 --- a/tests/test_memory_extractor.py +++ b/tests/test_memory_extractor.py @@ -14,7 +14,7 @@ def _write_json(path: Path, payload: dict) -> None: class TestExtractFromOpencodeSession: - def test_reconstructs_user_text_only(self, tmp_path: Path): + def test_extract_memories_from_opencode_session__reconstructs_user_text_only(self, tmp_path: Path): storage = tmp_path / "opencode_storage" _write_json( storage / "message" / "ses_a" / "msg_a1.json", @@ -24,12 +24,12 @@ def test_reconstructs_user_text_only(self, tmp_path: Path): storage / "part" / "msg_a1" / "p1.json", {"id": "p1", "type": "text", "text": "hello world", "messageID": "msg_a1", "sessionID": "ses_a"}, ) - extractor = MemoryExtractor("https://llm.example/v1", "model-x") + extractor = MemoryExtractor("https://llm.example/v1", "model-x", "test-key") out = extractor.extract_memories_from_opencode_session("ses_a", storage) assert "USER:" in out assert "hello world" in out - def test_reconstructs_assistant_text(self, tmp_path: Path): + def test_extract_memories_from_opencode_session__reconstructs_assistant_text(self, tmp_path: Path): storage = tmp_path / "opencode_storage" _write_json( storage / "message" / "ses_b" / "msg_b1.json", @@ -39,12 +39,12 @@ def 
test_reconstructs_assistant_text(self, tmp_path: Path): storage / "part" / "msg_b1" / "p1.json", {"id": "p1", "type": "text", "text": "I will check that", "messageID": "msg_b1", "sessionID": "ses_b"}, ) - extractor = MemoryExtractor("https://llm.example/v1", "model-x") + extractor = MemoryExtractor("https://llm.example/v1", "model-x", "test-key") out = extractor.extract_memories_from_opencode_session("ses_b", storage) assert "ASSISTANT:" in out assert "I will check that" in out - def test_tool_part_emits_tool_marker_with_input_output(self, tmp_path: Path): + def test_extract_memories_from_opencode_session__emits_tool_marker_with_input_output(self, tmp_path: Path): storage = tmp_path / "opencode_storage" _write_json( storage / "message" / "ses_c" / "msg_c1.json", @@ -66,12 +66,12 @@ def test_tool_part_emits_tool_marker_with_input_output(self, tmp_path: Path): "sessionID": "ses_c", }, ) - extractor = MemoryExtractor("https://llm.example/v1", "model-x") + extractor = MemoryExtractor("https://llm.example/v1", "model-x", "test-key") out = extractor.extract_memories_from_opencode_session("ses_c", storage) assert "TOOL: bash" in out assert "ls -la" in out - def test_step_start_and_reasoning_parts_skipped(self, tmp_path: Path): + def test_extract_memories_from_opencode_session__skips_step_start_and_reasoning_parts(self, tmp_path: Path): storage = tmp_path / "opencode_storage" _write_json( storage / "message" / "ses_d" / "msg_d1.json", @@ -101,17 +101,17 @@ def test_step_start_and_reasoning_parts_skipped(self, tmp_path: Path): "sessionID": "ses_d", }, ) - extractor = MemoryExtractor("https://llm.example/v1", "model-x") + extractor = MemoryExtractor("https://llm.example/v1", "model-x", "test-key") out = extractor.extract_memories_from_opencode_session("ses_d", storage) assert "user-visible reply" in out assert "internal thoughts" not in out - def test_missing_message_directory_returns_empty_string(self, tmp_path: Path): - extractor = MemoryExtractor("https://llm.example/v1", 
"model-x") + def test_extract_memories_from_opencode_session__returns_empty_string_when_message_dir_missing(self, tmp_path: Path): + extractor = MemoryExtractor("https://llm.example/v1", "model-x", "test-key") out = extractor.extract_memories_from_opencode_session("ses_none", tmp_path / "opencode_storage") assert out == "" - def test_messages_ordered_chronologically(self, tmp_path: Path): + def test_extract_memories_from_opencode_session__orders_messages_chronologically(self, tmp_path: Path): storage = tmp_path / "opencode_storage" _write_json( storage / "message" / "ses_e" / "msg_e1.json", @@ -129,13 +129,13 @@ def test_messages_ordered_chronologically(self, tmp_path: Path): storage / "part" / "msg_e0" / "p1.json", {"id": "p1", "type": "text", "text": "first message", "messageID": "msg_e0", "sessionID": "ses_e"}, ) - extractor = MemoryExtractor("https://llm.example/v1", "model-x") + extractor = MemoryExtractor("https://llm.example/v1", "model-x", "test-key") out = extractor.extract_memories_from_opencode_session("ses_e", storage) first_idx = out.index("first message") second_idx = out.index("second message") assert first_idx < second_idx - def test_unknown_role_message_skipped(self, tmp_path: Path): + def test_extract_memories_from_opencode_session__skips_unknown_role_messages(self, tmp_path: Path): storage = tmp_path / "opencode_storage" _write_json( storage / "message" / "ses_f" / "msg_f1.json", @@ -145,11 +145,11 @@ def test_unknown_role_message_skipped(self, tmp_path: Path): storage / "part" / "msg_f1" / "p1.json", {"id": "p1", "type": "text", "text": "should not appear", "messageID": "msg_f1", "sessionID": "ses_f"}, ) - extractor = MemoryExtractor("https://llm.example/v1", "model-x") + extractor = MemoryExtractor("https://llm.example/v1", "model-x", "test-key") out = extractor.extract_memories_from_opencode_session("ses_f", storage) assert "should not appear" not in out - def test_truncation_config_respected_for_tool_parts(self, tmp_path: Path): + def 
test_extract_memories_from_opencode_session__respects_truncation_config_for_tool_parts(self, tmp_path: Path): storage = tmp_path / "opencode_storage" _write_json( storage / "message" / "ses_g" / "msg_g1.json", @@ -168,7 +168,7 @@ def test_truncation_config_respected_for_tool_parts(self, tmp_path: Path): "sessionID": "ses_g", }, ) - extractor = MemoryExtractor("https://llm.example/v1", "model-x") + extractor = MemoryExtractor("https://llm.example/v1", "model-x", "test-key") out_default = extractor.extract_memories_from_opencode_session("ses_g", storage) assert len(out_default) < 500 + 500 + 100 out_short = extractor.extract_memories_from_opencode_session( @@ -180,12 +180,12 @@ def test_truncation_config_respected_for_tool_parts(self, tmp_path: Path): class TestTranscriptTruncationConfig: - def test_defaults_match_pre_refactor_behavior(self): + def test_transcript_truncation_config__defaults_match_pre_refactor_behavior(self): c = TranscriptTruncationConfig() assert c.tool_input_chars == 120 assert c.tool_output_chars == 120 assert c.tool_result_chars == 200 - def test_extra_fields_rejected(self): + def test_transcript_truncation_config__rejects_extra_fields(self): with pytest.raises(Exception): TranscriptTruncationConfig(unknown_field=42) diff --git a/tests/test_memory_freshness.py b/tests/test_memory_freshness.py index 0c12318..7b02e3e 100644 --- a/tests/test_memory_freshness.py +++ b/tests/test_memory_freshness.py @@ -1,21 +1,21 @@ -"""Tests for MemoryFreshnessValidator.""" +"""Tests for validate_freshness.""" from datetime import datetime from unittest.mock import MagicMock, patch import pytest -from slopometry.core.models.memory import MemoryCandidate, MemoryEntry, MemoryType +from slopometry.core.models.memory import FreshnessAction, MemoryCandidate, MemoryEntry, MemoryType from slopometry.solo.services.memory_freshness import ( - CEILING_THRESHOLD, - FLOOR_THRESHOLD, + DEFAULT_CEILING_THRESHOLD, + DEFAULT_FLOOR_THRESHOLD, FreshnessDecision, - 
MemoryFreshnessValidator, ProjectSimilarityDistribution, _cosine_similarity, _find_above_threshold, _judge_reconciliation, compute_project_distribution, + validate_freshness, ) @@ -44,45 +44,45 @@ def _memory( class TestCosineSimilarity: - def test_identical_vectors_have_similarity_one(self): + def test_cosine_similarity__returns_one_for_identical_vectors(self): v = [1.0, 0.0, 0.0] assert _cosine_similarity(v, v) == pytest.approx(1.0) - def test_orthogonal_vectors_have_similarity_zero(self): + def test_cosine_similarity__returns_zero_for_orthogonal_vectors(self): assert _cosine_similarity([1.0, 0.0], [0.0, 1.0]) == pytest.approx(0.0) - def test_empty_vectors_return_zero(self): + def test_cosine_similarity__returns_zero_for_empty_vectors(self): assert _cosine_similarity([], [1.0]) == 0.0 - def test_mismatched_lengths_return_zero(self): + def test_cosine_similarity__returns_zero_for_mismatched_lengths(self): assert _cosine_similarity([1.0, 0.0], [1.0, 0.0, 0.0]) == 0.0 class TestProjectSimilarityDistribution: - def test_zero_pairs_falls_back_to_floor(self): + def test_derived_threshold__falls_back_to_floor_when_zero_pairs(self): d = ProjectSimilarityDistribution(0, 0.0, 0.0, 0.0, 0.0, 0.0) - assert d.derived_threshold == FLOOR_THRESHOLD + assert d.derived_threshold == DEFAULT_FLOOR_THRESHOLD - def test_threshold_is_p75_clamped_to_floor(self): + def test_derived_threshold__clamped_to_floor_when_p75_below_floor(self): d = ProjectSimilarityDistribution(10, 0.30, 0.30, 0.20, 0.10, 0.05) - assert d.derived_threshold == FLOOR_THRESHOLD + assert d.derived_threshold == DEFAULT_FLOOR_THRESHOLD - def test_threshold_is_p75_when_above_floor(self): + def test_derived_threshold__uses_p75_when_above_floor(self): d = ProjectSimilarityDistribution(100, 0.70, 0.65, 0.80, 0.90, 0.95) assert d.derived_threshold == pytest.approx(0.80) - def test_threshold_is_clamped_to_ceiling(self): + def test_derived_threshold__clamped_to_ceiling_when_p75_above_ceiling(self): d = 
ProjectSimilarityDistribution(100, 0.95, 0.95, 0.99, 1.0, 1.0) - assert d.derived_threshold == CEILING_THRESHOLD + assert d.derived_threshold == DEFAULT_CEILING_THRESHOLD class TestComputeProjectDistribution: - def test_no_embeddings_returns_zero_distribution(self): + def test_compute_project_distribution__returns_zero_distribution_when_no_embeddings(self): existing = [_memory("X", embedding=None), _memory("Y", embedding=None)] d = compute_project_distribution(existing) assert d.n_pairs == 0 - def test_pairs_counted_correctly(self): + def test_compute_project_distribution__counts_pairs_correctly(self): existing = [ _memory("a", [1.0, 0.0], "m1"), _memory("b", [0.0, 1.0], "m2"), @@ -94,14 +94,14 @@ def test_pairs_counted_correctly(self): assert 0.0 <= d.p50 <= 1.0 assert 0.0 <= d.p75 <= 1.0 - def test_quantiles_are_monotonic(self): + def test_compute_project_distribution__quantiles_are_monotonic(self): existing = [_memory(f"m{i}", [float(i) / 10, 1.0 - float(i) / 10], f"id{i}") for i in range(5)] d = compute_project_distribution(existing) assert d.p50 <= d.p75 <= d.p90 <= d.p95 class TestFindAboveThreshold: - def test_returns_only_memories_above_threshold(self): + def test_find_above_threshold__returns_only_memories_above_threshold(self): candidate = _candidate("X", [1.0, 0.0]) existing = [ _memory("identical", [1.0, 0.0], "m1"), @@ -114,19 +114,19 @@ def test_returns_only_memories_above_threshold(self): assert "m1" in ids assert "m2" in ids - def test_candidate_without_embedding_returns_empty(self): + def test_find_above_threshold__returns_empty_when_candidate_has_no_embedding(self): candidate = _candidate("X", embedding=None) existing = [_memory("Y", [1.0, 0.0], "m1")] assert _find_above_threshold(candidate, existing, threshold=0.5) == [] - def test_returns_empty_when_no_matches_above_threshold(self): + def test_find_above_threshold__returns_empty_when_no_matches_above_threshold(self): candidate = _candidate("X", [1.0, 0.0]) existing = [_memory("Y", [0.0, 1.0], 
"m1")] assert _find_above_threshold(candidate, existing, threshold=0.78) == [] class TestJudgeReconciliation: - def test_returns_keep_both_when_llm_says_so(self): + def test_judge_reconciliation__returns_keep_both_when_llm_says_keep_both(self): mock_response = MagicMock() mock_response.choices = [ MagicMock(message=MagicMock(content='{"action": "keep_both", "reason": "different topics"}')) @@ -138,12 +138,12 @@ def test_returns_keep_both_when_llm_says_so(self): _memory("user prefers dark mode"), "https://llm.example/v1", "model-x", - "key", + "key", 0.85, ) - assert decision.action == "keep_both" + assert decision.action == FreshnessAction.KEEP_BOTH assert "topics" in decision.reason or "different" in decision.reason - def test_returns_merge_with_merged_content(self): + def test_judge_reconciliation__returns_merge_with_merged_content_when_llm_says_merge(self): mock_response = MagicMock() mock_response.choices = [ MagicMock( @@ -159,12 +159,12 @@ def test_returns_merge_with_merged_content(self): _memory("uses radon"), "https://llm.example/v1", "model-x", - "key", + "key", 0.85, ) - assert decision.action == "merge" + assert decision.action == FreshnessAction.MERGE assert decision.merged_content == "uses rust-code-analysis since 2026" - def test_returns_supersede_when_llm_says_so(self): + def test_judge_reconciliation__returns_supersede_when_llm_says_supersede(self): mock_response = MagicMock() mock_response.choices = [ MagicMock(message=MagicMock(content='{"action": "supersede", "reason": "newer version"}')) @@ -176,11 +176,11 @@ def test_returns_supersede_when_llm_says_so(self): _memory("Python 3.10"), "https://llm.example/v1", "model-x", - "key", + "key", 0.85, ) - assert decision.action == "supersede" + assert decision.action == FreshnessAction.SUPERSEDE - def test_returns_dedupe_when_llm_says_so(self): + def test_judge_reconciliation__returns_dedupe_when_llm_says_dedupe(self): mock_response = MagicMock() mock_response.choices = [ 
MagicMock(message=MagicMock(content='{"action": "dedupe", "reason": "same info"}')) @@ -192,11 +192,11 @@ def test_returns_dedupe_when_llm_says_so(self): _memory("user prefers pyright"), "https://llm.example/v1", "model-x", - "key", + "key", 0.85, ) - assert decision.action == "dedupe" + assert decision.action == FreshnessAction.DEDUPE - def test_strips_markdown_fences(self): + def test_judge_reconciliation__strips_markdown_fences_from_llm_response(self): mock_response = MagicMock() mock_response.choices = [ MagicMock( @@ -210,12 +210,12 @@ def test_strips_markdown_fences(self): _memory("Y"), "https://llm.example/v1", "model-x", - "key", + "key", 0.85, ) - assert decision.action == "merge" + assert decision.action == FreshnessAction.MERGE assert decision.merged_content == "merged" - def test_falls_back_to_keep_both_on_invalid_action(self): + def test_judge_reconciliation__falls_back_to_keep_both_on_invalid_action(self): mock_response = MagicMock() mock_response.choices = [ MagicMock(message=MagicMock(content='{"action": "maybe", "reason": "unsure"}')) @@ -227,31 +227,31 @@ def test_falls_back_to_keep_both_on_invalid_action(self): _memory("Y"), "https://llm.example/v1", "model-x", - "key", + "key", 0.85, ) - assert decision.action == "keep_both" + assert decision.action == FreshnessAction.KEEP_BOTH -class TestMemoryFreshnessValidator: - def test_no_existing_memories_returns_empty_decisions_and_floor_distribution(self): - validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") - decisions, distribution = validator.validate([_candidate("X", [1.0, 0.0])], []) +class TestValidateFreshness: + def test_validate_freshness__returns_empty_decisions_and_floor_distribution_when_no_existing_memories(self): + decisions, distribution = validate_freshness( + [_candidate("X", [1.0, 0.0])], [], "https://llm.example/v1", "model-x", "test-key" + ) assert decisions == [] assert distribution.n_pairs == 0 - assert distribution.derived_threshold == FLOOR_THRESHOLD + 
assert distribution.derived_threshold == DEFAULT_FLOOR_THRESHOLD - def test_no_above_threshold_matches_skips_llm_call(self): - validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") + def test_validate_freshness__skips_llm_call_when_no_above_threshold_matches(self): candidates = [_candidate("X", [1.0, 0.0])] existing = [_memory("orthogonal", [0.0, 1.0], "m1")] with patch("openai.OpenAI") as mock_openai: - decisions, _ = validator.validate(candidates, existing) + decisions, _ = validate_freshness(candidates, existing, "https://llm.example/v1", "model-x", "test-key") mock_openai.assert_not_called() assert decisions == [] - def test_similar_match_triggers_llm_judge_with_action(self): + def test_validate_freshness__triggers_llm_judge_when_similar_match_found(self): mock_response = MagicMock() mock_response.choices = [ MagicMock( @@ -260,38 +260,40 @@ def test_similar_match_triggers_llm_judge_with_action(self): ) ) ] - validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") candidates = [_candidate("uses rust-code-analysis", [1.0, 0.0])] existing = [_memory("uses radon", [0.99, 0.14], "m1")] with patch("openai.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.return_value = mock_response - decisions, distribution = validator.validate(candidates, existing) + decisions, distribution = validate_freshness( + candidates, existing, "https://llm.example/v1", "model-x", "test-key" + ) assert len(decisions) == 1 decision = decisions[0] assert isinstance(decision, FreshnessDecision) - assert decision.action == "merge" + assert decision.action == FreshnessAction.MERGE assert decision.merged_content == "merged" assert decision.similarity > distribution.derived_threshold - def test_keep_both_action_does_not_merge_or_supersede(self): + def test_validate_freshness__does_not_merge_or_supersede_on_keep_both(self): mock_response = MagicMock() mock_response.choices = [ MagicMock(message=MagicMock(content='{"action": "keep_both", 
"reason": "different aspects"}')) ] - validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") candidates = [_candidate("uses rust-code-analysis for complexity", [1.0, 0.0])] existing = [_memory("user prefers dark mode", [0.99, 0.14], "m1")] with patch("openai.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.return_value = mock_response - decisions, _ = validator.validate(candidates, existing) + decisions, _ = validate_freshness( + candidates, existing, "https://llm.example/v1", "model-x", "test-key" + ) assert len(decisions) == 1 - assert decisions[0].action == "keep_both" + assert decisions[0].action == FreshnessAction.KEEP_BOTH - def test_multiple_candidates_with_different_actions(self): + def test_validate_freshness__handles_multiple_candidates_with_different_actions(self): mock_response_a = MagicMock() mock_response_a.choices = [ MagicMock( @@ -308,7 +310,6 @@ def test_multiple_candidates_with_different_actions(self): ) ) ] - validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") candidates = [ _candidate("uses rust-code-analysis for complexity", [1.0, 0.0]), _candidate("user prefers dark mode in editors", [0.0, 1.0]), @@ -323,31 +324,40 @@ def test_multiple_candidates_with_different_actions(self): mock_response_a, mock_response_b, ] - decisions, _ = validator.validate(candidates, existing) + decisions, _ = validate_freshness( + candidates, existing, "https://llm.example/v1", "model-x", "test-key" + ) assert len(decisions) == 2 actions = {d.action for d in decisions} - assert "merge" in actions - assert "dedupe" in actions + assert FreshnessAction.MERGE in actions + assert FreshnessAction.DEDUPE in actions - def test_data_driven_threshold_for_low_similarity_project_is_low(self): - validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") + def test_validate_freshness__uses_floor_threshold_for_low_similarity_project(self): candidates = [_candidate("X", [1.0, 0.0])] existing = [ 
_memory("a", [1.0, 0.0], "m1"), _memory("b", [0.0, 1.0], "m2"), ] - with patch("openai.OpenAI"): - _, distribution = validator.validate(candidates, existing) - assert distribution.derived_threshold == FLOOR_THRESHOLD + mock_response = MagicMock() + mock_response.choices = [ + MagicMock(message=MagicMock(content='{"action": "keep_both", "reason": "different"}')) + ] + with patch("openai.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = mock_response + _, distribution = validate_freshness( + candidates, existing, "https://llm.example/v1", "model-x", "test-key" + ) + assert distribution.derived_threshold == DEFAULT_FLOOR_THRESHOLD - def test_failed_llm_call_skipped_silently(self): - validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") + def test_validate_freshness__skips_failed_llm_call_silently(self): candidates = [_candidate("X", [1.0, 0.0])] existing = [_memory("Y", [0.99, 0.14], "m1")] with patch("openai.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.side_effect = RuntimeError("llm down") - decisions, _ = validator.validate(candidates, existing) + decisions, _ = validate_freshness( + candidates, existing, "https://llm.example/v1", "model-x", "test-key" + ) assert decisions == [] diff --git a/tests/test_memory_service.py b/tests/test_memory_service.py index 47fe150..7b233be 100644 --- a/tests/test_memory_service.py +++ b/tests/test_memory_service.py @@ -31,8 +31,7 @@ def memory_service(temp_db: EventDatabase) -> MemoryService: return MemoryService(db=temp_db) -def test_save_memory(memory_service: MemoryService) -> None: - """Saves a memory entry.""" +def test_save_memory__persists_single_memory_entry(memory_service: MemoryService) -> None: memory = MemoryEntry( id="mem-001", session_id="session-abc", @@ -50,8 +49,7 @@ def test_save_memory(memory_service: MemoryService) -> None: assert memories[0].content == "Test memory content" -def test_save_memories(memory_service: 
MemoryService) -> None: - """Saves multiple candidates.""" +def test_save_memories__saves_all_candidates_from_request(memory_service: MemoryService) -> None: request = MemoryCreateRequest( session_id="session-xyz", project_dir="/test/project", @@ -75,8 +73,24 @@ def test_save_memories(memory_service: MemoryService) -> None: assert saved[0].session_id == "session-xyz" -def test_get_memories(memory_service: MemoryService) -> None: - """Retrieves memories.""" +def test_save_memories__does_not_mark_session_as_processed(memory_service: MemoryService) -> None: + request = MemoryCreateRequest( + session_id="session-no-mark", + project_dir="/test/project", + candidates=[ + MemoryCandidate( + memory_type=MemoryType.USER, + content="A memory", + ), + ], + ) + + memory_service.save_memories(request) + + assert not memory_service.is_session_processed("session-no-mark", "/test/project", source="claude_code") + + +def test_get_memories__filters_by_project_dir_and_memory_type(memory_service: MemoryService) -> None: memory_service.save_memory( MemoryEntry( id="mem-001", @@ -110,8 +124,77 @@ def test_get_memories(memory_service: MemoryService) -> None: assert type_memories[0].id == "mem-001" -def test_delete_memory(memory_service: MemoryService) -> None: - """Deletes a memory by ID.""" +def test_get_memories__respects_limit_parameter(memory_service: MemoryService) -> None: + for i in range(10): + memory_service.save_memory( + MemoryEntry( + id=f"mem-{i}", + session_id="session-1", + project_dir="/test/project", + memory_type=MemoryType.USER, + content=f"Memory {i}", + created_at=datetime.now(), + ) + ) + + memories = memory_service.get_memories(limit=5) + assert len(memories) == 5 + + +def test_get_memories__excludes_superseded_by_default(memory_service: MemoryService) -> None: + old = MemoryEntry( + id="mem-old", + session_id="session-1", + project_dir="/proj", + memory_type=MemoryType.PROJECT, + content="Old memory", + created_at=datetime.now(), + ) + new = MemoryEntry( + 
id="mem-new", + session_id="session-2", + project_dir="/proj", + memory_type=MemoryType.PROJECT, + content="New memory that supersedes old", + created_at=datetime.now(), + ) + memory_service.save_memory(old) + memory_service.save_memory(new) + memory_service.update_memory(old.id, superseded_by=new.id) + + visible = memory_service.get_memories(project_dir="/proj", limit=100) + assert len(visible) == 1 + assert visible[0].id == "mem-new" + + +def test_get_memories__includes_superseded_when_flag_set(memory_service: MemoryService) -> None: + old = MemoryEntry( + id="mem-old", + session_id="session-1", + project_dir="/proj", + memory_type=MemoryType.PROJECT, + content="Old memory", + created_at=datetime.now(), + ) + new = MemoryEntry( + id="mem-new", + session_id="session-2", + project_dir="/proj", + memory_type=MemoryType.PROJECT, + content="New memory that supersedes old", + created_at=datetime.now(), + ) + memory_service.save_memory(old) + memory_service.save_memory(new) + memory_service.update_memory(old.id, superseded_by=new.id) + + all_memories = memory_service.get_memories(project_dir="/proj", limit=100, include_superseded=True) + assert len(all_memories) == 2 + ids = {m.id for m in all_memories} + assert ids == {"mem-old", "mem-new"} + + +def test_delete_memory__returns_true_when_exists_false_when_not(memory_service: MemoryService) -> None: memory = MemoryEntry( id="mem-to-delete", session_id="session-1", @@ -132,8 +215,7 @@ def test_delete_memory(memory_service: MemoryService) -> None: assert result is False -def test_delete_all_memories(memory_service: MemoryService) -> None: - """Clears all memories and processed_sessions.""" +def test_delete_all_memories__clears_memories_and_processed_sessions(memory_service: MemoryService) -> None: memory_service.save_memory( MemoryEntry( id="mem-1", @@ -154,9 +236,9 @@ def test_delete_all_memories(memory_service: MemoryService) -> None: created_at=datetime.now(), ) ) - memory_service.mark_session_processed("session-1", 
"/project1", 1) + memory_service.mark_session_processed("session-1", "/project1", 1, source="claude_code") - assert memory_service.is_session_processed("session-1", "/project1") is True + assert memory_service.is_session_processed("session-1", "/project1", source="claude_code") is True count = memory_service.delete_all_memories() assert count == 2 @@ -164,38 +246,18 @@ def test_delete_all_memories(memory_service: MemoryService) -> None: memories = memory_service.get_memories() assert len(memories) == 0 - assert memory_service.is_session_processed("session-1", "/project1") is False - + assert memory_service.is_session_processed("session-1", "/project1", source="claude_code") is False -def test_mark_session_processed(memory_service: MemoryService) -> None: - """Marks session as processed.""" - memory_service.mark_session_processed("session-test", "/test/project", 5) - assert memory_service.is_session_processed("session-test", "/test/project") is True +def test_mark_session_processed__marks_session_for_source(memory_service: MemoryService) -> None: + memory_service.mark_session_processed("session-test", "/test/project", 5, source="claude_code") + assert memory_service.is_session_processed("session-test", "/test/project", source="claude_code") is True -def test_is_session_processed(memory_service: MemoryService) -> None: - """Checks if session was processed.""" - assert memory_service.is_session_processed("unprocessed-session", "/any/project") is False - memory_service.mark_session_processed("processed-session", "/any/project", 3) +def test_is_session_processed__returns_false_before_true_after_marking(memory_service: MemoryService) -> None: + assert memory_service.is_session_processed("unprocessed-session", "/any/project", source="claude_code") is False - assert memory_service.is_session_processed("processed-session", "/any/project") is True + memory_service.mark_session_processed("processed-session", "/any/project", 3, source="claude_code") - -def 
test_get_memories_limit(memory_service: MemoryService) -> None: - """Test that get_memories respects limit parameter.""" - for i in range(10): - memory_service.save_memory( - MemoryEntry( - id=f"mem-{i}", - session_id="session-1", - project_dir="/test/project", - memory_type=MemoryType.USER, - content=f"Memory {i}", - created_at=datetime.now(), - ) - ) - - memories = memory_service.get_memories(limit=5) - assert len(memories) == 5 + assert memory_service.is_session_processed("processed-session", "/any/project", source="claude_code") is True diff --git a/tests/test_opencode_memory_integration.py b/tests/test_opencode_memory_integration.py index f9f6a87..c90c2bf 100644 --- a/tests/test_opencode_memory_integration.py +++ b/tests/test_opencode_memory_integration.py @@ -15,20 +15,19 @@ import pytest from slopometry.core.database import EventDatabase -from slopometry.core.models.memory import MemoryCandidate, MemoryEntry, MemoryType +from slopometry.core.models.memory import FreshnessAction, MemoryCandidate, MemoryEntry, MemoryType from slopometry.solo.services.memory_extractor import MemoryExtractor -from slopometry.solo.services.memory_freshness import MemoryFreshnessValidator +from slopometry.solo.services.memory_freshness import validate_freshness from slopometry.solo.services.memory_service import MemoryService - -OPENCODE_STORAGE = Path(os.environ.get("OPENCODE_STORAGE", "/home/tensor-templar/.local/share/opencode/storage")) +OPENCODE_STORAGE = Path(os.environ["OPENCODE_STORAGE"]) if os.environ.get("OPENCODE_STORAGE") else None @pytest.fixture def real_opencode_session() -> tuple[str, Path]: """Pick the first OpenCode session that has a message directory, or skip.""" - if not OPENCODE_STORAGE.is_dir(): - pytest.skip(f"OpenCode storage not found at {OPENCODE_STORAGE}") + if OPENCODE_STORAGE is None or not OPENCODE_STORAGE.is_dir(): + pytest.skip("OPENCODE_STORAGE env var not set or path does not exist") message_root = OPENCODE_STORAGE / "message" if not 
message_root.is_dir(): pytest.skip(f"No message directory at {message_root}") @@ -46,9 +45,11 @@ def fresh_memory_service(tmp_path: Path) -> MemoryService: class TestRealOpenCodeSessionExtraction: - def test_extract_produces_non_empty_conversation(self, real_opencode_session: tuple[str, Path]): + def test_extract_memories_from_opencode_session__produces_non_empty_conversation_from_real_session( + self, real_opencode_session: tuple[str, Path] + ): session_id, storage_root = real_opencode_session - extractor = MemoryExtractor("https://llm.example/v1", "model-x") + extractor = MemoryExtractor("https://llm.example/v1", "model-x", "test-key") text = extractor.extract_memories_from_opencode_session(session_id, storage_root) assert text.strip() assert "USER:" in text or "ASSISTANT:" in text, ( @@ -56,9 +57,11 @@ def test_extract_produces_non_empty_conversation(self, real_opencode_session: tu f"got first 200 chars: {text[:200]!r}" ) - def test_extracted_text_contains_some_tool_markers(self, real_opencode_session: tuple[str, Path]): + def test_extract_memories_from_opencode_session__contains_tool_markers_when_session_uses_tools( + self, real_opencode_session: tuple[str, Path] + ): session_id, storage_root = real_opencode_session - extractor = MemoryExtractor("https://llm.example/v1", "model-x") + extractor = MemoryExtractor("https://llm.example/v1", "model-x", "test-key") text = extractor.extract_memories_from_opencode_session(session_id, storage_root) has_tool = "TOOL:" in text if not has_tool: @@ -76,7 +79,7 @@ def _stub_judge(self, action: str, merged_content: str | None = None) -> MagicMo mock.choices = [MagicMock(message=MagicMock(content=json.dumps(payload)))] return mock - def test_supersede_links_old_to_new_via_superseded_by(self, fresh_memory_service: MemoryService): + def test_validate_freshness__supersede_links_old_to_new_via_superseded_by(self, fresh_memory_service: MemoryService): existing = MemoryEntry( id="old-1", session_id="claude_code:s_prev", @@ -94,13 
+97,12 @@ def test_supersede_links_old_to_new_via_superseded_by(self, fresh_memory_service embedding=[0.99, 0.14, 0.0], ) - validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") with patch("openai.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.return_value = self._stub_judge("supersede") - decisions, _ = validator.validate([new_candidate], [existing]) + decisions, _ = validate_freshness([new_candidate], [existing], "https://llm.example/v1", "model-x", "test-key") assert len(decisions) == 1 - assert decisions[0].action == "supersede" + assert decisions[0].action == FreshnessAction.SUPERSEDE from slopometry.core.models.memory import MemoryCreateRequest @@ -115,13 +117,13 @@ def test_supersede_links_old_to_new_via_superseded_by(self, fresh_memory_service fresh_memory_service.update_memory(existing.id, superseded_by=new_id) - all_memories = fresh_memory_service.get_memories(project_dir="/test/proj", limit=100) + all_memories = fresh_memory_service.get_memories(project_dir="/test/proj", limit=100, include_superseded=True) old_updated = next(m for m in all_memories if m.id == existing.id) new_loaded = next(m for m in all_memories if m.id == new_id) assert old_updated.superseded_by == new_id assert new_loaded.superseded_by is None - def test_merge_action_rewrites_candidate_content(self, fresh_memory_service: MemoryService): + def test_validate_freshness__merge_action_rewrites_candidate_content(self, fresh_memory_service: MemoryService): existing = MemoryEntry( id="old-2", session_id="claude_code:s_prev2", @@ -141,14 +143,13 @@ def test_merge_action_rewrites_candidate_content(self, fresh_memory_service: Mem merged_text = "Project uses rust-code-analysis (switched from radon in 2026)" - validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") with patch("openai.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.return_value = self._stub_judge( "merge", merged_content=merged_text ) - 
decisions, _ = validator.validate([new_candidate], [existing]) + decisions, _ = validate_freshness([new_candidate], [existing], "https://llm.example/v1", "model-x", "test-key") - assert decisions[0].action == "merge" + assert decisions[0].action == FreshnessAction.MERGE assert decisions[0].merged_content == merged_text decisions[0].new_candidate.content = decisions[0].merged_content @@ -163,11 +164,11 @@ def test_merge_action_rewrites_candidate_content(self, fresh_memory_service: Mem ) ) assert saved[0].content == merged_text - all_memories = fresh_memory_service.get_memories(project_dir="/test/proj", limit=100) + all_memories = fresh_memory_service.get_memories(project_dir="/test/proj", limit=100, include_superseded=True) old_loaded = next(m for m in all_memories if m.id == existing.id) assert old_loaded.superseded_by is None - def test_dedupe_action_skips_new_save(self, fresh_memory_service: MemoryService): + def test_validate_freshness__dedupe_action_skips_new_save(self, fresh_memory_service: MemoryService): existing = MemoryEntry( id="old-3", session_id="claude_code:s_prev3", @@ -185,17 +186,16 @@ def test_dedupe_action_skips_new_save(self, fresh_memory_service: MemoryService) embedding=[0.99, 0.14, 0.0], ) - validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") with patch("openai.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.return_value = self._stub_judge("dedupe") - decisions, _ = validator.validate([new_candidate], [existing]) + decisions, _ = validate_freshness([new_candidate], [existing], "https://llm.example/v1", "model-x", "test-key") assert len(decisions) == 1 - assert decisions[0].action == "dedupe" + assert decisions[0].action == FreshnessAction.DEDUPE deduped_candidates: list[MemoryCandidate] = [] for d in decisions: - if d.action == "dedupe": + if d.action == FreshnessAction.DEDUPE: if d.new_candidate.metadata is None: d.new_candidate.metadata = {} d.new_candidate.metadata["deduped_against"] = 
d.existing_memory.id @@ -203,11 +203,11 @@ def test_dedupe_action_skips_new_save(self, fresh_memory_service: MemoryService) deduped_candidates.append(d.new_candidate) assert deduped_candidates == [] - all_memories = fresh_memory_service.get_memories(project_dir="/test/proj", limit=100) + all_memories = fresh_memory_service.get_memories(project_dir="/test/proj", limit=100, include_superseded=True) assert len(all_memories) == 1 assert all_memories[0].id == existing.id - def test_keep_both_saves_both_independently(self, fresh_memory_service: MemoryService): + def test_validate_freshness__keep_both_saves_both_independently(self, fresh_memory_service: MemoryService): existing = MemoryEntry( id="old-4", session_id="claude_code:s_prev4", @@ -225,13 +225,12 @@ def test_keep_both_saves_both_independently(self, fresh_memory_service: MemorySe embedding=[0.95, 0.31, 0.0], ) - validator = MemoryFreshnessValidator("https://llm.example/v1", "model-x") with patch("openai.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.return_value = self._stub_judge("keep_both") - decisions, _ = validator.validate([new_candidate], [existing]) + decisions, _ = validate_freshness([new_candidate], [existing], "https://llm.example/v1", "model-x", "test-key") assert len(decisions) == 1 - assert decisions[0].action == "keep_both" + assert decisions[0].action == FreshnessAction.KEEP_BOTH from slopometry.core.models.memory import MemoryCreateRequest @@ -243,7 +242,7 @@ def test_keep_both_saves_both_independently(self, fresh_memory_service: MemorySe ) ) - all_memories = fresh_memory_service.get_memories(project_dir="/test/proj", limit=100) + all_memories = fresh_memory_service.get_memories(project_dir="/test/proj", limit=100, include_superseded=True) assert len(all_memories) == 2 old_loaded = next(m for m in all_memories if m.id == existing.id) assert old_loaded.superseded_by is None diff --git a/tests/test_preflight.py b/tests/test_preflight.py index c8835fe..5124e2a 100644 --- 
a/tests/test_preflight.py +++ b/tests/test_preflight.py @@ -10,7 +10,7 @@ class TestPreflightEndpoints: - def test_raises_click_exception_when_chat_endpoint_down(self): + def test_preflight_endpoints__raises_when_chat_endpoint_down(self): chat_err = "chat LLM (https://chat.example/v1): APIConnectionError: no available server" embed_ok = None with patch("slopometry.solo.cli.preflight._check_endpoint", side_effect=[chat_err, embed_ok]): @@ -24,7 +24,7 @@ def test_raises_click_exception_when_chat_endpoint_down(self): assert "chat.example" in str(exc_info.value) assert "embed.example" not in str(exc_info.value) - def test_raises_click_exception_when_embedding_endpoint_down(self): + def test_preflight_endpoints__raises_when_embedding_endpoint_down(self): chat_ok = None embed_err = "embedding (https://embed.example/v1): APIConnectionError: refused" with patch("slopometry.solo.cli.preflight._check_endpoint", side_effect=[chat_ok, embed_err]): @@ -37,7 +37,7 @@ def test_raises_click_exception_when_embedding_endpoint_down(self): ) assert "embed.example" in str(exc_info.value) - def test_raises_with_both_errors_listed(self): + def test_preflight_endpoints__raises_with_both_errors_listed(self): chat_err = "chat LLM: down" embed_err = "embedding: down" with patch("slopometry.solo.cli.preflight._check_endpoint", side_effect=[chat_err, embed_err]): @@ -52,7 +52,7 @@ def test_raises_with_both_errors_listed(self): assert "chat LLM: down" in msg assert "embedding: down" in msg - def test_passes_silently_when_both_endpoints_reachable(self): + def test_preflight_endpoints__passes_silently_when_both_endpoints_reachable(self): with patch("slopometry.solo.cli.preflight._check_endpoint", return_value=None): preflight_endpoints( chat_endpoint="https://chat.example/v1", @@ -63,7 +63,7 @@ def test_passes_silently_when_both_endpoints_reachable(self): class TestCheckEndpoint: - def test_returns_none_when_models_list_succeeds(self): + def 
test_check_endpoint__returns_none_when_models_list_succeeds(self): mock_client = MagicMock() mock_client.models.list.return_value = MagicMock() with patch("openai.OpenAI", return_value=mock_client) as mock_openai: @@ -73,7 +73,7 @@ def test_returns_none_when_models_list_succeeds(self): assert result is None mock_openai.assert_called_once_with(base_url="https://x/v1", api_key="key") - def test_returns_error_string_on_exception(self): + def test_check_endpoint__returns_error_string_on_unreachable_endpoint(self): mock_client = MagicMock() mock_client.models.list.side_effect = RuntimeError("refused") with patch("openai.OpenAI", return_value=mock_client): @@ -85,9 +85,24 @@ def test_returns_error_string_on_exception(self): assert "https://x/v1" in result assert "RuntimeError" in result + def test_check_endpoint__returns_error_when_openai_not_installed(self): + original_import = __builtins__["__import__"] + + def failing_import(name, *args, **kwargs): + if name == "openai" or name.startswith("openai."): + raise ImportError(f"No module named '{name}'") + return original_import(name, *args, **kwargs) + + with patch("builtins.__import__", side_effect=failing_import): + from slopometry.solo.cli.preflight import _check_endpoint + + result = _check_endpoint("test", "https://x/v1", "key") + assert result is not None + assert "openai package not installed" in result + class TestFindMemoriesPreflightIntegration: - def test_dry_run_skips_preflight(self, monkeypatch: pytest.MonkeyPatch): + def test_find_memories__dry_run_skips_preflight(self, monkeypatch: pytest.MonkeyPatch): from slopometry.core.settings import settings monkeypatch.setattr(settings, "offline_mode", True) @@ -106,7 +121,7 @@ def test_dry_run_skips_preflight(self, monkeypatch: pytest.MonkeyPatch): ) assert "offline_mode" not in result.output or result.exit_code == 0 - def test_offline_mode_blocks_before_preflight(self, monkeypatch: pytest.MonkeyPatch): + def 
test_find_memories__offline_mode_blocks_before_preflight(self, monkeypatch: pytest.MonkeyPatch): from slopometry.core.settings import settings monkeypatch.setattr(settings, "offline_mode", True) diff --git a/tests/test_transcript_finder.py b/tests/test_transcript_finder.py index 5154a5b..8245bcc 100644 --- a/tests/test_transcript_finder.py +++ b/tests/test_transcript_finder.py @@ -56,7 +56,7 @@ def storage_finder(monkeypatch: pytest.MonkeyPatch): return TranscriptFinder() -def test_opencode_session_emits_opencode_source( +def test_discover_transcripts__emits_opencode_source_for_opencode_sessions( storage_finder: TranscriptFinder, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, @@ -97,7 +97,7 @@ def test_opencode_session_emits_opencode_source( assert all(isinstance(r, DiscoveredTranscript) for r in opencode_results) -def test_non_matching_worktree_excluded( +def test_discover_transcripts__excludes_non_matching_worktree( storage_finder: TranscriptFinder, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, @@ -128,19 +128,20 @@ def test_non_matching_worktree_excluded( assert results == [] -def test_missing_opencode_storage_root_returns_only_claude( +def test_discover_transcripts__returns_only_claude_when_opencode_storage_missing( storage_finder: TranscriptFinder, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, ): project_dir = tmp_path / "myproject" project_dir.mkdir() - monkeypatch.setattr(storage_finder, "find_opencode_storage_root", lambda: None) + nonexistent_storage = tmp_path / "nonexistent_opencode_storage" + monkeypatch.setattr(storage_finder, "find_opencode_storage_root", lambda: nonexistent_storage) results = storage_finder.discover_transcripts(project_dir) assert all(r.source == AbstractEventSource.CLAUDE_CODE for r in results) -def test_slopometry_transcript_marked_as_claude_code(tmp_path: Path): +def test_discover_transcripts__marks_slopometry_transcript_as_claude_code(tmp_path: Path): project_dir = tmp_path / "myproject" slop_dir = project_dir / 
".slopometry" / "ses_abc" slop_dir.mkdir(parents=True) @@ -151,7 +152,7 @@ def test_slopometry_transcript_marked_as_claude_code(tmp_path: Path): ) -def test_only_matching_worktree_included( +def test_find_opencode_sessions__includes_only_matching_worktree( storage_finder: TranscriptFinder, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, @@ -194,7 +195,7 @@ def test_only_matching_worktree_included( assert {r.session_id for r in results} == {"ses_keep"} -def test_find_opencode_storage_root_returns_none_when_no_xdg( +def test_find_opencode_storage_root__falls_back_to_home_when_no_xdg( monkeypatch: pytest.MonkeyPatch, tmp_path: Path, ): @@ -204,13 +205,13 @@ def test_find_opencode_storage_root_returns_none_when_no_xdg( assert finder.find_opencode_storage_root() == tmp_path / ".local" / "share" / "opencode" / "storage" -def test_find_opencode_storage_root_uses_xdg_when_set(monkeypatch: pytest.MonkeyPatch): +def test_find_opencode_storage_root__uses_xdg_when_set(monkeypatch: pytest.MonkeyPatch): monkeypatch.setenv("XDG_DATA_HOME", "/custom/xdg") finder = TranscriptFinder() assert finder.find_opencode_storage_root() == Path("/custom/xdg/opencode/storage") -def test_opencode_session_in_subdirectory_of_worktree_included( +def test_find_opencode_sessions__includes_session_in_subdirectory_of_worktree( storage_finder: TranscriptFinder, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, @@ -252,10 +253,10 @@ def test_opencode_session_in_subdirectory_of_worktree_included( assert {r.session_id for r in results_subdir} == {"ses_subdir"} results_root = storage_finder.find_opencode_sessions(project_root) - assert results_root == [] + assert {r.session_id for r in results_root} == {"ses_subdir"} -def test_opencode_session_outside_worktree_excluded( +def test_find_opencode_sessions__excludes_session_outside_worktree( storage_finder: TranscriptFinder, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, From a28b4721e50a762954637cb1b31fa9da6eab733e Mon Sep 17 00:00:00 2001 From: 
TensorTemplar Date: Sun, 28 Jun 2026 15:09:13 +0300 Subject: [PATCH 6/8] fix smells: remove dead pass-through wrappers, orphan comments, acknowledge silent exception - Remove map_tool_name from ClaudeCodeAdapter and OpenCodeAdapter (dead code, never called) - Remove orphan section comments (# Memory types, # Memory) from models/__init__.py - Add allow-silent marker to emit_event_from_stdin dispatch except block (hook subprocess must not crash the harness session) --- src/slopometry/core/models/__init__.py | 3 --- src/slopometry/core/protocol/adapters/claude_code.py | 4 ---- src/slopometry/core/protocol/adapters/opencode.py | 4 ---- src/slopometry/core/protocol/dispatch.py | 2 +- 4 files changed, 1 insertion(+), 12 deletions(-) diff --git a/src/slopometry/core/models/__init__.py b/src/slopometry/core/models/__init__.py index 2d85c39..380f9f4 100644 --- a/src/slopometry/core/models/__init__.py +++ b/src/slopometry/core/models/__init__.py @@ -65,8 +65,6 @@ Project, ProjectLanguage, ) - -# Memory types from slopometry.core.models.memory import ( MemoryCandidate, MemoryCreateRequest, @@ -170,7 +168,6 @@ "UserStoryDisplayData", "UserStoryEntry", "UserStoryStatistics", - # Memory "MemoryCandidate", "MemoryCreateRequest", "MemoryEntry", diff --git a/src/slopometry/core/protocol/adapters/claude_code.py b/src/slopometry/core/protocol/adapters/claude_code.py index 450bb03..dae6634 100644 --- a/src/slopometry/core/protocol/adapters/claude_code.py +++ b/src/slopometry/core/protocol/adapters/claude_code.py @@ -145,10 +145,6 @@ class ClaudeCodeAdapter: source = AbstractEventSource.CLAUDE_CODE tool_type_map: dict[str, str] = {name: enum.value for name, enum in _TOOL_NAME_TO_TYPE.items()} - @classmethod - def map_tool_name(cls, tool_name: str) -> str: - return resolve_tool_type(tool_name) - def detect_event_type(self, raw_payload: dict[str, Any]) -> AbstractEventType: fields = set(raw_payload.keys()) if "tool_name" in fields and "tool_input" in fields: diff --git 
a/src/slopometry/core/protocol/adapters/opencode.py b/src/slopometry/core/protocol/adapters/opencode.py index 9ea748b..f1f56ba 100644 --- a/src/slopometry/core/protocol/adapters/opencode.py +++ b/src/slopometry/core/protocol/adapters/opencode.py @@ -44,10 +44,6 @@ class OpenCodeAdapter: source = AbstractEventSource.OPENCODE tool_type_map: dict[str, str] = {name: enum.value for name, enum in _TOOL_NAME_TO_TYPE.items()} - @classmethod - def map_tool_name(cls, tool_name: str) -> str: - return resolve_tool_type(tool_name) - def detect_event_type(self, raw_payload: dict[str, Any]) -> AbstractEventType: event_type = raw_payload.get("event_type") if not isinstance(event_type, str): diff --git a/src/slopometry/core/protocol/dispatch.py b/src/slopometry/core/protocol/dispatch.py index 8c9e50a..cb37579 100644 --- a/src/slopometry/core/protocol/dispatch.py +++ b/src/slopometry/core/protocol/dispatch.py @@ -130,7 +130,7 @@ def emit_event_from_stdin( try: dispatch_event(source, raw_payload, event_type_override=event_type_override) - except Exception as e: + except Exception as e: # slopometry: allow-silent - hook subprocess must not crash the harness session if settings.debug_mode: print(f"Slopometry dispatch error: {e}", file=sys.stderr) return 0 From dcebaa59a7fcc653828f24af59b056cbdf0c756d Mon Sep 17 00:00:00 2001 From: TensorTemplar Date: Sun, 28 Jun 2026 15:49:32 +0300 Subject: [PATCH 7/8] Bump version to 2026.6.28 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 971c06d..16d6593 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "slopometry" -version = "2026.6.23" +version = "2026.6.28" description = "Opinionated code quality metrics for code agents and humans" readme = "README.md" requires-python = ">=3.13" From 9e23725b7f44353381737321ac866ad75e45172a Mon Sep 17 00:00:00 2001 From: TensorTemplar Date: Sun, 28 Jun 2026 18:54:28 +0300 
Subject: [PATCH 8/8] memory auto-pruning: staleness audit, freshness priority, config, docs - Add retired_reason column (migration 016) separate from superseded_by - audit_staleness(): LLM checks existing memories against transcript for retirement (fixed bugs, completed work, outdated state) - prune-memories CLI command for on-demand staleness sweeps - Fix DEDUPE: skip saving deduped candidates instead of metadata tagging - Fix MERGE: mark old memory superseded_by new merged entry - Fix staleness audit: exclude freshly-saved memories from audit scope - Action priority resolution: SUPERSEDE > MERGE > DEDUPE > KEEP_BOTH (only winning action's side effects apply per candidate group) - Extract _call_llm_json helper to DRY reconciliation + staleness LLM calls - Promote magic numbers to settings (memory_query_limit, transcript truncation, prune transcript window, reconciliation/staleness max_tokens) - Hoist openai import to top-level in memory_freshness - README: mark LLM-dependent features, add offline_mode config section, document concurrent sessions limitation - CLAUDE.md: update core components, CLI commands, model paths - Tests: 114 new tests across freshness, service, models, migrations, reconciliation priority, prune-memories CLI integration --- .env.solo.example | 18 ++ .env.summoner.example | 11 + CLAUDE.md | 8 +- README.md | 41 ++- src/slopometry/core/database.py | 65 ++--- src/slopometry/core/migrations.py | 23 ++ src/slopometry/core/models/memory.py | 22 +- src/slopometry/core/settings.py | 21 ++ src/slopometry/solo/cli/commands.py | 222 ++++++++++++++++- .../solo/services/memory_freshness.py | 163 ++++++++++-- .../solo/services/memory_service.py | 8 + tests/test_database.py | 2 +- tests/test_find_memories_reconciliation.py | 114 +++++++++ tests/test_memory_freshness.py | 233 ++++++++++++++++-- tests/test_memory_models.py | 98 +++++++- tests/test_memory_service.py | 100 ++++++++ tests/test_migrations.py | 83 +++++++ tests/test_opencode_memory_integration.py | 
8 +- tests/test_prune_memories_cli.py | 217 ++++++++++++++++ tests/test_settings.py | 3 +- 20 files changed, 1358 insertions(+), 102 deletions(-) create mode 100644 tests/test_find_memories_reconciliation.py create mode 100644 tests/test_prune_memories_cli.py diff --git a/.env.solo.example b/.env.solo.example index f0a6330..7cd42b9 100644 --- a/.env.solo.example +++ b/.env.solo.example @@ -30,3 +30,21 @@ SLOPOMETRY_MEMORY_LLM_API_KEY=your-api-key-here SLOPOMETRY_MEMORY_EMBEDDING_ENDPOINT=https://your-embedding-endpoint.com/v1 SLOPOMETRY_MEMORY_EMBEDDING_MODEL=your-embedding-model SLOPOMETRY_MEMORY_EMBEDDING_API_KEY=your-embedding-api-key-here + +# Memory Freshness & Staleness +# Similarity thresholds for reconciliation (dedupe/merge/supersede) +# SLOPOMETRY_FRESHNESS_THRESHOLD_FLOOR=0.45 +# SLOPOMETRY_FRESHNESS_THRESHOLD_CEILING=0.95 + +# Max memories loaded for freshness validation and staleness audit +# SLOPOMETRY_MEMORY_QUERY_LIMIT=200 + +# Max chars of transcript sent to LLM for staleness audit +# SLOPOMETRY_MEMORY_TRANSCRIPT_TRUNCATION_CHARS=15000 + +# Number of recent transcripts used as context for prune-memories +# SLOPOMETRY_MEMORY_PRUNE_TRANSCRIPT_WINDOW=3 + +# Max tokens for LLM responses +# SLOPOMETRY_MEMORY_RECONCILIATION_MAX_TOKENS=200 +# SLOPOMETRY_MEMORY_STALENESS_AUDIT_MAX_TOKENS=1000 diff --git a/.env.summoner.example b/.env.summoner.example index 65e482c..7b773b3 100644 --- a/.env.summoner.example +++ b/.env.summoner.example @@ -33,3 +33,14 @@ SLOPOMETRY_HF_DEFAULT_REPO=username/slopometry-dataset SLOPOMETRY_MAX_PARALLEL_WORKERS=6 # Maximum commits to analyze for baseline computation SLOPOMETRY_BASELINE_MAX_COMMITS=100 + +# Memory Extraction (for solo find-memories, prune-memories, show-memories) +# Uses the same LLM proxy endpoint as summoner features +SLOPOMETRY_MEMORY_LLM_ENDPOINT=https://llm2.droidcraft.org/minimax-m3-mxfp4-vllm/v1 +SLOPOMETRY_MEMORY_LLM_MODEL=olka-fi/MiniMax-M3-MXFP4 +SLOPOMETRY_MEMORY_LLM_API_KEY=your-vllm-api-key + +# 
Embedding endpoint (for memory similarity and uniqueness scoring) +SLOPOMETRY_MEMORY_EMBEDDING_ENDPOINT=https://your-embedding-endpoint.com/v1 +SLOPOMETRY_MEMORY_EMBEDDING_MODEL=your-embedding-model +SLOPOMETRY_MEMORY_EMBEDDING_API_KEY=your-embedding-api-key diff --git a/CLAUDE.md b/CLAUDE.md index 8b278d0..10f2637 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -45,8 +45,9 @@ uv tool install . --reinstall - **CLI** (`src/slopometry/cli.py`): Hybrid CLI with flat core commands (install, uninstall, status, latest, shell-completion) and persona subcommands (solo, summoner) - **Database** (`src/slopometry/core/database.py`): SQLite storage with platform-specific default locations - **Hook Handler** (`src/slopometry/core/hook_handler.py`): Script invoked by Claude Code hooks to capture events -- **Models** (`src/slopometry/core/models.py`): Pydantic models for HookEvent, SessionStatistics +- **Models** (`src/slopometry/core/models/`): Pydantic models for HookEvent, SessionStatistics, MemoryEntry - **Settings** (`src/slopometry/core/settings.py`): Pydantic-settings configuration with .env support +- **Memory Freshness** (`src/slopometry/solo/services/memory_freshness.py`): LLM-driven reconciliation (keep_both/merge/supersede/dedupe) and staleness audit for memory candidates - **LLM Wrapper** (`src/slopometry/summoner/services/llm_wrapper.py`): AI agents for analyzing git diffs and generating user stories ### How It Works @@ -127,7 +128,7 @@ echo '{"session_id": "test123", "transcript_path": "/tmp/transcript.jsonl", "too ## Adding New Tool Types -1. Add to `ToolType` enum in models.py +1. Add to `ToolType` enum in `src/slopometry/core/models/core.py` 2. Update `TOOL_TYPE_MAP` in hook_handler.py 3. 
No database migration needed (sqlite-utils handles schema) @@ -148,6 +149,9 @@ The experiment tracking feature includes: - `solo ls`: List recent sessions - `solo show `: Show detailed session statistics - `latest`: Show latest session statistics +- `solo find-memories`: Scan transcripts, extract memory candidates, run freshness validation, and save +- `solo prune-memories`: Audit existing memories for staleness and retire stale ones +- `solo show-memories`: List and manage memories for a project ### Key Components - **CLI Calculator**: Measures "Completeness Likelihood Improval" (0-1.0 scale) diff --git a/README.md b/README.md index 47d865a..17938cc 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,8 @@ A: There are advanced features for temporal and cross-project measurement of slo Seriously, please do not open PRs with support for any kind of unserious languages. Just fork and pretend you made it. We are ok with that. Thank you. +**Concurrent sessions**: Stop hook feedback is designed for a single active session per project. Running two OpenCode or Claude Code sessions in the same project directory simultaneously may cause feedback suppression (shared per-project cache), dropped stop events (per-project lock contention), and incorrect `edited_files` scoping between sessions. + # Installation Both Anthropic models and MiniMax-M2 are fully supported as the `claude code` drivers. 
@@ -172,6 +174,15 @@ slopometry latest # Save session artifacts (transcript, plans, tasks) to .slopometry// slopometry solo save-transcript # latest slopometry solo save-transcript + +# Memory extraction: scan transcripts and extract durable facts (requires LLM) +slopometry solo find-memories + +# Audit existing memories for staleness — fixed bugs, completed work (requires LLM) +slopometry solo prune-memories + +# Browse and manage memories +slopometry solo show-memories ``` ![slopometry-roles.png](assets/slopometry-roles.png) @@ -231,7 +242,7 @@ curl -o ~/.config/slopometry/.env https://raw.githubusercontent.com/TensorTempla ``` -Customize via `.env` file or environment variables: +Core settings: - `SLOPOMETRY_DATABASE_PATH`: Custom database location (optional) - Default locations: @@ -241,6 +252,32 @@ Customize via `.env` file or environment variables: - `SLOPOMETRY_ENABLE_COMPLEXITY_ANALYSIS`: Collect complexity metrics (default: `true`) - `SLOPOMETRY_ENABLE_COMPLEXITY_FEEDBACK`: Provide feedback to Claude (default: `false`) +### LLM-dependent features + +By default, slopometry runs in **offline mode** (`SLOPOMETRY_OFFLINE_MODE=true`), which disables all external LLM calls. 
The following features require an LLM endpoint and will refuse to run until you set `SLOPOMETRY_OFFLINE_MODE=false` and configure endpoints: + +- **`solo find-memories`** — scans transcripts, extracts memory candidates via LLM, runs freshness reconciliation against existing memories, and retires stale ones +- **`solo prune-memories`** — audits existing memories for staleness against recent transcripts +- **`summoner userstorify`** — generates user stories from git diffs +- **`summoner user-story-export --upload-to-hf`** — uploads dataset to Hugging Face + +To enable: + +```bash +# Disable offline mode +SLOPOMETRY_OFFLINE_MODE=false + +# Chat LLM endpoint (OpenAI-compatible API) +SLOPOMETRY_MEMORY_LLM_ENDPOINT=https://your-llm-endpoint.com/v1 +SLOPOMETRY_MEMORY_LLM_MODEL=your-model-name +SLOPOMETRY_MEMORY_LLM_API_KEY=your-api-key + +# Embedding endpoint (for memory similarity and uniqueness scoring) +SLOPOMETRY_MEMORY_EMBEDDING_ENDPOINT=https://your-embedding-endpoint.com/v1 +SLOPOMETRY_MEMORY_EMBEDDING_MODEL=your-embedding-model +SLOPOMETRY_MEMORY_EMBEDDING_API_KEY=your-embedding-api-key +``` + # Development For working on slopometry itself (not just installing it): @@ -277,5 +314,5 @@ uv tool install . --reinstall --find-links "https://github.com/Droidcraft/rust-c [x] - Add plan evolution log based on claude's todo shenanigans [ ] - Rename the readme.md to wontreadme.md because it takes more than 15 seconds or whatever the attention span is nowadays to read it all. Maybe make it all one giant picture? Anyway, stop talking to yourself in the roadmap. 
[ ] - Finish git worktree-based [NFP-CLI](https://tensortemplar.substack.com/p/humans-are-no-longer-embodied-amortization) (TM) training objective implementation so complexity metrics can be used as additional process reward for training code agents -[ ] - Extend stop hook feedback with LLM-as-Judge to support guiding agents based on smells and style guide +[x] - Memory extraction with LLM-driven freshness reconciliation and staleness auditing [ ] - Not go bankrupt from having to maintain open source in my free time, no wait... diff --git a/src/slopometry/core/database.py b/src/slopometry/core/database.py index 138051d..3f10d87 100644 --- a/src/slopometry/core/database.py +++ b/src/slopometry/core/database.py @@ -376,6 +376,7 @@ def _create_tables(self) -> None: updated_at TEXT, retained INTEGER NOT NULL DEFAULT 0, superseded_by TEXT, + retired_reason TEXT, embedding TEXT, metadata TEXT ) @@ -2186,8 +2187,8 @@ def save_memory(self, memory: MemoryEntry) -> None: INSERT OR REPLACE INTO memories ( id, session_id, project_dir, memory_type, content, source_context, created_at, updated_at, - retained, superseded_by, embedding, metadata - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + retained, superseded_by, retired_reason, embedding, metadata + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( memory.id, @@ -2200,44 +2201,12 @@ def save_memory(self, memory: MemoryEntry) -> None: memory.updated_at.isoformat() if memory.updated_at else None, int(memory.retained), memory.superseded_by, + memory.retired_reason, json.dumps(memory.embedding) if memory.embedding else None, json.dumps(memory.metadata) if memory.metadata else None, ), ) - def save_memories(self, memories: list["MemoryEntry"]) -> int: - """Save multiple memory entries. 
- - Returns: - Number of memories saved - """ - with self._get_db_connection() as conn: - for memory in memories: - conn.execute( - """ - INSERT INTO memories ( - id, session_id, project_dir, memory_type, content, - source_context, created_at, updated_at, - retained, superseded_by, embedding, metadata - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - """, - ( - memory.id, - memory.session_id, - memory.project_dir, - memory.memory_type.value, - memory.content, - memory.source_context, - memory.created_at.isoformat(), - memory.updated_at.isoformat() if memory.updated_at else None, - int(memory.retained), - memory.superseded_by, - json.dumps(memory.embedding) if memory.embedding else None, - json.dumps(memory.metadata) if memory.metadata else None, - ), - ) - return len(memories) - def get_memories( self, project_dir: str | None = None, @@ -2249,7 +2218,8 @@ def get_memories( Args: include_superseded: When False (default), exclude memories that - have been linked to a newer replacement via ``superseded_by``. + have been superseded by a newer replacement (``superseded_by``) + or retired by the staleness audit (``retired_reason``). The freshness validator must pass False so it compares new candidates only against the current truth, not stale chain predecessors. @@ -2259,7 +2229,7 @@ def get_memories( params: list = [] if not include_superseded: - query += " AND superseded_by IS NULL" + query += " AND superseded_by IS NULL AND retired_reason IS NULL" if project_dir: query += " AND project_dir = ?" 
@@ -2289,6 +2259,7 @@ def get_memories( updated_at=datetime.fromisoformat(row["updated_at"]) if row["updated_at"] else None, retained=bool(row["retained"]), superseded_by=row["superseded_by"], + retired_reason=row["retired_reason"], embedding=json.loads(row["embedding"]) if row["embedding"] else None, metadata=json.loads(row["metadata"]) if row["metadata"] else None, ) @@ -2371,6 +2342,20 @@ def update_memory( conn.commit() return bool(cursor.rowcount and cursor.rowcount > 0) + def retire_memory(self, memory_id: str, reason: str) -> bool: + """Mark a memory as retired (stale) without a direct replacement. + + Returns: + True if a memory was retired, False otherwise + """ + with self._get_db_connection() as conn: + cursor = conn.execute( + "UPDATE memories SET retired_reason = ?, updated_at = ? WHERE id = ?", + (reason, datetime.now().isoformat(), memory_id), + ) + conn.commit() + return bool(cursor.rowcount and cursor.rowcount > 0) + def mark_session_processed( self, session_id: str, project_dir: str, memory_count: int, source: str ) -> None: @@ -2395,9 +2380,9 @@ def is_session_processed(self, session_id: str, project_dir: str, source: str) - return cursor.fetchone() is not None def get_memory_stats(self, project_dir: str | None = None) -> dict: - """Get statistics about stored memories (excludes superseded).""" + """Get statistics about stored memories (excludes superseded and retired).""" with self._get_db_connection() as conn: - base_query = "SELECT memory_type, COUNT(*) as count FROM memories WHERE superseded_by IS NULL" + base_query = "SELECT memory_type, COUNT(*) as count FROM memories WHERE superseded_by IS NULL AND retired_reason IS NULL" params: list = [] if project_dir: @@ -2409,7 +2394,7 @@ def get_memory_stats(self, project_dir: str | None = None) -> dict: rows = conn.execute(base_query, params).fetchall() type_distribution = {row[0]: row[1] for row in rows} - total_query = "SELECT COUNT(*) FROM memories WHERE superseded_by IS NULL" + total_query = 
"SELECT COUNT(*) FROM memories WHERE superseded_by IS NULL AND retired_reason IS NULL" if project_dir: total_query += " AND project_dir = ?" total = conn.execute(total_query, params).fetchone()[0] or 0 diff --git a/src/slopometry/core/migrations.py b/src/slopometry/core/migrations.py index 917615b..7dc4e23 100644 --- a/src/slopometry/core/migrations.py +++ b/src/slopometry/core/migrations.py @@ -547,6 +547,28 @@ def up(self, conn: sqlite3.Connection) -> None: ) +class Migration016AddRetiredReasonToMemories(Migration): + """Add retired_reason column to memories for staleness audit retirement.""" + + @property + def version(self) -> str: + return "016" + + @property + def description(self) -> str: + return "Add retired_reason column to memories for staleness audit retirement" + + def up(self, conn: sqlite3.Connection) -> None: + cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='memories'") + if not cursor.fetchone(): + return + try: + conn.execute("ALTER TABLE memories ADD COLUMN retired_reason TEXT") + except sqlite3.OperationalError as e: + if "duplicate column name" not in str(e).lower(): + raise + + class MigrationRunner: """Manages database migrations.""" @@ -568,6 +590,7 @@ def __init__(self, db_path: Path): Migration013AddSourceAndParentSession(), Migration014AddBehavioralPatternHistory(), Migration015AbstractEventTypeValues(), + Migration016AddRetiredReasonToMemories(), ] @contextmanager diff --git a/src/slopometry/core/models/memory.py b/src/slopometry/core/models/memory.py index b9303d5..c04b507 100644 --- a/src/slopometry/core/models/memory.py +++ b/src/slopometry/core/models/memory.py @@ -38,7 +38,15 @@ def color(self) -> str: class MemoryEntry(BaseModel): - """Represents a stored memory entry.""" + """Represents a stored memory entry. + + A memory is visible in queries when both ``superseded_by`` and + ``retired_reason`` are ``None``. 
``superseded_by`` is set when a + newer memory replaces this one (SUPERSEDE / MERGE action). + ``retired_reason`` is set when the memory is stale (describes a + fixed bug, completed work, or outdated state) and is retired by + the staleness audit without a direct replacement. + """ id: str session_id: str @@ -50,6 +58,7 @@ class MemoryEntry(BaseModel): updated_at: datetime | None = None retained: bool = False superseded_by: str | None = None + retired_reason: str | None = None embedding: list[float] | None = None metadata: dict | None = None @@ -97,3 +106,14 @@ class FreshnessVerdict(BaseModel): default=None, description="Only present when action == merge", ) + + +class StalenessVerdict(BaseModel): + """LLM verdict for a single existing memory during the staleness audit. + + ``ref`` is the 1-based index into the existing-memories list that was + sent to the LLM. The caller maps it back to the ``MemoryEntry``. + """ + + ref: int + reason: str = "" diff --git a/src/slopometry/core/settings.py b/src/slopometry/core/settings.py index dfe3778..0e83729 100644 --- a/src/slopometry/core/settings.py +++ b/src/slopometry/core/settings.py @@ -267,6 +267,27 @@ def _ensure_global_config_dir() -> None: description="Maximum dedupe similarity threshold; derived threshold never goes above this", ) + memory_query_limit: int = Field( + default=200, + description="Maximum number of memories to load for freshness validation and staleness audit", + ) + memory_transcript_truncation_chars: int = Field( + default=15000, + description="Maximum characters of transcript text sent to the LLM for staleness audit", + ) + memory_prune_transcript_window: int = Field( + default=3, + description="Number of most recent transcripts to use as context for prune-memories", + ) + memory_reconciliation_max_tokens: int = Field( + default=200, + description="Max tokens for LLM reconciliation judge responses", + ) + memory_staleness_audit_max_tokens: int = Field( + default=1000, + description="Max tokens for 
LLM staleness audit responses", + ) + stdin_timeout_seconds: float = Field( default=5.0, description="Seconds to wait for stdin input in hook dispatch before giving up", diff --git a/src/slopometry/solo/cli/commands.py b/src/slopometry/solo/cli/commands.py index e921a2e..4baeb36 100644 --- a/src/slopometry/solo/cli/commands.py +++ b/src/slopometry/solo/cli/commands.py @@ -9,13 +9,12 @@ from slopometry.core.models.memory import FreshnessAction from slopometry.display.console import console, styled_pager +from slopometry.solo.services.memory_freshness import audit_staleness if TYPE_CHECKING: from slopometry.core.models import ImpactAssessment, RepoBaseline, SessionStatistics from slopometry.core.models.session import BehavioralPatternTrends -# Imports moved inside functions to optimize startup time - logger = logging.getLogger(__name__) @@ -784,6 +783,25 @@ def save_transcript(session_id: str | None, output_dir: str, yes: bool) -> None: console.print("[green]✓[/green] Saved session metadata to: session_metadata.json") +_ACTION_PRIORITY: dict[FreshnessAction, int] = { + FreshnessAction.SUPERSEDE: 3, + FreshnessAction.MERGE: 2, + FreshnessAction.DEDUPE: 1, + FreshnessAction.KEEP_BOTH: 0, +} + + +def _highest_priority_action(group: list) -> FreshnessAction: + """Pick the most consequential action from a candidate's reconciliation group. + + When a candidate matches multiple existing memories, the LLM may return + different actions for each pair. A single priority resolves conflicts: + SUPERSEDE > MERGE > DEDUPE > KEEP_BOTH. Only the winning action's side + effects are applied. 
+ """ + return max(group, key=lambda d: _ACTION_PRIORITY[d.action]).action + + @solo.command(name="find-memories") @click.option( "--project-dir", @@ -963,7 +981,7 @@ def find_memories( from slopometry.solo.services.memory_freshness import validate_freshness - existing_memories = memory_service.get_memories(project_dir=proj_dir_str, limit=200) + existing_memories = memory_service.get_memories(project_dir=proj_dir_str, limit=settings.memory_query_limit) decisions: list = [] if existing_memories: decisions, distribution = validate_freshness( @@ -1001,22 +1019,31 @@ def find_memories( decisions_by_candidate = defaultdict(list) for decision in decisions: decisions_by_candidate[id(decision.new_candidate)].append(decision) + + deduped_candidate_ids: set[int] = set() + merge_links: list[tuple[str, int]] = [] for group in decisions_by_candidate.values(): candidate = group[0].new_candidate - for decision in group: - if decision.action == FreshnessAction.MERGE and decision.merged_content: - candidate.content = decision.merged_content - elif decision.action == FreshnessAction.DEDUPE: - if candidate.metadata is None: - candidate.metadata = {} - candidate.metadata["deduped_against"] = decision.existing_memory.id + primary_action = _highest_priority_action(group) + if primary_action == FreshnessAction.MERGE: + merge_decision = next( + d for d in group if d.action == FreshnessAction.MERGE and d.merged_content + ) + candidate.content = merge_decision.merged_content + merge_links.append((merge_decision.existing_memory.id, id(candidate))) + elif primary_action == FreshnessAction.DEDUPE: + deduped_candidate_ids.add(id(candidate)) if candidate.metadata is None: candidate.metadata = {} - candidate.metadata["freshness_action"] = group[0].action + candidate.metadata["freshness_action"] = primary_action candidate.metadata["freshness_reason"] = group[0].reason - if group[0].action != FreshnessAction.KEEP_BOTH: + if primary_action != FreshnessAction.KEEP_BOTH: 
candidate.metadata["freshness_pair_with"] = group[0].existing_memory.id + if deduped_candidate_ids: + candidates = [c for c in candidates if id(c) not in deduped_candidate_ids] + console.print(f" [dim]Skipped {len(deduped_candidate_ids)} duplicate candidate(s)[/dim]") + from slopometry.core.models.memory import MemoryCreateRequest request = MemoryCreateRequest( @@ -1027,6 +1054,7 @@ def find_memories( saved = memory_service.save_memories(request) candidate_id_map: dict[int, str] = {id(c): e.id for c, e in zip(candidates, saved)} + for decision in decisions: if decision.action == FreshnessAction.SUPERSEDE: new_id = candidate_id_map.get(id(decision.new_candidate)) @@ -1036,8 +1064,37 @@ def find_memories( decision.existing_memory.id, superseded_by=new_id ) console.print( - f" [dim]Linked {decision.existing_memory.id} -> superseded_by={new_id}[/dim]" + f" [dim]Superseded {decision.existing_memory.id} -> {new_id}[/dim]" ) + + for existing_id, cand_obj_id in merge_links: + new_id = candidate_id_map.get(cand_obj_id) + if new_id is None: + continue + memory_service.update_memory(existing_id, superseded_by=new_id) + console.print( + f" [dim]Merged {existing_id} -> {new_id}[/dim]" + ) + + saved_ids = {m.id for m in saved} + current_memories = memory_service.get_memories(project_dir=proj_dir_str, limit=settings.memory_query_limit) + pre_existing_memories = [m for m in current_memories if m.id not in saved_ids] + if pre_existing_memories: + stale_pairs = audit_staleness( + pre_existing_memories, + cleaned_transcript, + llm_endpoint=endpoint, + llm_model=model, + api_key=api_key, + max_tokens=settings.memory_staleness_audit_max_tokens, + transcript_truncation_chars=settings.memory_transcript_truncation_chars, + ) + for memory_entry, reason in stale_pairs: + memory_service.retire_memory(memory_entry.id, reason) + console.print( + f" [yellow]Retired {memory_entry.id}: {reason}[/yellow]" + ) + memory_service.mark_session_processed( t.session_id, proj_dir_str, len(saved), 
source=t.source.value ) @@ -1059,6 +1116,145 @@ def find_memories( ) +@solo.command(name="prune-memories") +@click.option( + "--project-dir", + type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path), + default=None, + help="Project directory (default: cwd)", +) +@click.option( + "--llm-endpoint", + type=str, + default=None, + help="LLM endpoint URL (default: from settings)", +) +@click.option( + "--llm-model", + type=str, + default=None, + help="Model name (default: from settings)", +) +@click.option( + "--dry-run", + is_flag=True, + help="Show what would be retired without making changes", +) +def prune_memories( + project_dir: Path | None, + llm_endpoint: str | None, + llm_model: str | None, + dry_run: bool, +) -> None: + """Audit existing memories for staleness and retire stale ones. + + Sends all active (non-superseded, non-retired) memories for the project + to the LLM alongside the most recent session transcripts and asks which + memories describe fixed bugs, completed work, or outdated state. + + Stale memories are marked with a ``retired_reason`` and excluded from + future queries. Use --dry-run to preview without changes (the LLM is + still called to identify stale memories; only the DB write is skipped). 
+ """ + from slopometry.core.settings import settings + from slopometry.solo.services.memory_service import MemoryService + from slopometry.solo.services.transcript_finder import TranscriptFinder + + if project_dir is None: + project_dir = Path.cwd() + + endpoint = llm_endpoint or settings.memory_llm_endpoint + model = llm_model or settings.memory_llm_model + api_key = settings.memory_llm_api_key.get_secret_value() + + console.print("[bold]Slopometry Memory Pruning[/bold]") + console.print(f"Project: {project_dir}") + console.print(f"LLM: {endpoint} / {model}") + console.print() + + if settings.offline_mode and not llm_endpoint: + raise click.ClickException( + "Memory pruning requires external LLM calls, which are disabled (offline_mode=True). " + "Set SLOPOMETRY_OFFLINE_MODE=false to enable." + ) + + from slopometry.solo.cli.preflight import preflight_endpoints + + preflight_endpoints( + chat_endpoint=endpoint, + embedding_endpoint=settings.memory_embedding_endpoint, + chat_api_key=api_key, + embedding_api_key=settings.memory_embedding_api_key.get_secret_value(), + ) + + memory_service = MemoryService() + proj_dir_str = str(project_dir) + + existing = memory_service.get_memories(project_dir=proj_dir_str, limit=settings.memory_query_limit) + if not existing: + console.print("[yellow]No active memories to audit.[/yellow]") + return + + console.print(f"[cyan]Auditing {len(existing)} active memories for staleness...[/cyan]") + + transcript_finder = TranscriptFinder() + transcripts = transcript_finder.discover_transcripts(project_dir) + + from slopometry.core.models.protocol.events import AbstractEventSource + from slopometry.solo.services.memory_extractor import MemoryExtractor + + memory_extractor = MemoryExtractor(endpoint, model, api_key) + + transcript_texts: list[str] = [] + for t in transcripts[: settings.memory_prune_transcript_window]: + if t.source == AbstractEventSource.OPENCODE: + storage_root = transcript_finder.find_opencode_storage_root() + if not 
storage_root.is_dir(): + continue + text = memory_extractor.extract_memories_from_opencode_session(t.session_id, storage_root) + else: + text = memory_extractor.extract_memories_from_transcript(t.transcript_path) + if text.strip(): + transcript_texts.append(text) + + combined_transcript = "\n---\n".join(transcript_texts) + if not combined_transcript.strip(): + console.print("[yellow]No transcripts found to audit against.[/yellow]") + return + + console.print(f"[dim]Using {len(transcript_texts)} transcript(s) for context[/dim]") + + stale_pairs = audit_staleness( + existing, + combined_transcript, + llm_endpoint=endpoint, + llm_model=model, + api_key=api_key, + max_tokens=settings.memory_staleness_audit_max_tokens, + transcript_truncation_chars=settings.memory_transcript_truncation_chars, + ) + + if not stale_pairs: + console.print("[green]No stale memories found.[/green]") + return + + console.print(f"\n[yellow]Found {len(stale_pairs)} stale memor(ies):[/yellow]") + for memory_entry, reason in stale_pairs: + console.print(f" [yellow]RETIRE[/yellow] [{memory_entry.memory_type.value}] {memory_entry.content[:100]}") + console.print(f" [dim]REASON:[/dim] {reason}") + + if dry_run: + console.print(f"\n[yellow]--dry-run: would retire {len(stale_pairs)} memor(ies)[/yellow]") + return + + retired_count = 0 + for memory_entry, reason in stale_pairs: + if memory_service.retire_memory(memory_entry.id, reason): + retired_count += 1 + + console.print(f"\n[bold green]Retired {retired_count} memor(ies).[/bold green]") + + @solo.command(name="show-memories") @click.option( "--project-dir", diff --git a/src/slopometry/solo/services/memory_freshness.py b/src/slopometry/solo/services/memory_freshness.py index b91a18d..7bb1429 100644 --- a/src/slopometry/solo/services/memory_freshness.py +++ b/src/slopometry/solo/services/memory_freshness.py @@ -1,4 +1,4 @@ -"""Freshness validation for newly-extracted memory candidates. 
+"""Freshness validation and staleness auditing for memory candidates. After LLM extraction, each new candidate is paired with semantically similar existing memories in the same project. Pairing is gated by a per-project @@ -16,20 +16,38 @@ - merge: synthesize a single updated version that supersedes both - supersede: the new candidate wins, mark the old as outdated - dedupe: they say the same thing; skip the new and confirm the old + +A separate **staleness audit** runs after extraction + reconciliation. It +sends the full transcript alongside the existing (active) memories and asks +the LLM which memories are now stale — describing fixed bugs, completed +work, or outdated state. Stale memories are retired via ``retired_reason`` +without a direct replacement. """ import json import logging import statistics from dataclasses import dataclass +from typing import Any + +from openai import OpenAI -from slopometry.core.models.memory import FreshnessAction, FreshnessVerdict, MemoryCandidate, MemoryEntry +from slopometry.core.models.memory import ( + FreshnessAction, + FreshnessVerdict, + MemoryCandidate, + MemoryEntry, + StalenessVerdict, +) from slopometry.solo.services.llm_text import parse_llm_json logger = logging.getLogger(__name__) DEFAULT_FLOOR_THRESHOLD = 0.45 DEFAULT_CEILING_THRESHOLD = 0.95 +DEFAULT_RECONCILIATION_MAX_TOKENS = 200 +DEFAULT_STALENESS_AUDIT_MAX_TOKENS = 1000 +DEFAULT_TRANSCRIPT_TRUNCATION_CHARS = 15000 RECONCILIATION_PROMPT = """You are reconciling two memory candidates about the same subject. @@ -183,6 +201,34 @@ def _find_above_threshold( return matches +def _call_llm_json( + llm_endpoint: str, + llm_model: str, + api_key: str, + system_prompt: str, + user_prompt: str, + max_tokens: int, +) -> Any: + """Call an OpenAI-compatible LLM and return the parsed JSON response. + + Returns the raw parsed JSON (dict or list) on success, or raises + ``json.JSONDecodeError`` / ``ValueError`` / ``TypeError`` on parse failure. 
+ Network errors propagate to the caller. + """ + client = OpenAI(base_url=llm_endpoint, api_key=api_key) + response = client.chat.completions.create( + model=llm_model, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + temperature=0.0, + max_tokens=max_tokens, + ) + content = response.choices[0].message.content or "" + return parse_llm_json(content) + + def _judge_reconciliation( candidate: MemoryCandidate, existing: MemoryEntry, @@ -190,40 +236,31 @@ def _judge_reconciliation( llm_model: str, api_key: str, similarity: float, + max_tokens: int = DEFAULT_RECONCILIATION_MAX_TOKENS, ) -> FreshnessDecision: """Ask the LLM how to reconcile the pair. Always returns a decision.""" - from openai import OpenAI - prompt = RECONCILIATION_PROMPT.format( new_content=candidate.content, existing_content=existing.content, ) - client = OpenAI(base_url=llm_endpoint, api_key=api_key) - response = client.chat.completions.create( - model=llm_model, - messages=[ - { - "role": "system", - "content": "You reconcile memory pairs. Always reply with valid JSON containing action, reason, and (only when merging) merged_content.", - }, - {"role": "user", "content": prompt}, - ], - temperature=0.0, - max_tokens=200, - ) - content = response.choices[0].message.content or "" - try: - data = parse_llm_json(content) + data = _call_llm_json( + llm_endpoint, + llm_model, + api_key, + system_prompt="You reconcile memory pairs. 
Always reply with valid JSON containing action, reason, and (only when merging) merged_content.", + user_prompt=prompt, + max_tokens=max_tokens, + ) verdict = FreshnessVerdict.model_validate(data) except (json.JSONDecodeError, ValueError, TypeError): - logger.debug("Could not parse reconciliation response: %s", content[:80]) + logger.debug("Could not parse reconciliation response for candidate vs %s", existing.id) return FreshnessDecision( new_candidate=candidate, existing_memory=existing, similarity=similarity, action=FreshnessAction.KEEP_BOTH, - reason=f"Could not parse LLM response: {content[:80]}", + reason="Could not parse LLM response", ) merged = verdict.merged_content if verdict.action == FreshnessAction.MERGE else None @@ -275,3 +312,85 @@ def validate_freshness( continue decisions.append(decision) return decisions, distribution + + +STALENESS_AUDIT_PROMPT = """You are auditing existing memories for staleness after analyzing a new session transcript. + +A memory is STALE and should be retired if: +- It describes a bug that was fixed in this session +- It describes work that was completed in this session +- It references a state that was changed in this session +- It describes a temporary issue that was resolved + +A memory is NOT stale if: +- It describes a stable preference, design decision, or user behavior pattern +- It references external resources, infrastructure, or tool locations +- The session doesn't touch the area the memory describes +- It's a general project description that remains accurate + +EXISTING MEMORIES: +{memories_block} + +SESSION TRANSCRIPT: +{transcript} + +Return JSON only — a list of memories to retire, referencing each by its [N] number: +[{{"ref": 1, "reason": ""}}] + +If no memories are stale, return an empty array: []""" + + +def audit_staleness( + existing: list[MemoryEntry], + transcript: str, + llm_endpoint: str, + llm_model: str, + api_key: str, + max_tokens: int = DEFAULT_STALENESS_AUDIT_MAX_TOKENS, + 
transcript_truncation_chars: int = DEFAULT_TRANSCRIPT_TRUNCATION_CHARS, +) -> list[tuple[MemoryEntry, str]]: + """Ask the LLM which existing memories are now stale given the session transcript. + + Returns: + List of (memory_entry, reason) pairs for memories to retire. + """ + if not existing or not transcript.strip(): + return [] + + memories_block = "\n".join( + f"[{i + 1}] ({m.memory_type.value}) {m.content}" for i, m in enumerate(existing) + ) + prompt = STALENESS_AUDIT_PROMPT.format( + memories_block=memories_block, + transcript=transcript[:transcript_truncation_chars], + ) + + try: + data = _call_llm_json( + llm_endpoint, + llm_model, + api_key, + system_prompt="You audit memory staleness. Always reply with valid JSON only.", + user_prompt=prompt, + max_tokens=max_tokens, + ) + except (json.JSONDecodeError, ValueError, TypeError): + logger.debug("Could not parse staleness audit response") + return [] + + if not isinstance(data, list): + logger.debug("Staleness audit expected JSON array, got %s", type(data).__name__) + return [] + + results: list[tuple[MemoryEntry, str]] = [] + for item in data: + try: + verdict = StalenessVerdict.model_validate(item) + except (ValueError, TypeError) as e: + logger.debug("Skipping invalid staleness verdict: %s", e) + continue + idx = verdict.ref - 1 + if 0 <= idx < len(existing): + results.append((existing[idx], verdict.reason)) + + return results diff --git a/src/slopometry/solo/services/memory_service.py b/src/slopometry/solo/services/memory_service.py index def52e9..422a308 100644 --- a/src/slopometry/solo/services/memory_service.py +++ b/src/slopometry/solo/services/memory_service.py @@ -112,6 +112,14 @@ def update_memory( embedding=embedding, ) + def retire_memory(self, memory_id: str, reason: str) -> bool: + """Retire a memory (mark as stale without a direct replacement). 
+ + Returns: + True if retired, False if not found + """ + return self.db.retire_memory(memory_id, reason) + def mark_session_processed( self, session_id: str, project_dir: str, memory_count: int, source: str ) -> None: diff --git a/tests/test_database.py b/tests/test_database.py index 890d632..280ac9c 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -98,7 +98,7 @@ def test_user_story_generation_cli_integration() -> None: result = runner.invoke(cli, ["summoner", "userstorify", "--help"]) assert result.exit_code == 0 - assert "Generate user stories from commits using configured AI agents" in result.output + assert "Generate user stories from commits using the configured AI agent" in result.output assert "--base-commit" in result.output assert "--head-commit" in result.output diff --git a/tests/test_find_memories_reconciliation.py b/tests/test_find_memories_reconciliation.py new file mode 100644 index 0000000..aef6d72 --- /dev/null +++ b/tests/test_find_memories_reconciliation.py @@ -0,0 +1,114 @@ +"""Tests for the freshness reconciliation action priority logic in find-memories. + +When a single candidate matches multiple existing memories, the LLM may +return different actions for each pair. The priority resolution ensures +only one action wins: SUPERSEDE > MERGE > DEDUPE > KEEP_BOTH. 
+""" + +from datetime import datetime + +from slopometry.core.models.memory import ( + FreshnessAction, + MemoryCandidate, + MemoryEntry, + MemoryType, +) +from slopometry.solo.cli.commands import _ACTION_PRIORITY, _highest_priority_action +from slopometry.solo.services.memory_freshness import FreshnessDecision + + +def _candidate(content: str, embedding: list[float] | None = None) -> MemoryCandidate: + return MemoryCandidate( + memory_type=MemoryType.PROJECT, + content=content, + embedding=embedding, + ) + + +def _memory(content: str, mem_id: str = "m1") -> MemoryEntry: + return MemoryEntry( + id=mem_id, + session_id="s1", + project_dir="/proj", + memory_type=MemoryType.PROJECT, + content=content, + created_at=datetime.now(), + ) + + +def _decision( + candidate: MemoryCandidate, + existing: MemoryEntry, + action: FreshnessAction, + similarity: float = 0.9, + merged_content: str | None = None, +) -> FreshnessDecision: + return FreshnessDecision( + new_candidate=candidate, + existing_memory=existing, + similarity=similarity, + action=action, + reason=f"test-{action.value}", + merged_content=merged_content, + ) + + +class TestActionPriority: + def test_action_priority__supersede_beats_merge_dedupe_keep_both(self): + assert _ACTION_PRIORITY[FreshnessAction.SUPERSEDE] > _ACTION_PRIORITY[FreshnessAction.MERGE] + assert _ACTION_PRIORITY[FreshnessAction.SUPERSEDE] > _ACTION_PRIORITY[FreshnessAction.DEDUPE] + assert _ACTION_PRIORITY[FreshnessAction.SUPERSEDE] > _ACTION_PRIORITY[FreshnessAction.KEEP_BOTH] + + def test_action_priority__merge_beats_dedupe_keep_both(self): + assert _ACTION_PRIORITY[FreshnessAction.MERGE] > _ACTION_PRIORITY[FreshnessAction.DEDUPE] + assert _ACTION_PRIORITY[FreshnessAction.MERGE] > _ACTION_PRIORITY[FreshnessAction.KEEP_BOTH] + + def test_action_priority__dedupe_beats_keep_both(self): + assert _ACTION_PRIORITY[FreshnessAction.DEDUPE] > _ACTION_PRIORITY[FreshnessAction.KEEP_BOTH] + + def 
test_highest_priority_action__returns_supersede_when_group_has_supersede_and_merge(self): + cand = _candidate("new version") + group = [ + _decision(cand, _memory("old A", "m1"), FreshnessAction.MERGE, merged_content="merged"), + _decision(cand, _memory("old B", "m2"), FreshnessAction.SUPERSEDE), + ] + assert _highest_priority_action(group) == FreshnessAction.SUPERSEDE + + def test_highest_priority_action__returns_merge_when_group_has_merge_and_dedupe(self): + cand = _candidate("updated info") + group = [ + _decision(cand, _memory("duplicate", "m1"), FreshnessAction.DEDUPE), + _decision(cand, _memory("outdated", "m2"), FreshnessAction.MERGE, merged_content="merged"), + ] + assert _highest_priority_action(group) == FreshnessAction.MERGE + + def test_highest_priority_action__returns_dedupe_when_group_has_dedupe_and_keep_both(self): + cand = _candidate("same info") + group = [ + _decision(cand, _memory("different topic", "m1"), FreshnessAction.KEEP_BOTH), + _decision(cand, _memory("same info", "m2"), FreshnessAction.DEDUPE), + ] + assert _highest_priority_action(group) == FreshnessAction.DEDUPE + + def test_highest_priority_action__returns_keep_both_when_all_are_keep_both(self): + cand = _candidate("unique info") + group = [ + _decision(cand, _memory("A", "m1"), FreshnessAction.KEEP_BOTH), + _decision(cand, _memory("B", "m2"), FreshnessAction.KEEP_BOTH), + ] + assert _highest_priority_action(group) == FreshnessAction.KEEP_BOTH + + def test_highest_priority_action__handles_single_decision_group(self): + cand = _candidate("X") + group = [_decision(cand, _memory("Y", "m1"), FreshnessAction.MERGE, merged_content="merged")] + assert _highest_priority_action(group) == FreshnessAction.MERGE + + def test_highest_priority_action__supersede_wins_over_all_four_actions(self): + cand = _candidate("new truth") + group = [ + _decision(cand, _memory("dup", "m1"), FreshnessAction.DEDUPE), + _decision(cand, _memory("mergeable", "m2"), FreshnessAction.MERGE, merged_content="merged"), + 
_decision(cand, _memory("different", "m3"), FreshnessAction.KEEP_BOTH), + _decision(cand, _memory("outdated", "m4"), FreshnessAction.SUPERSEDE), + ] + assert _highest_priority_action(group) == FreshnessAction.SUPERSEDE diff --git a/tests/test_memory_freshness.py b/tests/test_memory_freshness.py index 7b02e3e..877060e 100644 --- a/tests/test_memory_freshness.py +++ b/tests/test_memory_freshness.py @@ -1,11 +1,17 @@ -"""Tests for validate_freshness.""" +"""Tests for validate_freshness and audit_staleness.""" from datetime import datetime from unittest.mock import MagicMock, patch import pytest -from slopometry.core.models.memory import FreshnessAction, MemoryCandidate, MemoryEntry, MemoryType +from slopometry.core.models.memory import ( + FreshnessAction, + MemoryCandidate, + MemoryEntry, + MemoryType, + StalenessVerdict, +) from slopometry.solo.services.memory_freshness import ( DEFAULT_CEILING_THRESHOLD, DEFAULT_FLOOR_THRESHOLD, @@ -14,6 +20,7 @@ _cosine_similarity, _find_above_threshold, _judge_reconciliation, + audit_staleness, compute_project_distribution, validate_freshness, ) @@ -131,7 +138,7 @@ def test_judge_reconciliation__returns_keep_both_when_llm_says_keep_both(self): mock_response.choices = [ MagicMock(message=MagicMock(content='{"action": "keep_both", "reason": "different topics"}')) ] - with patch("openai.OpenAI") as mock_openai: + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.return_value = mock_response decision = _judge_reconciliation( _candidate("uses rust-code-analysis"), @@ -152,7 +159,7 @@ def test_judge_reconciliation__returns_merge_with_merged_content_when_llm_says_m ) ) ] - with patch("openai.OpenAI") as mock_openai: + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.return_value = mock_response decision = _judge_reconciliation( _candidate("uses rust-code-analysis"), @@ -169,7 +176,7 
@@ def test_judge_reconciliation__returns_supersede_when_llm_says_supersede(self): mock_response.choices = [ MagicMock(message=MagicMock(content='{"action": "supersede", "reason": "newer version"}')) ] - with patch("openai.OpenAI") as mock_openai: + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.return_value = mock_response decision = _judge_reconciliation( _candidate("Python 3.13"), @@ -185,7 +192,7 @@ def test_judge_reconciliation__returns_dedupe_when_llm_says_dedupe(self): mock_response.choices = [ MagicMock(message=MagicMock(content='{"action": "dedupe", "reason": "same info"}')) ] - with patch("openai.OpenAI") as mock_openai: + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.return_value = mock_response decision = _judge_reconciliation( _candidate("user uses pyright type checker"), @@ -203,7 +210,7 @@ def test_judge_reconciliation__strips_markdown_fences_from_llm_response(self): message=MagicMock(content='```json\n{"action": "merge", "reason": "old outdated", "merged_content": "merged"}\n```') ) ] - with patch("openai.OpenAI") as mock_openai: + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.return_value = mock_response decision = _judge_reconciliation( _candidate("X"), @@ -220,7 +227,7 @@ def test_judge_reconciliation__falls_back_to_keep_both_on_invalid_action(self): mock_response.choices = [ MagicMock(message=MagicMock(content='{"action": "maybe", "reason": "unsure"}')) ] - with patch("openai.OpenAI") as mock_openai: + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.return_value = mock_response decision = _judge_reconciliation( _candidate("X"), @@ -245,7 +252,7 @@ def test_validate_freshness__skips_llm_call_when_no_above_threshold_matches(self 
candidates = [_candidate("X", [1.0, 0.0])] existing = [_memory("orthogonal", [0.0, 1.0], "m1")] - with patch("openai.OpenAI") as mock_openai: + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: decisions, _ = validate_freshness(candidates, existing, "https://llm.example/v1", "model-x", "test-key") mock_openai.assert_not_called() @@ -263,7 +270,7 @@ def test_validate_freshness__triggers_llm_judge_when_similar_match_found(self): candidates = [_candidate("uses rust-code-analysis", [1.0, 0.0])] existing = [_memory("uses radon", [0.99, 0.14], "m1")] - with patch("openai.OpenAI") as mock_openai: + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.return_value = mock_response decisions, distribution = validate_freshness( candidates, existing, "https://llm.example/v1", "model-x", "test-key" @@ -284,7 +291,7 @@ def test_validate_freshness__does_not_merge_or_supersede_on_keep_both(self): candidates = [_candidate("uses rust-code-analysis for complexity", [1.0, 0.0])] existing = [_memory("user prefers dark mode", [0.99, 0.14], "m1")] - with patch("openai.OpenAI") as mock_openai: + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.return_value = mock_response decisions, _ = validate_freshness( candidates, existing, "https://llm.example/v1", "model-x", "test-key" @@ -319,7 +326,7 @@ def test_validate_freshness__handles_multiple_candidates_with_different_actions( _memory("user prefers light mode in editors", [0.14, 0.99], "m2"), ] - with patch("openai.OpenAI") as mock_openai: + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.side_effect = [ mock_response_a, mock_response_b, @@ -343,7 +350,7 @@ def test_validate_freshness__uses_floor_threshold_for_low_similarity_project(sel mock_response.choices = [ 
MagicMock(message=MagicMock(content='{"action": "keep_both", "reason": "different"}')) ] - with patch("openai.OpenAI") as mock_openai: + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.return_value = mock_response _, distribution = validate_freshness( candidates, existing, "https://llm.example/v1", "model-x", "test-key" @@ -354,10 +361,208 @@ def test_validate_freshness__skips_failed_llm_call_silently(self): candidates = [_candidate("X", [1.0, 0.0])] existing = [_memory("Y", [0.99, 0.14], "m1")] - with patch("openai.OpenAI") as mock_openai: + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.side_effect = RuntimeError("llm down") decisions, _ = validate_freshness( candidates, existing, "https://llm.example/v1", "model-x", "test-key" ) assert decisions == [] + + +def _staleness_response(content: str) -> MagicMock: + mock = MagicMock() + mock.choices = [MagicMock(message=MagicMock(content=content))] + return mock + + +class TestAuditStaleness: + def test_audit_staleness__returns_empty_list_when_no_existing_memories(self): + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: + result = audit_staleness([], "some transcript", "https://llm.example/v1", "model-x", "key") + mock_openai.assert_not_called() + assert result == [] + + def test_audit_staleness__returns_empty_list_when_transcript_is_empty(self): + existing = [_memory("describes a bug")] + result = audit_staleness(existing, "", "https://llm.example/v1", "model-x", "key") + assert result == [] + + def test_audit_staleness__returns_empty_list_when_transcript_is_whitespace_only(self): + existing = [_memory("describes a bug")] + result = audit_staleness(existing, " \n\n ", "https://llm.example/v1", "model-x", "key") + assert result == [] + + def test_audit_staleness__returns_memory_and_reason_pairs_when_llm_identifies_stale(self): + 
existing = [ + _memory("There is a bug in the parser", mem_id="m1"), + _memory("User prefers dark mode", mem_id="m2"), + ] + llm_response = _staleness_response( + '[{"ref": 1, "reason": "parser bug was fixed in this session"}]' + ) + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = llm_response + result = audit_staleness(existing, "transcript showing bug fix", "https://llm.example/v1", "model-x", "key") + + assert len(result) == 1 + memory, reason = result[0] + assert memory.id == "m1" + assert reason == "parser bug was fixed in this session" + + def test_audit_staleness__returns_multiple_pairs_when_llm_identifies_multiple_stale(self): + existing = [ + _memory("Bug in parser", mem_id="m1"), + _memory("TODO: refactor database layer", mem_id="m2"), + _memory("User prefers dark mode", mem_id="m3"), + ] + llm_response = _staleness_response( + '[{"ref": 1, "reason": "parser bug fixed"}, {"ref": 2, "reason": "database refactor completed"}]' + ) + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = llm_response + result = audit_staleness(existing, "transcript", "https://llm.example/v1", "model-x", "key") + + assert len(result) == 2 + retired_ids = {m.id for m, _ in result} + assert retired_ids == {"m1", "m2"} + + def test_audit_staleness__returns_empty_list_when_llm_says_nothing_is_stale(self): + existing = [_memory("User prefers dark mode", mem_id="m1")] + llm_response = _staleness_response("[]") + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = llm_response + result = audit_staleness(existing, "transcript about unrelated work", "https://llm.example/v1", "model-x", "key") + + assert result == [] + + def test_audit_staleness__strips_markdown_fences_from_llm_response(self): + existing = 
[_memory("describes a bug", mem_id="m1")] + llm_response = _staleness_response( + '```json\n[{"ref": 1, "reason": "bug was fixed"}]\n```' + ) + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = llm_response + result = audit_staleness(existing, "transcript", "https://llm.example/v1", "model-x", "key") + + assert len(result) == 1 + assert result[0][0].id == "m1" + + def test_audit_staleness__skips_verdicts_with_out_of_range_refs(self): + existing = [_memory("describes a bug", mem_id="m1")] + llm_response = _staleness_response( + '[{"ref": 0, "reason": "invalid zero-based ref"}, {"ref": 5, "reason": "out of range"}, {"ref": 1, "reason": "valid ref"}]' + ) + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = llm_response + result = audit_staleness(existing, "transcript", "https://llm.example/v1", "model-x", "key") + + assert len(result) == 1 + assert result[0][0].id == "m1" + assert result[0][1] == "valid ref" + + def test_audit_staleness__returns_empty_list_when_llm_response_is_not_a_json_array(self): + existing = [_memory("describes a bug", mem_id="m1")] + llm_response = _staleness_response('{"not": "an array"}') + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = llm_response + result = audit_staleness(existing, "transcript", "https://llm.example/v1", "model-x", "key") + + assert result == [] + + def test_audit_staleness__returns_empty_list_on_invalid_json_response(self): + existing = [_memory("describes a bug", mem_id="m1")] + llm_response = _staleness_response("this is not json at all") + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = llm_response + result = audit_staleness(existing, 
"transcript", "https://llm.example/v1", "model-x", "key") + + assert result == [] + + def test_audit_staleness__skips_invalid_verdict_objects_missing_required_fields(self): + existing = [_memory("describes a bug", mem_id="m1")] + llm_response = _staleness_response( + '[{"reason": "missing ref field"}, {"ref": 1, "reason": "valid"}]' + ) + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = llm_response + result = audit_staleness(existing, "transcript", "https://llm.example/v1", "model-x", "key") + + assert len(result) == 1 + assert result[0][1] == "valid" + + def test_audit_staleness__truncates_transcript_to_configured_char_limit(self): + existing = [_memory("describes a bug", mem_id="m1")] + long_transcript = "x" * 30000 + llm_response = _staleness_response("[]") + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = llm_response + audit_staleness( + existing, + long_transcript, + "https://llm.example/v1", + "model-x", + "key", + transcript_truncation_chars=500, + ) + + call_args = mock_openai.return_value.chat.completions.create.call_args + user_message = call_args.kwargs["messages"][1]["content"] + assert "x" * 600 not in user_message + + def test_audit_staleness__uses_1_based_indexing_for_memory_refs(self): + existing = [ + _memory("first memory", mem_id="m1"), + _memory("second memory", mem_id="m2"), + _memory("third memory", mem_id="m3"), + ] + llm_response = _staleness_response('[{"ref": 3, "reason": "third is stale"}]') + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = llm_response + result = audit_staleness(existing, "transcript", "https://llm.example/v1", "model-x", "key") + + assert len(result) == 1 + assert result[0][0].id == "m3" + + def 
test_audit_staleness__includes_memory_type_and_content_in_prompt(self): + existing = [ + _memory("describes a bug in parser", mem_id="m1"), + ] + llm_response = _staleness_response("[]") + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = llm_response + audit_staleness(existing, "transcript", "https://llm.example/v1", "model-x", "key") + + call_args = mock_openai.return_value.chat.completions.create.call_args + user_message = call_args.kwargs["messages"][1]["content"] + assert "[1]" in user_message + assert "describes a bug in parser" in user_message + assert "project" in user_message # memory_type value + + def test_audit_staleness__only_receives_memories_passed_by_caller_not_external(self): + existing = [ + _memory("describes a bug", mem_id="m1"), + _memory("user prefers dark mode", mem_id="m2"), + ] + llm_response = _staleness_response('[{"ref": 1, "reason": "bug fixed"}]') + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: + mock_openai.return_value.chat.completions.create.return_value = llm_response + result = audit_staleness(existing, "transcript", "https://llm.example/v1", "model-x", "key") + + assert len(result) == 1 + assert result[0][0].id == "m1" + call_args = mock_openai.return_value.chat.completions.create.call_args + user_message = call_args.kwargs["messages"][1]["content"] + assert "[1]" in user_message + assert "[2]" in user_message + + +class TestStalenessVerdictModel: + def test_staleness_verdict__accepts_positive_ref_and_reason(self): + verdict = StalenessVerdict(ref=1, reason="bug was fixed") + assert verdict.ref == 1 + assert verdict.reason == "bug was fixed" + + def test_staleness_verdict__defaults_reason_to_empty_string(self): + verdict = StalenessVerdict(ref=1) + assert verdict.reason == "" diff --git a/tests/test_memory_models.py b/tests/test_memory_models.py index 7dfb0f9..ec1f538 100644 --- 
a/tests/test_memory_models.py +++ b/tests/test_memory_models.py @@ -2,7 +2,19 @@ from datetime import datetime -from slopometry.core.models.memory import MemoryCandidate, MemoryCreateRequest, MemoryEntry, MemoryType +import pytest +from pydantic import ValidationError + +from slopometry.core.models.memory import ( + FreshnessAction, + FreshnessVerdict, + LLMMemoryCandidate, + MemoryCandidate, + MemoryCreateRequest, + MemoryEntry, + MemoryType, + StalenessVerdict, +) def test_memory_entry_model() -> None: @@ -88,6 +100,8 @@ def test_memory_entry_defaults() -> None: assert entry.embedding is None assert entry.metadata is None assert entry.updated_at is None + assert entry.superseded_by is None + assert entry.retired_reason is None def test_memory_type_enum_values() -> None: @@ -96,3 +110,85 @@ def test_memory_type_enum_values() -> None: assert MemoryType.FEEDBACK == "feedback" assert MemoryType.PROJECT == "project" assert MemoryType.REFERENCE == "reference" + + +def test_memory_entry__defaults_retired_reason_and_superseded_by_to_none() -> None: + entry = MemoryEntry( + id="id-retired", + session_id="s1", + project_dir="/proj", + memory_type=MemoryType.PROJECT, + content="content", + created_at=datetime.now(), + ) + assert entry.retired_reason is None + assert entry.superseded_by is None + + +def test_memory_entry__accepts_retired_reason_when_stale() -> None: + entry = MemoryEntry( + id="id-stale", + session_id="s1", + project_dir="/proj", + memory_type=MemoryType.PROJECT, + content="describes a bug that was fixed", + created_at=datetime.now(), + retired_reason="bug was fixed in session abc-123", + ) + assert entry.retired_reason == "bug was fixed in session abc-123" + + +def test_freshness_verdict__defaults_merged_content_to_none_for_non_merge_actions() -> None: + verdict = FreshnessVerdict(action=FreshnessAction.SUPERSEDE, reason="newer version wins") + assert verdict.merged_content is None + + +def 
test_freshness_verdict__accepts_merged_content_when_action_is_merge() -> None: + verdict = FreshnessVerdict( + action=FreshnessAction.MERGE, + reason="old was outdated", + merged_content="uses rust-code-analysis since 2026", + ) + assert verdict.merged_content == "uses rust-code-analysis since 2026" + + +def test_freshness_verdict__defaults_reason_to_empty_string() -> None: + verdict = FreshnessVerdict(action=FreshnessAction.KEEP_BOTH) + assert verdict.reason == "" + + +def test_freshness_action__has_four_distinct_values() -> None: + actions = {FreshnessAction.KEEP_BOTH, FreshnessAction.MERGE, FreshnessAction.SUPERSEDE, FreshnessAction.DEDUPE} + assert len(actions) == 4 + + +def test_freshness_action__color_property_returns_valid_color_for_each_action() -> None: + assert FreshnessAction.KEEP_BOTH.color == "green" + assert FreshnessAction.MERGE.color == "cyan" + assert FreshnessAction.SUPERSEDE.color == "yellow" + assert FreshnessAction.DEDUPE.color == "magenta" + + +def test_staleness_verdict__accepts_positive_ref_and_reason() -> None: + verdict = StalenessVerdict(ref=1, reason="bug was fixed in this session") + assert verdict.ref == 1 + assert verdict.reason == "bug was fixed in this session" + + +def test_staleness_verdict__defaults_reason_to_empty_string() -> None: + verdict = StalenessVerdict(ref=3) + assert verdict.reason == "" + + +def test_freshness_verdict__rejects_invalid_action_string_via_validation() -> None: + with pytest.raises(ValidationError): + FreshnessVerdict.model_validate({"action": "invalid_action"}) + + +def test_llm_memory_candidate__validates_memory_type_against_enum() -> None: + candidate = LLMMemoryCandidate( + memory_type=MemoryType.PROJECT, + content="uses rust-code-analysis", + ) + assert candidate.memory_type == MemoryType.PROJECT + assert candidate.source_context is None diff --git a/tests/test_memory_service.py b/tests/test_memory_service.py index 7b233be..7910aa9 100644 --- a/tests/test_memory_service.py +++ 
b/tests/test_memory_service.py @@ -261,3 +261,103 @@ def test_is_session_processed__returns_false_before_true_after_marking(memory_se memory_service.mark_session_processed("processed-session", "/any/project", 3, source="claude_code") assert memory_service.is_session_processed("processed-session", "/any/project", source="claude_code") is True + + +def _make_memory(mem_id: str, content: str = "content", project: str = "/proj") -> MemoryEntry: + return MemoryEntry( + id=mem_id, + session_id="s1", + project_dir=project, + memory_type=MemoryType.PROJECT, + content=content, + created_at=datetime.now(), + ) + + +def test_retire_memory__marks_memory_with_retired_reason_and_hides_from_default_query( + memory_service: MemoryService, +) -> None: + memory_service.save_memory(_make_memory("mem-active")) + memory_service.save_memory(_make_memory("mem-stale", content="describes a fixed bug")) + + result = memory_service.retire_memory("mem-stale", reason="bug was fixed in session abc") + + assert result is True + visible = memory_service.get_memories(project_dir="/proj", limit=100) + assert {m.id for m in visible} == {"mem-active"} + + +def test_retire_memory__returns_false_when_memory_id_does_not_exist(memory_service: MemoryService) -> None: + result = memory_service.retire_memory("nonexistent-id", reason="no such memory") + assert result is False + + +def test_get_memories__includes_retired_when_include_superseded_is_true(memory_service: MemoryService) -> None: + memory_service.save_memory(_make_memory("mem-active")) + memory_service.save_memory(_make_memory("mem-retired", content="stale")) + memory_service.retire_memory("mem-retired", reason="no longer relevant") + + all_memories = memory_service.get_memories(project_dir="/proj", limit=100, include_superseded=True) + ids = {m.id for m in all_memories} + assert ids == {"mem-active", "mem-retired"} + + +def test_get_memories__excludes_both_superseded_and_retired_by_default(memory_service: MemoryService) -> None: + 
memory_service.save_memory(_make_memory("mem-active")) + memory_service.save_memory(_make_memory("mem-superseded", content="old version")) + memory_service.save_memory(_make_memory("mem-retired", content="fixed bug")) + memory_service.update_memory("mem-superseded", superseded_by="mem-active") + memory_service.retire_memory("mem-retired", reason="bug was fixed") + + visible = memory_service.get_memories(project_dir="/proj", limit=100) + assert {m.id for m in visible} == {"mem-active"} + + +def test_get_memories__retired_memory_carries_retired_reason_when_included(memory_service: MemoryService) -> None: + memory_service.save_memory(_make_memory("mem-retired", content="stale")) + memory_service.retire_memory("mem-retired", reason="work was completed") + + all_memories = memory_service.get_memories(project_dir="/proj", limit=100, include_superseded=True) + retired = next(m for m in all_memories if m.id == "mem-retired") + assert retired.retired_reason == "work was completed" + + +def test_get_memory_stats__excludes_retired_memories_from_count(memory_service: MemoryService) -> None: + memory_service.save_memory(_make_memory("mem-active", content="active")) + memory_service.save_memory(_make_memory("mem-retired", content="stale")) + memory_service.retire_memory("mem-retired", reason="stale") + + stats = memory_service.get_memory_stats(project_dir="/proj") + assert stats["total"] == 1 + + +def test_retire_memory__does_not_interfere_with_supersede_chain(memory_service: MemoryService) -> None: + memory_service.save_memory(_make_memory("mem-old", content="version 1")) + memory_service.save_memory(_make_memory("mem-new", content="version 2")) + memory_service.update_memory("mem-old", superseded_by="mem-new") + memory_service.retire_memory("mem-new", reason="superseded work was completed") + + visible = memory_service.get_memories(project_dir="/proj", limit=100) + assert visible == [] + + +def test_retire_memory__can_be_called_multiple_times_on_same_memory(memory_service: 
MemoryService) -> None: + memory_service.save_memory(_make_memory("mem-stale")) + assert memory_service.retire_memory("mem-stale", reason="first reason") is True + assert memory_service.retire_memory("mem-stale", reason="updated reason") is True + + all_memories = memory_service.get_memories(project_dir="/proj", limit=100, include_superseded=True) + retired = next(m for m in all_memories if m.id == "mem-stale") + assert retired.retired_reason == "updated reason" + + +def test_retire_memory__does_not_clobber_superseded_by_field(memory_service: MemoryService) -> None: + memory_service.save_memory(_make_memory("mem-old", content="old version")) + memory_service.save_memory(_make_memory("mem-new", content="new version")) + memory_service.update_memory("mem-old", superseded_by="mem-new") + memory_service.retire_memory("mem-old", reason="also stale") + + all_memories = memory_service.get_memories(project_dir="/proj", limit=100, include_superseded=True) + old = next(m for m in all_memories if m.id == "mem-old") + assert old.superseded_by == "mem-new" + assert old.retired_reason == "also stale" diff --git a/tests/test_migrations.py b/tests/test_migrations.py index eac0e80..3cc8891 100644 --- a/tests/test_migrations.py +++ b/tests/test_migrations.py @@ -137,3 +137,86 @@ def test_migration_001__handles_existing_column_gracefully(self): columns = [row[1] for row in cursor.fetchall()] transcript_path_count = columns.count("transcript_path") assert transcript_path_count == 1 + + def test_migration_016__adds_retired_reason_column_to_memories(self): + """Test that migration 016 adds the retired_reason column to the memories table.""" + with TemporaryDirectory() as temp_dir: + db_path = Path(temp_dir) / "test.db" + runner = MigrationRunner(db_path) + + with runner._get_db_connection() as conn: + conn.execute(""" + CREATE TABLE hook_events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_id TEXT NOT NULL, + timestamp TEXT NOT NULL + ) + """) + conn.execute(""" + CREATE TABLE 
memories ( + id TEXT PRIMARY KEY, + session_id TEXT NOT NULL, + project_dir TEXT NOT NULL, + memory_type TEXT NOT NULL, + content TEXT NOT NULL, + source_context TEXT, + created_at TEXT NOT NULL, + updated_at TEXT, + retained INTEGER DEFAULT 0, + superseded_by TEXT, + embedding TEXT, + metadata TEXT + ) + """) + conn.commit() + + applied = runner.run_migrations() + + assert any("016" in m and "retired_reason" in m for m in applied) + + with runner._get_db_connection() as conn: + cursor = conn.execute("PRAGMA table_info(memories)") + columns = [row[1] for row in cursor.fetchall()] + assert "retired_reason" in columns + + def test_migration_016__is_idempotent_when_column_already_exists(self): + """Test that migration 016 does not fail if retired_reason already exists.""" + with TemporaryDirectory() as temp_dir: + db_path = Path(temp_dir) / "test.db" + runner = MigrationRunner(db_path) + + with runner._get_db_connection() as conn: + conn.execute(""" + CREATE TABLE hook_events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_id TEXT NOT NULL, + timestamp TEXT NOT NULL + ) + """) + conn.execute(""" + CREATE TABLE memories ( + id TEXT PRIMARY KEY, + session_id TEXT NOT NULL, + project_dir TEXT NOT NULL, + memory_type TEXT NOT NULL, + content TEXT NOT NULL, + source_context TEXT, + created_at TEXT NOT NULL, + updated_at TEXT, + retained INTEGER DEFAULT 0, + superseded_by TEXT, + retired_reason TEXT, + embedding TEXT, + metadata TEXT + ) + """) + conn.commit() + + applied = runner.run_migrations() + + assert len(applied) == EXPECTED_MIGRATION_COUNT + + with runner._get_db_connection() as conn: + cursor = conn.execute("PRAGMA table_info(memories)") + columns = [row[1] for row in cursor.fetchall()] + assert columns.count("retired_reason") == 1 diff --git a/tests/test_opencode_memory_integration.py b/tests/test_opencode_memory_integration.py index c90c2bf..17b66d9 100644 --- a/tests/test_opencode_memory_integration.py +++ b/tests/test_opencode_memory_integration.py @@ 
-97,7 +97,7 @@ def test_validate_freshness__supersede_links_old_to_new_via_superseded_by(self, embedding=[0.99, 0.14, 0.0], ) - with patch("openai.OpenAI") as mock_openai: + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.return_value = self._stub_judge("supersede") decisions, _ = validate_freshness([new_candidate], [existing], "https://llm.example/v1", "model-x", "test-key") @@ -143,7 +143,7 @@ def test_validate_freshness__merge_action_rewrites_candidate_content(self, fresh merged_text = "Project uses rust-code-analysis (switched from radon in 2026)" - with patch("openai.OpenAI") as mock_openai: + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.return_value = self._stub_judge( "merge", merged_content=merged_text ) @@ -186,7 +186,7 @@ def test_validate_freshness__dedupe_action_skips_new_save(self, fresh_memory_ser embedding=[0.99, 0.14, 0.0], ) - with patch("openai.OpenAI") as mock_openai: + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.return_value = self._stub_judge("dedupe") decisions, _ = validate_freshness([new_candidate], [existing], "https://llm.example/v1", "model-x", "test-key") @@ -225,7 +225,7 @@ def test_validate_freshness__keep_both_saves_both_independently(self, fresh_memo embedding=[0.95, 0.31, 0.0], ) - with patch("openai.OpenAI") as mock_openai: + with patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai: mock_openai.return_value.chat.completions.create.return_value = self._stub_judge("keep_both") decisions, _ = validate_freshness([new_candidate], [existing], "https://llm.example/v1", "model-x", "test-key") diff --git a/tests/test_prune_memories_cli.py b/tests/test_prune_memories_cli.py new file mode 100644 index 0000000..634570f --- /dev/null +++ b/tests/test_prune_memories_cli.py @@ -0,0 
+1,217 @@ +"""Integration tests for the prune-memories CLI command. + +These tests exercise the full click command pipeline, mocking only the +external boundaries (LLM calls, preflight endpoint checks, transcript +discovery, and memory extraction) to verify the command's behavior. +""" + +import tempfile +from collections.abc import Iterator +from contextlib import contextmanager +from datetime import datetime +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest +from click.testing import CliRunner + +from slopometry.cli import cli +from slopometry.core.database import EventDatabase +from slopometry.core.models.memory import MemoryEntry, MemoryType +from slopometry.solo.services.memory_service import MemoryService + + +@pytest.fixture +def temp_db() -> Iterator[EventDatabase]: + with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: + db_path = Path(f.name) + try: + db = EventDatabase(db_path) + yield db + finally: + if db_path.exists(): + db_path.unlink() + + +@pytest.fixture +def memory_service(temp_db: EventDatabase) -> MemoryService: + return MemoryService(db=temp_db) + + +def _make_memory(mem_id: str, content: str = "content", project: str = "/proj") -> MemoryEntry: + return MemoryEntry( + id=mem_id, + session_id="s1", + project_dir=project, + memory_type=MemoryType.PROJECT, + content=content, + created_at=datetime.now(), + ) + + +@contextmanager +def patch_prune_externals(memory_service: MemoryService): + """Patch all external boundaries of prune-memories.""" + api_key_mock = MagicMock(get_secret_value=lambda: "key") + with ( + patch("slopometry.core.settings.settings.offline_mode", False), + patch("slopometry.core.settings.settings.memory_llm_endpoint", "https://llm.example/v1"), + patch("slopometry.core.settings.settings.memory_llm_model", "model-x"), + patch("slopometry.core.settings.settings.memory_llm_api_key", api_key_mock), + patch("slopometry.core.settings.settings.memory_embedding_endpoint", 
"https://embed.example/v1"), + patch("slopometry.core.settings.settings.memory_embedding_model", "embed-model"), + patch("slopometry.core.settings.settings.memory_embedding_api_key", api_key_mock), + patch("slopometry.solo.cli.preflight._check_endpoint", return_value=None), + patch("slopometry.solo.services.memory_service.MemoryService", return_value=memory_service), + ): + yield + + +class TestPruneMemoriesCli: + def test_prune_memories__returns_early_when_no_active_memories( + self, memory_service: MemoryService, tmp_path: Path + ): + with patch_prune_externals(memory_service): + runner = CliRunner() + result = runner.invoke(cli, ["solo", "prune-memories", "--project-dir", str(tmp_path)]) + assert result.exit_code == 0 + assert "No active memories to audit" in result.output + + def test_prune_memories__returns_early_when_no_transcripts_found( + self, memory_service: MemoryService, tmp_path: Path + ): + memory_service.save_memory(_make_memory("mem-1", content="stale memory", project=str(tmp_path))) + + with ( + patch_prune_externals(memory_service), + patch("slopometry.solo.services.transcript_finder.TranscriptFinder") as mock_tf, + ): + mock_tf.return_value.discover_transcripts.return_value = [] + + runner = CliRunner() + result = runner.invoke(cli, ["solo", "prune-memories", "--project-dir", str(tmp_path)]) + assert result.exit_code == 0 + assert "No transcripts found to audit against" in result.output + + def test_prune_memories__retires_stale_memories_identified_by_llm( + self, memory_service: MemoryService, tmp_path: Path + ): + memory_service.save_memory( + _make_memory("mem-active", content="user prefers dark mode", project=str(tmp_path)) + ) + memory_service.save_memory( + _make_memory("mem-stale", content="describes a fixed bug", project=str(tmp_path)) + ) + + mock_transcript = MagicMock() + mock_transcript.session_id = "s1" + mock_transcript.transcript_path = tmp_path / "transcript.jsonl" + mock_transcript.project_dir = tmp_path + mock_transcript.source 
= MagicMock(value="claude_code") + + mock_extractor = MagicMock() + mock_extractor.extract_memories_from_transcript.return_value = "transcript showing bug fix" + + staleness_response = MagicMock() + staleness_response.choices = [ + MagicMock(message=MagicMock(content='[{"ref": 1, "reason": "bug was fixed in session"}]')) + ] + + with ( + patch_prune_externals(memory_service), + patch("slopometry.solo.services.transcript_finder.TranscriptFinder") as mock_tf_class, + patch("slopometry.solo.services.memory_extractor.MemoryExtractor", return_value=mock_extractor), + patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai, + ): + mock_tf_class.return_value.discover_transcripts.return_value = [mock_transcript] + mock_tf_class.return_value.find_opencode_storage_root.return_value = tmp_path / "opencode" + mock_openai.return_value.chat.completions.create.return_value = staleness_response + + runner = CliRunner() + result = runner.invoke(cli, ["solo", "prune-memories", "--project-dir", str(tmp_path)]) + assert result.exit_code == 0 + assert "Retired 1 memor" in result.output + + visible = memory_service.get_memories(project_dir=str(tmp_path), limit=100) + assert {m.id for m in visible} == {"mem-active"} + + def test_prune_memories__dry_run_does_not_persist_retirements( + self, memory_service: MemoryService, tmp_path: Path + ): + memory_service.save_memory( + _make_memory("mem-stale", content="describes a fixed bug", project=str(tmp_path)) + ) + + mock_transcript = MagicMock() + mock_transcript.session_id = "s1" + mock_transcript.transcript_path = tmp_path / "transcript.jsonl" + mock_transcript.project_dir = tmp_path + mock_transcript.source = MagicMock(value="claude_code") + + mock_extractor = MagicMock() + mock_extractor.extract_memories_from_transcript.return_value = "transcript showing bug fix" + + staleness_response = MagicMock() + staleness_response.choices = [ + MagicMock(message=MagicMock(content='[{"ref": 1, "reason": "bug was fixed"}]')) + ] + + 
with ( + patch_prune_externals(memory_service), + patch("slopometry.solo.services.transcript_finder.TranscriptFinder") as mock_tf_class, + patch("slopometry.solo.services.memory_extractor.MemoryExtractor", return_value=mock_extractor), + patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai, + ): + mock_tf_class.return_value.discover_transcripts.return_value = [mock_transcript] + mock_tf_class.return_value.find_opencode_storage_root.return_value = tmp_path / "opencode" + mock_openai.return_value.chat.completions.create.return_value = staleness_response + + runner = CliRunner() + result = runner.invoke( + cli, ["solo", "prune-memories", "--project-dir", str(tmp_path), "--dry-run"] + ) + assert result.exit_code == 0 + assert "--dry-run: would retire 1" in result.output + + all_memories = memory_service.get_memories( + project_dir=str(tmp_path), limit=100, include_superseded=True + ) + assert len(all_memories) == 1 + assert all_memories[0].retired_reason is None + + def test_prune_memories__prints_no_stale_when_llm_returns_empty_array( + self, memory_service: MemoryService, tmp_path: Path + ): + memory_service.save_memory( + _make_memory("mem-active", content="stable preference", project=str(tmp_path)) + ) + + mock_transcript = MagicMock() + mock_transcript.session_id = "s1" + mock_transcript.transcript_path = tmp_path / "transcript.jsonl" + mock_transcript.project_dir = tmp_path + mock_transcript.source = MagicMock(value="claude_code") + + mock_extractor = MagicMock() + mock_extractor.extract_memories_from_transcript.return_value = "transcript about unrelated work" + + staleness_response = MagicMock() + staleness_response.choices = [MagicMock(message=MagicMock(content="[]"))] + + with ( + patch_prune_externals(memory_service), + patch("slopometry.solo.services.transcript_finder.TranscriptFinder") as mock_tf_class, + patch("slopometry.solo.services.memory_extractor.MemoryExtractor", return_value=mock_extractor), + 
patch("slopometry.solo.services.memory_freshness.OpenAI") as mock_openai, + ): + mock_tf_class.return_value.discover_transcripts.return_value = [mock_transcript] + mock_tf_class.return_value.find_opencode_storage_root.return_value = tmp_path / "opencode" + mock_openai.return_value.chat.completions.create.return_value = staleness_response + + runner = CliRunner() + result = runner.invoke(cli, ["solo", "prune-memories", "--project-dir", str(tmp_path)]) + assert result.exit_code == 0 + assert "No stale memories found" in result.output + + visible = memory_service.get_memories(project_dir=str(tmp_path), limit=100) + assert len(visible) == 1 diff --git a/tests/test_settings.py b/tests/test_settings.py index 5ba1a21..d3d4535 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -8,6 +8,7 @@ import pytest from pydantic import ValidationError +from pydantic_settings import BaseSettings, SettingsConfigDict from slopometry.core.settings import Settings @@ -17,8 +18,6 @@ class TestSettingsOverridePriority: def _create_test_settings(self, global_config_path: Path, local_config_path: Path): """Create a Settings class with custom config paths for testing.""" - from pydantic_settings import BaseSettings, SettingsConfigDict - class TestSettings(BaseSettings): model_config = SettingsConfigDict( env_file=[str(global_config_path), str(local_config_path)],