diff --git a/.dev/status/current-handoff.md b/.dev/status/current-handoff.md index 4697f23..a5fe1a4 100644 --- a/.dev/status/current-handoff.md +++ b/.dev/status/current-handoff.md @@ -1,7 +1,7 @@ # agent-memory current handoff Status: AI-authored draft. Not yet human-approved. -Last updated: 2026-04-30 23:00 KST +Last updated: 2026-05-01 00:20 KST ## Trigger for the next session @@ -16,7 +16,7 @@ read this file first. Do not ask the user to restate context. Verify repo state, ## Ready-to-say answer -지금 agent-memory는 v0.1.33까지 release fallback rerun idempotency와 Hermes v0.1.33 QA까지 끝났고, 현재 진행 중인 통합 slice는 v0.1.34 후보야. 세 가지를 한 PR로 묶어 진행 중이야: published smoke propagation/backoff hardening, release-sync PR CI validation dispatch, read-only relation graph inspect CLI. +agent-memory는 v0.1.34까지 배포/Hermes QA가 완료됐고, 지금은 Priority 5 dogfood/noise monitoring 첫 slice인 v0.1.35 후보 작업 중이야. 현재 브랜치는 `feat/retrieval-observation-log`이고, 목표는 Hermes/CLI retrieval이 어떤 memory를 주입했는지 secret-safe local observation log로 남겨 이후 noisy memory audit의 기반을 만드는 거야. ## Current repo state @@ -32,16 +32,15 @@ Expected GitHub identity: Verified base before this slice: -- branch: `main` -- latest completed release: `v0.1.33` -- v0.1.33 included release-sync fallback rerun idempotency. -- local Hermes hook uses `/Users/reddit/.agent-memory/runtime/v0.1.33/.venv/bin/agent-memory` against `/Users/reddit/.agent-memory/memory.db`. +- latest completed release: `v0.1.34` +- v0.1.34 included published smoke propagation retry/backoff, release-sync PR CI dispatch, and read-only relation graph inspect CLI. +- local Hermes hook uses `/Users/reddit/.agent-memory/runtime/v0.1.34/.venv/bin/agent-memory` against `/Users/reddit/.agent-memory/memory.db`. 
Active slice/worktree: -- branch: `feat/release-graph-hardening` -- worktree: `/Users/reddit/Project/agent-memory/.worktrees/release-graph-hardening` -- intended release after merge: likely `v0.1.34` +- branch: `feat/retrieval-observation-log` +- worktree: `/Users/reddit/Project/agent-memory/.worktrees/retrieval-observation-log` +- intended release after merge: likely `v0.1.35` Expected local untracked artifacts to preserve in the root checkout: @@ -53,159 +52,107 @@ Expected local untracked artifacts to preserve in the root checkout: Do not delete or commit these unless the user explicitly asks. -## What is complete through v0.1.33 +## What is complete through v0.1.34 ### Distribution and release automation - npm package and PyPI package are published from the same versioned source. - npm-first user install path is documented and verified. - Publish workflow gates GitHub Release creation on `published-install-smoke` after npm/PyPI publish. -- Published smoke uploads `published-install-smoke-result` JSON artifact with success/failure diagnostics. -- v0.1.28+ smoke covers npm/npx/npm-exec/uvx/pipx and Hermes hook stdin payload handling. -- Protected `main` fallback is automated: auto-release creates `release-sync/vX.Y.Z` PR when direct metadata write-back is rejected; after merge, auto-release tags and dispatches publish. -- v0.1.33 made that fallback safe to rerun when the branch or PR already exists. +- Published smoke uploads JSON diagnostics artifacts. +- v0.1.34 distinguishes normal retry budget from propagation/transient resolver failure budget and adds registry probe diagnostics. +- Protected `main` fallback is automated and rerun-idempotent. +- release-sync fallback now dispatches `ci.yml` on the bot-created release-sync branch and comments/step-summarizes that handoff. ### Runtime adapter readiness - Hermes bootstrap/doctor/install flow exists and defaults to the conservative preset. 
-- This local Hermes setup has agent-memory enabled via `/Users/reddit/.agent-memory/runtime/v0.1.33/.venv/bin/agent-memory` against `/Users/reddit/.agent-memory/memory.db`. +- This local Hermes setup has agent-memory enabled via `/Users/reddit/.agent-memory/runtime/v0.1.34/.venv/bin/agent-memory`. - Hermes hook fails closed: unavailable DB/schema returns `{}` and exit 0 instead of breaking prompt flow. - Conservative preset remains default: small prompt budgets, one top memory, no alternative-memory detail, no reason-code noise. - `--preset balanced` is explicit opt-in for more context/noise. -### Truth lifecycle and eval readiness +### Truth lifecycle, eval, and graph foundation - Normal retrieval is approved-only by default. - Candidate/disputed/deprecated facts remain available only behind explicit forensic/review surfaces. -- `memory_status_transitions` records status changes with from/to status, reason, actor, evidence IDs, and timestamp. -- `agent-memory review history fact|procedure|episode ...` exposes transition history. -- `agent-memory review supersede fact ` records fact replacement as a relation edge. -- Replacement relation direction: `fact: --superseded_by--> fact:`. -- Superseding a fact deprecates the old fact and approves the replacement fact, preserving reason/actor/evidence in transition history. -- `agent-memory review replacements fact ...` exposes replacement chains. -- `agent-memory review explain fact ...` explains status, default retrieval visibility, same claim-slot alternatives, replacement chain, and review follow-up commands. -- Retrieval eval calls the real retrieval path but suppresses retrieval bookkeeping writes (`retrieval_count`, `reinforcement_count`, `last_accessed_at`). +- `memory_status_transitions` records status changes. +- `review history`, `review supersede`, `review replacements`, and `review explain` exist. +- Retrieval eval calls the real retrieval path but suppresses retrieval bookkeeping writes. 
+- `agent-memory graph inspect DB_PATH START_REF --depth N --limit N` traverses stored `Relation` edges read-only and does not mutate memory state. -## Current slice: release/package/graph hardening +## Current slice: local retrieval observation log -User asked to do all three next recommended tasks: +Goal: -1. Published smoke propagation/backoff improvement. -2. release-sync PR CI dispatch/status automation. -3. Graph foundation first safe slice: read-only relation graph inspect CLI. +- Build a local-only, secret-safe observation log that records what retrieval injected during real dogfood use. +- This is the first Priority 5 dogfood/noise monitoring slice and should feed later noisy-memory audit commands. -Current implementation direction: +Implemented so far: -### Published smoke propagation/backoff +- New SQLite table `retrieval_observations`. +- New model `RetrievalObservation`. +- New storage APIs: + - `record_retrieval_observation(...)` + - `list_retrieval_observations(...)` +- `retrieve_memory_packet(...)` accepts: + - `observation_surface` + - `observation_metadata` +- `agent-memory retrieve ... --observe SURFACE` records an opt-in observation. +- Hermes pre-LLM hook records an observation automatically with surface `hermes-pre-llm-hook`. +- New CLI: + - `agent-memory observations list DB_PATH --limit 50` -Files: +Secret-safety contract: -- `scripts/smoke_published_install.py` -- `tests/test_published_install_smoke.py` -- `.github/workflows/publish.yml` -- `.github/workflows/published-install-smoke.yml` +- raw query text is not stored. +- stores `query_sha256` and a short redacted preview. +- redacts secret-like assignments such as password/token/api_key/secret/credential/connection_string. +- stores selected memory refs, top memory ref, response mode, statuses, preferred scope, and small metadata. 
-Behavior: +Files changed: -- Detect resolver/package-index propagation-like failures such as `No solution found`, `No matching distribution found`, npm 404/ETARGET/NOTARGET, and exact `cafitac-agent-memory==X.Y.Z` misses. -- Apply a separate longer retry budget only for propagation-like failures: - - normal attempts remain bounded - - propagation attempts can extend with exponential backoff -- Failure artifacts include registry probe diagnostics: - - npm version present/latest - - PyPI JSON release present - - PyPI simple index mentions version - - probe errors -- `publish.yml` uses `--attempts 12`, `--propagation-attempts 36`, `--propagation-delay-seconds 20`. -- Manual `published-install-smoke.yml` exposes propagation attempt/delay inputs. - -### release-sync PR CI validation dispatch - -Files: - -- `.github/workflows/auto-release.yml` -- `tests/test_release_workflows.py` - -Behavior: - -- When fallback creates a new `release-sync/vX.Y.Z` PR, capture the PR URL. -- Dispatch `ci.yml` explicitly on `release-sync/vX.Y.Z` with `gh workflow run ci.yml --ref "${RELEASE_SYNC_BRANCH}"`. -- Comment on the PR explaining that bot-created refs may suppress automatic PR checks and that maintainers should wait for the dispatched `ci.yml` run before merging. - -### read-only relation graph inspect CLI - -Files: - -- `src/agent_memory/api/cli.py` +- `src/agent_memory/core/models.py` +- `src/agent_memory/storage/schema.sql` - `src/agent_memory/storage/sqlite.py` +- `src/agent_memory/core/retrieval.py` +- `src/agent_memory/integrations/hermes_hooks.py` +- `src/agent_memory/api/cli.py` - `tests/test_cli.py` - `README.md` +- `docs/hermes-dogfood.md` +- `.dev/status/current-handoff.md` -New command: - -```bash -agent-memory graph inspect --depth 1 --limit 100 -``` - -Example: +Current focused verification already passed: ```bash -agent-memory graph inspect ~/.agent-memory/memory.db fact:1 --depth 2 --limit 50 -``` - -Behavior: - -- Traverses stored `Relation` edges only. 
-- JSON output includes: - - `kind: relation_graph_inspection` - - `start_ref` - - `depth` - - `limit` - - `read_only: true` - - `nodes` - - `edges` - - `truncated` -- Does not change retrieval behavior. -- Does not mutate memory state. -- Intended as the first safe graph-foundation slice before default retrieval graph traversal. +uv run pytest tests/test_cli.py::test_python_module_cli_retrieve_observe_records_secret_safe_local_observation tests/test_cli.py::test_python_module_cli_hermes_pre_llm_hook_outputs_context_for_hermes_shell_hook_payload -q +# 2 passed -## Verification checklist for this slice - -Run from the active worktree: - -```bash -uv run pytest tests/test_published_install_smoke.py -q -uv run pytest tests/test_release_workflows.py -q -uv run pytest tests/test_cli.py::test_python_module_cli_graph_inspect_returns_read_only_relation_neighborhood -q -uv run pytest tests/test_published_install_smoke.py tests/test_release_workflows.py tests/test_cli.py -q -uv run pytest tests/ -q -uv run python scripts/check_release_metadata.py -uv run python scripts/smoke_release_readiness.py -npm pack --dry-run -git diff --check -node --check bin/agent-memory.js +uv run pytest tests/test_cli.py tests/test_retrieval_evaluation.py -q +# 83 passed ``` -Before PR, run a static diff secret scan and confirm finding_count 0. - -## PR/release notes - -This slice affects release automation, published install smoke, and a new read-only CLI command. Treat it as a patch release candidate, likely v0.1.34 after PR merge. - -Expected live verification after merge: - -1. PR merge should trigger auto-release and bump metadata to v0.1.34. -2. Protected `main` should trigger fallback. -3. Fallback should create `release-sync/v0.1.34` PR and dispatch `ci.yml` on that branch. -4. Wait for the dispatched CI run before merging release-sync PR. -5. Merge the release-sync PR. -6. Confirm release-sync follow-up creates tag `v0.1.34`, dispatches publish, and published smoke passes. -7. 
Verify GitHub Release/npm/PyPI/published-install-smoke artifact. -8. Update local Hermes runtime to v0.1.34 only after package release is verified. - -## Next likely slices after this - -1. Actual Hermes dogfood observations and noise/latency notes. -2. Expand graph inspection with node metadata/status summaries, still read-only. -3. Later graph retrieval eval fixtures before any default graph expansion. -4. PyPI Trusted Publisher later; user deferred it. +## Remaining work for this slice + +1. Run real smoke for observation CLI and Hermes hook from the worktree. +2. Run full verification: + ```bash + uv run pytest tests/ -q + uv run python scripts/check_release_metadata.py + uv run python scripts/smoke_release_readiness.py + npm pack --dry-run + git diff --check + node --check bin/agent-memory.js + ``` +3. Run static diff secret scan and confirm finding_count 0. +4. Commit branch and open PR. +5. Watch PR CI, merge when green. +6. Verify auto-release/release-sync/publish for likely v0.1.35. +7. Verify GitHub Release/npm/PyPI/published smoke artifact. +8. Install pinned Hermes runtime v0.1.35 and run Hermes QA. +9. Cleanup worktree/branch and update durable memory. + +## Next likely slice after this + +After observation logging is released and dogfooded, build a read-only noisy memory audit command over `retrieval_observations`, for example frequently injected memory refs, surprising scopes, high hidden-alternative counts, and stale/deprecated-nearby risks. diff --git a/README.md b/README.md index 2ec02a6..fb83e8f 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,15 @@ agent-memory graph inspect "$DB" fact:1 --depth 2 --limit 50 The JSON output includes the start ref, visited node refs, relation edges, traversal depth per edge, and a `read_only: true` marker. It is intended as a safe graph-foundation slice before enabling any broader graph traversal in default retrieval. 
+For local dogfood and noise monitoring, retrievals can leave a secret-safe observation log. Normal `retrieve` only records an observation when explicitly asked; the Hermes pre-LLM hook records one automatically in the local SQLite DB. Observations store a query hash, a redacted short preview, selected memory refs, top memory ref, response mode, scope, and surface. They do not store the raw query text. + +```bash +agent-memory retrieve "$DB" "How should I install agent-memory?" --preferred-scope user:default --observe cli +agent-memory observations list "$DB" --limit 20 +``` + +Use the observation log to spot frequently injected or surprising memories before changing retrieval behavior. Treat it as local operator telemetry, not a synced analytics stream. + ## Hermes quickstart For most Hermes users: diff --git a/docs/hermes-dogfood.md b/docs/hermes-dogfood.md index a5a6361..55f842d 100644 --- a/docs/hermes-dogfood.md +++ b/docs/hermes-dogfood.md @@ -36,8 +36,19 @@ Capture these observations for each dogfood run: - whether returned context includes only approved memory - whether unrelated scopes stay out of the prompt - whether failure paths fail closed with no broken prompt text +- whether `agent-memory observations list ~/.agent-memory/memory.db --limit 20` shows the expected memory refs without raw query text or secrets -A good conservative smoke has low latency, at most one surfaced memory, no noisy reason codes, and no workflow-blocking error if the memory DB is missing. +A good conservative smoke has low latency, at most one surfaced memory, no noisy reason codes, no workflow-blocking error if the memory DB is missing, and a local observation entry that explains what memory was injected. + +## Local observation log + +Hermes pre-LLM hook retrievals write a secret-safe local observation row to the SQLite DB. 
The row is intended for dogfood/noise review and stores the surface, query hash, redacted query preview, selected memory refs, top memory ref, response mode, scope, and small metadata. It does not store the raw query text. + +```bash +agent-memory observations list ~/.agent-memory/memory.db --limit 20 +``` + +Use this before tuning ranking or adding broader graph traversal: first confirm which memories are frequently injected, which scopes are active, and whether the top memory is surprising. Keep this data local unless you intentionally export it. ## Fallback and rollback diff --git a/src/agent_memory/api/cli.py b/src/agent_memory/api/cli.py index d23fd40..f8da927 100644 --- a/src/agent_memory/api/cli.py +++ b/src/agent_memory/api/cli.py @@ -50,6 +50,7 @@ list_facts_by_claim_slot, list_memory_status_history, list_relations_for_node, + list_retrieval_observations, ) @@ -416,6 +417,17 @@ def _build_parser() -> argparse.ArgumentParser: default="approved", help="Memory status to retrieve. Defaults to approved; use all for forensic/debug review.", ) + retrieve_parser.add_argument( + "--observe", + metavar="SURFACE", + help="Record a secret-safe local retrieval observation for this query.", + ) + + observations_parser = subparsers.add_parser("observations") + observations_subparsers = observations_parser.add_subparsers(dest="observations_action", required=True) + observations_list_parser = observations_subparsers.add_parser("list") + observations_list_parser.add_argument("db_path", type=Path) + observations_list_parser.add_argument("--limit", type=int, default=50) graph_parser = subparsers.add_parser("graph") graph_subparsers = graph_parser.add_subparsers(dest="graph_action", required=True) @@ -815,10 +827,27 @@ def main() -> None: limit=args.limit, preferred_scope=args.preferred_scope, statuses=statuses, + observation_surface=args.observe, ) print(packet.model_dump_json(indent=2)) return + if args.command == "observations": + if args.observations_action == "list": + 
observations = list_retrieval_observations(args.db_path, limit=args.limit) + print( + json.dumps( + { + "kind": "retrieval_observations", + "read_only": True, + "observations": [observation.model_dump(mode="json") for observation in observations], + }, + indent=2, + ) + ) + return + raise ValueError(f"Unsupported observations action: {args.observations_action}") + if args.command == "graph": if args.graph_action == "inspect": print( diff --git a/src/agent_memory/core/models.py b/src/agent_memory/core/models.py index 8b368bb..ea4b0f4 100644 --- a/src/agent_memory/core/models.py +++ b/src/agent_memory/core/models.py @@ -83,6 +83,22 @@ class MemoryStatusTransition(BaseModel): created_at: str +class RetrievalObservation(BaseModel): + id: int + created_at: str + surface: str + query_sha256: str + query_text: None = None + query_preview: str | None = None + preferred_scope: str | None = None + limit: int + statuses: list[MemoryStatus] = Field(default_factory=list) + retrieved_memory_refs: list[str] = Field(default_factory=list) + top_memory_ref: str | None = None + response_mode: Literal["direct", "cautious", "verify_first"] | None = None + metadata: dict[str, Any] = Field(default_factory=dict) + + class ProvenanceSummary(BaseModel): source_id: int source_type: str diff --git a/src/agent_memory/core/retrieval.py b/src/agent_memory/core/retrieval.py index 158914a..ae1d683 100644 --- a/src/agent_memory/core/retrieval.py +++ b/src/agent_memory/core/retrieval.py @@ -15,6 +15,7 @@ from agent_memory.storage.sqlite import ( get_source_records_by_ids, record_memory_retrieval, + record_retrieval_observation, search_ranked_approved_episodes, search_ranked_approved_facts, search_ranked_approved_procedures, @@ -289,6 +290,8 @@ def retrieve_memory_packet( preferred_scope: str | None = None, statuses: tuple[MemoryStatus, ...] 
= ("approved",), record_retrievals: bool = True, + observation_surface: str | None = None, + observation_metadata: dict[str, object] | None = None, ) -> MemoryPacket: if statuses == ("approved",): ranked_facts = search_ranked_approved_facts( @@ -432,6 +435,22 @@ def retrieve_memory_packet( if episode.status == "approved": record_memory_retrieval(db_path, memory_type="episode", memory_id=episode.id) + if observation_surface: + try: + record_retrieval_observation( + db_path, + surface=observation_surface, + query=query, + preferred_scope=preferred_scope, + limit=limit, + statuses=statuses, + retrieval_trace=retrieval_trace, + response_mode=decision_summary.recommended_answer_mode if decision_summary is not None else None, + metadata=observation_metadata, + ) + except Exception: + pass + return MemoryPacket( query=query, working_hints=working_hints, diff --git a/src/agent_memory/integrations/hermes_hooks.py b/src/agent_memory/integrations/hermes_hooks.py index 4c8b9f5..4926efb 100644 --- a/src/agent_memory/integrations/hermes_hooks.py +++ b/src/agent_memory/integrations/hermes_hooks.py @@ -375,6 +375,8 @@ def build_pre_llm_hook_context( query=user_message, limit=options.limit, preferred_scope=effective_preferred_scope, + observation_surface="hermes-pre-llm-hook", + observation_metadata={"hook_event_name": payload.hook_event_name}, ) context = prepare_hermes_memory_context( packet, diff --git a/src/agent_memory/storage/schema.sql b/src/agent_memory/storage/schema.sql index cf699eb..63dd0b4 100644 --- a/src/agent_memory/storage/schema.sql +++ b/src/agent_memory/storage/schema.sql @@ -94,7 +94,24 @@ CREATE TABLE IF NOT EXISTS memory_status_transitions ( created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP ); +CREATE TABLE IF NOT EXISTS retrieval_observations ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, + surface TEXT NOT NULL, + query_sha256 TEXT NOT NULL, + query_preview TEXT, + preferred_scope TEXT, + limit_value INTEGER 
NOT NULL, + statuses_json TEXT NOT NULL DEFAULT '["approved"]', + retrieved_memory_refs_json TEXT NOT NULL DEFAULT '[]', + top_memory_ref TEXT, + response_mode TEXT CHECK (response_mode IN ('direct', 'cautious', 'verify_first')), + metadata_json TEXT NOT NULL DEFAULT '{}' +); + CREATE INDEX IF NOT EXISTS idx_memory_status_transitions_memory ON memory_status_transitions(memory_type, memory_id, id); +CREATE INDEX IF NOT EXISTS idx_retrieval_observations_created_at ON retrieval_observations(created_at, id); +CREATE INDEX IF NOT EXISTS idx_retrieval_observations_surface ON retrieval_observations(surface, created_at); CREATE INDEX IF NOT EXISTS idx_facts_status_scope ON facts(status, scope); CREATE INDEX IF NOT EXISTS idx_facts_subject ON facts(subject_ref); diff --git a/src/agent_memory/storage/sqlite.py b/src/agent_memory/storage/sqlite.py index c08633e..3cb111a 100644 --- a/src/agent_memory/storage/sqlite.py +++ b/src/agent_memory/storage/sqlite.py @@ -1,7 +1,9 @@ from __future__ import annotations +import hashlib import json import math +import re import sqlite3 from datetime import datetime from importlib.resources import files @@ -15,6 +17,7 @@ MemoryStatusTransition, Procedure, Relation, + RetrievalObservation, RetrievalTraceEntry, SourceRecord, ) @@ -108,6 +111,30 @@ def initialize_database(db_path: Path | str) -> None: connection.execute( "CREATE INDEX IF NOT EXISTS idx_episodes_status_scope_importance ON episodes(status, scope, importance_score)" ) + connection.execute( + """ + CREATE TABLE IF NOT EXISTS retrieval_observations ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, + surface TEXT NOT NULL, + query_sha256 TEXT NOT NULL, + query_preview TEXT, + preferred_scope TEXT, + limit_value INTEGER NOT NULL, + statuses_json TEXT NOT NULL DEFAULT '["approved"]', + retrieved_memory_refs_json TEXT NOT NULL DEFAULT '[]', + top_memory_ref TEXT, + response_mode TEXT CHECK (response_mode IN ('direct', 'cautious', 
'verify_first')), + metadata_json TEXT NOT NULL DEFAULT '{}' + ) + """ + ) + connection.execute( + "CREATE INDEX IF NOT EXISTS idx_retrieval_observations_created_at ON retrieval_observations(created_at, id)" + ) + connection.execute( + "CREATE INDEX IF NOT EXISTS idx_retrieval_observations_surface ON retrieval_observations(surface, created_at)" + ) def _ensure_memory_table_columns( @@ -767,6 +794,88 @@ def record_memory_retrieval( ) +_SECRET_ASSIGNMENT_PATTERN = re.compile(r"(?i)\b(password|passwd|pwd|token|api[_-]?key|secret|credential|connection[_-]?string)\s*[:=]\s*\S+") +_BEARER_PATTERN = re.compile(r"(?i)\bbearer\s+[A-Za-z0-9._~+\-/=]+") +_LONG_TOKEN_PATTERN = re.compile(r"\b[A-Za-z0-9_\-]{24,}\b") + + +def _redacted_query_preview(query: str, *, max_chars: int = 120) -> str: + preview = _SECRET_ASSIGNMENT_PATTERN.sub(lambda match: f"{match.group(1)}=[REDACTED]", query) + preview = _BEARER_PATTERN.sub("Bearer [REDACTED]", preview) + preview = _LONG_TOKEN_PATTERN.sub("[REDACTED]", preview) + preview = " ".join(preview.split()) + if len(preview) > max_chars: + return f"{preview[: max_chars - 1]}…" + return preview + + +def _memory_ref(memory_type: str, memory_id: int) -> str: + return f"{memory_type}:{memory_id}" + + +def record_retrieval_observation( + db_path: Path | str, + *, + surface: str, + query: str, + preferred_scope: str | None, + limit: int, + statuses: tuple[MemoryStatus, ...], + retrieval_trace: list[RetrievalTraceEntry], + response_mode: str | None, + metadata: dict[str, Any] | None = None, +) -> RetrievalObservation: + retrieved_memory_refs = [_memory_ref(trace.memory_type, trace.memory_id) for trace in retrieval_trace] + top_memory_ref = retrieved_memory_refs[0] if retrieved_memory_refs else None + query_sha256 = hashlib.sha256(query.encode("utf-8")).hexdigest() + with connect(db_path) as connection: + cursor = connection.execute( + """ + INSERT INTO retrieval_observations ( + surface, + query_sha256, + query_preview, + preferred_scope, + 
limit_value, + statuses_json, + retrieved_memory_refs_json, + top_memory_ref, + response_mode, + metadata_json + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + surface, + query_sha256, + _redacted_query_preview(query), + preferred_scope, + limit, + json.dumps(list(statuses)), + json.dumps(retrieved_memory_refs), + top_memory_ref, + response_mode, + json.dumps(metadata or {}, sort_keys=True), + ), + ) + row = connection.execute("SELECT * FROM retrieval_observations WHERE id = ?", (cursor.lastrowid,)).fetchone() + return retrieval_observation_from_row(row) + + +def list_retrieval_observations(db_path: Path | str, *, limit: int = 50) -> list[RetrievalObservation]: + with connect(db_path) as connection: + rows = connection.execute( + """ + SELECT * + FROM retrieval_observations + ORDER BY id DESC + LIMIT ? + """, + (limit,), + ).fetchall() + return [retrieval_observation_from_row(row) for row in rows] + + def _search_model_rows_with_trace( db_path: Path | str, *, @@ -1161,6 +1270,23 @@ def memory_status_transition_from_row(row: sqlite3.Row) -> MemoryStatusTransitio ) +def retrieval_observation_from_row(row: sqlite3.Row) -> RetrievalObservation: + return RetrievalObservation( + id=row["id"], + created_at=row["created_at"], + surface=row["surface"], + query_sha256=row["query_sha256"], + query_preview=row["query_preview"], + preferred_scope=row["preferred_scope"], + limit=row["limit_value"], + statuses=json.loads(row["statuses_json"]), + retrieved_memory_refs=json.loads(row["retrieved_memory_refs_json"]), + top_memory_ref=row["top_memory_ref"], + response_mode=row["response_mode"], + metadata=json.loads(row["metadata_json"]), + ) + + def relation_from_row(row: sqlite3.Row) -> Relation: return Relation( id=row["id"], diff --git a/tests/test_cli.py b/tests/test_cli.py index 5d3da27..b627bfd 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -97,6 +97,75 @@ def test_python_module_cli_init_creates_database(tmp_path: Path) -> None: +def 
test_python_module_cli_retrieve_observe_records_secret_safe_local_observation(tmp_path: Path) -> None: + db_path = tmp_path / "retrieve-observation.db" + initialize_database(db_path) + source = ingest_source_text( + db_path=db_path, + source_type="transcript", + content="Observation smoke target phrase appears in curated memory records.", + metadata={"project": "observation-smoke"}, + ) + fact = create_candidate_fact( + db_path=db_path, + subject_ref="Observation smoke", + predicate="target_phrase", + object_ref_or_value="OBSERVATION_OK", + evidence_ids=[source.id], + scope="project:observation-smoke", + confidence=0.95, + ) + approve_fact(db_path=db_path, fact_id=fact.id) + + secret_query = "What is the target phrase? password=SUPERSECRET token=abc123" + env = {**os.environ, "PYTHONPATH": "src"} + retrieve_result = subprocess.run( + [ + sys.executable, + "-m", + "agent_memory.api.cli", + "retrieve", + str(db_path), + secret_query, + "--preferred-scope", + "project:observation-smoke", + "--observe", + "cli-test", + ], + cwd=Path(__file__).resolve().parents[1], + env=env, + capture_output=True, + text=True, + ) + assert retrieve_result.returncode == 0, retrieve_result.stderr + + list_result = subprocess.run( + [ + sys.executable, + "-m", + "agent_memory.api.cli", + "observations", + "list", + str(db_path), + ], + cwd=Path(__file__).resolve().parents[1], + env=env, + capture_output=True, + text=True, + ) + + assert list_result.returncode == 0, list_result.stderr + payload = json.loads(list_result.stdout) + assert payload["kind"] == "retrieval_observations" + assert payload["observations"][0]["surface"] == "cli-test" + assert payload["observations"][0]["query_sha256"] + assert payload["observations"][0]["query_text"] is None + assert payload["observations"][0]["retrieved_memory_refs"] == [f"fact:{fact.id}"] + assert payload["observations"][0]["top_memory_ref"] == f"fact:{fact.id}" + assert "SUPERSECRET" not in list_result.stdout + assert "abc123" not in 
list_result.stdout + + def test_python_module_cli_retrieve_defaults_to_approved_and_hides_disputed_content(tmp_path: Path) -> None: db_path = tmp_path / "retrieve-approved-only.db" initialize_database(db_path) @@ -1019,6 +1088,27 @@ def test_python_module_cli_hermes_pre_llm_hook_outputs_context_for_hermes_shell_ assert "HH-###" not in hook_response["context"] # compact target context, not raw fact dump assert "Reason codes:" not in hook_response["context"] + observations_result = subprocess.run( + [ + sys.executable, + "-m", + "agent_memory.api.cli", + "observations", + "list", + str(db_path), + ], + cwd=Path(__file__).resolve().parents[1], + env=env, + capture_output=True, + text=True, + ) + assert observations_result.returncode == 0, observations_result.stderr + observations_payload = json.loads(observations_result.stdout) + observation = observations_payload["observations"][0] + assert observation["surface"] == "hermes-pre-llm-hook" + assert observation["retrieved_memory_refs"] == [f"fact:{fact.id}"] + assert observation["metadata"] == {"hook_event_name": "pre_llm_call"} + def test_python_module_cli_hermes_pre_llm_hook_derives_path_scope_from_payload_cwd(