Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
6afd62b
fix(redaction): scope long_hex_secret to credential context (#136)
tconnally-sam Jun 3, 2026
8d636b0
fix(audit,query): redact secrets in audit log fields and @query error…
tconnally-sam Jun 3, 2026
81f54b0
fix(mneme): auto-migrate legacy MD5 narrative files to SHA-256 paths …
tconnally-sam Jun 3, 2026
50dd9cf
fix(memory): enforce wall-clock deadline on compact LLM path (#131)
tconnally-sam Jun 3, 2026
0d9396d
fix(mcp,query): kill subprocess tree on _call_tool timeout (#139)
tconnally-sam Jun 3, 2026
7494a80
fix(config): make trust profile / user override layering structural (…
tconnally-sam Jun 3, 2026
524f4a1
fix(renderer): make parallel_queries pre-scan control-flow aware (#165)
tconnally-sam Jun 3, 2026
a1eda24
Merge branch 'main' into fix/136-long-hex-secret-redaction
tcconnally Jun 4, 2026
edda2b5
Merge branch 'main' into fix/136-long-hex-secret-redaction
tcconnally Jun 4, 2026
9acb989
fix(mcp): apply redaction to all _call_tool return paths (#166)
tconnally-sam Jun 3, 2026
67c8a00
Merge PR #159: fix(redaction): scope long_hex_secret to credential co…
Jun 5, 2026
ba6b528
Merge PR #164: fix(config): make trust profile / user override layeri…
Jun 5, 2026
db68d6d
Merge PR #161: #128 — Mneme MD5→SHA256 migration
Jun 5, 2026
1b2e877
Merge PR #162: #131 — memory compact wall-clock timeout
Jun 5, 2026
719ac87
Merge PR #163: #139 — MCP subprocess tree kill on timeout
Jun 5, 2026
7040858
Merge PR #160: #137 — audit log secret redaction
Jun 5, 2026
ead32c0
Merge PR #170: #165 — parallel_queries control-flow aware
Jun 5, 2026
f32b510
Merge PR #171: #166 — MCP tool response redaction
Jun 5, 2026
3697807
fix: remove embedded conflict markers from PR #171 branch
Jun 5, 2026
25c6f12
fix(mcp): apply PR #163 subprocess-tree timeout fix to mcp.py source
Jun 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,322 changes: 1,277 additions & 45 deletions CHANGELOG.md

Large diffs are not rendered by default.

932 changes: 842 additions & 90 deletions perseus.py

Large diffs are not rendered by default.

138 changes: 137 additions & 1 deletion src/perseus/agora.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,72 @@ def _memory_do_compact(workspace: Path, cfg: dict, provider: str | None) -> str:
fm = _mneme_default_frontmatter(workspace)

if provider:
new_body = _mneme_compact_llm(all_checkpoints, all_pythia, workspace, cfg, provider)
# Regression for #131 — pre-1.0.6, _mneme_compact_llm() called run_llm()
# which only enforced `llm.timeout_s` (default 30s) on the HTTP request
# itself. With streaming-token providers like Ollama serving a large
# model, individual tokens can arrive within timeout but total wall
# time was unbounded — operators reported `memory compact` hanging
# for hours.
#
# We now wrap the LLM call in a wall-clock deadline (memory.
# compact_total_timeout_s, default 180s). On timeout we abandon the
# LLM future and fall back to deterministic narrative — operators get
# SOME narrative, plus a clear stderr signal so they can decide
# whether to upgrade their LLM setup or stay deterministic.
#
# Limitation: ThreadPoolExecutor cannot truly kill the worker thread
# (Python provides no public API for that). The in-flight HTTP
# request continues until urllib's per-request timeout fires.
# Worst-case observed total wait is therefore
# `compact_total_timeout_s + llm.timeout_s`.
# NOTE(review): the leaked thread is NOT a daemon on Python 3.9+
# (which `cancel_futures` requires): executor workers are joined at
# interpreter exit, so a request that is still streaming can delay
# process exit after the fallback narrative has been produced.
total_timeout = float(cfg.get("memory", {}).get(
"compact_total_timeout_s", 180.0
))
try:
import concurrent.futures as _cf
executor = _cf.ThreadPoolExecutor(
max_workers=1, thread_name_prefix="mneme-compact-llm",
)
try:
fut = executor.submit(
_mneme_compact_llm,
all_checkpoints, all_pythia, workspace, cfg, provider,
)
new_body = fut.result(timeout=total_timeout)
finally:
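# Don't block on the worker here — it may still be waiting on
# urllib. NOTE(review): executor workers are non-daemon on Python
# 3.9+ and are joined at interpreter exit, so this defers the wait
# to process exit rather than removing it.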
executor.shutdown(wait=False, cancel_futures=True)
except _cf.TimeoutError:
sys.stderr.write(
f"> ⚠ Mnēmē compact: LLM provider {provider!r} exceeded "
f"compact_total_timeout_s={total_timeout:.0f}s; "
f"falling back to deterministic narrative.\n"
)
try:
audit_event(
cfg, "memory_compact_timeout",
provider=provider,
total_timeout_s=total_timeout,
workspace_hash=_workspace_hash(workspace),
)
except Exception:
pass
new_body = _deterministic_narrative(
all_checkpoints, all_pythia, "", workspace, cfg,
)
except Exception as exc:
# LLM call raised (model server unreachable, payload error, etc.)
# — surface the failure but still produce SOMETHING usable.
sys.stderr.write(
f"> ⚠ Mnēmē compact: LLM provider {provider!r} failed "
f"({exc}); falling back to deterministic narrative.\n"
)
new_body = _deterministic_narrative(
all_checkpoints, all_pythia, "", workspace, cfg,
)
else:
new_body = _deterministic_narrative(all_checkpoints, all_pythia, "", workspace, cfg)

Expand Down Expand Up @@ -269,9 +334,80 @@ def cmd_memory(args, cfg):
_cmd_memory_index(args, cfg)
return

if sub == "doctor":
cmd_memory_doctor(args, cfg)
return

print(f"perseus memory: unknown subcommand '{sub}'.", file=sys.stderr)
sys.exit(2)


def cmd_memory_doctor(args, cfg) -> None:
    """Run ``perseus memory doctor``: report on, or migrate, legacy narratives.

    Regression for #128: narratives written before v1.0.3 are named after an
    MD5 hash of the workspace path, while v1.0.3+ names them with SHA-256.
    ``_mneme_path()`` migrates a file on first access, but only for a
    workspace the operator actually opens. This command covers every
    workspace in one pass and surfaces files that cannot be auto-migrated
    (e.g. missing frontmatter, cross-device renames).

    Two optional flags are read from ``args``; each defaults to off when the
    attribute is absent:
      migrate — run the migration instead of only reporting
      json    — print machine-readable JSON instead of the text report

    The store is always scanned before anything else, so in migrate mode the
    scan that is printed describes the state *before* the migration ran.
    """
    wants_migrate = bool(getattr(args, "migrate", False))
    wants_json = bool(getattr(args, "json", False))
    scan = _mneme_doctor_scan(cfg)

    if not wants_migrate:
        # Read-only scan: report what is in the store and suggest next steps.
        if wants_json:
            import json as _json
            print(_json.dumps(scan, indent=2))
            return
        print(f"Mnēmē doctor — store: {scan['store']}")
        print(f" Narrative files: {len(scan['narrative_files'])}")
        print(f" SHA-256 (current):{len(scan['sha256_files'])}")
        print(f" Legacy MD5: {len(scan['legacy_md5_files'])}")
        print(f" Orphan: {len(scan['orphan_files'])}")
        print(f" Unknown stems: {len(scan['unknown_files'])}")
        if scan["legacy_md5_files"]:
            print()
            print("Legacy MD5-named narratives detected. Run:")
            print(" perseus memory doctor --migrate")
            print("to rename them to their SHA-256 paths in place. Operation is")
            print("idempotent and uses atomic os.replace.")
        if scan["orphan_files"]:
            print()
            print("⚠ Orphan files (frontmatter workspace doesn't match filename):")
            for fp in scan["orphan_files"]:
                print(f" - {fp}")
            print("These were likely written under a different store, OR the")
            print("workspace path moved. Review manually before deleting.")
        if scan["unknown_files"]:
            print()
            print("Files with non-standard names (skipped by Mnēmē):")
            for fp in scan["unknown_files"]:
                print(f" - {fp}")
        return

    # Migrate mode: the scan above is the "before" picture.
    result = _mneme_doctor_migrate(cfg)
    if wants_json:
        import json as _json
        print(_json.dumps({"scan_before": scan, "migrate": result}, indent=2))
        return

    print(f"Mnēmē doctor — store: {scan['store']}")
    print(f" Narrative files: {len(scan['narrative_files'])}")
    print(f" Legacy MD5 found: {len(scan['legacy_md5_files'])}")
    print(f" Migrated: {len(result['migrated'])}")
    for old, new in result["migrated"]:
        print(f" ✓ {Path(old).name} → {Path(new).name}")
    if result["skipped"]:
        print(f" Skipped: {len(result['skipped'])}")
        # Each skipped entry also carries the intended target; only the
        # source name and the reason are shown.
        for old, _new, reason in result["skipped"]:
            print(f" ⚠ {Path(old).name}: {reason}")
    if result["errors"]:
        print(f" Errors: {len(result['errors'])}")
        for old, exc_str in result["errors"]:
            print(f" ✗ {Path(old).name}: {exc_str}")

def _memory_federation_diagnostic(name: str, args_str: str, cfg: dict, workspace: object) -> list[dict]:
"""Per-directive LSP diagnostic for @memory: warn on unsubscribed federation alias.

Expand Down
118 changes: 113 additions & 5 deletions src/perseus/audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,63 @@ def _audit_rotate_if_needed(path: Path, max_bytes: int) -> None:
return


# Audit field names that NEVER get redacted (they are structural metadata,
# never user-supplied secrets). Adding to this allowlist is a security
# decision — review carefully.
_AUDIT_NEVER_REDACT_KEYS = frozenset({
"ts", "event_type", "perseus_version", "pid",
"directive", "exit_code", "duration_ms", "bytes_in", "bytes_out",
"schema_ref", "schema_ok", "policy", "decision", "trust_profile",
"permission", "session_id", "workspace_hash",
})


def _audit_redact_value(value, cfg):
"""Apply render-time redaction rules to an audit field value.

Regression for #137: pre-1.0.6, `audit_event` wrote field values verbatim
to ``audit_log.jsonl``. When a user wrote
``@query "curl -H 'Authorization: Bearer ghp_…'"``, the rendered output
was correctly redacted, but the audit log retained the raw bearer token
forever. We now pipe every string-shaped audit field through
``redact_text`` before writing.

Lists, dicts, and nested structures are walked recursively. Non-string
leaves (ints, bools, None) pass through. If ``redact_text`` is unavailable
or raises (older builds, malformed rules), we fall back to the raw value
rather than dropping the audit entry — observability beats perfect
redaction here, and rendered output is the primary defense.
"""
if value is None or isinstance(value, (bool, int, float)):
return value
if isinstance(value, str):
try:
redacted, _ = redact_text(value, cfg)
return redacted
except Exception:
return value
if isinstance(value, dict):
return {k: _audit_redact_value(v, cfg) for k, v in value.items()}
if isinstance(value, (list, tuple)):
return [_audit_redact_value(v, cfg) for v in value]
# Bytes, sets, custom objects — stringify then redact.
try:
as_str = str(value)
redacted, _ = redact_text(as_str, cfg)
return redacted
except Exception:
return repr(value)


def audit_event(cfg: dict, event_type: str, **fields) -> None:
"""Append a structured audit event to the configured JSONL log.

AC #1: sensitive operations emit structured events.
AC #4: logging failures warn but do not break normal render.
AC #5: callers can disable via `audit.enabled = false`.
AC #6 (1.0.6, #137): user-supplied field values are passed through the
same redaction rules used for render output. Structural metadata
keys (in ``_AUDIT_NEVER_REDACT_KEYS``) are exempt.

Caller passes any JSON-serializable fields. We always stamp:
ts — UTC ISO-8601
Expand All @@ -105,7 +156,12 @@ def audit_event(cfg: dict, event_type: str, **fields) -> None:
"perseus_version": _PERSEUS_VERSION,
"pid": os.getpid(),
}
# Allow operators to opt out of audit redaction (e.g. for forensic mode
# where the audit log is itself the secured artifact). Default ON.
redact_audit = bool(audit_cfg.get("redact_fields", True))
for k, v in fields.items():
if redact_audit and k not in _AUDIT_NEVER_REDACT_KEYS:
v = _audit_redact_value(v, cfg)
# Defensive: stringify any non-JSON-safe value rather than crashing.
try:
json.dumps(v)
Expand All @@ -114,10 +170,13 @@ def audit_event(cfg: dict, event_type: str, **fields) -> None:
record[k] = repr(v)
# v1.0.5 review: redact secrets before persisting to disk.
# Audit events can contain command strings, paths, or args with tokens.
try:
record, _report = redact_value(record, cfg)
except Exception:
pass # redaction failure must not block audit persistence
# Respect audit.redact_fields opt-out — operators may use forensic mode
# where the audit log is itself the secured artifact.
if redact_audit:
try:
record, _report = redact_value(record, cfg)
except Exception:
pass # redaction failure must not block audit persistence
try:
path = _audit_log_path(cfg)
path.parent.mkdir(parents=True, exist_ok=True)
Expand Down Expand Up @@ -230,6 +289,17 @@ def load_config(workspace: Path | None = None) -> dict:

The profile is sandwiched between the hardcoded defaults and user values
so explicit config keys always win — see task-45 AC #3.

Hardening (#129, v1.0.6): pre-v1.0.5, profile application ran AFTER the
user merge in some code paths, silently overriding `allow_query_shell:
true` set by a power user who also asked for a `balanced` profile (this
is a legitimate combination — "tighten everything but let me run queries").
To make the precedence regression-proof we now:
1. Pre-scan all sources to collect which (section, key) pairs the user
has set explicitly (regardless of value).
2. Apply the profile BEFORE the user merge, so user values write last.
3. Surface the layering decision in the audit log so operators can
observe what won and what lost.
"""
cfg = dict(DEFAULT_CONFIG)
for section, vals in DEFAULT_CONFIG.items():
Expand All @@ -254,8 +324,46 @@ def load_config(workspace: Path | None = None) -> dict:
perms = (src or {}).get("permissions") if isinstance(src, dict) else None
if isinstance(perms, dict) and "profile" in perms:
effective_profile = perms.get("profile")

# Collect (section, key) pairs the user has explicitly set across ALL
# sources. Used by `_apply_permission_profile` to skip user-owned keys.
# This makes the "user wins" guarantee structural — it no longer depends
# on the textual ordering of `_apply_permission_profile` vs `merge_loaded`.
user_set_keys: set[tuple[str, str]] = set()
for src in loaded_sources:
for section, vals in (src or {}).items():
if isinstance(vals, dict):
for key in vals.keys():
user_set_keys.add((section, key))

if effective_profile:
_apply_permission_profile(cfg, effective_profile)
applied = _apply_permission_profile(
cfg, effective_profile, skip_keys=user_set_keys
)
if applied:
# Audit the layering decision so operators can see which user
# keys (if any) won out over the profile. Best-effort: don't
# break load_config if audit fails.
try:
overrides = sorted(
f"{section}.{key}"
for (section, key) in user_set_keys
if section in PERMISSION_PROFILES.get(applied, {})
and key in PERMISSION_PROFILES[applied].get(section, {})
)
if overrides:
audit_event(
cfg,
"config_profile_overridden",
profile=applied,
user_overrides=overrides,
note=(
"User config explicitly set these keys; they "
"win over the profile (see #129 hardening)."
),
)
except Exception:
pass

# #168/#169 (v1.0.6): track per-section workspace provenance for
# hooks.py / registry.py consumers so dangerous workspace-sourced
Expand Down
10 changes: 10 additions & 0 deletions src/perseus/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,16 @@ def main():
p_fed_pull = fed_sub.add_parser("pull", help="Re-read all subscribed narratives (read-only, manual)")
p_fed_pull.add_argument("--json", action="store_true", help="Machine-readable JSON output")

# memory doctor (#128 — legacy MD5 → SHA-256 narrative migration)
p_mem_doc = mem_sub.add_parser(
"doctor",
help="Scan/repair the Mnēmē memory store (legacy MD5 → SHA-256 narrative migration)",
)
p_mem_doc.add_argument("--migrate", action="store_true",
help="Rename legacy MD5-named narratives to their SHA-256 paths (atomic, idempotent)")
p_mem_doc.add_argument("--json", action="store_true",
help="Machine-readable JSON output")

# memory index (Mnēmē v2)
p_mem_idx = mem_sub.add_parser("index", help="Manage the FTS5 search index")
idx_sub = p_mem_idx.add_subparsers(dest="index_command", required=True)
Expand Down
31 changes: 29 additions & 2 deletions src/perseus/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,12 @@
"recent_keep": 5, # raw checkpoints to include in Recent Activity
"auto_update": True, # update narrative on every checkpoint write
"compact_threshold": 20, # advisory: compact after this many incremental updates
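# #131: wall-clock deadline for `perseus memory compact` LLM path.
# Default 180s (3 min) covers Ollama mistral on a modern laptop for
# typical workspace sizes. On timeout the LLM call is abandoned and
# the deterministic narrative is used.
# 0 is intended to mean "no deadline" (pre-1.0.6 behavior — can hang
# indefinitely on slow models). NOTE(review): the compact path passes
# this value straight to Future.result(timeout=...), where 0 means
# "do not wait at all" — confirm 0 is special-cased before relying on it.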
"compact_total_timeout_s": 180,
"llm_provider": None, # None = deterministic; "ollama" / "openai-compat" enables LLM
"llm_model": None, # inherits from llm: block if None
"max_narrative_lines": 300, # warn (not error) if narrative grows beyond this
Expand Down Expand Up @@ -287,24 +293,45 @@
}


def _apply_permission_profile(cfg: dict, profile_name: object) -> str | None:
def _apply_permission_profile(
cfg: dict,
profile_name: object,
skip_keys: set[tuple[str, str]] | None = None,
) -> str | None:
"""Apply a permission profile to cfg in place.

Returns the canonical profile name applied, or None if profile_name is
falsy or unknown. Unknown profile names are silently ignored so a config
typo cannot brick the renderer — but `perseus trust` surfaces the
canonical applied profile so the operator can spot the mismatch.

#129 hardening (v1.0.6): callers may pass `skip_keys` — a set of
`(section, key)` tuples that the user has explicitly set in their
config. Those keys are skipped, structurally guaranteeing that
explicit user values win over the profile regardless of which order
the caller invokes profile-apply vs user-merge.

Pre-v1.0.6 callers (skip_keys=None) get the legacy destructive merge,
which still works correctly when followed by a user-merge step — but
is fragile to ordering changes. New callers should always pass
skip_keys (even if empty) so the audit-log layering decision is
accurate.
"""
if not profile_name:
return None
name = str(profile_name).strip().lower()
profile = PERMISSION_PROFILES.get(name)
if not profile:
return None
skip = skip_keys or set()
for section, vals in profile.items():
if section not in cfg or not isinstance(cfg[section], dict):
cfg[section] = {}
cfg[section].update(vals)
for key, val in vals.items():
if (section, key) in skip:
# User has explicitly configured this key; respect them.
continue
cfg[section][key] = val
return name


Expand Down
Loading
Loading