tcconnally · tcconnally · Jun 3, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,6 +10,40 @@ Each entry maps a release to the task IDs that shipped in it. The
 single-file `perseus.py` runtime is the only required artifact; everything
 else (installer, docs) is generated by `scripts/release.sh`.
 
+## [1.0.6] — UNRELEASED
+
+Critical security + correctness hotfix bundle. See GitHub milestone
+[v1.0.6](https://github.com/tcconnally/perseus/milestone/1).
+
+### 🔒 Security
+
+- **#137** — `@query` audit log no longer leaks secrets. Pre-1.0.6, calls
+  like `@query "curl -H 'Authorization: Bearer ghp_…'"` rendered correctly
+  redacted output but persisted the raw bearer token to
+  `~/.perseus/audit_log.jsonl` (via the `command` field), and leaked the
+  same secret in `@query` error/timeout/no-output messages back into
+  render output.
+  - `audit_event` (audit.py) now passes every user-supplied field value
+    through `redact_text` before writing. Structural fields (`directive`,
+    `exit_code`, `duration_ms`, `pid`, etc.) are exempt via an explicit
+    allowlist (`_AUDIT_NEVER_REDACT_KEYS`).
+  - `resolve_query` (directives/query.py) redacts `cmd`, `stderr`, and
+    exception messages before interpolating them into the error/timeout/
+    no-output strings.
+  - New config knob `audit.redact_fields` (default `true`); set `false`
+    to opt out for forensic mode where the audit log is itself the
+    secured artifact.
+  - Nested structures (dicts, lists) are walked recursively.
+
+### 🐛 Bug Fixes (other v1.0.6 items, tracked in milestone)
+- #128, #129, #130, #131, #135, #136, #138, #139, #140, #141, #142
+
+### 📦 Migration Notes
+- No config breaking changes. The new `audit.redact_fields` default of
+  `true` is strictly more secure than pre-1.0.6 behavior.
+
+---
+
 ## [1.0.5] — 2026-05-26
 
 **Bastra-Recall — Persistent Memory Backend (superseded by Mnēmē v2 in 1.0.6):**

diff --git a/perseus.py b/perseus.py
@@ -1540,12 +1540,63 @@ def _audit_rotate_if_needed(path: Path, max_bytes: int) -> None:
         return
 
 
+# Audit field names that NEVER get redacted (they are structural metadata,
+# never user-supplied secrets). Adding to this allowlist is a security
+# decision — review carefully.
+_AUDIT_NEVER_REDACT_KEYS = frozenset({
+    "ts", "event_type", "perseus_version", "pid",
+    "directive", "exit_code", "duration_ms", "bytes_in", "bytes_out",
+    "schema_ref", "schema_ok", "policy", "decision", "trust_profile",
+    "permission", "session_id", "workspace_hash",
+})
+
+
+def _audit_redact_value(value, cfg):
+    """Apply render-time redaction rules to an audit field value.
+
+    Regression for #137: pre-1.0.6, `audit_event` wrote field values verbatim
+    to ``audit_log.jsonl``. When a user wrote
+    ``@query "curl -H 'Authorization: Bearer ghp_…'"``, the rendered output
+    was correctly redacted, but the audit log retained the raw bearer token
+    forever. We now pipe every string-shaped audit field through
+    ``redact_text`` before writing.
+
+    Lists, dicts, and nested structures are walked recursively. Non-string
+    leaves (ints, bools, None) pass through. If ``redact_text`` is unavailable
+    or raises (older builds, malformed rules), we fall back to the raw value
+    rather than dropping the audit entry — observability beats perfect
+    redaction here, and rendered output is the primary defense.
+    """
+    if value is None or isinstance(value, (bool, int, float)):
+        return value
+    if isinstance(value, str):
+        try:
+            redacted, _ = redact_text(value, cfg)
+            return redacted
+        except Exception:
+            return value
+    if isinstance(value, dict):
+        return {k: _audit_redact_value(v, cfg) for k, v in value.items()}
+    if isinstance(value, (list, tuple)):
+        return [_audit_redact_value(v, cfg) for v in value]
+    # Bytes, sets, custom objects — stringify then redact.
+    try:
+        as_str = str(value)
+        redacted, _ = redact_text(as_str, cfg)
+        return redacted
+    except Exception:
+        return repr(value)
+
+
 def audit_event(cfg: dict, event_type: str, **fields) -> None:
     """Append a structured audit event to the configured JSONL log.
 
     AC #1: sensitive operations emit structured events.
     AC #4: logging failures warn but do not break normal render.
     AC #5: callers can disable via `audit.enabled = false`.
+    AC #6 (1.0.6, #137): user-supplied field values are passed through the
+        same redaction rules used for render output. Structural metadata
+        keys (in ``_AUDIT_NEVER_REDACT_KEYS``) are exempt.
 
     Caller passes any JSON-serializable fields. We always stamp:
         ts        — UTC ISO-8601
@@ -1562,7 +1613,12 @@ def audit_event(cfg: dict, event_type: str, **fields) -> None:
         "perseus_version": _PERSEUS_VERSION,
         "pid": os.getpid(),
     }
+    # Allow operators to opt out of audit redaction (e.g. for forensic mode
+    # where the audit log is itself the secured artifact). Default ON.
+    redact_audit = bool(audit_cfg.get("redact_fields", True))
     for k, v in fields.items():
+        if redact_audit and k not in _AUDIT_NEVER_REDACT_KEYS:
+            v = _audit_redact_value(v, cfg)
         # Defensive: stringify any non-JSON-safe value rather than crashing.
         try:
             json.dumps(v)
@@ -2451,7 +2507,6 @@ def resolve_include(args_str: str, workspace: Path | None = None, cfg: dict | No
         return f"> ⚠ @include: could not read `{file_path_str}`: {e}"
 
     # ── File size limit check (byte-counted, not character-counted) ──
-    max_bytes = render_cfg.get("max_include_bytes")
     if max_bytes is not None and len(data) > max_bytes:
         raw = data[:max_bytes].decode(errors="replace").rstrip()
         actual_size = len(data)
@@ -2577,7 +2632,6 @@ def fallback_result() -> str:
         return f"> ⚠ @read: could not read `{file_path_str}`: {e}"
 
     # ── File size limit check (byte-counted, not character-counted) ──
-    max_bytes = render_cfg.get("max_read_bytes")
     if max_bytes is not None and len(data) > max_bytes:
         content = data[:max_bytes].decode(errors="replace")
         trunc_note = (
@@ -2805,14 +2859,22 @@ def resolve_query(args_str: str, cfg: dict, workspace: "Path | None" = None) ->
         if exit_code != 0:
             if fallback is not None:
                 return fallback
-            header = f"> ⚠ `@query` exited {exit_code}: `{cmd}`\n\n"
-            body = stdout or stderr or "(no output)"
-            return header + f"```{lang}\n{body}\n```"
+            # #137: redact secrets out of `cmd` and `stderr` before interpolating
+            # them into render output. Without this, a command like
+            # `@query "curl -H 'Authorization: Bearer ghp_…'"` leaks the bearer
+            # token in the exit-nonzero header. Render-time redaction only runs
+            # later in the pipeline and only on the final assembled output, but
+            # by then this string has been logged elsewhere.
+            safe_cmd, _ = redact_text(cmd, cfg)
+            safe_body, _ = redact_text(stdout or stderr or "(no output)", cfg)
+            header = f"> ⚠ `@query` exited {exit_code}: `{safe_cmd}`\n\n"
+            return header + f"```{lang}\n{safe_body}\n```"
 
         if not stdout:
             if fallback is not None:
                 return fallback
-            return f"> (no output from `{cmd}`)"
+            safe_cmd, _ = redact_text(cmd, cfg)
+            return f"> (no output from `{safe_cmd}`)"
 
         # Apply stdout size cap (default 256 KB).
         # Truncate at the nearest preceding newline to avoid mid-line cuts.
@@ -2847,11 +2909,14 @@ def resolve_query(args_str: str, cfg: dict, workspace: "Path | None" = None) ->
     except subprocess.TimeoutExpired:
         if fallback is not None:
             return fallback
-        return f"> ⚠ `@query` timed out ({timeout}s): `{cmd}`"
+        safe_cmd, _ = redact_text(cmd, cfg)
+        return f"> ⚠ `@query` timed out ({timeout}s): `{safe_cmd}`"
     except Exception as exc:
         if fallback is not None:
             return fallback
-        return f"> ⚠ `@query` error: {exc}"
+        # exc.args often includes argv[0] which contains the full cmd; redact.
+        safe_err, _ = redact_text(str(exc), cfg)
+        return f"> ⚠ `@query` error: {safe_err}"
 
 
 def _guess_lang(cmd: str) -> str:

diff --git a/src/perseus/audit.py b/src/perseus/audit.py
@@ -78,12 +78,63 @@ def _audit_rotate_if_needed(path: Path, max_bytes: int) -> None:
         return
 
 
+# Audit field names that NEVER get redacted (they are structural metadata,
+# never user-supplied secrets). Adding to this allowlist is a security
+# decision — review carefully.
+_AUDIT_NEVER_REDACT_KEYS = frozenset({
+    "ts", "event_type", "perseus_version", "pid",
+    "directive", "exit_code", "duration_ms", "bytes_in", "bytes_out",
+    "schema_ref", "schema_ok", "policy", "decision", "trust_profile",
+    "permission", "session_id", "workspace_hash",
+})
+
+
+def _audit_redact_value(value, cfg):
+    """Apply render-time redaction rules to an audit field value.
+
+    Regression for #137: pre-1.0.6, `audit_event` wrote field values verbatim
+    to ``audit_log.jsonl``. When a user wrote
+    ``@query "curl -H 'Authorization: Bearer ghp_…'"``, the rendered output
+    was correctly redacted, but the audit log retained the raw bearer token
+    forever. We now pipe every string-shaped audit field through
+    ``redact_text`` before writing.
+
+    Lists, dicts, and nested structures are walked recursively. Non-string
+    leaves (ints, bools, None) pass through. If ``redact_text`` is unavailable
+    or raises (older builds, malformed rules), we fall back to the raw value
+    rather than dropping the audit entry — observability beats perfect
+    redaction here, and rendered output is the primary defense.
+    """
+    if value is None or isinstance(value, (bool, int, float)):
+        return value
+    if isinstance(value, str):
+        try:
+            redacted, _ = redact_text(value, cfg)
+            return redacted
+        except Exception:
+            return value
+    if isinstance(value, dict):
+        return {k: _audit_redact_value(v, cfg) for k, v in value.items()}
+    if isinstance(value, (list, tuple)):
+        return [_audit_redact_value(v, cfg) for v in value]
+    # Bytes, sets, custom objects — stringify then redact.
+    try:
+        as_str = str(value)
+        redacted, _ = redact_text(as_str, cfg)
+        return redacted
+    except Exception:
+        return repr(value)
+
+
 def audit_event(cfg: dict, event_type: str, **fields) -> None:
     """Append a structured audit event to the configured JSONL log.
 
     AC #1: sensitive operations emit structured events.
     AC #4: logging failures warn but do not break normal render.
     AC #5: callers can disable via `audit.enabled = false`.
+    AC #6 (1.0.6, #137): user-supplied field values are passed through the
+        same redaction rules used for render output. Structural metadata
+        keys (in ``_AUDIT_NEVER_REDACT_KEYS``) are exempt.
 
     Caller passes any JSON-serializable fields. We always stamp:
         ts        — UTC ISO-8601
@@ -100,7 +151,12 @@ def audit_event(cfg: dict, event_type: str, **fields) -> None:
         "perseus_version": _PERSEUS_VERSION,
         "pid": os.getpid(),
     }
+    # Allow operators to opt out of audit redaction (e.g. for forensic mode
+    # where the audit log is itself the secured artifact). Default ON.
+    redact_audit = bool(audit_cfg.get("redact_fields", True))
     for k, v in fields.items():
+        if redact_audit and k not in _AUDIT_NEVER_REDACT_KEYS:
+            v = _audit_redact_value(v, cfg)
         # Defensive: stringify any non-JSON-safe value rather than crashing.
         try:
             json.dumps(v)

diff --git a/src/perseus/directives/query.py b/src/perseus/directives/query.py
@@ -117,14 +117,22 @@ def resolve_query(args_str: str, cfg: dict, workspace: "Path | None" = None) ->
         if exit_code != 0:
             if fallback is not None:
                 return fallback
-            header = f"> ⚠ `@query` exited {exit_code}: `{cmd}`\n\n"
-            body = stdout or stderr or "(no output)"
-            return header + f"```{lang}\n{body}\n```"
+            # #137: redact secrets out of `cmd` and `stderr` before interpolating
+            # them into render output. Without this, a command like
+            # `@query "curl -H 'Authorization: Bearer ghp_…'"` leaks the bearer
+            # token in the exit-nonzero header. Render-time redaction only runs
+            # later in the pipeline and only on the final assembled output, but
+            # by then this string has been logged elsewhere.
+            safe_cmd, _ = redact_text(cmd, cfg)
+            safe_body, _ = redact_text(stdout or stderr or "(no output)", cfg)
+            header = f"> ⚠ `@query` exited {exit_code}: `{safe_cmd}`\n\n"
+            return header + f"```{lang}\n{safe_body}\n```"
 
         if not stdout:
             if fallback is not None:
                 return fallback
-            return f"> (no output from `{cmd}`)"
+            safe_cmd, _ = redact_text(cmd, cfg)
+            return f"> (no output from `{safe_cmd}`)"
 
         # Apply stdout size cap (default 256 KB).
         # Truncate at the nearest preceding newline to avoid mid-line cuts.
@@ -159,11 +167,14 @@ def resolve_query(args_str: str, cfg: dict, workspace: "Path | None" = None) ->
     except subprocess.TimeoutExpired:
         if fallback is not None:
             return fallback
-        return f"> ⚠ `@query` timed out ({timeout}s): `{cmd}`"
+        safe_cmd, _ = redact_text(cmd, cfg)
+        return f"> ⚠ `@query` timed out ({timeout}s): `{safe_cmd}`"
     except Exception as exc:
         if fallback is not None:
             return fallback
-        return f"> ⚠ `@query` error: {exc}"
+        # exc.args often includes argv[0] which contains the full cmd; redact.
+        safe_err, _ = redact_text(str(exc), cfg)
+        return f"> ⚠ `@query` error: {safe_err}"
 
 
 def _guess_lang(cmd: str) -> str:

diff --git a/tests/test_audit_log.py b/tests/test_audit_log.py
@@ -188,6 +188,91 @@ def test_audit_log_never_contains_raw_secret(tmp_path, monkeypatch):
     assert "\"event_type\": \"redaction\"" in audit_text
 
 
+# ── regression: #137 audit field values must be redacted ────────────────────
+
+
+def _setup_redaction_home(home: Path, monkeypatch) -> tuple[Path, dict]:
+    """Set up an isolated PERSEUS_HOME and return (audit_log_path, cfg).
+
+    Uses the standard test pattern from `test_audit_log_never_contains_raw_secret`
+    — monkeypatches PERSEUS_HOME so `_audit_log_path` accepts the location.
+    """
+    home.mkdir(exist_ok=True)
+    monkeypatch.setattr(perseus, "PERSEUS_HOME", home)
+    log_path = home / "audit_log.jsonl"
+    cfg = {
+        "audit": {
+            "enabled": True,
+            "log_path": str(log_path),
+            "max_log_bytes": 1_048_576,
+        },
+        "redaction": {"enabled": True},
+    }
+    return log_path, cfg
+
+
+def test_audit_event_redacts_aws_key_in_command_field(tmp_path, monkeypatch):
+    """Regression for #137 — AWS access key in audit.command must be redacted."""
+    log_path, cfg = _setup_redaction_home(tmp_path / "home", monkeypatch)
+    aws_key = "AKIAIOSFODNN7EXAMPLE"
+    perseus.audit_event(cfg, "shell_exec",
+                        directive="@query",
+                        command=f"aws s3 cp s3://b/k . --access-key {aws_key}")
+    log_text = log_path.read_text()
+    assert aws_key not in log_text, (
+        f"AWS key leaked to audit log:\n{log_text}"
+    )
+    assert "REDACTED" in log_text
+
+
+def test_audit_event_redacts_bearer_token_in_command_field(tmp_path, monkeypatch):
+    """Regression for #137 — bearer tokens in audit.command must be redacted."""
+    log_path, cfg = _setup_redaction_home(tmp_path / "home", monkeypatch)
+    token = "ghp_" + "Z" * 36
+    perseus.audit_event(cfg, "shell_exec",
+                        directive="@query",
+                        command=f"curl -H 'Authorization: Bearer {token}'")
+    log_text = log_path.read_text()
+    assert token not in log_text
+
+
+def test_audit_event_does_not_redact_structural_fields(tmp_path, monkeypatch):
+    """Structural fields (directive, exit_code, etc.) must pass through verbatim."""
+    log_path, cfg = _setup_redaction_home(tmp_path / "home", monkeypatch)
+    perseus.audit_event(cfg, "shell_exec",
+                        directive="@query",
+                        exit_code=42,
+                        duration_ms=1234)
+    entry = json.loads(log_path.read_text().strip())
+    assert entry["directive"] == "@query"
+    assert entry["exit_code"] == 42
+    assert entry["duration_ms"] == 1234
+
+
+def test_audit_event_redact_fields_can_be_disabled(tmp_path, monkeypatch):
+    """Forensic mode: audit.redact_fields=false preserves raw values."""
+    log_path, cfg = _setup_redaction_home(tmp_path / "home", monkeypatch)
+    cfg["audit"]["redact_fields"] = False
+    aws_key = "AKIAIOSFODNN7EXAMPLE"
+    perseus.audit_event(cfg, "shell_exec",
+                        directive="@query",
+                        command=f"aws sts --key {aws_key}")
+    log_text = log_path.read_text()
+    assert aws_key in log_text  # opt-out works
+
+
+def test_audit_event_walks_nested_dict_fields(tmp_path, monkeypatch):
+    """Nested structures (dicts/lists) are walked recursively for redaction."""
+    log_path, cfg = _setup_redaction_home(tmp_path / "home", monkeypatch)
+    token = "ghp_" + "Y" * 36
+    perseus.audit_event(cfg, "model_call",
+                        directive="@perseus",
+                        env={"GITHUB_TOKEN": token, "DEBUG": "1"},
+                        argv=["curl", "-H", f"Authorization: Bearer {token}"])
+    log_text = log_path.read_text()
+    assert token not in log_text
+
+
 # ── integration: `perseus trust audit` subcommand ───────────────────────────