diff --git a/CHANGELOG.md b/CHANGELOG.md index 1056a60..22cbd51 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,40 @@ Each entry maps a release to the task IDs that shipped in it. The single-file `perseus.py` runtime is the only required artifact; everything else (installer, docs) is generated by `scripts/release.sh`. +## [1.0.6] — UNRELEASED + +Critical security + correctness hotfix bundle. See GitHub milestone +[v1.0.6](https://github.com/tcconnally/perseus/milestone/1). + +### 🔒 Security + +- **#137** — `@query` audit log no longer leaks secrets. Pre-1.0.6, calls + like `@query "curl -H 'Authorization: Bearer ghp_…'"` rendered correctly + redacted output but persisted the raw bearer token to + `~/.perseus/audit_log.jsonl` (via the `command` field), and leaked the + same secret in `@query` error/timeout/no-output messages back into + render output. + - `audit_event` (audit.py) now passes every user-supplied field value + through `redact_text` before writing. Structural fields (`directive`, + `exit_code`, `duration_ms`, `pid`, etc.) are exempt via an explicit + allowlist (`_AUDIT_NEVER_REDACT_KEYS`). + - `resolve_query` (directives/query.py) redacts `cmd`, `stderr`, and + exception messages before interpolating them into the error/timeout/ + no-output strings. + - New config knob `audit.redact_fields` (default `true`); set `false` + to opt out for forensic mode where the audit log is itself the + secured artifact. + - Nested structures (dicts, lists) are walked recursively. + +### 🐛 Bug Fixes (other v1.0.6 items, tracked in milestone) +- #128, #129, #130, #131, #135, #136, #138, #139, #140, #141, #142 + +### 📦 Migration Notes +- No config breaking changes. The new `audit.redact_fields` default of + `true` is strictly more secure than pre-1.0.6 behavior. + +--- + ## [1.0.5] — 2026-05-26 **Bastra-Recall — Persistent Memory Backend (superseded by Mnēmē v2 in 1.0.6):** diff --git a/perseus.py b/perseus.py index 0527f05..96c5513 100644 --- a/perseus.py +++ b/perseus.py @@ -1540,12 +1540,63 @@ def _audit_rotate_if_needed(path: Path, max_bytes: int) -> None: return +# Audit field names that NEVER get redacted (they are structural metadata, +# never user-supplied secrets). Adding to this allowlist is a security +# decision — review carefully. +_AUDIT_NEVER_REDACT_KEYS = frozenset({ + "ts", "event_type", "perseus_version", "pid", + "directive", "exit_code", "duration_ms", "bytes_in", "bytes_out", + "schema_ref", "schema_ok", "policy", "decision", "trust_profile", + "permission", "session_id", "workspace_hash", +}) + + +def _audit_redact_value(value, cfg): + """Apply render-time redaction rules to an audit field value. + + Regression for #137: pre-1.0.6, `audit_event` wrote field values verbatim + to ``audit_log.jsonl``. When a user wrote + ``@query "curl -H 'Authorization: Bearer ghp_…'"``, the rendered output + was correctly redacted, but the audit log retained the raw bearer token + forever. We now pipe every string-shaped audit field through + ``redact_text`` before writing. + + Lists, dicts, and nested structures are walked recursively. Non-string + leaves (ints, bools, None) pass through. If ``redact_text`` is unavailable + or raises (older builds, malformed rules), we fall back to the raw value + rather than dropping the audit entry — observability beats perfect + redaction here, and rendered output is the primary defense. + """ + if value is None or isinstance(value, (bool, int, float)): + return value + if isinstance(value, str): + try: + redacted, _ = redact_text(value, cfg) + return redacted + except Exception: + return value + if isinstance(value, dict): + return {k: _audit_redact_value(v, cfg) for k, v in value.items()} + if isinstance(value, (list, tuple)): + return [_audit_redact_value(v, cfg) for v in value] + # Bytes, sets, custom objects — stringify then redact. + try: + as_str = str(value) + redacted, _ = redact_text(as_str, cfg) + return redacted + except Exception: + return repr(value) + + def audit_event(cfg: dict, event_type: str, **fields) -> None: """Append a structured audit event to the configured JSONL log. AC #1: sensitive operations emit structured events. AC #4: logging failures warn but do not break normal render. AC #5: callers can disable via `audit.enabled = false`. + AC #6 (1.0.6, #137): user-supplied field values are passed through the + same redaction rules used for render output. Structural metadata + keys (in ``_AUDIT_NEVER_REDACT_KEYS``) are exempt. Caller passes any JSON-serializable fields. We always stamp: ts — UTC ISO-8601 @@ -1562,7 +1613,12 @@ def audit_event(cfg: dict, event_type: str, **fields) -> None: "perseus_version": _PERSEUS_VERSION, "pid": os.getpid(), } + # Allow operators to opt out of audit redaction (e.g. for forensic mode + # where the audit log is itself the secured artifact). Default ON. + redact_audit = bool(audit_cfg.get("redact_fields", True)) for k, v in fields.items(): + if redact_audit and k not in _AUDIT_NEVER_REDACT_KEYS: + v = _audit_redact_value(v, cfg) # Defensive: stringify any non-JSON-safe value rather than crashing. try: json.dumps(v) @@ -2451,7 +2507,6 @@ def resolve_include(args_str: str, workspace: Path | None = None, cfg: dict | No return f"> ⚠ @include: could not read `{file_path_str}`: {e}" # ── File size limit check (byte-counted, not character-counted) ── - max_bytes = render_cfg.get("max_include_bytes") if max_bytes is not None and len(data) > max_bytes: raw = data[:max_bytes].decode(errors="replace").rstrip() actual_size = len(data) @@ -2577,7 +2632,6 @@ def fallback_result() -> str: return f"> ⚠ @read: could not read `{file_path_str}`: {e}" # ── File size limit check (byte-counted, not character-counted) ── - max_bytes = render_cfg.get("max_read_bytes") if max_bytes is not None and len(data) > max_bytes: content = data[:max_bytes].decode(errors="replace") trunc_note = ( @@ -2805,14 +2859,22 @@ def resolve_query(args_str: str, cfg: dict, workspace: "Path | None" = None) -> if exit_code != 0: if fallback is not None: return fallback - header = f"> ⚠ `@query` exited {exit_code}: `{cmd}`\n\n" - body = stdout or stderr or "(no output)" - return header + f"```{lang}\n{body}\n```" + # #137: redact secrets out of `cmd` and `stderr` before interpolating + # them into render output. Without this, a command like + # `@query "curl -H 'Authorization: Bearer ghp_…'"` leaks the bearer + # token in the exit-nonzero header. Render-time redaction only runs + # later in the pipeline and only on the final assembled output, but + # by then this string has been logged elsewhere. + safe_cmd, _ = redact_text(cmd, cfg) + safe_body, _ = redact_text(stdout or stderr or "(no output)", cfg) + header = f"> ⚠ `@query` exited {exit_code}: `{safe_cmd}`\n\n" + return header + f"```{lang}\n{safe_body}\n```" if not stdout: if fallback is not None: return fallback - return f"> (no output from `{cmd}`)" + safe_cmd, _ = redact_text(cmd, cfg) + return f"> (no output from `{safe_cmd}`)" # Apply stdout size cap (default 256 KB). # Truncate at the nearest preceding newline to avoid mid-line cuts. @@ -2847,11 +2909,14 @@ def resolve_query(args_str: str, cfg: dict, workspace: "Path | None" = None) -> except subprocess.TimeoutExpired: if fallback is not None: return fallback - return f"> ⚠ `@query` timed out ({timeout}s): `{cmd}`" + safe_cmd, _ = redact_text(cmd, cfg) + return f"> ⚠ `@query` timed out ({timeout}s): `{safe_cmd}`" except Exception as exc: if fallback is not None: return fallback - return f"> ⚠ `@query` error: {exc}" + # exc.args often includes argv[0] which contains the full cmd; redact. + safe_err, _ = redact_text(str(exc), cfg) + return f"> ⚠ `@query` error: {safe_err}" def _guess_lang(cmd: str) -> str: diff --git a/src/perseus/audit.py b/src/perseus/audit.py index ee154ba..6025b57 100644 --- a/src/perseus/audit.py +++ b/src/perseus/audit.py @@ -78,12 +78,63 @@ def _audit_rotate_if_needed(path: Path, max_bytes: int) -> None: return +# Audit field names that NEVER get redacted (they are structural metadata, +# never user-supplied secrets). Adding to this allowlist is a security +# decision — review carefully. +_AUDIT_NEVER_REDACT_KEYS = frozenset({ + "ts", "event_type", "perseus_version", "pid", + "directive", "exit_code", "duration_ms", "bytes_in", "bytes_out", + "schema_ref", "schema_ok", "policy", "decision", "trust_profile", + "permission", "session_id", "workspace_hash", +}) + + +def _audit_redact_value(value, cfg): + """Apply render-time redaction rules to an audit field value. + + Regression for #137: pre-1.0.6, `audit_event` wrote field values verbatim + to ``audit_log.jsonl``. When a user wrote + ``@query "curl -H 'Authorization: Bearer ghp_…'"``, the rendered output + was correctly redacted, but the audit log retained the raw bearer token + forever. We now pipe every string-shaped audit field through + ``redact_text`` before writing. + + Lists, dicts, and nested structures are walked recursively. Non-string + leaves (ints, bools, None) pass through. If ``redact_text`` is unavailable + or raises (older builds, malformed rules), we fall back to the raw value + rather than dropping the audit entry — observability beats perfect + redaction here, and rendered output is the primary defense. + """ + if value is None or isinstance(value, (bool, int, float)): + return value + if isinstance(value, str): + try: + redacted, _ = redact_text(value, cfg) + return redacted + except Exception: + return value + if isinstance(value, dict): + return {k: _audit_redact_value(v, cfg) for k, v in value.items()} + if isinstance(value, (list, tuple)): + return [_audit_redact_value(v, cfg) for v in value] + # Bytes, sets, custom objects — stringify then redact. + try: + as_str = str(value) + redacted, _ = redact_text(as_str, cfg) + return redacted + except Exception: + return repr(value) + + def audit_event(cfg: dict, event_type: str, **fields) -> None: """Append a structured audit event to the configured JSONL log. AC #1: sensitive operations emit structured events. AC #4: logging failures warn but do not break normal render. AC #5: callers can disable via `audit.enabled = false`. + AC #6 (1.0.6, #137): user-supplied field values are passed through the + same redaction rules used for render output. Structural metadata + keys (in ``_AUDIT_NEVER_REDACT_KEYS``) are exempt. Caller passes any JSON-serializable fields. We always stamp: ts — UTC ISO-8601 @@ -100,7 +151,12 @@ def audit_event(cfg: dict, event_type: str, **fields) -> None: "perseus_version": _PERSEUS_VERSION, "pid": os.getpid(), } + # Allow operators to opt out of audit redaction (e.g. for forensic mode + # where the audit log is itself the secured artifact). Default ON. + redact_audit = bool(audit_cfg.get("redact_fields", True)) for k, v in fields.items(): + if redact_audit and k not in _AUDIT_NEVER_REDACT_KEYS: + v = _audit_redact_value(v, cfg) # Defensive: stringify any non-JSON-safe value rather than crashing. try: json.dumps(v) diff --git a/src/perseus/directives/query.py b/src/perseus/directives/query.py index c79479f..ed33b2f 100644 --- a/src/perseus/directives/query.py +++ b/src/perseus/directives/query.py @@ -117,14 +117,22 @@ def resolve_query(args_str: str, cfg: dict, workspace: "Path | None" = None) -> if exit_code != 0: if fallback is not None: return fallback - header = f"> ⚠ `@query` exited {exit_code}: `{cmd}`\n\n" - body = stdout or stderr or "(no output)" - return header + f"```{lang}\n{body}\n```" + # #137: redact secrets out of `cmd` and `stderr` before interpolating + # them into render output. Without this, a command like + # `@query "curl -H 'Authorization: Bearer ghp_…'"` leaks the bearer + # token in the exit-nonzero header. Render-time redaction only runs + # later in the pipeline and only on the final assembled output, but + # by then this string has been logged elsewhere. + safe_cmd, _ = redact_text(cmd, cfg) + safe_body, _ = redact_text(stdout or stderr or "(no output)", cfg) + header = f"> ⚠ `@query` exited {exit_code}: `{safe_cmd}`\n\n" + return header + f"```{lang}\n{safe_body}\n```" if not stdout: if fallback is not None: return fallback - return f"> (no output from `{cmd}`)" + safe_cmd, _ = redact_text(cmd, cfg) + return f"> (no output from `{safe_cmd}`)" # Apply stdout size cap (default 256 KB). # Truncate at the nearest preceding newline to avoid mid-line cuts. @@ -159,11 +167,14 @@ def resolve_query(args_str: str, cfg: dict, workspace: "Path | None" = None) -> except subprocess.TimeoutExpired: if fallback is not None: return fallback - return f"> ⚠ `@query` timed out ({timeout}s): `{cmd}`" + safe_cmd, _ = redact_text(cmd, cfg) + return f"> ⚠ `@query` timed out ({timeout}s): `{safe_cmd}`" except Exception as exc: if fallback is not None: return fallback - return f"> ⚠ `@query` error: {exc}" + # exc.args often includes argv[0] which contains the full cmd; redact. + safe_err, _ = redact_text(str(exc), cfg) + return f"> ⚠ `@query` error: {safe_err}" def _guess_lang(cmd: str) -> str: diff --git a/tests/test_audit_log.py b/tests/test_audit_log.py index b1bc87b..f9c5ce4 100644 --- a/tests/test_audit_log.py +++ b/tests/test_audit_log.py @@ -188,6 +188,91 @@ def test_audit_log_never_contains_raw_secret(tmp_path, monkeypatch): assert "\"event_type\": \"redaction\"" in audit_text +# ── regression: #137 audit field values must be redacted ──────────────────── + + +def _setup_redaction_home(home: Path, monkeypatch) -> tuple[Path, dict]: + """Set up an isolated PERSEUS_HOME and return (audit_log_path, cfg). + + Uses the standard test pattern from `test_audit_log_never_contains_raw_secret` + — monkeypatches PERSEUS_HOME so `_audit_log_path` accepts the location. + """ + home.mkdir(exist_ok=True) + monkeypatch.setattr(perseus, "PERSEUS_HOME", home) + log_path = home / "audit_log.jsonl" + cfg = { + "audit": { + "enabled": True, + "log_path": str(log_path), + "max_log_bytes": 1_048_576, + }, + "redaction": {"enabled": True}, + } + return log_path, cfg + + +def test_audit_event_redacts_aws_key_in_command_field(tmp_path, monkeypatch): + """Regression for #137 — AWS access key in audit.command must be redacted.""" + log_path, cfg = _setup_redaction_home(tmp_path / "home", monkeypatch) + aws_key = "AKIAIOSFODNN7EXAMPLE" + perseus.audit_event(cfg, "shell_exec", + directive="@query", + command=f"aws s3 cp s3://b/k . --access-key {aws_key}") + log_text = log_path.read_text() + assert aws_key not in log_text, ( + f"AWS key leaked to audit log:\n{log_text}" + ) + assert "REDACTED" in log_text + + +def test_audit_event_redacts_bearer_token_in_command_field(tmp_path, monkeypatch): + """Regression for #137 — bearer tokens in audit.command must be redacted.""" + log_path, cfg = _setup_redaction_home(tmp_path / "home", monkeypatch) + token = "ghp_" + "Z" * 36 + perseus.audit_event(cfg, "shell_exec", + directive="@query", + command=f"curl -H 'Authorization: Bearer {token}'") + log_text = log_path.read_text() + assert token not in log_text + + +def test_audit_event_does_not_redact_structural_fields(tmp_path, monkeypatch): + """Structural fields (directive, exit_code, etc.) must pass through verbatim.""" + log_path, cfg = _setup_redaction_home(tmp_path / "home", monkeypatch) + perseus.audit_event(cfg, "shell_exec", + directive="@query", + exit_code=42, + duration_ms=1234) + entry = json.loads(log_path.read_text().strip()) + assert entry["directive"] == "@query" + assert entry["exit_code"] == 42 + assert entry["duration_ms"] == 1234 + + +def test_audit_event_redact_fields_can_be_disabled(tmp_path, monkeypatch): + """Forensic mode: audit.redact_fields=false preserves raw values.""" + log_path, cfg = _setup_redaction_home(tmp_path / "home", monkeypatch) + cfg["audit"]["redact_fields"] = False + aws_key = "AKIAIOSFODNN7EXAMPLE" + perseus.audit_event(cfg, "shell_exec", + directive="@query", + command=f"aws sts --key {aws_key}") + log_text = log_path.read_text() + assert aws_key in log_text # opt-out works + + +def test_audit_event_walks_nested_dict_fields(tmp_path, monkeypatch): + """Nested structures (dicts/lists) are walked recursively for redaction.""" + log_path, cfg = _setup_redaction_home(tmp_path / "home", monkeypatch) + token = "ghp_" + "Y" * 36 + perseus.audit_event(cfg, "model_call", + directive="@perseus", + env={"GITHUB_TOKEN": token, "DEBUG": "1"}, + argv=["curl", "-H", f"Authorization: Bearer {token}"]) + log_text = log_path.read_text() + assert token not in log_text + + # ── integration: `perseus trust audit` subcommand ───────────────────────────