tcconnally · tcconnally · Jun 5, 2026 · Jun 5, 2026
diff --git a/perseus.py b/perseus.py
@@ -804,6 +804,14 @@ def _webhook_worker(url, ep, wh_cfg, q):
             "version": version,
             "data": payload
         }
+        # #167: redact secrets from webhook payload before external delivery.
+        # Pre-1.0.6, directive args and output snippets in payload["data"]
+        # were sent verbatim to webhook endpoints, leaking secrets.
+        try:
+            redacted_data, _ = redact_text(payload, cfg)
+            body_dict["data"] = redacted_data
+        except Exception:
+            pass  # redaction failure must not block webhook delivery
         body_json = json.dumps(body_dict)
         body_data = body_json.encode("utf-8")
 
@@ -1533,8 +1541,8 @@ def _reset_plugin_cache() -> None:
     {"name": "aws_access_key_id", "pattern": r"\b(?:AKIA|ASIA)[0-9A-Z]{16}\b"},
     # Slack bot/user/app/refresh tokens
     {"name": "slack_token", "pattern": r"\bxox[abprso]-[A-Za-z0-9-]{10,}\b"},
-    # Generic bearer header value (Authorization: Bearer XXXX)
-    {"name": "bearer_header", "pattern": r"(?i)(authorization:\s*bearer\s+)[A-Za-z0-9._\-+/=]{16,}"},
+    # Generic bearer header value (Authorization: Bearer ***
+    {"name": "bearer_header", "pattern": r"(?i)(authorization:\s*bearer\s+)[A-Za-z0-9._\-+/=]{16,}", "_prefix_group": 1},
     # JWT (three base64url segments). Conservative: require non-trivial first segment.
     {"name": "jwt", "pattern": r"\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b"},
     # PEM private key block (covers RSA, EC, OPENSSH, generic)
@@ -1552,6 +1560,9 @@ def _reset_plugin_cache() -> None:
     {"name": "long_hex_secret",
      "pattern": r"(?i)(?:secret|token|key|password|passwd|api[_-]?key|auth(?:orization)?)\s*[:=]\s*[\"']?([a-fA-F0-9]{40,})[\"']?",
      "_anchor_group": 1},
+    # Atlassian API token: ATATT3... (Confluence/Jira personal access tokens)
+    # See: https://github.com/tcconnally/perseus/issues/142
+    {"name": "atlassian_api_token", "pattern": r"\bATATT3[A-Za-z0-9+/=_-]{40,}\b"},
     # HuggingFace: hf_... (read/write tokens)
     {"name": "huggingface_token", "pattern": r"\bhf_[A-Za-z0-9]{30,}\b"},
     # Google Cloud API key: AIza...
@@ -1622,11 +1633,13 @@ def _compile_redaction_rules(cfg: dict) -> list[dict]:
         # behavior: group(1) (if present) is treated as a leading prefix to
         # preserve and the rest of the match is replaced.
         anchor_group = rule.get("_anchor_group")
+        prefix_group = rule.get("_prefix_group")
         compiled.append({
             "name": name,
             "regex": regex,
             "replacement": str(replacement),
             "anchor_group": anchor_group,
+            "prefix_group": prefix_group,
         })
     return compiled
 
@@ -1683,8 +1696,13 @@ def _sub(match, _repl=rule["replacement"], _ag=rule.get("anchor_group")):
                 rel_start = span_start - match.start()
                 rel_end = span_end - match.start()
                 return full[:rel_start] + _repl + full[rel_end:]
-            if match.lastindex:
-                return match.group(1) + _repl
+            # #141: prefix-preservation only for rules that explicitly
+            # declare _prefix_group (e.g. bearer_header). User-supplied
+            # patterns with accidental capture groups would silently
+            # truncate data under the old `match.lastindex` heuristic.
+            _pg = rule.get("prefix_group")
+            if _pg is not None and match.lastindex and match.lastindex >= _pg:
+                return match.group(_pg) + _repl
             return _repl
         out, n = regex.subn(_sub, out)
         if n:
@@ -1734,7 +1752,6 @@ def redact_value(value, cfg: dict) -> tuple[object, dict]:
                 counts[name] = counts.get(name, 0) + count
         return out, {"enabled": enabled, "total": total, "counts": counts, "rules_active": rules_active}
     return value, {"enabled": True, "total": 0, "counts": {}, "rules_active": 0}
-
 # Callers can disable via `audit.enabled = false`.
 _VALIDATOR_CACHE: dict[str, Callable] = {}
 
@@ -3605,6 +3622,17 @@ def resolve_query(args_str: str, cfg: dict, workspace: "Path | None" = None) ->
         fallback = _unescape_fallback(fallback)
         raw = (raw[:fb_match.start()] + raw[fb_match.end():]).rstrip()
 
+    # #138: strip timeout=N modifier BEFORE command extraction to prevent
+    # it from leaking into the executed shell command.
+
+    # Extract timeout=N modifier (per-directive override, default 30s)
+    timeout = int(cfg["render"].get("query_timeout_s", 30))
+    tm_match = re.search(r'\s+timeout=(\d+)(?:\s|$)', raw)
+    if tm_match:
+        timeout = int(tm_match.group(1))
+        raw = (raw[:tm_match.start()] + raw[tm_match.end():]).rstrip()
+
+
     cmd_match = re.match(r'^"((?:[^"\\]|\\.)*)"', raw)   # double-quoted
     if not cmd_match:
         cmd_match = re.match(r"^'((?:[^'\\]|\\.)*)'", raw)  # single-quoted
@@ -3626,13 +3654,6 @@ def resolve_query(args_str: str, cfg: dict, workspace: "Path | None" = None) ->
                 command=cmd[:500],
                 shell=shell)
 
-    # Extract timeout=N modifier (per-directive override, default 30s)
-    timeout = int(cfg["render"].get("query_timeout_s", 30))
-    tm_match = re.search(r'\s+timeout=(\d+)(?:\s|$)', raw)
-    if tm_match:
-        timeout = int(tm_match.group(1))
-        raw = (raw[:tm_match.start()] + raw[tm_match.end():]).rstrip()
-
     try:
         # #139: when invoked under MCP's _call_tool timeout wrapper, the
         # wrapper needs to kill this subprocess (and any descendants) if
@@ -3690,7 +3711,7 @@ class _Result:
                 return fallback
             # #137: redact secrets out of `cmd` and `stderr` before interpolating
             # them into render output. Without this, a command like
-            # `@query "curl -H 'Authorization: Bearer ghp_…'"` leaks the bearer
+            # `@query "curl -H 'Authorization: Bearer *** leaks the bearer
             # token in the exit-nonzero header. Render-time redaction only runs
             # later in the pipeline and only on the final assembled output, but
             # by then this string has been logged elsewhere.
@@ -4546,7 +4567,6 @@ def format_prefetch_human(result: dict) -> str:
         trigger = entry.get("trigger") or "no-trigger"
         lines.append(f"- {entry['status']}: {entry['rule']} {trigger} -> {target}{reason}")
     return "\n".join(lines)
-
 # ──────────────────────────────── @agent ──────────────────────────────────────
 
 def resolve_agent(args_str: str, cfg: dict, workspace: Path | None = None) -> str:
@@ -9758,13 +9778,31 @@ def _load_narrative(path: Path) -> tuple[dict, str]:
     return fm, body
 
 
+
+def _safe_fsync(path):
+    """Fsync file + parent directory for durability (#140)."""
+    try:
+        with open(path, "rb") as f:
+            os.fsync(f.fileno())
+    except OSError:
+        pass
+    try:
+        fd = os.open(str(path.parent), os.O_RDONLY)
+        os.fsync(fd)
+        os.close(fd)
+    except OSError:
+        pass
+
 def _save_narrative(path: Path, frontmatter: dict, body: str) -> None:
     """Atomically write the narrative file (temp + rename)."""
     path.parent.mkdir(parents=True, exist_ok=True)
     fm_yaml = yaml.safe_dump(frontmatter, default_flow_style=False, allow_unicode=True, sort_keys=False).strip()
     payload = f"---\n{fm_yaml}\n---\n\n{body.rstrip()}\n"
     tmp = path.with_suffix(path.suffix + ".tmp")
     tmp.write_text(payload, encoding="utf-8")
+    # #140: fsync temp file + parent directory before atomic rename to
+    # prevent narrative loss on system crash / power loss.
+    _safe_fsync(tmp)
     os.replace(tmp, path)
 
 

diff --git a/src/perseus/directives/query.py b/src/perseus/directives/query.py
@@ -172,6 +172,17 @@ def resolve_query(args_str: str, cfg: dict, workspace: "Path | None" = None) ->
         fallback = _unescape_fallback(fallback)
         raw = (raw[:fb_match.start()] + raw[fb_match.end():]).rstrip()
 
+    # #138: strip timeout=N modifier BEFORE command extraction to prevent
+    # it from leaking into the executed shell command.
+
+    # Extract timeout=N modifier (per-directive override, default 30s)
+    timeout = int(cfg["render"].get("query_timeout_s", 30))
+    tm_match = re.search(r'\s+timeout=(\d+)(?:\s|$)', raw)
+    if tm_match:
+        timeout = int(tm_match.group(1))
+        raw = (raw[:tm_match.start()] + raw[tm_match.end():]).rstrip()
+
+
     cmd_match = re.match(r'^"((?:[^"\\]|\\.)*)"', raw)   # double-quoted
     if not cmd_match:
         cmd_match = re.match(r"^'((?:[^'\\]|\\.)*)'", raw)  # single-quoted
@@ -193,13 +204,6 @@ def resolve_query(args_str: str, cfg: dict, workspace: "Path | None" = None) ->
                 command=cmd[:500],
                 shell=shell)
 
-    # Extract timeout=N modifier (per-directive override, default 30s)
-    timeout = int(cfg["render"].get("query_timeout_s", 30))
-    tm_match = re.search(r'\s+timeout=(\d+)(?:\s|$)', raw)
-    if tm_match:
-        timeout = int(tm_match.group(1))
-        raw = (raw[:tm_match.start()] + raw[tm_match.end():]).rstrip()
-
     try:
         # #139: when invoked under MCP's _call_tool timeout wrapper, the
         # wrapper needs to kill this subprocess (and any descendants) if
@@ -257,7 +261,7 @@ class _Result:
                 return fallback
             # #137: redact secrets out of `cmd` and `stderr` before interpolating
             # them into render output. Without this, a command like
-            # `@query "curl -H 'Authorization: Bearer ghp_…'"` leaks the bearer
+            # `@query "curl -H 'Authorization: Bearer *** leaks the bearer
             # token in the exit-nonzero header. Render-time redaction only runs
             # later in the pipeline and only on the final assembled output, but
             # by then this string has been logged elsewhere.
@@ -1112,5 +1116,4 @@ def format_prefetch_human(result: dict) -> str:
         reason = f" ({entry['reason']})" if entry.get("reason") else ""
         trigger = entry.get("trigger") or "no-trigger"
         lines.append(f"- {entry['status']}: {entry['rule']} {trigger} -> {target}{reason}")
-    return "\n".join(lines)
-
+    return "\n".join(lines)
diff --git a/src/perseus/mneme_narrative.py b/src/perseus/mneme_narrative.py
@@ -202,13 +202,31 @@ def _load_narrative(path: Path) -> tuple[dict, str]:
     return fm, body
 
 
+
+def _safe_fsync(path):
+    """Fsync file + parent directory for durability (#140)."""
+    try:
+        with open(path, "rb") as f:
+            os.fsync(f.fileno())
+    except OSError:
+        pass
+    try:
+        fd = os.open(str(path.parent), os.O_RDONLY)
+        os.fsync(fd)
+        os.close(fd)
+    except OSError:
+        pass
+
 def _save_narrative(path: Path, frontmatter: dict, body: str) -> None:
     """Atomically write the narrative file (temp + rename)."""
     path.parent.mkdir(parents=True, exist_ok=True)
     fm_yaml = yaml.safe_dump(frontmatter, default_flow_style=False, allow_unicode=True, sort_keys=False).strip()
     payload = f"---\n{fm_yaml}\n---\n\n{body.rstrip()}\n"
     tmp = path.with_suffix(path.suffix + ".tmp")
     tmp.write_text(payload, encoding="utf-8")
+    # #140: fsync temp file + parent directory before atomic rename to
+    # prevent narrative loss on system crash / power loss.
+    _safe_fsync(tmp)
     os.replace(tmp, path)
 
 

diff --git a/src/perseus/redaction.py b/src/perseus/redaction.py
@@ -38,8 +38,8 @@
     {"name": "aws_access_key_id", "pattern": r"\b(?:AKIA|ASIA)[0-9A-Z]{16}\b"},
     # Slack bot/user/app/refresh tokens
     {"name": "slack_token", "pattern": r"\bxox[abprso]-[A-Za-z0-9-]{10,}\b"},
-    # Generic bearer header value (Authorization: Bearer XXXX)
-    {"name": "bearer_header", "pattern": r"(?i)(authorization:\s*bearer\s+)[A-Za-z0-9._\-+/=]{16,}"},
+    # Generic bearer header value (Authorization: Bearer ***
+    {"name": "bearer_header", "pattern": r"(?i)(authorization:\s*bearer\s+)[A-Za-z0-9._\-+/=]{16,}", "_prefix_group": 1},
     # JWT (three base64url segments). Conservative: require non-trivial first segment.
     {"name": "jwt", "pattern": r"\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b"},
     # PEM private key block (covers RSA, EC, OPENSSH, generic)
@@ -57,6 +57,9 @@
     {"name": "long_hex_secret",
      "pattern": r"(?i)(?:secret|token|key|password|passwd|api[_-]?key|auth(?:orization)?)\s*[:=]\s*[\"']?([a-fA-F0-9]{40,})[\"']?",
      "_anchor_group": 1},
+    # Atlassian API token: ATATT3... (Confluence/Jira personal access tokens)
+    # See: https://github.com/tcconnally/perseus/issues/142
+    {"name": "atlassian_api_token", "pattern": r"\bATATT3[A-Za-z0-9+/=_-]{40,}\b"},
     # HuggingFace: hf_... (read/write tokens)
     {"name": "huggingface_token", "pattern": r"\bhf_[A-Za-z0-9]{30,}\b"},
     # Google Cloud API key: AIza...
@@ -127,11 +130,13 @@ def _compile_redaction_rules(cfg: dict) -> list[dict]:
         # behavior: group(1) (if present) is treated as a leading prefix to
         # preserve and the rest of the match is replaced.
         anchor_group = rule.get("_anchor_group")
+        prefix_group = rule.get("_prefix_group")
         compiled.append({
             "name": name,
             "regex": regex,
             "replacement": str(replacement),
             "anchor_group": anchor_group,
+            "prefix_group": prefix_group,
         })
     return compiled
 
@@ -188,8 +193,13 @@ def _sub(match, _repl=rule["replacement"], _ag=rule.get("anchor_group")):
                 rel_start = span_start - match.start()
                 rel_end = span_end - match.start()
                 return full[:rel_start] + _repl + full[rel_end:]
-            if match.lastindex:
-                return match.group(1) + _repl
+            # #141: prefix-preservation only for rules that explicitly
+            # declare _prefix_group (e.g. bearer_header). User-supplied
+            # patterns with accidental capture groups would silently
+            # truncate data under the old `match.lastindex` heuristic.
+            _pg = rule.get("prefix_group")
+            if _pg is not None and match.lastindex and match.lastindex >= _pg:
+                return match.group(_pg) + _repl
             return _repl
         out, n = regex.subn(_sub, out)
         if n:
@@ -238,5 +248,4 @@ def redact_value(value, cfg: dict) -> tuple[object, dict]:
             for name, count in rep.get("counts", {}).items():
                 counts[name] = counts.get(name, 0) + count
         return out, {"enabled": enabled, "total": total, "counts": counts, "rules_active": rules_active}
-    return value, {"enabled": True, "total": 0, "counts": {}, "rules_active": 0}
-
+    return value, {"enabled": True, "total": 0, "counts": {}, "rules_active": 0}
diff --git a/src/perseus/webhooks.py b/src/perseus/webhooks.py
@@ -139,6 +139,15 @@ def _webhook_worker(url, ep, wh_cfg, q):
             "version": version,
             "data": payload
         }
+        # #167: redact secrets from webhook payload before external delivery.
+        # Pre-1.0.6, directive args and output snippets in payload["data"]
+        # were sent verbatim to webhook endpoints, leaking secrets.
+        try:
+            from perseus.redaction import redact_text
+            redacted_data, _ = redact_text(payload, cfg)
+            body_dict["data"] = redacted_data
+        except Exception:
+            pass  # redaction failure must not block webhook delivery
         body_json = json.dumps(body_dict)
         body_data = body_json.encode("utf-8")