Merge pull request #74 from StackOneHQ/feat/codex-compat-parity

StuBehan · web-flow · commit a9459fab25d4 · 2026-06-08T16:58:56.000+01:00
feat(): codex parity with claude code session information
diff --git a/README.md b/README.md
@@ -30,9 +30,10 @@
 | Cursor | ✅ |
 | Codex | ✅ |
 | Gemini CLI | ✅ † |
+| Antigravity CLI | ✅ † |
 | Any hooks-capable agent | ✅ — point it at `notify.sh` |
 
-† Gemini's tool-permission hook is observability-only — the banner shows the prompt, but the actual Allow / Deny click has to happen in Gemini's terminal. Claude Code, Cursor, and Codex permission events can be approved from the panel directly.
+† Gemini CLI and Antigravity route tool-permission prompts through an observability-only hook — the banner shows the prompt, but the Allow / Deny click still has to happen in the agent's own terminal. Claude Code and Codex permission events can be approved from the panel directly.
 
 **Platforms:** macOS — full app with panel, click-to-focus banners, auto-update, quota tracking, voice. Linux (PulseAudio / ALSA / libnotify) and Windows (Git Bash / WSL) get audio + basic notifications via `notify.sh` only.
 
@@ -75,7 +76,7 @@ cd stack-nudge
 
 **Prerequisites:** Python ≥ 3.10 (the bundled voice engine [stackvox](https://github.com/StackOneHQ/stackvox) requires it).
 
-The installer auto-wires hooks for **Claude Code** (`~/.claude/settings.json`) and **Cursor** (`~/.cursor/hooks.json`). Gemini CLI and Codex are supported through the same `notify.sh` entry-point, but their hooks must be wired manually — see [Manual setup](#manual-setup) below.
+The installer auto-wires hooks for every detected agent — **Claude Code** (`~/.claude`), **Cursor** (`~/.cursor`), **Codex** (`~/.codex`), **Gemini CLI** (`~/.gemini`), and **Antigravity CLI** (`~/.gemini/antigravity-cli`). Any other hooks-capable agent can be wired by hand — see [Manual setup](#manual-setup) below.
 
 ### From source (macOS dev)
 
@@ -335,7 +336,7 @@ Same set of cleanups as the in-app path, useful when the .app isn't reachable or
 
 ## Manual setup
 
-Claude Code, Cursor, Codex, and Gemini CLI are auto-wired by the first-launch wizard. For other hooks-capable agents (or to integrate from a custom script), all you need is to invoke `notify.sh <agent-label> <event>` from wherever your agent emits lifecycle events. `<event>` should be `stop` (agent finished a turn) or `permission` (waiting for approval); `<agent-label>` can be anything — it just controls the banner title.
+Claude Code, Cursor, Codex, Gemini CLI, and Antigravity CLI are auto-wired by the first-launch wizard. For other hooks-capable agents (or to integrate from a custom script), all you need is to invoke `notify.sh <agent-label> <event>` from wherever your agent emits lifecycle events. `<event>` should be `stop` (agent finished a turn) or `permission` (waiting for approval); `<agent-label>` can be anything — it just controls the banner title.
 
 Example block in any agent's hooks config:
 
diff --git a/Tests/StackNudgePanelCoreTests/CodexTranscriptReaderTests.swift b/Tests/StackNudgePanelCoreTests/CodexTranscriptReaderTests.swift
@@ -0,0 +1,79 @@
+import XCTest
+
+@testable import StackNudgePanelCore
+
+// CodexTranscriptReader parses Codex CLI rollout JSONL into the same
+// TranscriptStats the Claude reader produces. The schema is fixed against
+// Codex's own TokenUsage definition: context occupancy is
+// last_token_usage.total_tokens - reasoning_output_tokens, and cached input
+// is a subset of input (never summed). Fixtures here encode that contract so
+// a Codex schema drift, or a regression to the cached-double-count bug, is
+// caught without needing a live Codex session.
+final class CodexTranscriptReaderTests: XCTestCase {
+
+    private func writeRollout(_ lines: [String], name: String = "rollout-test.jsonl") -> String {
+        let dir = NSTemporaryDirectory() + "codex-rollout-\(UUID().uuidString)/"
+        try? FileManager.default.createDirectory(atPath: dir, withIntermediateDirectories: true)
+        let path = dir + name
+        try? (lines.joined(separator: "\n") + "\n")
+            .write(toFile: path, atomically: true, encoding: .utf8)
+        return path
+    }
+
+    func test_read_usesContextOccupancyFromLatestTokenCount() {
+        let path = writeRollout([
+            #"{"type":"session_meta","payload":{"id":"s1","model":"gpt-5-codex"}}"#,
+            #"{"type":"turn_context","payload":{"model":"gpt-5-codex"}}"#,
+            #"{"type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"input_tokens":10000,"cached_input_tokens":2000,"output_tokens":500,"reasoning_output_tokens":300,"total_tokens":10800},"last_token_usage":{"input_tokens":10000,"cached_input_tokens":2000,"output_tokens":500,"reasoning_output_tokens":300,"total_tokens":10800},"model_context_window":272000}}}"#,
+            #"{"type":"response_item","payload":{"role":"assistant"}}"#,
+            #"{"type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"input_tokens":60000,"cached_input_tokens":50000,"output_tokens":2000,"reasoning_output_tokens":1000,"total_tokens":63000},"last_token_usage":{"input_tokens":52000,"cached_input_tokens":48000,"output_tokens":1500,"reasoning_output_tokens":1000,"total_tokens":54500},"model_context_window":272000}}}"#,
+        ])
+
+        let actual = CodexTranscriptReader.read(path: path)
+
+        // Latest token_count: last_token_usage.total_tokens - reasoning = 54500 - 1000.
+        XCTAssertEqual(actual?.tokens, 53500)
+        XCTAssertEqual(actual?.model, "gpt-5-codex")
+    }
+
+    func test_read_ignoresCumulativeTotalAndCachedSubset() {
+        // A single turn where cached == input. If the reader wrongly summed
+        // cached into input, or used the cumulative total_token_usage, the
+        // number would differ. Occupancy must be last.total - last.reasoning.
+        let path = writeRollout([
+            #"{"type":"turn_context","payload":{"model":"gpt-5"}}"#,
+            #"{"type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"input_tokens":900000,"cached_input_tokens":0,"output_tokens":40000,"reasoning_output_tokens":12000,"total_tokens":940000},"last_token_usage":{"input_tokens":30000,"cached_input_tokens":30000,"output_tokens":800,"reasoning_output_tokens":200,"total_tokens":30800},"model_context_window":400000}}}"#,
+        ])
+
+        let actual = CodexTranscriptReader.read(path: path)
+
+        XCTAssertEqual(actual?.tokens, 30600)  // 30800 - 200, not 940000-ish, not +cached
+        XCTAssertEqual(actual?.model, "gpt-5")
+    }
+
+    func test_read_returnsNilWhenNoTokenCount() {
+        let path = writeRollout([
+            #"{"type":"session_meta","payload":{"model":"gpt-5-codex"}}"#,
+            #"{"type":"response_item","payload":{"role":"user"}}"#,
+        ])
+
+        XCTAssertNil(CodexTranscriptReader.read(path: path))
+    }
+
+    func test_read_returnsNilForMissingFile() {
+        XCTAssertNil(CodexTranscriptReader.read(path: "/nonexistent/rollout-x.jsonl"))
+    }
+
+    func test_dispatch_routesRolloutFilenameToCodexReader() {
+        // A Claude-shaped assistant line written into a rollout-* file. If the
+        // dispatcher routed by filename to the Codex reader (correct), it finds
+        // no token_count and returns nil. If it fell through to the Claude
+        // reader, it would parse the usage block and return tokens. nil proves
+        // the routing.
+        let path = writeRollout([
+            #"{"type":"assistant","message":{"model":"claude-x","usage":{"input_tokens":5,"cache_creation_input_tokens":0,"cache_read_input_tokens":0}}}"#,
+        ], name: "rollout-abc.jsonl")
+
+        XCTAssertNil(TranscriptReader.read(path: path))
+    }
+}
diff --git a/build.sh b/build.sh
@@ -239,6 +239,7 @@ build_app "$APP" "stack-nudge" \
   panel/Sessions.swift \
   panel/CompactView.swift \
   panel/TranscriptStats.swift \
+  panel/CodexTranscriptStats.swift \
   panel/ModelLimits.swift \
   panel/Phrases.swift \
   panel/UpdateChecker.swift \
diff --git a/install.sh b/install.sh
@@ -95,6 +95,11 @@ if [[ -z "$PYTHON" ]]; then
   exit 1
 fi
 if [[ ! -x "$VENV/bin/stackvox" ]]; then
+  # Clear any partial/incompatible venv first. `python -m venv` over an
+  # existing directory can fail with "[Errno 17] File exists" — e.g. a venv
+  # left by a different Python, or an interrupted earlier install — so make
+  # creation idempotent rather than aborting the whole install (set -e).
+  rm -rf "$VENV"
   "$PYTHON" -m venv "$VENV"
   "$VENV/bin/pip" install --quiet "$STACKVOX_SPEC"
   echo "  Voice engine installed -> $VENV  (using $PYTHON)"
@@ -301,6 +306,57 @@ PY
   installed_any=true
 fi
 
+# Codex
+# Codex's hooks file shares Claude Code's matcher-group JSON shape and event
+# names (Stop + PermissionRequest), with timeouts in seconds — only the path
+# and agent-arg differ. See https://developers.openai.com/codex/hooks
+if [[ -d "$HOME/.codex" ]]; then
+  echo ""
+  echo "Detected Codex (~/.codex)"
+  python3 - "$HOME/.codex/hooks.json" "$NOTIFY" "codex" <<'PY'
+import json, os, re, sys
+from pathlib import Path
+
+path = Path(sys.argv[1])
+notify = sys.argv[2]
+agent = sys.argv[3]
+path.parent.mkdir(parents=True, exist_ok=True)
+if path.exists():
+    settings = json.loads(path.read_text() or "{}")
+else:
+    settings = {}
+
+STALE = re.compile(r"(?:^|/)\.?(?:tinynudge|stack-nudge)/notify\.sh(?:\s|$)")
+
+hooks = settings.setdefault("hooks", {})
+# PermissionRequest blocks on a FIFO until the user approves via stack-nudge,
+# so it needs a longer timeout than the default.
+for event, arg, timeout in [("Stop", "stop", 30), ("PermissionRequest", "permission", 600)]:
+    groups = hooks.setdefault(event, [])
+
+    cleaned = []
+    for g in groups:
+        inner = g.get("hooks", [])
+        kept = [h for h in inner if not STALE.search(h.get("command", "") or "")]
+        if not kept:
+            continue
+        if kept != inner:
+            g = {**g, "hooks": kept}
+        cleaned.append(g)
+    groups[:] = cleaned
+
+    cmd = f"{notify} {agent} {arg}"
+    groups.append({
+        "matcher": "",
+        "hooks": [{"type": "command", "command": cmd, "timeout": timeout}],
+    })
+
+path.write_text(json.dumps(settings, indent=2) + "\n")
+print(f"  Updated {path}")
+PY
+  installed_any=true
+fi
+
 # Gemini CLI
 if [[ -d "$HOME/.gemini" ]]; then
   echo ""
@@ -395,7 +451,7 @@ fi
 
 if [[ "$installed_any" == "false" ]]; then
   echo ""
-  echo "No supported agents detected (Claude Code, Cursor, Gemini CLI, Antigravity CLI)."
+  echo "No supported agents detected (Claude Code, Cursor, Codex, Gemini CLI, Antigravity CLI)."
   echo "Install one, then re-run this script."
   exit 0
 fi
diff --git a/notify.sh b/notify.sh
@@ -40,6 +40,18 @@ permission_context() {
       file=$(printf '%s' "$HOOK_JSON" | jq -r '.tool_input.file_path // empty' 2>/dev/null | sed 's|.*/||')
       [[ -n "$file" ]] && echo "${tool_name}: ${file}"
       ;;
+    apply_patch)
+      # Codex's edit tool. tool_input carries a patch envelope rather than a
+      # plain file_path; pull the first target file out of the patch body.
+      # tool_input may be the patch string itself or wrap it under .input/.patch.
+      local patch file
+      patch=$(printf '%s' "$HOOK_JSON" \
+        | jq -r '.tool_input | if type=="string" then . else (.input // .patch // empty) end' 2>/dev/null)
+      file=$(printf '%s\n' "$patch" \
+        | grep -m1 -oE '^\*\*\* (Add|Update|Delete) File: .+' \
+        | sed -E 's/^.*File: //; s|.*/||')
+      if [[ -n "$file" ]]; then echo "apply_patch: ${file}"; else echo "apply_patch"; fi
+      ;;
     *)
       echo "$tool_name"
       ;;
@@ -64,6 +76,15 @@ voice_permission_context() {
       file=$(printf '%s' "$HOOK_JSON" | jq -r '.tool_input.file_path // empty' 2>/dev/null | sed 's|.*/||')
       [[ -n "$file" ]] && echo "${tool_name}: ${file}"
       ;;
+    apply_patch)
+      local patch file
+      patch=$(printf '%s' "$HOOK_JSON" \
+        | jq -r '.tool_input | if type=="string" then . else (.input // .patch // empty) end' 2>/dev/null)
+      file=$(printf '%s\n' "$patch" \
+        | grep -m1 -oE '^\*\*\* (Add|Update|Delete) File: .+' \
+        | sed -E 's/^.*File: //; s|.*/||')
+      if [[ -n "$file" ]]; then echo "apply_patch: ${file}"; else echo "Edit needs approval"; fi
+      ;;
     *)
       echo "$tool_name"
       ;;
@@ -219,6 +240,21 @@ agent_label() {
   esac
 }
 
+# True when this agent's permission hook blocks on stdout for an allow/deny
+# decision, so the panel's Approve/Deny can actually drive it. Claude Code and
+# Codex use the blocking PermissionRequest hook with the hookSpecificOutput
+# decision schema. Gemini and Antigravity route permission alerts through the
+# fire-and-forget Notification hook, which can't consume a decision — creating
+# a FIFO and blocking on it for those just burns the hook's timeout budget and
+# shows an Allow button that does nothing. (Phase 2 will add antigravity here
+# once it's wired via the decision-capable PreToolUse hook.)
+agent_supports_decision() {
+  case "$AGENT" in
+    claude-code|codex) return 0 ;;
+    *)                 return 1 ;;
+  esac
+}
+
 # Bundled voice engine paths. stackvox 0.3.x consolidated the CLI — there
 # is no separate `stackvox-say` console script anymore; speech goes through
 # `stackvox say <text>` as a subcommand.
@@ -477,9 +513,12 @@ notify_macos() {
       -e "end tell" 2>/dev/null)
   fi
 
+  # Only offer the in-panel Allow/Deny (and block on a FIFO for the response)
+  # when the agent's hook can actually consume a decision. For observability-
+  # only agents the banner still shows; the user approves in the agent's own UI.
   local has_action="false"
   local fifo_path=""
-  if [[ "${EVENT}" == "permission" ]]; then
+  if [[ "${EVENT}" == "permission" ]] && agent_supports_decision; then
     has_action="true"
     fifo_path=$(create_perm_fifo)
   fi
diff --git a/panel/CodexTranscriptStats.swift b/panel/CodexTranscriptStats.swift
@@ -0,0 +1,83 @@
+import Foundation
+
+// Reads a Codex CLI rollout JSONL (~/.codex/sessions/YYYY/MM/DD/rollout-*.jsonl)
+// and returns the same TranscriptStats the Claude reader produces, so the
+// Sessions/Compact UI shows context usage for Codex sessions identically.
+//
+// Codex differs from Claude Code's transcript in three ways that matter here:
+//   • Usage lives in `event_msg` lines whose payload.type == "token_count",
+//     not in assistant messages. `last_token_usage` is the latest turn's
+//     snapshot; `total_token_usage` is cumulative across the whole session
+//     (useful for cost, wrong for "how full is the window now").
+//   • Context-window occupancy is `total_tokens - reasoning_output_tokens`,
+//     mirroring Codex's own TokenUsage::tokens_in_context_window() — reasoning
+//     tokens don't persist in the window between turns.
+//   • `input_tokens` already includes `cached_input_tokens` (cached is a
+//     subset, not additive), so we must not sum them — the mistake that
+//     produces the well-known token-inflation bugs in third-party parsers.
+//
+// The active model is stamped on `turn_context` lines (and `session_meta`).
+enum CodexTranscriptReader {
+
+    // Read the whole file and scan newest-first, same approach (and same
+    // tens-of-MB caveat) as the Claude reader. The latest token_count gives
+    // current context occupancy; the latest model-bearing line gives the
+    // active model. Returns nil for unreadable files or rollouts with no
+    // token_count event yet.
+    static func read(path: String) -> TranscriptStats? {
+        guard let data = try? Data(contentsOf: URL(fileURLWithPath: path), options: .mappedIfSafe),
+              let text = String(data: data, encoding: .utf8)
+        else { return nil }
+
+        let lines = text.split(separator: "\n", omittingEmptySubsequences: true)
+
+        var tokens: Int?
+        var model: String?
+
+        for line in lines.reversed() {
+            if tokens != nil && model != nil { break }
+            guard let lineData = line.data(using: .utf8),
+                  let obj = try? JSONSerialization.jsonObject(with: lineData) as? [String: Any]
+            else { continue }
+
+            let payload = obj["payload"] as? [String: Any]
+            let payloadType = (payload?["type"] as? String) ?? (obj["type"] as? String)
+
+            if tokens == nil,
+               payloadType == "token_count",
+               let info = payload?["info"] as? [String: Any],
+               let last = info["last_token_usage"] as? [String: Any] {
+                let total = intValue(last["total_tokens"])
+                let reasoning = intValue(last["reasoning_output_tokens"])
+                if total > 0 { tokens = max(0, total - reasoning) }
+            }
+
+            if model == nil {
+                model = modelFrom(obj: obj, payload: payload)
+            }
+        }
+
+        guard let tokens else { return nil }
+        return TranscriptStats(tokens: tokens, model: model)
+    }
+
+    // Codex stamps the active model on turn_context lines; session_meta carries
+    // it for the session as a whole. Nesting has shifted across Codex versions,
+    // so probe the few known shapes defensively and ignore anything else.
+    private static func modelFrom(obj: [String: Any], payload: [String: Any]?) -> String? {
+        if let model = payload?["model"] as? String, !model.isEmpty { return model }
+        if let turnContext = payload?["turn_context"] as? [String: Any],
+           let model = turnContext["model"] as? String, !model.isEmpty { return model }
+        if let model = obj["model"] as? String, !model.isEmpty { return model }
+        return nil
+    }
+
+    // Rollout numbers are emitted as JSON integers, but decode defensively in
+    // case a writer or version serialises them as doubles.
+    private static func intValue(_ value: Any?) -> Int {
+        if let int = value as? Int { return int }
+        if let number = value as? NSNumber { return number.intValue }
+        if let double = value as? Double { return Int(double) }
+        return 0
+    }
+}
diff --git a/panel/TranscriptStats.swift b/panel/TranscriptStats.swift
@@ -14,17 +14,31 @@ struct TranscriptStats: Equatable {
 
 enum TranscriptReader {
 
-    // Read the transcript at `path`, scan from the end for the most
-    // recent assistant message with a usage block, and return its
-    // stats. Returns nil for unreadable files, empty transcripts, or
-    // transcripts that don't yet contain an assistant message.
+    // Dispatch by transcript path. Codex rollout files live under
+    // ~/.codex/sessions/.../rollout-*.jsonl and use a different JSONL schema
+    // (token_count events rather than assistant-message usage blocks), so they
+    // route to CodexTranscriptReader. Everything else is a Claude Code
+    // transcript. Both return the same TranscriptStats shape so the Sessions
+    // and Compact views render context usage identically across agents.
+    static func read(path: String) -> TranscriptStats? {
+        if path.contains("/.codex/")
+            || (path as NSString).lastPathComponent.hasPrefix("rollout-") {
+            return CodexTranscriptReader.read(path: path)
+        }
+        return readClaude(path: path)
+    }
+
+    // Read a Claude Code transcript at `path`, scan from the end for the most
+    // recent assistant message with a usage block, and return its stats.
+    // Returns nil for unreadable files, empty transcripts, or transcripts that
+    // don't yet contain an assistant message.
     //
     // We read the whole file rather than tail-seeking — Claude Code
     // transcripts are typically a few MB even for long sessions, and
     // tail-seeking JSONL safely requires byte-by-byte reverse scanning
     // to find a newline boundary. If transcripts grow to tens of MB
     // in practice we can revisit.
-    static func read(path: String) -> TranscriptStats? {
+    private static func readClaude(path: String) -> TranscriptStats? {
         guard let data = try? Data(contentsOf: URL(fileURLWithPath: path), options: .mappedIfSafe),
               let text = String(data: data, encoding: .utf8)
         else { return nil }
diff --git a/uninstall.sh b/uninstall.sh