diff --git a/README.md b/README.md index bc6e30b..10e1919 100644 --- a/README.md +++ b/README.md @@ -30,9 +30,10 @@ | Cursor | ✅ | | Codex | ✅ | | Gemini CLI | ✅ † | +| Antigravity CLI | ✅ † | | Any hooks-capable agent | ✅ — point it at `notify.sh` | -† Gemini's tool-permission hook is observability-only — the banner shows the prompt, but the actual Allow / Deny click has to happen in Gemini's terminal. Claude Code, Cursor, and Codex permission events can be approved from the panel directly. +† Gemini CLI and Antigravity route tool-permission prompts through an observability-only hook — the banner shows the prompt, but the Allow / Deny click still has to happen in the agent's own terminal. Claude Code and Codex permission events can be approved from the panel directly. **Platforms:** macOS — full app with panel, click-to-focus banners, auto-update, quota tracking, voice. Linux (PulseAudio / ALSA / libnotify) and Windows (Git Bash / WSL) get audio + basic notifications via `notify.sh` only. @@ -75,7 +76,7 @@ cd stack-nudge **Prerequisites:** Python ≥ 3.10 (the bundled voice engine [stackvox](https://github.com/StackOneHQ/stackvox) requires it). -The installer auto-wires hooks for **Claude Code** (`~/.claude/settings.json`) and **Cursor** (`~/.cursor/hooks.json`). Gemini CLI and Codex are supported through the same `notify.sh` entry-point, but their hooks must be wired manually — see [Manual setup](#manual-setup) below. +The installer auto-wires hooks for every detected agent — **Claude Code** (`~/.claude`), **Cursor** (`~/.cursor`), **Codex** (`~/.codex`), **Gemini CLI** (`~/.gemini`), and **Antigravity CLI** (`~/.gemini/antigravity-cli`). Any other hooks-capable agent can be wired by hand — see [Manual setup](#manual-setup) below. ### From source (macOS dev) @@ -335,7 +336,7 @@ Same set of cleanups as the in-app path, useful when the .app isn't reachable or ## Manual setup -Claude Code, Cursor, Codex, and Gemini CLI are auto-wired by the first-launch wizard. 
For other hooks-capable agents (or to integrate from a custom script), all you need is to invoke `notify.sh <agent> <event>` from wherever your agent emits lifecycle events. `<event>` should be `stop` (agent finished a turn) or `permission` (waiting for approval); `<agent>` can be anything — it just controls the banner title. +Claude Code, Cursor, Codex, Gemini CLI, and Antigravity CLI are auto-wired by the first-launch wizard. For other hooks-capable agents (or to integrate from a custom script), all you need is to invoke `notify.sh <agent> <event>` from wherever your agent emits lifecycle events. `<event>` should be `stop` (agent finished a turn) or `permission` (waiting for approval); `<agent>` can be anything — it just controls the banner title. Example block in any agent's hooks config: diff --git a/Tests/StackNudgePanelCoreTests/CodexTranscriptReaderTests.swift b/Tests/StackNudgePanelCoreTests/CodexTranscriptReaderTests.swift new file mode 100644 index 0000000..deeb93c --- /dev/null +++ b/Tests/StackNudgePanelCoreTests/CodexTranscriptReaderTests.swift @@ -0,0 +1,79 @@ +import XCTest + +@testable import StackNudgePanelCore + +// CodexTranscriptReader parses Codex CLI rollout JSONL into the same +// TranscriptStats the Claude reader produces. The schema is fixed against +// Codex's own TokenUsage definition: context occupancy is +// last_token_usage.total_tokens - reasoning_output_tokens, and cached input +// is a subset of input (never summed). Fixtures here encode that contract so +// a Codex schema drift, or a regression to the cached-double-count bug, is +// caught without needing a live Codex session. +final class CodexTranscriptReaderTests: XCTestCase { + + private func writeRollout(_ lines: [String], name: String = "rollout-test.jsonl") -> String { + let dir = NSTemporaryDirectory() + "codex-rollout-\(UUID().uuidString)/" + try? FileManager.default.createDirectory(atPath: dir, withIntermediateDirectories: true) + let path = dir + name + try? 
(lines.joined(separator: "\n") + "\n") + .write(toFile: path, atomically: true, encoding: .utf8) + return path + } + + func test_read_usesContextOccupancyFromLatestTokenCount() { + let path = writeRollout([ + #"{"type":"session_meta","payload":{"id":"s1","model":"gpt-5-codex"}}"#, + #"{"type":"turn_context","payload":{"model":"gpt-5-codex"}}"#, + #"{"type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"input_tokens":10000,"cached_input_tokens":2000,"output_tokens":500,"reasoning_output_tokens":300,"total_tokens":10800},"last_token_usage":{"input_tokens":10000,"cached_input_tokens":2000,"output_tokens":500,"reasoning_output_tokens":300,"total_tokens":10800},"model_context_window":272000}}}"#, + #"{"type":"response_item","payload":{"role":"assistant"}}"#, + #"{"type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"input_tokens":60000,"cached_input_tokens":50000,"output_tokens":2000,"reasoning_output_tokens":1000,"total_tokens":63000},"last_token_usage":{"input_tokens":52000,"cached_input_tokens":48000,"output_tokens":1500,"reasoning_output_tokens":1000,"total_tokens":54500},"model_context_window":272000}}}"#, + ]) + + let actual = CodexTranscriptReader.read(path: path) + + // Latest token_count: last_token_usage.total_tokens - reasoning = 54500 - 1000. + XCTAssertEqual(actual?.tokens, 53500) + XCTAssertEqual(actual?.model, "gpt-5-codex") + } + + func test_read_ignoresCumulativeTotalAndCachedSubset() { + // A single turn where cached == input. If the reader wrongly summed + // cached into input, or used the cumulative total_token_usage, the + // number would differ. Occupancy must be last.total - last.reasoning. 
+ let path = writeRollout([ + #"{"type":"turn_context","payload":{"model":"gpt-5"}}"#, + #"{"type":"event_msg","payload":{"type":"token_count","info":{"total_token_usage":{"input_tokens":900000,"cached_input_tokens":0,"output_tokens":40000,"reasoning_output_tokens":12000,"total_tokens":940000},"last_token_usage":{"input_tokens":30000,"cached_input_tokens":30000,"output_tokens":800,"reasoning_output_tokens":200,"total_tokens":30800},"model_context_window":400000}}}"#, + ]) + + let actual = CodexTranscriptReader.read(path: path) + + XCTAssertEqual(actual?.tokens, 30600) // 30800 - 200, not 940000-ish, not +cached + XCTAssertEqual(actual?.model, "gpt-5") + } + + func test_read_returnsNilWhenNoTokenCount() { + let path = writeRollout([ + #"{"type":"session_meta","payload":{"model":"gpt-5-codex"}}"#, + #"{"type":"response_item","payload":{"role":"user"}}"#, + ]) + + XCTAssertNil(CodexTranscriptReader.read(path: path)) + } + + func test_read_returnsNilForMissingFile() { + XCTAssertNil(CodexTranscriptReader.read(path: "/nonexistent/rollout-x.jsonl")) + } + + func test_dispatch_routesRolloutFilenameToCodexReader() { + // A Claude-shaped assistant line written into a rollout-* file. If the + // dispatcher routed by filename to the Codex reader (correct), it finds + // no token_count and returns nil. If it fell through to the Claude + // reader, it would parse the usage block and return tokens. nil proves + // the routing. 
+ let path = writeRollout([ + #"{"type":"assistant","message":{"model":"claude-x","usage":{"input_tokens":5,"cache_creation_input_tokens":0,"cache_read_input_tokens":0}}}"#, + ], name: "rollout-abc.jsonl") + + XCTAssertNil(TranscriptReader.read(path: path)) + } +} diff --git a/build.sh b/build.sh index fd1ea92..0f5b421 100755 --- a/build.sh +++ b/build.sh @@ -239,6 +239,7 @@ build_app "$APP" "stack-nudge" \ panel/Sessions.swift \ panel/CompactView.swift \ panel/TranscriptStats.swift \ + panel/CodexTranscriptStats.swift \ panel/ModelLimits.swift \ panel/Phrases.swift \ panel/UpdateChecker.swift \ diff --git a/install.sh b/install.sh index 17dc27a..549bacf 100755 --- a/install.sh +++ b/install.sh @@ -95,6 +95,11 @@ if [[ -z "$PYTHON" ]]; then exit 1 fi if [[ ! -x "$VENV/bin/stackvox" ]]; then + # Clear any partial/incompatible venv first. `python -m venv` over an + # existing directory can fail with "[Errno 17] File exists" — e.g. a venv + # left by a different Python, or an interrupted earlier install — so make + # creation idempotent rather than aborting the whole install (set -e). + rm -rf "$VENV" "$PYTHON" -m venv "$VENV" "$VENV/bin/pip" install --quiet "$STACKVOX_SPEC" echo " Voice engine installed -> $VENV (using $PYTHON)" @@ -301,6 +306,57 @@ PY installed_any=true fi +# Codex +# Codex's hooks file shares Claude Code's matcher-group JSON shape and event +# names (Stop + PermissionRequest), with hook timeouts given in seconds — only the path and agent-arg +# differ. 
See https://developers.openai.com/codex/hooks +if [[ -d "$HOME/.codex" ]]; then + echo "" + echo "Detected Codex (~/.codex)" + python3 - "$HOME/.codex/hooks.json" "$NOTIFY" "codex" <<'PY' +import json, os, re, sys +from pathlib import Path + +path = Path(sys.argv[1]) +notify = sys.argv[2] +agent = sys.argv[3] +path.parent.mkdir(parents=True, exist_ok=True) +if path.exists(): + settings = json.loads(path.read_text() or "{}") +else: + settings = {} + +STALE = re.compile(r"(?:^|/)\.?(?:tinynudge|stack-nudge)/notify\.sh(?:\s|$)") + +hooks = settings.setdefault("hooks", {}) +# PermissionRequest blocks on a FIFO until the user approves via stack-nudge, +# so it needs a longer timeout than the default. +for event, arg, timeout in [("Stop", "stop", 30), ("PermissionRequest", "permission", 600)]: + groups = hooks.setdefault(event, []) + + cleaned = [] + for g in groups: + inner = g.get("hooks", []) + kept = [h for h in inner if not STALE.search(h.get("command", "") or "")] + if not kept: + continue + if kept != inner: + g = {**g, "hooks": kept} + cleaned.append(g) + groups[:] = cleaned + + cmd = f"{notify} {agent} {arg}" + groups.append({ + "matcher": "", + "hooks": [{"type": "command", "command": cmd, "timeout": timeout}], + }) + +path.write_text(json.dumps(settings, indent=2) + "\n") +print(f" Updated {path}") +PY + installed_any=true +fi + # Gemini CLI if [[ -d "$HOME/.gemini" ]]; then echo "" @@ -395,7 +451,7 @@ fi if [[ "$installed_any" == "false" ]]; then echo "" - echo "No supported agents detected (Claude Code, Cursor, Gemini CLI, Antigravity CLI)." + echo "No supported agents detected (Claude Code, Cursor, Codex, Gemini CLI, Antigravity CLI)." echo "Install one, then re-run this script." 
exit 0 fi diff --git a/notify.sh b/notify.sh index aadc18d..7b9d606 100755 --- a/notify.sh +++ b/notify.sh @@ -40,6 +40,18 @@ permission_context() { file=$(printf '%s' "$HOOK_JSON" | jq -r '.tool_input.file_path // empty' 2>/dev/null | sed 's|.*/||') [[ -n "$file" ]] && echo "${tool_name}: ${file}" ;; + apply_patch) + # Codex's edit tool. tool_input carries a patch envelope rather than a + # plain file_path; pull the first target file out of the patch body. + # tool_input may be the patch string itself or wrap it under .input/.patch. + local patch file + patch=$(printf '%s' "$HOOK_JSON" \ + | jq -r '.tool_input | if type=="string" then . else (.input // .patch // empty) end' 2>/dev/null) + file=$(printf '%s\n' "$patch" \ + | grep -m1 -oE '^\*\*\* (Add|Update|Delete) File: .+' \ + | sed -E 's/^.*File: //; s|.*/||') + if [[ -n "$file" ]]; then echo "apply_patch: ${file}"; else echo "apply_patch"; fi + ;; *) echo "$tool_name" ;; @@ -64,6 +76,15 @@ voice_permission_context() { file=$(printf '%s' "$HOOK_JSON" | jq -r '.tool_input.file_path // empty' 2>/dev/null | sed 's|.*/||') [[ -n "$file" ]] && echo "${tool_name}: ${file}" ;; + apply_patch) + local patch file + patch=$(printf '%s' "$HOOK_JSON" \ + | jq -r '.tool_input | if type=="string" then . else (.input // .patch // empty) end' 2>/dev/null) + file=$(printf '%s\n' "$patch" \ + | grep -m1 -oE '^\*\*\* (Add|Update|Delete) File: .+' \ + | sed -E 's/^.*File: //; s|.*/||') + if [[ -n "$file" ]]; then echo "apply_patch: ${file}"; else echo "Edit needs approval"; fi + ;; *) echo "$tool_name" ;; @@ -219,6 +240,21 @@ agent_label() { esac } +# True when this agent's permission hook blocks on stdout for an allow/deny +# decision, so the panel's Approve/Deny can actually drive it. Claude Code and +# Codex use the blocking PermissionRequest hook with the hookSpecificOutput +# decision schema. 
Gemini and Antigravity route permission alerts through the +# fire-and-forget Notification hook, which can't consume a decision — creating +# a FIFO and blocking on it for those just burns the hook's timeout budget and +# shows an Allow button that does nothing. (Phase 2 will add antigravity here +# once it's wired via the decision-capable PreToolUse hook.) +agent_supports_decision() { + case "$AGENT" in + claude-code|codex) return 0 ;; + *) return 1 ;; + esac +} + # Bundled voice engine paths. stackvox 0.3.x consolidated the CLI — there # is no separate `stackvox-say` console script anymore; speech goes through # `stackvox say ` as a subcommand. @@ -477,9 +513,12 @@ notify_macos() { -e "end tell" 2>/dev/null) fi + # Only offer the in-panel Allow/Deny (and block on a FIFO for the response) + # when the agent's hook can actually consume a decision. For observability- + # only agents the banner still shows; the user approves in the agent's own UI. local has_action="false" local fifo_path="" - if [[ "${EVENT}" == "permission" ]]; then + if [[ "${EVENT}" == "permission" ]] && agent_supports_decision; then has_action="true" fifo_path=$(create_perm_fifo) fi diff --git a/panel/CodexTranscriptStats.swift b/panel/CodexTranscriptStats.swift new file mode 100644 index 0000000..23998ed --- /dev/null +++ b/panel/CodexTranscriptStats.swift @@ -0,0 +1,83 @@ +import Foundation + +// Reads a Codex CLI rollout JSONL (~/.codex/sessions/YYYY/MM/DD/rollout-*.jsonl) +// and returns the same TranscriptStats the Claude reader produces, so the +// Sessions/Compact UI shows context usage for Codex sessions identically. +// +// Codex differs from Claude Code's transcript in three ways that matter here: +// • Usage lives in `event_msg` lines whose payload.type == "token_count", +// not in assistant messages. `last_token_usage` is the latest turn's +// snapshot; `total_token_usage` is cumulative across the whole session +// (useful for cost, wrong for "how full is the window now"). 
+// • Context-window occupancy is `total_tokens - reasoning_output_tokens`, +// mirroring Codex's own TokenUsage::tokens_in_context_window() — reasoning +// tokens don't persist in the window between turns. +// • `input_tokens` already includes `cached_input_tokens` (cached is a +// subset, not additive), so we must not sum them — the mistake that +// produces the well-known token-inflation bugs in third-party parsers. +// +// The active model is stamped on `turn_context` lines (and `session_meta`). +enum CodexTranscriptReader { + + // Read the whole file and scan newest-first, same approach (and same + // tens-of-MB caveat) as the Claude reader. The latest token_count gives + // current context occupancy; the latest model-bearing line gives the + // active model. Returns nil for unreadable files or rollouts with no + // token_count event yet. + static func read(path: String) -> TranscriptStats? { + guard let data = try? Data(contentsOf: URL(fileURLWithPath: path), options: .mappedIfSafe), + let text = String(data: data, encoding: .utf8) + else { return nil } + + let lines = text.split(separator: "\n", omittingEmptySubsequences: true) + + var tokens: Int? + var model: String? + + for line in lines.reversed() { + if tokens != nil && model != nil { break } + guard let lineData = line.data(using: .utf8), + let obj = try? JSONSerialization.jsonObject(with: lineData) as? [String: Any] + else { continue } + + let payload = obj["payload"] as? [String: Any] + let payloadType = (payload?["type"] as? String) ?? (obj["type"] as? String) + + if tokens == nil, + payloadType == "token_count", + let info = payload?["info"] as? [String: Any], + let last = info["last_token_usage"] as? 
[String: Any] { + let total = intValue(last["total_tokens"]) + let reasoning = intValue(last["reasoning_output_tokens"]) + if total > 0 { tokens = max(0, total - reasoning) } + } + + if model == nil { + model = modelFrom(obj: obj, payload: payload) + } + } + + guard let tokens else { return nil } + return TranscriptStats(tokens: tokens, model: model) + } + + // Codex stamps the active model on turn_context lines; session_meta carries + // it for the session as a whole. Nesting has shifted across Codex versions, + // so probe the few known shapes defensively and ignore anything else. + private static func modelFrom(obj: [String: Any], payload: [String: Any]?) -> String? { + if let model = payload?["model"] as? String, !model.isEmpty { return model } + if let turnContext = payload?["turn_context"] as? [String: Any], + let model = turnContext["model"] as? String, !model.isEmpty { return model } + if let model = obj["model"] as? String, !model.isEmpty { return model } + return nil + } + + // Rollout numbers are emitted as JSON integers, but decode defensively in + // case a writer or version serialises them as doubles. + private static func intValue(_ value: Any?) -> Int { + if let int = value as? Int { return int } + if let number = value as? NSNumber { return number.intValue } + if let double = value as? Double { return Int(double) } + return 0 + } +} diff --git a/panel/TranscriptStats.swift b/panel/TranscriptStats.swift index 2630e19..8a3e923 100644 --- a/panel/TranscriptStats.swift +++ b/panel/TranscriptStats.swift @@ -14,17 +14,31 @@ struct TranscriptStats: Equatable { enum TranscriptReader { - // Read the transcript at `path`, scan from the end for the most - // recent assistant message with a usage block, and return its - // stats. Returns nil for unreadable files, empty transcripts, or - // transcripts that don't yet contain an assistant message. + // Dispatch by transcript path. 
Codex rollout files live under + // ~/.codex/sessions/.../rollout-*.jsonl and use a different JSONL schema + // (token_count events rather than assistant-message usage blocks), so they + // route to CodexTranscriptReader. Everything else is a Claude Code + // transcript. Both return the same TranscriptStats shape so the Sessions + // and Compact views render context usage identically across agents. + static func read(path: String) -> TranscriptStats? { + if path.contains("/.codex/") + || (path as NSString).lastPathComponent.hasPrefix("rollout-") { + return CodexTranscriptReader.read(path: path) + } + return readClaude(path: path) + } + + // Read a Claude Code transcript at `path`, scan from the end for the most + // recent assistant message with a usage block, and return its stats. + // Returns nil for unreadable files, empty transcripts, or transcripts that + // don't yet contain an assistant message. // // We read the whole file rather than tail-seeking — Claude Code // transcripts are typically a few MB even for long sessions, and // tail-seeking JSONL safely requires byte-by-byte reverse scanning // to find a newline boundary. If transcripts grow to tens of MB // in practice we can revisit. - static func read(path: String) -> TranscriptStats? { + private static func readClaude(path: String) -> TranscriptStats? { guard let data = try? Data(contentsOf: URL(fileURLWithPath: path), options: .mappedIfSafe), let text = String(data: data, encoding: .utf8) else { return nil } diff --git a/uninstall.sh b/uninstall.sh index 76ddc5c..eb4c9b8 100755 --- a/uninstall.sh +++ b/uninstall.sh @@ -130,6 +130,36 @@ print(f" Cleaned {path}") PY fi +# Remove hooks from Codex. Same matcher-group shape as Claude/Gemini. 
+if [[ -f "$HOME/.codex/hooks.json" ]]; then + python3 - "$HOME/.codex/hooks.json" <<'PY' +import json, re, sys +from pathlib import Path + +path = Path(sys.argv[1]) +STALE = re.compile(r'(?:^|/|")\.?(?:tinynudge|stack-nudge)/notify\.sh') +settings = json.loads(path.read_text().strip() or "{}")  # an empty or whitespace-only hooks.json counts as {} instead of raising JSONDecodeError and aborting the uninstall +hooks = settings.get("hooks", {}) +for event in list(hooks.keys()): + cleaned = [] + for g in hooks[event]: + inner = g.get("hooks", []) + kept = [h for h in inner if not STALE.search(h.get("command", "") or "")] + if not kept: + continue + if kept != inner: + g = {**g, "hooks": kept} + cleaned.append(g) + hooks[event] = cleaned + if not hooks[event]: + del hooks[event] +if not hooks: + settings.pop("hooks", None) +path.write_text(json.dumps(settings, indent=2) + "\n") +print(f" Cleaned {path}") +PY +fi + # Stop and remove launchd agents (macOS) for label in com.stackonehq.stack-nudge com.stackonehq.stack-nudge-daemon com.stackonehq.stack-nudge-panel; do plist="$HOME/Library/LaunchAgents/${label}.plist"