From 4a43f4b3f2e01fcf6d045224c3308fabf2a77a36 Mon Sep 17 00:00:00 2001
From: john988 <john9882001@yahoo.com>
Date: Wed, 17 Jun 2026 10:06:30 +0800
Subject: [PATCH 1/8] feat(scanner): attribute subagent usage (is_subagent,
 agent_id, agents table)

Re-port of the subagent-attribution feature onto v1.4.0. Adds per-turn
is_subagent / agent_id columns and an agents dispatch table (agent_id ->
agent_type, captured from the parent's toolUseResult), populated in both the
full and incremental scan paths. Subagents are detected via isSidechain /
agentId / a transcript path under a subagents/ directory. Schema changes are
additive (ALTER columns + new table), so existing DBs migrate in place.

Tests: 124 pass (8 new in tests/test_subagent.py). Verified on real
transcripts: 11,428 subagent turns and 438 agent dispatches detected.
---
 scanner.py             | 142 +++++++++++++++++++++++++++++++--
 tests/test_scanner.py  |  28 +++----
 tests/test_subagent.py | 173 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 322 insertions(+), 21 deletions(-)
 create mode 100644 tests/test_subagent.py

diff --git a/scanner.py b/scanner.py
index 72747cda..b4ad0b41 100644
--- a/scanner.py
+++ b/scanner.py
@@ -74,7 +74,9 @@ def init_db(conn):
             cache_creation_tokens   INTEGER DEFAULT 0,
             tool_name               TEXT,
             cwd                     TEXT,
-            message_id              TEXT
+            message_id              TEXT,
+            is_subagent             INTEGER DEFAULT 0,
+            agent_id                TEXT
         );
 
         CREATE TABLE IF NOT EXISTS processed_files (
@@ -83,15 +85,32 @@ def init_db(conn):
             lines   INTEGER
         );
 
+        CREATE TABLE IF NOT EXISTS agents (
+            agent_id              TEXT PRIMARY KEY,
+            agent_type            TEXT,
+            dispatched_in_session TEXT,
+            completed_at          TEXT,
+            status                TEXT,
+            total_tokens          INTEGER,
+            total_duration_ms     INTEGER,
+            tool_use_count        INTEGER
+        );
+
         CREATE INDEX IF NOT EXISTS idx_turns_session ON turns(session_id);
         CREATE INDEX IF NOT EXISTS idx_turns_timestamp ON turns(timestamp);
         CREATE INDEX IF NOT EXISTS idx_sessions_first ON sessions(first_timestamp);
+        CREATE INDEX IF NOT EXISTS idx_agents_type ON agents(agent_type);
     """)
     # Add message_id column if upgrading from older schema
     try:
         conn.execute("SELECT message_id FROM turns LIMIT 1")
     except sqlite3.OperationalError:
         conn.execute("ALTER TABLE turns ADD COLUMN message_id TEXT")
+    # Subagent attribution columns (added in a later schema version)
+    _ensure_column(conn, "turns", "is_subagent", "INTEGER DEFAULT 0")
+    _ensure_column(conn, "turns", "agent_id", "TEXT")
+    conn.execute("CREATE INDEX IF NOT EXISTS idx_turns_subagent ON turns(is_subagent)")
+    conn.execute("CREATE INDEX IF NOT EXISTS idx_turns_agent_id ON turns(agent_id)")
     # Conditional unique index: only dedup non-null message IDs
     conn.execute("""
         CREATE UNIQUE INDEX IF NOT EXISTS idx_turns_message_id
@@ -100,6 +119,13 @@ def init_db(conn):
     conn.commit()
 
 
+def _ensure_column(conn, table, column, decl):
+    """Add a column to an existing table if it isn't already present."""
+    cols = {r["name"] for r in conn.execute(f"PRAGMA table_info({table})")}
+    if column not in cols:
+        conn.execute(f"ALTER TABLE {table} ADD COLUMN {column} {decl}")
+
+
 def project_name_from_cwd(cwd):
     """Derive a friendly project name from cwd path."""
     if not cwd:
@@ -111,8 +137,90 @@ def project_name_from_cwd(cwd):
     return parts[-1] if parts else "unknown"
 
 
+def is_subagent_record(record, source_path=""):
+    """True if a record belongs to a dispatched subagent (Task/Agent tool).
+
+    Subagents are detected three ways: an explicit ``isSidechain`` flag, an
+    ``agentId`` on the record (or its ``data`` wrapper), or a transcript path
+    under a ``subagents`` directory (Claude Code writes one jsonl per subagent).
+    """
+    if record.get("isSidechain"):
+        return True
+    if record.get("agentId"):
+        return True
+    data = record.get("data")
+    if isinstance(data, dict) and data.get("agentId"):
+        return True
+    sp = str(source_path).replace("\\", "/").lower()
+    return "/subagents/" in sp
+
+
+def record_agent_id(record):
+    """Pull the subagent id off a record, if any (top-level or data wrapper)."""
+    agent_id = record.get("agentId")
+    if not agent_id:
+        data = record.get("data")
+        if isinstance(data, dict):
+            agent_id = data.get("agentId")
+    return agent_id
+
+
+def extract_agent_dispatch(record):
+    """Pull subagent identity from a parent's tool_result record.
+
+    Claude Code writes a ``toolUseResult`` dict on the user-side record that
+    closes out an Agent/Task tool invocation. It carries ``agentId`` (matching
+    the subagent jsonl's records) and ``agentType`` (the human-readable type
+    such as 'general-purpose' or 'Explore') plus aggregate stats.
+    """
+    if record.get("type") != "user":
+        return None
+    tur = record.get("toolUseResult")
+    if not isinstance(tur, dict):
+        return None
+    agent_id = tur.get("agentId")
+    agent_type = tur.get("agentType")
+    if not agent_id or not agent_type:
+        return None
+    return {
+        "agent_id": agent_id,
+        "agent_type": agent_type,
+        "dispatched_in_session": record.get("sessionId"),
+        "completed_at": record.get("timestamp", ""),
+        "status": tur.get("status"),
+        "total_tokens": tur.get("totalTokens"),
+        "total_duration_ms": tur.get("totalDurationMs"),
+        "tool_use_count": tur.get("totalToolUseCount"),
+    }
+
+
+def upsert_agents(conn, agents):
+    """Insert or update agent dispatch metadata. Last write wins per agent_id."""
+    if not agents:
+        return
+    conn.executemany("""
+        INSERT INTO agents
+            (agent_id, agent_type, dispatched_in_session, completed_at,
+             status, total_tokens, total_duration_ms, tool_use_count)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+        ON CONFLICT(agent_id) DO UPDATE SET
+            agent_type            = excluded.agent_type,
+            dispatched_in_session = excluded.dispatched_in_session,
+            completed_at          = excluded.completed_at,
+            status                = excluded.status,
+            total_tokens          = excluded.total_tokens,
+            total_duration_ms     = excluded.total_duration_ms,
+            tool_use_count        = excluded.tool_use_count
+    """, [
+        (a["agent_id"], a["agent_type"], a.get("dispatched_in_session"),
+         a.get("completed_at"), a.get("status"),
+         a.get("total_tokens"), a.get("total_duration_ms"), a.get("tool_use_count"))
+        for a in agents
+    ])
+
+
 def parse_jsonl_file(filepath):
-    """Parse a JSONL file and return (session_metas, turns, line_count).
+    """Parse a JSONL file and return (session_metas, turns, agents, line_count).
 
     Deduplicates streaming events by message.id — Claude Code logs multiple
     JSONL records per API response, all sharing the same message.id. Only the
@@ -121,6 +229,7 @@ def parse_jsonl_file(filepath):
     seen_messages = {}  # message_id -> turn dict (dedup streaming records)
     turns_no_id = []    # turns without a message_id (kept as-is)
     session_meta = {}   # session_id -> dict
+    agents = {}         # agent_id -> dispatch dict
     line_count = 0
 
     try:
@@ -142,6 +251,11 @@ def parse_jsonl_file(filepath):
                 if not session_id:
                     continue
 
+                if rtype == "user":
+                    dispatch = extract_agent_dispatch(record)
+                    if dispatch is not None:
+                        agents[dispatch["agent_id"]] = dispatch
+
                 timestamp = record.get("timestamp", "")
                 cwd = record.get("cwd", "")
                 git_branch = record.get("gitBranch", "")
@@ -201,6 +315,8 @@ def parse_jsonl_file(filepath):
                         "tool_name": tool_name,
                         "cwd": cwd,
                         "message_id": message_id,
+                        "is_subagent": 1 if is_subagent_record(record, filepath) else 0,
+                        "agent_id": record_agent_id(record),
                     }
 
                     # Dedup: last record per message_id wins (final usage tallies)
@@ -213,7 +329,7 @@ def parse_jsonl_file(filepath):
         print(f"  Warning: error reading {filepath}: {e}")
 
     turns = turns_no_id + list(seen_messages.values())
-    return list(session_meta.values()), turns, line_count
+    return list(session_meta.values()), turns, list(agents.values()), line_count
 
 
 def aggregate_sessions(session_metas, turns):
@@ -312,13 +428,15 @@ def insert_turns(conn, turns):
     conn.executemany("""
         INSERT OR IGNORE INTO turns
             (session_id, timestamp, model, input_tokens, output_tokens,
-             cache_read_tokens, cache_creation_tokens, tool_name, cwd, message_id)
-        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+             cache_read_tokens, cache_creation_tokens, tool_name, cwd, message_id,
+             is_subagent, agent_id)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
     """, [
         (t["session_id"], t["timestamp"], t["model"],
          t["input_tokens"], t["output_tokens"],
          t["cache_read_tokens"], t["cache_creation_tokens"],
-         t["tool_name"], t["cwd"], t.get("message_id", ""))
+         t["tool_name"], t["cwd"], t.get("message_id", ""),
+         t.get("is_subagent", 0), t.get("agent_id"))
         for t in turns
     ])
 
@@ -371,7 +489,8 @@ def scan(projects_dir=None, projects_dirs=None, db_path=DB_PATH, verbose=True):
 
         if is_new:
             # New file: full parse (single read, returns line count)
-            session_metas, turns, line_count = parse_jsonl_file(filepath)
+            session_metas, turns, agents, line_count = parse_jsonl_file(filepath)
+            upsert_agents(conn, agents)
 
             if turns or session_metas:
                 sessions = aggregate_sessions(session_metas, turns)
@@ -388,6 +507,7 @@ def scan(projects_dir=None, projects_dirs=None, db_path=DB_PATH, verbose=True):
             seen_messages = {}  # message_id -> turn (dedup streaming)
             turns_no_id = []
             new_session_metas = {}
+            agents = {}         # agent_id -> dispatch dict
             line_count = 0
 
             try:
@@ -411,6 +531,11 @@ def scan(projects_dir=None, projects_dirs=None, db_path=DB_PATH, verbose=True):
                         if not session_id:
                             continue
 
+                        if rtype == "user":
+                            dispatch = extract_agent_dispatch(record)
+                            if dispatch is not None:
+                                agents[dispatch["agent_id"]] = dispatch
+
                         timestamp = record.get("timestamp", "")
                         cwd = record.get("cwd", "")
 
@@ -465,6 +590,8 @@ def scan(projects_dir=None, projects_dirs=None, db_path=DB_PATH, verbose=True):
                                 "tool_name": tool_name,
                                 "cwd": cwd,
                                 "message_id": message_id,
+                                "is_subagent": 1 if is_subagent_record(record, filepath) else 0,
+                                "agent_id": record_agent_id(record),
                             }
 
                             if message_id:
@@ -483,6 +610,7 @@ def scan(projects_dir=None, projects_dirs=None, db_path=DB_PATH, verbose=True):
                 continue
 
             new_turns = turns_no_id + list(seen_messages.values())
+            upsert_agents(conn, list(agents.values()))
 
             if new_turns or new_session_metas:
                 sessions = aggregate_sessions(list(new_session_metas.values()), new_turns)
diff --git a/tests/test_scanner.py b/tests/test_scanner.py
index e6eb4524..21fc9421 100644
--- a/tests/test_scanner.py
+++ b/tests/test_scanner.py
@@ -89,7 +89,7 @@ def test_basic_parsing(self):
             _make_user_record(),
             _make_assistant_record(),
         ])
-        metas, turns, line_count = parse_jsonl_file(path)
+        metas, turns, _, line_count = parse_jsonl_file(path)
         self.assertEqual(len(metas), 1)
         self.assertEqual(len(turns), 1)
         self.assertEqual(metas[0]["session_id"], "sess-1")
@@ -102,7 +102,7 @@ def test_skips_zero_token_records(self):
             _make_assistant_record(input_tokens=0, output_tokens=0,
                                    cache_read=0, cache_creation=0),
         ])
-        _, turns, _ = parse_jsonl_file(path)
+        _, turns, _, _ = parse_jsonl_file(path)
         self.assertEqual(len(turns), 0)
 
     def test_skips_non_assistant_user_types(self):
@@ -110,7 +110,7 @@ def test_skips_non_assistant_user_types(self):
             json.dumps({"type": "system", "sessionId": "s1"}),
             _make_assistant_record(session_id="s1"),
         ])
-        metas, turns, _ = parse_jsonl_file(path)
+        metas, turns, _, _ = parse_jsonl_file(path)
         self.assertEqual(len(turns), 1)
 
     def test_handles_malformed_json(self):
@@ -118,12 +118,12 @@ def test_handles_malformed_json(self):
             "not valid json",
             _make_assistant_record(),
         ])
-        _, turns, _ = parse_jsonl_file(path)
+        _, turns, _, _ = parse_jsonl_file(path)
         self.assertEqual(len(turns), 1)
 
     def test_handles_empty_file(self):
         path = self._write_jsonl("test.jsonl", [])
-        metas, turns, _ = parse_jsonl_file(path)
+        metas, turns, _, _ = parse_jsonl_file(path)
         self.assertEqual(len(metas), 0)
         self.assertEqual(len(turns), 0)
 
@@ -132,7 +132,7 @@ def test_multiple_sessions(self):
             _make_assistant_record(session_id="s1"),
             _make_assistant_record(session_id="s2"),
         ])
-        metas, turns, _ = parse_jsonl_file(path)
+        metas, turns, _, _ = parse_jsonl_file(path)
         self.assertEqual(len(metas), 2)
         self.assertEqual(len(turns), 2)
 
@@ -142,7 +142,7 @@ def test_session_timestamps_tracked(self):
             _make_assistant_record(timestamp="2026-04-08T09:05:00Z"),
             _make_assistant_record(timestamp="2026-04-08T09:10:00Z"),
         ])
-        metas, _, _ = parse_jsonl_file(path)
+        metas, _, _, _ = parse_jsonl_file(path)
         self.assertEqual(metas[0]["first_timestamp"], "2026-04-08T09:00:00Z")
         self.assertEqual(metas[0]["last_timestamp"], "2026-04-08T09:10:00Z")
 
@@ -161,7 +161,7 @@ def test_tool_name_extracted(self):
             },
         })
         path = self._write_jsonl("test.jsonl", [record])
-        _, turns, _ = parse_jsonl_file(path)
+        _, turns, _, _ = parse_jsonl_file(path)
         self.assertEqual(turns[0]["tool_name"], "Read")
 
 
@@ -188,7 +188,7 @@ def test_streaming_events_deduped(self):
             # Streaming event 3: final usage (same message)
             _make_assistant_record(message_id="msg-abc", input_tokens=150, output_tokens=80),
         ])
-        _, turns, _ = parse_jsonl_file(path)
+        _, turns, _, _ = parse_jsonl_file(path)
         self.assertEqual(len(turns), 1)
         # Last record wins (has final tallies)
         self.assertEqual(turns[0]["input_tokens"], 150)
@@ -201,7 +201,7 @@ def test_different_message_ids_kept(self):
             _make_assistant_record(message_id="msg-1", input_tokens=100),
             _make_assistant_record(message_id="msg-2", input_tokens=200),
         ])
-        _, turns, _ = parse_jsonl_file(path)
+        _, turns, _, _ = parse_jsonl_file(path)
         self.assertEqual(len(turns), 2)
 
     def test_records_without_message_id_kept(self):
@@ -210,7 +210,7 @@ def test_records_without_message_id_kept(self):
             _make_assistant_record(input_tokens=100),
             _make_assistant_record(input_tokens=200),
         ])
-        _, turns, _ = parse_jsonl_file(path)
+        _, turns, _, _ = parse_jsonl_file(path)
         self.assertEqual(len(turns), 2)
 
     def test_mixed_with_and_without_ids(self):
@@ -220,7 +220,7 @@ def test_mixed_with_and_without_ids(self):
             _make_assistant_record(message_id="msg-1", input_tokens=100),  # deduped
             _make_assistant_record(input_tokens=200),  # no id, kept
         ])
-        _, turns, _ = parse_jsonl_file(path)
+        _, turns, _, _ = parse_jsonl_file(path)
         self.assertEqual(len(turns), 2)  # 1 deduped + 1 without id
         token_sums = sorted([t["input_tokens"] for t in turns])
         self.assertEqual(token_sums, [100, 200])
@@ -664,14 +664,14 @@ def test_line_count_matches_file(self):
             f.write(_make_user_record() + "\n")
             f.write(_make_assistant_record() + "\n")
             f.write(_make_assistant_record(timestamp="2026-04-08T10:01:00Z") + "\n")
-        _, _, line_count = parse_jsonl_file(path)
+        _, _, _, line_count = parse_jsonl_file(path)
         self.assertEqual(line_count, 3)
 
     def test_empty_file_returns_zero(self):
         path = os.path.join(self.tmpdir, "empty.jsonl")
         with open(path, "w") as f:
             pass
-        _, _, line_count = parse_jsonl_file(path)
+        _, _, _, line_count = parse_jsonl_file(path)
         self.assertEqual(line_count, 0)
 
 
diff --git a/tests/test_subagent.py b/tests/test_subagent.py
new file mode 100644
index 00000000..c49105aa
--- /dev/null
+++ b/tests/test_subagent.py
@@ -0,0 +1,173 @@
+"""Tests for subagent attribution: detection, agent-dispatch capture, scan integration."""
+
+import json
+import os
+import sqlite3
+import tempfile
+import unittest
+from pathlib import Path
+
+from scanner import get_db, init_db, parse_jsonl_file, scan
+
+NL = chr(10)  # avoid backslash-escaped newline literals in source
+
+
+def _assistant(session_id="s1", model="claude-opus-4-8",
+               input_tokens=100, output_tokens=50,
+               cache_read=0, cache_creation=0,
+               timestamp="2026-04-08T10:00:00Z", cwd="/home/user/project",
+               message_id="m1", extra=None):
+    rec = {
+        "type": "assistant",
+        "sessionId": session_id,
+        "timestamp": timestamp,
+        "cwd": cwd,
+        "message": {
+            "model": model,
+            "id": message_id,
+            "usage": {
+                "input_tokens": input_tokens,
+                "output_tokens": output_tokens,
+                "cache_read_input_tokens": cache_read,
+                "cache_creation_input_tokens": cache_creation,
+            },
+            "content": [],
+        },
+    }
+    if extra:
+        rec.update(extra)
+    return json.dumps(rec)
+
+
+def _dispatch(session_id="s1", agent_id="agent-1", agent_type="Explore",
+              timestamp="2026-04-08T10:01:00Z", total_tokens=999):
+    return json.dumps({
+        "type": "user",
+        "sessionId": session_id,
+        "timestamp": timestamp,
+        "toolUseResult": {
+            "agentId": agent_id,
+            "agentType": agent_type,
+            "status": "completed",
+            "totalTokens": total_tokens,
+            "totalDurationMs": 4200,
+            "totalToolUseCount": 3,
+        },
+    })
+
+
+class TestSubagentDetection(unittest.TestCase):  # parse_jsonl_file: subagent flags and dispatch capture
+    def setUp(self):
+        self.tmpdir = tempfile.mkdtemp()  # per-test scratch dir; nothing in this class removes it
+
+    def _write(self, relpath, lines):  # write JSONL lines under tmpdir and return the full path
+        path = os.path.join(self.tmpdir, relpath)
+        os.makedirs(os.path.dirname(path) or ".", exist_ok=True)  # relpath may include subdirectories
+        with open(path, "w") as f:
+            f.write(NL.join(lines) + NL)  # one record per line, with a trailing newline
+        return path
+
+    def test_sidechain_flag_marks_subagent(self):  # signal 1: isSidechain on the record
+        path = self._write("a.jsonl", [_assistant(extra={"isSidechain": True})])
+        _, turns, _, _ = parse_jsonl_file(path)
+        self.assertEqual(turns[0]["is_subagent"], 1)
+
+    def test_agent_id_marks_subagent_and_is_captured(self):  # signal 2: top-level agentId
+        path = self._write("a.jsonl", [_assistant(extra={"agentId": "agent-xyz"})])
+        _, turns, _, _ = parse_jsonl_file(path)
+        self.assertEqual(turns[0]["is_subagent"], 1)
+        self.assertEqual(turns[0]["agent_id"], "agent-xyz")
+
+    def test_path_under_subagents_marks_subagent(self):  # signal 3: path only; record has no flag or id
+        path = self._write(os.path.join("proj", "subagents", "x.jsonl"),
+                           [_assistant()])
+        _, turns, _, _ = parse_jsonl_file(path)
+        self.assertEqual(turns[0]["is_subagent"], 1)
+
+    def test_normal_record_not_subagent(self):  # control case: none of the three signals present
+        path = self._write("a.jsonl", [_assistant()])
+        _, turns, _, _ = parse_jsonl_file(path)
+        self.assertEqual(turns[0]["is_subagent"], 0)
+        self.assertIsNone(turns[0]["agent_id"])
+
+    def test_agent_dispatch_extracted_from_tool_result(self):  # third return value is the dispatch list
+        path = self._write("a.jsonl", [_dispatch(agent_id="agent-xyz", agent_type="Plan",
+                                                  total_tokens=1234)])
+        _, _, agents, _ = parse_jsonl_file(path)
+        self.assertEqual(len(agents), 1)
+        self.assertEqual(agents[0]["agent_id"], "agent-xyz")
+        self.assertEqual(agents[0]["agent_type"], "Plan")
+        self.assertEqual(agents[0]["total_tokens"], 1234)
+
+    def test_tool_result_without_agent_fields_ignored(self):  # toolUseResult lacks agentId/agentType
+        rec = json.dumps({"type": "user", "sessionId": "s1",
+                          "toolUseResult": {"status": "ok"}})
+        path = self._write("a.jsonl", [rec])
+        _, _, agents, _ = parse_jsonl_file(path)
+        self.assertEqual(agents, [])
+
+
+class TestSubagentScanIntegration(unittest.TestCase):
+    def setUp(self):
+        self.tmpdir = tempfile.mkdtemp()
+        self.projects_dir = Path(self.tmpdir) / "projects"
+        self.projects_dir.mkdir()
+        self.db_path = Path(self.tmpdir) / "usage.db"
+
+    def test_scan_populates_agents_and_flags(self):
+        parent = self.projects_dir / "user" / "proj"
+        parent.mkdir(parents=True)
+        with open(parent / "sess-1.jsonl", "w") as f:
+            f.write(_assistant(session_id="sess-1", message_id="m-main",
+                               input_tokens=100, output_tokens=50) + NL)
+            f.write(_dispatch(session_id="sess-1", agent_id="agent-1",
+                              agent_type="Explore", total_tokens=999) + NL)
+        sub = parent / "subagents"
+        sub.mkdir()
+        with open(sub / "agent-1.jsonl", "w") as f:
+            f.write(_assistant(session_id="sess-1", message_id="m-sub",
+                               input_tokens=300, output_tokens=80,
+                               extra={"agentId": "agent-1"}) + NL)
+
+        scan(projects_dir=self.projects_dir, db_path=self.db_path, verbose=False)
+
+        conn = sqlite3.connect(self.db_path)
+        conn.row_factory = sqlite3.Row
+        agent = conn.execute("SELECT * FROM agents WHERE agent_id='agent-1'").fetchone()
+        self.assertIsNotNone(agent)
+        self.assertEqual(agent["agent_type"], "Explore")
+        self.assertEqual(agent["total_tokens"], 999)
+
+        sub_turn = conn.execute("SELECT * FROM turns WHERE message_id='m-sub'").fetchone()
+        self.assertEqual(sub_turn["is_subagent"], 1)
+        self.assertEqual(sub_turn["agent_id"], "agent-1")
+
+        main_turn = conn.execute("SELECT * FROM turns WHERE message_id='m-main'").fetchone()
+        self.assertEqual(main_turn["is_subagent"], 0)
+        conn.close()
+
+    def test_migration_adds_subagent_columns_and_agents_table(self):
+        conn = sqlite3.connect(self.db_path)
+        conn.executescript(
+            "CREATE TABLE turns ("
+            " id INTEGER PRIMARY KEY AUTOINCREMENT, session_id TEXT, timestamp TEXT,"
+            " model TEXT, input_tokens INTEGER, output_tokens INTEGER,"
+            " cache_read_tokens INTEGER, cache_creation_tokens INTEGER,"
+            " tool_name TEXT, cwd TEXT, message_id TEXT);"
+        )
+        conn.commit()
+        conn.close()
+
+        conn = get_db(self.db_path)
+        init_db(conn)
+        cols = {r["name"] for r in conn.execute("PRAGMA table_info(turns)")}
+        self.assertIn("is_subagent", cols)
+        self.assertIn("agent_id", cols)
+        tables = {r["name"] for r in conn.execute(
+            "SELECT name FROM sqlite_master WHERE type='table'")}
+        self.assertIn("agents", tables)
+        conn.close()
+
+
+if __name__ == "__main__":
+    unittest.main()

From 7936d2fe60d5cc56ff841441d60577f128dbd45e Mon Sep 17 00:00:00 2001
From: john988 <john9882001@yahoo.com>
Date: Wed, 17 Jun 2026 10:15:47 +0800
Subject: [PATCH 2/8] feat(dashboard,cli): surface subagent attribution
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Dashboard: get_dashboard_data now returns subagent_by_type and top_dispatches
(LEFT JOIN turns -> agents; when no agent_type is found, agent ids starting
with acompact- are labelled auto-compact and everything else unknown). Adds a
"Subagent Tokens by Type" stacked bar chart, a "Top Subagent Dispatches"
table, a Subagent Tokens stat card, and an agent-type colour palette — all
filtered by the existing model + range controls. Dynamic values are escaped
via esc() (no innerHTML injection).

CLI: `today` and `stats` print subagent token/turn summaries (included in
totals).

Tests: 130 pass (+6 for dashboard/CLI subagent data). Verified on real
transcripts (75 type-rows, 50 dispatches) and inline JS passes `node --check`.
---
 cli.py                           |  21 +++
 dashboard.py                     | 214 ++++++++++++++++++++++++++++++-
 tests/test_cli_subagent.py       |  68 ++++++++++
 tests/test_dashboard_subagent.py |  78 +++++++++++
 4 files changed, 380 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_cli_subagent.py
 create mode 100644 tests/test_dashboard_subagent.py

diff --git a/cli.py b/cli.py
index 98f3a124..e0676b8a 100644
--- a/cli.py
+++ b/cli.py
@@ -118,6 +118,15 @@ def cmd_today():
         WHERE substr(timestamp, 1, 10) = ?
     """, (today,)).fetchone()
 
+    subagent = conn.execute("""
+        SELECT
+            COUNT(*) as turns,
+            SUM(input_tokens + output_tokens + cache_read_tokens + cache_creation_tokens) as tokens
+        FROM turns
+        WHERE substr(timestamp, 1, 10) = ?
+          AND COALESCE(is_subagent, 0) = 1
+    """, (today,)).fetchone()
+
     print()
     hr()
     print(f"  Today's Usage  ({today})")
@@ -145,6 +154,7 @@ def cmd_today():
     print(f"  {'TOTAL':<30}  turns={total_turns:<4}  in={fmt(total_inp):<8}  out={fmt(total_out):<8}  cost={fmt_cost(total_cost)}")
     print()
     print(f"  Sessions today:   {sessions['cnt']}")
+    print(f"  Subagent tokens:  {fmt(subagent['tokens'] or 0)}  ({fmt(subagent['turns'] or 0)} turns)")
     print(f"  Cache read:       {fmt(total_cr)}")
     print(f"  Cache creation:   {fmt(total_cc)}")
     hr()
@@ -302,6 +312,15 @@ def cmd_stats():
         LIMIT 5
     """).fetchall()
 
+    # Subagent totals (subagent tokens are included in the all-time totals above)
+    subagent = conn.execute("""
+        SELECT
+            COUNT(*) as turns,
+            SUM(input_tokens + output_tokens + cache_read_tokens + cache_creation_tokens) as tokens
+        FROM turns
+        WHERE COALESCE(is_subagent, 0) = 1
+    """).fetchone()
+
     # Daily average (last 30 days)
     daily_avg = conn.execute("""
         SELECT
@@ -334,11 +353,13 @@ def cmd_stats():
     print(f"  Period:           {first_date} to {last_date}")
     print(f"  Total sessions:   {session_info['sessions'] or 0:,}")
     print(f"  Total turns:      {fmt(totals['turns'] or 0)}")
+    print(f"  Subagent turns:   {fmt(subagent['turns'] or 0)}")
     print()
     print(f"  Input tokens:     {fmt(totals['inp'] or 0):<12}  (raw prompt tokens)")
     print(f"  Output tokens:    {fmt(totals['out'] or 0):<12}  (generated tokens)")
     print(f"  Cache read:       {fmt(totals['cr'] or 0):<12}  (90% cheaper than input)")
     print(f"  Cache creation:   {fmt(totals['cc'] or 0):<12}  (25% premium on input)")
+    print(f"  Subagent tokens:  {fmt(subagent['tokens'] or 0):<12}  (included in totals)")
     print()
     print(f"  Est. total cost:  ${total_cost:.4f}")
     hr()
diff --git a/dashboard.py b/dashboard.py
index e2ac2f6f..736d95fc 100644
--- a/dashboard.py
+++ b/dashboard.py
@@ -128,6 +128,87 @@ def get_dashboard_data(db_path=DB_PATH):
             "cache_creation": r["total_cache_creation"] or 0,
         })
 
+    # ── Subagent breakdown by type, by day & model ────────────────────────────
+    # JOIN turns to agents (parent tool_result metadata captured by the scanner).
+    # acompact-* ids are Claude Code's auto-compaction subagent (no parent
+    # dispatch record); anything else without a match is shown as 'unknown'.
+    AGENT_TYPE_EXPR = (
+        "COALESCE(a.agent_type, "
+        "CASE WHEN t.agent_id LIKE 'acompact-%' THEN 'auto-compact' "
+        "ELSE 'unknown' END)"
+    )
+
+    subagent_daily_rows = conn.execute(f"""
+        SELECT
+            substr(t.timestamp, 1, 10)               as day,
+            {AGENT_TYPE_EXPR}                        as agent_type,
+            COALESCE(NULLIF(t.model, ''), 'unknown') as model,
+            SUM(t.input_tokens)                      as input,
+            SUM(t.output_tokens)                     as output,
+            SUM(t.cache_read_tokens)                 as cache_read,
+            SUM(t.cache_creation_tokens)             as cache_creation,
+            COUNT(DISTINCT t.agent_id)               as dispatches,
+            COUNT(*)                                 as turns
+        FROM turns t
+        LEFT JOIN agents a ON t.agent_id = a.agent_id
+        WHERE t.is_subagent = 1
+        GROUP BY day, agent_type, model
+        ORDER BY day, agent_type
+    """).fetchall()
+
+    subagent_by_type = [{
+        "day":            r["day"],
+        "agent_type":     r["agent_type"],
+        "model":          r["model"],
+        "input":          r["input"] or 0,
+        "output":         r["output"] or 0,
+        "cache_read":     r["cache_read"] or 0,
+        "cache_creation": r["cache_creation"] or 0,
+        "dispatches":     r["dispatches"] or 0,
+        "turns":          r["turns"] or 0,
+    } for r in subagent_daily_rows]
+
+    # ── Top individual subagent dispatches (one row per agent_id) ─────────────
+    top_dispatch_rows = conn.execute(f"""
+        SELECT
+            t.agent_id                               as agent_id,
+            {AGENT_TYPE_EXPR}                        as agent_type,
+            COALESCE(NULLIF(t.model, ''), 'unknown') as model,
+            MIN(t.timestamp)                         as start_ts,
+            SUM(t.input_tokens)                      as input,
+            SUM(t.output_tokens)                     as output,
+            SUM(t.cache_read_tokens)                 as cache_read,
+            SUM(t.cache_creation_tokens)             as cache_creation,
+            COUNT(*)                                 as turns,
+            a.dispatched_in_session                  as parent_session,
+            a.total_duration_ms                      as duration_ms,
+            a.tool_use_count                         as tool_uses,
+            a.status                                 as status
+        FROM turns t
+        LEFT JOIN agents a ON t.agent_id = a.agent_id
+        WHERE t.is_subagent = 1 AND t.agent_id IS NOT NULL
+        GROUP BY t.agent_id
+        ORDER BY (SUM(t.input_tokens) + SUM(t.output_tokens)
+                  + SUM(t.cache_read_tokens) + SUM(t.cache_creation_tokens)) DESC
+        LIMIT 50
+    """).fetchall()
+
+    top_dispatches = [{
+        "agent_id":       r["agent_id"],
+        "agent_type":     r["agent_type"],
+        "model":          r["model"],
+        "start":          (r["start_ts"] or "")[:16].replace("T", " "),
+        "start_date":     (r["start_ts"] or "")[:10],
+        "input":          r["input"] or 0,
+        "output":         r["output"] or 0,
+        "cache_read":     r["cache_read"] or 0,
+        "cache_creation": r["cache_creation"] or 0,
+        "turns":          r["turns"] or 0,
+        "duration_ms":    r["duration_ms"],
+        "tool_uses":      r["tool_uses"],
+        "status":         r["status"],
+    } for r in top_dispatch_rows]
+
     conn.close()
 
     return {
@@ -135,6 +216,8 @@ def get_dashboard_data(db_path=DB_PATH):
         "daily_by_model":  daily_by_model,
         "hourly_by_model": hourly_by_model,
         "sessions_all":    sessions_all,
+        "subagent_by_type": subagent_by_type,
+        "top_dispatches":  top_dispatches,
         "generated_at":    datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
     }
 
@@ -356,6 +439,20 @@ def get_dashboard_data(db_path=DB_PATH):
       <h2>Top Projects by Tokens</h2>
       <div class="chart-wrap"><canvas id="chart-project"></canvas></div>
     </div>
+    <div class="chart-card wide">
+      <h2 id="subagent-chart-title">Subagent Tokens by Type</h2>
+      <div class="chart-wrap"><canvas id="chart-subagent"></canvas></div>
+    </div>
+  </div>
+  <div class="table-card">
+    <div class="section-title">Top Subagent Dispatches <span class="muted" style="font-weight:400;text-transform:none;letter-spacing:0;font-size:11px">&middot; ranked by total tokens; <em>unknown</em> = parent dispatch record not found</span></div>
+    <table>
+      <thead><tr>
+        <th>Type</th><th>Started</th><th>Model</th><th>Turns</th><th>Tool Uses</th>
+        <th>Duration</th><th>Input</th><th>Output</th><th>Cache Read</th><th>Tokens</th><th>Est. Cost</th>
+      </tr></thead>
+      <tbody id="dispatches-body"></tbody>
+    </table>
   </div>
   <div class="table-card">
     <div class="section-title">Cost by Model</div>
@@ -623,6 +720,26 @@ def get_dashboard_data(db_path=DB_PATH):
 // blue, mauve, ochre, taupe, terracotta) rather than a saturated rainbow.
 const MODEL_COLORS = ['#D97757','#C9A26B','#7FA98C','#6E97A8','#B98AA0','#D9A84E','#A88B6A','#C2705A'];
 
+// Subagent type swatches (table tag tint) — warm/neutral, matching the palette.
+const AGENT_TYPE_COLORS = {
+  'general-purpose':   '#6E97A8',
+  'Explore':           '#9B7EC7',
+  'Plan':              '#D9A84E',
+  'claude-code-guide': '#48A0C7',
+  'auto-compact':      '#A88B6A',
+  'unknown':           '#4F4F50',
+};
+function colorForAgentType(t) { return AGENT_TYPE_COLORS[t] || '#7FA98C'; }
+function fmtDuration(ms) {
+  if (!ms || ms < 0) return '—';
+  const s = Math.round(ms / 1000);
+  if (s < 60) return s + 's';
+  const m = Math.floor(s / 60), r = s % 60;
+  if (m < 60) return r ? `${m}m${r}s` : `${m}m`;
+  const h = Math.floor(m / 60);
+  return `${h}h${m % 60}m`;
+}
+
 // Tooltip color swatches: solid fill, no border (Chart.js's default draws a
 // bordered box that looked offset/inconsistent). Lines use their solid stroke
 // color instead of the translucent area fill.
@@ -642,7 +759,7 @@ def get_dashboard_data(db_path=DB_PATH):
 // series the user toggled off. We track hidden series by label per chart and
 // reapply on rebuild: dataset charts via `dataset.hidden`, the doughnut via
 // per-slice data visibility (see applyModelHidden).
-const hiddenSeries = { daily: new Set(), hourly: new Set(), project: new Set(), model: new Set() };
+const hiddenSeries = { daily: new Set(), hourly: new Set(), project: new Set(), model: new Set(), subagent: new Set() };
 function legendToggle(key) {
   return (e, item, legend) => {
     const ci = legend.chart;
@@ -1020,6 +1137,9 @@ def get_dashboard_data(db_path=DB_PATH):
     cache_read:     byModel.reduce((s, m) => s + m.cache_read, 0),
     cache_creation: byModel.reduce((s, m) => s + m.cache_creation, 0),
     cost:           byModel.reduce((s, m) => s + calcCost(m.model, m.input, m.output, m.cache_read, m.cache_creation), 0),
+    subagent_tokens: (rawData.subagent_by_type || [])
+      .filter(r => selectedModels.has(r.model) && (!start || r.day >= start) && (!end || r.day <= end))
+      .reduce((s, r) => s + r.input + r.output + r.cache_read + r.cache_creation, 0),
   };
 
   // Hourly aggregation (filtered by model + range, then bucketed by UTC hour)
@@ -1028,15 +1148,40 @@ def get_dashboard_data(db_path=DB_PATH):
   );
   const hourlyAgg = aggregateHourly(hourlySrc, hourlyTZ);
 
+  // Subagent breakdown by type (filtered by range + selected models)
+  const subagentTypeMap = {};
+  for (const r of (rawData.subagent_by_type || [])) {
+    if (!selectedModels.has(r.model)) continue;
+    if (start && r.day < start) continue;
+    if (end && r.day > end) continue;
+    const k = r.agent_type;
+    if (!subagentTypeMap[k]) subagentTypeMap[k] = { agent_type: k, input: 0, output: 0, cache_read: 0, cache_creation: 0, turns: 0 };
+    const m = subagentTypeMap[k];
+    m.input += r.input; m.output += r.output;
+    m.cache_read += r.cache_read; m.cache_creation += r.cache_creation;
+    m.turns += r.turns;
+  }
+  const byAgentType = Object.values(subagentTypeMap).sort((a, b) =>
+    (b.input + b.output + b.cache_read + b.cache_creation) -
+    (a.input + a.output + a.cache_read + a.cache_creation));
+
+  // Top dispatches: filter by range + selected model, take top 20
+  const filteredDispatches = (rawData.top_dispatches || []).filter(d =>
+    selectedModels.has(d.model) && (!start || d.start_date >= start) && (!end || d.start_date <= end)
+  ).slice(0, 20);
+
   // Update daily chart title
   document.getElementById('daily-chart-title').textContent = 'Daily Token Usage \u2014 ' + RANGE_LABELS[selectedRange];
   document.getElementById('hourly-chart-title').textContent = 'Average Hourly Distribution \u2014 ' + RANGE_LABELS[selectedRange];
+  document.getElementById('subagent-chart-title').textContent = 'Subagent Tokens by Type \u2014 ' + RANGE_LABELS[selectedRange];
 
   renderStats(totals);
   renderDailyChart(daily);
   renderHourlyChart(hourlyAgg);
   renderModelChart(byModel);
   renderProjectChart(byProject);
+  renderSubagentChart(byAgentType);
+  renderTopDispatches(filteredDispatches);
   lastFilteredSessions = sortSessions(filteredSessions);
   lastByModel = byModel;
   lastByProject = sortProjects(byProject);
@@ -1055,6 +1200,7 @@ def get_dashboard_data(db_path=DB_PATH):
     { label: 'Turns',          value: fmt(t.turns),                sub: rangeLabel },
     { label: 'Input Tokens',   value: fmt(t.input),                sub: rangeLabel },
     { label: 'Output Tokens',  value: fmt(t.output),               sub: rangeLabel },
+    { label: 'Subagent Tokens', value: fmt(t.subagent_tokens || 0), sub: 'included in totals' },
     { label: 'Cache Read',     value: fmt(t.cache_read),           sub: 'from prompt cache' },
     { label: 'Cache Creation', value: fmt(t.cache_creation),       sub: 'writes to prompt cache' },
     { label: 'Est. Cost',      value: fmtCostBig(t.cost),          sub: 'API pricing, June 2026', color: C.green },
@@ -1263,6 +1409,72 @@ def get_dashboard_data(db_path=DB_PATH):
   });
 }
 
+function renderSubagentChart(byType) {
+  const ctx = document.getElementById('chart-subagent').getContext('2d');
+  if (charts.subagent) charts.subagent.destroy();
+  if (!byType.length) { charts.subagent = null; return; }
+  charts.subagent = new Chart(ctx, {
+    type: 'bar',
+    data: {
+      labels: byType.map(t => t.agent_type),
+      datasets: [
+        { label: 'Input',          hidden: hiddenSeries.subagent.has('Input'),          data: byType.map(t => t.input),          backgroundColor: TOKEN_COLORS.input,          hoverBackgroundColor: TOKEN_HOVER.input,          stack: 'tokens' },
+        { label: 'Output',         hidden: hiddenSeries.subagent.has('Output'),         data: byType.map(t => t.output),         backgroundColor: TOKEN_COLORS.output,         hoverBackgroundColor: TOKEN_HOVER.output,         stack: 'tokens' },
+        { label: 'Cache Read',     hidden: hiddenSeries.subagent.has('Cache Read'),     data: byType.map(t => t.cache_read),     backgroundColor: TOKEN_COLORS.cache_read,     hoverBackgroundColor: TOKEN_HOVER.cache_read,     stack: 'tokens' },
+        { label: 'Cache Creation', hidden: hiddenSeries.subagent.has('Cache Creation'), data: byType.map(t => t.cache_creation), backgroundColor: TOKEN_COLORS.cache_creation, hoverBackgroundColor: TOKEN_HOVER.cache_creation, stack: 'tokens' },
+      ]
+    },
+    options: {
+      indexAxis: 'y', responsive: true, maintainAspectRatio: false, resizeDelay: 150,
+      plugins: {
+        legend: { onClick: legendToggle('subagent'), labels: { color: C.axis, boxWidth: 12 } },
+        tooltip: { callbacks: {
+          label: ctx => ` ${ctx.dataset.label}: ${fmt(ctx.raw)}`,
+          footer: items => {
+            const total = items.reduce((s, it) => s + it.raw, 0);
+            const row = byType[items[0].dataIndex];
+            return ` Total: ${fmt(total)} · ${row.turns} turns`;
+          }
+        } }
+      },
+      scales: {
+        x: { stacked: true, ticks: { color: C.axis, callback: v => fmt(v) }, grid: { color: C.border } },
+        y: { stacked: true, ticks: { color: C.axis, font: { size: 11 } }, grid: { color: C.border } },
+      }
+    }
+  });
+}
+
+function renderTopDispatches(rows) {
+  const body = document.getElementById('dispatches-body');
+  if (!rows.length) {
+    body.innerHTML = '<tr><td colspan="11" class="muted" style="text-align:center;padding:24px">No subagent dispatches in selected range.</td></tr>';
+    return;
+  }
+  body.innerHTML = rows.map(d => {
+    const tokensTotal = d.input + d.output + d.cache_read + d.cache_creation;
+    const cost = calcCost(d.model, d.input, d.output, d.cache_read, d.cache_creation);
+    const costCell = isBillable(d.model)
+      ? `<td class="cost">${fmtCost(cost)}</td>`
+      : `<td class="cost-na">n/a</td>`;
+    const col = colorForAgentType(d.agent_type);
+    const typeStyle = `background:${col}22;color:${col};border:1px solid ${col}44`;
+    return `<tr>
+      <td><span class="model-tag" style="${typeStyle}">${esc(d.agent_type)}</span></td>
+      <td class="muted">${esc(d.start || '—')}</td>
+      <td><span class="model-tag">${esc(d.model)}</span></td>
+      <td class="num">${d.turns}</td>
+      <td class="num">${d.tool_uses != null ? d.tool_uses : '—'}</td>
+      <td class="muted">${fmtDuration(d.duration_ms)}</td>
+      <td class="num">${fmt(d.input)}</td>
+      <td class="num">${fmt(d.output)}</td>
+      <td class="num">${fmt(d.cache_read)}</td>
+      <td class="num"><strong>${fmt(tokensTotal)}</strong></td>
+      ${costCell}
+    </tr>`;
+  }).join('');
+}
+
 // Fills a table card's footer with the row-reveal control. Three states:
 //   - more rows fit under the cap        -> "Show more" (plus "Show less" once expanded)
 //   - cap reached but more records exist -> "Download CSV to see all (N)" + "Show less"
diff --git a/tests/test_cli_subagent.py b/tests/test_cli_subagent.py
new file mode 100644
index 00000000..fdc1226f
--- /dev/null
+++ b/tests/test_cli_subagent.py
@@ -0,0 +1,68 @@
+"""Tests for the CLI subagent summary lines in `today` and `stats`."""
+
+import io
+import tempfile
+import unittest
+from contextlib import redirect_stdout
+from datetime import date
+from pathlib import Path
+
+import cli
+from scanner import get_db, init_db, insert_turns, upsert_sessions
+
+
+def _turn(message_id, inp, out, is_subagent, agent_id, ts):
+    return {
+        "session_id": "sess-1", "timestamp": ts, "model": "claude-opus-4-8",
+        "input_tokens": inp, "output_tokens": out,
+        "cache_read_tokens": 0, "cache_creation_tokens": 0,
+        "tool_name": None, "cwd": "/home/user/proj",
+        "message_id": message_id, "is_subagent": is_subagent, "agent_id": agent_id,
+    }
+
+
+class TestCliSubagentLines(unittest.TestCase):
+    def setUp(self):
+        self.db_path = Path(tempfile.mkdtemp()) / "usage.db"
+        today_ts = date.today().isoformat() + "T10:00:00Z"
+        conn = get_db(self.db_path)
+        init_db(conn)
+        upsert_sessions(conn, [{
+            "session_id": "sess-1", "project_name": "user/proj",
+            "first_timestamp": today_ts, "last_timestamp": today_ts,
+            "git_branch": "main", "model": "claude-opus-4-8",
+            "total_input_tokens": 400, "total_output_tokens": 130,
+            "total_cache_read": 0, "total_cache_creation": 0, "turn_count": 2,
+        }])
+        insert_turns(conn, [
+            _turn("m-main", 100, 50, 0, None, today_ts),
+            _turn("m-sub", 300, 80, 1, "agent-1", today_ts),
+        ])
+        conn.commit()
+        conn.close()
+        self._orig_db = cli.DB_PATH
+        cli.DB_PATH = self.db_path
+
+    def tearDown(self):
+        cli.DB_PATH = self._orig_db
+
+    def test_today_shows_subagent_tokens(self):
+        buf = io.StringIO()
+        with redirect_stdout(buf):
+            cli.cmd_today()
+        out = buf.getvalue()
+        self.assertIn("Subagent tokens:", out)
+        # 300 + 80 = 380 subagent tokens, 1 turn
+        self.assertIn("(1 turns)", out)
+
+    def test_stats_shows_subagent_turns_and_tokens(self):
+        buf = io.StringIO()
+        with redirect_stdout(buf):
+            cli.cmd_stats()
+        out = buf.getvalue()
+        self.assertIn("Subagent turns:", out)
+        self.assertIn("Subagent tokens:", out)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_dashboard_subagent.py b/tests/test_dashboard_subagent.py
new file mode 100644
index 00000000..203d17d8
--- /dev/null
+++ b/tests/test_dashboard_subagent.py
@@ -0,0 +1,78 @@
+"""Tests for the dashboard's subagent data layer (get_dashboard_data)."""
+
+import tempfile
+import unittest
+from pathlib import Path
+
+from scanner import get_db, init_db, insert_turns, upsert_agents, upsert_sessions
+import dashboard
+
+
+def _turn(session_id, message_id, model="claude-opus-4-8",
+          inp=100, out=50, is_subagent=0, agent_id=None,
+          timestamp="2026-04-08T10:00:00Z"):
+    return {
+        "session_id": session_id, "timestamp": timestamp, "model": model,
+        "input_tokens": inp, "output_tokens": out,
+        "cache_read_tokens": 0, "cache_creation_tokens": 0,
+        "tool_name": None, "cwd": "/home/user/proj",
+        "message_id": message_id, "is_subagent": is_subagent, "agent_id": agent_id,
+    }
+
+
+class TestDashboardSubagentData(unittest.TestCase):
+    def setUp(self):
+        self.db_path = Path(tempfile.mkdtemp()) / "usage.db"
+        conn = get_db(self.db_path)
+        init_db(conn)
+        upsert_sessions(conn, [{
+            "session_id": "sess-1", "project_name": "user/proj",
+            "first_timestamp": "2026-04-08T10:00:00Z",
+            "last_timestamp": "2026-04-08T10:30:00Z",
+            "git_branch": "main", "model": "claude-opus-4-8",
+            "total_input_tokens": 400, "total_output_tokens": 210,
+            "total_cache_read": 0, "total_cache_creation": 0, "turn_count": 3,
+        }])
+        insert_turns(conn, [
+            _turn("sess-1", "m-main", inp=100, out=50, is_subagent=0),
+            _turn("sess-1", "m-sub1", inp=300, out=80, is_subagent=1, agent_id="agent-1"),
+            _turn("sess-1", "m-sub2", inp=200, out=40, is_subagent=1, agent_id="acompact-xyz"),
+        ])
+        upsert_agents(conn, [{
+            "agent_id": "agent-1", "agent_type": "Explore",
+            "dispatched_in_session": "sess-1", "completed_at": "2026-04-08T10:20:00Z",
+            "status": "completed", "total_tokens": 380,
+            "total_duration_ms": 4200, "tool_use_count": 5,
+        }])
+        conn.commit()
+        conn.close()
+
+    def test_returns_subagent_keys(self):
+        d = dashboard.get_dashboard_data(self.db_path)
+        self.assertIn("subagent_by_type", d)
+        self.assertIn("top_dispatches", d)
+
+    def test_subagent_by_type_resolves_agent_type(self):
+        d = dashboard.get_dashboard_data(self.db_path)
+        types = {r["agent_type"] for r in d["subagent_by_type"]}
+        # agent-1 -> Explore (from agents table); acompact-* -> auto-compact
+        self.assertIn("Explore", types)
+        self.assertIn("auto-compact", types)
+
+    def test_top_dispatches_carries_dispatch_metadata(self):
+        d = dashboard.get_dashboard_data(self.db_path)
+        explore = [r for r in d["top_dispatches"] if r["agent_type"] == "Explore"]
+        self.assertEqual(len(explore), 1)
+        self.assertEqual(explore[0]["tool_uses"], 5)
+        self.assertEqual(explore[0]["duration_ms"], 4200)
+        self.assertEqual(explore[0]["turns"], 1)
+
+    def test_main_turn_excluded_from_subagent_data(self):
+        d = dashboard.get_dashboard_data(self.db_path)
+        # Only the 2 subagent turns contribute; the main turn must not appear.
+        total_turns = sum(r["turns"] for r in d["subagent_by_type"])
+        self.assertEqual(total_turns, 2)
+
+
+if __name__ == "__main__":
+    unittest.main()

From 35ecf68b7bd3524381ed5db9c2ccc724f2248b51 Mon Sep 17 00:00:00 2001
From: john988 <john9882001@yahoo.com>
Date: Wed, 17 Jun 2026 10:40:48 +0800
Subject: [PATCH 3/8] =?UTF-8?q?feat(ccusage):=20optional=20ccusage=20bridg?=
 =?UTF-8?q?e=20=E2=80=94=20ingest=205h=20billing=20windows=20+=20daily=20t?=
 =?UTF-8?q?otals?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds ccusage_bridge.py wrapping the optional `ccusage` CLI as a data source:
detect_runtime() (probes npx.cmd first so Windows shell=False works), a
UTF-8 / LOG_LEVEL=0 subprocess runner that never raises, pure transforms
(blocks_to_rows / daily_to_rows mapping ccusage's camelCase + nested
burnRate/projection), and idempotent upserts. New SQLite tables
billing_windows and ccusage_daily_cache (additive; kept separate from the
native turns/sessions tables so the two sources never conflict). cmd_scan now
enriches via ccusage when available and degrades gracefully when not.

Tests: 140 pass (+10 hermetic bridge tests, no Node required). Verified
end-to-end against real ccusage 20.0.9: 129 billing windows (1 active,
762k tok/min, 140 min remaining) + 34 daily rows.
---
 ccusage_bridge.py            | 221 +++++++++++++++++++++++++++++++++++
 cli.py                       |   7 ++
 scanner.py                   |  40 +++++++
 tests/test_ccusage_bridge.py | 151 ++++++++++++++++++++++++
 4 files changed, 419 insertions(+)
 create mode 100644 ccusage_bridge.py
 create mode 100644 tests/test_ccusage_bridge.py

diff --git a/ccusage_bridge.py b/ccusage_bridge.py
new file mode 100644
index 00000000..8b36f42d
--- /dev/null
+++ b/ccusage_bridge.py
@@ -0,0 +1,221 @@
+"""
+ccusage_bridge.py - Optional integration with the `ccusage` CLI.
+
+ccusage (https://github.com/ryoppippi/ccusage) is a Node-distributed Rust binary
+that reads the same ~/.claude transcripts and emits deduplicated, billing-accurate
+JSON. We wrap it as an OPTIONAL data source: when Node/npx is available we ingest
+its 5-hour "blocks" (billing windows, burn rate, projections) and per-day totals
+into separate SQLite tables. When it isn't, everything degrades gracefully and the
+native scanner is unaffected.
+
+Nothing here is required for the core tool to work.
+"""
+
+import json
+import os
+import shutil
+import subprocess
+from datetime import datetime, timezone
+
+from scanner import DB_PATH, get_db, init_db
+
+# Pin via env if you want reproducible output; defaults to latest. The JSON shape
+# is stable across the 20.0.x line (field names verified against 20.0.9).
+CCUSAGE_SPEC = os.environ.get("CCUSAGE_SPEC", "ccusage@latest")
+_TIMEOUT_S = 90
+
+
+def detect_runtime():
+    """Locate an npx/bunx runner. On Windows shutil.which returns npx.cmd, which
+    must be invoked with its full path (a bare 'npx' raises FileNotFoundError
+    under shell=False), so we probe the .cmd name first."""
+    for name in ("npx.cmd", "npx", "bunx"):
+        path = shutil.which(name)
+        if path:
+            return {"available": True, "runner": path,
+                    "kind": "bunx" if name == "bunx" else "npx"}
+    return {
+        "available": False, "runner": None, "kind": None,
+        "reason": "Node/npx not found in PATH — install Node.js from "
+                  "https://nodejs.org to enable ccusage billing-window data.",
+    }
+
+
+def _build_argv(rt, sub_args):
+    """Build the argv list that runs CCUSAGE_SPEC with sub_args via rt's runner."""
+    # bunx takes the spec directly; any other runner (npx) gets -y to auto-install.
+    install_flag = [] if rt["kind"] == "bunx" else ["-y"]
+    return [rt["runner"], *install_flag, CCUSAGE_SPEC, *sub_args]
+
+
+def run_ccusage(sub_args, rt=None):
+    """Run a ccusage subcommand and return parsed JSON, or None on any failure.
+
+    Always decodes as UTF-8 (Windows' default cp1252 mangles non-ASCII output),
+    suppresses ccusage's progress chatter via LOG_LEVEL=0, and never raises.
+    """
+    rt = rt or detect_runtime()
+    if not rt["available"]:
+        return None
+    env = {**os.environ, "LOG_LEVEL": "0"}
+    try:
+        proc = subprocess.run(
+            _build_argv(rt, sub_args),
+            capture_output=True, env=env, timeout=_TIMEOUT_S,
+            encoding="utf-8", errors="replace",
+        )
+    except (subprocess.SubprocessError, OSError):
+        return None
+    if proc.returncode != 0 or not (proc.stdout or "").strip():
+        return None
+    try:
+        return json.loads(proc.stdout)
+    except json.JSONDecodeError:
+        return None
+
+
+def _now_iso():
+    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+
+# ── Pure transforms (unit-testable without Node) ────────────────────────────────
+
+def blocks_to_rows(data, ingested_at):
+    """Map ccusage `blocks --json` output to billing_windows row dicts.
+
+    Gap blocks, non-dict entries and blocks without an id are skipped; a payload
+    that is not a dict or has a missing/null "blocks" yields []. burnRate and
+    projection may be null, in which case their derived columns are None."""
+    blocks = data.get("blocks") if isinstance(data, dict) else None
+    rows = []
+    for b in blocks or []:
+        if not isinstance(b, dict) or b.get("isGap") or not b.get("id"):
+            continue
+        tc = b.get("tokenCounts") or {}
+        br = b.get("burnRate") or {}
+        pj = b.get("projection") or {}
+        rows.append({
+            "block_id": b.get("id"),
+            "start_time": b.get("startTime"),
+            "end_time": b.get("endTime"),
+            "actual_end_time": b.get("actualEndTime"),
+            "is_active": 1 if b.get("isActive") else 0,
+            "input_tokens": tc.get("inputTokens", 0) or 0,
+            "output_tokens": tc.get("outputTokens", 0) or 0,
+            "cache_read_tokens": tc.get("cacheReadInputTokens", 0) or 0,
+            "cache_creation_tokens": tc.get("cacheCreationInputTokens", 0) or 0,
+            "total_tokens": b.get("totalTokens", 0) or 0,
+            "cost_usd": b.get("costUSD", 0) or 0,
+            "models": json.dumps(b.get("models") or []),
+            "burn_rate_tpm": br.get("tokensPerMinute"),
+            "burn_rate_cost_per_hour": br.get("costPerHour"),
+            "projected_total_tokens": pj.get("totalTokens"),
+            "projected_cost_usd": pj.get("totalCost"),
+            "remaining_minutes": pj.get("remainingMinutes"),
+            "ingested_at": ingested_at,
+        })
+    return rows
+
+
+def daily_to_rows(data, source, ingested_at):
+    """Map ccusage `daily --json` output to ccusage_daily_cache row dicts.
+
+    `ccusage daily` uses period/totalCost/cacheReadTokens; source-prefixed
+    variants (e.g. `ccusage codex daily`) use date/costUSD — accept both. A
+    non-dict payload or a missing/null "daily" yields []; dayless rows are skipped."""
+    entries = data.get("daily") if isinstance(data, dict) else None
+    rows = []
+    for r in entries or []:
+        day = (r.get("period") or r.get("date")) if isinstance(r, dict) else None
+        if not day:
+            continue
+        rows.append({
+            "day": day,
+            "source": source,
+            "input_tokens": r.get("inputTokens", 0) or 0,
+            "output_tokens": r.get("outputTokens", 0) or 0,
+            "cache_read_tokens": r.get("cacheReadTokens", 0) or 0,
+            "cache_creation_tokens": r.get("cacheCreationTokens", 0) or 0,
+            "total_tokens": r.get("totalTokens", 0) or 0,
+            "cost_usd": r.get("totalCost", r.get("costUSD", 0)) or 0,
+            "models": json.dumps(r.get("modelsUsed") or r.get("models") or []),
+            "ingested_at": ingested_at,
+        })
+    return rows
+
+
+# ── DB writes ──────────────────────────────────────────────────────────────────
+
+_BW_COLS = ("block_id", "start_time", "end_time", "actual_end_time", "is_active",
+            "input_tokens", "output_tokens", "cache_read_tokens",
+            "cache_creation_tokens", "total_tokens", "cost_usd", "models",
+            "burn_rate_tpm", "burn_rate_cost_per_hour", "projected_total_tokens",
+            "projected_cost_usd", "remaining_minutes", "ingested_at")
+
+
+def upsert_billing_windows(conn, rows):
+    if not rows:
+        return
+    placeholders = ", ".join("?" for _ in _BW_COLS)
+    updates = ", ".join(f"{c} = excluded.{c}" for c in _BW_COLS if c != "block_id")
+    conn.executemany(
+        f"INSERT INTO billing_windows ({', '.join(_BW_COLS)}) "
+        f"VALUES ({placeholders}) "
+        f"ON CONFLICT(block_id) DO UPDATE SET {updates}",
+        [tuple(r[c] for c in _BW_COLS) for r in rows],
+    )
+
+
+_DC_COLS = ("day", "source", "input_tokens", "output_tokens", "cache_read_tokens",
+            "cache_creation_tokens", "total_tokens", "cost_usd", "models",
+            "ingested_at")
+
+
+def upsert_ccusage_daily(conn, rows):
+    if not rows:
+        return
+    placeholders = ", ".join("?" for _ in _DC_COLS)
+    updates = ", ".join(f"{c} = excluded.{c}" for c in _DC_COLS if c not in ("day", "source"))
+    conn.executemany(
+        f"INSERT INTO ccusage_daily_cache ({', '.join(_DC_COLS)}) "
+        f"VALUES ({placeholders}) "
+        f"ON CONFLICT(day, source) DO UPDATE SET {updates}",
+        [tuple(r[c] for c in _DC_COLS) for r in rows],
+    )
+
+
+# ── Orchestration ───────────────────────────────────────────────────────────────
+
+def ingest(db_path=DB_PATH, verbose=True, rt=None):
+    """Fetch ccusage blocks + daily rows and upsert them into the DB at db_path.
+    Returns {'available': bool} plus 'blocks'/'daily' row counts when available."""
+    rt = rt or detect_runtime()
+    if not rt["available"]:
+        if verbose:
+            print(f"[ccusage] {rt.get('reason', 'unavailable')}")
+        return {"available": False}  # a missing runner is not an error
+    ingested_at = _now_iso()
+    blocks = run_ccusage(["blocks", "--json", "--offline"], rt)
+    daily = run_ccusage(["daily", "--json", "--offline"], rt)
+    if blocks is None and daily is None:
+        if verbose:
+            print("[ccusage] runner found but no data returned (first run downloads "
+                  "the package; check network/version).")
+        return {"available": True, "blocks": 0, "daily": 0}
+    bw = blocks_to_rows(blocks, ingested_at)  # pure transforms run before the
+    dl = daily_to_rows(daily, "ccusage-all", ingested_at)  # connection is opened
+    conn = get_db(db_path)
+    try:
+        init_db(conn)
+        upsert_billing_windows(conn, bw)
+        upsert_ccusage_daily(conn, dl)
+        conn.commit()
+    finally:
+        conn.close()  # release the handle even if init_db or an upsert raises
+    if verbose:
+        print(f"[ccusage] ingested {len(bw)} billing windows, {len(dl)} daily rows")
+    return {"available": True, "blocks": len(bw), "daily": len(dl)}
+
+
+if __name__ == "__main__":
+    print(ingest())
diff --git a/cli.py b/cli.py
index e0676b8a..05b89ed3 100644
--- a/cli.py
+++ b/cli.py
@@ -91,6 +91,13 @@ def require_db():
 def cmd_scan(projects_dir=None):
     from scanner import scan
     scan(projects_dir=Path(projects_dir) if projects_dir else None)
+    # Optional: enrich with ccusage billing-window data if Node/npx is present.
+    # Never let this block or fail a successful native scan.
+    try:
+        from ccusage_bridge import ingest
+        ingest()
+    except Exception as e:
+        print(f"[ccusage] skipped: {e}")
 
 
 def cmd_today():
diff --git a/scanner.py b/scanner.py
index b4ad0b41..71fd83ba 100644
--- a/scanner.py
+++ b/scanner.py
@@ -96,6 +96,46 @@ def init_db(conn):
             tool_use_count        INTEGER
         );
 
+        -- 5-hour billing windows ingested from ccusage `blocks --json` (optional,
+        -- only populated when Node/npx + ccusage are available). Kept separate
+        -- from the native turns/sessions tables so the two sources never conflict.
+        CREATE TABLE IF NOT EXISTS billing_windows (
+            block_id                TEXT PRIMARY KEY,
+            start_time              TEXT,
+            end_time                TEXT,
+            actual_end_time         TEXT,
+            is_active               INTEGER DEFAULT 0,
+            input_tokens            INTEGER DEFAULT 0,
+            output_tokens           INTEGER DEFAULT 0,
+            cache_read_tokens       INTEGER DEFAULT 0,
+            cache_creation_tokens   INTEGER DEFAULT 0,
+            total_tokens            INTEGER DEFAULT 0,
+            cost_usd                REAL DEFAULT 0,
+            models                  TEXT,
+            burn_rate_tpm           REAL,
+            burn_rate_cost_per_hour REAL,
+            projected_total_tokens  INTEGER,
+            projected_cost_usd      REAL,
+            remaining_minutes       INTEGER,
+            ingested_at             TEXT
+        );
+
+        -- Per-day usage from ccusage `daily --json`, keyed by (day, source) so
+        -- multiple agent CLIs (claude/codex/gemini/...) can coexist.
+        CREATE TABLE IF NOT EXISTS ccusage_daily_cache (
+            day                     TEXT,
+            source                  TEXT,
+            input_tokens            INTEGER DEFAULT 0,
+            output_tokens           INTEGER DEFAULT 0,
+            cache_read_tokens       INTEGER DEFAULT 0,
+            cache_creation_tokens   INTEGER DEFAULT 0,
+            total_tokens            INTEGER DEFAULT 0,
+            cost_usd                REAL DEFAULT 0,
+            models                  TEXT,
+            ingested_at             TEXT,
+            PRIMARY KEY (day, source)
+        );
+
         CREATE INDEX IF NOT EXISTS idx_turns_session ON turns(session_id);
         CREATE INDEX IF NOT EXISTS idx_turns_timestamp ON turns(timestamp);
         CREATE INDEX IF NOT EXISTS idx_sessions_first ON sessions(first_timestamp);
diff --git a/tests/test_ccusage_bridge.py b/tests/test_ccusage_bridge.py
new file mode 100644
index 00000000..f1e15865
--- /dev/null
+++ b/tests/test_ccusage_bridge.py
@@ -0,0 +1,151 @@
+"""Tests for ccusage_bridge: pure transforms, upserts, and ingest orchestration.
+
+These run without Node/ccusage installed — the subprocess layer is stubbed and
+the JSON fixtures mirror real ccusage 20.0.x output (camelCase, nested
+burnRate/projection on the active block only)."""
+
+import tempfile
+import unittest
+from pathlib import Path
+
+import ccusage_bridge as cb
+from scanner import get_db, init_db
+
+BLOCKS_FIXTURE = {
+    "blocks": [
+        {"id": "gap-1", "isGap": True},
+        {
+            "id": "2026-05-12T07:00:00.000Z", "isGap": False, "isActive": False,
+            "startTime": "2026-05-12T07:00:00.000Z", "endTime": "2026-05-12T12:00:00.000Z",
+            "actualEndTime": "2026-05-12T11:49:52.222Z",
+            "tokenCounts": {"inputTokens": 6677, "outputTokens": 146676,
+                            "cacheReadInputTokens": 6991052, "cacheCreationInputTokens": 424881},
+            "totalTokens": 7569286, "costUSD": 11.44, "models": ["claude-opus-4-8"],
+            "burnRate": None, "projection": None,
+        },
+        {
+            "id": "2026-06-17T05:00:00.000Z", "isGap": False, "isActive": True,
+            "startTime": "2026-06-17T05:00:00.000Z", "endTime": "2026-06-17T10:00:00.000Z",
+            "actualEndTime": "2026-06-17T07:30:00.000Z",
+            "tokenCounts": {"inputTokens": 91004, "outputTokens": 887241,
+                            "cacheReadInputTokens": 113841619, "cacheCreationInputTokens": 4455635},
+            "totalTokens": 119275499, "costUSD": 100.0, "models": ["claude-opus-4-8"],
+            "burnRate": {"costPerHour": 35.86, "tokensPerMinute": 763676.4,
+                         "tokensPerMinuteForIndicator": 6263.3},
+            "projection": {"remainingMinutes": 143, "totalCost": 178.83, "totalTokens": 228481225},
+        },
+    ]
+}
+
+DAILY_FIXTURE = {
+    "daily": [{
+        "period": "2026-05-12", "inputTokens": 6883, "outputTokens": 304540,
+        "cacheReadTokens": 27871959, "cacheCreationTokens": 1382762,
+        "totalTokens": 29566144, "totalCost": 35.41, "modelsUsed": ["claude-opus-4-8"],
+    }],
+    "totals": {},
+}
+
+
+class TestTransforms(unittest.TestCase):
+    def test_blocks_skip_gaps(self):
+        rows = cb.blocks_to_rows(BLOCKS_FIXTURE, "now")
+        self.assertEqual(len(rows), 2)  # gap dropped
+        self.assertTrue(all(r["block_id"] != "gap-1" for r in rows))
+
+    def test_blocks_map_fields(self):
+        rows = cb.blocks_to_rows(BLOCKS_FIXTURE, "now")
+        completed = next(r for r in rows if r["block_id"].startswith("2026-05-12"))
+        self.assertEqual(completed["input_tokens"], 6677)
+        self.assertEqual(completed["cache_read_tokens"], 6991052)
+        self.assertEqual(completed["total_tokens"], 7569286)
+        self.assertEqual(completed["cost_usd"], 11.44)
+        self.assertEqual(completed["is_active"], 0)
+        self.assertIsNone(completed["burn_rate_tpm"])
+
+    def test_active_block_has_burn_rate_and_projection(self):
+        rows = cb.blocks_to_rows(BLOCKS_FIXTURE, "now")
+        active = next(r for r in rows if r["is_active"] == 1)
+        self.assertAlmostEqual(active["burn_rate_tpm"], 763676.4)
+        self.assertAlmostEqual(active["burn_rate_cost_per_hour"], 35.86)
+        self.assertEqual(active["projected_total_tokens"], 228481225)
+        self.assertEqual(active["remaining_minutes"], 143)
+
+    def test_daily_map_period_and_cost(self):
+        rows = cb.daily_to_rows(DAILY_FIXTURE, "ccusage-all", "now")
+        self.assertEqual(len(rows), 1)
+        self.assertEqual(rows[0]["day"], "2026-05-12")
+        self.assertEqual(rows[0]["cost_usd"], 35.41)
+        self.assertEqual(rows[0]["cache_read_tokens"], 27871959)
+        self.assertEqual(rows[0]["source"], "ccusage-all")
+
+    def test_daily_accepts_codex_style_fields(self):
+        codex = {"daily": [{"date": "2026-05-12", "costUSD": 9.99, "inputTokens": 10}]}
+        rows = cb.daily_to_rows(codex, "ccusage-codex", "now")
+        self.assertEqual(rows[0]["day"], "2026-05-12")
+        self.assertEqual(rows[0]["cost_usd"], 9.99)
+
+
+class TestUpserts(unittest.TestCase):
+    def setUp(self):
+        self.db_path = Path(tempfile.mkdtemp()) / "usage.db"
+        self.conn = get_db(self.db_path)
+        init_db(self.conn)
+
+    def tearDown(self):
+        self.conn.close()
+
+    def test_billing_windows_upsert_idempotent(self):
+        rows = cb.blocks_to_rows(BLOCKS_FIXTURE, "t1")
+        cb.upsert_billing_windows(self.conn, rows)
+        cb.upsert_billing_windows(self.conn, rows)  # second call must not duplicate
+        n = self.conn.execute("SELECT COUNT(*) FROM billing_windows").fetchone()[0]
+        self.assertEqual(n, 2)
+
+    def test_billing_windows_update_on_conflict(self):
+        cb.upsert_billing_windows(self.conn, cb.blocks_to_rows(BLOCKS_FIXTURE, "t1"))
+        # Re-ingest the active block with a different total; the row must be updated
+        grown = {"blocks": [dict(BLOCKS_FIXTURE["blocks"][2], totalTokens=999)]}
+        cb.upsert_billing_windows(self.conn, cb.blocks_to_rows(grown, "t2"))
+        row = self.conn.execute(
+            "SELECT total_tokens, ingested_at FROM billing_windows WHERE is_active=1").fetchone()
+        self.assertEqual(row[0], 999)
+        self.assertEqual(row[1], "t2")
+
+    def test_daily_upsert_idempotent_per_source(self):
+        rows = cb.daily_to_rows(DAILY_FIXTURE, "ccusage-all", "t1")
+        cb.upsert_ccusage_daily(self.conn, rows)
+        cb.upsert_ccusage_daily(self.conn, rows)
+        n = self.conn.execute("SELECT COUNT(*) FROM ccusage_daily_cache").fetchone()[0]
+        self.assertEqual(n, 1)
+
+
+class TestIngestOrchestration(unittest.TestCase):
+    def setUp(self):
+        self.db_path = Path(tempfile.mkdtemp()) / "usage.db"
+
+    def test_ingest_unavailable_is_graceful(self):
+        rt = {"available": False, "reason": "no node"}
+        res = cb.ingest(db_path=self.db_path, verbose=False, rt=rt)
+        self.assertEqual(res, {"available": False})
+
+    def test_ingest_populates_tables(self):
+        rt = {"available": True, "runner": "npx", "kind": "npx"}
+        orig = cb.run_ccusage
+        cb.run_ccusage = lambda sub_args, rt=None: (
+            BLOCKS_FIXTURE if "blocks" in sub_args else DAILY_FIXTURE)
+        try:
+            res = cb.ingest(db_path=self.db_path, verbose=False, rt=rt)
+        finally:
+            cb.run_ccusage = orig
+        self.assertEqual(res["available"], True)
+        self.assertEqual(res["blocks"], 2)
+        self.assertEqual(res["daily"], 1)
+        conn = get_db(self.db_path)
+        self.assertEqual(conn.execute("SELECT COUNT(*) FROM billing_windows").fetchone()[0], 2)
+        self.assertEqual(conn.execute("SELECT COUNT(*) FROM ccusage_daily_cache").fetchone()[0], 1)
+        conn.close()
+
+
+if __name__ == "__main__":
+    unittest.main()

From 83ed44d4d9e92c2dbd295ed23d6ce9ccf6f6998e Mon Sep 17 00:00:00 2001
From: john988 <john9882001@yahoo.com>
Date: Wed, 17 Jun 2026 10:43:27 +0800
Subject: [PATCH 4/8] feat(ccusage): expose 5h billing summary + P90 baseline
 in dashboard data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ccusage_bridge gains compute_p90_limit() (90th-percentile of your completed 5h
window totals — a personal "typical heavy window" baseline, since Anthropic's
hard token limit isn't exposed; same approach as Claude-Code-Usage-Monitor) and
summarize_billing() (active window + window count + P90 + recent history, or
{available: False} so the UI can show an install prompt). get_dashboard_data
returns a "billing" key, guarded so a bridge issue can never break the page.

Tests: 145 pass (+5). Verified on real data: 129 windows, P90 99M, active
window 131% of baseline at 798k tok/min.
---
 ccusage_bridge.py            | 43 ++++++++++++++++++++++++++++++++++++
 dashboard.py                 |  9 ++++++++
 tests/test_ccusage_bridge.py | 36 ++++++++++++++++++++++++++++++
 3 files changed, 88 insertions(+)

diff --git a/ccusage_bridge.py b/ccusage_bridge.py
index 8b36f42d..0bc91148 100644
--- a/ccusage_bridge.py
+++ b/ccusage_bridge.py
@@ -14,6 +14,8 @@
 import json
 import os
 import shutil
+import sqlite3
+import statistics
 import subprocess
 from datetime import datetime, timezone
 
@@ -184,6 +186,47 @@ def upsert_ccusage_daily(conn, rows):
     )
 
 
+# ── Plan-limit baseline (Monitor-style P90 of your own history) ─────────────────
+
+def compute_p90_limit(window_totals, floor=0):
+    """The 90th-percentile of your completed 5h-window totals — a personal
+    'typical heavy window' baseline. Anthropic's hard token limit isn't exposed
+    anywhere, so (like Claude-Code-Usage-Monitor) we use your own history as the
+    yardstick. Returns `floor` with no history; the result is never below it."""
+    vals = [int(v) for v in window_totals if v and v > 0]
+    if not vals:
+        return floor
+    if len(vals) == 1:
+        return max(vals[0], floor)
+    p90 = statistics.quantiles(vals, n=10)[8]  # 9 cut points; [8] = 90th pct
+    return max(int(p90), floor)
+
+
+def summarize_billing(conn):
+    """Read billing_windows into a dashboard-ready summary. Returns
+    {'available': False} when ccusage has never populated the table (no Node),
+    so the UI can show an install prompt instead of an empty card."""
+    try:
+        rows = conn.execute(
+            "SELECT * FROM billing_windows ORDER BY start_time"
+        ).fetchall()
+    except sqlite3.OperationalError:
+        return {"available": False}
+    if not rows:
+        return {"available": False}
+
+    windows = [dict(r) for r in rows]
+    completed_totals = [w["total_tokens"] for w in windows if not w["is_active"]]
+    active = next((w for w in windows if w["is_active"]), None)
+    return {
+        "available": True,
+        "window_count": len(windows),
+        "plan_limit_estimate": compute_p90_limit(completed_totals),
+        "active": active,
+        "recent": windows[-30:],
+    }
+
+
 # ── Orchestration ───────────────────────────────────────────────────────────────
 
 def ingest(db_path=DB_PATH, verbose=True, rt=None):
diff --git a/dashboard.py b/dashboard.py
index 736d95fc..cb426120 100644
--- a/dashboard.py
+++ b/dashboard.py
@@ -209,6 +209,14 @@ def get_dashboard_data(db_path=DB_PATH):
         "status":         r["status"],
     } for r in top_dispatch_rows]
 
+    # Optional ccusage billing-window summary (5h windows + P90 baseline).
+    # Guarded so a bridge issue can never take down the dashboard.
+    try:
+        from ccusage_bridge import summarize_billing
+        billing = summarize_billing(conn)
+    except Exception:
+        billing = {"available": False}
+
     conn.close()
 
     return {
@@ -218,6 +226,7 @@ def get_dashboard_data(db_path=DB_PATH):
         "sessions_all":    sessions_all,
         "subagent_by_type": subagent_by_type,
         "top_dispatches":  top_dispatches,
+        "billing":         billing,
         "generated_at":    datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
     }
 
diff --git a/tests/test_ccusage_bridge.py b/tests/test_ccusage_bridge.py
index f1e15865..197e181c 100644
--- a/tests/test_ccusage_bridge.py
+++ b/tests/test_ccusage_bridge.py
@@ -147,5 +147,41 @@ def test_ingest_populates_tables(self):
         conn.close()
 
 
+class TestP90AndSummary(unittest.TestCase):
+    def test_p90_empty_returns_floor(self):
+        self.assertEqual(cb.compute_p90_limit([], floor=19000), 19000)
+        self.assertEqual(cb.compute_p90_limit([0, None]), 0)
+
+    def test_p90_single_value(self):
+        self.assertEqual(cb.compute_p90_limit([500], floor=100), 500)
+        self.assertEqual(cb.compute_p90_limit([50], floor=100), 100)
+
+    def test_p90_multiple_is_high_percentile(self):
+        vals = list(range(10, 110, 10))  # 10..100
+        p90 = cb.compute_p90_limit(vals)
+        self.assertGreaterEqual(p90, 90)
+        self.assertIsInstance(p90, int)
+
+    def test_summarize_billing_empty(self):
+        db = Path(tempfile.mkdtemp()) / "u.db"
+        conn = get_db(db); init_db(conn)
+        self.assertEqual(cb.summarize_billing(conn), {"available": False})
+        conn.close()
+
+    def test_summarize_billing_populated(self):
+        db = Path(tempfile.mkdtemp()) / "u.db"
+        conn = get_db(db); init_db(conn)
+        cb.upsert_billing_windows(conn, cb.blocks_to_rows(BLOCKS_FIXTURE, "t1"))
+        conn.commit()
+        s = cb.summarize_billing(conn)
+        self.assertTrue(s["available"])
+        self.assertEqual(s["window_count"], 2)
+        self.assertIsNotNone(s["active"])
+        self.assertEqual(s["active"]["is_active"], 1)
+        # one completed window (total 7,569,286) -> P90 == that value
+        self.assertEqual(s["plan_limit_estimate"], 7569286)
+        conn.close()
+
+
 if __name__ == "__main__":
     unittest.main()

From 549891811f53ca3b95314b67bfd8750b0ea92fae Mon Sep 17 00:00:00 2001
From: john988 <john9882001@yahoo.com>
Date: Wed, 17 Jun 2026 10:45:49 +0800
Subject: [PATCH 5/8] feat(dashboard): live 5h billing-window card (progress
 bar + burn rate)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a "Current 5h Billing Window" card above the charts: a progress bar of the
active window's tokens vs the P90 baseline (green below 80%, amber from 80%,
red at 100% and above), plus burn rate, time remaining, projected end-of-window
tokens/cost, and cost so far. Falls back to a slim ccusage/Node install prompt
when billing data is absent — the rest of the dashboard is unaffected.

Verified: inline JS passes node --check; 145 tests pass.
---
 dashboard.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/dashboard.py b/dashboard.py
index cb426120..d51080f3 100644
--- a/dashboard.py
+++ b/dashboard.py
@@ -362,6 +362,14 @@ def get_dashboard_data(db_path=DB_PATH):
   .export-btn { background: var(--card); border: 1px solid var(--border); color: var(--muted); padding: 3px 10px; border-radius: 5px; cursor: pointer; font-size: 11px; }
   .export-btn:hover { color: var(--text); border-color: var(--accent); }
   .table-card { background: var(--card); border: 1px solid var(--border); border-radius: 8px; padding: 20px; margin-bottom: 24px; overflow-x: auto; }
+  .bw-track { height: 14px; background: var(--bg); border: 1px solid var(--border); border-radius: 7px; overflow: hidden; margin: 10px 0 16px; }
+  .bw-fill { height: 100%; border-radius: 7px; transition: width .3s ease; }
+  .bw-metrics { display: flex; flex-wrap: wrap; gap: 28px; }
+  .bw-metric { font-size: 11px; color: var(--muted); text-transform: uppercase; letter-spacing: 0.05em; }
+  .bw-metric strong { display: block; font-size: 18px; color: var(--text); font-weight: 700; margin-top: 4px; letter-spacing: 0; text-transform: none; }
+  .bw-prompt { color: var(--muted); font-size: 13px; line-height: 1.6; }
+  .bw-prompt a { color: var(--blue); text-decoration: none; }
+  .bw-prompt a:hover { text-decoration: underline; }
   .table-foot { display: flex; justify-content: flex-end; align-items: center; gap: 12px; margin-top: 12px; }
   .table-foot:empty { margin-top: 0; }
   .show-more-btn { background: transparent; border: 1px solid var(--border); color: var(--muted); padding: 4px 12px; border-radius: 6px; cursor: pointer; font-size: 12px; }
@@ -421,6 +429,10 @@ def get_dashboard_data(db_path=DB_PATH):
 
 <div class="container">
   <div class="stats-row" id="stats-row"></div>
+  <div class="table-card" id="billing-card" style="display:none">
+    <div class="section-title">Current 5h Billing Window <span class="muted" id="billing-sub" style="font-weight:400;text-transform:none;letter-spacing:0;font-size:11px"></span></div>
+    <div id="billing-body"></div>
+  </div>
   <div class="charts-grid">
     <div class="chart-card wide">
       <h2 id="daily-chart-title">Daily Token Usage</h2>
@@ -1191,6 +1203,7 @@ def get_dashboard_data(db_path=DB_PATH):
   renderProjectChart(byProject);
   renderSubagentChart(byAgentType);
   renderTopDispatches(filteredDispatches);
+  renderBilling(rawData.billing);
   lastFilteredSessions = sortSessions(filteredSessions);
   lastByModel = byModel;
   lastByProject = sortProjects(byProject);
@@ -1484,6 +1497,49 @@ def get_dashboard_data(db_path=DB_PATH):
   }).join('');
 }
 
+function renderBilling(b) {
+  const card = document.getElementById('billing-card');
+  const body = document.getElementById('billing-body');
+  const sub  = document.getElementById('billing-sub');
+  if (!b) { card.style.display = 'none'; return; }
+  card.style.display = '';
+
+  if (!b.available) {
+    sub.textContent = '';
+    body.innerHTML = '<div class="bw-prompt">Install <a href="https://github.com/ryoppippi/ccusage" target="_blank" rel="noopener">ccusage</a> (needs <a href="https://nodejs.org" target="_blank" rel="noopener">Node.js</a>), then re-run <code>scan</code> to see live 5-hour billing windows, burn rate and projections. The rest of the dashboard works without it.</div>';
+    return;
+  }
+
+  const a = b.active;
+  const limit = b.plan_limit_estimate || 0;
+  if (!a) {
+    sub.textContent = `· ${b.window_count} windows tracked · none active right now`;
+    body.innerHTML = '<div class="bw-prompt">No active 5-hour window. Start a Claude Code session and re-scan to see live burn rate.</div>';
+    return;
+  }
+
+  const used = a.total_tokens || 0;
+  const pctRaw = limit ? Math.round(used / limit * 100) : 0;
+  const pct = Math.min(100, pctRaw);
+  const color = pctRaw >= 100 ? C.red : pctRaw >= 80 ? C.amber : C.green;
+  sub.textContent = `· baseline = P90 of your windows (${fmt(limit)} tok) · ${b.window_count} windows tracked`;
+  const burn   = a.burn_rate_tpm ? `${fmt(Math.round(a.burn_rate_tpm))} tok/min` : '—';
+  const remain = a.remaining_minutes != null ? `${a.remaining_minutes} min` : '—';
+  const proj   = a.projected_total_tokens
+    ? `${fmt(a.projected_total_tokens)} · ${a.projected_cost_usd != null ? fmtCostBig(a.projected_cost_usd) : '—'}`
+    : '—';
+  body.innerHTML =
+    `<div class="bw-track"><div class="bw-fill" style="width:${pct}%;background:${color}"></div></div>` +
+    `<div class="bw-metrics">` +
+      `<div class="bw-metric">Used this window<strong>${fmt(used)} tok</strong></div>` +
+      `<div class="bw-metric">% of P90 baseline<strong style="color:${color}">${pctRaw}%</strong></div>` +
+      `<div class="bw-metric">Burn rate<strong>${burn}</strong></div>` +
+      `<div class="bw-metric">Time remaining<strong>${remain}</strong></div>` +
+      `<div class="bw-metric">Projected end-of-window<strong>${proj}</strong></div>` +
+      `<div class="bw-metric">Window cost so far<strong>${fmtCostBig(a.cost_usd || 0)}</strong></div>` +
+    `</div>`;
+}
+
 // Fills a table card's footer with the row-reveal control. Three states:
 //   - more rows fit under the cap        -> "Show more" (plus "Show less" once expanded)
 //   - cap reached but more records exist -> "Download CSV to see all (N)" + "Show less"

From d853037777dc71377166a44b276610ffd35f018e Mon Sep 17 00:00:00 2001
From: john988 <john9882001@yahoo.com>
Date: Wed, 17 Jun 2026 10:51:29 +0800
Subject: [PATCH 6/8] feat(ccusage): multi-agent CLI breakdown
 (codex/gemini/copilot/...)

Bridge ingests per-source daily totals for non-Claude agent CLIs
(CCUSAGE_EXTRA_SOURCES) into ccusage_daily_cache; Claude Code is excluded here
(covered natively) so it's never double-counted. get_dashboard_data exposes
ccusage_daily (per-source, sans the unified 'ccusage-all'), and a new
"Other Agent CLIs (via ccusage)" chart shows non-Claude token usage by source,
range-filtered, hidden when there's nothing to show.

Tests: 145 pass. Verified end-to-end: real ccusage run surfaced codex usage
(665.8M tokens, ~$694) alongside the native Claude Code views.
---
 ccusage_bridge.py            | 25 ++++++++++--
 dashboard.py                 | 75 ++++++++++++++++++++++++++++++++++++
 tests/test_ccusage_bridge.py | 10 ++++-
 3 files changed, 105 insertions(+), 5 deletions(-)

diff --git a/ccusage_bridge.py b/ccusage_bridge.py
index 0bc91148..5c00d473 100644
--- a/ccusage_bridge.py
+++ b/ccusage_bridge.py
@@ -26,6 +26,11 @@
 CCUSAGE_SPEC = os.environ.get("CCUSAGE_SPEC", "ccusage@latest")
 _TIMEOUT_S = 90
 
+# Non-Claude agent CLIs ccusage can read. Claude Code is intentionally excluded
+# here — it's covered billing-accurately by the native scanner, and including the
+# unified `ccusage daily` (which also counts Claude) would double-count it.
+CCUSAGE_EXTRA_SOURCES = ["codex", "gemini", "copilot", "amp", "droid", "opencode"]
+
 
 def detect_runtime():
     """Locate an npx/bunx runner. On Windows shutil.which returns npx.cmd, which
@@ -245,7 +250,7 @@ def ingest(db_path=DB_PATH, verbose=True, rt=None):
         if verbose:
             print("[ccusage] runner found but no data returned (first run downloads "
                   "the package; check network/version).")
-        return {"available": True, "blocks": 0, "daily": 0}
+        return {"available": True, "blocks": 0, "daily": 0, "sources": {}}
 
     conn = get_db(db_path)
     init_db(conn)
@@ -253,11 +258,25 @@ def ingest(db_path=DB_PATH, verbose=True, rt=None):
     dl = daily_to_rows(daily, "ccusage-all", ingested_at)
     upsert_billing_windows(conn, bw)
     upsert_ccusage_daily(conn, dl)
+
+    # Per-source daily totals for the OTHER agent CLIs (Claude Code is already
+    # covered, billing-accurately, by the native scanner). Best effort: a source
+    # the user doesn't use returns nothing and is skipped.
+    sources = {}
+    for src in CCUSAGE_EXTRA_SOURCES:
+        srows = daily_to_rows(
+            run_ccusage([src, "daily", "--json", "--offline"], rt),
+            f"ccusage-{src}", ingested_at)
+        if srows:
+            upsert_ccusage_daily(conn, srows)
+            sources[src] = len(srows)
+
     conn.commit()
     conn.close()
     if verbose:
-        print(f"[ccusage] ingested {len(bw)} billing windows, {len(dl)} daily rows")
-    return {"available": True, "blocks": len(bw), "daily": len(dl)}
+        extra = (" + " + ", ".join(f"{k}:{v}" for k, v in sources.items())) if sources else ""
+        print(f"[ccusage] ingested {len(bw)} billing windows, {len(dl)} daily rows{extra}")
+    return {"available": True, "blocks": len(bw), "daily": len(dl), "sources": sources}
 
 
 if __name__ == "__main__":
diff --git a/dashboard.py b/dashboard.py
index d51080f3..f28f36f3 100644
--- a/dashboard.py
+++ b/dashboard.py
@@ -209,6 +209,29 @@ def get_dashboard_data(db_path=DB_PATH):
         "status":         r["status"],
     } for r in top_dispatch_rows]
 
+    # Per-source daily from ccusage (other agent CLIs). Excludes the unified
+    # 'ccusage-all' so Claude Code (counted natively) is never double-counted.
+    try:
+        cda_rows = conn.execute("""
+            SELECT day, source, input_tokens, output_tokens,
+                   cache_read_tokens, cache_creation_tokens, total_tokens, cost_usd
+            FROM ccusage_daily_cache
+            WHERE source != 'ccusage-all'
+            ORDER BY day
+        """).fetchall()
+        ccusage_daily = [{
+            "day":            r["day"],
+            "source":         (r["source"] or "").replace("ccusage-", ""),
+            "input":          r["input_tokens"] or 0,
+            "output":         r["output_tokens"] or 0,
+            "cache_read":     r["cache_read_tokens"] or 0,
+            "cache_creation": r["cache_creation_tokens"] or 0,
+            "total":          r["total_tokens"] or 0,
+            "cost":           r["cost_usd"] or 0,
+        } for r in cda_rows]
+    except sqlite3.OperationalError:
+        ccusage_daily = []
+
     # Optional ccusage billing-window summary (5h windows + P90 baseline).
     # Guarded so a bridge issue can never take down the dashboard.
     try:
@@ -227,6 +250,7 @@ def get_dashboard_data(db_path=DB_PATH):
         "subagent_by_type": subagent_by_type,
         "top_dispatches":  top_dispatches,
         "billing":         billing,
+        "ccusage_daily":   ccusage_daily,
         "generated_at":    datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
     }
 
@@ -464,6 +488,10 @@ def get_dashboard_data(db_path=DB_PATH):
       <h2 id="subagent-chart-title">Subagent Tokens by Type</h2>
       <div class="chart-wrap"><canvas id="chart-subagent"></canvas></div>
     </div>
+    <div class="chart-card wide" id="multiagent-card" style="display:none">
+      <h2>Other Agent CLIs (via ccusage) <span class="muted" style="font-weight:400;text-transform:none;letter-spacing:0;font-size:11px">&middot; non-Claude tokens; Claude Code shown natively above</span></h2>
+      <div class="chart-wrap"><canvas id="chart-multiagent"></canvas></div>
+    </div>
   </div>
   <div class="table-card">
     <div class="section-title">Top Subagent Dispatches <span class="muted" style="font-weight:400;text-transform:none;letter-spacing:0;font-size:11px">&middot; ranked by total tokens; <em>unknown</em> = parent dispatch record not found</span></div>
@@ -1191,6 +1219,18 @@ def get_dashboard_data(db_path=DB_PATH):
     selectedModels.has(d.model) && (!start || d.start_date >= start) && (!end || d.start_date <= end)
   ).slice(0, 20);
 
+  // Other agent CLIs (ccusage per-source, non-Claude) — range-filtered, by source.
+  const maMap = {};
+  for (const r of (rawData.ccusage_daily || [])) {
+    if (start && r.day < start) continue;
+    if (end && r.day > end) continue;
+    if (!maMap[r.source]) maMap[r.source] = { source: r.source, total: 0, cost: 0 };
+    maMap[r.source].total += r.total;
+    maMap[r.source].cost  += r.cost;
+  }
+  const byMultiAgent = Object.values(maMap).filter(s => s.total > 0)
+    .sort((a, b) => b.total - a.total);
+
   // Update daily chart title
   document.getElementById('daily-chart-title').textContent = 'Daily Token Usage \u2014 ' + RANGE_LABELS[selectedRange];
   document.getElementById('hourly-chart-title').textContent = 'Average Hourly Distribution \u2014 ' + RANGE_LABELS[selectedRange];
@@ -1204,6 +1244,7 @@ def get_dashboard_data(db_path=DB_PATH):
   renderSubagentChart(byAgentType);
   renderTopDispatches(filteredDispatches);
   renderBilling(rawData.billing);
+  renderMultiAgent(byMultiAgent);
   lastFilteredSessions = sortSessions(filteredSessions);
   lastByModel = byModel;
   lastByProject = sortProjects(byProject);
@@ -1540,6 +1581,40 @@ def get_dashboard_data(db_path=DB_PATH):
     `</div>`;
 }
 
+function renderMultiAgent(bySource) {
+  const el = document.getElementById('chart-multiagent');
+  const card = document.getElementById('multiagent-card');
+  if (charts.multiagent) charts.multiagent.destroy();
+  if (!bySource.length) { charts.multiagent = null; card.style.display = 'none'; return; }
+  card.style.display = '';
+  charts.multiagent = new Chart(el.getContext('2d'), {
+    type: 'bar',
+    data: {
+      labels: bySource.map(s => s.source),
+      datasets: [{
+        label: 'Tokens',
+        data: bySource.map(s => s.total),
+        backgroundColor: bySource.map((s, i) => MODEL_COLORS[i % MODEL_COLORS.length]),
+        hoverBackgroundColor: bySource.map((s, i) => MODEL_COLORS[i % MODEL_COLORS.length]),
+      }]
+    },
+    options: {
+      indexAxis: 'y', responsive: true, maintainAspectRatio: false, resizeDelay: 150,
+      plugins: {
+        legend: { display: false },
+        tooltip: { callbacks: {
+          label: ctx => ` ${fmt(ctx.raw)} tokens`,
+          footer: items => ` ccusage est. ${fmtCostBig(bySource[items[0].dataIndex].cost)}`,
+        } }
+      },
+      scales: {
+        x: { ticks: { color: C.axis, callback: v => fmt(v) }, grid: { color: C.border } },
+        y: { ticks: { color: C.axis, font: { size: 11 } }, grid: { color: C.border } },
+      }
+    }
+  });
+}
+
 // Fills a table card's footer with the row-reveal control. Three states:
 //   - more rows fit under the cap        -> "Show more" (plus "Show less" once expanded)
 //   - cap reached but more records exist -> "Download CSV to see all (N)" + "Show less"
diff --git a/tests/test_ccusage_bridge.py b/tests/test_ccusage_bridge.py
index 197e181c..9321ac88 100644
--- a/tests/test_ccusage_bridge.py
+++ b/tests/test_ccusage_bridge.py
@@ -140,10 +140,16 @@ def test_ingest_populates_tables(self):
             cb.run_ccusage = orig
         self.assertEqual(res["available"], True)
         self.assertEqual(res["blocks"], 2)
-        self.assertEqual(res["daily"], 1)
+        self.assertEqual(res["daily"], 1)            # unified ccusage-all
+        self.assertEqual(len(res["sources"]), len(cb.CCUSAGE_EXTRA_SOURCES))
         conn = get_db(self.db_path)
         self.assertEqual(conn.execute("SELECT COUNT(*) FROM billing_windows").fetchone()[0], 2)
-        self.assertEqual(conn.execute("SELECT COUNT(*) FROM ccusage_daily_cache").fetchone()[0], 1)
+        # unified (1) + one row per extra source
+        self.assertEqual(
+            conn.execute("SELECT COUNT(*) FROM ccusage_daily_cache").fetchone()[0],
+            1 + len(cb.CCUSAGE_EXTRA_SOURCES))
+        self.assertIsNotNone(
+            conn.execute("SELECT 1 FROM ccusage_daily_cache WHERE source='ccusage-codex'").fetchone())
         conn.close()
 
 

From 3159f60e6b41f7f048ce0c74411671f784d9887d Mon Sep 17 00:00:00 2001
From: john988 <john9882001@yahoo.com>
Date: Wed, 17 Jun 2026 11:25:34 +0800
Subject: [PATCH 7/8] feat: pricing single-source + limit-event detection +
 honest caveat; release v1.5.0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

P6 — single source of pricing:
- Move PRICING + get_pricing + calc_cost into pricing.py, imported by cli.py and
  served via /api/data. The dashboard JS reads rawData.pricing at runtime
  (its embedded table is now only a cold-start fallback), so the Python and JS
  tables can no longer drift.

P7 — limit events + transparency:
- Detect "Claude AI usage limit reached" events into a new limit_events table,
  gated on isApiErrorMessage so ordinary text mentioning a limit isn't
  misdetected (parse_jsonl_file now returns limit events; both scan paths upsert).
- Footer notes that figures are transcript-derived estimates that may not match
  Anthropic billing, and that native vs ccusage numbers are shown separately.

Release: bump to v1.5.0 across scanner.VERSION, CHANGELOG, and the extension
package.json (parity test enforces all three).

Tests: 150 pass (+5 limit-event tests); cli.calc_cost is pricing.calc_cost;
inline JS passes node --check.
---
 CHANGELOG.md                  | 22 +++++++++
 cli.py                        | 51 ++-------------------
 dashboard.py                  |  9 +++-
 pricing.py                    | 63 +++++++++++++++++++++++++
 scanner.py                    | 83 +++++++++++++++++++++++++++++++--
 tests/test_limits.py          | 86 +++++++++++++++++++++++++++++++++++
 tests/test_scanner.py         | 28 ++++++------
 tests/test_subagent.py        | 12 ++---
 vscode-extension/package.json |  2 +-
 9 files changed, 283 insertions(+), 73 deletions(-)
 create mode 100644 pricing.py
 create mode 100644 tests/test_limits.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 63f05c14..ebdce371 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,27 @@
 # Changelog
 
+## v1.5.0 — 2026-06-17
+
+### Subagent attribution
+
+- The scanner now records which turns came from dispatched subagents (Task/Agent tool) via new `turns.is_subagent` / `turns.agent_id` columns and an `agents` dispatch table (agent type + aggregate stats captured from the parent `toolUseResult`). Subagents are detected by `isSidechain`, an `agentId`, or a `subagents/` transcript path. Schema changes are additive, so existing `usage.db` files migrate in place (no rebuild).
+- Dashboard: a **Subagent Tokens by Type** chart, a **Top Subagent Dispatches** table, and a Subagent Tokens stat card — all driven by the existing model + range filters. Dynamic values are HTML-escaped.
+- CLI: `today` and `stats` now print subagent token/turn summaries (included in totals).
+
+### ccusage integration (optional)
+
+- New optional bridge (`ccusage_bridge.py`): when Node/`npx` + [ccusage](https://github.com/ryoppippi/ccusage) are present, `scan` ingests ccusage's 5-hour billing **blocks** and per-source **daily** totals into new `billing_windows` / `ccusage_daily_cache` tables. It degrades gracefully when absent — the native, stdlib-only tool is unchanged and ccusage is never required. Windows `npx.cmd` invocation and UTF-8 output are handled; the subprocess never raises.
+- Dashboard: a **Current 5h Billing Window** card (progress bar vs your P90 window baseline, burn rate, time remaining, projected end-of-window tokens/cost, cost so far) with an install prompt when ccusage isn't present, and an **Other Agent CLIs (via ccusage)** chart for non-Claude usage (Codex/Gemini/Copilot/…). Claude Code is always counted natively and never double-counted against ccusage.
+
+### Scanner / CLI
+
+- Pricing now lives in a single `pricing.py` module shared by the CLI and (via `/api/data`) the dashboard, so the Python and JS pricing tables can no longer drift; the embedded JS table is now only a cold-start fallback.
+- Detect "Claude AI usage limit reached" events into a new `limit_events` table, gated on `isApiErrorMessage` so ordinary text mentioning a limit isn't misdetected.
+
+### Project / docs
+
+- Footer now notes that figures are transcript-derived estimates (Claude Code doesn't write every request to disk) and may not match Anthropic billing exactly, and that native vs ccusage numbers are shown separately, never summed.
+
 ## v1.4.0 — 2026-06-15
 
 ### Dashboard
diff --git a/cli.py b/cli.py
index 05b89ed3..7dedb1c2 100644
--- a/cli.py
+++ b/cli.py
@@ -15,57 +15,12 @@
 from datetime import datetime, date, timedelta
 
 from scanner import VERSION
+# Pricing lives in a single module shared by the CLI and (via /api/data) the
+# dashboard, so the two can never drift. See pricing.py.
+from pricing import PRICING, get_pricing, calc_cost
 
 DB_PATH = Path.home() / ".claude" / "usage.db"
 
-PRICING = {
-    # Fable / Mythos — Anthropic's most capable class, priced at 2x Opus.
-    # (Mythos 5 shares Fable 5's pricing; Project-Glasswing access only.)
-    "claude-fable-5":    {"input": 10.00, "output": 50.00, "cache_read": 1.00, "cache_write": 12.50},
-    "claude-mythos-5":   {"input": 10.00, "output": 50.00, "cache_read": 1.00, "cache_write": 12.50},
-    "claude-opus-4-8":   {"input": 5.00, "output": 25.00, "cache_read": 0.50, "cache_write": 6.25},
-    "claude-opus-4-7":   {"input": 5.00, "output": 25.00, "cache_read": 0.50, "cache_write": 6.25},
-    "claude-opus-4-6":   {"input": 5.00, "output": 25.00, "cache_read": 0.50, "cache_write": 6.25},
-    "claude-opus-4-5":   {"input": 5.00, "output": 25.00, "cache_read": 0.50, "cache_write": 6.25},
-    "claude-sonnet-4-7": {"input": 3.00, "output": 15.00, "cache_read": 0.30, "cache_write": 3.75},
-    "claude-sonnet-4-6": {"input": 3.00, "output": 15.00, "cache_read": 0.30, "cache_write": 3.75},
-    "claude-sonnet-4-5": {"input": 3.00, "output": 15.00, "cache_read": 0.30, "cache_write": 3.75},
-    "claude-haiku-4-7":  {"input": 1.00, "output":  5.00, "cache_read": 0.10, "cache_write": 1.25},
-    "claude-haiku-4-6":  {"input": 1.00, "output":  5.00, "cache_read": 0.10, "cache_write": 1.25},
-    "claude-haiku-4-5":  {"input": 1.00, "output":  5.00, "cache_read": 0.10, "cache_write": 1.25},
-}
-
-def get_pricing(model):
-    if not model:
-        return None
-    if model in PRICING:
-        return PRICING[model]
-    for key in PRICING:
-        if model.startswith(key):
-            return PRICING[key]
-    # Substring fallback: match model family by keyword
-    m = model.lower()
-    if "fable" in m or "mythos" in m:
-        return PRICING["claude-fable-5"]
-    if "opus" in m:
-        return PRICING["claude-opus-4-8"]
-    if "sonnet" in m:
-        return PRICING["claude-sonnet-4-6"]
-    if "haiku" in m:
-        return PRICING["claude-haiku-4-5"]
-    return None
-
-def calc_cost(model, inp, out, cache_read, cache_creation):
-    p = get_pricing(model)
-    if not p:
-        return 0.0
-    return (
-        inp            * p["input"]       / 1_000_000 +
-        out            * p["output"]      / 1_000_000 +
-        cache_read     * p["cache_read"]  / 1_000_000 +
-        cache_creation * p["cache_write"] / 1_000_000
-    )
-
 def fmt(n):
     if n >= 1_000_000:
         return f"{n/1_000_000:.2f}M"
diff --git a/dashboard.py b/dashboard.py
index f28f36f3..9e9baed9 100644
--- a/dashboard.py
+++ b/dashboard.py
@@ -11,6 +11,7 @@
 from datetime import datetime
 
 from scanner import VERSION
+from pricing import PRICING  # single source of truth, also served via /api/data
 
 DB_PATH = Path.home() / ".claude" / "usage.db"
 
@@ -251,6 +252,7 @@ def get_dashboard_data(db_path=DB_PATH):
         "top_dispatches":  top_dispatches,
         "billing":         billing,
         "ccusage_daily":   ccusage_daily,
+        "pricing":         PRICING,
         "generated_at":    datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
     }
 
@@ -573,6 +575,7 @@ def get_dashboard_data(db_path=DB_PATH):
 <footer>
   <div class="footer-content">
     <p>Cost estimates based on Anthropic API pricing (<a href="https://claude.com/pricing#api" target="_blank">claude.com/pricing#api</a>) as of June 2026. Only models containing <em>fable</em>, <em>mythos</em>, <em>opus</em>, <em>sonnet</em>, or <em>haiku</em> in the name are included in cost calculations. Actual costs for Max/Pro subscribers differ from API pricing.</p>
+    <p>Figures are derived from local Claude Code transcripts, which don't capture every request Claude Code makes (e.g. some internal/system calls), so totals are estimates and may not match Anthropic billing exactly. Billing-window figures come from <em>ccusage</em> (its own pricing snapshot) when installed; native and ccusage numbers are shown separately, never summed.</p>
     <p>
       GitHub: <a href="https://github.com/phuryn/claude-usage" target="_blank">https://github.com/phuryn/claude-usage</a>
       &nbsp;&middot;&nbsp;
@@ -672,7 +675,10 @@ def get_dashboard_data(db_path=DB_PATH):
 }
 
 // ── Pricing (Anthropic API, June 2026) ─────────────────────────────────────
-const PRICING = {
+// Canonical table lives in pricing.py and is served via /api/data; loadData()
+// overrides this object from rawData.pricing when present. This literal is only
+// a cold-start fallback so the two can't drift.
+let PRICING = {
   // Fable / Mythos — Anthropic's most capable class, priced at 2x Opus.
   // (Mythos 5 shares Fable 5's pricing; Project-Glasswing access only.)
   'claude-fable-5':    { input: 10.00, output: 50.00, cache_write: 12.50, cache_read: 1.00 },
@@ -1926,6 +1932,7 @@ def get_dashboard_data(db_path=DB_PATH):
 
     const isFirstLoad = rawData === null;
     rawData = d;
+    if (d.pricing && Object.keys(d.pricing).length) PRICING = d.pricing;  // server is canonical
 
     if (isFirstLoad) {
       // Restore range from URL, mark active button
diff --git a/pricing.py b/pricing.py
new file mode 100644
index 00000000..a0c7579f
--- /dev/null
+++ b/pricing.py
@@ -0,0 +1,63 @@
+"""
+pricing.py - Single source of truth for model pricing and per-turn cost.
+
+Both the CLI (cli.py) and the web dashboard use these rates. The dashboard's
+embedded JS keeps a copy as a cold-start fallback, but at runtime it reads this
+table from /api/data (get_dashboard_data injects it), so the two can't drift.
+
+Anthropic API pricing per million tokens (USD), verified June 2026.
+Source: https://platform.claude.com/docs/en/about-claude/pricing
+"""
+
+PRICING = {
+    # Fable / Mythos — Anthropic's most capable class, priced at 2x Opus.
+    # (Mythos 5 shares Fable 5's pricing; Project-Glasswing access only.)
+    "claude-fable-5":    {"input": 10.00, "output": 50.00, "cache_read": 1.00, "cache_write": 12.50},
+    "claude-mythos-5":   {"input": 10.00, "output": 50.00, "cache_read": 1.00, "cache_write": 12.50},
+    "claude-opus-4-8":   {"input": 5.00, "output": 25.00, "cache_read": 0.50, "cache_write": 6.25},
+    "claude-opus-4-7":   {"input": 5.00, "output": 25.00, "cache_read": 0.50, "cache_write": 6.25},
+    "claude-opus-4-6":   {"input": 5.00, "output": 25.00, "cache_read": 0.50, "cache_write": 6.25},
+    "claude-opus-4-5":   {"input": 5.00, "output": 25.00, "cache_read": 0.50, "cache_write": 6.25},
+    "claude-sonnet-4-7": {"input": 3.00, "output": 15.00, "cache_read": 0.30, "cache_write": 3.75},
+    "claude-sonnet-4-6": {"input": 3.00, "output": 15.00, "cache_read": 0.30, "cache_write": 3.75},
+    "claude-sonnet-4-5": {"input": 3.00, "output": 15.00, "cache_read": 0.30, "cache_write": 3.75},
+    "claude-haiku-4-7":  {"input": 1.00, "output":  5.00, "cache_read": 0.10, "cache_write": 1.25},
+    "claude-haiku-4-6":  {"input": 1.00, "output":  5.00, "cache_read": 0.10, "cache_write": 1.25},
+    "claude-haiku-4-5":  {"input": 1.00, "output":  5.00, "cache_read": 0.10, "cache_write": 1.25},
+}
+
+
+def get_pricing(model):
+    """Return the PRICING record for a model id, or None if nothing matches.
+    Tried in order: exact key, key prefix (date-suffixed ids such as
+    claude-opus-4-7-20260215), then a lower-cased family keyword."""
+    if not model:
+        return None
+    if model in PRICING:
+        return PRICING[model]
+    by_prefix = next((key for key in PRICING if model.startswith(key)), None)
+    if by_prefix is not None:
+        return PRICING[by_prefix]
+    lowered = model.lower()
+    family_defaults = (
+        (("fable", "mythos"), "claude-fable-5"),
+        (("opus",), "claude-opus-4-8"),
+        (("sonnet",), "claude-sonnet-4-6"),
+        (("haiku",), "claude-haiku-4-5"),
+    )
+    for keywords, key in family_defaults:
+        if any(word in lowered for word in keywords):
+            return PRICING[key]
+    return None
+
+
+def calc_cost(model, inp, out, cache_read, cache_creation):
+    """Return the USD cost of one usage record; 0.0 when the model has no rate."""
+    rates = get_pricing(model)
+    if not rates:
+        return 0.0
+    per_million = 1_000_000  # rates are quoted per million tokens
+    cost = inp * rates["input"] / per_million
+    cost += out * rates["output"] / per_million
+    cost += cache_read * rates["cache_read"] / per_million
+    return cost + cache_creation * rates["cache_write"] / per_million
diff --git a/scanner.py b/scanner.py
index 71fd83ba..4b1e801f 100644
--- a/scanner.py
+++ b/scanner.py
@@ -5,6 +5,7 @@
 import json
 import os
 import glob
+import re
 import sqlite3
 from pathlib import Path
 from datetime import datetime, timezone
@@ -15,7 +16,7 @@
 # runtime version has to live here as a constant. Keep this in lockstep with the
 # top CHANGELOG heading and vscode-extension/package.json (a parity test guards
 # all three; see tests/test_version.py).
-VERSION = "1.4.0"
+VERSION = "1.5.0"
 
 PROJECTS_DIR = Path.home() / ".claude" / "projects"
 XCODE_PROJECTS_DIR = Path.home() / "Library" / "Developer" / "Xcode" / "CodingAssistant" / "ClaudeAgentConfig" / "projects"
@@ -136,6 +137,16 @@ def init_db(conn):
             PRIMARY KEY (day, source)
         );
 
+        -- "Claude AI usage limit reached" events (rate/quota), detected from
+        -- API-error records. Deduped by the record uuid.
+        CREATE TABLE IF NOT EXISTS limit_events (
+            uuid         TEXT PRIMARY KEY,
+            session_id   TEXT,
+            timestamp    TEXT,
+            reset_at     INTEGER,
+            message      TEXT
+        );
+
         CREATE INDEX IF NOT EXISTS idx_turns_session ON turns(session_id);
         CREATE INDEX IF NOT EXISTS idx_turns_timestamp ON turns(timestamp);
         CREATE INDEX IF NOT EXISTS idx_sessions_first ON sessions(first_timestamp);
@@ -259,6 +270,58 @@ def upsert_agents(conn, agents):
     ])
 
 
+_LIMIT_RE = re.compile(r"limit reached\|(\d+)", re.IGNORECASE)
+
+
+def extract_limit_event(record):
+    """Detect a genuine "Claude AI usage limit reached" event.
+
+    Gated on ``isApiErrorMessage`` so ordinary text that merely mentions a limit
+    (e.g. this very conversation) is not misdetected. ``reset_at`` is the number
+    after the pipe in "...limit reached|<unix_ts>" (0 = unknown), or None when
+    the text has no such suffix. Matching is case-insensitive throughout."""
+    if not record.get("isApiErrorMessage"):
+        return None
+    msg = record.get("message", {})
+    content = msg.get("content") if isinstance(msg, dict) else None
+    if isinstance(content, str):
+        content = [{"text": content}]  # treat a bare string as one text block
+    blocks = content if isinstance(content, list) else []
+    texts = (b.get("text") for b in blocks if isinstance(b, dict))
+    # Only real strings are considered, so a malformed block (non-string
+    # "text") is skipped instead of raising on .lower() or slicing.
+    text = next((t for t in texts
+                 if isinstance(t, str) and "limit reached" in t.lower()), None)
+    if text is None:
+        return None
+    m = _LIMIT_RE.search(text)
+    return {
+        "uuid": record.get("uuid"),
+        "session_id": record.get("sessionId"),
+        "timestamp": record.get("timestamp", ""),
+        "reset_at": int(m.group(1)) if m else None,
+        "message": text[:200],
+    }
+
+
+def upsert_limit_events(conn, events):
+    """Insert limit events into limit_events; an existing uuid is overwritten."""
+    if not events:
+        return
+    # uuid is required (KeyError if absent); the other columns default to NULL.
+    optional = ("session_id", "timestamp", "reset_at", "message")
+    rows = [(ev["uuid"], *(ev.get(col) for col in optional)) for ev in events]
+    conn.executemany("""
+        INSERT INTO limit_events (uuid, session_id, timestamp, reset_at, message)
+        VALUES (?, ?, ?, ?, ?)
+        ON CONFLICT(uuid) DO UPDATE SET
+            session_id = excluded.session_id,
+            timestamp  = excluded.timestamp,
+            reset_at   = excluded.reset_at,
+            message    = excluded.message
+    """, rows)
+
+
 def parse_jsonl_file(filepath):
-    """Parse a JSONL file and return (session_metas, turns, agents, line_count).
+    """Parse a JSONL file into (session_metas, turns, agents, limit_events, line_count).
 
@@ -270,6 +333,7 @@ def parse_jsonl_file(filepath):
     turns_no_id = []    # turns without a message_id (kept as-is)
     session_meta = {}   # session_id -> dict
     agents = {}         # agent_id -> dispatch dict
+    limit_events = {}   # uuid -> limit event
     line_count = 0
 
     try:
@@ -283,6 +347,11 @@ def parse_jsonl_file(filepath):
                 except json.JSONDecodeError:
                     continue
 
+                # Limit events can be any record type — detect before the filter.
+                le = extract_limit_event(record)
+                if le is not None and le.get("uuid"):
+                    limit_events[le["uuid"]] = le
+
                 rtype = record.get("type")
                 if rtype not in ("assistant", "user"):
                     continue
@@ -369,7 +438,8 @@ def parse_jsonl_file(filepath):
         print(f"  Warning: error reading {filepath}: {e}")
 
     turns = turns_no_id + list(seen_messages.values())
-    return list(session_meta.values()), turns, list(agents.values()), line_count
+    return (list(session_meta.values()), turns, list(agents.values()),
+            list(limit_events.values()), line_count)
 
 
 def aggregate_sessions(session_metas, turns):
@@ -529,8 +599,9 @@ def scan(projects_dir=None, projects_dirs=None, db_path=DB_PATH, verbose=True):
 
         if is_new:
             # New file: full parse (single read, returns line count)
-            session_metas, turns, agents, line_count = parse_jsonl_file(filepath)
+            session_metas, turns, agents, limit_events, line_count = parse_jsonl_file(filepath)
             upsert_agents(conn, agents)
+            upsert_limit_events(conn, limit_events)
 
             if turns or session_metas:
                 sessions = aggregate_sessions(session_metas, turns)
@@ -548,6 +619,7 @@ def scan(projects_dir=None, projects_dirs=None, db_path=DB_PATH, verbose=True):
             turns_no_id = []
             new_session_metas = {}
             agents = {}         # agent_id -> dispatch dict
+            limit_events = {}   # uuid -> limit event
             line_count = 0
 
             try:
@@ -563,6 +635,10 @@ def scan(projects_dir=None, projects_dirs=None, db_path=DB_PATH, verbose=True):
                         except json.JSONDecodeError:
                             continue
 
+                        le = extract_limit_event(record)
+                        if le is not None and le.get("uuid"):
+                            limit_events[le["uuid"]] = le
+
                         rtype = record.get("type")
                         if rtype not in ("assistant", "user"):
                             continue
@@ -651,6 +727,7 @@ def scan(projects_dir=None, projects_dirs=None, db_path=DB_PATH, verbose=True):
 
             new_turns = turns_no_id + list(seen_messages.values())
             upsert_agents(conn, list(agents.values()))
+            upsert_limit_events(conn, list(limit_events.values()))
 
             if new_turns or new_session_metas:
                 sessions = aggregate_sessions(list(new_session_metas.values()), new_turns)
diff --git a/tests/test_limits.py b/tests/test_limits.py
new file mode 100644
index 00000000..b6409c8f
--- /dev/null
+++ b/tests/test_limits.py
@@ -0,0 +1,86 @@
+"""Tests for usage-limit event detection (scanner.extract_limit_event + scan)."""
+
+import json
+import os
+import sqlite3
+import tempfile
+import unittest
+from pathlib import Path
+
+from scanner import extract_limit_event, scan
+
+NL = chr(10)
+
+
+def _limit_record(reset_ts="1736503200", uuid="u-limit-1", session_id="s1",
+                  api_error=True):
+    """Build a limit-reached transcript record; api_error toggles the flag."""
+    text_block = {"type": "text",
+                  "text": f"Claude AI usage limit reached|{reset_ts}"}
+    fields = [
+        ("type", "assistant"),
+        ("uuid", uuid),
+        ("sessionId", session_id),
+        ("timestamp", "2026-06-17T05:00:00Z"),
+        ("message", {"content": [text_block]}),
+    ]
+    flag = [("isApiErrorMessage", True)] if api_error else []
+    return dict(fields + flag)
+
+
+class TestExtractLimitEvent(unittest.TestCase):
+    """Unit tests for extract_limit_event on single in-memory records."""
+    def test_detects_api_error_limit(self):
+        event = extract_limit_event(_limit_record(reset_ts="1736503200"))
+        self.assertIsNotNone(event)
+        expected = {"reset_at": 1736503200, "session_id": "s1", "uuid": "u-limit-1"}
+        for field, value in expected.items():
+            self.assertEqual(event[field], value)
+
+    def test_no_api_error_flag_is_ignored(self):
+        # Identical text without the API-error flag must not count as an event.
+        self.assertIsNone(extract_limit_event(_limit_record(api_error=False)))
+
+    def test_api_error_without_limit_text_ignored(self):
+        block = {"type": "text", "text": "some other error"}
+        rec = {"type": "assistant", "uuid": "u2", "isApiErrorMessage": True,
+               "message": {"content": [block]}}
+        self.assertIsNone(extract_limit_event(rec))
+
+    def test_reset_at_zero_when_unknown(self):
+        self.assertEqual(extract_limit_event(_limit_record(reset_ts="0"))["reset_at"], 0)
+
+
+class TestLimitEventScanIntegration(unittest.TestCase):
+    def setUp(self):
+        # Auto-removed after each test; mkdtemp() alone leaked a directory per run.
+        tmp = tempfile.TemporaryDirectory()
+        self.addCleanup(tmp.cleanup)
+        self.tmpdir = tmp.name
+        self.projects_dir = Path(self.tmpdir) / "projects" / "user" / "proj"
+        self.projects_dir.mkdir(parents=True)
+        self.db_path = Path(self.tmpdir) / "usage.db"
+
+    def test_scan_records_limit_event(self):
+        usage = {"input_tokens": 100, "output_tokens": 50,
+                 "cache_read_input_tokens": 0, "cache_creation_input_tokens": 0}
+        turn = {"type": "assistant", "sessionId": "s1", "uuid": "m-1",
+                "timestamp": "2026-06-17T04:00:00Z",
+                "message": {"id": "m-1", "model": "claude-opus-4-8",
+                            "usage": usage, "content": []}}
+        with open(self.projects_dir / "sess-1.jsonl", "w", encoding="utf-8") as f:
+            f.write(json.dumps(turn) + NL)
+            f.write(json.dumps(_limit_record(reset_ts="1750000000", uuid="u-lim")) + NL)
+
+        scan(projects_dir=self.projects_dir.parent.parent, db_path=self.db_path, verbose=False)
+        conn = sqlite3.connect(self.db_path)
+        self.addCleanup(conn.close)  # runs before tmp.cleanup, even on failure
+        conn.row_factory = sqlite3.Row
+        rows = conn.execute("SELECT * FROM limit_events").fetchall()
+        self.assertEqual(len(rows), 1)
+        self.assertEqual(rows[0]["reset_at"], 1750000000)
+        self.assertEqual(rows[0]["uuid"], "u-lim")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_scanner.py b/tests/test_scanner.py
index 21fc9421..8a645a07 100644
--- a/tests/test_scanner.py
+++ b/tests/test_scanner.py
@@ -89,7 +89,7 @@ def test_basic_parsing(self):
             _make_user_record(),
             _make_assistant_record(),
         ])
-        metas, turns, _, line_count = parse_jsonl_file(path)
+        metas, turns, _, _, line_count = parse_jsonl_file(path)
         self.assertEqual(len(metas), 1)
         self.assertEqual(len(turns), 1)
         self.assertEqual(metas[0]["session_id"], "sess-1")
@@ -102,7 +102,7 @@ def test_skips_zero_token_records(self):
             _make_assistant_record(input_tokens=0, output_tokens=0,
                                    cache_read=0, cache_creation=0),
         ])
-        _, turns, _, _ = parse_jsonl_file(path)
+        _, turns, _, _, _ = parse_jsonl_file(path)
         self.assertEqual(len(turns), 0)
 
     def test_skips_non_assistant_user_types(self):
@@ -110,7 +110,7 @@ def test_skips_non_assistant_user_types(self):
             json.dumps({"type": "system", "sessionId": "s1"}),
             _make_assistant_record(session_id="s1"),
         ])
-        metas, turns, _, _ = parse_jsonl_file(path)
+        metas, turns, _, _, _ = parse_jsonl_file(path)
         self.assertEqual(len(turns), 1)
 
     def test_handles_malformed_json(self):
@@ -118,12 +118,12 @@ def test_handles_malformed_json(self):
             "not valid json",
             _make_assistant_record(),
         ])
-        _, turns, _, _ = parse_jsonl_file(path)
+        _, turns, _, _, _ = parse_jsonl_file(path)
         self.assertEqual(len(turns), 1)
 
     def test_handles_empty_file(self):
         path = self._write_jsonl("test.jsonl", [])
-        metas, turns, _, _ = parse_jsonl_file(path)
+        metas, turns, _, _, _ = parse_jsonl_file(path)
         self.assertEqual(len(metas), 0)
         self.assertEqual(len(turns), 0)
 
@@ -132,7 +132,7 @@ def test_multiple_sessions(self):
             _make_assistant_record(session_id="s1"),
             _make_assistant_record(session_id="s2"),
         ])
-        metas, turns, _, _ = parse_jsonl_file(path)
+        metas, turns, _, _, _ = parse_jsonl_file(path)
         self.assertEqual(len(metas), 2)
         self.assertEqual(len(turns), 2)
 
@@ -142,7 +142,7 @@ def test_session_timestamps_tracked(self):
             _make_assistant_record(timestamp="2026-04-08T09:05:00Z"),
             _make_assistant_record(timestamp="2026-04-08T09:10:00Z"),
         ])
-        metas, _, _, _ = parse_jsonl_file(path)
+        metas, _, _, _, _ = parse_jsonl_file(path)
         self.assertEqual(metas[0]["first_timestamp"], "2026-04-08T09:00:00Z")
         self.assertEqual(metas[0]["last_timestamp"], "2026-04-08T09:10:00Z")
 
@@ -161,7 +161,7 @@ def test_tool_name_extracted(self):
             },
         })
         path = self._write_jsonl("test.jsonl", [record])
-        _, turns, _, _ = parse_jsonl_file(path)
+        _, turns, _, _, _ = parse_jsonl_file(path)
         self.assertEqual(turns[0]["tool_name"], "Read")
 
 
@@ -188,7 +188,7 @@ def test_streaming_events_deduped(self):
             # Streaming event 3: final usage (same message)
             _make_assistant_record(message_id="msg-abc", input_tokens=150, output_tokens=80),
         ])
-        _, turns, _, _ = parse_jsonl_file(path)
+        _, turns, _, _, _ = parse_jsonl_file(path)
         self.assertEqual(len(turns), 1)
         # Last record wins (has final tallies)
         self.assertEqual(turns[0]["input_tokens"], 150)
@@ -201,7 +201,7 @@ def test_different_message_ids_kept(self):
             _make_assistant_record(message_id="msg-1", input_tokens=100),
             _make_assistant_record(message_id="msg-2", input_tokens=200),
         ])
-        _, turns, _, _ = parse_jsonl_file(path)
+        _, turns, _, _, _ = parse_jsonl_file(path)
         self.assertEqual(len(turns), 2)
 
     def test_records_without_message_id_kept(self):
@@ -210,7 +210,7 @@ def test_records_without_message_id_kept(self):
             _make_assistant_record(input_tokens=100),
             _make_assistant_record(input_tokens=200),
         ])
-        _, turns, _, _ = parse_jsonl_file(path)
+        _, turns, _, _, _ = parse_jsonl_file(path)
         self.assertEqual(len(turns), 2)
 
     def test_mixed_with_and_without_ids(self):
@@ -220,7 +220,7 @@ def test_mixed_with_and_without_ids(self):
             _make_assistant_record(message_id="msg-1", input_tokens=100),  # deduped
             _make_assistant_record(input_tokens=200),  # no id, kept
         ])
-        _, turns, _, _ = parse_jsonl_file(path)
+        _, turns, _, _, _ = parse_jsonl_file(path)
         self.assertEqual(len(turns), 2)  # 1 deduped + 1 without id
         token_sums = sorted([t["input_tokens"] for t in turns])
         self.assertEqual(token_sums, [100, 200])
@@ -664,14 +664,14 @@ def test_line_count_matches_file(self):
             f.write(_make_user_record() + "\n")
             f.write(_make_assistant_record() + "\n")
             f.write(_make_assistant_record(timestamp="2026-04-08T10:01:00Z") + "\n")
-        _, _, _, line_count = parse_jsonl_file(path)
+        _, _, _, _, line_count = parse_jsonl_file(path)
         self.assertEqual(line_count, 3)
 
     def test_empty_file_returns_zero(self):
         path = os.path.join(self.tmpdir, "empty.jsonl")
         with open(path, "w") as f:
             pass
-        _, _, _, line_count = parse_jsonl_file(path)
+        _, _, _, _, line_count = parse_jsonl_file(path)
         self.assertEqual(line_count, 0)
 
 
diff --git a/tests/test_subagent.py b/tests/test_subagent.py
index c49105aa..e6883169 100644
--- a/tests/test_subagent.py
+++ b/tests/test_subagent.py
@@ -69,31 +69,31 @@ def _write(self, relpath, lines):
 
     def test_sidechain_flag_marks_subagent(self):
         path = self._write("a.jsonl", [_assistant(extra={"isSidechain": True})])
-        _, turns, _, _ = parse_jsonl_file(path)
+        _, turns, _, _, _ = parse_jsonl_file(path)
         self.assertEqual(turns[0]["is_subagent"], 1)
 
     def test_agent_id_marks_subagent_and_is_captured(self):
         path = self._write("a.jsonl", [_assistant(extra={"agentId": "agent-xyz"})])
-        _, turns, _, _ = parse_jsonl_file(path)
+        _, turns, _, _, _ = parse_jsonl_file(path)
         self.assertEqual(turns[0]["is_subagent"], 1)
         self.assertEqual(turns[0]["agent_id"], "agent-xyz")
 
     def test_path_under_subagents_marks_subagent(self):
         path = self._write(os.path.join("proj", "subagents", "x.jsonl"),
                            [_assistant()])
-        _, turns, _, _ = parse_jsonl_file(path)
+        _, turns, _, _, _ = parse_jsonl_file(path)
         self.assertEqual(turns[0]["is_subagent"], 1)
 
     def test_normal_record_not_subagent(self):
         path = self._write("a.jsonl", [_assistant()])
-        _, turns, _, _ = parse_jsonl_file(path)
+        _, turns, _, _, _ = parse_jsonl_file(path)
         self.assertEqual(turns[0]["is_subagent"], 0)
         self.assertIsNone(turns[0]["agent_id"])
 
     def test_agent_dispatch_extracted_from_tool_result(self):
         path = self._write("a.jsonl", [_dispatch(agent_id="agent-xyz", agent_type="Plan",
                                                   total_tokens=1234)])
-        _, _, agents, _ = parse_jsonl_file(path)
+        _, _, agents, _, _ = parse_jsonl_file(path)
         self.assertEqual(len(agents), 1)
         self.assertEqual(agents[0]["agent_id"], "agent-xyz")
         self.assertEqual(agents[0]["agent_type"], "Plan")
@@ -103,7 +103,7 @@ def test_tool_result_without_agent_fields_ignored(self):
         rec = json.dumps({"type": "user", "sessionId": "s1",
                           "toolUseResult": {"status": "ok"}})
         path = self._write("a.jsonl", [rec])
-        _, _, agents, _ = parse_jsonl_file(path)
+        _, _, agents, _, _ = parse_jsonl_file(path)
         self.assertEqual(agents, [])
 
 
diff --git a/vscode-extension/package.json b/vscode-extension/package.json
index 1004584f..5b80fbc7 100644
--- a/vscode-extension/package.json
+++ b/vscode-extension/package.json
@@ -2,7 +2,7 @@
   "name": "claude-usage-phuryn",
   "displayName": "Claude Code Usage by Paweł Huryn",
   "description": "Embed your Claude Code usage dashboard (token counts, costs, sessions, projects) directly inside VS Code. Reads local JSONL transcripts, no API calls.",
-  "version": "1.4.0",
+  "version": "1.5.0",
   "publisher": "PawelHuryn",
   "author": {
     "name": "Paweł Huryn",

From b363d793a59790e1d55232fb34dd469d91ceb6a3 Mon Sep 17 00:00:00 2001
From: john988 <john9882001@yahoo.com>
Date: Wed, 17 Jun 2026 11:39:12 +0800
Subject: [PATCH 8/8] fix: resolve remaining audit findings (scanner shrink,
 UTC dates, dashboard cache)

- Scanner: the no-growth branch now also updates processed_files.lines, so a
  transcript rewritten shorter (compaction) isn't skipped forever and later
  appends are still ingested.
- CLI: `today` / `week` compute their window in UTC to match UTC transcript
  timestamps (drop now-unused `date` import).
- Dashboard: cache the /api/data payload keyed on DB path + mtime so the 30s
  poll doesn't re-run every query when nothing changed; getRangeBounds now uses
  UTC date math (week/month/relative) for consistency with the data.

Tests: 154 pass (+4). Inline JS passes node --check.
---
 CHANGELOG.md              |   6 +++
 cli.py                    |  13 +++--
 dashboard.py              |  38 +++++++++----
 scanner.py                |  10 ++--
 tests/test_audit_fixes.py | 109 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 161 insertions(+), 15 deletions(-)
 create mode 100644 tests/test_audit_fixes.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ebdce371..3ce271ce 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,12 @@
 
 - Footer now notes that figures are transcript-derived estimates (Claude Code doesn't write every request to disk) and may not match Anthropic billing exactly, and that native vs ccusage numbers are shown separately, never summed.
 
+### Fixes
+
+- **Scanner:** a transcript rewritten *shorter* (e.g. compaction) no longer gets skipped forever — the shrink path now syncs the stored line count, not just the mtime, so later appends are still ingested.
+- **CLI:** `today` and `week` now compute their date window in **UTC** to match the UTC transcript timestamps (previously used the local date, off by one near midnight away from UTC).
+- **Dashboard:** the `/api/data` payload is cached keyed on the DB's path + mtime, so the 30-second poll doesn't re-run every GROUP BY/JOIN when nothing changed; any scan/ingest invalidates it. Dashboard date ranges (week/month/relative) are now computed in UTC for consistency with the data.
+
 ## v1.4.0 — 2026-06-15
 
 ### Dashboard
diff --git a/cli.py b/cli.py
index 7dedb1c2..8e6cccac 100644
--- a/cli.py
+++ b/cli.py
@@ -12,7 +12,7 @@
 import sys
 import sqlite3
 from pathlib import Path
-from datetime import datetime, date, timedelta
+from datetime import datetime, timedelta, timezone
 
 from scanner import VERSION
 # Pricing lives in a single module shared by the CLI and (via /api/data) the
@@ -21,6 +21,13 @@
 
 DB_PATH = Path.home() / ".claude" / "usage.db"
 
+
+def _utc_today():
+    """Today's date in UTC. Transcript timestamps are stored as UTC ISO strings,
+    so day-range filters must use the UTC date too — otherwise `today` / `week`
+    are off by one near midnight for users far from UTC."""
+    return datetime.now(timezone.utc).date()
+
 def fmt(n):
     if n >= 1_000_000:
         return f"{n/1_000_000:.2f}M"
@@ -58,7 +65,7 @@ def cmd_scan(projects_dir=None):
 def cmd_today():
     conn = require_db()
     conn.row_factory = sqlite3.Row
-    today = date.today().isoformat()
+    today = _utc_today().isoformat()
 
     rows = conn.execute("""
         SELECT
@@ -128,7 +135,7 @@ def cmd_week():
     conn = require_db()
     conn.row_factory = sqlite3.Row
 
-    today_d = date.today()
+    today_d = _utc_today()
     start_d = today_d - timedelta(days=6)
     start = start_d.isoformat()
     end = today_d.isoformat()
diff --git a/dashboard.py b/dashboard.py
index 9e9baed9..a9921f44 100644
--- a/dashboard.py
+++ b/dashboard.py
@@ -24,11 +24,24 @@
 # would misfire there because the Marketplace publish lags the GitHub release).
 SURFACE = "web"
 
+# Cache the assembled /api/data payload keyed on the DB file's path + mtime, so
+# repeated polls (the client refreshes every 30s) don't re-run every GROUP BY /
+# JOIN when nothing has changed. Any scan/ingest commit bumps usage.db's mtime
+# and invalidates it. Single entry — the dashboard only ever reads one DB.
+_DATA_CACHE = {}
+
 
 def get_dashboard_data(db_path=DB_PATH):
     if not db_path.exists():
         return {"error": "Database not found. Run: python cli.py scan"}
 
+    try:
+        cache_key = (str(db_path), db_path.stat().st_mtime)
+    except OSError:
+        cache_key = None
+    if cache_key is not None and _DATA_CACHE.get("key") == cache_key:
+        return _DATA_CACHE["data"]
+
     conn = sqlite3.connect(db_path)
     # The dashboard reads while a background scan may be committing (cmd_dashboard
     # serves first, scans in a background thread; /api/rescan scans in-process too).
@@ -243,7 +256,7 @@ def get_dashboard_data(db_path=DB_PATH):
 
     conn.close()
 
-    return {
+    result = {
         "all_models":      all_models,
         "daily_by_model":  daily_by_model,
         "hourly_by_model": hourly_by_model,
@@ -255,6 +268,10 @@ def get_dashboard_data(db_path=DB_PATH):
         "pricing":         PRICING,
         "generated_at":    datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
     }
+    if cache_key is not None:
+        _DATA_CACHE["key"] = cache_key
+        _DATA_CACHE["data"] = result
+    return result
 
 
 HTML_TEMPLATE = r"""<!DOCTYPE html>
@@ -840,6 +857,9 @@ def get_dashboard_data(db_path=DB_PATH):
 }
 
 function getRangeBounds(range) {
+  // All bounds are computed in UTC to match the UTC date strings in the data
+  // (substr(timestamp,1,10)); mixing local calendar math with UTC serialization
+  // shifted week/month/relative ranges by a day near boundaries off-UTC.
   if (range === 'all') return { start: null, end: null };
   const today = new Date();
   const iso = d => d.toISOString().slice(0, 10);
@@ -848,25 +868,25 @@ def get_dashboard_data(db_path=DB_PATH):
     return { start: t, end: t };
   }
   if (range === 'week') {
-    const day = today.getDay();
+    const day = today.getUTCDay();
     const diffToMon = day === 0 ? 6 : day - 1;
-    const mon = new Date(today); mon.setDate(today.getDate() - diffToMon);
-    const sun = new Date(mon); sun.setDate(mon.getDate() + 6);
+    const mon = new Date(Date.UTC(today.getUTCFullYear(), today.getUTCMonth(), today.getUTCDate() - diffToMon));
+    const sun = new Date(mon); sun.setUTCDate(mon.getUTCDate() + 6);
     return { start: iso(mon), end: iso(sun) };
   }
   if (range === 'month') {
-    const start = new Date(today.getFullYear(), today.getMonth(), 1);
-    const end = new Date(today.getFullYear(), today.getMonth() + 1, 0);
+    const start = new Date(Date.UTC(today.getUTCFullYear(), today.getUTCMonth(), 1));
+    const end = new Date(Date.UTC(today.getUTCFullYear(), today.getUTCMonth() + 1, 0));
     return { start: iso(start), end: iso(end) };
   }
   if (range === 'prev-month') {
-    const start = new Date(today.getFullYear(), today.getMonth() - 1, 1);
-    const end = new Date(today.getFullYear(), today.getMonth(), 0);
+    const start = new Date(Date.UTC(today.getUTCFullYear(), today.getUTCMonth() - 1, 1));
+    const end = new Date(Date.UTC(today.getUTCFullYear(), today.getUTCMonth(), 0));
     return { start: iso(start), end: iso(end) };
   }
   const days = range === '7d' ? 7 : range === '30d' ? 30 : 90;
   const d = new Date();
-  d.setDate(d.getDate() - days);
+  d.setUTCDate(d.getUTCDate() - days);
   return { start: iso(d), end: null };
 }
 
diff --git a/scanner.py b/scanner.py
index 4b1e801f..2303ebef 100644
--- a/scanner.py
+++ b/scanner.py
@@ -718,9 +718,13 @@ def scan(projects_dir=None, projects_dirs=None, db_path=DB_PATH, verbose=True):
                 print(f"  Warning: {e}")
 
             if line_count <= old_lines:
-                # File didn't grow (mtime changed but no new content)
-                conn.execute("UPDATE processed_files SET mtime = ? WHERE path = ?",
-                             (mtime, filepath))
+                # File didn't grow (mtime changed but no new content) — or it was
+                # rewritten SHORTER (e.g. transcript compaction). Sync BOTH mtime
+                # and the stored line count: if a shrunk file kept its old, larger
+                # `lines`, the next scan's mtime check would skip it forever and
+                # later appends past the new length would never be re-ingested.
+                conn.execute("UPDATE processed_files SET mtime = ?, lines = ? WHERE path = ?",
+                             (mtime, line_count, filepath))
                 conn.commit()
                 skipped_files += 1
                 continue
diff --git a/tests/test_audit_fixes.py b/tests/test_audit_fixes.py
new file mode 100644
index 00000000..4d435f94
--- /dev/null
+++ b/tests/test_audit_fixes.py
@@ -0,0 +1,109 @@
+"""Regression tests for the three remaining audit findings:
+1. scanner shrink/compaction permanent-skip
+2. CLI today/week UTC date
+3. dashboard /api/data result cache
+"""
+
+import json
+import os
+import sqlite3
+import time
+import tempfile
+import unittest
+from datetime import datetime, timezone
+from pathlib import Path
+
+import dashboard
+from scanner import scan
+
+NL = chr(10)
+
+
+def _assistant(message_id, session_id="s1", inp=100, out=50,
+               ts="2026-06-17T10:00:00Z"):
+    return json.dumps({
+        "type": "assistant", "sessionId": session_id, "timestamp": ts,
+        "cwd": "/home/user/proj",
+        "message": {"id": message_id, "model": "claude-opus-4-8",
+                    "usage": {"input_tokens": inp, "output_tokens": out,
+                              "cache_read_input_tokens": 0,
+                              "cache_creation_input_tokens": 0},
+                    "content": []},
+    })
+
+
+class TestScannerShrinkNotStuck(unittest.TestCase):
+    """A file rewritten shorter must update its stored line count, so later
+    growth is still detected (was: skipped forever)."""
+
+    def setUp(self):
+        self.tmp = Path(tempfile.mkdtemp())
+        self.projects = self.tmp / "projects" / "p"
+        self.projects.mkdir(parents=True)
+        self.db = self.tmp / "usage.db"
+        self.f = self.projects / "sess.jsonl"
+
+    def _scan(self):
+        return scan(projects_dir=self.tmp / "projects", db_path=self.db, verbose=False)
+
+    def _lines_recorded(self):
+        conn = sqlite3.connect(self.db)
+        row = conn.execute("SELECT lines FROM processed_files").fetchone()
+        conn.close()
+        return row[0]
+
+    def test_shrink_updates_lines_and_allows_future_growth(self):
+        # Initial: 2 lines.
+        self.f.write_text(_assistant("m1") + NL + _assistant("m2") + NL)
+        self._scan()
+        self.assertEqual(self._lines_recorded(), 2)
+
+        # Rewrite SHORTER: 1 line (compaction). mtime must change.
+        time.sleep(0.05)
+        self.f.write_text(_assistant("m3") + NL)
+        self._scan()
+        # Bug fix: stored line count follows the shrink (was stuck at 2).
+        self.assertEqual(self._lines_recorded(), 1)
+
+        # Append a new turn -> file grows to 2 lines; must be detected & ingested
+        # (pre-fix this was permanently skipped because stored lines stayed 2).
+        time.sleep(0.05)
+        with open(self.f, "a") as fh:
+            fh.write(_assistant("m4") + NL)
+        self._scan()
+        conn = sqlite3.connect(self.db)
+        got = conn.execute("SELECT 1 FROM turns WHERE message_id='m4'").fetchone()
+        conn.close()
+        self.assertIsNotNone(got, "appended turn after a shrink must be ingested")
+
+
+class TestCliUtcToday(unittest.TestCase):
+    def test_utc_today_matches_utc_clock(self):
+        from cli import _utc_today
+        self.assertEqual(_utc_today(), datetime.now(timezone.utc).date())
+
+
+class TestDashboardCache(unittest.TestCase):
+    def setUp(self):
+        self.tmp = Path(tempfile.mkdtemp())
+        self.projects = self.tmp / "projects" / "p"
+        self.projects.mkdir(parents=True)
+        self.db = self.tmp / "usage.db"
+        (self.projects / "sess.jsonl").write_text(_assistant("m1") + NL)
+        scan(projects_dir=self.tmp / "projects", db_path=self.db, verbose=False)
+
+    def test_cache_hit_returns_same_object(self):
+        d1 = dashboard.get_dashboard_data(self.db)
+        d2 = dashboard.get_dashboard_data(self.db)
+        self.assertIs(d1, d2)  # unchanged DB -> cached payload reused
+
+    def test_mtime_change_invalidates_cache(self):
+        d1 = dashboard.get_dashboard_data(self.db)
+        future = time.time() + 10
+        os.utime(self.db, (future, future))
+        d3 = dashboard.get_dashboard_data(self.db)
+        self.assertIsNot(d3, d1)  # mtime bump -> recomputed
+
+
+if __name__ == "__main__":
+    unittest.main()