From e33426beb5c922fd635cb54bdd87faeb9891203a Mon Sep 17 00:00:00 2001 From: AgentSeal Date: Sat, 20 Jun 2026 21:31:26 +0200 Subject: [PATCH] feat(providers): add ZCode (z.ai GLM-5.2) usage provider Reads ZCode CLI usage from ~/.zcode/cli/db/db.sqlite. The model_usage table has exact per-request tokens; cost is computed from the pricing table since ZCode stores none (GLM-5.2 runs on the z.ai start-plan subscription). - Split cached tokens out of input_tokens (OpenAI-style) so fresh input and cache reads price correctly - Attach each turn's tool calls to one request to avoid double-counting - Map GLM-5.2 to glm-5p1 (GLM-5.1 rate) until LiteLLM lists it - Register as a lazy SQLite provider; add test and provider doc --- docs/providers/README.md | 1 + docs/providers/zcode.md | 89 +++++++++++++ src/models.ts | 3 + src/providers/index.ts | 26 +++- src/providers/zcode.ts | 227 ++++++++++++++++++++++++++++++++++ tests/providers/zcode.test.ts | 141 +++++++++++++++++++++ 6 files changed, 484 insertions(+), 3 deletions(-) create mode 100644 docs/providers/zcode.md create mode 100644 src/providers/zcode.ts create mode 100644 tests/providers/zcode.test.ts diff --git a/docs/providers/README.md b/docs/providers/README.md index 56ac48e0..1a090546 100644 --- a/docs/providers/README.md +++ b/docs/providers/README.md @@ -43,6 +43,7 @@ For the architectural picture, see `../architecture.md`. 
| [OpenCode](opencode.md) | SQLite | `src/providers/opencode.ts` | `tests/providers/opencode.test.ts` | | [Warp](warp.md) | SQLite | `src/providers/warp.ts` | `tests/providers/warp.test.ts` | | [Vercel AI Gateway](vercel-gateway.md) | REST API | `src/providers/vercel-gateway.ts` | `tests/providers/vercel-gateway.test.ts` | +| [ZCode](zcode.md) | SQLite | `src/providers/zcode.ts` | `tests/providers/zcode.test.ts` | ### Shared diff --git a/docs/providers/zcode.md b/docs/providers/zcode.md new file mode 100644 index 00000000..9d421391 --- /dev/null +++ b/docs/providers/zcode.md @@ -0,0 +1,89 @@ +# ZCode + +ZCode CLI coding agent (z.ai), running GLM-5.2 over the z.ai start-plan. + +- **Source:** `src/providers/zcode.ts` +- **Loading:** lazy (`src/providers/index.ts`). Lazy because we read ZCode's SQLite database with `node:sqlite`. +- **Test:** `tests/providers/zcode.test.ts` (3 tests, fixture-based) + +## Where it reads from + +ZCode keeps a single global SQLite database for the CLI. + +| Source | Path | +|---|---| +| ZCode CLI db | `~/.zcode/cli/db/db.sqlite` | + +The desktop app dir (`~/Library/Application Support/ZCode`) only holds Electron runtime state, and the JSONL activity log (`~/.zcode/cli/log/*.jsonl`) redacts token counts, so neither is used. + +## Storage format + +SQLite. Schema verified against CLI db v0.14.8. Three tables matter: + +```sql +CREATE TABLE session ( + id TEXT PRIMARY KEY, + directory TEXT NOT NULL, + ... +); + +CREATE TABLE model_usage ( + id TEXT PRIMARY KEY, + session_id TEXT NOT NULL, + turn_id TEXT, + model_id TEXT NOT NULL, + input_tokens INTEGER NOT NULL DEFAULT 0, + output_tokens INTEGER NOT NULL DEFAULT 0, + reasoning_tokens INTEGER NOT NULL DEFAULT 0, + cache_creation_input_tokens INTEGER NOT NULL DEFAULT 0, + cache_read_input_tokens INTEGER NOT NULL DEFAULT 0, + started_at INTEGER NOT NULL, + completed_at INTEGER, + ... 
+); + +CREATE TABLE tool_usage ( + session_id TEXT NOT NULL, + turn_id TEXT, + tool_name TEXT NOT NULL, + started_at INTEGER NOT NULL, + ... +); +``` + +## Caching + +None at the provider level. + +## Deduplication + +Per `zcode:<model_usage.id>` (`zcode.ts`). `model_usage.id` is the row primary key, unique per request. + +## What we extract + +| codeburn field | ZCode source | +|---|---| +| `inputTokens` | `model_usage.input_tokens` minus cache-read and cache-creation tokens (see quirks) | +| `outputTokens` | `model_usage.output_tokens` | +| `reasoningTokens` | `model_usage.reasoning_tokens` | +| `cacheCreationInputTokens` | `model_usage.cache_creation_input_tokens` | +| `cacheReadInputTokens` | `model_usage.cache_read_input_tokens` | +| `costUSD` | computed by `calculateCost` (ZCode stores no cost) | +| `model` | `model_usage.model_id` (e.g. `GLM-5.2`) | +| `timestamp` | `model_usage.completed_at` if set, otherwise `started_at` (epoch ms) | +| `tools` | `tool_usage.tool_name` for the turn, attached to one request per turn | + +## Quirks worth knowing + +- **Cached tokens are folded into `input_tokens` (OpenAI-style).** The row's `input_tokens` is the full prompt size including cache reads/writes, and `provider_total_tokens = input_tokens + output_tokens`. The parser subtracts `cache_read_input_tokens` and `cache_creation_input_tokens` from `input_tokens` so fresh input bills at the input rate and cached at the cache-read rate. Confirmed against the nested Anthropic usage in `provider_metadata_json` (e.g. 100 input = 36 fresh + 64 cached). +- **No cost is stored anywhere.** GLM-5.2 runs on z.ai's `start-plan` subscription, so ZCode logs tokens only. CodeBurn computes a notional cost from the pricing table. +- **GLM-5.2 is priced via an alias.** LiteLLM does not list GLM-5.2 yet, so `GLM-5.2` maps to `glm-5p1` (GLM-5.1) in `BUILTIN_ALIASES` (`src/models.ts`). Reports therefore show the model as `glm-5p1`, the same way any aliased model displays as its priced-as target.
Drop the alias once LiteLLM adds GLM-5.2. +- **Timestamps are milliseconds.** Unlike Crush (seconds), ZCode stores epoch ms; the parser passes them straight to `Date`. +- **Tools are attached per turn, not per request.** `tool_usage` links to a turn, not a specific `model_usage` row, so each turn's tools are attached to its first request to avoid double-counting. Bash command text is not stored, so `bashCommands` is always empty. + +## When fixing a bug here + +1. Confirm the schema against a real ZCode install; copy `~/.zcode/cli/db/db.sqlite` to a temp file before querying so you do not lock the live db. +2. If costs are $0, check that `GLM-5.2` (or the current model id) still resolves through `BUILTIN_ALIASES` to a priced model. +3. If tokens look ~8x too high, someone likely removed the cache-subtraction in the input normalization; the row's `input_tokens` already includes cached tokens. +4. New fixtures go under the inline schema in `tests/providers/zcode.test.ts`. diff --git a/src/models.ts b/src/models.ts index 658d3fe1..d929edaf 100644 --- a/src/models.ts +++ b/src/models.ts @@ -322,6 +322,9 @@ const BUILTIN_ALIASES: Record = { 'gemini-3-pro': 'gemini-3-pro-preview', 'gemini-3.1-flash-image': 'gemini-3.1-flash-image-preview', 'gemini-3.1-flash-lite': 'gemini-3.1-flash-lite-preview', + // ZCode runs GLM-5.2 through z.ai's start-plan subscription; it isn't in + // LiteLLM yet. Price as the nearest released sibling (GLM-5.1) until it is. 
+  'GLM-5.2': 'glm-5p1',
 }
 
 let userAliases: Record<string, string> = {}
diff --git a/src/providers/index.ts b/src/providers/index.ts
index 0a36edea..3fb5580c 100644
--- a/src/providers/index.ts
+++ b/src/providers/index.ts
@@ -154,11 +154,33 @@ async function loadCrush(): Promise<Provider | null> {
   }
 }
 
+let zcodeProvider: Provider | null = null
+let zcodeLoadAttempted = false
+
+/**
+ * Loads the ZCode provider on first use and caches the result.
+ *
+ * The attempt flag is set before the dynamic import settles, so the import runs
+ * at most once: a failed load keeps returning null, and a call that arrives
+ * while the first import is still pending also sees null.
+ */
+async function loadZcode(): Promise<Provider | null> {
+  if (zcodeLoadAttempted) return zcodeProvider
+  zcodeLoadAttempted = true
+  try {
+    const { zcode } = await import('./zcode.js')
+    zcodeProvider = zcode
+    return zcode
+  } catch {
+    return null
+  }
+}
+
 const coreProviders: Provider[] = [claude, cline, codebuff, codex, copilot, devin, droid, gemini, ibmBob, kiloCode, kiro, kimi, mistralVibe, mux, openclaw, pi, omp, qwen, rooCode, zerostack, grok]
 
 // Lazily loaded providers, listed by name so --provider validation works even
 // when an optional module fails to load. Must stay in sync with getAllProviders.
-const lazyProviderNames = ['antigravity', 'forge', 'goose', 'cursor', 'opencode', 'cursor-agent', 'crush', 'warp', 'vercel-gateway']
+const lazyProviderNames = ['antigravity', 'forge', 'goose', 'cursor', 'opencode', 'cursor-agent', 'crush', 'warp', 'vercel-gateway', 'zcode']
 
 // Canonical set of every provider name (core + lazy), used to validate the
 // --provider CLI flag. Computed lazily so importing this module never depends on
@@ -173,8 +195,8 @@ export function allProviderNames(): readonly string[] {
 }
 
 export async function getAllProviders(): Promise<Provider[]> {
-  const [ag, forge, gs, cursor, opencode, cursorAgent, crush, warp, vercelGw] = await Promise.all([
-    loadAntigravity(), loadForge(), loadGoose(), loadCursor(), loadOpenCode(), loadCursorAgent(), loadCrush(), loadWarp(), loadVercelGateway(),
+  const [ag, forge, gs, cursor, opencode, cursorAgent, crush, warp, vercelGw, zc] = await Promise.all([
+    loadAntigravity(), loadForge(), loadGoose(), loadCursor(), loadOpenCode(), loadCursorAgent(), loadCrush(), loadWarp(), loadVercelGateway(), loadZcode(),
   ])
   const all = [...coreProviders]
   if (ag) all.push(ag)
@@ -186,6 +208,7 @@ export async function getAllProviders(): Promise<Provider[]> {
   if (crush) all.push(crush)
   if (warp) all.push(warp)
   if (vercelGw) all.push(vercelGw)
+  if (zc) all.push(zc)
   return all
 }
 
@@ -241,5 +264,9 @@ export async function getProvider(name: string): Promise<Provider | undefined> {
     const vg = await loadVercelGateway()
     return vg ?? undefined
   }
+  if (name === 'zcode') {
+    const z = await loadZcode()
+    return z ?? undefined
+  }
   return coreProviders.find(p => p.name === name)
 }
diff --git a/src/providers/zcode.ts b/src/providers/zcode.ts
new file mode 100644
index 00000000..d9eaf47f
--- /dev/null
+++ b/src/providers/zcode.ts
@@ -0,0 +1,227 @@
+import { join } from 'path'
+import { homedir } from 'os'
+
+import { calculateCost } from '../models.js'
+import { isSqliteAvailable, getSqliteLoadError, openDatabase, type SqliteDatabase } from '../sqlite.js'
+import type { Provider, SessionSource, SessionParser, ParsedProviderCall } from './types.js'
+
+/// ZCode (CLI v0.14.x) records usage in a single SQLite database at
+/// ~/.zcode/cli/db/db.sqlite. We read it because the other on-disk sources are
+/// unusable for billing: the JSONL activity log redacts token counts, and no
+/// source stores a dollar cost (GLM-5.2 runs on z.ai's start-plan subscription).
+/// Tokens are exact; cost is computed from the pricing table. Schema verified
+/// against db v0.14.8 on 2026-06-20.
+
+/** Columns read from `session` during discovery. */
+type SessionRow = {
+  id: string
+  directory: string
+}
+
+/** Columns read from `model_usage`; each row is one model request. */
+type UsageRow = {
+  id: string
+  turn_id: string | null
+  model_id: string
+  input_tokens: number
+  output_tokens: number
+  reasoning_tokens: number
+  cache_creation_input_tokens: number
+  cache_read_input_tokens: number
+  started_at: number
+  completed_at: number | null
+}
+
+/** Columns read from `tool_usage`; a tool call links to a turn, not to a request. */
+type ToolRow = {
+  turn_id: string | null
+  tool_name: string
+}
+
+/** Returns the override when given, otherwise ~/.zcode/cli/db/db.sqlite. */
+function getDbPath(override?: string): string {
+  return override ?? join(homedir(), '.zcode', 'cli', 'db', 'db.sqlite')
+}
+
+/** Builds a project label from a directory: drops one leading `/`, then maps every `/` to `-`. */
+function sanitizeProject(path: string): string {
+  return path.replace(/^\//, '').replace(/\//g, '-')
+}
+
+/**
+ * Converts epoch milliseconds to an ISO-8601 string.
+ *
+ * Null, non-finite, non-positive and out-of-range values all map to the Unix
+ * epoch so a single bad timestamp cannot abort a session.
+ */
+function epochMsToIso(ms: number | null): string {
+  if (ms === null || !Number.isFinite(ms) || ms <= 0) return new Date(0).toISOString()
+  const date = new Date(ms)
+  // A finite value beyond the Date range (about 8.64e15 ms) yields an invalid
+  // Date, and toISOString() throws RangeError on one.
+  return Number.isNaN(date.getTime()) ? new Date(0).toISOString() : date.toISOString()
+}
+
+/**
+ * Probes the two tables every read depends on.
+ *
+ * @returns false when either probe query throws, true otherwise.
+ */
+function validateSchema(db: SqliteDatabase): boolean {
+  try {
+    db.query<{ cnt: number }>('SELECT COUNT(*) as cnt FROM model_usage LIMIT 1')
+    db.query<{ cnt: number }>('SELECT COUNT(*) as cnt FROM session LIMIT 1')
+    return true
+  } catch {
+    return false
+  }
+}
+
+/**
+ * Lists one source per session that has at least one non-zero token counter.
+ *
+ * Best-effort: an unopenable database, a failed schema probe or a failing
+ * query all yield an empty list. The database is closed before returning.
+ */
+function discover(dbPath: string): SessionSource[] {
+  let db: SqliteDatabase
+  try {
+    db = openDatabase(dbPath)
+  } catch {
+    return []
+  }
+  try {
+    if (!validateSchema(db)) return []
+    const rows = db.query<SessionRow>(
+      `SELECT DISTINCT s.id as id, s.directory as directory
+       FROM session s
+       JOIN model_usage m ON m.session_id = s.id
+       WHERE m.input_tokens > 0 OR m.output_tokens > 0 OR m.reasoning_tokens > 0
+         OR m.cache_read_input_tokens > 0 OR m.cache_creation_input_tokens > 0`,
+    )
+    return rows.map(row => ({
+      path: `${dbPath}:${row.id}`,
+      project: sanitizeProject(row.directory),
+      provider: 'zcode',
+    }))
+  } catch {
+    return []
+  } finally {
+    db.close()
+  }
+}
+
+/**
+ * Collects each turn's tool names in start order.
+ *
+ * validateSchema does not probe tool_usage, so the query can still fail on a
+ * database that passed validation. Tools are supplementary to token usage:
+ * in that case return an empty map rather than failing the whole session.
+ */
+function loadToolsByTurn(db: SqliteDatabase, sessionId: string): Map<string, string[]> {
+  const toolsByTurn = new Map<string, string[]>()
+  try {
+    const toolRows = db.query<ToolRow>(
+      `SELECT turn_id, tool_name FROM tool_usage
+       WHERE session_id = ? AND turn_id IS NOT NULL
+       ORDER BY started_at ASC`,
+      [sessionId],
+    )
+    for (const tool of toolRows) {
+      if (!tool.turn_id) continue
+      const list = toolsByTurn.get(tool.turn_id) ?? []
+      list.push(tool.tool_name)
+      toolsByTurn.set(tool.turn_id, list)
+    }
+  } catch {
+    // tool_usage missing or unreadable: report usage without tools.
+    toolsByTurn.clear()
+  }
+  return toolsByTurn
+}
+
+/**
+ * Builds the parser for one discovered session.
+ *
+ * @param source   Source whose path is `<dbPath>:<sessionId>`, as produced by discover.
+ * @param seenKeys Shared dedup set; every emitted row adds `zcode:<model_usage.id>`
+ *                 and rows whose key is already present are skipped.
+ */
+function createParser(source: SessionSource, seenKeys: Set<string>): SessionParser {
+  return {
+    async *parse(): AsyncGenerator<ParsedProviderCall> {
+      if (!isSqliteAvailable()) {
+        process.stderr.write(getSqliteLoadError() + '\n')
+        return
+      }
+
+      // Source paths are `<dbPath>:<sessionId>`. Split from the right so a colon
+      // in the path (Windows drive letter) doesn't corrupt the session id.
+      const segments = source.path.split(':')
+      const sessionId = segments[segments.length - 1]!
+      const dbPath = segments.slice(0, -1).join(':')
+
+      let db: SqliteDatabase
+      try {
+        db = openDatabase(dbPath)
+      } catch (err) {
+        process.stderr.write(
+          `codeburn: cannot open ZCode database: ${err instanceof Error ? err.message : err}\n`,
+        )
+        return
+      }
+
+      try {
+        if (!validateSchema(db)) return
+
+        // model_usage rows don't link to individual tool calls, only to a turn,
+        // so collect each turn's tools and attach them to one request per turn
+        // (below) to avoid double-counting across a turn's multiple requests.
+        const toolsByTurn = loadToolsByTurn(db, sessionId)
+
+        const rows = db.query<UsageRow>(
+          `SELECT id, turn_id, model_id, input_tokens, output_tokens, reasoning_tokens,
+                  cache_creation_input_tokens, cache_read_input_tokens, started_at, completed_at
+           FROM model_usage WHERE session_id = ?
+           ORDER BY started_at ASC`,
+          [sessionId],
+        )
+
+        const turnsWithToolsEmitted = new Set<string>()
+
+        for (const row of rows) {
+          const cacheRead = row.cache_read_input_tokens ?? 0
+          const cacheCreation = row.cache_creation_input_tokens ?? 0
+          const output = row.output_tokens ?? 0
+          const reasoning = row.reasoning_tokens ?? 0
+          // ZCode folds cached tokens into input_tokens (OpenAI-style). Split
+          // them back out so fresh input bills at the input rate and cached at
+          // the cache-read rate, matching the pricing table's Anthropic-style
+          // semantics. Clamped at zero in case the cache counters exceed the total.
+          const freshInput = Math.max(0, (row.input_tokens ?? 0) - cacheRead - cacheCreation)
+
+          // Rows with no tokens at all carry nothing to bill or report.
+          if (freshInput === 0 && output === 0 && reasoning === 0 && cacheRead === 0 && cacheCreation === 0) {
+            continue
+          }
+
+          const dedupKey = `zcode:${row.id}`
+          if (seenKeys.has(dedupKey)) continue
+          seenKeys.add(dedupKey)
+
+          // The first emitted request of a turn carries that turn's tools;
+          // later requests of the same turn get an empty list.
+          let tools: string[] = []
+          if (row.turn_id && !turnsWithToolsEmitted.has(row.turn_id)) {
+            const turnTools = toolsByTurn.get(row.turn_id)
+            if (turnTools && turnTools.length > 0) {
+              tools = turnTools
+              turnsWithToolsEmitted.add(row.turn_id)
+            }
+          }
+
+          const model = row.model_id
+          const costUSD = calculateCost(model, freshInput, output, cacheCreation, cacheRead, 0)
+
+          yield {
+            provider: 'zcode',
+            model,
+            inputTokens: freshInput,
+            outputTokens: output,
+            cacheCreationInputTokens: cacheCreation,
+            cacheReadInputTokens: cacheRead,
+            cachedInputTokens: 0,
+            reasoningTokens: reasoning,
+            webSearchRequests: 0,
+            costUSD,
+            tools,
+            bashCommands: [],
+            timestamp: epochMsToIso(row.completed_at ?? row.started_at),
+            speed: 'standard',
+            deduplicationKey: dedupKey,
+            turnId: row.turn_id ?? undefined,
+            userMessage: '',
+            sessionId,
+          }
+        }
+      } finally {
+        db.close()
+      }
+    },
+  }
+}
+
+/**
+ * Creates the ZCode provider.
+ *
+ * @param dbPathOverride Database file to read instead of the default under the
+ *                       home directory (used by the tests).
+ */
+export function createZcodeProvider(dbPathOverride?: string): Provider {
+  const dbPath = getDbPath(dbPathOverride)
+  return {
+    name: 'zcode',
+    displayName: 'ZCode',
+
+    modelDisplayName(model: string): string {
+      return model
+    },
+
+    toolDisplayName(rawTool: string): string {
+      return rawTool
+    },
+
+    async discoverSessions(): Promise<SessionSource[]> {
+      if (!isSqliteAvailable()) return []
+      return discover(dbPath)
+    },
+
+    createSessionParser(source: SessionSource, seenKeys: Set<string>): SessionParser {
+      return createParser(source, seenKeys)
+    },
+  }
+}
+
+/** Provider instance bound to the default database path. */
+export const zcode = createZcodeProvider()
diff --git a/tests/providers/zcode.test.ts b/tests/providers/zcode.test.ts
new file mode 100644
index 00000000..f2057378
--- /dev/null
+++ b/tests/providers/zcode.test.ts
@@ -0,0 +1,141 @@
+import { mkdtemp, rm } from 'fs/promises'
+import { join } from 'path'
+import { tmpdir } from 'os'
+import { createRequire } from 'node:module'
+
+import { describe, it, expect, beforeEach, afterEach } from 'vitest'
+import { isSqliteAvailable } from '../../src/sqlite.js'
+import { createZcodeProvider } from '../../src/providers/zcode.js'
+import type { ParsedProviderCall } from '../../src/providers/types.js'
+
+const requireForTest = createRequire(import.meta.url)
+
+let tmpRoot: string
+
+beforeEach(async () => {
+  tmpRoot = await mkdtemp(join(tmpdir(), 'zcode-test-'))
+})
+
+afterEach(async () => {
+  await rm(tmpRoot, { recursive: true, force: true })
+})
+
+// Minimal subset of the real ZCode schema (db v0.14.8) covering only the
+// columns the provider reads.
+// Creates <dir>/db.sqlite with empty session, model_usage and tool_usage
+// tables, closes the handle and returns the file path.
+function createZcodeDb(dir: string): string {
+  const dbPath = join(dir, 'db.sqlite')
+  // node:sqlite is required at call time through the createRequire handle.
+  const { DatabaseSync: Database } = requireForTest('node:sqlite')
+  const db = new Database(dbPath)
+  db.exec(`
+    CREATE TABLE session (
+      id TEXT PRIMARY KEY,
+      directory TEXT NOT NULL
+    )
+  `)
+  db.exec(`
+    CREATE TABLE model_usage (
+      id TEXT PRIMARY KEY,
+      session_id TEXT NOT NULL,
+      turn_id TEXT,
+      model_id TEXT NOT NULL,
+      input_tokens INTEGER NOT NULL DEFAULT 0,
+      output_tokens INTEGER NOT NULL DEFAULT 0,
+      reasoning_tokens INTEGER NOT NULL DEFAULT 0,
+      cache_creation_input_tokens INTEGER NOT NULL DEFAULT 0,
+      cache_read_input_tokens INTEGER NOT NULL DEFAULT 0,
+      started_at INTEGER NOT NULL,
+      completed_at INTEGER
+    )
+  `)
+  db.exec(`
+    CREATE TABLE tool_usage (
+      id TEXT PRIMARY KEY,
+      session_id TEXT NOT NULL,
+      turn_id TEXT,
+      tool_name TEXT NOT NULL,
+      started_at INTEGER NOT NULL
+    )
+  `)
+  db.close()
+  return dbPath
+}
+
+// Seeds one session with a single GLM-5.2 request whose 9125 input tokens
+// include 8064 cached, plus two tool calls in the same turn.
+function seed(dbPath: string): void {
+  const { DatabaseSync: Database } = requireForTest('node:sqlite')
+  const db = new Database(dbPath)
+  try {
+    db.prepare('INSERT INTO session (id, directory) VALUES (?, ?)').run('sess-1', '/Users/me/proj')
+    // One request in turn-1: 9125 input tokens of which 8064 are cache reads,
+    // 27 output tokens, no reasoning and no cache creation.
+    db.prepare(
+      `INSERT INTO model_usage
+         (id, session_id, turn_id, model_id, input_tokens, output_tokens, reasoning_tokens,
+          cache_creation_input_tokens, cache_read_input_tokens, started_at, completed_at)
+       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
+    ).run('mu-1', 'sess-1', 'turn-1', 'GLM-5.2', 9125, 27, 0, 0, 8064, 1781981181862, 1781981202412)
+    // Two tool calls in the same turn; Bash starts before Read.
+    db.prepare(
+      'INSERT INTO tool_usage (id, session_id, turn_id, tool_name, started_at) VALUES (?, ?, ?, ?, ?)',
+    ).run('tu-1', 'sess-1', 'turn-1', 'Bash', 1781981299176)
+    db.prepare(
+      'INSERT INTO tool_usage (id, session_id, turn_id, tool_name, started_at) VALUES (?, ?, ?, ?, ?)',
+    ).run('tu-2', 'sess-1', 'turn-1', 'Read', 1781981315829)
+  } finally {
+    db.close()
+  }
+}
+
+// Drains a parser's async generator into an array.
+async function collect(parser: { parse(): AsyncGenerator<ParsedProviderCall> }): Promise<ParsedProviderCall[]> {
+  const out: ParsedProviderCall[] = []
+  for await (const call of parser.parse()) out.push(call)
+  return out
+}
+
+// Every test returns early, and therefore passes vacuously, when SQLite is
+// unavailable in the running Node build.
+describe('zcode provider', () => {
+  it('discovers sessions that have usage', async () => {
+    if (!isSqliteAvailable()) return
+    const dbPath = createZcodeDb(tmpRoot)
+    seed(dbPath)
+
+    const provider = createZcodeProvider(dbPath)
+    const sessions = await provider.discoverSessions()
+
+    expect(sessions).toHaveLength(1)
+    expect(sessions[0]?.provider).toBe('zcode')
+    expect(sessions[0]?.project).toBe('Users-me-proj')
+  })
+
+  it('splits cached tokens out of input and prices via the GLM-5.2 alias', async () => {
+    if (!isSqliteAvailable()) return
+    const dbPath = createZcodeDb(tmpRoot)
+    seed(dbPath)
+
+    const provider = createZcodeProvider(dbPath)
+    const [source] = await provider.discoverSessions()
+    const calls = await collect(provider.createSessionParser(source!, new Set<string>()))
+
+    expect(calls).toHaveLength(1)
+    const call = calls[0]!
+    expect(call.model).toBe('GLM-5.2')
+    expect(call.inputTokens).toBe(1061) // 9125 - 8064 cached
+    expect(call.cacheReadInputTokens).toBe(8064)
+    expect(call.outputTokens).toBe(27)
+    expect(call.tools).toEqual(['Bash', 'Read'])
+    expect(call.costUSD).toBeGreaterThan(0)
+  })
+
+  it('does not re-emit rows already in the seen set', async () => {
+    if (!isSqliteAvailable()) return
+    const dbPath = createZcodeDb(tmpRoot)
+    seed(dbPath)
+
+    const provider = createZcodeProvider(dbPath)
+    const [source] = await provider.discoverSessions()
+    // The same set is handed to both parsers, so the second run finds every key.
+    const seen = new Set<string>()
+
+    const first = await collect(provider.createSessionParser(source!, seen))
+    const second = await collect(provider.createSessionParser(source!, seen))
+
+    expect(first).toHaveLength(1)
+    expect(second).toHaveLength(0)
+  })
+})