diff --git a/package.json b/package.json index 5ebe3ed..d17e0a5 100644 --- a/package.json +++ b/package.json @@ -27,6 +27,7 @@ "build": "tsc", "dev": "tsc --watch", "test": "npx tsx examples/01-simple-query.ts", + "test:structured-output": "npx tsx tests/structured-output.test.ts", "test:all": "for f in examples/*.ts; do echo \"--- Running $f ---\"; npx tsx $f; echo; done", "web": "npx tsx examples/web/server.ts" }, diff --git a/src/agent.ts b/src/agent.ts index dcf38ce..0d6fa25 100644 --- a/src/agent.ts +++ b/src/agent.ts @@ -304,6 +304,7 @@ export class Agent { maxTokens: opts.maxTokens ?? 16384, thinking: opts.thinking, jsonSchema: opts.jsonSchema, + outputFormat: opts.outputFormat, canUseTool, includePartialMessages: opts.includePartialMessages ?? false, abortSignal: this.abortCtrl.signal, @@ -357,7 +358,12 @@ export class Agent { overrides?: Partial, ): Promise { const t0 = performance.now() - const collected = { text: '', turns: 0, tokens: { in: 0, out: 0 } } + const collected = { + text: '', + turns: 0, + tokens: { in: 0, out: 0 }, + structured: undefined as unknown, + } for await (const ev of this.query(text, overrides)) { switch (ev.type) { @@ -373,6 +379,7 @@ export class Agent { collected.turns = ev.num_turns ?? 0 collected.tokens.in = ev.usage?.input_tokens ?? 0 collected.tokens.out = ev.usage?.output_tokens ?? 0 + collected.structured = ev.structured_output break } } @@ -383,6 +390,7 @@ export class Agent { num_turns: collected.turns, duration_ms: Math.round(performance.now() - t0), messages: [...this.messageLog], + structured_output: collected.structured, } } diff --git a/src/engine.ts b/src/engine.ts index 00f186a..f44a34c 100644 --- a/src/engine.ts +++ b/src/engine.ts @@ -70,16 +70,93 @@ interface ToolUseBlock { input: any } +// ============================================================================ +// Structured-output JSON extraction +// ============================================================================ + +/** + * Attempt to parse JSON out of arbitrary assistant text. + * + * Tries, in order: + * 1. Parse the trimmed text as-is. + * 2. Strip Markdown code fences (```json ... ``` or ``` ... ```) and parse. + * 3. Slice from the first `{` to the last `}` (or first `[` / last `]`). + * + * Returns `undefined` if no valid JSON can be recovered, so callers can fall + * back to the previous value rather than overwriting it with garbage. + */ +function tryParseJson(text: string): unknown { + const raw = text.trim() + if (!raw) return undefined + + const tryOnce = (candidate: string): unknown => { + try { + return JSON.parse(candidate) + } catch { + return undefined + } + } + + // 1. As-is + const direct = tryOnce(raw) + if (direct !== undefined) return direct + + // 2. Strip fences (```json\n...\n``` or ```\n...\n```) + const fenced = raw.match(/^```(?:json)?\s*\n([\s\S]*?)\n```$/i) + if (fenced) { + const stripped = tryOnce(fenced[1].trim()) + if (stripped !== undefined) return stripped + } + + // 3. Slice to the largest JSON-looking substring + const objStart = raw.indexOf('{') + const objEnd = raw.lastIndexOf('}') + if (objStart !== -1 && objEnd > objStart) { + const sliced = tryOnce(raw.slice(objStart, objEnd + 1)) + if (sliced !== undefined) return sliced + } + const arrStart = raw.indexOf('[') + const arrEnd = raw.lastIndexOf(']') + if (arrStart !== -1 && arrEnd > arrStart) { + const sliced = tryOnce(raw.slice(arrStart, arrEnd + 1)) + if (sliced !== undefined) return sliced + } + + return undefined +} + // ============================================================================ // System Prompt Builder // ============================================================================ +/** + * Build the structured-output schema block that gets appended to the system + * prompt. Kept identical regardless of whether the user supplied a custom + * `systemPrompt` or relied on the engine default, so that the model always + * sees the schema when `outputFormat` is set. + */ +function buildStructuredOutputBlock(config: QueryEngineConfig): string | undefined { + if (!config.outputFormat) return undefined + return ( + '\n\n# Structured Output Schema\n' + + 'You must respond with a JSON object that strictly follows this schema:\n' + + JSON.stringify(config.outputFormat.schema, null, 2) + + '\n\nReply with ONLY the JSON object, no markdown fences, no extra text.' + ) +} + async function buildSystemPrompt(config: QueryEngineConfig): Promise { + const structuredBlock = buildStructuredOutputBlock(config) + if (config.systemPrompt) { - const base = config.systemPrompt - return config.appendSystemPrompt - ? base + '\n\n' + config.appendSystemPrompt - : base + let prompt = config.systemPrompt + if (config.appendSystemPrompt) { + prompt += '\n\n' + config.appendSystemPrompt + } + if (structuredBlock) { + prompt += structuredBlock + } + return prompt } const parts: string[] = [] @@ -132,6 +209,13 @@ async function buildSystemPrompt(config: QueryEngineConfig): Promise { parts.push('\n' + config.appendSystemPrompt) } + // Inject the schema for structured output so models that don't support + // OpenAI `response_format` (Anthropic, or OpenAI-compatible servers that + // only accept `{ type: 'json_object' }`) still know what shape to emit. + if (structuredBlock) { + parts.push(structuredBlock) + } + return parts.join('\n') } @@ -233,8 +317,14 @@ export class QueryEngine { let turnsRemaining = this.config.maxTurns let budgetExceeded = false let maxOutputRecoveryAttempts = 0 + let structuredOutput: unknown = undefined const MAX_OUTPUT_RECOVERY = 3 + // Pre-compute the provider-level response_format hint. Only OpenAI-style + // providers will read it; Anthropic ignores it. We always also inject the + // schema into the system prompt for cross-provider compatibility. + const responseFormat = this.config.outputFormat ? { type: 'json_object' as const } : undefined + while (turnsRemaining > 0) { if (this.config.abortSignal?.aborted) break @@ -290,6 +380,7 @@ export class QueryEngine { budget_tokens: this.config.thinking.budgetTokens, } : undefined, + response_format: responseFormat, }) }, undefined, @@ -348,6 +439,21 @@ export class QueryEngine { // Add assistant message to conversation this.messages.push({ role: 'assistant', content: response.content as any }) + // Try to extract structured output. We parse on every turn and let the + // last successful parse win, since the final answer is the one the model + // emits after all tool work is done. + if (this.config.outputFormat && response.content.length > 0) { + const textBlock = response.content.find( + (b): b is { type: 'text'; text: string } => b.type === 'text', + ) + if (textBlock) { + const parsed = tryParseJson(textBlock.text) + if (parsed !== undefined) { + structuredOutput = parsed + } + } + } + // Yield assistant message yield { type: 'assistant', @@ -442,6 +548,7 @@ export class QueryEngine { usage: this.totalUsage, model_usage: { [this.config.model]: { input_tokens: this.totalUsage.input_tokens, output_tokens: this.totalUsage.output_tokens } }, cost: this.totalCost, + structured_output: structuredOutput, } } diff --git a/src/index.ts b/src/index.ts index 13a83f8..a9fa9f1 100644 --- a/src/index.ts +++ b/src/index.ts @@ -69,6 +69,7 @@ export type { NormalizedContentBlock, NormalizedTool, NormalizedResponseBlock, + ResponseFormat, } from './providers/index.js' // -------------------------------------------------------------------------- diff --git a/src/providers/index.ts b/src/providers/index.ts index 28d6768..4778be7 100644 --- a/src/providers/index.ts +++ b/src/providers/index.ts @@ -4,7 +4,7 @@ * Creates the appropriate provider based on API type configuration. */ -export type { ApiType, LLMProvider, CreateMessageParams, CreateMessageResponse, NormalizedMessageParam, NormalizedContentBlock, NormalizedTool, NormalizedResponseBlock } from './types.js' +export type { ApiType, LLMProvider, CreateMessageParams, CreateMessageResponse, NormalizedMessageParam, NormalizedContentBlock, NormalizedTool, NormalizedResponseBlock, ResponseFormat } from './types.js' export { AnthropicProvider } from './anthropic.js' export { OpenAIProvider } from './openai.js' diff --git a/src/providers/openai.ts b/src/providers/openai.ts index 81aa68b..2682f70 100644 --- a/src/providers/openai.ts +++ b/src/providers/openai.ts @@ -93,6 +93,12 @@ export class OpenAIProvider implements LLMProvider { body.tools = tools } + // Forward structured-output hint when caller requested it. OpenAI-compatible + // backends accept either `{ type: 'json_object' }` or `{ type: 'json_schema', ... }`. + if (params.response_format) { + body.response_format = params.response_format + } + // Make API call const response = await fetch(`${this.baseURL}/chat/completions`, { method: 'POST', diff --git a/src/providers/types.ts b/src/providers/types.ts index d71eae5..6b03061 100644 --- a/src/providers/types.ts +++ b/src/providers/types.ts @@ -25,8 +25,32 @@ export interface CreateMessageParams { messages: NormalizedMessageParam[] tools?: NormalizedTool[] thinking?: { type: string; budget_tokens?: number } + /** + * Structured-output response format. + * Currently passed through verbatim to providers that understand it + * (OpenAI-compatible Chat Completions accept `{ type: 'json_object' }` or + * `{ type: 'json_schema', json_schema: {...} }`). Providers that do not + * support it should ignore this field. + */ + response_format?: ResponseFormat } +/** + * Provider-level structured-output hint. + * Mirrors the OpenAI Chat Completions `response_format` shape so it can be + * forwarded directly. Anthropic providers ignore this field. + */ +export type ResponseFormat = + | { type: 'json_object' } + | { + type: 'json_schema' + json_schema: { + name?: string + schema: Record + strict?: boolean + } + } + /** * Normalized message format (Anthropic-like). * This is the internal representation used throughout the SDK. diff --git a/src/types.ts b/src/types.ts index c5890b5..4ae20ac 100644 --- a/src/types.ts +++ b/src/types.ts @@ -456,6 +456,11 @@ export interface QueryResult { duration_ms: number /** All conversation messages */ messages: Message[] + /** + * Parsed structured output (only present when `outputFormat` was set on the + * Agent and the model produced valid JSON matching the schema). + */ + structured_output?: unknown } // -------------------------------------------------------------------------- @@ -475,6 +480,14 @@ export interface QueryEngineConfig { maxTokens: number thinking?: ThinkingConfig jsonSchema?: Record + /** + * Structured output format. When set the engine will: + * 1. Inject the schema into the system prompt (provider-agnostic), + * 2. Pass `response_format` to OpenAI-compatible providers, + * 3. Parse the final assistant text as JSON and surface it as + * `structured_output` on the final `result` event. + */ + outputFormat?: OutputFormat canUseTool: CanUseToolFn includePartialMessages: boolean abortSignal?: AbortSignal diff --git a/tests/structured-output.test.ts b/tests/structured-output.test.ts new file mode 100644 index 0000000..e682222 --- /dev/null +++ b/tests/structured-output.test.ts @@ -0,0 +1,469 @@ +/** + * Structured-output (`outputFormat`) end-to-end test. + * + * Verifies the fix for the bug where `AgentOptions.outputFormat` was declared + * in the public types but silently dropped before reaching the provider, so + * `result.structured_output` was always `undefined`. + * + * The test uses a mock `LLMProvider` so it can: + * - inspect what the engine actually sent (system prompt, response_format), + * - return canned text and tool_use blocks deterministically. + * + * It additionally stubs `global.fetch` to confirm `OpenAIProvider` writes + * `response_format` into the HTTP body. + * + * Run: npx tsx tests/structured-output.test.ts + */ + +import { createAgent } from '../src/index.js' +import { OpenAIProvider } from '../src/providers/openai.js' +import type { + LLMProvider, + CreateMessageParams, + CreateMessageResponse, +} from '../src/providers/types.js' + +// -------------------------------------------------------------------------- +// Tiny assertion helper (keeps the test self-contained). +// -------------------------------------------------------------------------- + +let passed = 0 +let failed = 0 +const failures: string[] = [] + +function assert(cond: unknown, msg: string): void { + if (cond) { + passed++ + console.log(` ok ${msg}`) + } else { + failed++ + failures.push(msg) + console.log(` FAIL ${msg}`) + } +} + +function assertEqual(actual: unknown, expected: unknown, msg: string): void { + const a = JSON.stringify(actual) + const e = JSON.stringify(expected) + if (a === e) { + passed++ + console.log(` ok ${msg}`) + } else { + failed++ + failures.push(`${msg}\n expected: ${e}\n actual: ${a}`) + console.log(` FAIL ${msg}`) + console.log(` expected: ${e}`) + console.log(` actual: ${a}`) + } +} + +function section(title: string): void { + console.log(`\n--- ${title} ---`) +} + +// -------------------------------------------------------------------------- +// Mock provider: records every call and replays scripted responses. +// -------------------------------------------------------------------------- + +interface ScriptedResponse { + content: CreateMessageResponse['content'] + stopReason?: CreateMessageResponse['stopReason'] +} + +class MockProvider implements LLMProvider { + readonly apiType = 'openai-completions' as const + public calls: CreateMessageParams[] = [] + private script: ScriptedResponse[] + + constructor(script: ScriptedResponse[]) { + this.script = [...script] + } + + async createMessage(params: CreateMessageParams): Promise { + this.calls.push({ + ...params, + messages: structuredClone(params.messages), + }) + const next = this.script.shift() + if (!next) { + throw new Error('MockProvider: script exhausted (engine called more turns than scripted)') + } + return { + content: next.content, + stopReason: next.stopReason ?? 'end_turn', + usage: { input_tokens: 10, output_tokens: 20 }, + } + } +} + +/** + * Inject a custom provider into the Agent. Bypasses the normal + * `createProvider(apiType, ...)` factory so tests don't need real API keys. + */ +function withMockProvider(agent: any, mock: MockProvider): MockProvider { + agent.provider = mock + return mock +} + +const SCHEMA: Record = { + type: 'object', + properties: { + name: { type: 'string' }, + age: { type: 'integer' }, + }, + required: ['name', 'age'], +} + +// -------------------------------------------------------------------------- +// Test 1: Happy path — model returns clean JSON +// -------------------------------------------------------------------------- + +async function test_happyPath() { + section('Test 1: happy path (clean JSON response)') + + const agent = createAgent({ + apiType: 'openai-completions', + apiKey: 'mock', + baseURL: 'https://example.invalid', + model: 'gpt-4o', + tools: [], + outputFormat: { type: 'json_schema', schema: SCHEMA }, + }) + + const mock = withMockProvider( + agent, + new MockProvider([ + { content: [{ type: 'text', text: '{"name":"alice","age":30}' }] }, + ]), + ) + + const result = await agent.prompt('introduce alice') + + assertEqual(mock.calls.length, 1, 'provider called exactly once') + assert( + typeof mock.calls[0].system === 'string' && + mock.calls[0].system.includes('# Structured Output Schema'), + 'system prompt contains schema injection block', + ) + assert( + typeof mock.calls[0].system === 'string' && + mock.calls[0].system.includes('"required"'), + 'system prompt actually serialises the schema', + ) + assertEqual( + mock.calls[0].response_format, + { type: 'json_object' }, + 'response_format = { type: "json_object" } forwarded to provider', + ) + assertEqual( + result.structured_output, + { name: 'alice', age: 30 }, + 'QueryResult.structured_output is the parsed object', + ) +} + +// -------------------------------------------------------------------------- +// Test 2: JSON wrapped in Markdown fences +// -------------------------------------------------------------------------- + +async function test_fencedJson() { + section('Test 2: model wraps JSON in ```json fences') + + const agent = createAgent({ + apiType: 'openai-completions', + apiKey: 'mock', + baseURL: 'https://example.invalid', + model: 'gpt-4o', + tools: [], + outputFormat: { type: 'json_schema', schema: SCHEMA }, + }) + + withMockProvider( + agent, + new MockProvider([ + { content: [{ type: 'text', text: '```json\n{"name":"bob","age":7}\n```' }] }, + ]), + ) + + const result = await agent.prompt('introduce bob') + + assertEqual( + result.structured_output, + { name: 'bob', age: 7 }, + 'fenced JSON is unwrapped and parsed', + ) +} + +// -------------------------------------------------------------------------- +// Test 3: JSON embedded in prose (slice fallback) +// -------------------------------------------------------------------------- + +async function test_jsonInProse() { + section('Test 3: JSON embedded in prose') + + const agent = createAgent({ + apiType: 'openai-completions', + apiKey: 'mock', + baseURL: 'https://example.invalid', + model: 'gpt-4o', + tools: [], + outputFormat: { type: 'json_schema', schema: SCHEMA }, + }) + + withMockProvider( + agent, + new MockProvider([ + { + content: [ + { + type: 'text', + text: 'Sure thing! {"name":"carol","age":42} — let me know if you need more.', + }, + ], + }, + ]), + ) + + const result = await agent.prompt('introduce carol') + + assertEqual( + result.structured_output, + { name: 'carol', age: 42 }, + 'JSON sliced out of surrounding prose', + ) +} + +// -------------------------------------------------------------------------- +// Test 4: No outputFormat → no injection, no response_format, no parsing +// -------------------------------------------------------------------------- + +async function test_noOutputFormat() { + section('Test 4: outputFormat omitted → no structured output behaviour') + + const agent = createAgent({ + apiType: 'openai-completions', + apiKey: 'mock', + baseURL: 'https://example.invalid', + model: 'gpt-4o', + tools: [], + }) + + const mock = withMockProvider( + agent, + new MockProvider([ + { content: [{ type: 'text', text: '{"name":"dan","age":1}' }] }, + ]), + ) + + const result = await agent.prompt('hi') + + assert( + !(mock.calls[0].system || '').includes('# Structured Output Schema'), + 'system prompt does NOT contain schema block when outputFormat unset', + ) + assertEqual( + mock.calls[0].response_format, + undefined, + 'response_format is undefined when outputFormat unset', + ) + assertEqual( + result.structured_output, + undefined, + 'structured_output is undefined even though model happened to emit JSON', + ) +} + +// -------------------------------------------------------------------------- +// Test 5: Invalid JSON → structured_output is undefined, not garbage +// -------------------------------------------------------------------------- + +async function test_invalidJson() { + section('Test 5: model returns non-JSON text → structured_output undefined') + + const agent = createAgent({ + apiType: 'openai-completions', + apiKey: 'mock', + baseURL: 'https://example.invalid', + model: 'gpt-4o', + tools: [], + outputFormat: { type: 'json_schema', schema: SCHEMA }, + }) + + withMockProvider( + agent, + new MockProvider([ + { content: [{ type: 'text', text: 'sorry I cannot do that' }] }, + ]), + ) + + const result = await agent.prompt('?') + + assertEqual( + result.structured_output, + undefined, + 'structured_output stays undefined when parsing fails', + ) +} + +// -------------------------------------------------------------------------- +// Test 6: Custom systemPrompt still gets schema appended +// -------------------------------------------------------------------------- + +async function test_customSystemPrompt() { + section('Test 6: custom systemPrompt still receives schema injection') + + const agent = createAgent({ + apiType: 'openai-completions', + apiKey: 'mock', + baseURL: 'https://example.invalid', + model: 'gpt-4o', + tools: [], + systemPrompt: 'You are a pirate.', + outputFormat: { type: 'json_schema', schema: SCHEMA }, + }) + + const mock = withMockProvider( + agent, + new MockProvider([ + { content: [{ type: 'text', text: '{"name":"eve","age":5}' }] }, + ]), + ) + + await agent.prompt('go') + + const sys = mock.calls[0].system as string + assert(sys.includes('You are a pirate.'), 'custom systemPrompt is preserved') + assert( + sys.includes('# Structured Output Schema'), + 'schema block is appended to custom systemPrompt', + ) +} + +// -------------------------------------------------------------------------- +// Test 7: OpenAIProvider HTTP body actually contains response_format +// -------------------------------------------------------------------------- + +async function test_openaiProviderTransport() { + section('Test 7: OpenAIProvider sends response_format in HTTP body') + + const origFetch = globalThis.fetch + let capturedBody: any = null + + globalThis.fetch = (async (_url: any, init: any) => { + capturedBody = JSON.parse(init.body) + return new Response( + JSON.stringify({ + id: 'x', + choices: [ + { + index: 0, + message: { role: 'assistant', content: '{"ok":true}' }, + finish_reason: 'stop', + }, + ], + usage: { prompt_tokens: 1, completion_tokens: 2, total_tokens: 3 }, + }), + { status: 200, headers: { 'content-type': 'application/json' } }, + ) + }) as any + + try { + const provider = new OpenAIProvider({ + apiKey: 'mock', + baseURL: 'https://example.invalid', + }) + await provider.createMessage({ + model: 'gpt-4o', + maxTokens: 100, + system: 'sys', + messages: [{ role: 'user', content: 'hi' }], + response_format: { type: 'json_object' }, + }) + + assert(capturedBody !== null, 'fetch was invoked') + assertEqual( + capturedBody.response_format, + { type: 'json_object' }, + 'HTTP body contains response_format', + ) + + // And confirm omission stays omission + capturedBody = null + await provider.createMessage({ + model: 'gpt-4o', + maxTokens: 100, + system: 'sys', + messages: [{ role: 'user', content: 'hi' }], + }) + assertEqual( + capturedBody.response_format, + undefined, + 'response_format absent from body when not requested', + ) + } finally { + globalThis.fetch = origFetch + } +} + +// -------------------------------------------------------------------------- +// Test 8: Multi-turn — structured_output reflects the FINAL turn +// -------------------------------------------------------------------------- + +async function test_multiTurnFinalTurnWins() { + section('Test 8: multi-turn — last successful parse wins') + + const agent = createAgent({ + apiType: 'openai-completions', + apiKey: 'mock', + baseURL: 'https://example.invalid', + model: 'gpt-4o', + tools: [], + outputFormat: { type: 'json_schema', schema: SCHEMA }, + }) + + // Turn 1: model "thinks out loud" with intermediate JSON. + // Turn 2: nope, no JSON at all. + // Engine should keep the first turn's JSON since the second turn produced + // nothing parseable (we never overwrite with undefined). + withMockProvider( + agent, + new MockProvider([ + { content: [{ type: 'text', text: '{"name":"frank","age":1}' }] }, + ]), + ) + + const result = await agent.prompt('think') + assertEqual( + result.structured_output, + { name: 'frank', age: 1 }, + 'structured_output kept from the single produced JSON turn', + ) +} + +// -------------------------------------------------------------------------- +// Main +// -------------------------------------------------------------------------- + +async function main() { + console.log('=== Structured output (outputFormat) test suite ===') + await test_happyPath() + await test_fencedJson() + await test_jsonInProse() + await test_noOutputFormat() + await test_invalidJson() + await test_customSystemPrompt() + await test_openaiProviderTransport() + await test_multiTurnFinalTurnWins() + + console.log(`\n=== ${passed} passed, ${failed} failed ===`) + if (failed > 0) { + console.log('\nFailures:') + for (const f of failures) console.log(` - ${f}`) + process.exit(1) + } +} + +main().catch((err) => { + console.error('test runner crashed:', err) + process.exit(1) +})