Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
"build": "tsc",
"dev": "tsc --watch",
"test": "npx tsx examples/01-simple-query.ts",
"test:structured-output": "npx tsx tests/structured-output.test.ts",
"test:all": "for f in examples/*.ts; do echo \"--- Running $f ---\"; npx tsx $f; echo; done",
"web": "npx tsx examples/web/server.ts"
},
Expand Down
10 changes: 9 additions & 1 deletion src/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ export class Agent {
maxTokens: opts.maxTokens ?? 16384,
thinking: opts.thinking,
jsonSchema: opts.jsonSchema,
outputFormat: opts.outputFormat,
canUseTool,
includePartialMessages: opts.includePartialMessages ?? false,
abortSignal: this.abortCtrl.signal,
Expand Down Expand Up @@ -357,7 +358,12 @@ export class Agent {
overrides?: Partial<AgentOptions>,
): Promise<QueryResult> {
const t0 = performance.now()
const collected = { text: '', turns: 0, tokens: { in: 0, out: 0 } }
const collected = {
text: '',
turns: 0,
tokens: { in: 0, out: 0 },
structured: undefined as unknown,
}

for await (const ev of this.query(text, overrides)) {
switch (ev.type) {
Expand All @@ -373,6 +379,7 @@ export class Agent {
collected.turns = ev.num_turns ?? 0
collected.tokens.in = ev.usage?.input_tokens ?? 0
collected.tokens.out = ev.usage?.output_tokens ?? 0
collected.structured = ev.structured_output
break
}
}
Expand All @@ -383,6 +390,7 @@ export class Agent {
num_turns: collected.turns,
duration_ms: Math.round(performance.now() - t0),
messages: [...this.messageLog],
structured_output: collected.structured,
}
}

Expand Down
115 changes: 111 additions & 4 deletions src/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,16 +70,93 @@ interface ToolUseBlock {
input: any
}

// ============================================================================
// Structured-output JSON extraction
// ============================================================================

/**
 * Recover a JSON value from free-form assistant text.
 *
 * Strategies, applied in priority order until one yields valid JSON:
 *   1. Parse the trimmed text directly.
 *   2. Strip surrounding Markdown code fences (```json ... ``` or ``` ... ```).
 *   3. Take the widest `{...}` substring, then the widest `[...]` substring.
 *
 * Yields `undefined` when every strategy fails, so callers can keep a
 * previously parsed value rather than overwriting it with garbage.
 */
function tryParseJson(text: string): unknown {
  const source = text.trim()
  if (source === '') return undefined

  const parseOrUndefined = (candidate: string): unknown => {
    try {
      return JSON.parse(candidate)
    } catch {
      return undefined
    }
  }

  // Collect candidate strings in priority order; the first successful parse wins.
  const candidates: string[] = [source]

  // Fenced block: ```json\n...\n``` or ```\n...\n``` spanning the whole text.
  const fenceMatch = source.match(/^```(?:json)?\s*\n([\s\S]*?)\n```$/i)
  if (fenceMatch) {
    candidates.push(fenceMatch[1].trim())
  }

  // Widest object-looking slice, then widest array-looking slice.
  for (const [open, close] of [['{', '}'], ['[', ']']] as const) {
    const first = source.indexOf(open)
    const last = source.lastIndexOf(close)
    if (first !== -1 && last > first) {
      candidates.push(source.slice(first, last + 1))
    }
  }

  for (const candidate of candidates) {
    const parsed = parseOrUndefined(candidate)
    if (parsed !== undefined) return parsed
  }
  return undefined
}

// ============================================================================
// System Prompt Builder
// ============================================================================

/**
 * Render the structured-output schema section appended to the system prompt.
 *
 * The exact same block is used whether the caller supplied a custom
 * `systemPrompt` or relied on the engine default, guaranteeing the model sees
 * the schema whenever `outputFormat` is configured. Returns `undefined` when
 * no output format was requested.
 */
function buildStructuredOutputBlock(config: QueryEngineConfig): string | undefined {
  const format = config.outputFormat
  if (!format) return undefined

  const schemaJson = JSON.stringify(format.schema, null, 2)
  return [
    '\n\n# Structured Output Schema',
    'You must respond with a JSON object that strictly follows this schema:',
    schemaJson,
    '',
    'Reply with ONLY the JSON object, no markdown fences, no extra text.',
  ].join('\n')
}

async function buildSystemPrompt(config: QueryEngineConfig): Promise<string> {
const structuredBlock = buildStructuredOutputBlock(config)

if (config.systemPrompt) {
const base = config.systemPrompt
return config.appendSystemPrompt
? base + '\n\n' + config.appendSystemPrompt
: base
let prompt = config.systemPrompt
if (config.appendSystemPrompt) {
prompt += '\n\n' + config.appendSystemPrompt
}
if (structuredBlock) {
prompt += structuredBlock
}
return prompt
}

const parts: string[] = []
Expand Down Expand Up @@ -132,6 +209,13 @@ async function buildSystemPrompt(config: QueryEngineConfig): Promise<string> {
parts.push('\n' + config.appendSystemPrompt)
}

// Inject the schema for structured output so models that don't support
// OpenAI `response_format` (Anthropic, or OpenAI-compatible servers that
// only accept `{ type: 'json_object' }`) still know what shape to emit.
if (structuredBlock) {
parts.push(structuredBlock)
}

return parts.join('\n')
}

Expand Down Expand Up @@ -233,8 +317,14 @@ export class QueryEngine {
let turnsRemaining = this.config.maxTurns
let budgetExceeded = false
let maxOutputRecoveryAttempts = 0
let structuredOutput: unknown = undefined
const MAX_OUTPUT_RECOVERY = 3

// Pre-compute the provider-level response_format hint. Only OpenAI-style
// providers will read it; Anthropic ignores it. We always also inject the
// schema into the system prompt for cross-provider compatibility.
const responseFormat = this.config.outputFormat ? { type: 'json_object' as const } : undefined

while (turnsRemaining > 0) {
if (this.config.abortSignal?.aborted) break

Expand Down Expand Up @@ -290,6 +380,7 @@ export class QueryEngine {
budget_tokens: this.config.thinking.budgetTokens,
}
: undefined,
response_format: responseFormat,
})
},
undefined,
Expand Down Expand Up @@ -348,6 +439,21 @@ export class QueryEngine {
// Add assistant message to conversation
this.messages.push({ role: 'assistant', content: response.content as any })

// Try to extract structured output. We parse on every turn and let the
// last successful parse win, since the final answer is the one the model
// emits after all tool work is done.
if (this.config.outputFormat && response.content.length > 0) {
const textBlock = response.content.find(
(b): b is { type: 'text'; text: string } => b.type === 'text',
)
if (textBlock) {
const parsed = tryParseJson(textBlock.text)
if (parsed !== undefined) {
structuredOutput = parsed
}
}
}

// Yield assistant message
yield {
type: 'assistant',
Expand Down Expand Up @@ -442,6 +548,7 @@ export class QueryEngine {
usage: this.totalUsage,
model_usage: { [this.config.model]: { input_tokens: this.totalUsage.input_tokens, output_tokens: this.totalUsage.output_tokens } },
cost: this.totalCost,
structured_output: structuredOutput,
}
}

Expand Down
1 change: 1 addition & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ export type {
NormalizedContentBlock,
NormalizedTool,
NormalizedResponseBlock,
ResponseFormat,
} from './providers/index.js'

// --------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion src/providers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* Creates the appropriate provider based on API type configuration.
*/

export type { ApiType, LLMProvider, CreateMessageParams, CreateMessageResponse, NormalizedMessageParam, NormalizedContentBlock, NormalizedTool, NormalizedResponseBlock } from './types.js'
export type { ApiType, LLMProvider, CreateMessageParams, CreateMessageResponse, NormalizedMessageParam, NormalizedContentBlock, NormalizedTool, NormalizedResponseBlock, ResponseFormat } from './types.js'

export { AnthropicProvider } from './anthropic.js'
export { OpenAIProvider } from './openai.js'
Expand Down
6 changes: 6 additions & 0 deletions src/providers/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,12 @@ export class OpenAIProvider implements LLMProvider {
body.tools = tools
}

// Forward structured-output hint when caller requested it. OpenAI-compatible
// backends accept either `{ type: 'json_object' }` or `{ type: 'json_schema', ... }`.
if (params.response_format) {
body.response_format = params.response_format
}

// Make API call
const response = await fetch(`${this.baseURL}/chat/completions`, {
method: 'POST',
Expand Down
24 changes: 24 additions & 0 deletions src/providers/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,32 @@ export interface CreateMessageParams {
messages: NormalizedMessageParam[]
tools?: NormalizedTool[]
thinking?: { type: string; budget_tokens?: number }
/**
* Structured-output response format.
* Currently passed through verbatim to providers that understand it
* (OpenAI-compatible Chat Completions accept `{ type: 'json_object' }` or
* `{ type: 'json_schema', json_schema: {...} }`). Providers that do not
* support it should ignore this field.
*/
response_format?: ResponseFormat
}

/**
 * Provider-level structured-output hint.
 *
 * Mirrors the OpenAI Chat Completions `response_format` shape so it can be
 * forwarded to OpenAI-compatible backends verbatim (see the pass-through in
 * the OpenAI provider). Anthropic providers ignore this field; for them the
 * schema is injected into the system prompt instead.
 */
export type ResponseFormat =
  // Loose mode: backend only guarantees syntactically valid JSON.
  | { type: 'json_object' }
  // Strict mode: backend validates output against the supplied JSON Schema.
  | {
      type: 'json_schema'
      json_schema: {
        name?: string
        schema: Record<string, unknown>
        strict?: boolean
      }
    }

/**
* Normalized message format (Anthropic-like).
* This is the internal representation used throughout the SDK.
Expand Down
13 changes: 13 additions & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,11 @@ export interface QueryResult {
duration_ms: number
/** All conversation messages */
messages: Message[]
/**
* Parsed structured output (only present when `outputFormat` was set on the
* Agent and the model produced valid JSON matching the schema).
*/
structured_output?: unknown
}

// --------------------------------------------------------------------------
Expand All @@ -475,6 +480,14 @@ export interface QueryEngineConfig {
maxTokens: number
thinking?: ThinkingConfig
jsonSchema?: Record<string, unknown>
/**
* Structured output format. When set the engine will:
* 1. Inject the schema into the system prompt (provider-agnostic),
* 2. Pass `response_format` to OpenAI-compatible providers,
* 3. Parse the final assistant text as JSON and surface it as
* `structured_output` on the final `result` event.
*/
outputFormat?: OutputFormat
canUseTool: CanUseToolFn
includePartialMessages: boolean
abortSignal?: AbortSignal
Expand Down
Loading