From 498f5952602e676cef2bd5a70602fd540cdc8485 Mon Sep 17 00:00:00 2001
From: Richard Das <richard@richarddas.com>
Date: Tue, 2 Jun 2026 15:46:51 +0100
Subject: [PATCH] Add Realtime Reasoning API parity

Adds Realtime Reasoning session and response-create types for reasoning effort and parallel tool calls while preserving existing Performance Realtime call sites.

Decodes phased Realtime output for commentary and final answer items across response completion, output item, and conversation item events.

Documents the current Realtime schema mapping and README examples, and removes obsolete Realtime GA/beta terminology.

Adds focused encoding and decoding tests for the new wire shapes and compatibility behavior.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 Documentation/OpenAI/RealtimeSchemaMatrix.md  |  92 +++++++++
 README.md                                     |  74 ++++++--
 .../OpenAI/OpenAIRealtimeMessage.swift        |  42 +++++
 ...OpenAIRealtimeReasoningConfiguration.swift |  24 +++
 ...penAIRealtimeReasoningResponseCreate.swift |  78 ++++++++
 ...ealtimeReasoningSessionConfiguration.swift |  25 +++
 .../OpenAI/OpenAIRealtimeSession.swift        |  19 +-
 .../OpenAIRealtimeSessionConfiguration.swift  |  30 ++-
 .../OpenAI/OpenAIRealtimeSessionUpdate.swift  |  22 +++
 Sources/AIProxy/OpenAI/OpenAIService.swift    |  29 +++
 .../OpenAIRealtimeMessageTests.swift          | 175 ++++++++++++++++++
 .../OpenAIRealtimeSessionEncodingTests.swift  |  82 ++++++++
 12 files changed, 676 insertions(+), 16 deletions(-)
 create mode 100644 Documentation/OpenAI/RealtimeSchemaMatrix.md
 create mode 100644 Sources/AIProxy/OpenAI/OpenAIRealtimeReasoningConfiguration.swift
 create mode 100644 Sources/AIProxy/OpenAI/OpenAIRealtimeReasoningResponseCreate.swift
 create mode 100644 Sources/AIProxy/OpenAI/OpenAIRealtimeReasoningSessionConfiguration.swift

diff --git a/Documentation/OpenAI/RealtimeSchemaMatrix.md b/Documentation/OpenAI/RealtimeSchemaMatrix.md
new file mode 100644
index 00000000..0c825182
--- /dev/null
+++ b/Documentation/OpenAI/RealtimeSchemaMatrix.md
@@ -0,0 +1,92 @@
+# Realtime API Schema Matrix
+
+This matrix maps the current OpenAI Realtime `session.update.session`, `response.create.response`, and
+`conversation.item.create` fields, plus decoded output phases, to AIProxySwift types and wire shapes.
+
+Reference: https://developers.openai.com/api/reference/resources/realtime
+
+## Shared Realtime Session
+
+These fields are used by Performance Realtime models, such as `gpt-realtime-1.5`, and are also the
+base session shape composed by Realtime Reasoning models.
+
+| Wire field | AIProxySwift API | Wire shape emitted |
+| --- | --- | --- |
+| `type` | `OpenAIRealtimeSessionConfiguration.type` | string |
+| `include` | `OpenAIRealtimeSessionConfiguration.include` | string array |
+| `model` | `OpenAIRealtimeSessionConfiguration.model` | string |
+| `instructions` | `OpenAIRealtimeSessionConfiguration.instructions` | string |
+| `max_output_tokens` | `OpenAIRealtimeSessionConfiguration.maxOutputTokens` | int or `"inf"` |
+| `output_modalities` | `OpenAIRealtimeSessionConfiguration.outputModalities` | enum string array |
+| `prompt` | `OpenAIRealtimeSessionConfiguration.prompt` | object (`id`, optional `variables`, optional `version`) |
+| `tracing` | `OpenAIRealtimeSessionConfiguration.tracing` | string `"auto"` or object (`group_id`, `metadata`, `workflow_name`) |
+| `truncation` | `OpenAIRealtimeSessionConfiguration.truncation` | string (`"auto"`/`"disabled"`) or retention-ratio object |
+| `tools` | `OpenAIRealtimeSessionConfiguration.tools` | union array (`function`, `mcp`, `web_search`) |
+| `tool_choice` | `OpenAIRealtimeSessionConfiguration.toolChoice` | string (`auto`/`none`/`required`) or typed selector object |
+| `audio.input.format` | `OpenAIRealtimeSessionConfiguration.inputAudioFormat` | object (`type`, optional `rate`) |
+| `audio.input.noise_reduction` | `OpenAIRealtimeSessionConfiguration.inputAudioNoiseReduction` | object (`type`) |
+| `audio.input.transcription` | `OpenAIRealtimeSessionConfiguration.inputAudioTranscription` | object (`language`, `model`, `prompt`) |
+| `audio.input.turn_detection` | `OpenAIRealtimeSessionConfiguration.turnDetection` | typed object union (`server_vad` / `semantic_vad`) |
+| `audio.output.format` | `OpenAIRealtimeSessionConfiguration.outputAudioFormat` | object (`type`, optional `rate`) |
+| `audio.output.speed` | `OpenAIRealtimeSessionConfiguration.speed` | number (range 0.25...1.5) |
+| `audio.output.voice` | `OpenAIRealtimeSessionConfiguration.voice` | string or object (`id`) |
+
+## Realtime Reasoning Session
+
+Realtime Reasoning models, such as `gpt-realtime-2`, compose the shared session fields above and add
+Reasoning-only fields to the same `session.update.session` object.
+
+| Wire field | AIProxySwift API | Wire shape emitted |
+| --- | --- | --- |
+| `reasoning` | `OpenAIRealtimeReasoningSessionConfiguration.reasoning` | object |
+| `reasoning.effort` | `OpenAIRealtimeReasoningConfiguration.effort` | `minimal`, `low`, `medium`, `high`, or `xhigh` |
+| `parallel_tool_calls` | `OpenAIRealtimeReasoningSessionConfiguration.parallelToolCalls` | boolean |
+
+## Shared `response.create`
+
+| Wire field | AIProxySwift API | Wire shape emitted |
+| --- | --- | --- |
+| `type` | `OpenAIRealtimeResponseCreate.type` | `"response.create"` |
+| `event_id` | `OpenAIRealtimeResponseCreate.eventID` | optional string |
+| `response.instructions` | `OpenAIRealtimeResponseCreate.Response.instructions` | optional string |
+| `response.output_modalities` | `OpenAIRealtimeResponseCreate.Response.outputModalities` | optional enum string array |
+| `response.tools` | `OpenAIRealtimeResponseCreate.Response.tools` | optional tool union array (`function`, `mcp`, `web_search`) |
+| `response.tool_choice` | `OpenAIRealtimeResponseCreate.Response.toolChoice` | optional string/object union |
+
+## Realtime Reasoning `response.create`
+
+| Wire field | AIProxySwift API | Wire shape emitted |
+| --- | --- | --- |
+| `type` | `OpenAIRealtimeReasoningResponseCreate.type` | `"response.create"` |
+| `event_id` | `OpenAIRealtimeReasoningResponseCreate.eventID` | optional string |
+| `response.reasoning` | `OpenAIRealtimeReasoningResponseCreate.Response.reasoning` | object |
+| `response.reasoning.effort` | `OpenAIRealtimeReasoningConfiguration.effort` | `minimal`, `low`, `medium`, `high`, or `xhigh` |
+| `response.parallel_tool_calls` | `OpenAIRealtimeReasoningResponseCreate.Response.parallelToolCalls` | boolean |
+
+## Realtime Reasoning Output Phases
+
+Realtime Reasoning output can be split into commentary and final answer phases.
+
+| Wire field | AIProxySwift API | Wire shape decoded |
+| --- | --- | --- |
+| `response.output[].phase` | `OpenAIRealtimeResponseOutputItem.phase` | `commentary` or `final_answer` |
+| `response.output_item.*.item.phase` | `OpenAIRealtimeResponseOutputItemAddedEvent.phase` / `OpenAIRealtimeResponseOutputItemDoneEvent.phase` | `commentary` or `final_answer` |
+| `conversation.item.*.item.phase` | `OpenAIRealtimeConversationItemCreatedEvent.phase` | `commentary` or `final_answer` |
+
+## `conversation.item.create`
+
+Reference: https://platform.openai.com/docs/api-reference/realtime-client-events/conversation/item/create
+
+| Wire field | AIProxySwift API | Wire shape emitted |
+| --- | --- | --- |
+| `type` | `OpenAIRealtimeConversationItemCreate.type` | `"conversation.item.create"` |
+| `item.type` | `OpenAIRealtimeConversationItemCreate.Item` | `"message"`, `"function_call"`, `"function_call_output"` |
+| `item.role` | `OpenAIRealtimeConversationItemCreate.Item.role` | optional string for message items |
+| `item.content[].type` | `OpenAIRealtimeConversationItemCreate.Item.Content.type` | `input_text`, `output_text`, `input_audio`, `item_reference`, `input_image` |
+| `item.content[].text` | `OpenAIRealtimeConversationItemCreate.Item.Content.text` | optional string |
+| `item.content[].audio` | `OpenAIRealtimeConversationItemCreate.Item.Content.audio` | optional string |
+| `item.content[].item_id` | `OpenAIRealtimeConversationItemCreate.Item.Content.itemID` | optional string |
+| `item.call_id` | `OpenAIRealtimeConversationItemCreate.Item.callID` | optional string |
+| `item.name` | `OpenAIRealtimeConversationItemCreate.Item.name` | optional string |
+| `item.arguments` | `OpenAIRealtimeConversationItemCreate.Item.arguments` | optional string |
+| `item.output` | `OpenAIRealtimeConversationItemCreate.Item.output` | optional string |
diff --git a/README.md b/README.md
index 3aea6cbf..71b85480 100644
--- a/README.md
+++ b/README.md
@@ -1384,13 +1384,10 @@ final class RealtimeManager {
             inputAudioFormat: .pcm16,
             inputAudioTranscription: .init(model: "whisper-1"),
             instructions: "You are a tour guide of Yosemite national park",
-            maxResponseOutputTokens: .int(4096),
-            modalities: [.audio],
+            maxOutputTokens: .int(4096),
+            outputModalities: [.audio],
             outputAudioFormat: .pcm16,
-            temperature: 0.7,
-            turnDetection: .init(
-                type: .semanticVAD(eagerness: .medium)
-            ),
+            turnDetection: .semanticVAD(.init(eagerness: .medium)),
             voice: "shimmer"
         )
 
@@ -1449,14 +1446,15 @@ final class RealtimeManager {
 }
 ```
 
-#### General Availability (GA) Realtime migration notes
+#### Current Realtime API notes
 
-- OpenAI has announced Realtime beta (`OpenAI-Beta: realtime=v1`) deprecation and shutdown on 2026-05-07.
-- For `response.create`, GA uses `output_modalities` (not `modalities`).
-- The new `output_modalities` for OpenAI realtime GA (general availability) is as follows:
+- For a field-by-field mapping of the Realtime wire shape to AIProxySwift types, see
+  [Realtime schema matrix](Documentation/OpenAI/RealtimeSchemaMatrix.md).
+- For `response.create`, the current Realtime API uses `output_modalities` (not `modalities`).
+- `output_modalities` accepts the following values:
   - `["audio"]` returns audio with transcript.
   - `["text"]` returns text only.
-- For voice mode with built-in web search, use GA tool (`.webSearch`) and specify `.auto` for toolChoice to let the model decide when to use it.
+- For voice mode with built-in web search, use the `.webSearch` tool and specify `.auto` for `toolChoice` to let the model decide when to use it.
 
 ```swift
 let configuration = OpenAIRealtimeSessionConfiguration(
@@ -1473,6 +1471,60 @@ let session = try await openAIService.realtimeSession(
 )
 ```
 
+#### Realtime Reasoning models
+
+OpenAI's Realtime Reasoning models, such as `gpt-realtime-2`, use the same Realtime WebSocket
+transport and shared session fields as Performance models like `gpt-realtime-1.5`, plus
+Reasoning-only configuration for effort and parallel tool calls.
+
+```swift
+let configuration = OpenAIRealtimeReasoningSessionConfiguration(
+    session: OpenAIRealtimeSessionConfiguration(
+        outputModalities: [.audio],
+        voice: .builtin("alloy"),
+        tools: [.webSearch(.init(searchContextSize: .medium))],
+        toolChoice: .auto
+    ),
+    reasoning: .init(effort: .low),
+    parallelToolCalls: true
+)
+
+let session = try await openAIService.realtimeSession(
+    model: "gpt-realtime-2",
+    configuration: configuration,
+    logLevel: .info
+)
+```
+
+You can also override Reasoning settings for a single response:
+
+```swift
+await session.sendMessage(
+    OpenAIRealtimeReasoningResponseCreate(
+        response: .init(
+            base: .init(
+                instructions: "Use the lowest sufficient reasoning effort.",
+                outputModalities: [.audio]
+            ),
+            reasoning: .init(effort: .minimal),
+            parallelToolCalls: false
+        )
+    )
+)
+```
+
+Realtime Reasoning responses can include phased output. Use `phase` to separate short commentary
+from the final answer when the model emits both in a turn:
+
+```swift
+for await message in session.receiver {
+    if case .responseDone(let event) = message {
+        let commentary = event.output?.filter { $0.phase == .commentary }
+        let finalAnswer = event.output?.filter { $0.phase == .finalAnswer }
+    }
+}
+```
+
 ### How to make a basic request using OpenAI's Responses API
 Note: there is also a streaming version of this snippet below.
 
diff --git a/Sources/AIProxy/OpenAI/OpenAIRealtimeMessage.swift b/Sources/AIProxy/OpenAI/OpenAIRealtimeMessage.swift
index 55a77d0d..6f67ea7d 100644
--- a/Sources/AIProxy/OpenAI/OpenAIRealtimeMessage.swift
+++ b/Sources/AIProxy/OpenAI/OpenAIRealtimeMessage.swift
@@ -277,15 +277,46 @@ public struct OpenAIRealtimeInputAudioBufferDTMFEventReceivedEvent: Decodable, S
     }
 }
 
+public enum OpenAIRealtimeResponsePhase: String, Decodable, Sendable {
+    case commentary
+    case finalAnswer = "final_answer"
+}
+
+public struct OpenAIRealtimeResponseOutputItem: Decodable, Sendable {
+    public let id: String?
+    public let phase: OpenAIRealtimeResponsePhase?
+    public let content: [Content]?
+
+    public var transcript: String? {
+        content?.first(where: { ($0.transcript?.isEmpty == false) })?.transcript
+    }
+
+    private enum CodingKeys: String, CodingKey {
+        case id
+        case phase
+        case content
+    }
+}
+
+extension OpenAIRealtimeResponseOutputItem {
+    public struct Content: Decodable, Sendable {
+        public let type: String?
+        public let text: String?
+        public let transcript: String?
+    }
+}
+
 public struct OpenAIRealtimeConversationItemCreatedEvent: Decodable, Sendable {
     public let itemID: String?
     public let previousItemID: String?
     public let role: String?
+    public let phase: OpenAIRealtimeResponsePhase?
     public let eventID: String?
 
     private struct ItemBody: Decodable {
         let id: String?
         let role: String?
+        let phase: OpenAIRealtimeResponsePhase?
     }
 
     private enum CodingKeys: String, CodingKey {
@@ -302,6 +333,7 @@ public struct OpenAIRealtimeConversationItemCreatedEvent: Decodable, Sendable {
         self.itemID = item?.id ?? fallbackItemID
         self.previousItemID = try container.decodeIfPresent(String.self, forKey: .previousItemID)
         self.role = item?.role
+        self.phase = item?.phase
         self.eventID = try container.decodeIfPresent(String.self, forKey: .eventID)
     }
 }
@@ -325,10 +357,12 @@ public struct OpenAIRealtimeResponseOutputItemAddedEvent: Decodable, Sendable {
     public let responseID: String?
     public let itemID: String?
     public let outputIndex: Int?
+    public let phase: OpenAIRealtimeResponsePhase?
     public let eventID: String?
 
     private struct ItemBody: Decodable {
         let id: String?
+        let phase: OpenAIRealtimeResponsePhase?
     }
 
     private enum CodingKeys: String, CodingKey {
@@ -346,6 +380,7 @@ public struct OpenAIRealtimeResponseOutputItemAddedEvent: Decodable, Sendable {
         let fallbackItemID = try container.decodeIfPresent(String.self, forKey: .itemID)
         self.itemID = item?.id ?? fallbackItemID
         self.outputIndex = container.decodeFlexibleIntIfPresent(forKey: .outputIndex)
+        self.phase = item?.phase
         self.eventID = try container.decodeIfPresent(String.self, forKey: .eventID)
     }
 }
@@ -354,6 +389,7 @@ public struct OpenAIRealtimeResponseOutputItemDoneEvent: Decodable, Sendable {
     public let responseID: String?
     public let itemID: String?
     public let outputIndex: Int?
+    public let phase: OpenAIRealtimeResponsePhase?
     public let transcript: String?
     public let eventID: String?
 
@@ -362,6 +398,7 @@ public struct OpenAIRealtimeResponseOutputItemDoneEvent: Decodable, Sendable {
             let transcript: String?
         }
         let id: String?
+        let phase: OpenAIRealtimeResponsePhase?
         let content: [ContentBody]?
     }
 
@@ -380,6 +417,7 @@ public struct OpenAIRealtimeResponseOutputItemDoneEvent: Decodable, Sendable {
         let fallbackItemID = try container.decodeIfPresent(String.self, forKey: .itemID)
         self.itemID = item?.id ?? fallbackItemID
         self.outputIndex = container.decodeFlexibleIntIfPresent(forKey: .outputIndex)
+        self.phase = item?.phase
         self.transcript = item?.content?.first(where: { ($0.transcript?.isEmpty == false) })?.transcript
         self.eventID = try container.decodeIfPresent(String.self, forKey: .eventID)
     }
@@ -473,6 +511,7 @@ public struct OpenAIRealtimeResponseDoneEvent: Decodable, Sendable {
     public let responseID: String?
     public let conversationID: String?
     public let status: String?
+    public let output: [OpenAIRealtimeResponseOutputItem]?
     public let usage: OpenAIRealtimeResponseUsage?
     public let eventID: String?
 
@@ -480,12 +519,14 @@ public struct OpenAIRealtimeResponseDoneEvent: Decodable, Sendable {
         let id: String?
         let conversationID: String?
         let status: String?
+        let output: [OpenAIRealtimeResponseOutputItem]?
         let usage: OpenAIRealtimeResponseUsage?
 
         private enum CodingKeys: String, CodingKey {
             case id
             case conversationID = "conversation_id"
             case status
+            case output
             case usage
         }
     }
@@ -503,6 +544,7 @@ public struct OpenAIRealtimeResponseDoneEvent: Decodable, Sendable {
         self.responseID = response?.id ?? fallbackResponseID
         self.conversationID = response?.conversationID
         self.status = response?.status
+        self.output = response?.output
         self.usage = response?.usage
         self.eventID = try container.decodeIfPresent(String.self, forKey: .eventID)
     }
diff --git a/Sources/AIProxy/OpenAI/OpenAIRealtimeReasoningConfiguration.swift b/Sources/AIProxy/OpenAI/OpenAIRealtimeReasoningConfiguration.swift
new file mode 100644
index 00000000..e049cb10
--- /dev/null
+++ b/Sources/AIProxy/OpenAI/OpenAIRealtimeReasoningConfiguration.swift
@@ -0,0 +1,24 @@
+//
+//  OpenAIRealtimeReasoningConfiguration.swift
+//  AIProxy
+//
+
+/// Configuration for OpenAI Realtime Reasoning models such as `gpt-realtime-2`.
+nonisolated public struct OpenAIRealtimeReasoningConfiguration: Encodable, Sendable {
+    /// Constrains reasoning effort on Realtime Reasoning models.
+    public let effort: Effort?
+
+    public init(effort: Effort? = nil) {
+        self.effort = effort
+    }
+}
+
+extension OpenAIRealtimeReasoningConfiguration {
+    nonisolated public enum Effort: String, Encodable, Sendable {
+        case minimal
+        case low
+        case medium
+        case high
+        case xhigh
+    }
+}
diff --git a/Sources/AIProxy/OpenAI/OpenAIRealtimeReasoningResponseCreate.swift b/Sources/AIProxy/OpenAI/OpenAIRealtimeReasoningResponseCreate.swift
new file mode 100644
index 00000000..60086893
--- /dev/null
+++ b/Sources/AIProxy/OpenAI/OpenAIRealtimeReasoningResponseCreate.swift
@@ -0,0 +1,78 @@
+//
+//  OpenAIRealtimeReasoningResponseCreate.swift
+//  AIProxy
+//
+
+/// `response.create` for Realtime Reasoning models.
+nonisolated public struct OpenAIRealtimeReasoningResponseCreate: Encodable {
+    public let type = "response.create"
+    public let eventID: String?
+    public let response: Response?
+
+    private enum CodingKeys: String, CodingKey {
+        case type
+        case eventID = "event_id"
+        case response
+    }
+
+    public init(eventID: String? = nil, response: Response? = nil) {
+        self.eventID = eventID
+        self.response = response
+    }
+}
+
+extension OpenAIRealtimeReasoningResponseCreate {
+    nonisolated public struct Response: Encodable {
+        public let conversation: String?
+        public let instructions: String?
+        public let outputModalities: [OpenAIRealtimeSessionConfiguration.Modality]?
+        public let tools: [OpenAIRealtimeResponseCreate.Response.Tool]?
+        public let toolChoice: OpenAIRealtimeSessionConfiguration.ToolChoice?
+        public let reasoning: OpenAIRealtimeReasoningConfiguration?
+        public let parallelToolCalls: Bool?
+
+        private enum CodingKeys: String, CodingKey {
+            case conversation
+            case instructions
+            case outputModalities = "output_modalities"
+            case tools
+            case toolChoice = "tool_choice"
+            case reasoning
+            case parallelToolCalls = "parallel_tool_calls"
+        }
+
+        public init(
+            conversation: String? = nil,
+            instructions: String? = nil,
+            outputModalities: [OpenAIRealtimeSessionConfiguration.Modality]? = nil,
+            tools: [OpenAIRealtimeResponseCreate.Response.Tool]? = nil,
+            toolChoice: OpenAIRealtimeSessionConfiguration.ToolChoice? = nil,
+            reasoning: OpenAIRealtimeReasoningConfiguration? = nil,
+            parallelToolCalls: Bool? = nil
+        ) {
+            self.conversation = conversation
+            self.instructions = instructions
+            self.outputModalities = outputModalities
+            self.tools = tools
+            self.toolChoice = toolChoice
+            self.reasoning = reasoning
+            self.parallelToolCalls = parallelToolCalls
+        }
+
+        public init(
+            base: OpenAIRealtimeResponseCreate.Response,
+            reasoning: OpenAIRealtimeReasoningConfiguration? = nil,
+            parallelToolCalls: Bool? = nil
+        ) {
+            self.init(
+                conversation: base.conversation,
+                instructions: base.instructions,
+                outputModalities: base.outputModalities,
+                tools: base.tools,
+                toolChoice: base.toolChoice,
+                reasoning: reasoning,
+                parallelToolCalls: parallelToolCalls
+            )
+        }
+    }
+}
diff --git a/Sources/AIProxy/OpenAI/OpenAIRealtimeReasoningSessionConfiguration.swift b/Sources/AIProxy/OpenAI/OpenAIRealtimeReasoningSessionConfiguration.swift
new file mode 100644
index 00000000..39025277
--- /dev/null
+++ b/Sources/AIProxy/OpenAI/OpenAIRealtimeReasoningSessionConfiguration.swift
@@ -0,0 +1,25 @@
+//
+//  OpenAIRealtimeReasoningSessionConfiguration.swift
+//  AIProxy
+//
+
+/// Session configuration for Realtime Reasoning models.
+///
+/// The Realtime API still expects one `session.update.session` object. This type composes
+/// the shared Realtime session configuration with Reasoning-only fields and flattens them
+/// into that single wire object when encoded.
+nonisolated public struct OpenAIRealtimeReasoningSessionConfiguration: Encodable, Sendable {
+    public let session: OpenAIRealtimeSessionConfiguration
+    public let reasoning: OpenAIRealtimeReasoningConfiguration?
+    public let parallelToolCalls: Bool?
+
+    public init(
+        session: OpenAIRealtimeSessionConfiguration,
+        reasoning: OpenAIRealtimeReasoningConfiguration? = nil,
+        parallelToolCalls: Bool? = nil
+    ) {
+        self.session = session
+        self.reasoning = reasoning
+        self.parallelToolCalls = parallelToolCalls
+    }
+}
diff --git a/Sources/AIProxy/OpenAI/OpenAIRealtimeSession.swift b/Sources/AIProxy/OpenAI/OpenAIRealtimeSession.swift
index 27f33e9c..eb699a14 100644
--- a/Sources/AIProxy/OpenAI/OpenAIRealtimeSession.swift
+++ b/Sources/AIProxy/OpenAI/OpenAIRealtimeSession.swift
@@ -17,6 +17,7 @@ nonisolated private let kWebsocketDisconnectedEarlyThreshold: TimeInterval = 3
     private var continuation: AsyncStream<OpenAIRealtimeMessage>.Continuation?
     private let setupTime = Date()
     let sessionConfiguration: OpenAIRealtimeSessionConfiguration
+    private let initialSessionUpdate: OpenAIRealtimeSessionUpdate
 
     init(
         webSocketTask: URLSessionWebSocketTask,
@@ -24,9 +25,25 @@ nonisolated private let kWebsocketDisconnectedEarlyThreshold: TimeInterval = 3
     ) {
         self.webSocketTask = webSocketTask
         self.sessionConfiguration = sessionConfiguration
+        self.initialSessionUpdate = OpenAIRealtimeSessionUpdate(session: sessionConfiguration)
 
         Task {
-            await self.sendMessage(OpenAIRealtimeSessionUpdate(session: self.sessionConfiguration))
+            await self.sendMessage(self.initialSessionUpdate)
+        }
+        self.webSocketTask.resume()
+        self.receiveMessage()
+    }
+
+    init(
+        webSocketTask: URLSessionWebSocketTask,
+        sessionConfiguration: OpenAIRealtimeReasoningSessionConfiguration
+    ) {
+        self.webSocketTask = webSocketTask
+        self.sessionConfiguration = sessionConfiguration.session
+        self.initialSessionUpdate = OpenAIRealtimeSessionUpdate(session: sessionConfiguration)
+
+        Task {
+            await self.sendMessage(self.initialSessionUpdate)
         }
         self.webSocketTask.resume()
         self.receiveMessage()
diff --git a/Sources/AIProxy/OpenAI/OpenAIRealtimeSessionConfiguration.swift b/Sources/AIProxy/OpenAI/OpenAIRealtimeSessionConfiguration.swift
index 4502773e..e78d7aad 100644
--- a/Sources/AIProxy/OpenAI/OpenAIRealtimeSessionConfiguration.swift
+++ b/Sources/AIProxy/OpenAI/OpenAIRealtimeSessionConfiguration.swift
@@ -45,7 +45,7 @@ nonisolated public struct OpenAIRealtimeSessionConfiguration: Encodable, Sendabl
         outputModalities: [OpenAIRealtimeSessionConfiguration.Modality]? = nil,
         outputAudioFormat: OpenAIRealtimeSessionConfiguration.AudioFormat? = nil,
         speed: Float? = 1.0,
-        temperature: Double? = nil, // Deprecated in realtime GA
+        temperature: Double? = nil, // Deprecated in the current Realtime API
         tools: [Tool]? = nil,
         toolChoice: ToolChoice? = nil,
         turnDetection: TurnDetection? = nil,
@@ -549,7 +549,7 @@ extension OpenAIRealtimeSessionConfiguration {
 }
 
 
-// MARK: - Legacy fixes for pre-GA callsites
+// MARK: - Legacy callsite compatibility
 extension OpenAIRealtimeSessionConfiguration {
     public typealias MaxResponseOutputTokens = MaxOutputTokens
 }
@@ -561,7 +561,7 @@ extension OpenAIRealtimeSessionConfiguration.Voice: ExpressibleByStringLiteral {
 }
 
 extension OpenAIRealtimeSessionConfiguration.TurnDetection {
-    /// Pre-GA initializer kept for source compatibility with call sites that
+    /// Legacy initializer kept for source compatibility with call sites that
     /// build `TurnDetection(type: .semanticVAD(eagerness: ...))`.
     public init(type: DetectionType) {
         switch type {
@@ -639,8 +639,14 @@ private struct OpenAIRealtimeSessionConfigurationWire: Encodable, Sendable {
     let prompt: OpenAIRealtimeSessionConfiguration.Prompt?
     let tracing: OpenAIRealtimeSessionConfiguration.Tracing?
     let truncation: OpenAIRealtimeSessionConfiguration.Truncation?
+    let reasoning: OpenAIRealtimeReasoningConfiguration?
+    let parallelToolCalls: Bool?
 
-    init(_ configuration: OpenAIRealtimeSessionConfiguration) {
+    init(
+        _ configuration: OpenAIRealtimeSessionConfiguration,
+        reasoning: OpenAIRealtimeReasoningConfiguration? = nil,
+        parallelToolCalls: Bool? = nil
+    ) {
         self.include = configuration.include
         self.type = configuration.type
         self.inputAudioFormat = configuration.inputAudioFormat
@@ -659,6 +665,8 @@ private struct OpenAIRealtimeSessionConfigurationWire: Encodable, Sendable {
         self.prompt = configuration.prompt
         self.tracing = configuration.tracing
         self.truncation = configuration.truncation
+        self.reasoning = reasoning
+        self.parallelToolCalls = parallelToolCalls
     }
 
     private enum CodingKeys: String, CodingKey {
@@ -670,6 +678,8 @@ private struct OpenAIRealtimeSessionConfigurationWire: Encodable, Sendable {
         case model
         case outputModalities = "output_modalities"
         case prompt
+        case reasoning
+        case parallelToolCalls = "parallel_tool_calls"
         case tracing
         case truncation
         case tools
@@ -722,6 +732,8 @@ private struct OpenAIRealtimeSessionConfigurationWire: Encodable, Sendable {
         try container.encodeIfPresent(model, forKey: .model)
         try container.encodeIfPresent(outputModalities, forKey: .outputModalities)
         try container.encodeIfPresent(prompt, forKey: .prompt)
+        try container.encodeIfPresent(reasoning, forKey: .reasoning)
+        try container.encodeIfPresent(parallelToolCalls, forKey: .parallelToolCalls)
         try container.encodeIfPresent(tracing, forKey: .tracing)
         try container.encodeIfPresent(truncation, forKey: .truncation)
         try container.encodeIfPresent(tools, forKey: .tools)
@@ -775,3 +787,13 @@ extension OpenAIRealtimeSessionConfiguration {
         try OpenAIRealtimeSessionConfigurationWire(self).encode(to: encoder)
     }
 }
+
+extension OpenAIRealtimeReasoningSessionConfiguration {
+    public func encode(to encoder: Encoder) throws {
+        try OpenAIRealtimeSessionConfigurationWire(
+            session,
+            reasoning: reasoning,
+            parallelToolCalls: parallelToolCalls
+        ).encode(to: encoder)
+    }
+}
diff --git a/Sources/AIProxy/OpenAI/OpenAIRealtimeSessionUpdate.swift b/Sources/AIProxy/OpenAI/OpenAIRealtimeSessionUpdate.swift
index c22121dc..8134bc0f 100644
--- a/Sources/AIProxy/OpenAI/OpenAIRealtimeSessionUpdate.swift
+++ b/Sources/AIProxy/OpenAI/OpenAIRealtimeSessionUpdate.swift
@@ -8,6 +8,7 @@ nonisolated public struct OpenAIRealtimeSessionUpdate: Encodable {
 
     /// Session configuration to update
     public let session: OpenAIRealtimeSessionConfiguration
+    private let reasoningSession: OpenAIRealtimeReasoningSessionConfiguration?
 
     /// The event type, must be "session.update".
     public let type = "session.update"
@@ -24,5 +25,26 @@ nonisolated public struct OpenAIRealtimeSessionUpdate: Encodable {
     ) {
         self.eventId = eventId
         self.session = session
+        self.reasoningSession = nil
+    }
+
+    public init(
+        eventId: String? = nil,
+        session: OpenAIRealtimeReasoningSessionConfiguration
+    ) {
+        self.eventId = eventId
+        self.session = session.session
+        self.reasoningSession = session
+    }
+
+    public func encode(to encoder: Encoder) throws {
+        var container = encoder.container(keyedBy: CodingKeys.self)
+        try container.encodeIfPresent(eventId, forKey: .eventId)
+        if let reasoningSession {
+            try container.encode(reasoningSession, forKey: .session)
+        } else {
+            try container.encode(session, forKey: .session)
+        }
+        try container.encode(type, forKey: .type)
     }
 }
diff --git a/Sources/AIProxy/OpenAI/OpenAIService.swift b/Sources/AIProxy/OpenAI/OpenAIService.swift
index e7adfb8e..5184ad91 100644
--- a/Sources/AIProxy/OpenAI/OpenAIService.swift
+++ b/Sources/AIProxy/OpenAI/OpenAIService.swift
@@ -285,6 +285,35 @@ import Foundation
         )
     }
 
+    /// Starts a realtime session for Realtime Reasoning models such as `gpt-realtime-2`.
+    ///
+    /// This uses the same Realtime WebSocket transport as Performance models, but sends
+    /// Reasoning-only session fields such as `reasoning` and `parallel_tool_calls` in
+    /// the initial `session.update`.
+    ///
+    /// - Parameters:
+    ///   - model: The Realtime Reasoning model to use, for example `gpt-realtime-2`.
+    ///   - configuration: The Reasoning session configuration object.
+    ///   - logLevel: The threshold level at which this library begins emitting log messages.
+    ///
+    /// - Returns: A realtime session manager that the caller can send and receive messages with.
+    public func realtimeSession(
+        model: String,
+        configuration: OpenAIRealtimeReasoningSessionConfiguration,
+        logLevel: AIProxyLogLevel
+    ) async throws -> OpenAIRealtimeSession {
+        AIProxyLogLevel.callerDesiredLogLevel = logLevel
+        let request = try await self.requestBuilder.plainGET(
+            path: "/v1/realtime?model=\(model)",
+            secondsToWait: 60,
+            additionalHeaders: [:]
+        )
+        return OpenAIRealtimeSession(
+            webSocketTask: self.serviceNetworker.urlSession.webSocketTask(with: request),
+            sessionConfiguration: configuration
+        )
+    }
+
     /// Uploads a file to OpenAI for use in a future tool call
     /// https://platform.openai.com/docs/api-reference/files/create
     ///
diff --git a/Tests/AIProxyTests/OpenAIRealtimeMessageTests.swift b/Tests/AIProxyTests/OpenAIRealtimeMessageTests.swift
index d5990071..016a653d 100644
--- a/Tests/AIProxyTests/OpenAIRealtimeMessageTests.swift
+++ b/Tests/AIProxyTests/OpenAIRealtimeMessageTests.swift
@@ -214,6 +214,181 @@ struct OpenAIRealtimeMessageTests {
         #expect(payload.usage?.totalTokens == 225)
     }
 
+    @Test
+    func testResponseDoneDecodesPhasedOutput() throws {
+        let decoded = try decode(#"""
+            {
+              "type": "response.done",
+              "event_id": "event_40",
+              "response": {
+                "id": "resp_40",
+                "conversation_id": "conv_40",
+                "status": "completed",
+                "output": [
+                  {
+                    "id": "msg_commentary",
+                    "phase": "commentary",
+                    "content": [
+                      {
+                        "type": "output_audio",
+                        "transcript": "I'll check that now."
+                      }
+                    ]
+                  },
+                  {
+                    "id": "msg_final",
+                    "phase": "final_answer",
+                    "content": [
+                      {
+                        "type": "output_audio",
+                        "transcript": "The appointment is confirmed."
+                      }
+                    ]
+                  }
+                ]
+              }
+            }
+            """#)
+        // The commentary item is first in the fixture and the final answer is last.
+        guard case .responseDone(let done) = decoded else {
+            Issue.record("Expected responseDone")
+            return
+        }
+        let commentary = done.output?.first
+        let finalAnswer = done.output?.last
+        #expect(done.output?.count == 2)
+        #expect(commentary?.phase == .commentary)
+        #expect(commentary?.transcript == "I'll check that now.")
+        #expect(finalAnswer?.phase == .finalAnswer)
+        #expect(finalAnswer?.transcript == "The appointment is confirmed.")
+    }
+
+    @Test
+    func testResponseOutputItemDoneDecodesPhase() throws {
+        let decoded = try decode(
+            #"""
+            {
+              "type": "response.output_item.done",
+              "event_id": "event_41",
+              "response_id": "resp_41",
+              "output_index": 0,
+              "item": {
+                "id": "msg_41",
+                "phase": "final_answer",
+                "content": [
+                  {
+                    "type": "output_audio",
+                    "transcript": "Done."
+                  }
+                ]
+              }
+            }
+            """#
+        )
+        // A finished output item should expose its id, phase and transcript.
+        if case .responseOutputItemDone(let item) = decoded {
+            #expect(item.itemID == "msg_41")
+            #expect(item.phase == .finalAnswer)
+            #expect(item.transcript == "Done.")
+        } else {
+            Issue.record("Expected responseOutputItemDone")
+        }
+    }
+
+    @Test
+    func testResponseOutputItemAddedDecodesPhase() throws {
+        let decoded = try decode(
+            #"""
+            {
+              "type": "response.output_item.added",
+              "event_id": "event_42",
+              "response_id": "resp_42",
+              "output_index": 0,
+              "item": {
+                "id": "msg_42",
+                "phase": "commentary"
+              }
+            }
+            """#
+        )
+        // The fixture item has no `content`; only the id and phase are checked.
+        if case .responseOutputItemAdded(let item) = decoded {
+            #expect(item.itemID == "msg_42")
+            #expect(item.phase == .commentary)
+        } else {
+            Issue.record("Expected responseOutputItemAdded")
+        }
+    }
+
+    @Test
+    func testConversationItemAddedDecodesPhase() throws {
+        let decoded = try decode(
+            #"""
+            {
+              "type": "conversation.item.added",
+              "event_id": "event_43",
+              "item": {
+                "id": "msg_43",
+                "role": "assistant",
+                "phase": "final_answer"
+              }
+            }
+            """#
+        )
+        // Conversation items carry a role alongside the phase; check all three fields.
+        if case .conversationItemAdded(let item) = decoded {
+            #expect(item.itemID == "msg_43")
+            #expect(item.role == "assistant")
+            #expect(item.phase == .finalAnswer)
+        } else {
+            Issue.record("Expected conversationItemAdded")
+        }
+    }
+
+    @Test
+    func testConversationItemCreatedAndDoneDecodePhase() throws {
+        let createdEvent = try decode(
+            #"""
+            {
+              "type": "conversation.item.created",
+              "event_id": "event_44",
+              "item": {
+                "id": "msg_44",
+                "role": "assistant",
+                "phase": "commentary"
+              }
+            }
+            """#
+        )
+        let doneEvent = try decode(
+            #"""
+            {
+              "type": "conversation.item.done",
+              "event_id": "event_45",
+              "item": {
+                "id": "msg_45",
+                "role": "assistant",
+                "phase": "final_answer"
+              }
+            }
+            """#
+        )
+        // `created` is checked first; a case mismatch here ends the test early.
+        guard case .conversationItemCreated(let createdItem) = createdEvent else {
+            Issue.record("Expected conversationItemCreated")
+            return
+        }
+        #expect(createdItem.itemID == "msg_44")
+        #expect(createdItem.phase == .commentary)
+        // `done` is only inspected once the `created` event matched its case.
+        guard case .conversationItemDone(let doneItem) = doneEvent else {
+            Issue.record("Expected conversationItemDone")
+            return
+        }
+        #expect(doneItem.itemID == "msg_45")
+        #expect(doneItem.phase == .finalAnswer)
+    }
+
     @Test
     func testInputAudioTranscriptionDeltaLogprobsAreDecodable() throws {
         let event = try decode(
diff --git a/Tests/AIProxyTests/OpenAIRealtimeSessionEncodingTests.swift b/Tests/AIProxyTests/OpenAIRealtimeSessionEncodingTests.swift
index 7d3b8d1e..e4a6eadc 100644
--- a/Tests/AIProxyTests/OpenAIRealtimeSessionEncodingTests.swift
+++ b/Tests/AIProxyTests/OpenAIRealtimeSessionEncodingTests.swift
@@ -16,6 +16,17 @@ struct OpenAIRealtimeSessionEncodingTests {
         return e
     }()
 
+    // Compile-only check: the build breaks if `.init()` stops type-checking as the
+    // `configuration` argument of `realtimeSession(model:configuration:logLevel:)`.
+    @AIProxyActor
+    private func compilePerformanceRealtimeSessionCall(service: OpenAIService) async throws {
+        _ = try await service.realtimeSession(
+            model: "gpt-realtime-1.5",
+            configuration: .init(),
+            logLevel: .debug
+        )
+    }
+
     @Test
     func sessionUpdateEncodesNestedAudioAndOutputModalities() throws {
         let update = OpenAIRealtimeSessionUpdate(
@@ -38,6 +49,8 @@ struct OpenAIRealtimeSessionEncodingTests {
         #expect(session["output_modalities"] as? [String] == ["audio"])
         #expect(session["modalities"] == nil)
         #expect(session["max_response_output_tokens"] == nil)
+        #expect(session["reasoning"] == nil)
+        #expect(session["parallel_tool_calls"] == nil)
         let audio = session["audio"] as! [String: Any]
         let input = audio["input"] as! [String: Any]
         let inputFormat = input["format"] as! [String: Any]
@@ -45,6 +58,48 @@ struct OpenAIRealtimeSessionEncodingTests {
         #expect(inputFormat["rate"] as? Int == 24000)
     }
 
+    @Test
+    func sessionUpdateAcceptsInlineDefaultPerformanceConfiguration() throws {
+        // A bare `.init()` session must encode without any Reasoning-only keys.
+        let payload = try encoder.encode(OpenAIRealtimeSessionUpdate(session: .init()))
+        let json = try Self.jsonObject(payload) as! [String: Any]
+        let sessionJSON = json["session"] as! [String: Any]
+
+        #expect(sessionJSON["type"] as? String == "realtime")
+        #expect(sessionJSON["reasoning"] == nil)
+        #expect(sessionJSON["parallel_tool_calls"] == nil)
+    }
+
+    @Test
+    func reasoningSessionUpdateMergesBaseAndReasoningFields() throws {
+        // Base session fields and Reasoning fields must end up in one `session` object.
+        let base = OpenAIRealtimeSessionConfiguration(
+            inputAudioFormat: .pcm16,
+            instructions: "Solve carefully.",
+            outputModalities: [.audio],
+            voice: .builtin("alloy")
+        )
+        let reasoningSession = OpenAIRealtimeReasoningSessionConfiguration(
+            session: base,
+            reasoning: .init(effort: .low),
+            parallelToolCalls: true
+        )
+        let payload = try encoder.encode(OpenAIRealtimeSessionUpdate(session: reasoningSession))
+        let root = try Self.jsonObject(payload) as! [String: Any]
+        #expect(root["type"] as? String == "session.update")
+        let sessionJSON = root["session"] as! [String: Any]
+        #expect(sessionJSON["instructions"] as? String == "Solve carefully.")
+        #expect(sessionJSON["output_modalities"] as? [String] == ["audio"])
+        #expect(sessionJSON["parallel_tool_calls"] as? Bool == true)
+        let reasoningJSON = sessionJSON["reasoning"] as! [String: Any]
+        #expect(reasoningJSON["effort"] as? String == "low")
+        let audioJSON = sessionJSON["audio"] as! [String: Any]
+        let inputFormat = (audioJSON["input"] as! [String: Any])["format"] as! [String: Any]
+        #expect(inputFormat["type"] as? String == "audio/pcm")
+        let outputJSON = audioJSON["output"] as! [String: Any]
+        #expect(outputJSON["voice"] as? String == "alloy")
+    }
+
     @Test
     func sessionUpdateEncodesG711AudioFormatObjects() throws {
         let update = OpenAIRealtimeSessionUpdate(
@@ -153,6 +208,33 @@ struct OpenAIRealtimeSessionEncodingTests {
         #expect(response["modalities"] == nil)
     }
 
+    @Test
+    func reasoningResponseCreateEncodesReasoning() throws {
+        // Base response fields and Reasoning fields must share one `response` object.
+        let create = OpenAIRealtimeReasoningResponseCreate(
+            eventID: "evt_reasoning",
+            response: .init(
+                base: .init(
+                    instructions: "Use the lowest sufficient reasoning effort.",
+                    outputModalities: [.audio],
+                    toolChoice: .auto
+                ),
+                reasoning: .init(effort: .minimal),
+                parallelToolCalls: false
+            )
+        )
+        let root = try Self.jsonObject(encoder.encode(create)) as! [String: Any]
+        #expect(root["type"] as? String == "response.create")
+        #expect(root["event_id"] as? String == "evt_reasoning")
+        let responseJSON = root["response"] as! [String: Any]
+        #expect(responseJSON["instructions"] as? String == "Use the lowest sufficient reasoning effort.")
+        #expect(responseJSON["output_modalities"] as? [String] == ["audio"])
+        #expect(responseJSON["tool_choice"] as? String == "auto")
+        #expect(responseJSON["parallel_tool_calls"] as? Bool == false)
+        let reasoningJSON = responseJSON["reasoning"] as! [String: Any]
+        #expect(reasoningJSON["effort"] as? String == "minimal")
+    }
+
     @Test
     func responseCreateToolChoiceMCPEncodesObjectShape() throws {
         let event = OpenAIRealtimeResponseCreate(