From 321a103739edd14d3422cec9408a12ed2b1b4787 Mon Sep 17 00:00:00 2001
From: Simo Lin <25425177+slin1237@users.noreply.github.com>
Date: Mon, 4 May 2026 10:21:11 -0700
Subject: [PATCH 01/15] fix(openai-bridge): mirror format on alias key + clear
 stale entries on Passthrough downgrade
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`McpToolSession::collect_visible_mcp_tools` (crates/mcp/src/core/session.rs:565-571)
*replaces* a direct tool entry with its alias entries when an alias is
registered. The session-side lookup path
(`qualified_name_for_exposed("web_search")` →
`("alias", "web_search")` → `registry.lookup`) therefore queries the
alias key — but `populate_from_server_config` was attaching the format
*only* to one of the two keys depending on whether the user supplied an
explicit format. Two consequences:

- An alias-only stanza on a builtin tool (e.g. `tools.do_search.alias =
  "web_search"`, no `response_format`) wrote the builtin default at the
  direct key only. The session resolved through the alias key and
  silently degraded to `mcp_call` instead of the hosted shape.
- A stanza with both `alias` and a concrete `response_format` wrote only
  the alias key. Direct dispatch via `(server, tool)` then degraded to
  Passthrough.

Mirror non-Passthrough formats on both keys whenever an alias exists —
both for explicit per-tool configs and for the builtin-default fallback.
And while we're here, make the documented "safe to call repeatedly"
contract real: `Some(Passthrough)` now explicitly clears the previous
hosted entry on every key (a config that flips a tool back from a
hosted format to passthrough no longer leaves the stale mapping behind).

Updates the existing alias-format test to assert both keys carry the
hosted shape, replaces the alias-only-builtin test with one that exercises
the alias-key lookup that production actually hits, and adds a new
regression test for the downgrade path.

Comments addressed (PR #1429): coderabbitai 3174799419 / 3174799424 /
3174987646, claude 3174955420 / 3174955773 / 3174778983,
chatgpt-codex-connector 3174963504 / 3176332060.

Signed-off-by: Simo Lin <25425177+slin1237@users.noreply.github.com>
---
 .../common/openai_bridge/format_registry.rs   | 172 ++++++++++++++----
 1 file changed, 136 insertions(+), 36 deletions(-)

diff --git a/model_gateway/src/routers/common/openai_bridge/format_registry.rs b/model_gateway/src/routers/common/openai_bridge/format_registry.rs
index 7170e15fa..ba8844dce 100644
--- a/model_gateway/src/routers/common/openai_bridge/format_registry.rs
+++ b/model_gateway/src/routers/common/openai_bridge/format_registry.rs
@@ -52,39 +52,65 @@ impl FormatRegistry {
         self.formats.insert(qualified, format);
     }
 
+    fn remove(&self, qualified: &QualifiedToolName) {
+        self.formats.remove(qualified);
+    }
+
     /// Populate from a server config: per-tool overrides + builtin defaults.
-    /// Safe to call repeatedly — entries for non-Passthrough formats are
-    /// overwritten. Downgrading a format back to `Passthrough` requires a
-    /// separate registry rebuild (no production caller mutates configs in
-    /// place today).
     ///
-    /// Mirrors `McpOrchestrator::apply_tool_configs`:
-    /// - When a tool has an `alias`, the format is attached **only** to the
-    ///   alias entry (under `("alias", alias_name)`), matching the orchestrator's
-    ///   `register_alias` qualified-name shape. The underlying `(server, tool)`
-    ///   stays at the `Passthrough` default so direct calls aren't transformed.
-    /// - When a tool has no alias but a non-default format, attach to
-    ///   `(server, tool)` directly.
-    /// - When the per-tool stanza omits `response_format` entirely
-    ///   (`None`), the builtin default still applies. This lets users add an
-    ///   `alias` or `arg_mapping` to a builtin tool without disabling its
-    ///   hosted-format wire shape. An explicit `Some(Passthrough)` *does*
-    ///   block the builtin default — that is the documented escape hatch
-    ///   for opting out of the hosted shape.
+    /// Safe to call repeatedly. Each affected key is unconditionally rewritten
+    /// (or removed for an explicit `Some(Passthrough)` downgrade) so a later
+    /// config that demotes a tool from a hosted format back to `Passthrough`
+    /// does not leave the previous entry behind in the map.
+    ///
+    /// Mirrors `McpOrchestrator::apply_tool_configs` *and* the dispatch shape
+    /// of `McpToolSession`. Two production lookup paths must resolve to the
+    /// same format:
+    /// - via the alias key `("alias", alias_name)` — what the session exposes
+    ///   when `collect_visible_mcp_tools` replaces the direct entry with its
+    ///   alias entry (see `crates/mcp/src/core/session.rs:565-571`).
+    /// - via the underlying `(server_key, tool_name)` — what direct dispatch
+    ///   uses when no alias hides the tool.
+    ///
+    /// So when a tool has an alias the format is mirrored on **both** keys.
+    /// Aliased builtin tools get the same treatment, and only when the
+    /// per-tool stanza omits `response_format` entirely (`None`) is the
+    /// builtin default applied; an explicit `Some(Passthrough)` is the
+    /// documented escape hatch for opting out of the hosted shape.
     pub fn populate_from_server_config(&self, config: &McpServerConfig) {
         if let Some(tools) = &config.tools {
             for (tool_name, tool_config) in tools {
+                let direct_key = QualifiedToolName::new(&config.name, tool_name);
+                let alias_key = tool_config
+                    .alias
+                    .as_deref()
+                    .map(|alias| QualifiedToolName::new(ALIAS_SERVER_KEY, alias));
+
                 let Some(format_config) = tool_config.response_format else {
+                    // `response_format` omitted: defer to the builtin-default
+                    // pass below. Don't touch existing entries here so a
+                    // direct-dispatch override placed by an earlier loop
+                    // iteration survives.
                     continue;
                 };
                 let format: ResponseFormat = format_config.into();
                 if format == ResponseFormat::Passthrough {
+                    // Explicit downgrade — clear any prior hosted-format entry
+                    // on every key the production lookup might consult.
+                    self.remove(&direct_key);
+                    if let Some(alias_key) = &alias_key {
+                        self.remove(alias_key);
+                    }
                     continue;
                 }
-                if let Some(alias) = &tool_config.alias {
-                    self.insert(QualifiedToolName::new(ALIAS_SERVER_KEY, alias), format);
-                } else {
-                    self.insert(QualifiedToolName::new(&config.name, tool_name), format);
+
+                // Mirror the format on every key production might query.
+                // Also write the direct key so that direct dispatch (which
+                // does not go through the alias rewrite) still gets the
+                // hosted shape.
+                self.insert(direct_key, format);
+                if let Some(alias_key) = alias_key {
+                    self.insert(alias_key, format);
                 }
             }
         }
@@ -92,14 +118,18 @@ impl FormatRegistry {
         if let (Some(builtin_type), Some(tool_name)) =
             (&config.builtin_type, &config.builtin_tool_name)
         {
-            let has_explicit_format = config
-                .tools
-                .as_ref()
-                .and_then(|tools| tools.get(tool_name))
-                .is_some_and(|cfg| cfg.response_format.is_some());
+            let stanza = config.tools.as_ref().and_then(|tools| tools.get(tool_name));
+            let has_explicit_format = stanza.is_some_and(|cfg| cfg.response_format.is_some());
             if !has_explicit_format {
                 let format: ResponseFormat = builtin_type.response_format().into();
                 self.insert(QualifiedToolName::new(&config.name, tool_name), format);
+                // `collect_visible_mcp_tools` exposes the alias entry instead
+                // of the direct entry, so the production lookup queries
+                // `("alias", alias)`. Without this mirror an alias-only stanza
+                // on a builtin tool silently degrades to `Passthrough`.
+                if let Some(alias) = stanza.and_then(|cfg| cfg.alias.as_deref()) {
+                    self.insert(QualifiedToolName::new(ALIAS_SERVER_KEY, alias), format);
+                }
             }
         }
     }
@@ -142,9 +172,11 @@ mod tests {
     }
 
     #[test]
-    fn alias_format_stored_under_alias_server_key() {
-        // Mirrors orchestrator::register_alias which uses
-        // QualifiedToolName::new("alias", alias_name).
+    fn alias_format_mirrored_on_both_keys() {
+        // The session lookup path goes through `("alias", alias_name)` because
+        // `collect_visible_mcp_tools` replaces the direct entry with its alias
+        // entry. Direct dispatch (no alias rewrite) still queries
+        // `(server_key, tool_name)`, so both keys must carry the format.
         let mut tools = HashMap::new();
         tools.insert(
             "brave_web_search".to_string(),
@@ -167,8 +199,9 @@ mod tests {
         );
         assert_eq!(
             r.lookup_by_names("brave", "brave_web_search"),
-            ResponseFormat::Passthrough,
-            "underlying tool entry must NOT receive the format when an alias exists"
+            ResponseFormat::WebSearchCall,
+            "direct (server_key, tool_name) entry must also carry the format \
+             so direct dispatch resolves the same shape as alias dispatch"
         );
     }
 
@@ -239,12 +272,15 @@ mod tests {
     }
 
     #[test]
-    fn alias_only_stanza_preserves_builtin_default() {
+    fn alias_only_stanza_preserves_builtin_default_on_both_keys() {
         // Regression: a per-tool stanza that only aliases a builtin tool
         // (or only sets arg_mapping) used to suppress the builtin default,
-        // collapsing the hosted format to plain mcp_call. With
-        // `response_format: None` meaning "inherit context", the builtin
-        // default must still apply.
+        // collapsing the hosted format to plain mcp_call. The crucial
+        // production path is the alias key — `collect_visible_mcp_tools`
+        // exposes the alias entry, so `lookup_tool_format(session, …,
+        // "web_search")` resolves to `("alias", "web_search")`. If the
+        // builtin default only landed at the direct key, dispatch silently
+        // degrades to `Passthrough`.
         let mut tools = HashMap::new();
         tools.insert(
             "do_search".to_string(),
@@ -262,10 +298,74 @@ mod tests {
         let r = FormatRegistry::new();
         r.populate_from_server_config(&cfg);
 
+        // Alias key — what production session lookup actually hits.
+        assert_eq!(
+            r.lookup_by_names("alias", "web_search"),
+            ResponseFormat::WebSearchCall,
+            "alias-only stanza on a builtin must mirror the hosted format on \
+             the alias key — that is the key production resolves through"
+        );
+        // Direct key — what direct dispatch hits.
         assert_eq!(
             r.lookup_by_names("search", "do_search"),
             ResponseFormat::WebSearchCall,
-            "alias-only stanza must not disable the builtin's hosted format"
+            "direct (server, tool) lookup must still resolve the hosted format"
+        );
+    }
+
+    #[test]
+    fn explicit_passthrough_downgrade_clears_prior_hosted_entry() {
+        // `populate_from_server_config` is called every time a server
+        // (re)registers, so a config that flips a tool from a hosted format
+        // back to `Passthrough` must clear the stale entry — otherwise the
+        // registry keeps transforming outputs as the old hosted type.
+        let r = FormatRegistry::new();
+
+        let mut hosted = HashMap::new();
+        hosted.insert(
+            "brave_web_search".to_string(),
+            ToolConfig {
+                alias: Some("web_search".to_string()),
+                response_format: Some(ResponseFormatConfig::WebSearchCall),
+                arg_mapping: None,
+            },
+        );
+        let mut hosted_cfg = server("brave");
+        hosted_cfg.tools = Some(hosted);
+        r.populate_from_server_config(&hosted_cfg);
+        assert_eq!(
+            r.lookup_by_names("alias", "web_search"),
+            ResponseFormat::WebSearchCall,
+            "precondition: alias key carries the hosted format"
+        );
+        assert_eq!(
+            r.lookup_by_names("brave", "brave_web_search"),
+            ResponseFormat::WebSearchCall,
+            "precondition: direct key carries the hosted format"
+        );
+
+        let mut downgraded = HashMap::new();
+        downgraded.insert(
+            "brave_web_search".to_string(),
+            ToolConfig {
+                alias: Some("web_search".to_string()),
+                response_format: Some(ResponseFormatConfig::Passthrough),
+                arg_mapping: None,
+            },
+        );
+        let mut downgraded_cfg = server("brave");
+        downgraded_cfg.tools = Some(downgraded);
+        r.populate_from_server_config(&downgraded_cfg);
+
+        assert_eq!(
+            r.lookup_by_names("alias", "web_search"),
+            ResponseFormat::Passthrough,
+            "explicit Passthrough must clear the previous alias entry"
+        );
+        assert_eq!(
+            r.lookup_by_names("brave", "brave_web_search"),
+            ResponseFormat::Passthrough,
+            "explicit Passthrough must clear the previous direct entry"
         );
     }
 }

From d078b01c5febad1f0bccfdf531657e790b884941 Mon Sep 17 00:00:00 2001
From: Simo Lin <25425177+slin1237@users.noreply.github.com>
Date: Mon, 4 May 2026 10:21:30 -0700
Subject: [PATCH 02/15] fix(openai-bridge): preserve tool-execution failure
 state in transform_tool_output
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously `transform_tool_output` always delegated to the success-path
builders (`to_mcp_call`, `to_web_search_call`, etc.), which stamp
`status = "completed"` and `error = None`. A failed tool execution
therefore got persisted as a success and replayed as such on the next
turn — and on the streaming path emitted on the wire as a successful
item too.

Bypass the success builders when `output.is_error` is set and emit a
typed failure item instead:

- `mcp_call`: `status = "failed"`, `error = Some(error_message)`,
  preserving the call's tool name / arguments / server label so the
  client can correlate.
- Hosted-builtin variants (`web_search_call` / `code_interpreter_call`
  / `file_search_call` / `image_generation_call`): `status = Failed`
  only — those four families have no `error` field on the wire per the
  Responses API spec.

`smg_mcp::ToolExecutionOutput` is `#[non_exhaustive]`, which blocks
fixture construction in the router test crate. Add a
`pub fn new_for_test(...)` constructor on the type so external test
crates can fabricate one without spinning up a real MCP server. The
helper is annotated with `#[expect(clippy::too_many_arguments)]` and
documented as test-only — adding a field to the struct doesn't break
it, only adding a *required* field would, which is a deliberate signal.

Adds two regression tests: one for the `mcp_call` failure shape and
one parameterized over all four hosted-builtin variants asserting
`status = "failed"` with no `error` field on the wire.

Comment addressed (PR #1429): coderabbitai 3175392558.

Signed-off-by: Simo Lin <25425177+slin1237@users.noreply.github.com>
---
 crates/mcp/src/core/orchestrator.rs           |  42 +++++-
 .../common/openai_bridge/transformer.rs       | 137 ++++++++++++++++++
 2 files changed, 178 insertions(+), 1 deletion(-)

diff --git a/crates/mcp/src/core/orchestrator.rs b/crates/mcp/src/core/orchestrator.rs
index e0197cdcc..43fa03c64 100644
--- a/crates/mcp/src/core/orchestrator.rs
+++ b/crates/mcp/src/core/orchestrator.rs
@@ -158,7 +158,8 @@ pub struct ToolExecutionInput {
 /// Output from batch tool execution.
 ///
 /// `#[non_exhaustive]` so additive fields don't break consumers; only
-/// `smg-mcp` constructs this.
+/// `smg-mcp` constructs this in production. External crates that need to
+/// fabricate one for tests should use [`ToolExecutionOutput::new_for_test`].
 #[derive(Debug, Clone)]
 #[non_exhaustive]
 pub struct ToolExecutionOutput {
@@ -184,6 +185,45 @@ pub struct ToolExecutionOutput {
     pub duration: Duration,
 }
 
+impl ToolExecutionOutput {
+    /// Construct an instance from explicit field values.
+    ///
+    /// `#[non_exhaustive]` blocks struct-literal syntax in downstream crates,
+    /// but those crates still need a way to fabricate fixtures (e.g. router
+    /// transformer tests that need a `is_error: true` output without spinning
+    /// up an MCP server). Adding a field to the struct doesn't break this
+    /// constructor — only adding a *required* field would, which is a
+    /// deliberate signal that all callers need to think about the new field.
+    #[expect(
+        clippy::too_many_arguments,
+        reason = "Test fixture constructor; one positional arg per public field is intentional \
+                  so adding a field forces every test to consider it."
+    )]
+    pub fn new_for_test(
+        call_id: impl Into<String>,
+        tool_name: impl Into<String>,
+        server_key: impl Into<String>,
+        server_label: impl Into<String>,
+        arguments_str: impl Into<String>,
+        output: Value,
+        is_error: bool,
+        error_message: Option<String>,
+        duration: Duration,
+    ) -> Self {
+        Self {
+            call_id: call_id.into(),
+            tool_name: tool_name.into(),
+            server_key: server_key.into(),
+            server_label: server_label.into(),
+            arguments_str: arguments_str.into(),
+            output,
+            is_error,
+            error_message,
+            duration,
+        }
+    }
+}
+
 /// Result from resolved tool execution that preserves interactive approval state.
 #[derive(Debug, Clone)]
 pub enum ToolExecutionResult {
diff --git a/model_gateway/src/routers/common/openai_bridge/transformer.rs b/model_gateway/src/routers/common/openai_bridge/transformer.rs
index 3ec3fe745..4590ca7ab 100644
--- a/model_gateway/src/routers/common/openai_bridge/transformer.rs
+++ b/model_gateway/src/routers/common/openai_bridge/transformer.rs
@@ -18,10 +18,21 @@ use super::ResponseFormat;
 /// lookup against `(output.server_key, output.tool_name)` would miss for
 /// disambiguated names like `mcp_<server>_<tool>` and silently degrade to
 /// `Passthrough`.
+///
+/// Failure handling: when `output.is_error` is set, the success-path builders
+/// (which always stamp `status = "completed"` and `error = None`) would
+/// persist a failed call as a success and replay it as such on the next turn.
+/// We bypass them and emit a typed failure item — `status = "failed"` on every
+/// hosted-builtin variant; only the `mcp_call` variant carries an additional
+/// `error` field, matching the OpenAI Responses spec where the four
+/// hosted-builtin families convey failure via `status` alone.
 pub fn transform_tool_output(
     output: &smg_mcp::ToolExecutionOutput,
     response_format: ResponseFormat,
 ) -> ResponseOutputItem {
+    if output.is_error {
+        return failed_output_item(output, response_format);
+    }
     ResponseTransformer::transform(
         &output.output,
         response_format,
@@ -32,6 +43,67 @@ pub fn transform_tool_output(
     )
 }
 
+/// Build a typed failure-state output item for a `ToolExecutionOutput` whose
+/// `is_error` is set. Centralizes the per-format failure shape so callers
+/// don't need to reach into the typed `ResponseOutputItem` enum.
+fn failed_output_item(
+    output: &smg_mcp::ToolExecutionOutput,
+    response_format: ResponseFormat,
+) -> ResponseOutputItem {
+    let err_msg = output
+        .error_message
+        .clone()
+        .unwrap_or_else(|| "Tool execution failed".to_string());
+    match response_format {
+        ResponseFormat::Passthrough => ResponseOutputItem::McpCall {
+            id: mcp_response_item_id(&output.call_id),
+            status: "failed".to_string(),
+            approval_request_id: None,
+            arguments: output.arguments_str.clone(),
+            error: Some(err_msg),
+            name: output.tool_name.clone(),
+            output: String::new(),
+            server_label: output.server_label.clone(),
+        },
+        ResponseFormat::WebSearchCall => ResponseOutputItem::WebSearchCall {
+            id: format!("ws_{}", output.call_id),
+            status: WebSearchCallStatus::Failed,
+            // No query is recoverable from a failed dispatch; emit the
+            // empty `Search` action variant so the wire shape stays valid.
+            action: WebSearchAction::Search {
+                query: None,
+                queries: Vec::new(),
+                sources: Vec::new(),
+            },
+            results: None,
+        },
+        ResponseFormat::CodeInterpreterCall => ResponseOutputItem::CodeInterpreterCall {
+            id: format!("ci_{}", output.call_id),
+            status: CodeInterpreterCallStatus::Failed,
+            container_id: String::new(),
+            code: None,
+            outputs: None,
+        },
+        ResponseFormat::FileSearchCall => ResponseOutputItem::FileSearchCall {
+            id: format!("fs_{}", output.call_id),
+            status: FileSearchCallStatus::Failed,
+            queries: Vec::new(),
+            results: None,
+        },
+        ResponseFormat::ImageGenerationCall => ResponseOutputItem::ImageGenerationCall {
+            id: format!("ig_{}", output.call_id),
+            action: None,
+            background: None,
+            output_format: None,
+            quality: None,
+            result: String::new(),
+            revised_prompt: None,
+            size: None,
+            status: ImageGenerationCallStatus::Failed,
+        },
+    }
+}
+
 /// Normalize an MCP response item id source into an external `mcp_call.id`.
 ///
 /// The input may be an upstream output item id (`fc_*`), an internal call id
@@ -1547,6 +1619,71 @@ mod tests {
         }
     }
 
+    fn failed_tool_output(tool_name: &str) -> smg_mcp::ToolExecutionOutput {
+        smg_mcp::ToolExecutionOutput::new_for_test(
+            "call_xyz",
+            tool_name,
+            "srv",
+            "srv-label",
+            "{\"q\":\"v\"}",
+            serde_json::json!({}),
+            /* is_error */ true,
+            Some("upstream broke".to_string()),
+            std::time::Duration::default(),
+        )
+    }
+
+    #[test]
+    fn transform_tool_output_emits_mcp_call_failed_with_error_message() {
+        let item = transform_tool_output(
+            &failed_tool_output("brave_web_search"),
+            ResponseFormat::Passthrough,
+        );
+        match item {
+            ResponseOutputItem::McpCall {
+                status,
+                error,
+                output,
+                arguments,
+                name,
+                ..
+            } => {
+                assert_eq!(status, "failed");
+                assert_eq!(error.as_deref(), Some("upstream broke"));
+                assert_eq!(output, "");
+                assert_eq!(arguments, "{\"q\":\"v\"}");
+                assert_eq!(name, "brave_web_search");
+            }
+            _ => panic!("Expected McpCall"),
+        }
+    }
+
+    #[test]
+    fn transform_tool_output_emits_hosted_failed_status_only() {
+        // Hosted-builtin variants must convey failure via `status: "failed"`
+        // alone; they have no `error` field on the wire. Using
+        // `transform_tool_output` against the success-path builders would
+        // stamp `status: "completed"` on a failed call — that is the bug.
+        for fmt in [
+            ResponseFormat::WebSearchCall,
+            ResponseFormat::CodeInterpreterCall,
+            ResponseFormat::FileSearchCall,
+            ResponseFormat::ImageGenerationCall,
+        ] {
+            let item = transform_tool_output(&failed_tool_output("search"), fmt);
+            let serialized = serde_json::to_value(&item).expect("serialize failed item");
+            assert_eq!(
+                serialized["status"],
+                serde_json::json!("failed"),
+                "{fmt:?} must serialize status=failed on error"
+            );
+            assert!(
+                serialized.get("error").is_none(),
+                "{fmt:?} must not carry an `error` field (hosted variants don't have one): {serialized}"
+            );
+        }
+    }
+
     #[test]
     fn test_compact_image_generation_outputs_json_strips_base64() {
         let mut outputs = vec![

From 3aed9007df57da9f231b5030fd9d3ba151dbaede Mon Sep 17 00:00:00 2001
From: Simo Lin <25425177+slin1237@users.noreply.github.com>
Date: Mon, 4 May 2026 10:21:41 -0700
Subject: [PATCH 03/15] fix(openai-bridge): narrow mcp_list_tools annotations
 back to {read_only}
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR #1429 inadvertently reverted the wire-spec narrowing introduced by
1c82bca1 (\"refactor(grpc): unify Responses surfaces\"). That earlier
commit explicitly cut down `mcp_list_tools[].tools[].annotations` to
`{\"read_only\": …}` to match OpenAI's Responses API surface, after
which the bridge move re-serialized the full SMG `ToolAnnotations`
struct (which carries `destructive`/`idempotent`/`open_world` for
internal approval-pipeline use).

Restore the narrowed shape and add a regression test that asserts the
serialized field is exactly `{"read_only": <bool>}`. The richer
annotations stay readable internally via `ToolEntry::annotations`; only
the wire form changes.

Comments addressed (PR #1429): coderabbitai 3174799428 / 3174799434.

Signed-off-by: Simo Lin <25425177+slin1237@users.noreply.github.com>
---
 .../common/openai_bridge/tool_descriptors.rs  | 61 +++++++++++++++++--
 1 file changed, 56 insertions(+), 5 deletions(-)

diff --git a/model_gateway/src/routers/common/openai_bridge/tool_descriptors.rs b/model_gateway/src/routers/common/openai_bridge/tool_descriptors.rs
index 7d513bb87..cf486f077 100644
--- a/model_gateway/src/routers/common/openai_bridge/tool_descriptors.rs
+++ b/model_gateway/src/routers/common/openai_bridge/tool_descriptors.rs
@@ -85,6 +85,14 @@ pub fn response_tools(session: &McpToolSession<'_>) -> Vec<ResponseTool> {
 }
 
 /// `McpToolInfo` records used inside `mcp_list_tools` output items.
+///
+/// Wire-shape note: OpenAI's Responses API exposes only `{"read_only": …}`
+/// inside `mcp_list_tools[].tools[].annotations`. The richer SMG
+/// `ToolAnnotations` (which also carries `destructive`/`idempotent`/
+/// `open_world`) is intentionally narrowed here to match that shape — those
+/// extra hints are read directly off `ToolEntry::annotations` by the approval
+/// pipeline and would only confuse downstream OpenAI-compatible clients if
+/// emitted on the wire.
 pub fn build_mcp_tool_infos(entries: &[smg_mcp::ToolEntry]) -> Vec<McpToolInfo> {
     entries
         .iter()
@@ -92,11 +100,9 @@ pub fn build_mcp_tool_infos(entries: &[smg_mcp::ToolEntry]) -> Vec<McpToolInfo>
             name: entry.tool_name().to_string(),
             description: entry.tool.description.as_ref().map(|d| d.to_string()),
             input_schema: schema_to_value(&entry.tool.input_schema),
-            annotations: entry
-                .tool
-                .annotations
-                .as_ref()
-                .and_then(|a| serde_json::to_value(a).ok()),
+            annotations: Some(json!({
+                "read_only": entry.annotations.read_only,
+            })),
         })
         .collect()
 }
@@ -311,3 +317,48 @@ fn is_client_visible_output_item(
         | ResponseOutputItem::LocalShellCallOutput { .. } => true,
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use std::{borrow::Cow, sync::Arc};
+
+    use rmcp::model::Tool;
+    use smg_mcp::{ToolAnnotations, ToolEntry};
+
+    use super::*;
+
+    fn entry_with_annotations(annotations: ToolAnnotations) -> ToolEntry {
+        let tool = Tool {
+            name: Cow::Owned("widget".to_string()),
+            title: None,
+            description: Some(Cow::Owned("widget description".to_string())),
+            input_schema: Arc::new(serde_json::Map::new()),
+            output_schema: None,
+            annotations: None,
+            icons: None,
+        };
+        ToolEntry::from_server_tool("srv", tool).with_annotations(annotations)
+    }
+
+    #[test]
+    fn build_mcp_tool_infos_emits_only_read_only_annotation() {
+        // Wire-spec parity: OpenAI's `mcp_list_tools[].tools[].annotations`
+        // exposes `{"read_only": …}` only. The richer SMG `ToolAnnotations`
+        // carries `destructive`/`idempotent`/`open_world` that the approval
+        // pipeline reads internally; surfacing them on the wire would change
+        // the documented shape.
+        let annotations = ToolAnnotations::default()
+            .with_read_only(true)
+            .with_destructive(true);
+        let entries = vec![entry_with_annotations(annotations)];
+
+        let infos = build_mcp_tool_infos(&entries);
+        assert_eq!(infos.len(), 1);
+        let serialized = serde_json::to_value(&infos[0])
+            .expect("McpToolInfo must serialize")
+            .get("annotations")
+            .cloned()
+            .expect("annotations must serialize");
+        assert_eq!(serialized, json!({ "read_only": true }));
+    }
+}

From 5c28cac9c1941f59b1a2c3d56c59adc841f0ad7d Mon Sep 17 00:00:00 2001
From: Simo Lin <25425177+slin1237@users.noreply.github.com>
Date: Mon, 4 May 2026 10:21:55 -0700
Subject: [PATCH 04/15] fix(persistence): preserve replayed structural input
 items verbatim
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`item_to_new_conversation_item` only treated `function_call` /
`function_call_output` as whole-item on the input side. Other
structural input items (replayed `image_generation_call`,
`web_search_call`, `mcp_call`, `mcp_list_tools`, etc. on a multi-turn
turn carrying prior output back in as input) fell through to
`item_value.get("content").cloned().unwrap_or(json!([]))`. Those types
have no `content` field — the wrapper became `[]`, and the next
history load lost the original item.

Unify the rule to `item_type != "message"` for both sides. Non-message
items have no `content` field on the wire either way, so the input
side now matches the output side's existing whole-item store. Adds two
regression tests: one for the structural input item case and one
asserting message items still extract their `content` array
(message-side behavior is unchanged).

Comment addressed (PR #1429): coderabbitai 3175392562.

Signed-off-by: Simo Lin <25425177+slin1237@users.noreply.github.com>
---
 .../src/routers/common/persistence_utils.rs   | 78 +++++++++++++++++--
 1 file changed, 71 insertions(+), 7 deletions(-)

diff --git a/model_gateway/src/routers/common/persistence_utils.rs b/model_gateway/src/routers/common/persistence_utils.rs
index 98010bd8e..8a1ac3b27 100644
--- a/model_gateway/src/routers/common/persistence_utils.rs
+++ b/model_gateway/src/routers/common/persistence_utils.rs
@@ -320,19 +320,23 @@ fn extract_input_items(input: &ResponseInput) -> Result<Vec<Value>, String> {
 fn item_to_new_conversation_item(
     item_value: &Value,
     response_id: Option<String>,
-    is_input: bool,
+    _is_input: bool,
 ) -> NewConversationItem {
     let item_type = item_value
         .get("type")
         .and_then(|v| v.as_str())
         .unwrap_or("message");
 
-    // Determine if we should store the whole item or just the content field
-    let store_whole_item = if is_input {
-        item_type == "function_call" || item_type == "function_call_output"
-    } else {
-        item_type != "message"
-    };
+    // Store the whole item for every non-message type, regardless of input vs
+    // output side. The previous input-side branch only treated
+    // `function_call` / `function_call_output` as whole-item, so a replayed
+    // structural item (e.g. `image_generation_call`, `web_search_call`,
+    // `mcp_call` carried back into a turn's input) fell through to the
+    // `content` extractor — but those items have no `content` field, so
+    // persistence would store an empty array and the next history load would
+    // lose the original item. Non-message items have no `content` field on
+    // the wire either way, so unifying the rule is safe.
+    let store_whole_item = item_type != "message";
 
     let content = if store_whole_item {
         item_value.clone()
@@ -515,3 +519,63 @@ async fn persist_conversation_items_inner(
 
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn replayed_image_generation_input_item_is_stored_whole() {
+        // Regression: structural input items (replayed `image_generation_call`
+        // and friends) carry their fields at the top level — not under
+        // `content`. The previous input-side branch only treated
+        // `function_call`/`function_call_output` as whole-item, so any other
+        // structural type fell through to `item_value.get("content")` which
+        // collapsed to `[]`, dropping the whole item from the next history
+        // load.
+        let input_item = json!({
+            "id": "ig_replay_1",
+            "type": "image_generation_call",
+            "status": "completed",
+            "revised_prompt": "cat",
+        });
+
+        let new_item = item_to_new_conversation_item(&input_item, None, /* is_input */ true);
+        assert_eq!(new_item.item_type, "image_generation_call");
+        // Whole-item content preserves all top-level fields, not just `content`.
+        assert_eq!(
+            new_item
+                .content
+                .get("revised_prompt")
+                .and_then(|v| v.as_str()),
+            Some("cat"),
+            "structural input items must round-trip their top-level fields, \
+             not collapse to `content` (was bug for replayed hosted-tool items)"
+        );
+        assert_eq!(
+            new_item.content.get("type").and_then(|v| v.as_str()),
+            Some("image_generation_call"),
+        );
+    }
+
+    #[test]
+    fn replayed_message_input_item_still_extracts_content() {
+        // The unification only changes non-message handling; message items
+        // continue to extract their `content` array (and may wrap with `phase`
+        // when present, exercised separately).
+        let input_item = json!({
+            "id": "msg_1",
+            "type": "message",
+            "role": "user",
+            "content": [{"type": "input_text", "text": "hi"}],
+            "status": "completed",
+        });
+
+        let new_item = item_to_new_conversation_item(&input_item, None, /* is_input */ true);
+        assert_eq!(new_item.item_type, "message");
+        assert_eq!(
+            new_item.content,
+            json!([{"type": "input_text", "text": "hi"}])
+        );
+    }
+}

From bb313490f7e63b375a099861ce378382e0622ae0 Mon Sep 17 00:00:00 2001
From: Simo Lin <25425177+slin1237@users.noreply.github.com>
Date: Mon, 4 May 2026 10:22:08 -0700
Subject: [PATCH 05/15] fix(openai-router): scope format-registry fail-fast to
 MCP-laden requests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous fail-fast on `mcp_format_registry()` ran before the router
knew whether the request would even enter the MCP interception path.
Plain streaming requests that should pass through
`handle_simple_streaming_passthrough()` (and the non-streaming
equivalent) now returned 500 in deployments where the gateway runs
without MCP wiring.

Hoist the registry resolution into the same arm that decides whether
the request needs MCP routing. The arm:

- Detects MCP-laden requests via a new
  `request_uses_mcp_routing(tools)` predicate (any of `Mcp`,
  `WebSearchPreview`, `CodeInterpreter`, `ImageGeneration`).
- Hard-fails with 500 when an MCP-laden request hits a router with no
  registry component — the original safety contract is preserved
  exactly when it matters.
- Returns `(mcp_servers, registry)` as a tuple so the cloned registry
  follows `mcp_servers` into the interception path without a second
  Option lookup downstream.

Plain (non-MCP) requests no longer touch the registry at all and
succeed regardless of whether it's wired up.

Comment addressed (PR #1429): coderabbitai 3175392563.

Signed-off-by: Simo Lin <25425177+slin1237@users.noreply.github.com>
---
 model_gateway/src/routers/common/mcp_utils.rs | 16 ++++++++
 .../routers/openai/responses/non_streaming.rs | 37 +++++++++++-------
 .../src/routers/openai/responses/streaming.rs | 38 +++++++++++--------
 3 files changed, 62 insertions(+), 29 deletions(-)

diff --git a/model_gateway/src/routers/common/mcp_utils.rs b/model_gateway/src/routers/common/mcp_utils.rs
index d9ba42ced..e431aab24 100644
--- a/model_gateway/src/routers/common/mcp_utils.rs
+++ b/model_gateway/src/routers/common/mcp_utils.rs
@@ -247,6 +247,22 @@ pub fn extract_builtin_types(tools: &[ResponseTool]) -> Vec<BuiltinToolType> {
         .collect()
 }
 
+/// True if `tools` carries any MCP-routed entry (declared MCP server or a
+/// builtin family that the gateway intercepts via MCP). Used by the OpenAI
+/// router to decide whether the format-registry component is required for
+/// the request — plain-tool requests don't need it and must not 500.
+pub fn request_uses_mcp_routing(tools: &[ResponseTool]) -> bool {
+    tools.iter().any(|t| {
+        matches!(
+            t,
+            ResponseTool::Mcp(_)
+                | ResponseTool::WebSearchPreview(_)
+                | ResponseTool::CodeInterpreter(_)
+                | ResponseTool::ImageGeneration(_)
+        )
+    })
+}
+
 /// Collect user-declared function tool names from a Responses request.
 pub(crate) fn collect_user_function_names(request: &ResponsesRequest) -> HashSet<String> {
     request
diff --git a/model_gateway/src/routers/openai/responses/non_streaming.rs b/model_gateway/src/routers/openai/responses/non_streaming.rs
index 44ae40224..12d96bc23 100644
--- a/model_gateway/src/routers/openai/responses/non_streaming.rs
+++ b/model_gateway/src/routers/openai/responses/non_streaming.rs
@@ -15,7 +15,7 @@ use super::utils::{patch_response_with_request_metadata, restore_original_tools}
 use crate::routers::{
     common::{
         header_utils::{extract_forwardable_request_headers, ApiProvider},
-        mcp_utils::ensure_request_mcp_client,
+        mcp_utils::{ensure_request_mcp_client, request_uses_mcp_routing},
         openai_bridge,
         persistence_utils::persist_conversation_items,
     },
@@ -65,24 +65,33 @@ pub async fn handle_non_streaming_response(mut ctx: RequestContext) -> Response
 
     // The format registry is the router-side source of truth for MCP
     // builtin/alias format resolution; falling back to a default would
-    // silently mis-route hosted tools instead of failing fast.
-    let mcp_format_registry = match ctx.components.mcp_format_registry() {
-        Some(r) => r.clone(),
-        None => {
-            return error::internal_error("internal_error", "MCP format registry required");
+    // silently mis-route hosted tools instead of failing fast. The check is
+    // scoped to MCP-laden requests — plain non-MCP requests must still
+    // succeed in deployments where the gateway runs without MCP wiring.
+    //
+    // A request with MCP tools but no registry component is a configuration
+    // error: route resolution would silently degrade to `Passthrough`, so we
+    // fail fast instead. Resolve `(mcp_servers, registry)` together so the
+    // typed result carries the registry into the MCP arm without a second
+    // option lookup.
+    let mcp_routing = match original_body.tools.as_deref() {
+        Some(tools) if request_uses_mcp_routing(tools) => {
+            let Some(registry) = ctx.components.mcp_format_registry() else {
+                return error::internal_error(
+                    "internal_error",
+                    "MCP format registry required for requests carrying MCP/builtin tools",
+                );
+            };
+            ensure_request_mcp_client(mcp_orchestrator, registry, tools)
+                .await
+                .map(|servers| (servers, registry.clone()))
         }
-    };
-
-    // Check for MCP tools and create session if needed
-    let mcp_servers = if let Some(tools) = original_body.tools.as_deref() {
-        ensure_request_mcp_client(mcp_orchestrator, &mcp_format_registry, tools).await
-    } else {
-        None
+        _ => None,
     };
 
     let mut response_json: Value;
 
-    if let Some(mcp_servers) = mcp_servers {
+    if let Some((mcp_servers, mcp_format_registry)) = mcp_routing {
         let session_request_id = original_body
             .request_id
             .clone()
diff --git a/model_gateway/src/routers/openai/responses/streaming.rs b/model_gateway/src/routers/openai/responses/streaming.rs
index 2b6a204a3..c2195466e 100644
--- a/model_gateway/src/routers/openai/responses/streaming.rs
+++ b/model_gateway/src/routers/openai/responses/streaming.rs
@@ -1065,7 +1065,7 @@ pub(super) fn handle_streaming_with_tool_interception(
 
 /// Main entry point for streaming responses
 pub async fn handle_streaming_response(ctx: RequestContext) -> Response {
-    use crate::routers::common::mcp_utils::ensure_request_mcp_client;
+    use crate::routers::common::mcp_utils::{ensure_request_mcp_client, request_uses_mcp_routing};
 
     let worker = match ctx.worker() {
         Some(w) => w.clone(),
@@ -1086,20 +1086,28 @@ pub async fn handle_streaming_response(ctx: RequestContext) -> Response {
             return error::internal_error("internal_error", "MCP orchestrator required");
         }
     };
-    // Same fail-fast contract as the non-streaming path: a missing format
-    // registry means MCP routing decisions would be silently wrong.
-    let mcp_format_registry = match ctx.components.mcp_format_registry() {
-        Some(r) => r.clone(),
-        None => {
-            return error::internal_error("internal_error", "MCP format registry required");
+    // Scope the format-registry fail-fast to MCP-laden requests. Plain
+    // streaming requests must still pass through deployments that run
+    // without MCP wiring; only requests that actually need MCP routing
+    // require the registry.
+    //
+    // Resolve `(mcp_servers, registry)` as a single tuple so the cloned
+    // registry follows mcp_servers into the interception path without a
+    // second option lookup downstream.
+    let mcp_routing = match original_body.tools.as_deref() {
+        Some(tools) if request_uses_mcp_routing(tools) => {
+            let Some(registry) = ctx.components.mcp_format_registry() else {
+                return error::internal_error(
+                    "internal_error",
+                    "MCP format registry required for requests carrying MCP/builtin tools",
+                );
+            };
+            let registry = registry.clone();
+            ensure_request_mcp_client(&mcp_orchestrator, &registry, tools)
+                .await
+                .map(|servers| (servers, registry))
         }
-    };
-
-    // Check for MCP tools and create request context if needed
-    let mcp_servers = if let Some(tools) = original_body.tools.as_deref() {
-        ensure_request_mcp_client(&mcp_orchestrator, &mcp_format_registry, tools).await
-    } else {
-        None
+        _ => None,
     };
 
     let client = ctx.components.client().clone();
@@ -1110,7 +1118,7 @@ pub async fn handle_streaming_response(ctx: RequestContext) -> Response {
         }
     };
 
-    let Some(mcp_servers) = mcp_servers else {
+    let Some((mcp_servers, mcp_format_registry)) = mcp_routing else {
         return handle_simple_streaming_passthrough(&client, &worker, headers.as_ref(), req).await;
     };
 

From cde08582cf42a44aada2bbe4ef86727536828172 Mon Sep 17 00:00:00 2001
From: Simo Lin <25425177+slin1237@users.noreply.github.com>
Date: Mon, 4 May 2026 10:22:26 -0700
Subject: [PATCH 06/15] fix(grpc-streaming): emit transformed item for
 output_item.done
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The gRPC regular streaming path hand-rolled an ad-hoc envelope
(`{id, type, name, status, arguments, output|error}`) for the
`output_item.done` SSE payload, then SEPARATELY built the typed item
via `transform_tool_output(...)` only for `state.record_call`. For
hosted-builtin formats this diverged from the persisted item:

- Hosted variants (`web_search_call`/`code_interpreter_call`/
  `file_search_call`/`image_generation_call`) have no `name` /
  `arguments` / `output` fields — the ad-hoc envelope shipped fields
  the wire shape doesn't recognize.
- Failures leaked an `error` field even though only `mcp_call` carries
  one in the Responses API spec; the four hosted families convey
  failure via `status: "failed"` alone.
- `mcp_call.failed` was emitted for every format, including the
  hosted-builtin families that don't have a `*.failed` event in the
  event taxonomy (only `response.mcp_call.failed` exists).

Build the typed item once via `transform_tool_output` (which now
preserves failure state — see preceding commit), serialize it, override
the `id` so `output_item.done` matches the streaming-allocated id used
by the earlier `output_item.added`, and reuse the same item for both
the SSE payload and `state.record_call`. Restrict
`emit_mcp_call_failed` to `Passthrough`; hosted-builtin failure now
emits the format-specific `*.completed` event followed by
`output_item.done` carrying the failed typed item.

Comment addressed (PR #1429): coderabbitai 3174987653.

Signed-off-by: Simo Lin <25425177+slin1237@users.noreply.github.com>
---
 .../grpc/regular/responses/streaming.rs       | 108 ++++++++++--------
 1 file changed, 60 insertions(+), 48 deletions(-)

diff --git a/model_gateway/src/routers/grpc/regular/responses/streaming.rs b/model_gateway/src/routers/grpc/regular/responses/streaming.rs
index ad00ae4d6..237096b86 100644
--- a/model_gateway/src/routers/grpc/regular/responses/streaming.rs
+++ b/model_gateway/src/routers/grpc/regular/responses/streaming.rs
@@ -739,31 +739,46 @@ async fn execute_tool_loop_streaming_internal(
                 let success = !tool_output.is_error;
                 let output_str = tool_output.output.to_string();
 
-                if success {
-                    // Emit tool_call.completed
-                    let event =
-                        emitter.emit_tool_call_completed(output_index, &item_id, response_format);
-                    emitter.send_event(&event, &tx)?;
-
-                    // Build complete item with output
-                    let mut item_done = json!({
+                // Build the typed output item once and reuse it for both the
+                // streamed `output_item.done` payload and the persisted state
+                // record. The previous code hand-rolled an ad-hoc envelope
+                // (`{name, status, arguments, output|error}`) for the wire
+                // payload — a shape that does not match the hosted-builtin
+                // variants (`web_search_call`/`code_interpreter_call`/
+                // `file_search_call`/`image_generation_call` have no `name`/
+                // `arguments`/`output` fields, and only `mcp_call` carries an
+                // `error` field) — so the streamed final item could diverge
+                // from the persisted one and leak invalid fields on failure.
+                let output_item =
+                    openai_bridge::transform_tool_output(&tool_output, response_format);
+                let mut item_done = serde_json::to_value(&output_item).unwrap_or_else(|e| {
+                    warn!(
+                        tool = %tool_output.tool_name,
+                        error = %e,
+                        "Failed to serialize transformed output item; falling back to a minimal stub",
+                    );
+                    json!({
                         "id": item_id,
                         "type": item_type,
-                        "name": tool_output.tool_name,
-                        "status": "completed",
-                        "arguments": tool_output.arguments_str,
-                        "output": output_str
-                    });
-                    attach_mcp_server_label(
-                        &mut item_done,
-                        Some(tool_output.server_label.as_str()),
-                        Some(&response_format),
-                    );
+                        "status": if success { "completed" } else { "failed" },
+                    })
+                });
+                // Preserve the streaming-allocated id so `output_item.done`
+                // matches the earlier `output_item.added`.
+                if let Some(obj) = item_done.as_object_mut() {
+                    obj.insert("id".to_string(), json!(&item_id));
+                }
+                attach_mcp_server_label(
+                    &mut item_done,
+                    Some(tool_output.server_label.as_str()),
+                    Some(&response_format),
+                );
 
-                    // Emit output_item.done
-                    let event = emitter.emit_output_item_done(output_index, &item_done);
+                if success {
+                    // Emit format-specific tool_call.completed event.
+                    let event =
+                        emitter.emit_tool_call_completed(output_index, &item_id, response_format);
                     emitter.send_event(&event, &tx)?;
-                    emitter.complete_output_item(output_index);
                 } else {
                     let err_text = tool_output
                         .error_message
@@ -771,31 +786,30 @@ async fn execute_tool_loop_streaming_internal(
                         .unwrap_or_else(|| output_str.clone());
                     warn!("Tool execution returned error: {}", err_text);
 
-                    // Emit mcp_call.failed (no web_search_call.failed event exists)
-                    let event = emitter.emit_mcp_call_failed(output_index, &item_id, &err_text);
-                    emitter.send_event(&event, &tx)?;
-
-                    // Build failed item
-                    let mut item_done = json!({
-                        "id": item_id,
-                        "type": item_type,
-                        "name": tool_output.tool_name,
-                        "status": "failed",
-                        "arguments": tool_output.arguments_str,
-                        "error": err_text
-                    });
-                    attach_mcp_server_label(
-                        &mut item_done,
-                        Some(tool_output.server_label.as_str()),
-                        Some(&response_format),
-                    );
-
-                    // Emit output_item.done
-                    let event = emitter.emit_output_item_done(output_index, &item_done);
-                    emitter.send_event(&event, &tx)?;
-                    emitter.complete_output_item(output_index);
+                    // `mcp_call.failed` is the only `*.failed` event in the
+                    // Responses API event taxonomy — hosted-builtin families
+                    // close out via `*.completed` followed by `output_item.done`
+                    // carrying `status: "failed"` (no `error` field on those
+                    // variants).
+                    if matches!(response_format, ResponseFormat::Passthrough) {
+                        let event = emitter.emit_mcp_call_failed(output_index, &item_id, &err_text);
+                        emitter.send_event(&event, &tx)?;
+                    } else {
+                        let event = emitter.emit_tool_call_completed(
+                            output_index,
+                            &item_id,
+                            response_format,
+                        );
+                        emitter.send_event(&event, &tx)?;
+                    }
                 }
 
+                // Emit output_item.done with the transformed item (correct
+                // shape for both mcp_call and hosted-builtin variants).
+                let event = emitter.emit_output_item_done(output_index, &item_done);
+                emitter.send_event(&event, &tx)?;
+                emitter.complete_output_item(output_index);
+
                 // Record MCP tool metrics
                 Metrics::record_mcp_tool_duration(
                     &current_request.model,
@@ -812,10 +826,8 @@ async fn execute_tool_loop_streaming_internal(
                     },
                 );
 
-                let output_item =
-                    openai_bridge::transform_tool_output(&tool_output, response_format);
-
-                // Record the call in state with transformed output item
+                // Record the call in state with the same transformed item we
+                // just emitted on the wire.
                 state.record_call(
                     tool_output.call_id,
                     tool_output.tool_name,

From b6bf495ac3fb5df508fe874c1b9a495e007e3498 Mon Sep 17 00:00:00 2001
From: Simo Lin <25425177+slin1237@users.noreply.github.com>
Date: Mon, 4 May 2026 10:22:36 -0700
Subject: [PATCH 07/15] test(mcp): route web_search regression assertions
 through transform_tool_output
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The two `test_web_search_transform_*_with_mock` integration tests
hand-fed `ResponseTransformer::transform` with hardcoded
call_id/server_label/tool_name/arguments strings. A regression in
session-side rewriting (which lives on `ToolExecutionOutput.{call_id,
server_label, tool_name, arguments_str}`) would still leave these
assertions green.

Route the assertions through `openai_bridge::transform_tool_output`
instead, which consumes the actual `ToolExecutionOutput` returned by
`session.execute_tool(...)`. Same coverage of the WebSearchCall shape,
plus catches future drift in the session→bridge field plumbing.

Comment addressed (PR #1429): coderabbitai 3175392565.

Signed-off-by: Simo Lin <25425177+slin1237@users.noreply.github.com>
---
 model_gateway/tests/mcp_test.rs | 32 ++++++++++++--------------------
 1 file changed, 12 insertions(+), 20 deletions(-)

diff --git a/model_gateway/tests/mcp_test.rs b/model_gateway/tests/mcp_test.rs
index 0aa84eb81..09a27dabc 100644
--- a/model_gateway/tests/mcp_test.rs
+++ b/model_gateway/tests/mcp_test.rs
@@ -14,7 +14,7 @@ use std::collections::HashMap;
 use common::mock_mcp_server::{MockMCPServer, MockSearchResponseMCPServer, MockSearchResponseMode};
 use openai_protocol::responses::{ResponseOutputItem, WebSearchAction};
 use serde_json::json;
-use smg::routers::common::openai_bridge::{ResponseFormat, ResponseTransformer};
+use smg::routers::common::openai_bridge::{transform_tool_output, ResponseFormat};
 use smg_mcp::{
     core::config::{ResponseFormatConfig, ToolConfig},
     McpConfig, McpOrchestrator, McpServerBinding, McpServerConfig, McpToolSession, McpTransport,
@@ -334,17 +334,13 @@ async fn test_web_search_transform_handles_openai_search_response_with_mock() {
 
     assert!(!output.is_error, "Tool execution should succeed");
 
-    // The session returns the raw `output` Value from the MCP call. Re-transform
-    // with WebSearchCall format to verify serialization (end-to-end source
-    // extraction is covered by the gateway bridge's own tests).
-    let transformed = ResponseTransformer::transform(
-        &output.output,
-        ResponseFormat::WebSearchCall,
-        "test-request-openai-search",
-        "openai_search_server",
-        "brave_web_search",
-        "{\"query\":\"rust openai search\"}",
-    );
+    // Route the actual `ToolExecutionOutput` through the production bridge
+    // helper so a regression in session-side rewrites (`call_id`,
+    // `server_label`, `tool_name`, `arguments_str` all live on `output`)
+    // would surface here too. Hardcoding those fields against
+    // `ResponseTransformer::transform` would let such a regression slip
+    // through with the test still green.
+    let transformed = transform_tool_output(&output, ResponseFormat::WebSearchCall);
     match transformed {
         ResponseOutputItem::WebSearchCall { action, .. } => match action {
             WebSearchAction::Search {
@@ -419,14 +415,10 @@ async fn test_web_search_transform_sets_action_query_for_brave_search_with_mock(
 
     assert!(!output.is_error, "Tool execution should succeed");
 
-    let transformed = ResponseTransformer::transform(
-        &output.output,
-        ResponseFormat::WebSearchCall,
-        "test-request-brave",
-        "brave_response_server",
-        "brave_web_search",
-        "{\"query\":\"rust brave query\"}",
-    );
+    // Same reasoning as the OpenAI variant above — feed the actual
+    // `ToolExecutionOutput` through the production bridge helper so any
+    // future regression in session-side field rewrites surfaces here.
+    let transformed = transform_tool_output(&output, ResponseFormat::WebSearchCall);
     match transformed {
         ResponseOutputItem::WebSearchCall { action, .. } => match action {
             WebSearchAction::Search {

From 9bbdc28104515896a24f1004cdc3c60b91324fdc Mon Sep 17 00:00:00 2001
From: Simo Lin <25425177+slin1237@users.noreply.github.com>
Date: Mon, 4 May 2026 11:01:59 -0700
Subject: [PATCH 08/15] style: trim narrative comments, keep only the
 load-bearing ones
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop narration that explained what the code does, plus PR/commit
references that will rot. Keep the cross-file invariants and wire-spec
rules that aren't expressed elsewhere — alias-key vs direct-key
mirroring, hosted-builtin failure shape, mcp_call.failed event scope,
non-message items having no `content` field on the wire, and
output_item.done id-override rationale.

Signed-off-by: Simo Lin <25425177+slin1237@users.noreply.github.com>
---
 crates/mcp/src/core/orchestrator.rs           | 18 ++---
 model_gateway/src/routers/common/mcp_utils.rs |  4 +-
 .../common/openai_bridge/format_registry.rs   | 71 ++-----------------
 .../common/openai_bridge/tool_descriptors.rs  | 16 ++---
 .../common/openai_bridge/transformer.rs       | 43 +++--------
 .../src/routers/common/persistence_utils.rs   | 26 ++-----
 .../grpc/regular/responses/streaming.rs       | 28 ++------
 .../routers/openai/responses/non_streaming.rs | 15 ++--
 .../src/routers/openai/responses/streaming.rs | 10 +--
 model_gateway/tests/mcp_test.rs               |  9 ---
 10 files changed, 44 insertions(+), 196 deletions(-)

diff --git a/crates/mcp/src/core/orchestrator.rs b/crates/mcp/src/core/orchestrator.rs
index 43fa03c64..545350f67 100644
--- a/crates/mcp/src/core/orchestrator.rs
+++ b/crates/mcp/src/core/orchestrator.rs
@@ -158,8 +158,8 @@ pub struct ToolExecutionInput {
 /// Output from batch tool execution.
 ///
 /// `#[non_exhaustive]` so additive fields don't break consumers; only
-/// `smg-mcp` constructs this in production. External crates that need to
-/// fabricate one for tests should use [`ToolExecutionOutput::new_for_test`].
+/// `smg-mcp` constructs this in production. External tests use
+/// [`ToolExecutionOutput::new_for_test`].
 #[derive(Debug, Clone)]
 #[non_exhaustive]
 pub struct ToolExecutionOutput {
@@ -186,18 +186,12 @@ pub struct ToolExecutionOutput {
 }
 
 impl ToolExecutionOutput {
-    /// Construct an instance from explicit field values.
-    ///
-    /// `#[non_exhaustive]` blocks struct-literal syntax in downstream crates,
-    /// but those crates still need a way to fabricate fixtures (e.g. router
-    /// transformer tests that need a `is_error: true` output without spinning
-    /// up an MCP server). Adding a field to the struct doesn't break this
-    /// constructor — only adding a *required* field would, which is a
-    /// deliberate signal that all callers need to think about the new field.
+    /// Test-only constructor for downstream crates blocked by
+    /// `#[non_exhaustive]`. One positional arg per public field is
+    /// intentional so adding a field forces every test to consider it.
     #[expect(
         clippy::too_many_arguments,
-        reason = "Test fixture constructor; one positional arg per public field is intentional \
-                  so adding a field forces every test to consider it."
+        reason = "Test fixture constructor; one arg per field is intentional."
     )]
     pub fn new_for_test(
         call_id: impl Into<String>,
diff --git a/model_gateway/src/routers/common/mcp_utils.rs b/model_gateway/src/routers/common/mcp_utils.rs
index e431aab24..f1fb46aae 100644
--- a/model_gateway/src/routers/common/mcp_utils.rs
+++ b/model_gateway/src/routers/common/mcp_utils.rs
@@ -248,9 +248,7 @@ pub fn extract_builtin_types(tools: &[ResponseTool]) -> Vec<BuiltinToolType> {
 }
 
 /// True if `tools` carries any MCP-routed entry (declared MCP server or a
-/// builtin family that the gateway intercepts via MCP). Used by the OpenAI
-/// router to decide whether the format-registry component is required for
-/// the request — plain-tool requests don't need it and must not 500.
+/// builtin family that the gateway intercepts via MCP).
 pub fn request_uses_mcp_routing(tools: &[ResponseTool]) -> bool {
     tools.iter().any(|t| {
         matches!(
diff --git a/model_gateway/src/routers/common/openai_bridge/format_registry.rs b/model_gateway/src/routers/common/openai_bridge/format_registry.rs
index ba8844dce..14c5652ac 100644
--- a/model_gateway/src/routers/common/openai_bridge/format_registry.rs
+++ b/model_gateway/src/routers/common/openai_bridge/format_registry.rs
@@ -56,27 +56,13 @@ impl FormatRegistry {
         self.formats.remove(qualified);
     }
 
-    /// Populate from a server config: per-tool overrides + builtin defaults.
+    /// Populate from a server config. Safe to call repeatedly.
     ///
-    /// Safe to call repeatedly. Each affected key is unconditionally rewritten
-    /// (or removed for an explicit `Some(Passthrough)` downgrade) so a later
-    /// config that demotes a tool from a hosted format back to `Passthrough`
-    /// does not leave the previous entry behind in the map.
-    ///
-    /// Mirrors `McpOrchestrator::apply_tool_configs` *and* the dispatch shape
-    /// of `McpToolSession`. Two production lookup paths must resolve to the
-    /// same format:
-    /// - via the alias key `("alias", alias_name)` — what the session exposes
-    ///   when `collect_visible_mcp_tools` replaces the direct entry with its
-    ///   alias entry (see `crates/mcp/src/core/session.rs:565-571`).
-    /// - via the underlying `(server_key, tool_name)` — what direct dispatch
-    ///   uses when no alias hides the tool.
-    ///
-    /// So when a tool has an alias the format is mirrored on **both** keys.
-    /// Aliased builtin tools get the same treatment, and only when the
-    /// per-tool stanza omits `response_format` entirely (`None`) is the
-    /// builtin default applied; an explicit `Some(Passthrough)` is the
-    /// documented escape hatch for opting out of the hosted shape.
+    /// `McpToolSession::collect_visible_mcp_tools` replaces a direct tool
+    /// entry with its alias entry, so production session lookup of an
+    /// aliased tool resolves through `("alias", alias_name)`. Direct
+    /// dispatch still uses `(server_key, tool_name)`. Both keys must carry
+    /// the same format, so non-Passthrough formats are mirrored on both.
     pub fn populate_from_server_config(&self, config: &McpServerConfig) {
         if let Some(tools) = &config.tools {
             for (tool_name, tool_config) in tools {
@@ -87,16 +73,10 @@ impl FormatRegistry {
                     .map(|alias| QualifiedToolName::new(ALIAS_SERVER_KEY, alias));
 
                 let Some(format_config) = tool_config.response_format else {
-                    // `response_format` omitted: defer to the builtin-default
-                    // pass below. Don't touch existing entries here so a
-                    // direct-dispatch override placed by an earlier loop
-                    // iteration survives.
                     continue;
                 };
                 let format: ResponseFormat = format_config.into();
                 if format == ResponseFormat::Passthrough {
-                    // Explicit downgrade — clear any prior hosted-format entry
-                    // on every key the production lookup might consult.
                     self.remove(&direct_key);
                     if let Some(alias_key) = &alias_key {
                         self.remove(alias_key);
@@ -104,10 +84,6 @@ impl FormatRegistry {
                     continue;
                 }
 
-                // Mirror the format on every key production might query.
-                // Also write the direct key so that direct dispatch (which
-                // does not go through the alias rewrite) still gets the
-                // hosted shape.
                 self.insert(direct_key, format);
                 if let Some(alias_key) = alias_key {
                     self.insert(alias_key, format);
@@ -123,10 +99,6 @@ impl FormatRegistry {
             if !has_explicit_format {
                 let format: ResponseFormat = builtin_type.response_format().into();
                 self.insert(QualifiedToolName::new(&config.name, tool_name), format);
-                // `collect_visible_mcp_tools` exposes the alias entry instead
-                // of the direct entry, so the production lookup queries
-                // `("alias", alias)`. Without this mirror an alias-only stanza
-                // on a builtin tool silently degrades to `Passthrough`.
                 if let Some(alias) = stanza.and_then(|cfg| cfg.alias.as_deref()) {
                     self.insert(QualifiedToolName::new(ALIAS_SERVER_KEY, alias), format);
                 }
@@ -173,10 +145,6 @@ mod tests {
 
     #[test]
     fn alias_format_mirrored_on_both_keys() {
-        // The session lookup path goes through `("alias", alias_name)` because
-        // `collect_visible_mcp_tools` replaces the direct entry with its alias
-        // entry. Direct dispatch (no alias rewrite) still queries
-        // `(server_key, tool_name)`, so both keys must carry the format.
         let mut tools = HashMap::new();
         tools.insert(
             "brave_web_search".to_string(),
@@ -195,13 +163,10 @@ mod tests {
         assert_eq!(
             r.lookup_by_names("alias", "web_search"),
             ResponseFormat::WebSearchCall,
-            "alias entry must use the literal `alias` server_key prefix"
         );
         assert_eq!(
             r.lookup_by_names("brave", "brave_web_search"),
             ResponseFormat::WebSearchCall,
-            "direct (server_key, tool_name) entry must also carry the format \
-             so direct dispatch resolves the same shape as alias dispatch"
         );
     }
 
@@ -250,7 +215,6 @@ mod tests {
             "do_search".to_string(),
             ToolConfig {
                 alias: None,
-                // Explicit override differs from the builtin default.
                 response_format: Some(ResponseFormatConfig::Passthrough),
                 arg_mapping: None,
             },
@@ -263,8 +227,6 @@ mod tests {
         let r = FormatRegistry::new();
         r.populate_from_server_config(&cfg);
 
-        // Explicit Some(Passthrough) override means "no entry inserted" AND
-        // the builtin default is NOT applied on top.
         assert_eq!(
             r.lookup_by_names("search", "do_search"),
             ResponseFormat::Passthrough
@@ -273,14 +235,6 @@ mod tests {
 
     #[test]
     fn alias_only_stanza_preserves_builtin_default_on_both_keys() {
-        // Regression: a per-tool stanza that only aliases a builtin tool
-        // (or only sets arg_mapping) used to suppress the builtin default,
-        // collapsing the hosted format to plain mcp_call. The crucial
-        // production path is the alias key — `collect_visible_mcp_tools`
-        // exposes the alias entry, so `lookup_tool_format(session, …,
-        // "web_search")` resolves to `("alias", "web_search")`. If the
-        // builtin default only landed at the direct key, dispatch silently
-        // degrades to `Passthrough`.
         let mut tools = HashMap::new();
         tools.insert(
             "do_search".to_string(),
@@ -298,27 +252,20 @@ mod tests {
         let r = FormatRegistry::new();
         r.populate_from_server_config(&cfg);
 
-        // Alias key — what production session lookup actually hits.
+        // Alias key — what production session lookup hits.
         assert_eq!(
             r.lookup_by_names("alias", "web_search"),
             ResponseFormat::WebSearchCall,
-            "alias-only stanza on a builtin must mirror the hosted format on \
-             the alias key — that is the key production resolves through"
         );
         // Direct key — what direct dispatch hits.
         assert_eq!(
             r.lookup_by_names("search", "do_search"),
             ResponseFormat::WebSearchCall,
-            "direct (server, tool) lookup must still resolve the hosted format"
         );
     }
 
     #[test]
     fn explicit_passthrough_downgrade_clears_prior_hosted_entry() {
-        // `populate_from_server_config` is called every time a server
-        // (re)registers, so a config that flips a tool from a hosted format
-        // back to `Passthrough` must clear the stale entry — otherwise the
-        // registry keeps transforming outputs as the old hosted type.
         let r = FormatRegistry::new();
 
         let mut hosted = HashMap::new();
@@ -336,12 +283,10 @@ mod tests {
         assert_eq!(
             r.lookup_by_names("alias", "web_search"),
             ResponseFormat::WebSearchCall,
-            "precondition: alias key carries the hosted format"
         );
         assert_eq!(
             r.lookup_by_names("brave", "brave_web_search"),
             ResponseFormat::WebSearchCall,
-            "precondition: direct key carries the hosted format"
         );
 
         let mut downgraded = HashMap::new();
@@ -360,12 +305,10 @@ mod tests {
         assert_eq!(
             r.lookup_by_names("alias", "web_search"),
             ResponseFormat::Passthrough,
-            "explicit Passthrough must clear the previous alias entry"
         );
         assert_eq!(
             r.lookup_by_names("brave", "brave_web_search"),
             ResponseFormat::Passthrough,
-            "explicit Passthrough must clear the previous direct entry"
         );
     }
 }
diff --git a/model_gateway/src/routers/common/openai_bridge/tool_descriptors.rs b/model_gateway/src/routers/common/openai_bridge/tool_descriptors.rs
index cf486f077..19f159b6c 100644
--- a/model_gateway/src/routers/common/openai_bridge/tool_descriptors.rs
+++ b/model_gateway/src/routers/common/openai_bridge/tool_descriptors.rs
@@ -86,13 +86,10 @@ pub fn response_tools(session: &McpToolSession<'_>) -> Vec<ResponseTool> {
 
 /// `McpToolInfo` records used inside `mcp_list_tools` output items.
 ///
-/// Wire-shape note: OpenAI's Responses API exposes only `{"read_only": …}`
-/// inside `mcp_list_tools[].tools[].annotations`. The richer SMG
-/// `ToolAnnotations` (which also carries `destructive`/`idempotent`/
-/// `open_world`) is intentionally narrowed here to match that shape — those
-/// extra hints are read directly off `ToolEntry::annotations` by the approval
-/// pipeline and would only confuse downstream OpenAI-compatible clients if
-/// emitted on the wire.
+/// `annotations` is narrowed to `{"read_only": …}` to match OpenAI's
+/// Responses API shape. The richer `ToolAnnotations` (with
+/// `destructive`/`idempotent`/`open_world`) is read internally off
+/// `ToolEntry::annotations` by the approval pipeline.
 pub fn build_mcp_tool_infos(entries: &[smg_mcp::ToolEntry]) -> Vec<McpToolInfo> {
     entries
         .iter()
@@ -342,11 +339,6 @@ mod tests {
 
     #[test]
     fn build_mcp_tool_infos_emits_only_read_only_annotation() {
-        // Wire-spec parity: OpenAI's `mcp_list_tools[].tools[].annotations`
-        // exposes `{"read_only": …}` only. The richer SMG `ToolAnnotations`
-        // carries `destructive`/`idempotent`/`open_world` that the approval
-        // pipeline reads internally; surfacing them on the wire would change
-        // the documented shape.
         let annotations = ToolAnnotations::default()
             .with_read_only(true)
             .with_destructive(true);
diff --git a/model_gateway/src/routers/common/openai_bridge/transformer.rs b/model_gateway/src/routers/common/openai_bridge/transformer.rs
index 4590ca7ab..542693c91 100644
--- a/model_gateway/src/routers/common/openai_bridge/transformer.rs
+++ b/model_gateway/src/routers/common/openai_bridge/transformer.rs
@@ -9,23 +9,18 @@ use tracing::warn;
 
 use super::ResponseFormat;
 
-/// Transform a `ToolExecutionOutput` to a `ResponseOutputItem` using a
+/// Transform a `ToolExecutionOutput` into a `ResponseOutputItem` using a
 /// pre-resolved `ResponseFormat`.
 ///
-/// The format MUST be resolved via the session's exposed-name map (e.g.
-/// [`super::lookup_tool_format`]). `output.tool_name` is the *invoked/exposed*
-/// name after `McpToolSession::execute_tool_result` rewrites it, so a registry
-/// lookup against `(output.server_key, output.tool_name)` would miss for
-/// disambiguated names like `mcp_<server>_<tool>` and silently degrade to
-/// `Passthrough`.
+/// `response_format` must be resolved via the session's exposed-name map
+/// (e.g. [`super::lookup_tool_format`]) — `output.tool_name` carries the
+/// *invoked/exposed* name after session-side rewriting, so a fresh
+/// registry lookup against `(output.server_key, output.tool_name)` would
+/// miss for disambiguated names like `mcp_<server>_<tool>`.
 ///
-/// Failure handling: when `output.is_error` is set, the success-path builders
-/// (which always stamp `status = "completed"` and `error = None`) would
-/// persist a failed call as a success and replay it as such on the next turn.
-/// We bypass them and emit a typed failure item — `status = "failed"` on every
-/// hosted-builtin variant; only the `mcp_call` variant carries an additional
-/// `error` field, matching the OpenAI Responses spec where the four
-/// hosted-builtin families convey failure via `status` alone.
+/// On `is_error`, the four hosted-builtin variants emit `status: "failed"`
+/// only (those wire shapes have no `error` field); `mcp_call` carries the
+/// `error_message` in its `error` field.
 pub fn transform_tool_output(
     output: &smg_mcp::ToolExecutionOutput,
     response_format: ResponseFormat,
@@ -43,9 +38,6 @@ pub fn transform_tool_output(
     )
 }
 
-/// Build a typed failure-state output item for a `ToolExecutionOutput` whose
-/// `is_error` is set. Centralizes the per-format failure shape so callers
-/// don't need to reach into the typed `ResponseOutputItem` enum.
 fn failed_output_item(
     output: &smg_mcp::ToolExecutionOutput,
     response_format: ResponseFormat,
@@ -68,8 +60,6 @@ fn failed_output_item(
         ResponseFormat::WebSearchCall => ResponseOutputItem::WebSearchCall {
             id: format!("ws_{}", output.call_id),
             status: WebSearchCallStatus::Failed,
-            // No query is recoverable from a failed dispatch; emit the
-            // empty `Search` action variant so the wire shape stays valid.
             action: WebSearchAction::Search {
                 query: None,
                 queries: Vec::new(),
@@ -1660,10 +1650,6 @@ mod tests {
 
     #[test]
     fn transform_tool_output_emits_hosted_failed_status_only() {
-        // Hosted-builtin variants must convey failure via `status: "failed"`
-        // alone; they have no `error` field on the wire. Using
-        // `transform_tool_output` against the success-path builders would
-        // stamp `status: "completed"` on a failed call — that is the bug.
         for fmt in [
             ResponseFormat::WebSearchCall,
             ResponseFormat::CodeInterpreterCall,
@@ -1672,15 +1658,8 @@ mod tests {
         ] {
             let item = transform_tool_output(&failed_tool_output("search"), fmt);
             let serialized = serde_json::to_value(&item).expect("serialize failed item");
-            assert_eq!(
-                serialized["status"],
-                serde_json::json!("failed"),
-                "{fmt:?} must serialize status=failed on error"
-            );
-            assert!(
-                serialized.get("error").is_none(),
-                "{fmt:?} must not carry an `error` field (hosted variants don't have one): {serialized}"
-            );
+            assert_eq!(serialized["status"], serde_json::json!("failed"), "{fmt:?}");
+            assert!(serialized.get("error").is_none(), "{fmt:?} {serialized}");
         }
     }
 
diff --git a/model_gateway/src/routers/common/persistence_utils.rs b/model_gateway/src/routers/common/persistence_utils.rs
index 8a1ac3b27..4482a549d 100644
--- a/model_gateway/src/routers/common/persistence_utils.rs
+++ b/model_gateway/src/routers/common/persistence_utils.rs
@@ -327,15 +327,10 @@ fn item_to_new_conversation_item(
         .and_then(|v| v.as_str())
         .unwrap_or("message");
 
-    // Store the whole item for every non-message type, regardless of input vs
-    // output side. The previous input-side branch only treated
-    // `function_call` / `function_call_output` as whole-item, so a replayed
-    // structural item (e.g. `image_generation_call`, `web_search_call`,
-    // `mcp_call` carried back into a turn's input) fell through to the
-    // `content` extractor — but those items have no `content` field, so
-    // persistence would store an empty array and the next history load would
-    // lose the original item. Non-message items have no `content` field on
-    // the wire either way, so unifying the rule is safe.
+    // Non-message items carry their fields at the top level (no `content`
+    // field on the wire), so reading `content` on the input side would
+    // collapse replayed structural items (`image_generation_call`,
+    // `web_search_call`, …) to `[]`.
     let store_whole_item = item_type != "message";
 
     let content = if store_whole_item {
@@ -526,13 +521,6 @@ mod tests {
 
     #[test]
     fn replayed_image_generation_input_item_is_stored_whole() {
-        // Regression: structural input items (replayed `image_generation_call`
-        // and friends) carry their fields at the top level — not under
-        // `content`. The previous input-side branch only treated
-        // `function_call`/`function_call_output` as whole-item, so any other
-        // structural type fell through to `item_value.get("content")` which
-        // collapsed to `[]`, dropping the whole item from the next history
-        // load.
         let input_item = json!({
             "id": "ig_replay_1",
             "type": "image_generation_call",
@@ -542,15 +530,12 @@ mod tests {
 
         let new_item = item_to_new_conversation_item(&input_item, None, /* is_input */ true);
         assert_eq!(new_item.item_type, "image_generation_call");
-        // Whole-item content preserves all top-level fields, not just `content`.
         assert_eq!(
             new_item
                 .content
                 .get("revised_prompt")
                 .and_then(|v| v.as_str()),
             Some("cat"),
-            "structural input items must round-trip their top-level fields, \
-             not collapse to `content` (was bug for replayed hosted-tool items)"
         );
         assert_eq!(
             new_item.content.get("type").and_then(|v| v.as_str()),
@@ -560,9 +545,6 @@ mod tests {
 
     #[test]
     fn replayed_message_input_item_still_extracts_content() {
-        // The unification only changes non-message handling; message items
-        // continue to extract their `content` array (and may wrap with `phase`
-        // when present, exercised separately).
         let input_item = json!({
             "id": "msg_1",
             "type": "message",
diff --git a/model_gateway/src/routers/grpc/regular/responses/streaming.rs b/model_gateway/src/routers/grpc/regular/responses/streaming.rs
index 237096b86..a2d964ea4 100644
--- a/model_gateway/src/routers/grpc/regular/responses/streaming.rs
+++ b/model_gateway/src/routers/grpc/regular/responses/streaming.rs
@@ -739,16 +739,6 @@ async fn execute_tool_loop_streaming_internal(
                 let success = !tool_output.is_error;
                 let output_str = tool_output.output.to_string();
 
-                // Build the typed output item once and reuse it for both the
-                // streamed `output_item.done` payload and the persisted state
-                // record. The previous code hand-rolled an ad-hoc envelope
-                // (`{name, status, arguments, output|error}`) for the wire
-                // payload — a shape that does not match the hosted-builtin
-                // variants (`web_search_call`/`code_interpreter_call`/
-                // `file_search_call`/`image_generation_call` have no `name`/
-                // `arguments`/`output` fields, and only `mcp_call` carries an
-                // `error` field) — so the streamed final item could diverge
-                // from the persisted one and leak invalid fields on failure.
                 let output_item =
                     openai_bridge::transform_tool_output(&tool_output, response_format);
                 let mut item_done = serde_json::to_value(&output_item).unwrap_or_else(|e| {
@@ -763,8 +753,8 @@ async fn execute_tool_loop_streaming_internal(
                         "status": if success { "completed" } else { "failed" },
                     })
                 });
-                // Preserve the streaming-allocated id so `output_item.done`
-                // matches the earlier `output_item.added`.
+                // Override the typed item's id so output_item.done matches the
+                // streaming-allocated id used by the earlier output_item.added.
                 if let Some(obj) = item_done.as_object_mut() {
                     obj.insert("id".to_string(), json!(&item_id));
                 }
@@ -775,7 +765,6 @@ async fn execute_tool_loop_streaming_internal(
                 );
 
                 if success {
-                    // Emit format-specific tool_call.completed event.
                     let event =
                         emitter.emit_tool_call_completed(output_index, &item_id, response_format);
                     emitter.send_event(&event, &tx)?;
@@ -786,11 +775,9 @@ async fn execute_tool_loop_streaming_internal(
                         .unwrap_or_else(|| output_str.clone());
                     warn!("Tool execution returned error: {}", err_text);
 
-                    // `mcp_call.failed` is the only `*.failed` event in the
-                    // Responses API event taxonomy — hosted-builtin families
-                    // close out via `*.completed` followed by `output_item.done`
-                    // carrying `status: "failed"` (no `error` field on those
-                    // variants).
+                    // `response.mcp_call.failed` is the only `*.failed` event
+                    // in the Responses API; hosted-builtin families close via
+                    // `*.completed` + `output_item.done` with status=failed.
                     if matches!(response_format, ResponseFormat::Passthrough) {
                         let event = emitter.emit_mcp_call_failed(output_index, &item_id, &err_text);
                         emitter.send_event(&event, &tx)?;
@@ -804,13 +791,10 @@ async fn execute_tool_loop_streaming_internal(
                     }
                 }
 
-                // Emit output_item.done with the transformed item (correct
-                // shape for both mcp_call and hosted-builtin variants).
                 let event = emitter.emit_output_item_done(output_index, &item_done);
                 emitter.send_event(&event, &tx)?;
                 emitter.complete_output_item(output_index);
 
-                // Record MCP tool metrics
                 Metrics::record_mcp_tool_duration(
                     &current_request.model,
                     &tool_output.tool_name,
@@ -826,8 +810,6 @@ async fn execute_tool_loop_streaming_internal(
                     },
                 );
 
-                // Record the call in state with the same transformed item we
-                // just emitted on the wire.
                 state.record_call(
                     tool_output.call_id,
                     tool_output.tool_name,
diff --git a/model_gateway/src/routers/openai/responses/non_streaming.rs b/model_gateway/src/routers/openai/responses/non_streaming.rs
index 12d96bc23..1fd2b170e 100644
--- a/model_gateway/src/routers/openai/responses/non_streaming.rs
+++ b/model_gateway/src/routers/openai/responses/non_streaming.rs
@@ -63,17 +63,10 @@ pub async fn handle_non_streaming_response(mut ctx: RequestContext) -> Response
         }
     };
 
-    // The format registry is the router-side source of truth for MCP
-    // builtin/alias format resolution; falling back to a default would
-    // silently mis-route hosted tools instead of failing fast. The check is
-    // scoped to MCP-laden requests — plain non-MCP requests must still
-    // succeed in deployments where the gateway runs without MCP wiring.
-    //
-    // A request with MCP tools but no registry component is a configuration
-    // error: route resolution would silently degrade to `Passthrough`, so we
-    // fail fast instead. Resolve `(mcp_servers, registry)` together so the
-    // typed result carries the registry into the MCP arm without a second
-    // option lookup.
+    // Only MCP-laden requests need the format registry; without this
+    // narrowing, plain non-MCP requests would 500 in deployments that run
+    // the gateway without MCP wiring. A registry-less MCP request still
+    // hard-fails — silent fallback would mis-route hosted tools.
     let mcp_routing = match original_body.tools.as_deref() {
         Some(tools) if request_uses_mcp_routing(tools) => {
             let Some(registry) = ctx.components.mcp_format_registry() else {
diff --git a/model_gateway/src/routers/openai/responses/streaming.rs b/model_gateway/src/routers/openai/responses/streaming.rs
index c2195466e..bf86d089d 100644
--- a/model_gateway/src/routers/openai/responses/streaming.rs
+++ b/model_gateway/src/routers/openai/responses/streaming.rs
@@ -1086,14 +1086,8 @@ pub async fn handle_streaming_response(ctx: RequestContext) -> Response {
             return error::internal_error("internal_error", "MCP orchestrator required");
         }
     };
-    // Scope the format-registry fail-fast to MCP-laden requests. Plain
-    // streaming requests must still pass through deployments that run
-    // without MCP wiring; only requests that actually need MCP routing
-    // require the registry.
-    //
-    // Resolve `(mcp_servers, registry)` as a single tuple so the cloned
-    // registry follows mcp_servers into the interception path without a
-    // second option lookup downstream.
+    // Only MCP-laden requests need the format registry; plain streaming
+    // requests must still pass through deployments without MCP wiring.
     let mcp_routing = match original_body.tools.as_deref() {
         Some(tools) if request_uses_mcp_routing(tools) => {
             let Some(registry) = ctx.components.mcp_format_registry() else {
diff --git a/model_gateway/tests/mcp_test.rs b/model_gateway/tests/mcp_test.rs
index 09a27dabc..d8b2920fd 100644
--- a/model_gateway/tests/mcp_test.rs
+++ b/model_gateway/tests/mcp_test.rs
@@ -334,12 +334,6 @@ async fn test_web_search_transform_handles_openai_search_response_with_mock() {
 
     assert!(!output.is_error, "Tool execution should succeed");
 
-    // Route the actual `ToolExecutionOutput` through the production bridge
-    // helper so a regression in session-side rewrites (`call_id`,
-    // `server_label`, `tool_name`, `arguments_str` all live on `output`)
-    // would surface here too. Hardcoding those fields against
-    // `ResponseTransformer::transform` would let such a regression slip
-    // through with the test still green.
     let transformed = transform_tool_output(&output, ResponseFormat::WebSearchCall);
     match transformed {
         ResponseOutputItem::WebSearchCall { action, .. } => match action {
@@ -415,9 +409,6 @@ async fn test_web_search_transform_sets_action_query_for_brave_search_with_mock(
 
     assert!(!output.is_error, "Tool execution should succeed");
 
-    // Same reasoning as the OpenAI variant above — feed the actual
-    // `ToolExecutionOutput` through the production bridge helper so any
-    // future regression in session-side field rewrites surfaces here.
     let transformed = transform_tool_output(&output, ResponseFormat::WebSearchCall);
     match transformed {
         ResponseOutputItem::WebSearchCall { action, .. } => match action {

From 46a72a8a7dcd3db54095d1245c099ecb6b3b743f Mon Sep 17 00:00:00 2001
From: Simo Lin <25425177+slin1237@users.noreply.github.com>
Date: Mon, 4 May 2026 11:30:15 -0700
Subject: [PATCH 09/15] fix(mcp): lift rmcp tool annotations into ToolEntry

ToolEntry::from_server_tool initialised .annotations to default and
never copied tool.annotations across, so every server-loaded tool
reported read_only=false regardless of what the MCP server advertised.
Combined with the read_only narrowing in the bridge, mcp_list_tools
on the wire became uniformly read_only=false even for tools the server
explicitly marked read-only.

Lift via ToolAnnotations::from_rmcp_option so the read_only/destructive/
idempotent/open_world hints flow through the way the approval pipeline
already expects.

Comment addressed (PR #1442): chatgpt-codex-connector 3183335208.

Signed-off-by: Simo Lin <25425177+slin1237@users.noreply.github.com>
---
 crates/mcp/src/inventory/types.rs | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/crates/mcp/src/inventory/types.rs b/crates/mcp/src/inventory/types.rs
index 0cf66556e..b7c5dfb5f 100644
--- a/crates/mcp/src/inventory/types.rs
+++ b/crates/mcp/src/inventory/types.rs
@@ -161,7 +161,13 @@ impl ToolEntry {
 
     pub fn from_server_tool(server_key: impl AsRef<str>, tool: Tool) -> Self {
         let name = tool.name.to_string();
-        Self::new(QualifiedToolName::new(server_key, name), tool)
+        // Lift the rmcp annotations the server advertised into our richer
+        // wrapper so `entry.annotations.read_only` (and the other hints the
+        // approval pipeline reads) reflect what the tool actually declared.
+        // Without this, every server-loaded tool reports `read_only: false`
+        // because `Self::new` initialises annotations to default.
+        let annotations = ToolAnnotations::from_rmcp_option(tool.annotations.as_ref());
+        Self::new(QualifiedToolName::new(server_key, name), tool).with_annotations(annotations)
     }
 
     #[must_use]
@@ -272,6 +278,28 @@ mod tests {
         assert!(!entry.is_expired());
     }
 
+    #[test]
+    fn from_server_tool_lifts_rmcp_annotations() {
+        // Without this lift the read_only hint is lost on every server-loaded
+        // tool, and `mcp_list_tools[].tools[].annotations.read_only` reports
+        // false even for tools the MCP server explicitly marked read-only.
+        use rmcp::model::ToolAnnotations as RmcpToolAnnotations;
+
+        let mut tool = create_test_tool("read_only_tool");
+        tool.annotations = Some(RmcpToolAnnotations {
+            title: None,
+            read_only_hint: Some(true),
+            destructive_hint: Some(false),
+            idempotent_hint: Some(true),
+            open_world_hint: Some(false),
+        });
+        let entry = ToolEntry::from_server_tool("server", tool);
+        assert!(entry.annotations.read_only);
+        assert!(!entry.annotations.destructive);
+        assert!(entry.annotations.idempotent);
+        assert!(!entry.annotations.open_world);
+    }
+
     #[test]
     fn test_tool_entry_with_alias() {
         let tool = create_test_tool("web_search");

From 69b7f9bb20d96cbe06925245e6d11bf991fb07a6 Mon Sep 17 00:00:00 2001
From: Simo Lin <25425177+slin1237@users.noreply.github.com>
Date: Mon, 4 May 2026 11:30:23 -0700
Subject: [PATCH 10/15] fix(openai-bridge): preserve MCP error text when
 error_message is None

CallToolResult { is_error: true, .. } can leave error_message unset
while carrying the real failure text inside output.output (typed text
blocks). The previous fallback chain went straight to the generic
"Tool execution failed" placeholder, hiding the actual MCP-declared
error from the client.

Add a flatten_mcp_output fallback ahead of the placeholder so
mcp_call.error stays informative when the server uses the
result-blocks convention.

Comment addressed (PR #1442): coderabbitai 3183370193.

Signed-off-by: Simo Lin <25425177+slin1237@users.noreply.github.com>
---
 .../common/openai_bridge/transformer.rs       | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/model_gateway/src/routers/common/openai_bridge/transformer.rs b/model_gateway/src/routers/common/openai_bridge/transformer.rs
index 542693c91..c66644ae6 100644
--- a/model_gateway/src/routers/common/openai_bridge/transformer.rs
+++ b/model_gateway/src/routers/common/openai_bridge/transformer.rs
@@ -42,9 +42,18 @@ fn failed_output_item(
     output: &smg_mcp::ToolExecutionOutput,
     response_format: ResponseFormat,
 ) -> ResponseOutputItem {
+    // The MCP `CallToolResult { is_error: true, .. }` path can leave
+    // `error_message` unset while carrying the real failure text inside
+    // `output.output` (typed text blocks). Fall back to that before the
+    // generic placeholder so client-visible mcp_call.error stays useful.
     let err_msg = output
         .error_message
         .clone()
+        .filter(|msg| !msg.is_empty())
+        .or_else(|| {
+            let text = ResponseTransformer::flatten_mcp_output(&output.output);
+            (!text.is_empty()).then_some(text)
+        })
         .unwrap_or_else(|| "Tool execution failed".to_string());
     match response_format {
         ResponseFormat::Passthrough => ResponseOutputItem::McpCall {
@@ -1648,6 +1657,25 @@ mod tests {
         }
     }
 
+    #[test]
+    fn transform_tool_output_falls_back_to_output_text_when_error_message_missing() {
+        // MCP `CallToolResult { is_error: true }` can leave error_message
+        // unset and put the failure text inside the result blocks.
+        let mut output = failed_tool_output("brave_web_search");
+        output.error_message = None;
+        output.output = serde_json::json!([
+            {"type": "text", "text": "rate limited"}
+        ]);
+
+        let item = transform_tool_output(&output, ResponseFormat::Passthrough);
+        match item {
+            ResponseOutputItem::McpCall { error, .. } => {
+                assert_eq!(error.as_deref(), Some("rate limited"));
+            }
+            _ => panic!("Expected McpCall"),
+        }
+    }
+
     #[test]
     fn transform_tool_output_emits_hosted_failed_status_only() {
         for fmt in [

From 4ce01c10ad56871bc524429b5222648b58a81f59 Mon Sep 17 00:00:00 2001
From: Simo Lin <25425177+slin1237@users.noreply.github.com>
Date: Mon, 4 May 2026 11:30:34 -0700
Subject: [PATCH 11/15] fix(persistence): hoist top-level fields when reading
 whole-item rows
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bug E ("replayed structural input items lose content") fixed the
WRITE side via store_whole_item = item_type != "message", but
item_to_json kept wrapping non-listed types under {"content": …}.
Result: an image_generation_call linked to a conversation came back
as {"type":"image_generation_call","content":{"type":"image_generation_call","revised_prompt":"cat",…}}
instead of restoring revised_prompt/status at the top level.

Mirror the write rule: when item.content is an object, hoist its
top-level fields (skipping id/type/role/status which we already
populated from the ConversationItem record). For pre-fix rows that
stored an array, keep the legacy {"content": …} wrap so old data
still round-trips.

Comment addressed (PR #1442): coderabbitai 3183370204.

Signed-off-by: Simo Lin <25425177+slin1237@users.noreply.github.com>
---
 .../src/routers/common/persistence_utils.rs   | 62 ++++++++++++++++++-
 1 file changed, 61 insertions(+), 1 deletion(-)

diff --git a/model_gateway/src/routers/common/persistence_utils.rs b/model_gateway/src/routers/common/persistence_utils.rs
index 4482a549d..13fb66b42 100644
--- a/model_gateway/src/routers/common/persistence_utils.rs
+++ b/model_gateway/src/routers/common/persistence_utils.rs
@@ -85,8 +85,23 @@ pub fn item_to_json(item: &ConversationItem) -> Value {
                 obj.insert("phase".to_string(), phase_value);
             }
         }
+    } else if let Some(content_obj) = item.content.as_object() {
+        // Whole-item store path (`store_whole_item` in
+        // `item_to_new_conversation_item`): `content` is the original item
+        // value with its fields at the top level. Hoist them back rather
+        // than wrapping them under `content`, otherwise replayed structural
+        // items come back as `{"type":"…","content":{"type":"…",…}}` and
+        // lose `revised_prompt`/`status`/etc. on the wire.
+        for (k, v) in content_obj {
+            if matches!(k.as_str(), "id" | "type" | "role" | "status") {
+                continue;
+            }
+            obj.insert(k.clone(), v.clone());
+        }
     } else {
-        // Default: include content as-is
+        // Pre-fix rows for non-listed types stored `[]` instead of the
+        // whole item; preserve the legacy `{"content": …}` wrapping so we
+        // don't drop data that came in under the old write path.
         obj.insert("content".to_string(), item.content.clone());
     }
 
@@ -517,8 +532,53 @@ async fn persist_conversation_items_inner(
 
 #[cfg(test)]
 mod tests {
+    use chrono::Utc;
+
     use super::*;
 
+    fn stored_item(item_type: &str, content: Value) -> ConversationItem {
+        ConversationItem {
+            id: ConversationItemId::from("ig_replay_1"),
+            response_id: None,
+            item_type: item_type.to_string(),
+            role: None,
+            content,
+            status: Some("completed".to_string()),
+            created_at: Utc::now(),
+        }
+    }
+
+    #[test]
+    fn item_to_json_restores_top_level_fields_for_whole_item_store() {
+        let stored = stored_item(
+            "image_generation_call",
+            json!({
+                "id": "ig_replay_1",
+                "type": "image_generation_call",
+                "status": "completed",
+                "revised_prompt": "cat",
+            }),
+        );
+
+        let serialized = item_to_json(&stored);
+        assert_eq!(serialized["type"], json!("image_generation_call"));
+        assert_eq!(serialized["revised_prompt"], json!("cat"));
+        assert!(
+            serialized.get("content").is_none(),
+            "non-message whole-item store must hoist top-level fields, not \
+             nest the original item under `content`: {serialized}"
+        );
+    }
+
+    #[test]
+    fn item_to_json_falls_back_to_legacy_content_wrap_for_array_content() {
+        // Pre-fix rows for non-listed types may have stored `[]`. Preserve
+        // the legacy wrapping so old data still round-trips.
+        let stored = stored_item("image_generation_call", json!([]));
+        let serialized = item_to_json(&stored);
+        assert_eq!(serialized["content"], json!([]));
+    }
+
     #[test]
     fn replayed_image_generation_input_item_is_stored_whole() {
         let input_item = json!({

From 579fad94cc922f61a4f4b361e364fc95f4280f6a Mon Sep 17 00:00:00 2001
From: Simo Lin <25425177+slin1237@users.noreply.github.com>
Date: Mon, 4 May 2026 11:30:42 -0700
Subject: [PATCH 12/15] fix(mcp-utils): derive request_uses_mcp_routing from
 extract_builtin_types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The two helpers had drifted: extract_builtin_types covered
WebSearchPreview/CodeInterpreter/ImageGeneration but not FileSearch,
while request_uses_mcp_routing hard-coded the same trio independently.
A future addition to either list (notably file_search) would only
land in one place.

Make the predicate consult extract_builtin_types so they can't
diverge — adding a new builtin to the routing path automatically makes
the predicate cover it.

Comment addressed (PR #1442): coderabbitai 3183370189.

Signed-off-by: Simo Lin <25425177+slin1237@users.noreply.github.com>
---
 model_gateway/src/routers/common/mcp_utils.rs | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/model_gateway/src/routers/common/mcp_utils.rs b/model_gateway/src/routers/common/mcp_utils.rs
index f1fb46aae..e22271fc5 100644
--- a/model_gateway/src/routers/common/mcp_utils.rs
+++ b/model_gateway/src/routers/common/mcp_utils.rs
@@ -249,16 +249,13 @@ pub fn extract_builtin_types(tools: &[ResponseTool]) -> Vec<BuiltinToolType> {
 
 /// True if `tools` carries any MCP-routed entry (declared MCP server or a
 /// builtin family that the gateway intercepts via MCP).
+///
+/// Derived from [`extract_builtin_types`] so the predicate and the actual
+/// routing path can't drift — adding a new builtin to the routing path
+/// (e.g. `file_search`) makes this predicate cover it too.
 pub fn request_uses_mcp_routing(tools: &[ResponseTool]) -> bool {
-    tools.iter().any(|t| {
-        matches!(
-            t,
-            ResponseTool::Mcp(_)
-                | ResponseTool::WebSearchPreview(_)
-                | ResponseTool::CodeInterpreter(_)
-                | ResponseTool::ImageGeneration(_)
-        )
-    })
+    tools.iter().any(|t| matches!(t, ResponseTool::Mcp(_)))
+        || !extract_builtin_types(tools).is_empty()
 }
 
 /// Collect user-declared function tool names from a Responses request.

From a5b6d243b8ea359d87663b967026fc200d6f19bb Mon Sep 17 00:00:00 2001
From: Simo Lin <25425177+slin1237@users.noreply.github.com>
Date: Mon, 4 May 2026 11:30:53 -0700
Subject: [PATCH 13/15] fix(openai-router): gate orchestrator lookup behind
 MCP-routing arm

Round 1 of D scoped the format-registry fail-fast to MCP-laden
requests, but ctx.components.mcp_orchestrator() was still required
unconditionally on entry. Plain non-MCP requests still 500'd in
deployments without MCP wiring, defeating the original goal.

Move the orchestrator lookup into the same MCP-routing arm and
extend the resolved tuple to (mcp_servers, mcp_orchestrator,
mcp_format_registry). Plain requests no longer touch either
component.

Comments addressed (PR #1442): coderabbitai 3183370208 / 3183370213.

Signed-off-by: Simo Lin <25425177+slin1237@users.noreply.github.com>
---
 .../routers/openai/responses/non_streaming.rs | 28 +++++++++----------
 .../src/routers/openai/responses/streaming.rs | 21 +++++++-------
 2 files changed, 25 insertions(+), 24 deletions(-)

diff --git a/model_gateway/src/routers/openai/responses/non_streaming.rs b/model_gateway/src/routers/openai/responses/non_streaming.rs
index 1fd2b170e..08df7a1d7 100644
--- a/model_gateway/src/routers/openai/responses/non_streaming.rs
+++ b/model_gateway/src/routers/openai/responses/non_streaming.rs
@@ -56,19 +56,19 @@ pub async fn handle_non_streaming_response(mut ctx: RequestContext) -> Response
             return error::internal_error("internal_error", "Worker not selected");
         }
     };
-    let mcp_orchestrator = match ctx.components.mcp_orchestrator() {
-        Some(m) => m,
-        None => {
-            return error::internal_error("internal_error", "MCP orchestrator required");
-        }
-    };
-
-    // Only MCP-laden requests need the format registry; without this
-    // narrowing, plain non-MCP requests would 500 in deployments that run
-    // the gateway without MCP wiring. A registry-less MCP request still
-    // hard-fails — silent fallback would mis-route hosted tools.
+    // Only MCP-laden requests need the orchestrator and format registry;
+    // without this narrowing, plain non-MCP requests would 500 in
+    // deployments that run the gateway without MCP wiring. A registry-less
+    // MCP request still hard-fails — silent fallback would mis-route
+    // hosted tools.
     let mcp_routing = match original_body.tools.as_deref() {
         Some(tools) if request_uses_mcp_routing(tools) => {
+            let Some(mcp_orchestrator) = ctx.components.mcp_orchestrator() else {
+                return error::internal_error(
+                    "internal_error",
+                    "MCP orchestrator required for requests carrying MCP/builtin tools",
+                );
+            };
             let Some(registry) = ctx.components.mcp_format_registry() else {
                 return error::internal_error(
                     "internal_error",
@@ -77,21 +77,21 @@ pub async fn handle_non_streaming_response(mut ctx: RequestContext) -> Response
             };
             ensure_request_mcp_client(mcp_orchestrator, registry, tools)
                 .await
-                .map(|servers| (servers, registry.clone()))
+                .map(|servers| (servers, mcp_orchestrator.clone(), registry.clone()))
         }
         _ => None,
     };
 
     let mut response_json: Value;
 
-    if let Some((mcp_servers, mcp_format_registry)) = mcp_routing {
+    if let Some((mcp_servers, mcp_orchestrator, mcp_format_registry)) = mcp_routing {
         let session_request_id = original_body
             .request_id
             .clone()
             .unwrap_or_else(|| format!("req_{}", uuid::Uuid::now_v7()));
         let forwarded_headers = extract_forwardable_request_headers(ctx.headers());
         let mut session = McpToolSession::new_with_headers(
-            mcp_orchestrator,
+            &mcp_orchestrator,
             mcp_servers,
             &session_request_id,
             forwarded_headers,
diff --git a/model_gateway/src/routers/openai/responses/streaming.rs b/model_gateway/src/routers/openai/responses/streaming.rs
index bf86d089d..2c8b27288 100644
--- a/model_gateway/src/routers/openai/responses/streaming.rs
+++ b/model_gateway/src/routers/openai/responses/streaming.rs
@@ -1080,16 +1080,17 @@ pub async fn handle_streaming_response(ctx: RequestContext) -> Response {
             return error::internal_error("internal_error", "Expected responses request");
         }
     };
-    let mcp_orchestrator = match ctx.components.mcp_orchestrator() {
-        Some(m) => m.clone(),
-        None => {
-            return error::internal_error("internal_error", "MCP orchestrator required");
-        }
-    };
-    // Only MCP-laden requests need the format registry; plain streaming
-    // requests must still pass through deployments without MCP wiring.
+    // Only MCP-laden requests need the orchestrator and format registry;
+    // plain streaming requests must still pass through deployments without
+    // MCP wiring.
     let mcp_routing = match original_body.tools.as_deref() {
         Some(tools) if request_uses_mcp_routing(tools) => {
+            let Some(mcp_orchestrator) = ctx.components.mcp_orchestrator().cloned() else {
+                return error::internal_error(
+                    "internal_error",
+                    "MCP orchestrator required for requests carrying MCP/builtin tools",
+                );
+            };
             let Some(registry) = ctx.components.mcp_format_registry() else {
                 return error::internal_error(
                     "internal_error",
@@ -1099,7 +1100,7 @@ pub async fn handle_streaming_response(ctx: RequestContext) -> Response {
             let registry = registry.clone();
             ensure_request_mcp_client(&mcp_orchestrator, &registry, tools)
                 .await
-                .map(|servers| (servers, registry))
+                .map(|servers| (servers, mcp_orchestrator, registry))
         }
         _ => None,
     };
@@ -1112,7 +1113,7 @@ pub async fn handle_streaming_response(ctx: RequestContext) -> Response {
         }
     };
 
-    let Some((mcp_servers, mcp_format_registry)) = mcp_routing else {
+    let Some((mcp_servers, mcp_orchestrator, mcp_format_registry)) = mcp_routing else {
         return handle_simple_streaming_passthrough(&client, &worker, headers.as_ref(), req).await;
     };
 

From e9cb555d363b00fa81506b7f5bc27cf1ebe436c9 Mon Sep 17 00:00:00 2001
From: Simo Lin <25425177+slin1237@users.noreply.github.com>
Date: Mon, 4 May 2026 16:52:29 -0700
Subject: [PATCH 14/15] fix(openai-bridge): read read_only_hint from rmcp
 directly, don't lift into entry

The previous fix lifted rmcp tool annotations into ToolEntry.annotations
via ToolAnnotations::from_rmcp_option. Production approval policy reads
those same annotations and applies conservative defaults
(destructive_hint.unwrap_or(true)), so any tool whose server provided an
annotations object without an explicit destructive_hint started getting
denied with "Tool execution denied: <tool>". This took out
brave_web_search across the openai-responses and anthropic-messages e2e
lanes (mcp_tool_result.is_error=true, multiple streaming/non-streaming
asserts).

Drop the lift in from_server_tool. Read read_only_hint directly off
tool.annotations inside build_mcp_tool_infos so the wire surface
(mcp_list_tools[].tools[].annotations.read_only) still reflects what the
server advertised, without retroactively changing the policy gating
that production was de-facto bypassing. Replaces the lift-side test
with two read-side tests (rmcp hint surfaces + absent hint defaults to
false).

Signed-off-by: Simo Lin <25425177+slin1237@users.noreply.github.com>
---
 crates/mcp/src/inventory/types.rs             | 30 +-------
 .../common/openai_bridge/tool_descriptors.rs  | 70 +++++++++++++------
 2 files changed, 49 insertions(+), 51 deletions(-)

diff --git a/crates/mcp/src/inventory/types.rs b/crates/mcp/src/inventory/types.rs
index b7c5dfb5f..0cf66556e 100644
--- a/crates/mcp/src/inventory/types.rs
+++ b/crates/mcp/src/inventory/types.rs
@@ -161,13 +161,7 @@ impl ToolEntry {
 
     pub fn from_server_tool(server_key: impl AsRef<str>, tool: Tool) -> Self {
         let name = tool.name.to_string();
-        // Lift the rmcp annotations the server advertised into our richer
-        // wrapper so `entry.annotations.read_only` (and the other hints the
-        // approval pipeline reads) reflect what the tool actually declared.
-        // Without this, every server-loaded tool reports `read_only: false`
-        // because `Self::new` initialises annotations to default.
-        let annotations = ToolAnnotations::from_rmcp_option(tool.annotations.as_ref());
-        Self::new(QualifiedToolName::new(server_key, name), tool).with_annotations(annotations)
+        Self::new(QualifiedToolName::new(server_key, name), tool)
     }
 
     #[must_use]
@@ -278,28 +272,6 @@ mod tests {
         assert!(!entry.is_expired());
     }
 
-    #[test]
-    fn from_server_tool_lifts_rmcp_annotations() {
-        // Without this lift the read_only hint is lost on every server-loaded
-        // tool, and `mcp_list_tools[].tools[].annotations.read_only` reports
-        // false even for tools the MCP server explicitly marked read-only.
-        use rmcp::model::ToolAnnotations as RmcpToolAnnotations;
-
-        let mut tool = create_test_tool("read_only_tool");
-        tool.annotations = Some(RmcpToolAnnotations {
-            title: None,
-            read_only_hint: Some(true),
-            destructive_hint: Some(false),
-            idempotent_hint: Some(true),
-            open_world_hint: Some(false),
-        });
-        let entry = ToolEntry::from_server_tool("server", tool);
-        assert!(entry.annotations.read_only);
-        assert!(!entry.annotations.destructive);
-        assert!(entry.annotations.idempotent);
-        assert!(!entry.annotations.open_world);
-    }
-
     #[test]
     fn test_tool_entry_with_alias() {
         let tool = create_test_tool("web_search");
diff --git a/model_gateway/src/routers/common/openai_bridge/tool_descriptors.rs b/model_gateway/src/routers/common/openai_bridge/tool_descriptors.rs
index 19f159b6c..76561e82a 100644
--- a/model_gateway/src/routers/common/openai_bridge/tool_descriptors.rs
+++ b/model_gateway/src/routers/common/openai_bridge/tool_descriptors.rs
@@ -87,19 +87,28 @@ pub fn response_tools(session: &McpToolSession<'_>) -> Vec<ResponseTool> {
 /// `McpToolInfo` records used inside `mcp_list_tools` output items.
 ///
 /// `annotations` is narrowed to `{"read_only": …}` to match OpenAI's
-/// Responses API shape. The richer `ToolAnnotations` (with
-/// `destructive`/`idempotent`/`open_world`) is read internally off
-/// `ToolEntry::annotations` by the approval pipeline.
+/// Responses API shape. The hint is read straight off the rmcp tool
+/// (`tool.annotations.read_only_hint`) rather than the SMG
+/// `ToolAnnotations` wrapper — the wrapper applies conservative policy
+/// defaults (destructive=true on absent hint) that are intended for the
+/// approval pipeline, not the wire surface, and reading them here would
+/// surface the wrong `read_only` for tools the server didn't annotate.
 pub fn build_mcp_tool_infos(entries: &[smg_mcp::ToolEntry]) -> Vec<McpToolInfo> {
     entries
         .iter()
-        .map(|entry| McpToolInfo {
-            name: entry.tool_name().to_string(),
-            description: entry.tool.description.as_ref().map(|d| d.to_string()),
-            input_schema: schema_to_value(&entry.tool.input_schema),
-            annotations: Some(json!({
-                "read_only": entry.annotations.read_only,
-            })),
+        .map(|entry| {
+            let read_only = entry
+                .tool
+                .annotations
+                .as_ref()
+                .and_then(|a| a.read_only_hint)
+                .unwrap_or(false);
+            McpToolInfo {
+                name: entry.tool_name().to_string(),
+                description: entry.tool.description.as_ref().map(|d| d.to_string()),
+                input_schema: schema_to_value(&entry.tool.input_schema),
+                annotations: Some(json!({ "read_only": read_only })),
+            }
         })
         .collect()
 }
@@ -319,33 +328,38 @@ fn is_client_visible_output_item(
 mod tests {
     use std::{borrow::Cow, sync::Arc};
 
-    use rmcp::model::Tool;
-    use smg_mcp::{ToolAnnotations, ToolEntry};
+    use rmcp::model::{Tool, ToolAnnotations as RmcpToolAnnotations};
+    use smg_mcp::ToolEntry;
 
     use super::*;
 
-    fn entry_with_annotations(annotations: ToolAnnotations) -> ToolEntry {
+    fn entry_with_rmcp_annotations(annotations: Option<RmcpToolAnnotations>) -> ToolEntry {
         let tool = Tool {
             name: Cow::Owned("widget".to_string()),
             title: None,
             description: Some(Cow::Owned("widget description".to_string())),
             input_schema: Arc::new(serde_json::Map::new()),
             output_schema: None,
-            annotations: None,
+            annotations,
             icons: None,
         };
-        ToolEntry::from_server_tool("srv", tool).with_annotations(annotations)
+        ToolEntry::from_server_tool("srv", tool)
     }
 
-    #[test]
-    fn build_mcp_tool_infos_emits_only_read_only_annotation() {
-        let annotations = ToolAnnotations::default()
-            .with_read_only(true)
-            .with_destructive(true);
-        let entries = vec![entry_with_annotations(annotations)];
+    fn read_only_hint(value: bool) -> RmcpToolAnnotations {
+        RmcpToolAnnotations {
+            title: None,
+            read_only_hint: Some(value),
+            destructive_hint: None,
+            idempotent_hint: None,
+            open_world_hint: None,
+        }
+    }
 
+    #[test]
+    fn build_mcp_tool_infos_surfaces_rmcp_read_only_hint() {
+        let entries = vec![entry_with_rmcp_annotations(Some(read_only_hint(true)))];
         let infos = build_mcp_tool_infos(&entries);
-        assert_eq!(infos.len(), 1);
         let serialized = serde_json::to_value(&infos[0])
             .expect("McpToolInfo must serialize")
             .get("annotations")
@@ -353,4 +367,16 @@ mod tests {
             .expect("annotations must serialize");
         assert_eq!(serialized, json!({ "read_only": true }));
     }
+
+    #[test]
+    fn build_mcp_tool_infos_defaults_to_false_when_hint_absent() {
+        let entries = vec![entry_with_rmcp_annotations(None)];
+        let infos = build_mcp_tool_infos(&entries);
+        let serialized = serde_json::to_value(&infos[0])
+            .expect("McpToolInfo must serialize")
+            .get("annotations")
+            .cloned()
+            .expect("annotations must serialize");
+        assert_eq!(serialized, json!({ "read_only": false }));
+    }
 }

From 4ef4bcdd79e04423cd1fdb9d6452bf64d4838705 Mon Sep 17 00:00:00 2001
From: Simo Lin <25425177+slin1237@users.noreply.github.com>
Date: Mon, 4 May 2026 21:20:56 -0700
Subject: [PATCH 15/15] fix(openai-bridge): hosted-builtin formats keep
 status=completed on is_error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bug B (3175392558) made transform_tool_output respect output.is_error
to surface failed tool executions instead of stamping them as
successful. For mcp_call this is correct — the variant has an `error`
field and persisting+replaying a fake-success was the documented
concern. For the four hosted-builtin variants
(web_search_call/code_interpreter_call/file_search_call/
image_generation_call) the behavior was over-strict:

- The wire variants have no `error` field, so status is the only
  channel and "failed" is the only signal.
- OpenAI cloud's native hosted tools never emit Failed for soft
  failures (rate-limited search, no results, transient backend errors)
  and instead return Completed with empty/limited content.
- Many MCP servers set isError=true for those soft-failure cases,
  exactly matching the soft-failure pattern OpenAI cloud handles
  silently.

The result: brave-search MCP returning isError=true for a normal
"completed but with caveats" response made the gateway emit
status=failed, breaking test_web_search_preview_streaming_events
(which asserts cloud-parity status=completed). The streaming events
fired the success path (web_search_call.completed) but the persisted
item said failed — internally inconsistent.

Restrict failed_output_item to Passthrough only; hosted-builtin
formats fall through to the success-path builders unconditionally.
The reviewer's persistence-replay concern is still met for mcp_call
(where it actually applies — that variant has the error channel).

Updates the regression test to assert the new hosted behavior. Updates
the now-stale comment in grpc/regular/responses/streaming.rs that
described the previous status=failed pattern.

Signed-off-by: Simo Lin <25425177+slin1237@users.noreply.github.com>
---
 .../common/openai_bridge/transformer.rs       | 88 +++++++------------
 .../grpc/regular/responses/streaming.rs       |  6 +-
 2 files changed, 35 insertions(+), 59 deletions(-)

diff --git a/model_gateway/src/routers/common/openai_bridge/transformer.rs b/model_gateway/src/routers/common/openai_bridge/transformer.rs
index c66644ae6..9a4075946 100644
--- a/model_gateway/src/routers/common/openai_bridge/transformer.rs
+++ b/model_gateway/src/routers/common/openai_bridge/transformer.rs
@@ -18,15 +18,19 @@ use super::ResponseFormat;
 /// registry lookup against `(output.server_key, output.tool_name)` would
 /// miss for disambiguated names like `mcp_<server>_<tool>`.
 ///
-/// On `is_error`, the four hosted-builtin variants emit `status: "failed"`
-/// only (those wire shapes have no `error` field); `mcp_call` carries the
-/// `error_message` in its `error` field.
+/// On `is_error`: only `mcp_call` surfaces the failure (`status: "failed"`,
+/// `error: Some(msg)`). The four hosted-builtin variants always emit
+/// `status: "completed"` — that matches OpenAI cloud's de-facto behavior
+/// (the cloud's native `web_search_call`/`code_interpreter_call`/etc.
+/// don't emit `Failed` for soft failures like empty results or rate
+/// limits, and many MCP servers set `isError: true` for those very cases).
+/// The error context lives in the item's content, not in `status`.
 pub fn transform_tool_output(
     output: &smg_mcp::ToolExecutionOutput,
     response_format: ResponseFormat,
 ) -> ResponseOutputItem {
-    if output.is_error {
-        return failed_output_item(output, response_format);
+    if output.is_error && response_format == ResponseFormat::Passthrough {
+        return failed_mcp_call(output);
     }
     ResponseTransformer::transform(
         &output.output,
@@ -38,10 +42,7 @@ pub fn transform_tool_output(
     )
 }
 
-fn failed_output_item(
-    output: &smg_mcp::ToolExecutionOutput,
-    response_format: ResponseFormat,
-) -> ResponseOutputItem {
+fn failed_mcp_call(output: &smg_mcp::ToolExecutionOutput) -> ResponseOutputItem {
     // The MCP `CallToolResult { is_error: true, .. }` path can leave
     // `error_message` unset while carrying the real failure text inside
     // `output.output` (typed text blocks). Fall back to that before the
@@ -55,51 +56,15 @@ fn failed_output_item(
             (!text.is_empty()).then_some(text)
         })
         .unwrap_or_else(|| "Tool execution failed".to_string());
-    match response_format {
-        ResponseFormat::Passthrough => ResponseOutputItem::McpCall {
-            id: mcp_response_item_id(&output.call_id),
-            status: "failed".to_string(),
-            approval_request_id: None,
-            arguments: output.arguments_str.clone(),
-            error: Some(err_msg),
-            name: output.tool_name.clone(),
-            output: String::new(),
-            server_label: output.server_label.clone(),
-        },
-        ResponseFormat::WebSearchCall => ResponseOutputItem::WebSearchCall {
-            id: format!("ws_{}", output.call_id),
-            status: WebSearchCallStatus::Failed,
-            action: WebSearchAction::Search {
-                query: None,
-                queries: Vec::new(),
-                sources: Vec::new(),
-            },
-            results: None,
-        },
-        ResponseFormat::CodeInterpreterCall => ResponseOutputItem::CodeInterpreterCall {
-            id: format!("ci_{}", output.call_id),
-            status: CodeInterpreterCallStatus::Failed,
-            container_id: String::new(),
-            code: None,
-            outputs: None,
-        },
-        ResponseFormat::FileSearchCall => ResponseOutputItem::FileSearchCall {
-            id: format!("fs_{}", output.call_id),
-            status: FileSearchCallStatus::Failed,
-            queries: Vec::new(),
-            results: None,
-        },
-        ResponseFormat::ImageGenerationCall => ResponseOutputItem::ImageGenerationCall {
-            id: format!("ig_{}", output.call_id),
-            action: None,
-            background: None,
-            output_format: None,
-            quality: None,
-            result: String::new(),
-            revised_prompt: None,
-            size: None,
-            status: ImageGenerationCallStatus::Failed,
-        },
+    ResponseOutputItem::McpCall {
+        id: mcp_response_item_id(&output.call_id),
+        status: "failed".to_string(),
+        approval_request_id: None,
+        arguments: output.arguments_str.clone(),
+        error: Some(err_msg),
+        name: output.tool_name.clone(),
+        output: String::new(),
+        server_label: output.server_label.clone(),
     }
 }
 
@@ -1677,7 +1642,13 @@ mod tests {
     }
 
     #[test]
-    fn transform_tool_output_emits_hosted_failed_status_only() {
+    fn transform_tool_output_emits_hosted_completed_even_on_is_error() {
+        // Hosted-builtin variants always emit status=completed regardless of
+        // upstream is_error. Real OpenAI cloud doesn't emit Failed for soft
+        // failures (rate-limited search, no results, etc.) and many MCP
+        // servers set isError=true for those exact cases — propagating it
+        // would break clients that expect cloud-parity wire shape. The
+        // failure context lives in the item content, not in `status`.
         for fmt in [
             ResponseFormat::WebSearchCall,
             ResponseFormat::CodeInterpreterCall,
@@ -1686,8 +1657,11 @@ mod tests {
         ] {
             let item = transform_tool_output(&failed_tool_output("search"), fmt);
             let serialized = serde_json::to_value(&item).expect("serialize failed item");
-            assert_eq!(serialized["status"], serde_json::json!("failed"), "{fmt:?}");
-            assert!(serialized.get("error").is_none(), "{fmt:?} {serialized}");
+            assert_eq!(
+                serialized["status"],
+                serde_json::json!("completed"),
+                "{fmt:?}"
+            );
         }
     }
 
diff --git a/model_gateway/src/routers/grpc/regular/responses/streaming.rs b/model_gateway/src/routers/grpc/regular/responses/streaming.rs
index a2d964ea4..19bd6624b 100644
--- a/model_gateway/src/routers/grpc/regular/responses/streaming.rs
+++ b/model_gateway/src/routers/grpc/regular/responses/streaming.rs
@@ -776,8 +776,10 @@ async fn execute_tool_loop_streaming_internal(
                     warn!("Tool execution returned error: {}", err_text);
 
                     // `response.mcp_call.failed` is the only `*.failed` event
-                    // in the Responses API; hosted-builtin families close via
-                    // `*.completed` + `output_item.done` with status=failed.
+                    // in the Responses API. Hosted-builtin families close via
+                    // `*.completed` to mirror OpenAI cloud's wire shape;
+                    // the failure context (when present) lives in the item
+                    // content.
                     if matches!(response_format, ResponseFormat::Passthrough) {
                         let event = emitter.emit_mcp_call_failed(output_index, &item_id, &err_text);
                         emitter.send_event(&event, &tx)?;