soapbucket · rickcrawford · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026
diff --git a/crates/mcptest-config/schemas/v1.json b/crates/mcptest-config/schemas/v1.json
@@ -575,9 +575,55 @@
           }
         }
       }
+    },
+    "input_responder": {
+      "$ref": "#/$defs/InputResponder",
+      "description": "Suite-level default elicitation answer source (SEP-2322). Applies to any tool test whose tool returns an InputRequiredResult, unless the test declares its own `input_responses` or `input_responder`."
     }
   },
   "$defs": {
+    "InputResponder": {
+      "title": "Elicitation answer source",
+      "description": "A dynamic source the runner uses to answer a 2026-07-28 InputRequiredResult elicitation (SEP-2322). Only the `rest` provider exists today.",
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "rest"
+      ],
+      "properties": {
+        "rest": {
+          "$ref": "#/$defs/RestInputResponder"
+        }
+      }
+    },
+    "RestInputResponder": {
+      "title": "REST elicitation responder",
+      "description": "The runner POSTs each elicitation ({ tool, arguments, requestState, inputRequests }) to `url` and reads back { inputResponses: [{ id, value }] }.",
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "url"
+      ],
+      "properties": {
+        "url": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Endpoint the runner POSTs each elicitation to."
+        },
+        "headers": {
+          "type": "object",
+          "additionalProperties": {
+            "type": "string"
+          },
+          "description": "Static headers sent on every POST (for example an Authorization token). Values are redacted in diagnostics."
+        },
+        "timeout_ms": {
+          "type": "integer",
+          "minimum": 0,
+          "description": "Per-request timeout in milliseconds. Defaults to 30000 when omitted."
+        }
+      }
+    },
     "CalibrationCheck": {
       "title": "Calibration check",
       "description": "One judge-calibration check. Reads a labels file (JSONL or a YAML array of {confidence, correct}) and exposes the computed metrics as assertion targets: `ece`, `brier`, and (when `reliability` and `observed_positive_rate` are both given) `corrected_rate` plus its Wald 95% interval `corrected_rate_low` / `corrected_rate_high`. The `expect:` reuses the standard assertion grammar (target plus matcher); omit it to apply the defaults `ece <= 0.10`, `brier <= 0.25`, and `corrected_rate <= observed_positive_rate`. The target names are runtime-resolved free strings, documented rather than schema-enforced.",
@@ -1856,6 +1902,14 @@
           "minLength": 1,
           "description": "Name of a fixture defined under `fixtures.errors[]` that the runner will inject in place of a real tool call. The schema does not enforce that the name resolves to a declared fixture; that cross-reference check happens in the loader (and will be wired up by the runner in a future release)."
         },
+        "input_responses": {
+          "type": "object",
+          "description": "Static answers for a 2026-07-28 InputRequiredResult elicitation (SEP-2322): each inputRequest id maps to the answer value. When the tool returns an input-required result, the runner satisfies it from this map and retries. Mutually exclusive with `input_responder`.",
+          "additionalProperties": true
+        },
+        "input_responder": {
+          "$ref": "#/$defs/InputResponder"
+        },
         "cache": {
           "$ref": "#/$defs/CacheDirective"
         },

diff --git a/crates/mcptest-core/src/executor/dispatch.rs b/crates/mcptest-core/src/executor/dispatch.rs
@@ -85,12 +85,25 @@ pub(super) async fn apply_response_transform(
 /// same assertable envelope the offline `inject_error` path produces instead
 /// of failing the test hard. An unexpected error (no error assertion) still
 /// fails loudly with the server's message, preserving the safety net.
+/// Answer source plus test/server labels for satisfying an `InputRequiredResult` (WOR-1383).
+// NOTE(review): the `///` lines directly above belonged to `call_server`; placed here
+// they now document this struct instead. Move this struct above that doc comment.
+pub(super) struct ElicitSetup<'a> {
+    /// The resolved answer source (static map or REST endpoint).
+    pub responder: &'a crate::executor::elicitation::Responder,
+    /// Test name, for the trace and the REST request body.
+    pub test_name: &'a str,
+    /// Server name, for the trace and the REST request body.
+    pub server: &'a str,
+}
+
 pub(super) async fn call_server(
     client: &crate::protocol::Client,
     action: &Action,
     transform: Option<&TransformSpec>,
     ctx: &TransformContext,
     expects_error: bool,
+    elicit: Option<&ElicitSetup<'_>>,
 ) -> Result<(Value, i64), String> {
     let (method, mut params, label) = match action {
         Action::ToolCall { tool, args } => (
@@ -121,6 +134,9 @@ pub(super) async fn call_server(
         target = %label,
         "dispatching action"
     );
+    // Keep the transformed params as the base for elicitation retries (only
+    // when a responder is configured, to avoid a clone on the common path).
+    let retry_base = elicit.map(|_| params.clone());
     match client.request_with_id(method.to_string(), params).await {
         Ok((id, raw)) => {
             tracing::trace!(
@@ -139,6 +155,22 @@ pub(super) async fn call_server(
             } else {
                 raw
             };
+            // A tools/call can also answer with an InputRequiredResult; when a
+            // responder is configured, satisfy it and retry until a final
+            // result, returning the final request id for header assertions
+            // (WOR-1383). `retry_base` holds the already-transformed params.
+            if let (Action::ToolCall { .. }, Some(setup)) = (action, elicit) {
+                let base = retry_base.expect("retry_base is cloned whenever elicit is set");
+                return resolve_input_required(
+                    client,
+                    json!({ "result": resolved }),
+                    id,
+                    base,
+                    setup,
+                    label,
+                )
+                .await;
+            }
             Ok((json!({ "result": resolved }), id))
         }
         // A live JSON-RPC error becomes an assertable `result.error` envelope
@@ -204,6 +236,142 @@ async fn resolve_task_handle(
     ))
 }
 
+/// Drive the `InputRequiredResult` retry loop for a tools/call (SEP-2322,
+/// WOR-1383).
+///
+/// Starts from `initial_envelope` (`{"result": <first result>}`, id `initial_id`).
+/// While the envelope is input-required: ask `setup.responder` for answers, retry
+/// via `build_retry_params` (from `base_params`, then from the prior retry), and
+/// resolve any task handle. Returns the final envelope and last request id; `Err`
+/// if answering or a retry fails, or the server still elicits at the round cap.
+/// Trace events log ids, counts and a `requestState` hash, never answer values.
+async fn resolve_input_required(
+    client: &crate::protocol::Client,
+    initial_envelope: Value,
+    initial_id: i64,
+    base_params: Value,
+    setup: &ElicitSetup<'_>,
+    label: &str,
+) -> Result<(Value, i64), String> {
+    use crate::executor::elicitation::{ElicitCallContext, DEFAULT_MAX_ELICITATION_ROUNDS};
+    use crate::protocol::elicitation::{build_retry_params, recognize_input_required};
+
+    let mut envelope = initial_envelope;
+    let mut params = base_params;
+    let mut last_id = initial_id;
+    // Inclusive bound: the extra pass only classifies the last retry (done vs. cap hit).
+    for round in 0..=DEFAULT_MAX_ELICITATION_ROUNDS {
+        let Some(irr) = recognize_input_required(&envelope) else {
+            if round > 0 {
+                tracing::info!(
+                    target: "mcptest_core::elicitation",
+                    event = "elicitation.completed",
+                    test = setup.test_name, server = setup.server, tool = label,
+                    rounds = round, request_id = last_id,
+                    "elicitation resolved",
+                );
+            }
+            return Ok((envelope, last_id));
+        };
+        if round == DEFAULT_MAX_ELICITATION_ROUNDS {
+            break;
+        }
+        let round = round + 1;
+        let request_ids: Vec<&str> = irr.input_requests.iter().map(|r| r.id.as_str()).collect();
+        let required_count = irr.input_requests.iter().filter(|r| r.required).count();
+        tracing::info!(
+            target: "mcptest_core::elicitation",
+            event = "elicitation.round_started",
+            test = setup.test_name, server = setup.server, tool = label,
+            round, responder_kind = setup.responder.kind(),
+            input_request_ids = ?request_ids, required_count,
+            optional_count = irr.input_requests.len() - required_count,
+            request_state_hash = %short_hash(irr.request_state.as_str()),
+            "elicitation round started",
+        );
+        let ctx = ElicitCallContext {
+            test_name: setup.test_name.to_string(),
+            server: setup.server.to_string(),
+            tool: label.to_string(),
+            arguments: params
+                .get("arguments")
+                .cloned()
+                .unwrap_or_else(|| json!({})),
+            request_state: irr.request_state.clone(),
+            round,
+        };
+        let started = std::time::Instant::now();
+        let answers = match setup.responder.answer(&irr.input_requests, &ctx).await {
+            Ok(answers) => answers,
+            Err(err) => {
+                tracing::warn!(
+                    target: "mcptest_core::elicitation",
+                    event = "elicitation.failed",
+                    test = setup.test_name, server = setup.server, tool = label,
+                    round, error_kind = elicitation_error_kind(&err),
+                    "elicitation could not be answered",
+                );
+                return Err(format!("tools/call `{label}` elicitation: {err}"));
+            }
+        };
+        tracing::debug!(
+            target: "mcptest_core::elicitation",
+            event = "elicitation.answer_resolved",
+            test = setup.test_name, server = setup.server, tool = label,
+            round, answered = answers.len(),
+            duration_ms = started.elapsed().as_millis() as u64,
+            "answers resolved",
+        );
+        let retry = build_retry_params(&params, &irr.request_state, &answers);
+        let (id, raw) = client
+            .request_with_id("tools/call".to_string(), retry.clone())
+            .await
+            .map_err(|e| format!("tools/call retry for `{label}` failed: {e}"))?;
+        last_id = id;
+        let resolved = resolve_task_handle(client, raw, label).await?;
+        envelope = json!({ "result": resolved });
+        params = retry;
+        tracing::debug!(
+            target: "mcptest_core::elicitation",
+            event = "elicitation.retry_dispatched",
+            test = setup.test_name, server = setup.server, tool = label,
+            round, request_id = last_id,
+            "retry dispatched",
+        );
+    }
+    tracing::warn!(
+        target: "mcptest_core::elicitation",
+        event = "elicitation.failed",
+        test = setup.test_name, server = setup.server, tool = label,
+        error_kind = "max_rounds",
+        "server kept eliciting past the round cap",
+    );
+    Err(format!(
+        "tools/call `{label}` elicitation: server kept eliciting after {DEFAULT_MAX_ELICITATION_ROUNDS} rounds; aborting"
+    ))
+}
+
+/// 8-hex-digit fingerprint of an opaque `requestState` so traces can correlate
+/// rounds without the token; `DefaultHasher` output may change across Rust releases.
+fn short_hash(value: &str) -> String {
+    use std::hash::{Hash, Hasher};
+    let mut digest = std::collections::hash_map::DefaultHasher::new();
+    value.hash(&mut digest);
+    format!("{:08x}", digest.finish() & u64::from(u32::MAX))
+}
+
+/// Classify an elicitation failure as a one-word `error_kind` trace field (no message body).
+fn elicitation_error_kind(error: &crate::executor::elicitation::ElicitationError) -> &'static str {
+    use crate::executor::elicitation::ElicitationError as Failure;
+    match error {
+        Failure::Rest(_) => "rest",
+        Failure::WrongKind { .. } => "wrong_kind",
+        Failure::NoFixture => "no_fixture",
+        Failure::MaxRoundsExceeded { .. } => "max_rounds",
+        Failure::MissingResponse { .. } => "missing_response",
+    }
+}
+
 /// What a single `tasks/get` poll tells the runner to do next.
 enum PollOutcome {
     /// Non-terminal: poll again.
@@ -289,7 +457,9 @@ pub(super) async fn run_metamorphic(
         tool: tool.to_string(),
         args: base_args.clone(),
     };
-    let (base_envelope, _id) = call_server(client, &base_action, None, ctx, false).await?;
+    // Metamorphic calls (base and follow-ups) do not participate in elicitation
+    // (v1): the primary call already resolved any InputRequiredResult (WOR-1383).
+    let (base_envelope, _id) = call_server(client, &base_action, None, ctx, false, None).await?;
     let base_result = inner_result(base_envelope);
     let mut pairs = Vec::with_capacity(spec.relations.len());
     for relation in &spec.relations {
@@ -298,7 +468,7 @@ pub(super) async fn run_metamorphic(
             tool: tool.to_string(),
             args: followup_args,
         };
-        let (followup_envelope, _id) = call_server(client, &action, None, ctx, false).await?;
+        let (followup_envelope, _id) = call_server(client, &action, None, ctx, false, None).await?;
         let followup_result = inner_result(followup_envelope);
         pairs.push((relation.clone(), base_result.clone(), followup_result));
     }