diff --git a/src/daemon/agent-runtime.ts b/src/daemon/agent-runtime.ts index fd96e9d..b2e21b1 100644 --- a/src/daemon/agent-runtime.ts +++ b/src/daemon/agent-runtime.ts @@ -91,8 +91,22 @@ export class AgentRuntime { private slackWorkspaces?: Array<{ teamId: string; teamName: string; userId: string }>; private notificationDefault?: { platform: string; channelId: string; label?: string }; + // Optional elicitation manager (set by gateway). When present, the SDK's + // `onElicitation` callback routes ask_user requests through it. + private elicitationManager?: import("./elicitation-manager.ts").ElicitationManager; + private initialized = false; + /** Wire in the elicitation manager. Called by the gateway after construction. */ + setElicitationManager(mgr: import("./elicitation-manager.ts").ElicitationManager): void { + this.elicitationManager = mgr; + } + + /** Expose the elicitation manager so channel adapters can resolve answers. */ + getElicitationManager(): import("./elicitation-manager.ts").ElicitationManager | undefined { + return this.elicitationManager; + } + /** Get the configured model name. */ getModel(): string { return this.config?.model ?? 
"claude-sonnet-4-6"; @@ -685,6 +699,11 @@ export class AgentRuntime { sessionKey, userState, personaPrompt, + { + platform: message.platform, + channelId: message.channelId, + threadId: message.threadId, + }, ); // Cache the new SDK session ID @@ -746,6 +765,11 @@ export class AgentRuntime { sessionKey, userState, personaPrompt, + { + platform: message.platform, + channelId: message.channelId, + threadId: message.threadId, + }, ); if (result.sessionId) { @@ -785,6 +809,11 @@ export class AgentRuntime { sessionKey, userState, personaPrompt, + { + platform: message.platform, + channelId: message.channelId, + threadId: message.threadId, + }, ); if (upgraded.sessionId) { this.sdkSessionIds.set(sessionKey, upgraded.sessionId); @@ -824,6 +853,12 @@ export class AgentRuntime { sessionKey?: string, userState?: string, personaPrompt?: string, + /** + * Source channel of the incoming message — used so the elicitation + * manager renders `ask_user` questions back on the user's active + * channel. Optional so non-message runs (cron, internal) keep working. + */ + source?: { platform: string; channelId: string; threadId?: string }, ): Promise<{ text: string; sessionId?: string; @@ -855,6 +890,16 @@ export class AgentRuntime { systemPromptAppend = systemPromptAppend + "\n\n" + personaPrompt; } + // Build the elicitation callback for this turn. The `ask_user` MCP + // tool calls `extra.sendRequest({method: "elicitation/create"})`; + // the SDK forwards to `onElicitation`, we route to the channel the + // user is currently talking to us on, and return their answer. + const mgr = this.elicitationManager; + const onElicitation: import("../sdk/session.ts").RunSessionParams["onElicitation"] = + mgr && source + ? (request, opts) => mgr.handleElicitation(request, source, opts.signal) + : undefined; + const sdkQuery = runSession({ prompt, model: model ?? 
this.config.model, @@ -869,6 +914,7 @@ export class AgentRuntime { anthropicBaseUrl: this.config.anthropicBaseUrl, plugins: this.plugins, useSubscription: this.config.useSubscription, + onElicitation, stderr: (data: string) => { // Log SDK subprocess stderr so we can diagnose crash reasons const trimmed = data.trim(); diff --git a/src/daemon/channels/slack-user.ts b/src/daemon/channels/slack-user.ts index 398b090..876c6b4 100644 --- a/src/daemon/channels/slack-user.ts +++ b/src/daemon/channels/slack-user.ts @@ -50,6 +50,14 @@ export class SlackUserAdapter implements ChannelAdapter { private teamName: string | null = null; private onMessage: (msg: IncomingMessage) => void; private draftManager: DraftManager; + // Optional elicitation manager — wired in by gateway after construction. + // Used by the ask_user button action handler to resolve pending requests. + private elicitationManager?: import("../elicitation-manager.ts").ElicitationManager; + + /** Inject the elicitation manager. Called by gateway after adapter creation. */ + setElicitationManager(mgr: import("../elicitation-manager.ts").ElicitationManager): void { + this.elicitationManager = mgr; + } // Default channel -- the user's direct chat channel with the agent private defaultChannelId: string | null = null; @@ -263,6 +271,25 @@ export class SlackUserAdapter implements ChannelAdapter { }); }); + // ask_user button clicks. action_id is `ask_user_option:<index>`; + // value is `<elicitation id>::<option index>`. The elicitation + // manager resolves the pending request and we replace the original + // question with a "you chose X" acknowledgement. 
+ this.app.action(/^ask_user_option:\d+$/, async ({ action, ack, respond }) => { + await ack(); + const value = (action as { value?: string }).value; + if (!value) return; + const mgr = this.elicitationManager; + if (!mgr) return; + const { resolved, label } = mgr.resolveByButton(value); + if (resolved && label) { + await respond({ + replace_original: true, + text: `:white_check_mark: You chose: *${label}*`, + }); + } + }); + // Edit draft: open a modal with the draft content for editing this.app.action("edit_draft", async ({ action, ack, body }) => { await ack(); @@ -429,6 +456,32 @@ export class SlackUserAdapter implements ChannelAdapter { return result.ts; } + /** + * Post a message with Block Kit blocks. Used by the elicitation manager + * to render `ask_user` questions with interactive buttons. Falls back to + * plain text on platforms that ignore blocks. Same default-channel + * guard as postMessage — we only render interactive UIs in the channel + * the user actually watches. + */ + async postBlocks( + channelId: string, + fallbackText: string, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + blocks: any[], + threadId?: string, + ): Promise<string | undefined> { + if (channelId !== this.defaultChannelId) return undefined; + const client = this.clientFor(channelId); + if (!client) return undefined; + const result = await client.chat.postMessage({ + channel: channelId, + text: fallbackText, + blocks, + thread_ts: threadId, + }); + return result.ts; + } + /** * Update an existing message (for streaming support). * Only works in the default channel (matches postMessage guard).
diff --git a/src/daemon/elicitation-manager.ts b/src/daemon/elicitation-manager.ts
new file mode 100644
index 0000000..2c4aab1
--- /dev/null
+++ b/src/daemon/elicitation-manager.ts
@@ -0,0 +1,456 @@
+/**
+ * Elicitation manager — Nomos's host-side handler for MCP elicitation
+ * requests (the SDK-native "tool needs to ask the user something" path).
+ *
+ * Flow:
+ *   1. An in-process MCP tool (e.g. `ask_user`) calls `extra.sendRequest`
+ *      with an `elicitation/create` payload.
+ *   2. The Claude Agent SDK relays the request to our `onElicitation`
+ *      callback (see `src/sdk/session.ts`).
+ *   3. `handleElicitation()` here renders the question on the channel
+ *      where the user is currently talking to the agent, registers a
+ *      pending entry keyed by elicitation id, and returns a promise that
+ *      resolves when the user answers.
+ *   4. Channel adapters call `resolveByButton()` (Slack action) or
+ *      `tryConsumeTextReply()` (any channel) to dispatch the answer back.
+ *
+ * Cleanup: every pending entry has a TTL; expired entries auto-decline so
+ * the agent doesn't hang forever if the user walks away. A question that
+ * cannot be rendered at all is declined immediately.
+ */
+
+import { randomUUID } from "node:crypto";
+import type { ElicitationRequest, ElicitationResult } from "@anthropic-ai/claude-agent-sdk";
+import type { ChannelManager } from "./channel-manager.ts";
+import type { OutgoingMessage } from "./types.ts";
+import { createLogger } from "../lib/logger.ts";
+
+const log = createLogger("elicitation-manager");
+
+/** Default timeout — a pending question auto-declines after this. */
+const DEFAULT_TTL_MS = 10 * 60_000; // 10 minutes
+
+/** Action ID prefix on Slack buttons. The value carries the elicitation id + option index. */
+export const SLACK_ASK_USER_ACTION_PREFIX = "ask_user_option";
+
+/** Schema property name our tool uses for the single-select answer. */
+const ANSWER_PROPERTY = "answer";
+
+/** Slack allows at most this many elements in one `actions` block. */
+const SLACK_MAX_ACTION_ELEMENTS = 25;
+
+/**
+ * Source context for an elicitation — where to render the question and
+ * where to look for a text-reply answer.
+ */
+export interface ElicitationSource {
+  /** Channel platform (e.g. "slack-user:T123", "imessage", "cli"). */
+  platform: string;
+  /** Channel ID (DM channel, room id, phone number, etc.). */
+  channelId: string;
+  /** Optional thread id to keep the conversation contained. */
+  threadId?: string;
+}
+
+interface PendingElicitation {
+  id: string;
+  source: ElicitationSource;
+  /** The agent-facing question. */
+  message: string;
+  /** Options the user can pick from. */
+  options: Array<{ label: string; description?: string }>;
+  /** Resolve the agent's `await elicit(...)` call. */
+  resolve: (result: ElicitationResult) => void;
+  /** Time the request landed (informational; expiry is driven by `ttlTimer`). */
+  createdAt: number;
+  /** TTL handle so we can clear on resolve. */
+  ttlTimer: ReturnType<typeof setTimeout>;
+  /** Posted message id from the channel adapter (for future deletion/update). */
+  postedMessageId?: string;
+}
+
+export class ElicitationManager {
+  private pending = new Map<string, PendingElicitation>();
+  /** Reverse index: channel key (see `channelKeyFor`) → pending id, for fast text-reply lookup. */
+  private byChannel = new Map<string, string>();
+
+  constructor(private readonly channelManager: ChannelManager) {}
+
+  /**
+   * Handle an MCP elicitation request from the agent SDK. Resolves with
+   * the user's answer when they click a button or reply with a matching
+   * text.
+   *
+   * Never rejects. Resolves with `decline` when the request is URL-mode,
+   * has no enumerated options, cannot be rendered on the channel, or goes
+   * unanswered for `DEFAULT_TTL_MS`. Resolves with `cancel` when `signal`
+   * aborts or a newer question replaces this one on the same channel.
+   *
+   * @param request - Elicitation payload relayed by the SDK.
+   * @param source - Channel (and optional thread) to ask on.
+   * @param signal - Abort signal supplied by the SDK for this request.
+   */
+  async handleElicitation(
+    request: ElicitationRequest,
+    source: ElicitationSource,
+    signal: AbortSignal,
+  ): Promise<ElicitationResult> {
+    // URL-mode elicitations (OAuth) aren't our use case — decline.
+    if (request.mode === "url") {
+      log.warn({ url: request.url }, "URL-mode elicitation not supported; declining");
+      return { action: "decline" };
+    }
+
+    const options = extractOptionsFromSchema(request.requestedSchema);
+    if (options.length === 0) {
+      log.warn(
+        { schema: request.requestedSchema },
+        "Elicitation has no enumerated options; declining (only single-select forms are supported)",
+      );
+      return { action: "decline" };
+    }
+
+    // An "abort" listener never fires on a signal that is already aborted,
+    // so bail out before posting a question nobody is waiting on.
+    if (signal.aborted) return { action: "cancel" };
+
+    const id = randomUUID();
+
+    const channelKey = channelKeyFor(source);
+    // If a previous pending question on this channel is unanswered, cancel it
+    // — the agent should never have two open questions in one channel.
+    const priorId = this.byChannel.get(channelKey);
+    const prior = priorId ? this.pending.get(priorId) : undefined;
+    if (prior) {
+      log.warn({ priorId, channelKey }, "Cancelling stale elicitation on same channel");
+      this.resolvePending(prior, { action: "cancel" });
+    }
+
+    const promise = new Promise<ElicitationResult>((resolve) => {
+      const ttlTimer = setTimeout(() => {
+        const stale = this.pending.get(id);
+        if (!stale) return;
+        log.warn({ id, channelKey }, "Elicitation TTL expired; auto-declining");
+        this.resolvePending(stale, { action: "decline" });
+      }, DEFAULT_TTL_MS);
+
+      const entry: PendingElicitation = {
+        id,
+        source,
+        message: request.message,
+        options,
+        resolve,
+        createdAt: Date.now(),
+        ttlTimer,
+      };
+      this.pending.set(id, entry);
+      this.byChannel.set(channelKey, id);
+
+      // If the agent aborts (timeout, user cancels), clean up.
+      signal.addEventListener(
+        "abort",
+        () => {
+          const aborted = this.pending.get(id);
+          if (aborted) this.resolvePending(aborted, { action: "cancel" });
+        },
+        { once: true },
+      );
+    });
+
+    // Render the question. Slack gets Block Kit buttons; everything else
+    // gets a numbered text message and awaits a text reply.
+    try {
+      await this.renderQuestion(id, request.message, options, source);
+    } catch (err) {
+      log.error(
+        { err: err instanceof Error ? err.message : err, id, source },
+        "Failed to render elicitation on channel",
+      );
+      // The user never saw the question, so nobody can answer it. Decline
+      // now instead of parking the agent until the TTL fires.
+      const unrendered = this.pending.get(id);
+      if (unrendered) this.resolvePending(unrendered, { action: "decline" });
+    }
+
+    return promise;
+  }
+
+  /**
+   * Resolve a pending elicitation from a Slack button click. The Slack
+   * adapter calls this from its `ask_user_option:<index>` action handler
+   * with the clicked button's `value`.
+   *
+   * @param actionValue - Button value, `<elicitation id>::<option index>`.
+   * @returns `{ resolved: true, label }` when the click answered a pending
+   *   question; `{ resolved: false }` when the value is malformed, the
+   *   question is no longer pending, or the index is out of range.
+   */
+  resolveByButton(actionValue: string): { resolved: boolean; label?: string } {
+    const parsed = parseActionValue(actionValue);
+    if (!parsed) return { resolved: false };
+
+    const entry = this.pending.get(parsed.id);
+    if (!entry) return { resolved: false };
+
+    const option = entry.options[parsed.index];
+    if (!option) return { resolved: false };
+
+    this.resolvePending(entry, { action: "accept", content: { [ANSWER_PROPERTY]: option.label } });
+    return { resolved: true, label: option.label };
+  }
+
+  /**
+   * Try to consume an incoming text message as a reply to a pending
+   * elicitation on the same channel. Returns true if the message was
+   * consumed (caller should NOT forward to the agent in that case).
+   *
+   * Matching: numeric ("1", "2"), bare label, or label substring. Case
+   * insensitive. Refuses ambiguous substring matches.
+   */
+  tryConsumeTextReply(source: ElicitationSource, text: string): boolean {
+    const key = channelKeyFor(source);
+    const id = this.byChannel.get(key);
+    if (!id) return false;
+
+    const entry = this.pending.get(id);
+    if (!entry) return false;
+
+    const matched = matchOption(text, entry.options);
+    if (matched === null) return false; // not a parseable answer — leave for agent
+    if (matched === "ambiguous") {
+      // Don't consume; the user will retry. Optionally we could nudge here.
+      log.info({ id, text }, "Ambiguous answer; leaving message for agent");
+      return false;
+    }
+
+    const option = entry.options[matched];
+    this.resolvePending(entry, { action: "accept", content: { [ANSWER_PROPERTY]: option.label } });
+    return true;
+  }
+
+  /** Number of pending elicitations (for debugging / tests). */
+  pendingCount(): number {
+    return this.pending.size;
+  }
+
+  // ── internals ──
+
+  /** Settle `entry` with `result` and drop its timer and index entries. */
+  private resolvePending(entry: PendingElicitation, result: ElicitationResult): void {
+    clearTimeout(entry.ttlTimer);
+    this.pending.delete(entry.id);
+    const channelKey = channelKeyFor(entry.source);
+    // Leave the reverse index alone if it already points at a newer question.
+    if (this.byChannel.get(channelKey) === entry.id) this.byChannel.delete(channelKey);
+    entry.resolve(result);
+  }
+
+  /**
+   * Post the question on `source`. Adapters that expose `postBlocks`
+   * (Slack) get Block Kit buttons; any other adapter gets a numbered text
+   * prompt through `send`.
+   *
+   * @throws Error when no adapter is registered for the platform, or when
+   *   `postBlocks` resolves without a message id (nothing was posted).
+   */
+  private async renderQuestion(
+    id: string,
+    message: string,
+    options: Array<{ label: string; description?: string }>,
+    source: ElicitationSource,
+  ): Promise<void> {
+    const adapter = this.channelManager.getAdapter(source.platform);
+    if (!adapter) {
+      throw new Error(`No adapter for platform ${source.platform}`);
+    }
+
+    // `postBlocks` is not part of the common adapter type, so probe for it
+    // structurally. The Slack user adapter provides it.
+    const blockCapable = adapter as unknown as {
+      postBlocks?: (
+        channelId: string,
+        text: string,
+        blocks: unknown[],
+        threadId?: string,
+      ) => Promise<string | undefined>;
+    };
+
+    if (typeof blockCapable.postBlocks === "function") {
+      const blocks = buildSlackBlocks(id, message, options);
+      const fallbackText = `${message}\n\n${options.map((o, i) => `${i + 1}. ${o.label}`).join("\n")}`;
+      // Invoke it as a method: `postBlocks` reads `this`, so calling a
+      // detached reference would throw before anything is posted.
+      const messageId = await blockCapable.postBlocks(
+        source.channelId,
+        fallbackText,
+        blocks,
+        source.threadId,
+      );
+      if (messageId === undefined) {
+        // No message id means the adapter skipped the post (for example its
+        // default-channel guard), so there is nothing for the user to click.
+        throw new Error(`Adapter for ${source.platform} did not post the question`);
+      }
+      const entry = this.pending.get(id);
+      if (entry) entry.postedMessageId = messageId;
+      return;
+    }
+
+    // Generic fallback: post numbered text and let the user reply with the
+    // number or label. tryConsumeTextReply will match either.
+    const numbered = options
+      .map((o, i) => `${i + 1}. *${o.label}*${o.description ? ` — ${o.description}` : ""}`)
+      .join("\n");
+    const outgoing: OutgoingMessage = {
+      inReplyTo: id,
+      platform: source.platform,
+      channelId: source.channelId,
+      threadId: source.threadId,
+      content: `${message}\n\n${numbered}\n\n_Reply with the number or label to answer._`,
+    };
+    await adapter.send(outgoing);
+  }
+}
+
+// ── helpers ──
+
+/** Key for the per-channel reverse index; a thread gets its own key. */
+function channelKeyFor(s: ElicitationSource): string {
+  return s.threadId ? `${s.platform}|${s.channelId}|${s.threadId}` : `${s.platform}|${s.channelId}`;
+}
+
+/**
+ * Pull the option labels out of an MCP elicitation form schema. We only
+ * support the simple "one string property with an enum" shape that
+ * `ask_user` produces — anything else falls through with no options.
+ *
+ * `oneOf` entries without a string `title`/`const` are skipped; a string
+ * `description` on an entry is carried through when present. `enumNames`
+ * are matched to `enum` by original position.
+ */
+function extractOptionsFromSchema(
+  schema: ElicitationRequest["requestedSchema"],
+): Array<{ label: string; description?: string }> {
+  if (!schema || typeof schema !== "object") return [];
+  const properties = (schema as { properties?: Record<string, unknown> }).properties;
+  if (!properties) return [];
+  const prop = properties[ANSWER_PROPERTY] as
+    | {
+        enum?: unknown[];
+        enumNames?: unknown[];
+        oneOf?: Array<{ const?: unknown; title?: unknown; description?: unknown } | null>;
+      }
+    | undefined;
+  if (!prop) return [];
+
+  const extracted: Array<{ label: string; description?: string }> = [];
+
+  if (Array.isArray(prop.oneOf)) {
+    for (const candidate of prop.oneOf) {
+      const label = candidate?.title ?? candidate?.const;
+      // Skip malformed entries rather than rendering an "undefined" option.
+      if (typeof label !== "string" || label.length === 0) continue;
+      const description = candidate?.description;
+      extracted.push(typeof description === "string" ? { label, description } : { label });
+    }
+    return extracted;
+  }
+
+  if (Array.isArray(prop.enum)) {
+    const names: unknown[] = Array.isArray(prop.enumNames) ? prop.enumNames : [];
+    prop.enum.forEach((value, i) => {
+      if (typeof value !== "string") return;
+      // Index by the original position so a skipped non-string value
+      // cannot shift every later display name by one.
+      const name = names[i];
+      extracted.push({ label: typeof name === "string" ? name : value });
+    });
+    return extracted;
+  }
+
+  return extracted;
+}
+
+/**
+ * Split a Slack button value of the form `<elicitation id>::<option index>`.
+ * Returns null when the separator is missing, the id is empty, or the
+ * index is not a plain non-negative integer.
+ */
+function parseActionValue(value: string): { id: string; index: number } | null {
+  const sep = value.indexOf("::");
+  if (sep < 0) return null;
+  const id = value.slice(0, sep);
+  const indexStr = value.slice(sep + 2);
+  // Digits only: `Number.parseInt` on its own would accept "1abc" as 1.
+  if (!id || !/^\d+$/.test(indexStr)) return null;
+  return { id, index: Number.parseInt(indexStr, 10) };
+}
+
+/**
+ * Match free text against the options. Returns the option index, the
+ * string "ambiguous" when the text is a substring of several labels, or
+ * null when nothing matches.
+ */
+function matchOption(text: string, options: Array<{ label: string }>): number | "ambiguous" | null {
+  const trimmed = text.trim();
+  if (!trimmed) return null;
+
+  // Numeric match: "1", "2", "1.", "(1)"
+  const num = trimmed.match(/^[\s(]*(\d+)[\s).]*$/);
+  if (num) {
+    const n = Number.parseInt(num[1], 10);
+    if (n >= 1 && n <= options.length) return n - 1;
+    return null;
+  }
+
+  // Exact-label match (case insensitive)
+  const lower = trimmed.toLowerCase();
+  const exact = options.findIndex((o) => o.label.toLowerCase() === lower);
+  if (exact >= 0) return exact;
+
+  // Substring match — only if exactly one option contains the text
+  const substringMatches = options
+    .map((o, i) => ({ i, contains: o.label.toLowerCase().includes(lower) }))
+    .filter((m) => m.contains);
+  if (substringMatches.length === 1) return substringMatches[0].i;
+  if (substringMatches.length > 1) return "ambiguous";
+
+  return null;
+}
+
+/**
+ * Build the Block Kit payload for a question: a section with the message,
+ * one context line per option that has a description, and one button per
+ * option. Each button's value is `<id>::<index>` so `resolveByButton` can
+ * map the click back to the pending entry.
+ */
+function buildSlackBlocks(
+  id: string,
+  message: string,
+  options: Array<{ label: string; description?: string }>,
+): unknown[] {
+  // `ask_user` sends 2-4 options, well under Slack's per-block limit; the
+  // slice only guards against oversized schemas from other MCP tools.
+  const buttons = options.slice(0, SLACK_MAX_ACTION_ELEMENTS);
+  return [
+    {
+      type: "section",
+      text: { type: "mrkdwn", text: `*Nomos asks:* ${message}` },
+    },
+    ...options
+      .filter((o) => o.description)
+      .map((o) => ({
+        type: "context",
+        elements: [{ type: "mrkdwn", text: `*${o.label}* — ${o.description}` }],
+      })),
+    {
+      type: "actions",
+      elements: buttons.map((o, i) => ({
+        type: "button",
+        text: { type: "plain_text", text: o.label.slice(0, 75) },
+        action_id: `${SLACK_ASK_USER_ACTION_PREFIX}:${i}`,
+        value: `${id}::${i}`,
+      })),
+    },
+  ];
+}
diff --git a/src/daemon/gateway.ts b/src/daemon/gateway.ts index 0c152a0..8e1309b 100644 --- a/src/daemon/gateway.ts +++ b/src/daemon/gateway.ts @@ -16,6 +16,7 @@ import { GrpcServer } from "./grpc-server.ts"; import { ChannelManager } from "./channel-manager.ts"; import { CronEngine } from "./cron-engine.ts"; import { DraftManager } from "./draft-manager.ts"; +import { ElicitationManager } from "./elicitation-manager.ts"; import { writePidFile, installSignalHandlers } from "./lifecycle.ts"; import { SlackAdapter } from "./channels/slack.ts"; import { SlackUserAdapter } from "./channels/slack-user.ts"; @@ -68,6 +69,7 @@ export class Gateway { private channelManager: ChannelManager; private cronEngine: CronEngine; private draftManager: DraftManager; + private elicitationManager!: ElicitationManager; private settingsProcess: ChildProcess | null = null; private cateIntegration: CATEIntegration | null = null; private notifyListener: { unlisten: () => Promise } | null = null; @@ -116,6 +118,13 @@ export class Gateway { // 5. Create channel manager this.channelManager = new ChannelManager(); + // 5b. Elicitation manager — handles `ask_user` (and any other MCP + // elicitation) by rendering questions on the active channel and + // resolving when the user clicks/replies. Hand it to the runtime so + // its `onElicitation` callback can dispatch through here. 
+ this.elicitationManager = new ElicitationManager(this.channelManager); + this.runtime.setElicitationManager(this.elicitationManager); + // 6. Create cron engine with broadcast to connected clients this.cronEngine = new CronEngine(this.messageQueue, this.channelManager, (event) => { this.wsServer.broadcast(event); @@ -784,6 +793,23 @@ export class Gateway { .then(async (msg) => { const adapter = this.channelManager.getAdapter(msg.platform); + // If there's a pending ask_user elicitation on this channel and + // the message text matches one of its options, consume the + // message as an answer and skip agent processing. This is the + // generic text-reply path; Slack buttons go through the action + // handler in slack-user.ts and never reach here. + const consumed = this.elicitationManager.tryConsumeTextReply( + { platform: msg.platform, channelId: msg.channelId, threadId: msg.threadId }, + msg.content, + ); + if (consumed) { + log.info( + { platform: msg.platform, channelId: msg.channelId }, + "Consumed message as ask_user reply", + ); + return; + } + // Notify connected clients about the incoming message this.broadcast({ type: "system", @@ -904,6 +930,7 @@ export class Gateway { onMessage: enqueue, draftManager: this.draftManager, }); + adapter.setElicitationManager(this.elicitationManager); this.channelManager.register(adapter); this.draftManager.registerSendFn(adapter.platform, (channelId, text, threadId) => adapter.sendAsUser(channelId, text, threadId), @@ -947,6 +974,7 @@ export class Gateway { onMessage: enqueue, draftManager: this.draftManager, }); + adapter.setElicitationManager(this.elicitationManager); this.channelManager.register(adapter); this.draftManager.registerSendFn(adapter.platform, (channelId, text, threadId) => adapter.sendAsUser(channelId, text, threadId),
diff --git a/src/sdk/ask-user.ts b/src/sdk/ask-user.ts
new file mode 100644
index 0000000..46cf973
--- /dev/null
+++ b/src/sdk/ask-user.ts
@@ -0,0 +1,230 @@
+/**
+ * `ask_user` MCP tool — lightweight multi-choice question.
+ *
+ * The agent calls this when it needs a quick decision from the user
+ * (which approach, which file, which calendar slot, etc.). The tool
+ * uses the MCP elicitation protocol: it calls `extra.sendRequest` with
+ * an `elicitation/create` payload; the Claude Agent SDK relays the
+ * request to our `onElicitation` callback
+ * (see `src/daemon/elicitation-manager.ts`), which renders the question
+ * on the user's active channel and waits for an answer.
+ *
+ * Modeled on Claude Code's own `AskUserQuestion` built-in but routed
+ * through Nomos's channel layer so the same agent can ask via Slack
+ * buttons, iMessage text, CLI prompt, etc.
+ */
+
+import { tool } from "@anthropic-ai/claude-agent-sdk";
+import { z } from "zod/v4";
+import { createLogger } from "../lib/logger.ts";
+
+const log = createLogger("ask-user-tool");
+
+const ASK_USER_DESCRIPTION = `Ask the user a multiple-choice question. The user sees the options on whatever channel they're talking to you in (Slack buttons, a numbered prompt elsewhere) and picks one.
+
+Use this for:
+- Quick decisions that meaningfully change what you do next (which approach, which file, which option).
+- Confirming destructive or hard-to-reverse actions.
+- Resolving ambiguity when the user's intent could go multiple ways.
+
+Don't use this for:
+- Yes/no questions you can ask in plain prose ("Should I proceed?").
+- Open-ended input (free-text responses are better as a normal message).
+- Information you can look up yourself.
+
+The returned value is the label string of whichever option the user picked. If the user cancels or doesn't answer within ~10 minutes, the tool resolves with a "user did not answer" message and you should continue without that input or stop and explain.`;
+
+/**
+ * Result shape accepted back from the host — the same `{action, content?}`
+ * object the ElicitationManager produces. Declared locally rather than
+ * importing the MCP SDK's `ElicitResultSchema`, because
+ * @modelcontextprotocol/sdk is only a transitive dependency here.
+ */
+const ELICIT_RESULT_SCHEMA = z.object({
+  action: z.enum(["accept", "decline", "cancel"]),
+  content: z.record(z.string(), z.unknown()).optional(),
+});
+
+export interface AskUserToolOptions {
+  /**
+   * Format hint for option preview content. Mirrors the SDK's
+   * `ToolConfig.askUserQuestion.previewFormat`.
+   */
+  previewFormat?: "markdown" | "html";
+}
+
+/**
+ * Build the `ask_user` tool. Keep it as a factory so the host can
+ * inject preview-format hints, etc. Returns the value of `tool(...)`
+ * directly so the generic input/arg types stay inferred — the SDK
+ * accepts heterogeneous `SdkMcpToolDefinition<...>` values in its
+ * tools array.
+ *
+ * @param _options - Accepted for forward compatibility; not read yet.
+ */
+export function createAskUserTool(_options: AskUserToolOptions = {}) {
+  return tool(
+    "ask_user",
+    ASK_USER_DESCRIPTION,
+    {
+      question: z
+        .string()
+        .min(3)
+        .describe(
+          "The complete question shown to the user. Clear, specific, ends with a question mark. Example: 'Which calendar should I add this meeting to?'",
+        ),
+      options: z
+        .array(
+          z.object({
+            label: z
+              .string()
+              .min(1)
+              .max(75)
+              .describe(
+                "Short button label (1-5 words). Shown on Slack buttons and as the numbered choice elsewhere.",
+              ),
+            description: z
+              .string()
+              .optional()
+              .describe("One-sentence elaboration. Optional but helpful when options are close."),
+          }),
+        )
+        .min(2)
+        .max(4)
+        .describe(
+          "Mutually exclusive choices. 2-4 options. Don't include an 'Other' option — the user can always reply with free text outside this tool if they want.",
+        ),
+      header: z
+        .string()
+        .max(12)
+        .optional()
+        .describe("Very short tag (max 12 chars) summarizing the question. Example: 'Approach'."),
+    },
+    async (args, extra) => {
+      // Convert our friendly options into an MCP elicitation form schema.
+      // The schema is a single string property whose `oneOf` carries one
+      // `{ const, title }` pair per option. The host
+      // (elicitation-manager.ts) recognises this shape and renders options
+      // accordingly.
+      const oneOf = args.options.map((opt) => ({
+        const: opt.label,
+        title: opt.label,
+      }));
+
+      // The `{ const, title }` pairs above do not carry the per-option
+      // descriptions, so fold them into the message; otherwise the user
+      // would never see them.
+      const headline = args.header ? `[${args.header}] ${args.question}` : args.question;
+      const details = args.options
+        .filter((opt) => opt.description)
+        .map((opt) => `• ${opt.label}: ${opt.description}`)
+        .join("\n");
+      const message = details ? `${headline}\n\n${details}` : headline;
+
+      // Construct the elicitation/create params inline. Shape matches the
+      // MCP spec's `ElicitRequestFormParamsSchema` (one string property
+      // with a `oneOf` enum for the answer); we don't import the zod
+      // schema directly because @modelcontextprotocol/sdk is only a
+      // transitive dep of our project.
+      const requestPayload = {
+        mode: "form" as const,
+        message,
+        requestedSchema: {
+          type: "object" as const,
+          properties: {
+            answer: {
+              type: "string" as const,
+              title: "Answer",
+              description: "User's selected option.",
+              oneOf,
+            },
+          },
+          required: ["answer"],
+        },
+      };
+
+      // `extra.sendRequest` is the MCP request-handler context's hook.
+      // The Claude Agent SDK intercepts `elicitation/create` and routes
+      // it to the host's `onElicitation` callback.
+      try {
+        const ext = extra as {
+          sendRequest?: (
+            req: { method: string; params: unknown },
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            resultSchema: any,
+          ) => Promise<{ action: string; content?: Record<string, unknown> }>;
+        };
+        if (!ext?.sendRequest) {
+          return {
+            content: [
+              {
+                type: "text",
+                text: "ask_user: this MCP server is not connected to a host that supports elicitation; ask the user in plain prose instead.",
+              },
+            ],
+            isError: true,
+          };
+        }
+
+        const result = await ext.sendRequest(
+          { method: "elicitation/create", params: requestPayload },
+          ELICIT_RESULT_SCHEMA,
+        );
+
+        if (result.action === "accept") {
+          // `content` is untyped JSON from the host; check the type at
+          // runtime instead of asserting it.
+          const picked = result.content?.answer;
+          const answer = typeof picked === "string" ? picked : "";
+          if (!answer) {
+            return {
+              content: [
+                {
+                  type: "text",
+                  text: "ask_user: user accepted but no answer was returned.",
+                },
+              ],
+              isError: true,
+            };
+          }
+          return {
+            content: [{ type: "text", text: `User chose: ${answer}` }],
+          };
+        }
+
+        if (result.action === "decline") {
+          return {
+            content: [
+              {
+                type: "text",
+                text: "User declined to answer (or the question timed out). Proceed without this input or ask in plain prose instead.",
+              },
+            ],
+          };
+        }
+
+        // "cancel"
+        return {
+          content: [
+            {
+              type: "text",
+              text: "User cancelled the question. Don't retry the same question; ask differently or move on.",
+            },
+          ],
+        };
+      } catch (err) {
+        const reason = err instanceof Error ? err.message : String(err);
+        log.error({ err: reason }, "ask_user elicitation failed");
+        return {
+          content: [
+            {
+              type: "text",
+              text: `ask_user failed: ${reason}. Continue without user input or ask in plain prose instead.`,
+            },
+          ],
+          isError: true,
+        };
+      }
+    },
+  );
+}
diff --git a/src/sdk/session.ts b/src/sdk/session.ts index 320c67a..5d576bd 100644 --- a/src/sdk/session.ts +++ b/src/sdk/session.ts @@ -6,9 +6,17 @@ import { type SDKMessage, type SDKResultMessage, type SdkPluginConfig, + type OnElicitation, } from "@anthropic-ai/claude-agent-sdk"; -export type { Query, SDKMessage, SDKResultMessage, McpServerConfig, SdkPluginConfig }; +export type { + Query, + SDKMessage, + SDKResultMessage, + McpServerConfig, + SdkPluginConfig, + OnElicitation, +}; export interface RunSessionParams { /** The user prompt to send */ @@ -55,6 +63,14 @@ export interface RunSessionParams { plugins?: SdkPluginConfig[]; /** Use Claude subscription (Max/Pro) instead of API key */ useSubscription?: boolean; + /** + * Callback for MCP elicitation requests (e.g. our `ask_user` tool). + * The SDK calls this when an in-process MCP server invokes + * `extra.sendRequest({method: "elicitation/create", ...})`. Return an + * accept/decline/cancel response. If omitted, all elicitations are + * automatically declined. + */ + onElicitation?: OnElicitation; } /** @@ -122,6 +138,7 @@ export function runSession(params: RunSessionParams): Query { plugins: params.plugins, env, ...(params.cwd ? { cwd: params.cwd } : {}), + ...(params.onElicitation ? { onElicitation: params.onElicitation } : {}), }, }); } diff --git a/src/sdk/tools.ts b/src/sdk/tools.ts index b15d94c..916a693 100644 --- a/src/sdk/tools.ts +++ b/src/sdk/tools.ts @@ -6,6 +6,7 @@ import { import { z } from "zod/v4"; import { handleBootstrapComplete } from "../ui/bootstrap.ts"; import { createLogger } from "../lib/logger.ts"; +import { createAskUserTool } from "./ask-user.ts"; const log = createLogger("sdk-tools"); import { @@ -1305,6 +1306,10 @@ export function createMemoryMcpServer(): McpSdkServerConfigWithInstance { // ── Plan Mode Tool ── + // Multi-choice user prompt that routes through MCP elicitation. The + // actual rendering happens host-side in `src/daemon/elicitation-manager.ts`. + const askUserTool = createAskUserTool(); + const proposePlanTool = tool( "propose_plan", "Propose an implementation plan for the user to review before execution. Use this for complex, multi-step tasks where you want to align with the user before making changes. The plan is stored and the user can approve, modify, or reject it. Only propose plans for significant changes — don't use this for simple tasks.", @@ -1955,6 +1960,8 @@ export function createMemoryMcpServer(): McpSdkServerConfigWithInstance { checkWorkerMessagesTool, // Plan mode proposePlanTool, + // Multi-choice user prompt via MCP elicitation + askUserTool, // LSP code intelligence lspGoToDefinitionTool, lspFindReferencesTool,