diff --git a/README.md b/README.md index 8ae4996..10f6c39 100644 --- a/README.md +++ b/README.md @@ -122,22 +122,68 @@ curl http://localhost:3000/v1/chat/completions \ - `max_completion_tokens` - `stop` - `user` +- `tools` +- `tool_choice` 지원 메시지 형태: -- role: `system | user | assistant` +- role: `system | user | assistant | tool` - content: 문자열 또는 `[{ "type": "text", "text": "..." }]` 형태의 text part array 지원 +- `role: tool` 메시지는 `tool_call_id` + 문자열 content만 지원 현재 제외: -- `tools`, `tool_choice` - image/audio/file이 포함된 multimodal content array - audio -- function calling - structured outputs 제외한 필드는 `400 invalid_request_error`로 거절합니다. +## Tool Calling 지원 방식 + +- provider가 네이티브 tool calling을 지원하면 그대로 provider로 전달합니다. +- provider가 네이티브 tool calling을 지원하지 않는데 `tools`가 들어오면, 프록시가 **에뮬레이션 모드**로 동작합니다. + - 시스템 프롬프트에 tool schema와 호출/결과 처리 플로우를 주입합니다. + - provider가 도구 호출이 필요하다고 판단하면 응답을 `[tool-call]`로 시작하고 뒤에 JSON 본문을 붙입니다. 프록시는 이를 OpenAI 형식(`tool_calls`)으로 변환합니다. + - 도구 호출이 필요하지 않다면 provider는 일반 텍스트를 그대로 출력할 수 있습니다. + - `[tool-result]`를 받은 뒤에는 바로 최종 답변을 만들 수도 있고, 필요하면 추가 도구를 연속 호출할 수도 있습니다. + - 클라이언트가 보낸 `role: tool` 메시지는 provider 입력 시 `role: user` 텍스트로 변환해 전달합니다. + - 클라이언트가 기존 `role: system` 메시지를 보낸 경우, 하나의 system 메시지로 병합됩니다. 기존 시스템 프롬프트를 먼저 유지하고, 뒤에 `[Additional system instructions]`를 붙입니다. 구분자는 `---`를 사용합니다. + - stream 모드에서는 응답 토큰을 짧은 윈도우로 버퍼링해 tool-call 프로토콜 여부를 판정한 뒤, tool 호출이면 구조화 chunk로 변환하고 아니면 원본 스트림을 그대로 흘려보냅니다. 
+ +### 에뮬레이션 시 Provider 입력 컨텍스트 예시 + +요청(요약): +- `tools`: `lookup_weather`, `lookup_air_quality` +- 대화: `서울 날씨 알려줘` → `lookup_weather` 호출 → tool 결과 수신 + +Provider로 전달되는 메시지(개념 예시): + +```json +[ + { + "role": "system", + "content": "You can either answer normally in plain text, or request a tool call using JSON.\n...\ntool_choice:\n\"auto\"\ntools:\n[\n {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"lookup_weather\",\n \"description\": \"Lookup weather\"\n }\n },\n {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"lookup_air_quality\",\n \"description\": \"Lookup AQI\"\n }\n }\n]" + }, + { + "role": "user", + "content": "서울 날씨 알려줘" + }, + { + "role": "assistant", + "content": "[tool-call]\n[\n {\n \"tool_call_id\": \"call_1\",\n \"name\": \"lookup_weather\",\n \"arguments\": {\n \"city\": \"서울\"\n }\n }\n]" + }, + { + "role": "user", + "content": "[tool-result]\n{\n \"tool_call_id\": \"call_1\",\n \"content\": \"{\\\"temp_c\\\":21,\\\"condition\\\":\\\"sunny\\\"}\"\n}" + } +] +``` + +이 상태에서 모델은: +- plain text로 바로 답변하거나, +- 추가 정보가 필요하면 다시 `[tool-call]` + JSON 본문을 출력할 수 있습니다. 
+ ## 구조 ```text @@ -175,6 +221,7 @@ import type { ProviderAdapter } from './types.js'; const myProvider: ProviderAdapter = { name: 'acme', modelPrefix: 'acme', + supportsNativeToolCalling: true, async listModels(context) { const auth = context.headers.authorization; diff --git a/src/openai/schemas.ts b/src/openai/schemas.ts index 811657e..68909c1 100644 --- a/src/openai/schemas.ts +++ b/src/openai/schemas.ts @@ -1,4 +1,4 @@ -export type SupportedMessageRole = 'system' | 'user' | 'assistant'; +export type SupportedMessageRole = 'system' | 'user' | 'assistant' | 'tool'; export type SupportedTextContentPart = { type: 'text'; @@ -8,11 +8,58 @@ export type SupportedTextContentPart = { export type SupportedChatMessage = { role: SupportedMessageRole; content: string | SupportedTextContentPart[]; + tool_call_id?: string; +}; + +export type SupportedToolDefinition = { + type: 'function'; + function: { + name: string; + description?: string; + parameters?: Record; + }; +}; + +export type SupportedToolChoice = + | 'none' + | 'auto' + | 'required' + | { + type: 'function'; + function: { + name: string; + }; + }; + +export type SupportedChatCompletionToolCall = { + id: string; + type: 'function'; + function: { + name: string; + arguments: string; + }; +}; + +export type SupportedAssistantMessage = { + role: 'assistant'; + content: string | SupportedTextContentPart[]; + tool_calls?: SupportedChatCompletionToolCall[]; +}; + +export type SupportedUserSystemMessage = { + role: 'system' | 'user'; + content: string | SupportedTextContentPart[]; +}; + +export type SupportedToolMessage = { + role: 'tool'; + content: string; + tool_call_id: string; }; export type SupportedChatCompletionRequest = { model: string; - messages: SupportedChatMessage[]; + messages: Array; stream?: boolean; temperature?: number; top_p?: number; @@ -20,6 +67,8 @@ export type SupportedChatCompletionRequest = { max_completion_tokens?: number; stop?: string | string[]; user?: string; + tools?: 
SupportedToolDefinition[]; + tool_choice?: SupportedToolChoice; }; export const chatCompletionsRequestSchema = { @@ -39,33 +88,143 @@ export const chatCompletionsRequestSchema = { minItems: 1, }, user: { type: 'string' }, - messages: { + tools: { type: 'array', minItems: 1, items: { type: 'object', additionalProperties: false, - required: ['role', 'content'], + required: ['type', 'function'], properties: { - role: { - type: 'string', - enum: ['system', 'user', 'assistant'], + type: { enum: ['function'] }, + function: { + type: 'object', + additionalProperties: false, + required: ['name'], + properties: { + name: { type: 'string', minLength: 1 }, + description: { type: 'string' }, + parameters: { type: 'object' }, + }, }, - content: { - type: ['string', 'array'], - minLength: 1, - minItems: 1, - items: { + }, + }, + }, + tool_choice: { + anyOf: [ + { enum: ['none', 'auto', 'required'] }, + { + type: 'object', + additionalProperties: false, + required: ['type', 'function'], + properties: { + type: { enum: ['function'] }, + function: { type: 'object', additionalProperties: false, - required: ['type', 'text'], + required: ['name'], properties: { - type: { enum: ['text'] }, - text: { type: 'string', minLength: 1 }, + name: { type: 'string', minLength: 1 }, }, }, }, }, + ], + }, + messages: { + type: 'array', + minItems: 1, + items: { + anyOf: [ + { + type: 'object', + additionalProperties: false, + required: ['role', 'content'], + properties: { + role: { + enum: ['system', 'user'], + }, + content: { + type: ['string', 'array'], + minLength: 1, + minItems: 1, + items: { + type: 'object', + additionalProperties: false, + required: ['type', 'text'], + properties: { + type: { enum: ['text'] }, + text: { type: 'string', minLength: 1 }, + }, + }, + }, + }, + }, + { + type: 'object', + additionalProperties: false, + required: ['role', 'content'], + properties: { + role: { + enum: ['assistant'], + }, + content: { + type: ['string', 'array'], + minLength: 1, + minItems: 1, + 
items: { + type: 'object', + additionalProperties: false, + required: ['type', 'text'], + properties: { + type: { enum: ['text'] }, + text: { type: 'string', minLength: 1 }, + }, + }, + }, + tool_calls: { + type: 'array', + minItems: 1, + items: { + type: 'object', + additionalProperties: false, + required: ['id', 'type', 'function'], + properties: { + id: { type: 'string', minLength: 1 }, + type: { enum: ['function'] }, + function: { + type: 'object', + additionalProperties: false, + required: ['name', 'arguments'], + properties: { + name: { type: 'string', minLength: 1 }, + arguments: { type: 'string' }, + }, + }, + }, + }, + }, + }, + }, + { + type: 'object', + additionalProperties: false, + required: ['role', 'content', 'tool_call_id'], + properties: { + role: { + enum: ['tool'], + }, + content: { + type: 'string', + minLength: 1, + }, + tool_call_id: { + type: 'string', + minLength: 1, + }, + }, + }, + ], }, }, }, diff --git a/src/openai/tool-emulation.ts b/src/openai/tool-emulation.ts new file mode 100644 index 0000000..5f293e8 --- /dev/null +++ b/src/openai/tool-emulation.ts @@ -0,0 +1,432 @@ +import OpenAI from 'openai'; +import type { + SupportedAssistantMessage, + SupportedChatCompletionRequest, + SupportedToolMessage, +} from './schemas.js'; + +const TOOL_CALL_MARKER = '[tool-call]'; +const TOOL_RESULT_MARKER = '[tool-result]'; + +export type EmulatedToolCall = { + name: string; + arguments: Record; + tool_call_id?: string; +}; + +function flattenContent(content: string | Array<{ type: 'text'; text: string }>): string { + if (typeof content === 'string') { + return content; + } + + return content.map((part) => part.text).join('\n'); +} + +function formatToolResultAsUserMessage(message: SupportedToolMessage): OpenAI.Chat.ChatCompletionUserMessageParam { + const body = JSON.stringify( + { + tool_call_id: message.tool_call_id, + content: message.content, + }, + null, + 2, + ); + + return { + role: 'user', + content: [TOOL_RESULT_MARKER, body].join('\n'), + 
}; +} + +function formatAssistantToolCallsAsAssistantMessage( + message: SupportedAssistantMessage, +): OpenAI.Chat.ChatCompletionAssistantMessageParam | null { + if (!message.tool_calls || message.tool_calls.length === 0) { + return null; + } + + const normalized = message.tool_calls.map((toolCall) => ({ + tool_call_id: toolCall.id, + name: toolCall.function.name, + arguments: parseJsonString(toolCall.function.arguments) ?? toolCall.function.arguments, + })); + const body = JSON.stringify(normalized, null, 2); + + return { + role: 'assistant', + content: [TOOL_CALL_MARKER, body].join('\n'), + }; +} + +function parseJsonString(value: string): unknown | null { + try { + return JSON.parse(value); + } catch { + return null; + } +} + +function buildToolEmulationInstruction(request: SupportedChatCompletionRequest): string { + const tools = request.tools ?? []; + const toolChoice = request.tool_choice ?? 'auto'; + const prettyTools = JSON.stringify(tools, null, 2); + const prettyToolChoice = JSON.stringify(toolChoice, null, 2); + + return [ + 'You can either answer normally in plain text, or request a tool call using JSON.', + `When you need a tool, start your response with "${TOOL_CALL_MARKER}" and then output a JSON body.`, + 'Tool-call JSON body shape:', + '{"tool_call_id":"","name":"","arguments":{...}}', + 'History format is normalized as:', + `${TOOL_CALL_MARKER} + JSON body for assistant tool-call records, and`, + `${TOOL_RESULT_MARKER} + JSON body for user tool-result records.`, + `When you receive a user message prefixed by ${TOOL_RESULT_MARKER}, you may either:`, + '1) call another tool (if more data is needed), or', + '2) answer normally in plain text.', + 'Do not wrap tool_call JSON with markdown fences.', + 'If no tool is needed, return a normal plain-text assistant answer.', + 'tool_choice:', + prettyToolChoice, + 'tools:', + prettyTools, + ].join('\n'); +} + +function createToolCallId(): string { + return 
`call_emulated_${Math.random().toString(36).slice(2, 12)}`; +} +/** Type guard for plain JSON objects; null and arrays are rejected. */ +function isRecord(value: unknown): value is Record<string, unknown> { + return typeof value === 'object' && value !== null && !Array.isArray(value); /* typeof [] is 'object', but an array is never a valid tool call or arguments map */ +} + +function parseEmulatedToolCallBody(value: unknown): EmulatedToolCall | null { + if (!isRecord(value)) { + return null; + } + + if (typeof value.name !== 'string' || value.name.length === 0 || !isRecord(value.arguments)) { + return null; + } + + const toolCallId = + typeof value.tool_call_id === 'string' && value.tool_call_id.length > 0 ? value.tool_call_id : undefined; + + return { + name: value.name, + arguments: value.arguments, + ...(toolCallId ? { tool_call_id: toolCallId } : {}), + }; +} + +export function parseToolEmulationContent(content: string): EmulatedToolCall | null { + const trimmed = content.trimStart(); + if (!trimmed.startsWith(TOOL_CALL_MARKER)) { + return null; + } + + const body = trimmed.slice(TOOL_CALL_MARKER.length).trimStart(); + + let parsedJson: unknown; + try { + parsedJson = JSON.parse(body); + } catch { + return null; + } + + if (Array.isArray(parsedJson)) { + return parseEmulatedToolCallBody(parsedJson[0]); + } + + return parseEmulatedToolCallBody(parsedJson); +} + +export function shouldEmulateTools( + request: SupportedChatCompletionRequest, + providerSupportsNativeToolCalling: boolean, +): boolean { + return Boolean(request.tools && request.tools.length > 0 && !providerSupportsNativeToolCalling); +} + +export function toProviderMessages( + request: SupportedChatCompletionRequest, + emulateTools: boolean, +): OpenAI.Chat.ChatCompletionMessageParam[] { + const converted = request.messages.flatMap((message): OpenAI.Chat.ChatCompletionMessageParam[] => { + if (!emulateTools) { + if (message.role === 'tool') { + return [ + { + role: 'tool', + content: message.content, + tool_call_id: message.tool_call_id, + }, + ]; + } + + if (message.role === 'assistant') { + return [ + { + role: 'assistant', + content: flattenContent(message.content), + ...(message.tool_calls ?
{ tool_calls: message.tool_calls } : {}), + }, + ]; + } + + return [ + { + role: message.role, + content: flattenContent(message.content), + }, + ]; + } + + if (message.role === 'tool') { + return [formatToolResultAsUserMessage(message)]; + } + + if (message.role === 'assistant') { + const assistantToolCalls = formatAssistantToolCallsAsAssistantMessage(message); + if (assistantToolCalls) { + return [assistantToolCalls]; + } + + return [ + { + role: 'assistant', + content: flattenContent(message.content), + }, + ]; + } + + return [ + { + role: message.role, + content: flattenContent(message.content), + }, + ]; + }); + + if (!emulateTools) { + return converted; + } + + const systemContents = converted.flatMap((message) => { + if (message.role !== 'system' || typeof message.content !== 'string') { + return []; + } + + return [message.content]; + }); + + const nonSystemMessages = converted.filter((message) => message.role !== 'system'); + const injectedSections = + systemContents.length > 0 + ? 
[ + '[System prompt]', + systemContents.join('\n\n'), + '---', + '[Additional system instructions]', + buildToolEmulationInstruction(request), + ] + : ['[Additional system instructions]', buildToolEmulationInstruction(request)]; + + return [ + { + role: 'system', + content: injectedSections.join('\n\n'), + }, + ...nonSystemMessages, + ]; +} + +export function applyToolEmulationResponse(completion: OpenAI.Chat.ChatCompletion, emulateTools: boolean): OpenAI.Chat.ChatCompletion { + if (!emulateTools) { + return completion; + } + + const firstChoice = completion.choices[0]; + if (!firstChoice) { + return completion; + } + + const content = firstChoice.message.content; + + if (typeof content !== 'string') { + return completion; + } + + const parsed = parseToolEmulationContent(content); + + if (!parsed) { + return completion; + } + + const cloned = structuredClone(completion); + const clonedFirstChoice = cloned.choices[0]; + if (!clonedFirstChoice) { + return completion; + } + + clonedFirstChoice.message.content = null; + clonedFirstChoice.message.tool_calls = [ + { + id: parsed.tool_call_id ?? 
createToolCallId(), + type: 'function', + function: { + name: parsed.name, + arguments: JSON.stringify(parsed.arguments), + }, + }, + ]; + clonedFirstChoice.finish_reason = 'tool_calls'; + + return cloned; +} +/** Builds the single stream chunk that carries an emulated tool call; toolCallId falls back to a generated id when omitted. */ +function buildToolCallStreamChunk( + template: OpenAI.Chat.ChatCompletionChunk, + name: string, + argumentsJson: string, toolCallId?: string, +): OpenAI.Chat.ChatCompletionChunk { + return { + id: template.id, + object: 'chat.completion.chunk', + created: template.created, + model: template.model, + choices: [ + { + index: 0, + delta: { + role: 'assistant', + tool_calls: [ + { + index: 0, + id: toolCallId ?? createToolCallId(), /* prefer the id the model supplied, as applyToolEmulationResponse does for non-stream responses */ + type: 'function', + function: { + name, + arguments: argumentsJson, + }, + }, + ], + }, + finish_reason: null, + }, + ], + }; +} + +function buildFinalContentStreamChunk( + template: OpenAI.Chat.ChatCompletionChunk, + content: string, +): OpenAI.Chat.ChatCompletionChunk { + return { + id: template.id, + object: 'chat.completion.chunk', + created: template.created, + model: template.model, + choices: [ + { + index: 0, + delta: { + role: 'assistant', + content, + }, + finish_reason: null, + }, + ], + }; +} + +function buildStreamFinishChunk( + template: OpenAI.Chat.ChatCompletionChunk, + finishReason: 'stop' | 'tool_calls', +): OpenAI.Chat.ChatCompletionChunk { + return { + id: template.id, + object: 'chat.completion.chunk', + created: template.created, + model: template.model, + choices: [ + { + index: 0, + delta: {}, + finish_reason: finishReason, + }, + ], + ...(template.usage ?
{ usage: template.usage } : {}), + }; +} +/** Buffers the start of an emulated stream until it can tell whether the provider is emitting a [tool-call] envelope: a parsed tool call is re-emitted as one tool_calls chunk plus a finish chunk; anything else is replayed and then passed through unchanged. */ +export async function* transformToolEmulationStream( + stream: AsyncIterable<OpenAI.Chat.ChatCompletionChunk>, + detectionWindowChars = 240, +): AsyncIterable<OpenAI.Chat.ChatCompletionChunk> { + const buffered: OpenAI.Chat.ChatCompletionChunk[] = []; + let bufferedText = ''; + let passthrough = false; + let lastChunk: OpenAI.Chat.ChatCompletionChunk | null = null; + + for await (const chunk of stream) { + if (passthrough) { + yield chunk; + continue; + } + + lastChunk = chunk; + buffered.push(chunk); + const content = chunk.choices[0]?.delta.content; + if (typeof content === 'string') { + bufferedText += content; + } + + const trimmed = bufferedText.trimStart(); + const looksLikeToolCallEnvelope = trimmed.startsWith(TOOL_CALL_MARKER) || TOOL_CALL_MARKER.startsWith(trimmed); /* also true while the text is still only a prefix of the marker (including empty), because the marker can arrive split across chunks */ + const isFinalChunk = chunk.choices[0]?.finish_reason !== null && chunk.choices[0]?.finish_reason !== undefined; + + if ( + !looksLikeToolCallEnvelope || + (trimmed.length >= detectionWindowChars && !trimmed.startsWith(TOOL_CALL_MARKER)) + ) { + passthrough = true; + for (const bufferedChunk of buffered) { + yield bufferedChunk; + } + buffered.length = 0; + continue; + } + + if (!isFinalChunk) { + continue; + } + + const parsed = parseToolEmulationContent(bufferedText); + if (!parsed) { + for (const bufferedChunk of buffered) { + yield bufferedChunk; + } + return; + } + + yield buildToolCallStreamChunk(chunk, parsed.name, JSON.stringify(parsed.arguments), parsed.tool_call_id); + yield buildStreamFinishChunk(chunk, 'tool_calls'); + return; + } + + if (passthrough || buffered.length === 0) { + return; + } + + const parsed = parseToolEmulationContent(bufferedText); + if (!parsed || !lastChunk) { + for (const bufferedChunk of buffered) { + yield bufferedChunk; + } + return; + } + + yield buildToolCallStreamChunk(lastChunk, parsed.name, JSON.stringify(parsed.arguments), parsed.tool_call_id); + yield buildStreamFinishChunk(lastChunk, 'tool_calls'); +} diff --git a/src/providers/dummy.ts b/src/providers/dummy.ts index 5c3e64c..1b78f6d 100644 --- a/src/providers/dummy.ts +++
b/src/providers/dummy.ts @@ -66,6 +66,37 @@ function buildAssistantContent( throw new OpenAIProxyError(502, 'server_error', 'Dummy provider failed on purpose.', 'dummy_upstream_failed'); } + const hasToolEmulationInstruction = request.messages.some( + (message) => + message.role === 'system' && + typeof message.content === 'string' && + message.content.includes('request a tool call using JSON'), + ); + + if (hasToolEmulationInstruction) { + if (prompt.includes('plain-text-only')) { + return `Plain text answer from dummy emulation mode: ${prompt}`; + } + + if (prompt.includes('[tool-result]')) { + return `Dummy finalized response from tool result.\n${prompt}`; + } + + return [ + '[tool-call]', + JSON.stringify( + { + name: 'lookup_weather', + arguments: { + query: prompt, + }, + }, + null, + 2, + ), + ].join('\n'); + } + if (request.model === 'dummy/story-1') { return [ `Dummy story mode received: "${prompt}".`, @@ -188,6 +219,7 @@ async function* createStream( const dummyProvider: ProviderAdapter = { name: 'dummy', modelPrefix: 'dummy', + supportsNativeToolCalling: false, async listModels(_context): Promise { const created = unixTime(); diff --git a/src/providers/types.ts b/src/providers/types.ts index 21a3752..18263b9 100644 --- a/src/providers/types.ts +++ b/src/providers/types.ts @@ -14,6 +14,7 @@ export type ProviderContext = { export type ProviderAdapter = { name: string; modelPrefix: string; + supportsNativeToolCalling: boolean; listModels(context: ProviderContext): Promise; createChatCompletion( request: OpenAI.Chat.ChatCompletionCreateParams, diff --git a/src/server.ts b/src/server.ts index 32c489c..bc4ff68 100644 --- a/src/server.ts +++ b/src/server.ts @@ -11,6 +11,12 @@ import { chatCompletionsRequestSchema, type SupportedChatCompletionRequest, } from './openai/schemas.js'; +import { + applyToolEmulationResponse, + shouldEmulateTools, + transformToolEmulationStream, + toProviderMessages, +} from './openai/tool-emulation.js'; import { OpenAIProxyError, 
formatErrorResponse, @@ -27,21 +33,16 @@ type ModelListResponse = { function toOpenAIRequest( request: SupportedChatCompletionRequest, + providerSupportsNativeToolCalling: boolean, ): OpenAI.Chat.ChatCompletionCreateParams { - const messages: OpenAI.Chat.ChatCompletionMessageParam[] = request.messages.map((message) => { - switch (message.role) { - case 'system': - return { role: 'system', content: message.content }; - case 'assistant': - return { role: 'assistant', content: message.content }; - case 'user': - return { role: 'user', content: message.content }; - } - }); + const emulateTools = shouldEmulateTools(request, providerSupportsNativeToolCalling); + const messages = toProviderMessages(request, emulateTools); const base = { model: request.model, messages, + ...(!emulateTools && request.tools ? { tools: request.tools } : {}), + ...(!emulateTools && request.tool_choice ? { tool_choice: request.tool_choice } : {}), }; const withOptionalFields = { @@ -171,23 +172,25 @@ export function buildServer(logger?: FastifyBaseLogger): FastifyInstance { }, }, async (request, reply) => { - const input = toOpenAIRequest(request.body); + const provider = resolveProvider(request.body.model); + const emulateTools = shouldEmulateTools(request.body, provider.supportsNativeToolCalling); + const input = toOpenAIRequest(request.body, provider.supportsNativeToolCalling); const context = createProviderContext({ headers: request.headers as IncomingHttpHeaders, requestId: request.id, signal: createAbortSignal(request, reply), logger: request.log, }); - const provider = resolveProvider(input.model); if (request.body.stream) { const stream = provider.streamChatCompletion(input, context); - await writeChatCompletionStream(reply, stream); + const outputStream = emulateTools ? 
transformToolEmulationStream(stream) : stream; + await writeChatCompletionStream(reply, outputStream); return reply; } const completion = await provider.createChatCompletion(input, context); - reply.send(completion); + reply.send(applyToolEmulationResponse(completion, emulateTools)); return reply; }, ); diff --git a/test/server.test.ts b/test/server.test.ts index 31f6507..59f23bf 100644 --- a/test/server.test.ts +++ b/test/server.test.ts @@ -182,7 +182,7 @@ test('unsupported non-text message content shape is rejected by request schema', expect(payload.error.type).toBe('invalid_request_error'); }); -test('unsupported top-level field is rejected by request schema', async () => { +test('invalid tools field is rejected by request schema', async () => { const app = createApp(); const response = await app.inject({ @@ -191,7 +191,11 @@ test('unsupported top-level field is rejected by request schema', async () => { payload: { model: 'dummy/echo-1', messages: [{ role: 'user', content: 'hello' }], - tools: [], + tools: [ + { + type: 'function', + }, + ], }, }); @@ -200,6 +204,227 @@ test('unsupported top-level field is rejected by request schema', async () => { expect(payload.error.type).toBe('invalid_request_error'); }); +test('tool emulation returns OpenAI tool_calls for providers without native support', async () => { + const app = createApp(); + + const response = await app.inject({ + method: 'POST', + url: '/v1/chat/completions', + payload: { + model: 'dummy/echo-1', + messages: [{ role: 'user', content: '서울 날씨 알려줘' }], + tools: [ + { + type: 'function', + function: { + name: 'lookup_weather', + description: 'Lookup weather', + parameters: { + type: 'object', + properties: { + query: { type: 'string' }, + }, + required: ['query'], + }, + }, + }, + ], + tool_choice: 'auto', + }, + }); + + expect(response.statusCode).toBe(200); + const payload = response.json(); + expect(payload.choices[0].finish_reason).toBe('tool_calls'); + 
expect(payload.choices[0].message.content).toBeNull(); + expect(payload.choices[0].message.tool_calls[0].function.name).toBe('lookup_weather'); +}); + +test('tool emulation allows normal plain-text response when no tool call is needed', async () => { + const app = createApp(); + + const response = await app.inject({ + method: 'POST', + url: '/v1/chat/completions', + payload: { + model: 'dummy/echo-1', + messages: [{ role: 'user', content: 'plain-text-only: 그냥 설명해줘' }], + tools: [ + { + type: 'function', + function: { + name: 'lookup_weather', + }, + }, + ], + tool_choice: 'auto', + }, + }); + + expect(response.statusCode).toBe(200); + const payload = response.json(); + expect(payload.choices[0].message.content).toMatch(/Plain text answer from dummy emulation mode/); + expect(payload.choices[0].message.tool_calls).toBeUndefined(); +}); + +test('tool result messages are transformed to user text for emulation providers', async () => { + const app = createApp(); + + const response = await app.inject({ + method: 'POST', + url: '/v1/chat/completions', + payload: { + model: 'dummy/echo-1', + messages: [ + { role: 'user', content: '서울 날씨 알려줘' }, + { + role: 'assistant', + content: '도구 호출 중', + tool_calls: [ + { + id: 'call_1', + type: 'function', + function: { + name: 'lookup_weather', + arguments: '{\"query\":\"서울 날씨\"}', + }, + }, + ], + }, + { + role: 'tool', + tool_call_id: 'call_1', + content: '맑음 20도', + }, + ], + tools: [ + { + type: 'function', + function: { + name: 'lookup_weather', + }, + }, + ], + }, + }); + + expect(response.statusCode).toBe(200); + const payload = response.json(); + expect(payload.choices[0].message.content).toMatch(/Dummy finalized response from tool result/); + expect(payload.choices[0].message.content).toMatch(/\[tool-result\]/); +}); + +test('tool emulation stream buffers and emits tool_calls instead of raw JSON tokens', async () => { + const app = createApp(); + + const response = await app.inject({ + method: 'POST', + url: 
'/v1/chat/completions', + payload: { + model: 'dummy/echo-1', + stream: true, + messages: [{ role: 'user', content: '서울 날씨 알려줘' }], + tools: [ + { + type: 'function', + function: { + name: 'lookup_weather', + }, + }, + ], + }, + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-type'] ?? '').toMatch(/^text\/event-stream/); + expect(response.body).toMatch(/"tool_calls"/); + expect(response.body).toMatch(/"finish_reason":"tool_calls"/); + expect(response.body).not.toMatch(/"mode":"tool_call"/); + expect(response.body).toMatch(/data: \[DONE\]/); +}); + +test('tool emulation stream with tool-result emits final text instead of protocol JSON', async () => { + const app = createApp(); + + const response = await app.inject({ + method: 'POST', + url: '/v1/chat/completions', + payload: { + model: 'dummy/echo-1', + stream: true, + messages: [ + { role: 'user', content: '서울 날씨 알려줘' }, + { + role: 'assistant', + content: '도구 호출 중', + tool_calls: [ + { + id: 'call_1', + type: 'function', + function: { + name: 'lookup_weather', + arguments: '{\"query\":\"서울 날씨\"}', + }, + }, + ], + }, + { + role: 'tool', + tool_call_id: 'call_1', + content: '맑음 20도', + }, + ], + tools: [ + { + type: 'function', + function: { + name: 'lookup_weather', + }, + }, + ], + }, + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-type'] ?? 
'').toMatch(/^text\/event-stream/); + expect(response.body).toMatch(/"content":"Dummy"/); + expect(response.body).toMatch(/"content":"finalized"/); + expect(response.body).toMatch(/"content":"tool"/); + expect(response.body).toMatch(/"content":"result\."/); + expect(response.body).not.toMatch(/"mode":"final"/); + expect(response.body).toMatch(/data: \[DONE\]/); +}); + +test('tool emulation stream passes through plain text when response is not tool-call JSON', async () => { + const app = createApp(); + + const response = await app.inject({ + method: 'POST', + url: '/v1/chat/completions', + payload: { + model: 'dummy/echo-1', + stream: true, + messages: [{ role: 'user', content: 'plain-text-only: 일반 텍스트로 답해' }], + tools: [ + { + type: 'function', + function: { + name: 'lookup_weather', + }, + }, + ], + }, + }); + + expect(response.statusCode).toBe(200); + expect(response.headers['content-type'] ?? '').toMatch(/^text\/event-stream/); + expect(response.body).toMatch(/"content":"Plain"/); + expect(response.body).toMatch(/"content":"text"/); + expect(response.body).toMatch(/"content":"answer"/); + expect(response.body).not.toMatch(/"tool_calls"/); + expect(response.body).toMatch(/data: \[DONE\]/); +}); + test('provider errors are mapped to OpenAI-style JSON errors', async () => { const app = createApp(); diff --git a/test/tool-emulation.test.ts b/test/tool-emulation.test.ts new file mode 100644 index 0000000..5bf6a52 --- /dev/null +++ b/test/tool-emulation.test.ts @@ -0,0 +1,53 @@ +import { expect, test } from 'vitest'; +import { toProviderMessages } from '../src/openai/tool-emulation.js'; +import type { SupportedChatCompletionRequest } from '../src/openai/schemas.js'; + +function baseRequest(): SupportedChatCompletionRequest { + return { + model: 'dummy/echo-1', + messages: [ + { role: 'system', content: '너는 친절한 비서야.' 
}, + { role: 'user', content: '서울 날씨 알려줘' }, + ], + tools: [ + { + type: 'function', + function: { + name: 'lookup_weather', + }, + }, + ], + }; +} + +test('emulation mode merges injected and client system instructions into one system message', () => { + const messages = toProviderMessages(baseRequest(), true); + + const systemMessages = messages.filter((message) => message.role === 'system'); + expect(systemMessages).toHaveLength(1); + expect(systemMessages[0]).toMatchObject({ role: 'system' }); + + if (systemMessages[0]?.role !== 'system') { + throw new Error('expected system message'); + } + + expect(systemMessages[0].content).toContain('[System prompt]'); + expect(systemMessages[0].content).toContain('---'); + expect(systemMessages[0].content).toContain('[Additional system instructions]'); + expect(systemMessages[0].content).toContain('너는 친절한 비서야.'); + expect(systemMessages[0].content.indexOf('[System prompt]')).toBeLessThan( + systemMessages[0].content.indexOf('[Additional system instructions]'), + ); +}); + +test('non-emulation mode keeps original system messages untouched', () => { + const messages = toProviderMessages(baseRequest(), false); + + const systemMessages = messages.filter((message) => message.role === 'system'); + expect(systemMessages).toHaveLength(1); + if (systemMessages[0]?.role !== 'system') { + throw new Error('expected system message'); + } + + expect(systemMessages[0].content).toBe('너는 친절한 비서야.'); +});