Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/core/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@stello-ai/core",
"version": "0.10.1",
"version": "0.10.2",
"description": "The first open-source conversation topology engine",
"license": "Apache-2.0",
"author": "Stello Contributors",
Expand Down
2 changes: 1 addition & 1 deletion packages/session/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@stello-ai/session",
"version": "0.8.0",
"version": "0.8.1",
"description": "Session layer for Stello — conversation topology engine",
"license": "Apache-2.0",
"author": "Stello Contributors",
Expand Down
33 changes: 33 additions & 0 deletions packages/session/src/__tests__/openai-compatible.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,39 @@ describe('createOpenAICompatibleAdapter', () => {
expect(chunks.map((chunk) => chunk.delta).join('')).toBe('上海中心大厦')
})

it('stream() 请求 provider usage 并透传 usage-only chunk', async () => {
createCompletion.mockResolvedValueOnce((async function* () {
yield { choices: [{ delta: { content: 'ok' } }] }
yield { choices: [], usage: { prompt_tokens: 10, completion_tokens: 2 } }
})())

const adapter = createOpenAICompatibleAdapter({
apiKey: 'test-key',
baseURL: 'https://api.example.com/v1',
model: 'test-model',
maxContextTokens: 128_000,
})

if (!adapter.stream) throw new Error('adapter.stream is required')

const chunks = []
for await (const chunk of adapter.stream([{ role: 'user', content: 'hello' }])) {
chunks.push(chunk)
}

expect(createCompletion).toHaveBeenCalledWith(
expect.objectContaining({
stream: true,
stream_options: { include_usage: true },
}),
undefined,
)
expect(chunks).toEqual([
{ delta: 'ok' },
{ delta: '', usage: { promptTokens: 10, completionTokens: 2 } },
])
})

it('StepFun 3.7 多模态能力不绑定固定 baseURL', async () => {
const adapter = createOpenAICompatibleAdapter({
apiKey: 'test-key',
Expand Down
27 changes: 27 additions & 0 deletions packages/session/src/__tests__/turn.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,33 @@ describe('send() 契约', () => {
expect(messages).toHaveLength(2)
expect(messages[1]!.content).toBe('你好,世界')
})

it('stream() 汇总 adapter chunk usage 并透传到最终结果', async () => {
const { session } = await makeSession({
llm: {
maxContextTokens: 1_000_000,
async complete() {
return { content: 'unused' }
},
async *stream() {
yield { delta: '你', usage: { promptTokens: 11, completionTokens: 0 } }
yield { delta: '好' }
yield { delta: '', usage: { promptTokens: 11, completionTokens: 2 } }
},
},
})

const stream = session.stream('hello')
const chunks: string[] = []
for await (const chunk of stream) {
chunks.push(chunk)
}
const result = await stream.result

expect(chunks).toEqual(['你', '好'])
expect(result.content).toBe('你好')
expect(result.usage).toEqual({ promptTokens: 11, completionTokens: 2 })
})
})

describe('Session.setTools (per-session tool list mutation)', () => {
Expand Down
21 changes: 21 additions & 0 deletions packages/session/src/adapters/anthropic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import type {
LLMAdapter,
LLMResult,
LLMChunk,
LLMUsage,
Message,
ToolCall,
LLMCompleteOptions,
Expand All @@ -27,6 +28,11 @@ type AnthropicProviderBlock = {
content?: unknown
} & Record<string, unknown>

type AnthropicStreamUsage = {
input_tokens?: number | null
output_tokens?: number | null
}

/** Anthropic 原生协议的配置选项 */
export interface AnthropicAdapterOptions {
apiKey: string
Expand Down Expand Up @@ -115,6 +121,14 @@ function toAnthropicMessages(messages: Message[]): MessageParam[] {
return result
}

function mergeAnthropicUsage(current: LLMUsage | undefined, usage: AnthropicStreamUsage | undefined): LLMUsage | undefined {
if (!usage) return current
return {
promptTokens: usage.input_tokens ?? current?.promptTokens ?? 0,
completionTokens: usage.output_tokens ?? current?.completionTokens ?? 0,
}
}

/** 将 Stello tools schema 转换为 Anthropic Tool 格式 */
function toAnthropicTools(
tools: NonNullable<LLMCompleteOptions['tools']>,
Expand Down Expand Up @@ -252,6 +266,7 @@ export function createAnthropicAdapter(options: AnthropicAdapterOptions): LLMAda
completeOptions?.signal ? { signal: completeOptions.signal } : undefined,
)

let usage: LLMUsage | undefined
for await (const event of stream) {
if (event.type === 'content_block_start') {
// tool_use 块的 id 和 name 只在 start 事件里下发,
Expand Down Expand Up @@ -285,6 +300,12 @@ export function createAnthropicAdapter(options: AnthropicAdapterOptions): LLMAda
}],
}
}
} else if (event.type === 'message_start') {
usage = mergeAnthropicUsage(usage, event.message.usage)
if (usage) yield { delta: '', usage }
} else if (event.type === 'message_delta') {
usage = mergeAnthropicUsage(usage, event.usage)
if (usage) yield { delta: '', usage }
}
}
},
Expand Down
21 changes: 14 additions & 7 deletions packages/session/src/adapters/openai-compatible.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,15 @@ function isProviderToolCall(call: RawOpenAIToolCall): boolean {
return typeof call.type === 'string' && call.type !== 'function'
}

function toProviderUsage(usage: ChatCompletion['usage'] | ChatCompletionChunk['usage'] | undefined) {
return usage
? {
promptTokens: usage.prompt_tokens,
completionTokens: usage.completion_tokens,
}
: undefined
}

function toProviderToolEvent(call: RawOpenAIToolCall): ProviderToolEvent {
const event: ProviderToolEvent = {
...(call.id ? { id: call.id } : {}),
Expand Down Expand Up @@ -268,12 +277,7 @@ export function createOpenAICompatibleAdapter(options: OpenAICompatibleOptions):
}]
}),
...(providerToolEvents.length > 0 ? { providerToolEvents } : {}),
usage: response.usage
? {
promptTokens: response.usage.prompt_tokens,
completionTokens: response.usage.completion_tokens,
}
: undefined,
usage: toProviderUsage(response.usage),
}
},
async *stream(messages: Message[], completeOptions?: LLMCompleteOptions) {
Expand All @@ -282,6 +286,7 @@ export function createOpenAICompatibleAdapter(options: OpenAICompatibleOptions):
...(await buildParams(messages, completeOptions)),
...(options.extraBody ?? {}),
stream: true,
stream_options: { include_usage: true },
} as Parameters<typeof client.chat.completions.create>[0],
completeOptions?.signal ? { signal: completeOptions.signal } : undefined,
) as Stream<ChatCompletionChunk>
Expand All @@ -297,18 +302,20 @@ export function createOpenAICompatibleAdapter(options: OpenAICompatibleOptions):
const providerToolEvents = rawToolCalls
.filter(isProviderToolCall)
.map(toProviderToolEvent)
const usage = toProviderUsage(chunk.usage)
const toolCallDeltas = rawToolCalls.filter((call) => !isProviderToolCall(call)).map((call) => ({
index: call.index ?? 0,
id: call.id,
name: call.function?.name,
input: call.function?.arguments,
}))
if (delta || reasoningDelta || toolCallDeltas.length > 0 || providerToolEvents.length > 0) {
if (delta || reasoningDelta || toolCallDeltas.length > 0 || providerToolEvents.length > 0 || usage) {
yield {
delta,
...(reasoningDelta ? { reasoningDelta } : {}),
...(toolCallDeltas.length > 0 ? { toolCallDeltas } : {}),
...(providerToolEvents.length > 0 ? { providerToolEvents } : {}),
...(usage ? { usage } : {}),
}
}
}
Expand Down
13 changes: 12 additions & 1 deletion packages/session/src/create-session.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { randomUUID } from 'node:crypto'
import type { Session, MessageQueryOptions, SessionInput, SessionSendOptions } from './types/session-api.js'
import { SessionArchivedError } from './types/session-api.js'
import type { SessionMeta, SessionMetaUpdate, ForkOptions } from './types/session.js'
import type { Message } from './types/llm.js'
import type { LLMUsage, Message } from './types/llm.js'
import type { CreateSessionOptions, LoadSessionOptions, SendResult, StreamResult } from './types/functions.js'
import { assembleSessionContext, buildSessionIdentityMessages, createBuiltinCompressFn, flushCompressionCache, hydrateCompressionCache, removeIncompleteToolCallGroups, type CompressionCache } from './context-utils.js'

Expand Down Expand Up @@ -76,6 +76,14 @@ function stripMultimodalParts(records: Message[]): Message[] {
})
}

function mergeUsage(current: LLMUsage | undefined, next: LLMUsage | undefined): LLMUsage | undefined {
if (!next) return current
return {
promptTokens: next.promptTokens ?? current?.promptTokens ?? 0,
completionTokens: next.completionTokens ?? current?.completionTokens ?? 0,
}
}

/** 为 toolResults continuation 组装固定上下文与历史。 */
async function assembleSessionReplayContext(
sessionId: string,
Expand Down Expand Up @@ -369,12 +377,14 @@ function buildSession(
if (options.llm.stream) {
let accumulated = ''
let accumulatedReasoning = ''
let usage: LLMUsage | undefined
const toolCallsByIndex = new Map<number, { id?: string; name?: string; input: string }>()
// adapter 在 abort 时抛 AbortError,这里直接向上传播给 result promise;
// 下方 L3 写入分支不会执行(policy: drop entirely),与非流式 send() 对称。
for await (const chunk of options.llm.stream(promptMessages, { tools, signal: sendOptions?.signal })) {
accumulated += chunk.delta
if (chunk.reasoningDelta) accumulatedReasoning += chunk.reasoningDelta
usage = mergeUsage(usage, chunk.usage)
push(chunk.delta)
for (const delta of chunk.toolCallDeltas ?? []) {
const current = toolCallsByIndex.get(delta.index) ?? { input: '' }
Expand All @@ -393,6 +403,7 @@ function buildSession(
content: accumulated,
...(accumulatedReasoning ? { reasoningContent: accumulatedReasoning } : {}),
toolCalls,
...(usage ? { usage } : {}),
}
} else {
result = await options.llm.complete(promptMessages, { tools, signal: sendOptions?.signal })
Expand Down
7 changes: 7 additions & 0 deletions packages/session/src/types/llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,11 @@ export interface LLMResult {
}
}

export interface LLMUsage {
promptTokens: number
completionTokens: number
}

/** 流式输出的单个 chunk */
export interface LLMChunk {
/** 文本增量片段 */
Expand All @@ -154,6 +159,8 @@ export interface LLMChunk {
}>
/** Provider 内置 tool 事件 / 结果,不进入客户端 tool loop。 */
providerToolEvents?: ProviderToolEvent[]
/** token 用量统计;通常由 provider 在流结束前后的 usage-only chunk 返回。 */
usage?: LLMUsage
}

/**
Expand Down