diff --git a/internal/service/server/adapter_dispatch.go b/internal/service/server/adapter_dispatch.go index 6031fa9c..facedb27 100644 --- a/internal/service/server/adapter_dispatch.go +++ b/internal/service/server/adapter_dispatch.go @@ -170,6 +170,12 @@ func (s *Server) handleWithAdapters( // the upstream provider receives the correct model identifier. coreReq.Model = preferred.UpstreamModel + if maxOut := s.routeMaxOutputTokens(openAIReq.Model, preferred); maxOut > 0 { + if coreReq.MaxTokens <= 0 || coreReq.MaxTokens > maxOut { + coreReq.MaxTokens = maxOut + } + } + wsMode := resolvedWebSearchMode(pm, openAIReq.Model, preferred) // Inject web search tools at Core level if mode is "injected". diff --git a/internal/service/server/adapter_dispatch_test.go b/internal/service/server/adapter_dispatch_test.go index 32c6ce52..cfc61f41 100644 --- a/internal/service/server/adapter_dispatch_test.go +++ b/internal/service/server/adapter_dispatch_test.go @@ -265,3 +265,75 @@ func TestInjectCoreWebSearchSkipsWhenCandidateHasNativeSearch(t *testing.T) { t.Fatalf("len(coreReq.Tools) = %d, want 0", len(coreReq.Tools)) } } + +func TestRouteMaxOutputTokensPrefersRouteEntry(t *testing.T) { + rt := runtime.NewRuntime(config.Config{ + Routes: map[string]config.RouteEntry{ + "claude-haiku-4-5": { + Provider: "newapi", + Model: "claude-haiku-4-5", + MaxOutputTokens: 64000, + }, + }, + ProviderDefs: map[string]config.ProviderDef{ + "newapi": { + Models: map[string]config.ModelMeta{ + "claude-haiku-4-5": {MaxOutputTokens: 200000}, + }, + }, + }, + }, nil, nil) + srv := &Server{runtime: rt} + + got := srv.routeMaxOutputTokens("claude-haiku-4-5", provider.ProviderCandidate{ + ProviderKey: "newapi", + UpstreamModel: "claude-haiku-4-5", + }) + if got != 64000 { + t.Fatalf("routeMaxOutputTokens() = %d, want 64000", got) + } +} + +func TestRouteMaxOutputTokensFallsBackToProviderModelMeta(t *testing.T) { + rt := runtime.NewRuntime(config.Config{ + Routes: map[string]config.RouteEntry{ + 
"qwen3.6-plus": {Provider: "newapi", Model: "qwen3.6-plus"}, + }, + ProviderDefs: map[string]config.ProviderDef{ + "newapi": { + Models: map[string]config.ModelMeta{ + "qwen3.6-plus": {MaxOutputTokens: 65536}, + }, + }, + }, + }, nil, nil) + srv := &Server{runtime: rt} + + got := srv.routeMaxOutputTokens("qwen3.6-plus", provider.ProviderCandidate{ + ProviderKey: "newapi", + UpstreamModel: "qwen3.6-plus", + }) + if got != 65536 { + t.Fatalf("routeMaxOutputTokens() = %d, want 65536", got) + } +} + +func TestRouteMaxOutputTokensReturnsZeroWhenUnset(t *testing.T) { + rt := runtime.NewRuntime(config.Config{ + Routes: map[string]config.RouteEntry{ + "unbounded": {Provider: "newapi", Model: "unbounded"}, + }, + ProviderDefs: map[string]config.ProviderDef{ + "newapi": {Models: map[string]config.ModelMeta{}}, + }, + }, nil, nil) + srv := &Server{runtime: rt} + + got := srv.routeMaxOutputTokens("unbounded", provider.ProviderCandidate{ + ProviderKey: "newapi", + UpstreamModel: "unbounded", + }) + if got != 0 { + t.Fatalf("routeMaxOutputTokens() = %d, want 0", got) + } +} diff --git a/internal/service/server/server.go b/internal/service/server/server.go index 4c792212..c8e98342 100644 --- a/internal/service/server/server.go +++ b/internal/service/server/server.go @@ -96,6 +96,26 @@ func (s *Server) activeProviderDefs() map[string]config.ProviderDef { return nil } +// routeMaxOutputTokens resolves the effective per-route max_output_tokens cap +// for the given inbound model alias, falling back to the upstream provider +// catalog metadata when the route does not declare its own value. +// Returns 0 when no cap is configured (caller should leave defaults alone). 
+func (s *Server) routeMaxOutputTokens(modelAlias string, preferred provider.ProviderCandidate) int {
+	snap := s.runtimeSnapshot()
+	if snap == nil {
+		// No runtime snapshot available: report "no cap".
+		return 0
+	}
+
+	// A positive cap declared on the route entry for this alias takes precedence.
+	route, found := snap.Config.Routes[modelAlias]
+	if found && route.MaxOutputTokens > 0 {
+		return route.MaxOutputTokens
+	}
+
+	// Otherwise look up the upstream model in the selected provider's model table.
+	providerDef, found := snap.Config.ProviderDefs[preferred.ProviderKey]
+	if !found {
+		return 0
+	}
+	if modelMeta, known := providerDef.Models[preferred.UpstreamModel]; known && modelMeta.MaxOutputTokens > 0 {
+		return modelMeta.MaxOutputTokens
+	}
+
+	// Neither source declares a positive value.
+	return 0
+}
+
 func (s *Server) activeChatClient(providerKey string) any {
 	if snap := s.runtimeSnapshot(); snap != nil {
 		if def, ok := snap.Config.ProviderDefs[providerKey]; ok && def.Protocol == config.ProtocolOpenAIChat {