From 0f71a688a9cce47ef44b8a46f3496604fcb5d4ff Mon Sep 17 00:00:00 2001
From: thezzisu
Date: Sun, 24 May 2026 22:45:07 +0800
Subject: [PATCH] feat(server): clamp coreReq.MaxTokens to per-route
 max_output_tokens

When the inbound OpenAI Responses request omits max_output_tokens (or
sets it above the upstream limit), moonbridge previously injected the
global defaults.max_tokens unchanged. Anthropic / Qwen / Gemini
upstreams reject oversized values with 400.

Resolve a per-alias cap from config.Routes[].MaxOutputTokens, with
fallback to provider catalog ModelMeta.MaxOutputTokens, and clamp
coreReq.MaxTokens before the protocol adapter serializes the upstream
request.

Adds three unit tests covering route, fallback, and unset paths.
---
 internal/service/server/adapter_dispatch.go |  6 ++
 .../service/server/adapter_dispatch_test.go | 72 +++++++++++++++++++
 internal/service/server/server.go           | 20 ++++++
 3 files changed, 98 insertions(+)

diff --git a/internal/service/server/adapter_dispatch.go b/internal/service/server/adapter_dispatch.go
index 6031fa9c..facedb27 100644
--- a/internal/service/server/adapter_dispatch.go
+++ b/internal/service/server/adapter_dispatch.go
@@ -170,6 +170,12 @@ func (s *Server) handleWithAdapters(
 	// the upstream provider receives the correct model identifier.
 	coreReq.Model = preferred.UpstreamModel
 
+	if maxOut := s.routeMaxOutputTokens(openAIReq.Model, preferred); maxOut > 0 {
+		if coreReq.MaxTokens <= 0 || coreReq.MaxTokens > maxOut {
+			coreReq.MaxTokens = maxOut
+		}
+	}
+
 	wsMode := resolvedWebSearchMode(pm, openAIReq.Model, preferred)
 
 	// Inject web search tools at Core level if mode is "injected".
diff --git a/internal/service/server/adapter_dispatch_test.go b/internal/service/server/adapter_dispatch_test.go
index 32c6ce52..cfc61f41 100644
--- a/internal/service/server/adapter_dispatch_test.go
+++ b/internal/service/server/adapter_dispatch_test.go
@@ -265,3 +265,75 @@ func TestInjectCoreWebSearchSkipsWhenCandidateHasNativeSearch(t *testing.T) {
 		t.Fatalf("len(coreReq.Tools) = %d, want 0", len(coreReq.Tools))
 	}
 }
+
+func TestRouteMaxOutputTokensPrefersRouteEntry(t *testing.T) {
+	rt := runtime.NewRuntime(config.Config{
+		Routes: map[string]config.RouteEntry{
+			"claude-haiku-4-5": {
+				Provider:        "newapi",
+				Model:           "claude-haiku-4-5",
+				MaxOutputTokens: 64000,
+			},
+		},
+		ProviderDefs: map[string]config.ProviderDef{
+			"newapi": {
+				Models: map[string]config.ModelMeta{
+					"claude-haiku-4-5": {MaxOutputTokens: 200000},
+				},
+			},
+		},
+	}, nil, nil)
+	srv := &Server{runtime: rt}
+
+	got := srv.routeMaxOutputTokens("claude-haiku-4-5", provider.ProviderCandidate{
+		ProviderKey:   "newapi",
+		UpstreamModel: "claude-haiku-4-5",
+	})
+	if got != 64000 {
+		t.Fatalf("routeMaxOutputTokens() = %d, want 64000", got)
+	}
+}
+
+func TestRouteMaxOutputTokensFallsBackToProviderModelMeta(t *testing.T) {
+	rt := runtime.NewRuntime(config.Config{
+		Routes: map[string]config.RouteEntry{
+			"qwen3.6-plus": {Provider: "newapi", Model: "qwen3.6-plus"},
+		},
+		ProviderDefs: map[string]config.ProviderDef{
+			"newapi": {
+				Models: map[string]config.ModelMeta{
+					"qwen3.6-plus": {MaxOutputTokens: 65536},
+				},
+			},
+		},
+	}, nil, nil)
+	srv := &Server{runtime: rt}
+
+	got := srv.routeMaxOutputTokens("qwen3.6-plus", provider.ProviderCandidate{
+		ProviderKey:   "newapi",
+		UpstreamModel: "qwen3.6-plus",
+	})
+	if got != 65536 {
+		t.Fatalf("routeMaxOutputTokens() = %d, want 65536", got)
+	}
+}
+
+func TestRouteMaxOutputTokensReturnsZeroWhenUnset(t *testing.T) {
+	rt := runtime.NewRuntime(config.Config{
+		Routes: map[string]config.RouteEntry{
+			"unbounded": {Provider: "newapi", Model: "unbounded"},
+		},
+		ProviderDefs: map[string]config.ProviderDef{
+			"newapi": {Models: map[string]config.ModelMeta{}},
+		},
+	}, nil, nil)
+	srv := &Server{runtime: rt}
+
+	got := srv.routeMaxOutputTokens("unbounded", provider.ProviderCandidate{
+		ProviderKey:   "newapi",
+		UpstreamModel: "unbounded",
+	})
+	if got != 0 {
+		t.Fatalf("routeMaxOutputTokens() = %d, want 0", got)
+	}
+}
diff --git a/internal/service/server/server.go b/internal/service/server/server.go
index 4c792212..c8e98342 100644
--- a/internal/service/server/server.go
+++ b/internal/service/server/server.go
@@ -96,6 +96,26 @@ func (s *Server) activeProviderDefs() map[string]config.ProviderDef {
 	return nil
 }
 
+// routeMaxOutputTokens resolves the effective per-route max_output_tokens cap
+// for the given inbound model alias, falling back to the upstream provider
+// catalog metadata when the route does not declare its own value.
+// Returns 0 when no cap is configured (caller should leave defaults alone).
+func (s *Server) routeMaxOutputTokens(modelAlias string, preferred provider.ProviderCandidate) int {
+	snap := s.runtimeSnapshot()
+	if snap == nil {
+		return 0
+	}
+	if entry, ok := snap.Config.Routes[modelAlias]; ok && entry.MaxOutputTokens > 0 {
+		return entry.MaxOutputTokens
+	}
+	if def, ok := snap.Config.ProviderDefs[preferred.ProviderKey]; ok {
+		if meta, ok := def.Models[preferred.UpstreamModel]; ok && meta.MaxOutputTokens > 0 {
+			return meta.MaxOutputTokens
+		}
+	}
+	return 0
+}
+
 func (s *Server) activeChatClient(providerKey string) any {
 	if snap := s.runtimeSnapshot(); snap != nil {
 		if def, ok := snap.Config.ProviderDefs[providerKey]; ok && def.Protocol == config.ProtocolOpenAIChat {