From 197745223862ff201633eeaba38e25830ef1c9cb Mon Sep 17 00:00:00 2001 From: Christopher Wong Date: Sun, 24 May 2026 10:48:29 -0400 Subject: [PATCH] feat(gemini-planner): support native Gemini tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Gemini planner currently builds its Tools list exclusively from registered AX subagents (via agentsToTools). This commit lets ax.yaml opt into Gemini's native tool surfaces (`google_search`, `url_context`, `code_execution`, `google_maps`) by listing them in GeminiConfig.Tools []string, which already exists in config.go but was only consumed by the standalone gemini agent type. Three correctness points the implementation gets right (each covered by a test; points 1 and 2 were driven by empirical Vertex behavior): 1. Single-Tool merge. Per Gemini's tool-combination docs, all FunctionDeclarations + the native tool field must live on the SAME *genai.Tool object. agentsToTools naturally produces one Tool per registered agent, so we flatten + merge in process(). 2. Empty-Tool guard. When both registry and config.Tools are empty, don't send Tools: []*genai.Tool{ {} } — Vertex rejects with 400 INVALID_ARGUMENT ("Tool must contain at least one of function_declarations, google_search, url_context, code_execution"). 3. Native-field preservation from agentsToTools' nativeTools variadic. The merge loop copies .GoogleSearch / .URLContext / .CodeExecution / .GoogleMaps from each rawTool, not just .FunctionDeclarations. Among rawTools, first non-nil wins with a stderr warning on collision; natives named in GeminiConfig.Tools are applied afterwards and replace any rawTool value without a warning. Latent today (no production caller exercises the variadic; only the new test does), but this keeps the variadic's contract intact for future callers. 
Tests: TestProcess_AppendsNativeToolsFromConfig, TestProcess_MergesNativeToolsWithFunctionDeclarations, TestProcess_NoToolsWhenRegistryAndNativeBothEmpty, TestProcess_MergePreservesNativeFieldsFromRawTools. Vertex caveat (noted here only; this patch adds no in-code comment for it): IncludeServerSideToolInvocations (added in v1.51 of google.golang.org/genai) is documented and SDK-enforced as Gemini-Developer-API-only. Vertex auto-execution of native tools is gated by Google; our implementation works around this with a separate agent-as-tool pattern in downstream consumers, but that's out of scope here. --- internal/gemini/gemini_planner.go | 87 ++++++- internal/gemini/gemini_planner_test.go | 299 +++++++++++++++++++++++++ 2 files changed, 385 insertions(+), 1 deletion(-) diff --git a/internal/gemini/gemini_planner.go b/internal/gemini/gemini_planner.go index 6a4d9a6..cb33dd8 100644 --- a/internal/gemini/gemini_planner.go +++ b/internal/gemini/gemini_planner.go @@ -67,6 +67,13 @@ type geminiPlannerAgent struct { bashTool Tool skillsTool Tool registry AgentRegistry + // nativeTools is an optional list of Gemini-native Tools (e.g. + // google_search, url_context) to plumb through agentsToTools' + // nativeTools variadic. No production caller wires this today — + // natives are configured via GeminiConfig.Tools and attached in + // process() directly — but future callers (and tests) use this to + // exercise the merge loop's preservation of native fields. + nativeTools []Tool } // NewGeminiPlannerAgent creates a new Gemini-based agent. @@ -195,11 +202,89 @@ func (p *geminiPlannerAgent) loop(ctx context.Context, conversationID string, st } func (p *geminiPlannerAgent) process(ctx context.Context, conversationID string, start *proto.AgentStart, e agent.Executor, handler agent.OutputHandler) (agentID string, keepLooping bool, err error) { - tools, err := agentsToTools(p.registry) + rawTools, err := agentsToTools(p.registry, p.nativeTools...) 
if err != nil { return "", false, fmt.Errorf("failed to convert agents to tools: %w", err) } + // When native Gemini tools (google_search, url_context, …) are + // configured, ALL function declarations + the native tool must live + // on the SAME *genai.Tool object. Splitting across multiple Tool + // entries causes Gemini 3 to emit the native tool's name as a + // regular function call instead of auto-executing it server-side + // (empirically confirmed against gemini-3-flash-preview on Vertex). + // See https://ai.google.dev/gemini-api/docs/tool-combination + // + // Flatten agentsToTools' one-Tool-per-agent output into a single + // Tool, then attach the configured natives. We also preserve any + // native fields (GoogleSearch / URLContext / CodeExecution / + // GoogleMaps) carried on the raw Tools — the nativeTools variadic + // in agentsToTools can produce those, and dropping them would + // silently lose tool capability. First non-nil wins; a warning is + // logged if two rawTools set the same native field. + mergedTool := &genai.Tool{} + for _, t := range rawTools { + if t == nil { + continue + } + mergedTool.FunctionDeclarations = append(mergedTool.FunctionDeclarations, t.FunctionDeclarations...) 
+ if t.GoogleSearch != nil { + if mergedTool.GoogleSearch != nil { + fmt.Fprintf(os.Stderr, "warn: multiple rawTools set GoogleSearch; keeping first\n") + } else { + mergedTool.GoogleSearch = t.GoogleSearch + } + } + if t.URLContext != nil { + if mergedTool.URLContext != nil { + fmt.Fprintf(os.Stderr, "warn: multiple rawTools set URLContext; keeping first\n") + } else { + mergedTool.URLContext = t.URLContext + } + } + if t.CodeExecution != nil { + if mergedTool.CodeExecution != nil { + fmt.Fprintf(os.Stderr, "warn: multiple rawTools set CodeExecution; keeping first\n") + } else { + mergedTool.CodeExecution = t.CodeExecution + } + } + if t.GoogleMaps != nil { + if mergedTool.GoogleMaps != nil { + fmt.Fprintf(os.Stderr, "warn: multiple rawTools set GoogleMaps; keeping first\n") + } else { + mergedTool.GoogleMaps = t.GoogleMaps + } + } + } + for _, t := range p.config.GeminiConfig.Tools { + switch t { + case "google_search": + mergedTool.GoogleSearch = &genai.GoogleSearch{} + case "url_context": + mergedTool.URLContext = &genai.URLContext{} + case "code_execution": + mergedTool.CodeExecution = &genai.ToolCodeExecution{} + case "google_maps": + mergedTool.GoogleMaps = &genai.GoogleMaps{} + default: + return "", false, fmt.Errorf("unsupported native planner tool: %q", t) + } + } + // Vertex rejects a Tool with zero content + // ("400 INVALID_ARGUMENT: Tool must contain at least one of + // function_declarations, google_search, url_context, code_execution"). + // Only include mergedTool when it actually has content; otherwise + // send no tools at all. 
+ var tools []*genai.Tool + if len(mergedTool.FunctionDeclarations) > 0 || + mergedTool.GoogleSearch != nil || + mergedTool.URLContext != nil || + mergedTool.CodeExecution != nil || + mergedTool.GoogleMaps != nil { + tools = []*genai.Tool{mergedTool} + } + inputs := start.Messages if fc, approved := p.handleConfirmationAnswer(inputs); fc != nil { if fc.Name == p.bashTool.Name() { diff --git a/internal/gemini/gemini_planner_test.go b/internal/gemini/gemini_planner_test.go index aa575f2..4c72459 100644 --- a/internal/gemini/gemini_planner_test.go +++ b/internal/gemini/gemini_planner_test.go @@ -466,3 +466,302 @@ func TestNewGeminiPlannerAgent_NoSkillsPrompt(t *testing.T) { t.Errorf("expected system prompt to not contain '', got: %s", prompt) } } + + +// TestProcess_AppendsNativeToolsFromConfig asserts that native Gemini +// tools listed in GeminiConfig.Tools (e.g. "google_search") get added +// to the GenerateContent request alongside the registered AX subagent +// function declarations. This is how the planner gets web-grounding +// without us building a websearch subagent. 
+func TestProcess_AppendsNativeToolsFromConfig(t *testing.T) { + var captured *genai.GenerateContentConfig + mockGen := &mockContentGenerator{ + generateContentFunc: func(ctx context.Context, model string, contents []*genai.Content, cfg *genai.GenerateContentConfig) (*genai.GenerateContentResponse, error) { + captured = cfg + return &genai.GenerateContentResponse{ + Candidates: []*genai.Candidate{{ + Content: &genai.Content{Parts: []*genai.Part{{Text: "ok"}}}, + }}, + }, nil + }, + } + + registry := &mockAgentRegistry{ + listFunc: func() []string { return nil }, + } + + p := &geminiPlannerAgent{ + client: mockGen, + registry: registry, + config: GeminiPlannerConfig{ + GeminiConfig: &config.GeminiConfig{ + Model: "test-model", + SystemPrompt: "test", + Tools: []string{"google_search"}, + }, + }, + } + + start := &proto.AgentStart{Messages: []*proto.Message{{ + Role: "user", + Content: &proto.Content{ + Type: &proto.Content_Text{Text: &proto.TextContent{Text: "what is the news"}}, + }, + }}} + _, _, err := p.process(context.Background(), "conv-native-tools", start, nil, func(o *proto.AgentOutputs) error { return nil }) + if err != nil { + t.Fatalf("process: %v", err) + } + + if captured == nil { + t.Fatal("GenerateContent was never called") + } + foundGoogleSearch := false + for _, tool := range captured.Tools { + if tool != nil && tool.GoogleSearch != nil { + foundGoogleSearch = true + } + } + if !foundGoogleSearch { + t.Errorf("captured config.Tools does not contain a GoogleSearch entry; got %d tools", len(captured.Tools)) + } +} + +// TestProcess_MergesNativeToolsWithFunctionDeclarations locks in the Gemini +// docs requirement: when native tools (like GoogleSearch) coexist with +// custom function declarations, they must share ONE Tool object. 
Splitting +// across multiple Tool entries causes Gemini 3 to emit the native tool's +// name as a regular function call instead of auto-executing it server-side +// (verified empirically against gemini-3-flash-preview on Vertex). +// +// Expected shape: every FunctionDeclaration the planner produced for +// registered AX agents lives on the SAME *genai.Tool that carries the +// GoogleSearch (or other built-in) declaration. +func TestProcess_MergesNativeToolsWithFunctionDeclarations(t *testing.T) { + var captured *genai.GenerateContentConfig + mockGen := &mockContentGenerator{ + generateContentFunc: func(ctx context.Context, model string, contents []*genai.Content, cfg *genai.GenerateContentConfig) (*genai.GenerateContentResponse, error) { + captured = cfg + return &genai.GenerateContentResponse{ + Candidates: []*genai.Candidate{{ + Content: &genai.Content{Parts: []*genai.Part{{Text: "ok"}}}, + }}, + }, nil + }, + } + + registry := &mockAgentRegistry{ + listFunc: func() []string { return []string{"agent-a", "agent-b"} }, + getInfoFunc: func(id string) (*agent.AgentInfo, error) { + return &agent.AgentInfo{ID: id, Name: id, Description: "test"}, nil + }, + } + + p := &geminiPlannerAgent{ + client: mockGen, + registry: registry, + config: GeminiPlannerConfig{ + GeminiConfig: &config.GeminiConfig{ + Model: "test-model", + SystemPrompt: "test", + Tools: []string{"google_search"}, + }, + }, + } + + start := &proto.AgentStart{Messages: []*proto.Message{{ + Role: "user", + Content: &proto.Content{ + Type: &proto.Content_Text{Text: &proto.TextContent{Text: "x"}}, + }, + }}} + _, _, err := p.process(context.Background(), "conv-merged", start, nil, func(o *proto.AgentOutputs) error { return nil }) + if err != nil { + t.Fatalf("process: %v", err) + } + if captured == nil { + t.Fatal("GenerateContent was never called") + } + + // Find the Tool carrying GoogleSearch and assert it ALSO has the + // function declarations (not split across separate Tool entries). 
+ var gsTool *genai.Tool + for _, tool := range captured.Tools { + if tool != nil && tool.GoogleSearch != nil { + gsTool = tool + break + } + } + if gsTool == nil { + t.Fatal("no Tool carries GoogleSearch") + } + if len(gsTool.FunctionDeclarations) < 2 { + t.Errorf("GoogleSearch Tool has %d FunctionDeclarations; want >=2 (agent-a + agent-b merged onto same Tool)", len(gsTool.FunctionDeclarations)) + } + // Belt+suspenders: assert there are NOT also separate Tool entries + // each holding one FunctionDeclaration (the old multi-Tool shape). + if len(captured.Tools) != 1 { + t.Errorf("captured.Tools has %d entries; want 1 merged Tool (combining FunctionDeclarations + GoogleSearch on one Tool is required by Gemini docs)", len(captured.Tools)) + } +} + +// TestProcess_NoToolsWhenRegistryAndNativeBothEmpty guards against Vertex +// "400 INVALID_ARGUMENT: Tool must contain at least one of +// function_declarations, google_search, url_context, code_execution". +// When the agent registry is empty AND no native Gemini tools are +// configured, process() must not send a zero-valued *genai.Tool to the +// model — it should send no tools at all (nil or empty). +// +// Boot-time / empty-config deployments hit this when the planner runs +// with no registered subagents and no native tool list. 
+func TestProcess_NoToolsWhenRegistryAndNativeBothEmpty(t *testing.T) { + var captured *genai.GenerateContentConfig + mockGen := &mockContentGenerator{ + generateContentFunc: func(ctx context.Context, model string, contents []*genai.Content, cfg *genai.GenerateContentConfig) (*genai.GenerateContentResponse, error) { + captured = cfg + return &genai.GenerateContentResponse{ + Candidates: []*genai.Candidate{{ + Content: &genai.Content{Parts: []*genai.Part{{Text: "ok"}}}, + }}, + }, nil + }, + } + + registry := &mockAgentRegistry{ + listFunc: func() []string { return nil }, + } + + p := &geminiPlannerAgent{ + client: mockGen, + registry: registry, + config: GeminiPlannerConfig{ + GeminiConfig: &config.GeminiConfig{ + Model: "test-model", + SystemPrompt: "test", + // Tools intentionally empty + }, + }, + } + + start := &proto.AgentStart{Messages: []*proto.Message{{ + Role: "user", + Content: &proto.Content{ + Type: &proto.Content_Text{Text: &proto.TextContent{Text: "hi"}}, + }, + }}} + _, _, err := p.process(context.Background(), "conv-empty", start, nil, func(o *proto.AgentOutputs) error { return nil }) + if err != nil { + t.Fatalf("process: %v", err) + } + if captured == nil { + t.Fatal("GenerateContent was never called") + } + + // We must not send a zero-valued *genai.Tool{}. Either nil Tools or + // an empty slice is acceptable; a single empty Tool is not. + if len(captured.Tools) == 0 { + return // nil or empty — good + } + for i, tool := range captured.Tools { + if tool == nil { + continue + } + empty := len(tool.FunctionDeclarations) == 0 && + tool.GoogleSearch == nil && + tool.URLContext == nil && + tool.CodeExecution == nil && + tool.GoogleMaps == nil + if empty { + t.Errorf("captured.Tools[%d] is a zero-valued *genai.Tool; Vertex will reject this with 400 INVALID_ARGUMENT", i) + } + } +} + +// fakeNativeTool is a minimal Tool stub that lets tests inject a Tool +// carrying native Gemini fields (GoogleSearch, URLContext, etc.) 
into +// the agentsToTools nativeTools variadic and, through it, the process() +// merge loop. +type fakeNativeTool struct { + name string + tools []*genai.Tool +} + +func (f *fakeNativeTool) Name() string { return f.name } +func (f *fakeNativeTool) FuncDecl() []*genai.Tool { return f.tools } +func (f *fakeNativeTool) SystemPrompt() string { return "" } +func (f *fakeNativeTool) HandleCall(ctx context.Context, fc *genai.FunctionCall, o agent.OutputHandler) error { + return nil +} +func (f *fakeNativeTool) HandleExecute(ctx context.Context, fc *genai.FunctionCall, approved bool, o agent.OutputHandler) error { + return nil +} + +// TestProcess_MergePreservesNativeFieldsFromRawTools guards against a +// latent bug in the process() merge loop: when agentsToTools' nativeTools +// variadic produces a *genai.Tool with native fields set +// (GoogleSearch / URLContext / CodeExecution / GoogleMaps), the merge +// loop must copy those fields onto mergedTool — not just +// FunctionDeclarations. +// +// No current caller plumbs nativeTools into agentsToTools, so this is a +// latent bug, but future callers would silently lose native tools. +func TestProcess_MergePreservesNativeFieldsFromRawTools(t *testing.T) { + var captured *genai.GenerateContentConfig + mockGen := &mockContentGenerator{ + generateContentFunc: func(ctx context.Context, model string, contents []*genai.Content, cfg *genai.GenerateContentConfig) (*genai.GenerateContentResponse, error) { + captured = cfg + return &genai.GenerateContentResponse{ + Candidates: []*genai.Candidate{{ + Content: &genai.Content{Parts: []*genai.Part{{Text: "ok"}}}, + }}, + }, nil + }, + } + + registry := &mockAgentRegistry{ + listFunc: func() []string { return nil }, + } + + // nativeTool emits a *genai.Tool with GoogleSearch set (the kind of + // shape future callers of agentsToTools(registry, nativeTools…) + // could pass in). 
+ nativeTool := &fakeNativeTool{ + name: "fake_native", + tools: []*genai.Tool{{ + GoogleSearch: &genai.GoogleSearch{}, + }}, + } + + p := &geminiPlannerAgent{ + client: mockGen, + registry: registry, + nativeTools: []Tool{nativeTool}, + config: GeminiPlannerConfig{ + GeminiConfig: &config.GeminiConfig{ + Model: "test-model", + SystemPrompt: "test", + }, + }, + } + + start := &proto.AgentStart{Messages: []*proto.Message{{ + Role: "user", + Content: &proto.Content{ + Type: &proto.Content_Text{Text: &proto.TextContent{Text: "hi"}}, + }, + }}} + _, _, err := p.process(context.Background(), "conv-native-merge", start, nil, func(o *proto.AgentOutputs) error { return nil }) + if err != nil { + t.Fatalf("process: %v", err) + } + if captured == nil { + t.Fatal("GenerateContent was never called") + } + + if len(captured.Tools) == 0 { + t.Fatal("captured.Tools is empty; expected a Tool carrying GoogleSearch") + } + if captured.Tools[0].GoogleSearch == nil { + t.Errorf("merged Tool dropped GoogleSearch from raw native tool; merge loop only copied FunctionDeclarations") + } +}