NVIDIA-AI-Blueprints · torkian · May 19, 2026 · May 28, 2026 · May 28, 2026 · Jun 2, 2026
diff --git a/src/aiq_agent/agents/clarifier/agent.py b/src/aiq_agent/agents/clarifier/agent.py
@@ -104,6 +104,15 @@
 )
 """Reminder prompt added after tool results to reinforce JSON-only output."""
 
+FORCE_SEARCH_GUIDANCE = (
+    "You attempted to ask the user for clarification before gathering any context. "
+    "Before asking the user a question, you MUST first use the available search tools "
+    "to look up unfamiliar entities, acronyms, products, or terms in their request. "
+    "Issue one focused tool call now with a query derived from the user's request. "
+    "Only after reviewing the tool results should you decide whether clarification is still needed."
+)
+"""Guidance prompt injected when the LLM tries to clarify without having searched first."""
+
 
 class ClarifierAgent:
     """
@@ -484,6 +493,24 @@ def _get_fallback_clarification(self, query: str | None = None) -> str:
     SKIP_COMMANDS = {"skip", "done", "exit", "quit", "proceed", "continue", "no", "n", ""}
     """Set of commands that indicate the user wants to skip clarification."""
 
+    @staticmethod
+    def _has_tool_invocations(messages: Sequence[Any]) -> bool:
+        """
+        Check whether any message in the conversation carries tool calls.
+
+        Args:
+            messages: The conversation message history.
+
+        Returns:
+            True if any message has a truthy ``tool_calls`` attribute (the type is
+            not checked; a missing attribute counts as none), False otherwise.
+        """
+        for msg in messages:
+            tool_calls = getattr(msg, "tool_calls", None)
+            if tool_calls:
+                return True
+        return False
+
     def _is_skip_command(self, user_reply: str) -> bool:
         """
         Check if the user's reply indicates they want to skip clarification.
@@ -503,16 +530,21 @@ def _build_graph(self) -> CompiledStateGraph:
         """
         Build the LangGraph StateGraph for the clarification workflow.
 
-        Creates a graph with three nodes:
-        - agent: Generates clarification questions using the LLM
+        Creates a graph with the following nodes:
+        - agent: Generates clarification questions using the LLM. On the first
+          turn it also enforces search-before-clarify (issue #234): if the model
+          asks for clarification without using its bound search tools, it nudges
+          the model once and retries inline.
         - tools: Executes tool calls (e.g., web search) for context
         - ask_for_clarification: Prompts user and processes response
+        - plan_preview: Optional plan approval flow
 
         The graph flow:
-        1. agent generates a response (question, tool call, or completion)
+        1. agent generates a response (question, tool call, or completion);
+           on turn 0 it may force one search-and-retry before yielding
         2. If tool call → tools node → back to agent
-        3. If question → ask_for_clarification → back to agent
-        4. If complete → end
+        3. If complete → end (or plan_preview if enabled)
+        4. Otherwise → ask_for_clarification → back to agent
 
         Returns:
             Compiled LangGraph StateGraph ready for execution.
@@ -526,6 +558,17 @@ def _build_graph(self) -> CompiledStateGraph:
 
         async def agent_node(state: ClarifierAgentState):
             if state.remaining_questions <= 0:
+                # Clarification budget is exhausted — emit a completion signal,
+                # unless a prior node already did (the skip-command branch in
+                # ask_clarification returns its own AIMessage(complete) and then
+                # this node is re-entered via the unconditional edge). Emitting
+                # another here would place two consecutive assistant messages in
+                # history, which the OpenAI/Anthropic APIs reject. If the last
+                # message is already a completion, leave the state untouched and
+                # let decide_route end the run.
+                last_message = state.messages[-1] if state.messages else None
+                if isinstance(last_message, AIMessage) and self._is_complete(getattr(last_message, "content", "")):
+                    return {}
                 complete_response = ClarificationResponse(needs_clarification=False, clarification_question=None)
                 return {"messages": [AIMessage(content=complete_response.model_dump_json())]}
             tools_info = [
@@ -548,6 +591,43 @@ async def agent_node(state: ClarifierAgentState):
                 messages.append(HumanMessage(content=JSON_REMINDER_AFTER_TOOLS))
 
             response = await bound_llm.ainvoke(messages)
+
+            # Search-before-clarify (issue #234): if, on the first turn, the model
+            # asks for clarification without first using its bound search tools,
+            # nudge it once to search and retry inline. This keeps the behavior
+            # model-agnostic without adding graph nodes or extra state — even
+            # models that would otherwise skip tool use must attempt a search
+            # before falling back to asking the user.
+            #
+            # The guard is one-shot by construction:
+            #   * iteration == 0 — only on the first turn; once the user replies,
+            #     iteration advances and this never fires again.
+            #   * not _has_tool_invocations(state.messages) — once any tool call
+            #     is in history (e.g. after a successful forced search, even while
+            #     iteration is still 0), we never re-nudge.
+            # FORCE_SEARCH_GUIDANCE is sent only in the local retry_messages and is
+            # never returned to state, so it cannot leak into get_latest_user_query.
+            #
+            # We return ONLY retry_response, not the first (search-skipping)
+            # response. The first attempt was already shown to the model inside
+            # retry_messages; persisting it would put two consecutive
+            # assistant-role messages in history once retry_response carries a
+            # tool call (… AIMessage(clarif), AIMessage(tool_call), ToolMessage …),
+            # which the OpenAI Chat Completions and Anthropic Messages APIs reject
+            # with a 400. Keeping only retry_response preserves a valid sequence
+            # regardless of whether it is a tool call or another clarification.
+            if (
+                self.tools
+                and state.iteration == 0
+                and not self._has_tool_invocations(state.messages)
+                and not getattr(response, "tool_calls", None)
+                and self._is_needed(response.content)
+            ):
+                logger.info("Clarifier: model skipped search before clarifying; injecting guidance and retrying once")
+                retry_messages = messages + [response, HumanMessage(content=FORCE_SEARCH_GUIDANCE)]
+                retry_response = await bound_llm.ainvoke(retry_messages)
+                return {"messages": [retry_response]}
+
             return {"messages": [response]}
 
         async def ask_clarification(state: ClarifierAgentState):
@@ -576,8 +656,17 @@ async def ask_clarification(state: ClarifierAgentState):
                 logger.info("Clarifier: User requested to skip clarification")
                 complete_response = ClarificationResponse(needs_clarification=False, clarification_question=None)
                 clarifier_log = f"{clarifier_log}\n**Turn {iteration + 1} - User:** [Skipped clarification]"
+                # Persist the user's reply as a HumanMessage before the
+                # completion AIMessage. The prior turn already left an
+                # AIMessage(clarification) in history; without an interleaving
+                # human message the two assistant turns would be adjacent, which
+                # the OpenAI/Anthropic APIs reject. (The duplicate completion on
+                # graph re-entry is suppressed by the guard in agent_node.)
                 return {
-                    "messages": [AIMessage(content=complete_response.model_dump_json())],
+                    "messages": [
+                        HumanMessage(content=user_reply),
+                        AIMessage(content=complete_response.model_dump_json()),
+                    ],
                     "iteration": max_turns,  # Force end of clarification
                     "clarifier_log": clarifier_log,
                 }
@@ -610,6 +699,11 @@ def decide_route(state: ClarifierAgentState | dict):
                 if self.enable_plan_approval:
                     return "plan_preview"
                 return "__end__"
+
+            # The search-before-clarify nudge (issue #234) is handled inline in
+            # agent_node, not here — see the retry block there. By the time a
+            # clarification response reaches this router, any forced search has
+            # already happened, so we route straight to the user.
             return "ask_for_clarification"
 
         async def plan_preview_node(state: ClarifierAgentState):

diff --git a/src/aiq_agent/agents/clarifier/prompts/research_clarification.j2 b/src/aiq_agent/agents/clarifier/prompts/research_clarification.j2
@@ -29,9 +29,11 @@ Your ONLY responsibility is to determine whether a research request requires cla
 
 ## Tool Usage
 
-- You may use search tools ONLY to understand unfamiliar domains
-- Use at most 1-2 high-value searches
-- Searches are for your internal understanding only
+- **Search first, ask second.** If the user's request contains any unfamiliar entity, acronym, project, person, product, or technical term that you cannot fully define from your training data, you MUST issue a search tool call before deciding clarification is needed. Do not ask the user to define terms that a quick search would resolve.
+- On the first turn, prefer at least one search to ground the topic in current context whenever search tools are available.
+- Use at most 1-2 high-value searches per turn — keep queries focused on the specific unknown.
+- Searches are for your internal understanding only. Do not summarize or report search results to the user.
+- After reviewing search results, re-evaluate: if the request is now sufficiently specified, return `needs_clarification: false`. Only ask a clarification question if a genuine ambiguity remains.
 
 ---