From 84e9c7b2b20c2a2e9661f7e64d9680ed18a23e8a Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Tue, 21 Apr 2026 18:44:09 -0700 Subject: [PATCH 1/3] added reranked_search tool, which triggers multiple search tools and rerank their results by query relevance Signed-off-by: Jack Yu --- .secrets.baseline | 4 +- pyproject.toml | 3 + scripts/setup.sh | 1 + .../src/paper_search.py | 4 +- sources/knowledge_layer/src/register.py | 3 +- sources/reranker/README.md | 75 ++++++++ sources/reranker/example_cli_config.yml | 171 ++++++++++++++++++ sources/reranker/pyproject.toml | 39 ++++ sources/reranker/src/__init__.py | 20 ++ sources/reranker/src/cross_encoder.py | 67 +++++++ sources/reranker/src/register.py | 139 ++++++++++++++ sources/tavily_web_search/src/register.py | 5 +- src/aiq_agent/common/__init__.py | 2 + src/aiq_agent/common/prompt_utils.py | 2 + uv.lock | 31 +++- 15 files changed, 556 insertions(+), 10 deletions(-) create mode 100644 sources/reranker/README.md create mode 100644 sources/reranker/example_cli_config.yml create mode 100644 sources/reranker/pyproject.toml create mode 100644 sources/reranker/src/__init__.py create mode 100644 sources/reranker/src/cross_encoder.py create mode 100644 sources/reranker/src/register.py diff --git a/.secrets.baseline b/.secrets.baseline index 1e30d844..d4141a59 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -214,7 +214,7 @@ "filename": "sources/google_scholar_paper_search/src/paper_search.py", "hashed_secret": "11fa7c37d697f30e6aee828b4426a10f83ab2380", "is_verified": false, - "line_number": 40 + "line_number": 42 } ], "sources/google_scholar_paper_search/tests/conftest.py": [ @@ -290,5 +290,5 @@ } ] }, - "generated_at": "2026-04-15T20:15:51Z" + "generated_at": "2026-04-22T01:33:06Z" } diff --git a/pyproject.toml b/pyproject.toml index 080d2b07..0c16b14e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,6 +85,7 @@ aiq_deep_researcher = "aiq_agent.agents.deep_researcher.register" aiq_fastapi_extensions = 
"aiq_agent.fastapi_extensions.register" aiq_clarifier = "aiq_agent.agents.clarifier.register" aiq_data_source_registry = "aiq_agent.common.data_source_registry" +reranked_search = "reranked_search.register" [tool.pytest.ini_options] testpaths = ["tests", "sources/**/tests"] @@ -185,6 +186,7 @@ dev = [ "google-scholar-paper-search", "tavily-web-search", "knowledge-layer[all]", + "reranked-search", "aiq-api", "aiq-research-cli", "aiq-debug", @@ -219,6 +221,7 @@ aiq-agent = { workspace = true } google-scholar-paper-search = { workspace = true } tavily-web-search = { workspace = true } knowledge-layer = { workspace = true } +reranked-search = { workspace = true } aiq-api = { workspace = true } aiq-research-cli = { workspace = true } aiq-debug = { workspace = true } diff --git a/scripts/setup.sh b/scripts/setup.sh index e4fe963b..49b117e6 100755 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -67,6 +67,7 @@ echo "Installing data sources..." "${UV_BIN}" pip install -e ./sources/tavily_web_search "${UV_BIN}" pip install -e ./sources/google_scholar_paper_search "${UV_BIN}" pip install -e "./sources/knowledge_layer[llamaindex,foundational_rag]" +"${UV_BIN}" pip install -e ./sources/reranker echo "Data Sources installed" # Setup pre-commit diff --git a/sources/google_scholar_paper_search/src/paper_search.py b/sources/google_scholar_paper_search/src/paper_search.py index 0a4e4d3c..93e97b01 100644 --- a/sources/google_scholar_paper_search/src/paper_search.py +++ b/sources/google_scholar_paper_search/src/paper_search.py @@ -24,6 +24,8 @@ import aiohttp +from aiq_agent.common import SOURCE_DELIMITER + logger = logging.getLogger(__name__) SERPER_API_URL = "https://google.serper.dev/scholar" @@ -175,7 +177,7 @@ def format_results(results: list[dict[str, Any]]) -> str: ) formatted_papers.append(paper_str) - return "\n\n".join(formatted_papers) + return SOURCE_DELIMITER.join(formatted_papers) async def search( self, diff --git a/sources/knowledge_layer/src/register.py 
b/sources/knowledge_layer/src/register.py index 17dd5324..d264bb56 100644 --- a/sources/knowledge_layer/src/register.py +++ b/sources/knowledge_layer/src/register.py @@ -28,6 +28,7 @@ from pydantic import Field from pydantic import model_validator +from aiq_agent.common import SOURCE_DELIMITER from nat.builder.builder import Builder from nat.builder.context import Context from nat.builder.function_info import FunctionInfo @@ -227,7 +228,7 @@ def _format_results(retrieval_result, query: str) -> str: lines.append(content) lines.append("") - return "\n".join(lines) + return SOURCE_DELIMITER.join(lines) @register_function(config_type=KnowledgeRetrievalConfig) diff --git a/sources/reranker/README.md b/sources/reranker/README.md new file mode 100644 index 00000000..36f07bd9 --- /dev/null +++ b/sources/reranker/README.md @@ -0,0 +1,75 @@ +# Reranked Search Source + +A reranking layer over multiple search tools for NeMo Agent Toolkit workflows. Fans out a query to the configured search tools in parallel, reranks the combined results by relevance, and returns the top-k results. + +## How It Works +1. The reranker receives a query from the agent +2. Calls all `search_tools` in parallel with the same query +3. Scores and ranks all results across sources using the cross-encoder reranking model +4. Returns the top-k results to the agent + +## Environment Variables + +By default, the reranker model is served from build.nvidia.com and requires NVIDIA_API_KEY to run model inference: + +```bash +NVIDIA_API_KEY=your_nvidia_api_key +``` + +## Example Workflow Configuration + +Define the reranked_search tool and other search tools that feed into the reranker. If the search tools are part of a function group, they must be specified in the group's `- include:` list, and must be referenced using the `{group_name}__{tool_name}` format in the `reranked_search` config section. 
+ +For more info on function group namespaces, refer to the NeMo Agent Toolkit documentation, specifically [Function Naming and Namespacing](https://docs.nvidia.com/nemo/agent-toolkit/latest/build-workflows/functions-and-function-groups/function-groups.html#function-naming-and-namespacing) and [Understanding Function Accessibility](https://docs.nvidia.com/nemo/agent-toolkit/latest/build-workflows/functions-and-function-groups/function-groups.html#understanding-function-accessibility). + +```yaml +function_groups: + your_group: + _type: your_group + include: [tool_1, tool_2] + ... + +functions: + web_search_tool: + _type: tavily_web_search + max_results: 5 + + your_custom_search_tool: + _type: your_custom_search + ... + + reranked_search: + _type: reranked_search + # required configs + cross_encoder_model: nv-rerank-qa-mistral-4b:1 + search_tools: + - web_search_tool # standalone function examples + - your_custom_search_tool + - your_group__tool_1 # function group examples + - your_group__tool_2 + + # # uncomment to adjust default values + # top_k: 5 + # timeout_seconds: 10 # per-tool timeout +``` + +Then give it to an agent as its only tool: + +```yaml + shallow_research_agent: + _type: shallow_research_agent + llm: nemotron_nano_llm + tools: + - reranked_search +``` + +See `sources/reranker/example_cli_config.yml` for a full working example. + +### Supported Reranker Models +Choose any rerank model from build.nvidia.com + +## Make Your Source Compatible with Reranker +All built-in sources under `./sources` folder are already supported. + +To design a new source that supports reranking, there's only one condition: +* use `aiq_agent.common.SOURCE_DELIMITER` to join all the search result strings returned by your tool. The reranker tool will use the same delimiter to break down the long string into separate sources and rerank them by relevance. 
diff --git a/sources/reranker/example_cli_config.yml b/sources/reranker/example_cli_config.yml new file mode 100644 index 00000000..f9991be8 --- /dev/null +++ b/sources/reranker/example_cli_config.yml @@ -0,0 +1,171 @@ +# This is the default configuration for the CLI mode. +# It has the following features: +# - Human-in-the-loop clarification and plan approval before deep research starts +# - Web search and Paper search tools by default +# - There is no knowledge retrieval + +general: + telemetry: + logging: + console: + _type: console + level: INFO + # tracing: + # langsmith: # Optional: LangSmith tracing - requires langsmith API key. Set using `export LANGSMITH_API_KEY=` + # _type: langsmith + # project: nvidia-aiq + +llms: + nemotron_llm_intent: + _type: nim + model_name: nvidia/nemotron-3-nano-30b-a3b + base_url: "https://integrate.api.nvidia.com/v1" + temperature: 0.5 + top_p: 0.9 + max_tokens: 4096 + num_retries: 5 + chat_template_kwargs: + enable_thinking: true + + nemotron_nano_llm: + _type: nim + model_name: nvidia/nemotron-3-nano-30b-a3b + base_url: "https://integrate.api.nvidia.com/v1" + temperature: 0.5 + top_p: 0.9 + max_tokens: 4096 + num_retries: 5 + chat_template_kwargs: + enable_thinking: false + + gpt_oss_llm: + _type: nim + model_name: openai/gpt-oss-120b + base_url: https://integrate.api.nvidia.com/v1 + temperature: 1.0 + top_p: 1.0 + max_tokens: 256000 + api_key: ${NVIDIA_API_KEY} + max_retries: 10 + + # Nemotron Super is compatible and tested with AIQ but has limited availability + # on the Build API due to high demand. + # Uncomment nemotron_super_llm below if the endpoint is accessible. 
+ # nemotron_super_llm: + # _type: nim + # model_name: nvidia/nemotron-3-super-120b-a12b + # base_url: "https://integrate.api.nvidia.com/v1" + # temperature: 1.0 + # top_p: 1.0 + # max_tokens: 128000 + # num_retries: 5 + # chat_template_kwargs: + # enable_thinking: true + +functions: + # ========================================================================= + # Data Source Registry + # ========================================================================= + # Central registry that controls: + # 1. UI toggles — each source appears as an on/off switch in the frontend + # 2. Per-message filtering — users can select active sources per request + # 3. Tool auto-inheritance — agents with no explicit `tools` list receive + # every tool listed here (use `exclude_tools` on agents to specialize) + # + # Source entry fields: + # id, name, description, tools, requires_auth (default: false), + # default_enabled (default: true) + # + # See docs/source/customization/tools-and-sources.md for full details. + # ========================================================================= + data_sources: + _type: data_source_registry + sources: + - id: web_search + name: "Web Search" + description: "Search the web for real-time information." + tools: + - web_search_tool + - advanced_web_search_tool + # - id: paper_search + # name: "Academic Papers" + # description: "Search academic papers and scientific publications." + # tools: + # - paper_search_tool + + web_search_tool: + _type: tavily_web_search + max_results: 5 + max_content_length: 1000 + + advanced_web_search_tool: + _type: tavily_web_search + max_results: 2 + advanced_search: true + + # Paper Search (optional - requires SERPER_API_KEY) + # Uncomment the block below and set SERPER_API_KEY to enable academic paper search. + paper_search_tool: + _type: paper_search + max_results: 5 + serper_api_key: ${SERPER_API_KEY} + + # ======================================================================== + # reranked_search tool. 
Useful when 2 or more source are given to the agent. + # ======================================================================== + reranked_search: + _type: reranked_search + llm: nemotron_nano_llm + cross_encoder_model: nv-rerank-qa-mistral-4b:1 + search_tools: + - web_search_tool + - advanced_web_search_tool + - paper_search_tool + top_k: 5 + + # ========================================================================= + # Agents — inherit all registry tools; use exclude_tools to specialize + # ========================================================================= + intent_classifier: + _type: intent_classifier + llm: nemotron_llm_intent + # tools: omitted -> inherits all from data_source_registry + # exclude_tools: [] + # llm_timeout: 90 # optional; seconds for intent LLM call (default 90) + + clarifier_agent: + _type: clarifier_agent + llm: nemotron_nano_llm # replace with nemotron_super_llm if available + planner_llm: nemotron_nano_llm # replace with nemotron_super_llm if available + # tools: omitted -> inherits all from data_source_registry + # exclude_tools: [] + max_turns: 3 + enable_plan_approval: true + log_response_max_chars: 2000 + verbose: true + + shallow_research_agent: + _type: shallow_research_agent + llm: nemotron_nano_llm + # tools: omitted -> inherits all from data_source_registry + tools: + - reranked_search + # exclude_tools: + # - advanced_web_search_tool + max_llm_turns: 10 + max_tool_iterations: 5 + + deep_research_agent: + _type: deep_research_agent + orchestrator_llm: gpt_oss_llm + researcher_llm: nemotron_nano_llm # replace with nemotron_super_llm if available + planner_llm: gpt_oss_llm + max_loops: 2 + tools: + - reranked_search + +workflow: + _type: chat_deepresearcher_agent + enable_escalation: true + enable_clarifier: true + checkpoint_db: ${AIQ_CHECKPOINT_DB:-./checkpoints.db} diff --git a/sources/reranker/pyproject.toml b/sources/reranker/pyproject.toml new file mode 100644 index 00000000..b696cd4d --- /dev/null +++ 
b/sources/reranker/pyproject.toml @@ -0,0 +1,39 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[build-system] +build-backend = "setuptools.build_meta" +requires = ["setuptools >= 64", "setuptools-scm>=8"] + +[tool.setuptools] +packages = ["reranked_search"] +package-dir = {"reranked_search" = "src"} + +[project] +name = "reranked-search" +version = "1.0.0" +description = "Reranking layer over multiple search tools (BM25 and dense retrieval)" +readme = "README.md" +requires-python = ">=3.11,<3.14" +license = {text = "Apache-2.0"} +dependencies = [ + "nvidia-nat==1.5.0", + "pydantic>=2.0.0", + "langchain-core>=1.1.0", + "langchain-nvidia-ai-endpoints>=1.1.0" +] + +[project.entry-points."nat.plugins"] +reranked_search = "reranked_search.register" diff --git a/sources/reranker/src/__init__.py b/sources/reranker/src/__init__.py new file mode 100644 index 00000000..e51ac5d0 --- /dev/null +++ b/sources/reranker/src/__init__.py @@ -0,0 +1,20 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Reranking search layer — queries multiple tools and reranks results.""" + +from .register import reranked_search + +__all__ = ["reranked_search"] diff --git a/sources/reranker/src/cross_encoder.py b/sources/reranker/src/cross_encoder.py new file mode 100644 index 00000000..283599dc --- /dev/null +++ b/sources/reranker/src/cross_encoder.py @@ -0,0 +1,67 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Cross-encoder reranker using NVIDIA NIM via LangChain. + +Unlike the bi-encoder approach (dense.py) which encodes query and documents +separately then computes cosine similarity, a cross-encoder feeds the +query–document pair together into a single model pass and outputs a +relevance score directly. This is more accurate but slower per document. + +Uses ``langchain-nvidia-ai-endpoints`` (``NVIDIARerank``). Expects +``NVIDIA_API_KEY`` in the environment. 
+""" + +import logging + +from langchain_core.documents import Document +from langchain_nvidia_ai_endpoints import NVIDIARerank + +logger = logging.getLogger(__name__) + + +def rerank_cross_encoder( + query: str, + results: list[str], + top_k: int, + model_name: str, +) -> list[str]: + """Return the *top_k* results ordered by cross-encoder relevance score. + + Sends query–document pairs to the NVIDIA NIM reranking endpoint and sorts + by the returned relevance scores. + """ + if not results: + return [] + + client = NVIDIARerank(model=model_name) + + documents = [Document(page_content=r) for r in results] + reranked_docs = client.compress_documents(query=query, documents=documents) + + # Map reranked documents back to our SearchResult objects by matching content. + content_to_idx: dict[str, int] = {} + for i, r in enumerate(results): + content_to_idx[r] = i + + ranked_results: list[str] = [] + for doc in reranked_docs: + idx = content_to_idx.get(doc.page_content) + if idx is not None: + ranked_results.append(results[idx]) + if len(ranked_results) >= top_k: + break + + return ranked_results diff --git a/sources/reranker/src/register.py b/sources/reranker/src/register.py new file mode 100644 index 00000000..ba0c1d3c --- /dev/null +++ b/sources/reranker/src/register.py @@ -0,0 +1,139 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""NAT register function for the reranked search tool. + +Queries multiple search tools in parallel, parses their individual results, +and reranks them using BM25, dense embedding similarity, or cross-encoder scoring. +""" + +# from __future__ import annotations + +import asyncio +import logging + +from pydantic import BaseModel +from pydantic import Field +from pydantic import create_model + +from aiq_agent.common import SOURCE_DELIMITER +from nat.builder.builder import Builder +from nat.builder.function_info import FunctionInfo +from nat.cli.register_workflow import register_function +from nat.data_models.function import FunctionBaseConfig + +from .cross_encoder import rerank_cross_encoder + +logger = logging.getLogger(__name__) + + +class RerankedSearchConfig(FunctionBaseConfig, name="reranked_search"): + """Search across multiple tools and rerank the combined results. + + Calls every tool listed in *search_tools* with the same query, splits each + tool's output into individual items, scores them against the query, and + returns the top-k results as a formatted string. + """ + + top_k: int = Field(default=5, description="Number of results to return after reranking.") + search_tools: list[str] = Field( + default_factory=list, + description=("Names of search tool instances to fan out to (e.g. ['web_search_tool', 'paper_search_tool'])."), + ) + + cross_encoder_model: str = Field( + description="Cross-encoder model identifier (e.g. nv-rerank-qa-mistral-4b:1).", + ) + + timeout_seconds: float = Field( + default=30.0, + description="Per-tool timeout in seconds.", + ) + + +@register_function(config_type=RerankedSearchConfig) +async def reranked_search(config: RerankedSearchConfig, builder: Builder): + """Register the reranked search tool.""" + + # Resolve tool callables at registration time. 
+ # Uses get_tools (not get_function) so function-group sub-tools like + # eci_search_and_retrieve resolve correctly. + tool_fns: dict[str, object] = {} + for name in config.search_tools: + try: + tool_fns[name] = await builder.get_function(name) + except Exception: + logger.warning("could not add tool '%s' to reranked_search — skipping", name) + + if len(tool_fns) == 0: + logger.warning("reranked_search: no search tools added; tool will return empty results.") + + async def _call_tool(name: str, fn: object, query: BaseModel) -> list[str]: + """Call a single search tool and parse its output into results.""" + try: + raw: str = await asyncio.wait_for(fn.ainvoke(query), timeout=config.timeout_seconds) + results: list[str] = raw.split(SOURCE_DELIMITER) + if len(results) == 1: + logger.warning("SOURCE_DELIMITER not found in tool '%s' output", name) + return results + except TimeoutError: + logger.warning("tool '%s' timed out after %.0fs", name, config.timeout_seconds) + return [] + except Exception: + logger.exception("tool '%s' failed", name) + return [] + + RerankedSearchInput = create_model( + "RerankedSearchInput", + overall_query=str, + **{name: fn.input_schema for name, fn in tool_fns.items()}, + ) + + # the query parameter is a pydantic model. It's a nested structure, dynamically + # created above based on search tools' input schemas. + async def _reranked_search(input: RerankedSearchInput) -> str: + """Search across multiple data sources and return results reranked by relevance. + + Fans out the query to all configured search tools in parallel, merges + results, and reranks them so the most relevant items appear first. + + Args: + query (str): The search query. + + Returns: + str: Reranked search results formatted for LLM consumption. + """ + # Fan out to all tools concurrently. 
+ coros = [_call_tool(name, fn, input.__getattribute__(name)) for name, fn in tool_fns.items()] + per_tool_results = await asyncio.gather(*coros) + + all_results = [] + for results in per_tool_results: + all_results.extend(results) + + if not all_results: + return "No results found across any search tool." + + # Rerank. Use the overall_query for reranking purposes. + ranked: list[str] = rerank_cross_encoder( + input.overall_query, all_results, config.top_k, model_name=config.cross_encoder_model + ) + + return "Top results ranked by relevance:\n" + SOURCE_DELIMITER.join(ranked) + + yield FunctionInfo.from_fn( + _reranked_search, + description=_reranked_search.__doc__, + ) diff --git a/sources/tavily_web_search/src/register.py b/sources/tavily_web_search/src/register.py index 45d54c23..a90286e0 100644 --- a/sources/tavily_web_search/src/register.py +++ b/sources/tavily_web_search/src/register.py @@ -20,6 +20,7 @@ from pydantic import Field from pydantic import SecretStr +from aiq_agent.common import SOURCE_DELIMITER from nat.builder.builder import Builder from nat.builder.function_info import FunctionInfo from nat.cli.register_workflow import register_function @@ -132,9 +133,9 @@ def _truncate_content(content: str) -> str: answer_text = "" if search_docs.get("answer"): - answer_text = f"\n{search_docs['answer']}\n\n\n---\n\n" + answer_text = f"\n{search_docs['answer']}\n" + SOURCE_DELIMITER - web_search_results = "\n\n---\n\n".join( + web_search_results = SOURCE_DELIMITER.join( [ f'\n' f"\n{doc.get('title')}\n\n" diff --git a/src/aiq_agent/common/__init__.py b/src/aiq_agent/common/__init__.py index e6d7bff1..60f78170 100644 --- a/src/aiq_agent/common/__init__.py +++ b/src/aiq_agent/common/__init__.py @@ -54,6 +54,7 @@ from .llm_provider import LLMProvider from .llm_provider import LLMRole from .message_utils import get_latest_user_query +from .prompt_utils import SOURCE_DELIMITER from .prompt_utils import load_prompt from .prompt_utils import render_prompt_template 
from .tool_validation import format_tool_unavailability_error @@ -89,6 +90,7 @@ "parse_data_sources", "register_source_parser", "render_prompt_template", + "SOURCE_DELIMITER", "reset_session_registry", "sanitize_report", "set_session_registry", diff --git a/src/aiq_agent/common/prompt_utils.py b/src/aiq_agent/common/prompt_utils.py index 351446e2..ade6e074 100644 --- a/src/aiq_agent/common/prompt_utils.py +++ b/src/aiq_agent/common/prompt_utils.py @@ -27,6 +27,8 @@ logger = logging.getLogger(__name__) +SOURCE_DELIMITER = "\n\n---\n\n" + class PromptError(Exception): """Error loading or rendering prompts.""" diff --git a/uv.lock b/uv.lock index 57d41d61..aebacbba 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.11, <3.14" resolution-markers = [ "python_full_version >= '3.13' and sys_platform == 'win32'", @@ -23,6 +23,7 @@ members = [ "freshqa-eval", "google-scholar-paper-search", "knowledge-layer", + "reranked-search", "tavily-web-search", ] overrides = [ @@ -274,6 +275,7 @@ dev = [ { name = "pytest-asyncio" }, { name = "pytest-cov" }, { name = "pytest-env" }, + { name = "reranked-search" }, { name = "ruff" }, { name = "tavily-web-search" }, { name = "yapf" }, @@ -331,6 +333,7 @@ dev = [ { name = "pytest-asyncio", specifier = ">=0.21.0" }, { name = "pytest-cov", specifier = ">=4.1.0" }, { name = "pytest-env", specifier = ">=1.1.0" }, + { name = "reranked-search", editable = "sources/reranker" }, { name = "ruff", specifier = "~=0.15.1" }, { name = "tavily-web-search", editable = "sources/tavily_web_search" }, { name = "yapf", specifier = ">=0.40.0" }, @@ -2463,16 +2466,17 @@ wheels = [ [[package]] name = "langchain-nvidia-ai-endpoints" -version = "1.0.3" +version = "1.2.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, { name = "filetype" }, { name = "langchain-core" }, + { name = "requests" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/5a/9e/30814da280f7a79b168f83180f6a0396c166f86a566e56bb9877bf562611/langchain_nvidia_ai_endpoints-1.0.3.tar.gz", hash = "sha256:11c48fd24e4a9d4c86c65bcef943400f4e709497c93254c7dc97c43f68c2be89", size = 46526, upload-time = "2026-01-28T22:04:33.93Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f0/d5/5d70c7b7a66dfabc4c355e408a066e8bef7d2715f3b7854ce44704886119/langchain_nvidia_ai_endpoints-1.2.1.tar.gz", hash = "sha256:055d2511fa7374da65e5d3dd1705fb09125620a124c6247212d661286f64fa8d", size = 57520, upload-time = "2026-03-16T16:42:34.682Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/67/04/c83f61106a245b74de11c1e075c1cc1e70462ece1dd9fc0584ad992a776d/langchain_nvidia_ai_endpoints-1.0.3-py3-none-any.whl", hash = "sha256:e5f170ad0a335637298bb90fb3df119793821e316355f61ab82f0106913eebbf", size = 50130, upload-time = "2026-01-28T22:04:33.065Z" }, + { url = "https://files.pythonhosted.org/packages/c7/b7/99f72331842b0f62da891411c5590ff7fab70a1a753a9a1be9348921995e/langchain_nvidia_ai_endpoints-1.2.1-py3-none-any.whl", hash = "sha256:eafb2186dea25d163089552c062274664540f2cbe251861c515700645fbf256d", size = 61820, upload-time = "2026-03-16T16:42:33.657Z" }, ] [[package]] @@ -5282,6 +5286,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481, upload-time = "2023-05-01T04:11:28.427Z" }, ] +[[package]] +name = "reranked-search" +version = "1.0.0" +source = { editable = "sources/reranker" } +dependencies = [ + { name = "langchain-core" }, + { name = "langchain-nvidia-ai-endpoints" }, + { name = "nvidia-nat" }, + { name = "pydantic" }, +] + +[package.metadata] +requires-dist = [ + { name = "langchain-core", specifier = ">=1.1.0" }, + { name = "langchain-nvidia-ai-endpoints", specifier = 
">=1.1.0" }, + { name = "nvidia-nat", specifier = "==1.5.0" }, + { name = "pydantic", specifier = ">=2.0.0" }, +] + [[package]] name = "rich" version = "14.3.2" From 851c40ce6a3539802ec68fa021c0fa571f6c4049 Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Tue, 21 Apr 2026 20:19:46 -0700 Subject: [PATCH 2/3] address greptile review comments Signed-off-by: Jack Yu --- sources/knowledge_layer/src/register.py | 20 ++++---- sources/reranker/example_cli_config.yml | 3 +- sources/reranker/src/cross_encoder.py | 67 ------------------------- sources/reranker/src/register.py | 25 +++++---- 4 files changed, 27 insertions(+), 88 deletions(-) delete mode 100644 sources/reranker/src/cross_encoder.py diff --git a/sources/knowledge_layer/src/register.py b/sources/knowledge_layer/src/register.py index d264bb56..9b4f4efc 100644 --- a/sources/knowledge_layer/src/register.py +++ b/sources/knowledge_layer/src/register.py @@ -211,22 +211,24 @@ def _format_results(retrieval_result, query: str) -> str: else: citation = chunk.file_name + chunk_lines = [] # Header with source info - lines.append(f"--- Result {i} ---") - lines.append(f"Source: {chunk.file_name}") + chunk_lines.append(f"--- Result {i} ---") + chunk_lines.append(f"Source: {chunk.file_name}") if chunk.page_number and chunk.page_number > 0: - lines.append(f"Page: {chunk.page_number}") - lines.append(f"Citation: {citation}") - lines.append(f"Content Type: {chunk.content_type.value}") - lines.append(f"Relevance Score: {chunk.score:.2f}") - lines.append("") + chunk_lines.append(f"Page: {chunk.page_number}") + chunk_lines.append(f"Citation: {citation}") + chunk_lines.append(f"Content Type: {chunk.content_type.value}") + chunk_lines.append(f"Relevance Score: {chunk.score:.2f}") + chunk_lines.append("") # Content (truncate if very long) content = chunk.content if len(content) > 1500: content = content[:1500] + "... 
[truncated]" - lines.append(content) - lines.append("") + chunk_lines.append(content) + chunk_lines.append("") + lines.append("\n".join(chunk_lines)) return SOURCE_DELIMITER.join(lines) diff --git a/sources/reranker/example_cli_config.yml b/sources/reranker/example_cli_config.yml index f9991be8..3541c1c6 100644 --- a/sources/reranker/example_cli_config.yml +++ b/sources/reranker/example_cli_config.yml @@ -111,7 +111,7 @@ functions: serper_api_key: ${SERPER_API_KEY} # ======================================================================== - # reranked_search tool. Useful when 2 or more source are given to the agent. + # Example reranked_search tool config. Useful when 2 or more sources are given to the agent. # ======================================================================== reranked_search: _type: reranked_search @@ -119,7 +119,6 @@ functions: cross_encoder_model: nv-rerank-qa-mistral-4b:1 search_tools: - web_search_tool - - advanced_web_search_tool - paper_search_tool top_k: 5 diff --git a/sources/reranker/src/cross_encoder.py b/sources/reranker/src/cross_encoder.py deleted file mode 100644 index 283599dc..00000000 --- a/sources/reranker/src/cross_encoder.py +++ /dev/null @@ -1,67 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Cross-encoder reranker using NVIDIA NIM via LangChain. 
- -Unlike the bi-encoder approach (dense.py) which encodes query and documents -separately then computes cosine similarity, a cross-encoder feeds the -query–document pair together into a single model pass and outputs a -relevance score directly. This is more accurate but slower per document. - -Uses ``langchain-nvidia-ai-endpoints`` (``NVIDIARerank``). Expects -``NVIDIA_API_KEY`` in the environment. -""" - -import logging - -from langchain_core.documents import Document -from langchain_nvidia_ai_endpoints import NVIDIARerank - -logger = logging.getLogger(__name__) - - -def rerank_cross_encoder( - query: str, - results: list[str], - top_k: int, - model_name: str, -) -> list[str]: - """Return the *top_k* results ordered by cross-encoder relevance score. - - Sends query–document pairs to the NVIDIA NIM reranking endpoint and sorts - by the returned relevance scores. - """ - if not results: - return [] - - client = NVIDIARerank(model=model_name) - - documents = [Document(page_content=r) for r in results] - reranked_docs = client.compress_documents(query=query, documents=documents) - - # Map reranked documents back to our SearchResult objects by matching content. 
- content_to_idx: dict[str, int] = {} - for i, r in enumerate(results): - content_to_idx[r] = i - - ranked_results: list[str] = [] - for doc in reranked_docs: - idx = content_to_idx.get(doc.page_content) - if idx is not None: - ranked_results.append(results[idx]) - if len(ranked_results) >= top_k: - break - - return ranked_results diff --git a/sources/reranker/src/register.py b/sources/reranker/src/register.py index ba0c1d3c..0b800de5 100644 --- a/sources/reranker/src/register.py +++ b/sources/reranker/src/register.py @@ -24,6 +24,8 @@ import asyncio import logging +from langchain_core.documents import Document +from langchain_nvidia_ai_endpoints import NVIDIARerank from pydantic import BaseModel from pydantic import Field from pydantic import create_model @@ -34,8 +36,6 @@ from nat.cli.register_workflow import register_function from nat.data_models.function import FunctionBaseConfig -from .cross_encoder import rerank_cross_encoder - logger = logging.getLogger(__name__) @@ -65,11 +65,15 @@ class RerankedSearchConfig(FunctionBaseConfig, name="reranked_search"): @register_function(config_type=RerankedSearchConfig) async def reranked_search(config: RerankedSearchConfig, builder: Builder): - """Register the reranked search tool.""" + """ + A cross-encoder feeds the query–document pair together into a single model pass and outputs a + relevance score directly. This is more accurate but slower per document. + + Uses `langchain-nvidia-ai-endpoints` (`NVIDIARerank`). Expects `NVIDIA_API_KEY` in the environment. + """ + compressor = NVIDIARerank(model=config.cross_encoder_model) # Resolve tool callables at registration time. - # Uses get_tools (not get_function) so function-group sub-tools like - # eci_search_and_retrieve resolve correctly. 
tool_fns: dict[str, object] = {} for name in config.search_tools: try: @@ -97,7 +101,7 @@ async def _call_tool(name: str, fn: object, query: BaseModel) -> list[str]: RerankedSearchInput = create_model( "RerankedSearchInput", - overall_query=str, + overall_query=(str, Field(description="The overarching search query for this reranked search request.")), **{name: fn.input_schema for name, fn in tool_fns.items()}, ) @@ -122,16 +126,17 @@ async def _reranked_search(input: RerankedSearchInput) -> str: all_results = [] for results in per_tool_results: all_results.extend(results) + logger.info(f"{len(all_results)} results found across all search tools.") if not all_results: return "No results found across any search tool." # Rerank. Use the overall_query for reranking purposes. - ranked: list[str] = rerank_cross_encoder( - input.overall_query, all_results, config.top_k, model_name=config.cross_encoder_model - ) + documents = [Document(page_content=r) for r in all_results] + reranked_docs = await compressor.acompress_documents(query=input.overall_query, documents=documents) + ranked_contents: list[str] = [doc.page_content for doc in reranked_docs[: config.top_k]] - return "Top results ranked by relevance:\n" + SOURCE_DELIMITER.join(ranked) + return f"Top {config.top_k} results ranked by relevance:\n" + SOURCE_DELIMITER.join(ranked_contents) yield FunctionInfo.from_fn( _reranked_search, From 3048ee608c99bf5f9e455d062787c83198b7abcb Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Tue, 21 Apr 2026 20:32:14 -0700 Subject: [PATCH 3/3] address greptile review comments, remove unused config Signed-off-by: Jack Yu --- sources/reranker/README.md | 2 +- sources/reranker/example_cli_config.yml | 1 - sources/reranker/src/register.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sources/reranker/README.md b/sources/reranker/README.md index 36f07bd9..78396e42 100644 --- a/sources/reranker/README.md +++ b/sources/reranker/README.md @@ -49,7 +49,7 @@ functions: - 
your_group__tool_2 # # uncomment to adjust default values - # top_k: 5 + # top_k: 5 # adjust as necessary as you add more search tools, meaning more results to rerank. # timeout_seconds: 10 # per-tool timeout ``` diff --git a/sources/reranker/example_cli_config.yml b/sources/reranker/example_cli_config.yml index 3541c1c6..408da549 100644 --- a/sources/reranker/example_cli_config.yml +++ b/sources/reranker/example_cli_config.yml @@ -115,7 +115,6 @@ functions: # ======================================================================== reranked_search: _type: reranked_search - llm: nemotron_nano_llm cross_encoder_model: nv-rerank-qa-mistral-4b:1 search_tools: - web_search_tool diff --git a/sources/reranker/src/register.py b/sources/reranker/src/register.py index 0b800de5..4aafab50 100644 --- a/sources/reranker/src/register.py +++ b/sources/reranker/src/register.py @@ -102,7 +102,7 @@ async def _call_tool(name: str, fn: object, query: BaseModel) -> list[str]: RerankedSearchInput = create_model( "RerankedSearchInput", overall_query=(str, Field(description="The overarching search query for this reranked search request.")), - **{name: fn.input_schema for name, fn in tool_fns.items()}, + **{name: (fn.input_schema, ...) for name, fn in tool_fns.items()}, ) # the query parameter is a pydantic model. It's a nested structure, dynamically