From 9129c8ae813a5867dc5af629a05735782b4ff7b6 Mon Sep 17 00:00:00 2001 From: Jithendra Nara Date: Sat, 23 May 2026 17:33:05 -0400 Subject: [PATCH 1/4] feat: add Brave and TinyFish web search providers Signed-off-by: Jithendra Nara --- README.md | 25 +- deploy/.env.example | 5 +- deploy/Dockerfile | 2 + .../customization/configuration-reference.md | 49 ++++ .../source/customization/tools-and-sources.md | 2 +- docs/source/deployment/docker-build.md | 2 + docs/source/deployment/docker-compose.md | 2 + docs/source/deployment/kubernetes.md | 2 + docs/source/extending/adding-a-data-source.md | 2 + docs/source/extending/adding-a-tool.md | 2 + docs/source/get-started/installation.md | 8 +- docs/source/get-started/quick-start.md | 4 +- docs/source/resources/faq.md | 2 + docs/source/resources/troubleshooting.md | 2 + pyproject.toml | 4 + sources/brave_web_search/README.md | 16 ++ sources/brave_web_search/pyproject.toml | 36 +++ sources/brave_web_search/src/__init__.py | 20 ++ sources/brave_web_search/src/register.py | 212 +++++++++++++++ .../tests/test_brave_register.py | 244 +++++++++++++++++ sources/tinyfish_web_search/README.md | 16 ++ sources/tinyfish_web_search/pyproject.toml | 36 +++ sources/tinyfish_web_search/src/__init__.py | 20 ++ sources/tinyfish_web_search/src/register.py | 194 ++++++++++++++ .../tests/test_tinyfish_register.py | 246 ++++++++++++++++++ uv.lock | 28 ++ 26 files changed, 1174 insertions(+), 7 deletions(-) create mode 100644 sources/brave_web_search/README.md create mode 100644 sources/brave_web_search/pyproject.toml create mode 100644 sources/brave_web_search/src/__init__.py create mode 100644 sources/brave_web_search/src/register.py create mode 100644 sources/brave_web_search/tests/test_brave_register.py create mode 100644 sources/tinyfish_web_search/README.md create mode 100644 sources/tinyfish_web_search/pyproject.toml create mode 100644 sources/tinyfish_web_search/src/__init__.py create mode 100644 sources/tinyfish_web_search/src/register.py 
create mode 100644 sources/tinyfish_web_search/tests/test_tinyfish_register.py diff --git a/README.md b/README.md index 6d21de59..f23826dc 100644 --- a/README.md +++ b/README.md @@ -100,10 +100,10 @@ This project is for: **Optional requirements:** -- Tavily API key (for web search functionality) +- Tavily, Exa, Brave, or TinyFish API key (for web search functionality) - Serper API key (for academic paper search functionality) -> **Note:** Configure at least one data source (Tavily web search, Serper search tool, or knowledge layer) to enable research functionality. +> **Note:** Configure at least one data source (web search, Serper search tool, or knowledge layer) to enable research functionality. If these optional API keys are not provided, the agent continues to operate without the corresponding search capabilities. Refer to [Obtain API Keys](#obtain-api-keys) for details. @@ -176,6 +176,9 @@ uv pip install -e ./frontends/benchmarks/freshqa # Install data sources (pick what you need) uv pip install -e ./sources/tavily_web_search +uv pip install -e ./sources/exa_web_search +uv pip install -e ./sources/brave_web_search +uv pip install -e ./sources/tinyfish_web_search uv pip install -e ./sources/google_scholar_paper_search uv pip install -e "./sources/knowledge_layer[llamaindex,foundational_rag]" ``` @@ -187,6 +190,9 @@ uv pip install -e "./sources/knowledge_layer[llamaindex,foundational_rag]" | ---------- | -------------------- | ------------------------- | ----------------------------------------------------------- | | NVIDIA API | `NVIDIA_API_KEY` | LLM inference through NIM | Yes | | Tavily | `TAVILY_API_KEY` | Web search | No (if not specified, agent continues without web search) | +| Exa | `EXA_API_KEY` | Web search | No (if not specified, agent continues without Exa search) | +| Brave | `BRAVE_API_KEY` | Web search | No (if not specified, agent continues without Brave search) | +| TinyFish | `TINYFISH_API_KEY` | Web search | No (if not specified, agent 
continues without TinyFish search) | | Serper | `SERPER_API_KEY` | Academic paper search | No (if not specified, agent continues without paper search) | @@ -201,6 +207,21 @@ uv pip install -e "./sources/knowledge_layer[llamaindex,foundational_rag]" 2. Navigate to your dashboard 3. Generate an API key +#### Obtain an Exa API Key + +1. Sign in to [Exa](https://exa.ai/) +2. Generate an API key from your dashboard + +#### Obtain a Brave Search API Key + +1. Sign in to [Brave Search API](https://api.search.brave.com/) +2. Subscribe to a search plan and generate a subscription token + +#### Obtain a TinyFish API Key + +1. Sign in to [TinyFish](https://agent.tinyfish.ai/api-keys) +2. Generate an API key from your dashboard + #### Obtain a Serper API Key 1. Sign in to [Serper](https://serper.dev/) diff --git a/deploy/.env.example b/deploy/.env.example index 03b5d713..774bf4c6 100644 --- a/deploy/.env.example +++ b/deploy/.env.example @@ -17,8 +17,11 @@ AIQ_DEV_ENV=cli NVIDIA_API_KEY= -# Web search (Required) +# Web search (Tavily is used by the shipped default configs) TAVILY_API_KEY= +# EXA_API_KEY= # optional, required when using exa_web_search +# BRAVE_API_KEY= # optional, required when using brave_web_search +# TINYFISH_API_KEY= # optional, required when using tinyfish_web_search # Paper search (Optional) # SERPER_API_KEY= # to enable, set API key and update the relevant config in configs/ directory diff --git a/deploy/Dockerfile b/deploy/Dockerfile index 0c5eb391..6055b9c6 100644 --- a/deploy/Dockerfile +++ b/deploy/Dockerfile @@ -77,6 +77,8 @@ RUN uv pip install --no-deps -e . 
\ && uv pip install --no-deps -e ./sources/google_scholar_paper_search \ && uv pip install --no-deps -e ./sources/tavily_web_search \ && uv pip install --no-deps -e ./sources/exa_web_search \ + && uv pip install --no-deps -e ./sources/brave_web_search \ + && uv pip install --no-deps -e ./sources/tinyfish_web_search \ && uv pip install --no-deps -e "./sources/knowledge_layer[all]" \ && uv pip install --no-deps -e ./frontends/aiq_api \ && uv pip install "psycopg[binary]>=3.0.0" diff --git a/docs/source/customization/configuration-reference.md b/docs/source/customization/configuration-reference.md index a47a21f0..cafc76fa 100644 --- a/docs/source/customization/configuration-reference.md +++ b/docs/source/customization/configuration-reference.md @@ -197,6 +197,55 @@ functions: - **`fast`** -- Optimized for low latency. Returns results quickly at the cost of recall and semantic depth. Use for interactive UIs, high-volume calls, or when the query is narrow and keyword-like. - **`deep`** -- Optimized for thoroughness. Runs a more expensive semantic search with broader retrieval. Use for research-quality queries where completeness matters more than speed. +### `brave_web_search` + +Web search powered by the [Brave Search API](https://api.search.brave.com/). + +```yaml +functions: + web_search_tool: + _type: brave_web_search + max_results: 5 + country: US + search_lang: en +``` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `max_results` | `int` | `5` | Maximum number of web results to return. Brave supports up to 20 web results per request. | +| `api_key` | `str` | `None` | Brave Search API subscription token. Falls back to `BRAVE_API_KEY` environment variable. | +| `max_retries` | `int` | `3` | Number of retry attempts on search failure. | +| `country` | `str` | `"US"` | Two-character country code for search results. | +| `search_lang` | `str` | `"en"` | Language code for search results. 
| +| `safesearch` | `str` | `"moderate"` | Adult-content filtering mode: `off`, `moderate`, or `strict`. | +| `freshness` | `str` | `None` | Optional page-age filter such as `pd`, `pw`, `pm`, `py`, or `YYYY-MM-DDtoYYYY-MM-DD`. | +| `timeout` | `float` | `20.0` | HTTP request timeout in seconds. | +| `max_content_length` | `int` | `10000` | Truncate each result's snippet text to this many characters. Set to `None` to disable truncation. | + +### `tinyfish_web_search` + +Web search powered by the [TinyFish Search API](https://docs.tinyfish.ai/search-api). + +```yaml +functions: + web_search_tool: + _type: tinyfish_web_search + max_results: 5 + location: US + language: en +``` + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `max_results` | `int` | `5` | Maximum number of search results to return from the response. | +| `api_key` | `str` | `None` | TinyFish API key. Falls back to `TINYFISH_API_KEY` environment variable. | +| `max_retries` | `int` | `3` | Number of retry attempts on search failure. | +| `location` | `str` | `"US"` | Country code for geo-targeted results. | +| `language` | `str` | `"en"` | Language code for result language. | +| `page` | `int` | `0` | Search result page number, starting from `0`. | +| `timeout` | `float` | `20.0` | HTTP request timeout in seconds. | +| `max_content_length` | `int` | `10000` | Truncate each result's snippet text to this many characters. Set to `None` to disable truncation. | + ### `paper_search` Academic paper search through Google Scholar (using the [Serper API](https://serper.dev/)). 
diff --git a/docs/source/customization/tools-and-sources.md b/docs/source/customization/tools-and-sources.md index 85adf781..7a01f8e3 100644 --- a/docs/source/customization/tools-and-sources.md +++ b/docs/source/customization/tools-and-sources.md @@ -44,7 +44,7 @@ Tools not listed in any data source entry (e.g., utility tools like "think") are | `name` | string | *required* | Display name shown in the UI | | `description` | string | `""` | Human-readable description shown in the UI | | `tools` | list[string] | `[]` | NAT function names or function group names belonging to this source | -| `requires_auth` | bool | `false` | If `true`, the UI greys out this source until the user signs in. Use for sources that need user-level OAuth tokens (e.g., enterprise SSO). Sources that use backend API keys (Tavily, Serper) should leave this `false`. | +| `requires_auth` | bool | `false` | If `true`, the UI greys out this source until the user signs in. Use for sources that need user-level OAuth tokens (e.g., enterprise SSO). Sources that use backend API keys (Tavily, Exa, Brave, TinyFish, Serper) should leave this `false`. 
| | `default_enabled` | bool | `true` | Whether the source is enabled by default when a user first loads the UI | ## Auto-Inherit: Agents Get All Registry Tools by Default diff --git a/docs/source/deployment/docker-build.md b/docs/source/deployment/docker-build.md index 52b5712a..d2ccbbbc 100644 --- a/docs/source/deployment/docker-build.md +++ b/docs/source/deployment/docker-build.md @@ -48,6 +48,8 @@ The builder stage handles all compilation and package installation: - `sources/google_scholar_paper_search` -- Google Scholar search - `sources/tavily_web_search` -- Tavily web search - `sources/exa_web_search` -- Exa web search + - `sources/brave_web_search` -- Brave web search + - `sources/tinyfish_web_search` -- TinyFish web search - `sources/knowledge_layer[all]` -- Knowledge layer with all extras - `frontends/aiq_api` -- [FastAPI](https://fastapi.tiangolo.com/) frontend - `psycopg[binary]>=3.0.0` -- PostgreSQL driver (psycopg v3, installed non-editable) diff --git a/docs/source/deployment/docker-compose.md b/docs/source/deployment/docker-compose.md index 535f61ce..088d2fb3 100644 --- a/docs/source/deployment/docker-compose.md +++ b/docs/source/deployment/docker-compose.md @@ -44,6 +44,8 @@ The sections below explain each group of variables. | `NVIDIA_API_KEY` | Yes | NVIDIA API key for NIM model access. | | `TAVILY_API_KEY` | Conditional | Web search provider key (required if using `tavily_web_search`). | | `EXA_API_KEY` | Conditional | Web search provider key (required if using `exa_web_search`). | +| `BRAVE_API_KEY` | Conditional | Web search provider key (required if using `brave_web_search`). | +| `TINYFISH_API_KEY` | Conditional | Web search provider key (required if using `tinyfish_web_search`). | | `SERPER_API_KEY` | No | Google Scholar paper search key (optional). 
| ### API keys (optional) diff --git a/docs/source/deployment/kubernetes.md b/docs/source/deployment/kubernetes.md index 008ddeef..d2cdc0ee 100644 --- a/docs/source/deployment/kubernetes.md +++ b/docs/source/deployment/kubernetes.md @@ -263,6 +263,8 @@ For complete examples with NGC-specific flags, see `deploy/helm/README.md` in th | Key | Description | |-----|-------------| | `EXA_API_KEY` | Exa API key for web search | +| `BRAVE_API_KEY` | Brave Search API key for web search | +| `TINYFISH_API_KEY` | TinyFish API key for web search | | `SERPER_API_KEY` | Serper API key for Google search | | `JINA_API_KEY` | Jina API key | | `WANDB_API_KEY` | Weights & Biases API key | diff --git a/docs/source/extending/adding-a-data-source.md b/docs/source/extending/adding-a-data-source.md index b829bd04..0130bd8d 100644 --- a/docs/source/extending/adding-a-data-source.md +++ b/docs/source/extending/adding-a-data-source.md @@ -460,6 +460,8 @@ async def search(self, query: str) -> str: |---|---|---|---| | Tavily Web Search | `tavily_web_search` | `sources/tavily_web_search` | General web search through Tavily API | | Exa Web Search | `exa_web_search` | `sources/exa_web_search` | General web search through Exa API | +| Brave Web Search | `brave_web_search` | `sources/brave_web_search` | General web search through Brave Search API | +| TinyFish Web Search | `tinyfish_web_search` | `sources/tinyfish_web_search` | General web search through TinyFish Search API | | Google Scholar | `paper_search` | `sources/google_scholar_paper_search` | Academic papers through Serper/Google Scholar | | Knowledge Layer | `knowledge_retrieval` | `sources/knowledge_layer` | Document retrieval through pluggable backends | diff --git a/docs/source/extending/adding-a-tool.md b/docs/source/extending/adding-a-tool.md index af00a06d..67dc309d 100644 --- a/docs/source/extending/adding-a-tool.md +++ b/docs/source/extending/adding-a-tool.md @@ -420,6 +420,8 @@ f'\n\n{title}\n\n{content}\n' |---|---|---|---| | 
Tavily Web Search | `tavily_web_search` | `sources/tavily_web_search` | `TAVILY_API_KEY` | | Exa Web Search | `exa_web_search` | `sources/exa_web_search` | `EXA_API_KEY` | +| Brave Web Search | `brave_web_search` | `sources/brave_web_search` | `BRAVE_API_KEY` | +| TinyFish Web Search | `tinyfish_web_search` | `sources/tinyfish_web_search` | `TINYFISH_API_KEY` | | Google Scholar | `paper_search` | `sources/google_scholar_paper_search` | `SERPER_API_KEY` | | Knowledge Layer | `knowledge_retrieval` | `sources/knowledge_layer` | (varies by backend) | diff --git a/docs/source/get-started/installation.md b/docs/source/get-started/installation.md index d5fee400..093111c8 100644 --- a/docs/source/get-started/installation.md +++ b/docs/source/get-started/installation.md @@ -51,7 +51,7 @@ The script performs the following steps: 3. Installs the core package with dev dependencies 4. Installs all frontends (CLI, debug console, API server) 5. Installs benchmark packages (freshqa, deepsearch_qa) -6. Installs all data source plugins (Tavily, Exa, Google Scholar, knowledge layer) +6. Installs all data source plugins (Tavily, Exa, Brave, TinyFish, Google Scholar, knowledge layer) 7. Sets up pre-commit hooks 8. Copies `deploy/.env.example` to `deploy/.env` if no `.env` file exists 9. Installs UI npm dependencies (if Node.js is available) @@ -96,6 +96,8 @@ uv pip install -e ./frontends/aiq_api # Unified API server (includes debug) # Data sources (pick what you need) uv pip install -e ./sources/tavily_web_search uv pip install -e ./sources/exa_web_search +uv pip install -e ./sources/brave_web_search +uv pip install -e ./sources/tinyfish_web_search uv pip install -e ./sources/google_scholar_paper_search uv pip install -e "./sources/knowledge_layer[llamaindex,foundational_rag]" @@ -132,9 +134,11 @@ Then edit `deploy/.env` and fill in your keys. 
|----------|----------|---------| | `TAVILY_API_KEY` | [Tavily](https://tavily.com/) | Web search (Tavily provider) | | `EXA_API_KEY` | [Exa](https://exa.ai/) | Web search (Exa provider) | +| `BRAVE_API_KEY` | [Brave Search API](https://api.search.brave.com/) | Web search (Brave provider) | +| `TINYFISH_API_KEY` | [TinyFish](https://agent.tinyfish.ai/api-keys) | Web search (TinyFish provider) | | `SERPER_API_KEY` | [Serper](https://serper.dev/) | Academic paper search (Google Scholar). To enable, uncomment `paper_search_tool` in your config file | -At minimum, you need `NVIDIA_API_KEY` for LLM inference and one of `TAVILY_API_KEY` or `EXA_API_KEY` for web search. Paper search (`SERPER_API_KEY`) is disabled by default in the shipped configs -- refer to the comments in your config file to enable it. +At minimum, you need `NVIDIA_API_KEY` for LLM inference and one configured web-search provider key (`TAVILY_API_KEY`, `EXA_API_KEY`, `BRAVE_API_KEY`, or `TINYFISH_API_KEY`) for web search. Paper search (`SERPER_API_KEY`) is disabled by default in the shipped configs -- refer to the comments in your config file to enable it. ## Verify Installation diff --git a/docs/source/get-started/quick-start.md b/docs/source/get-started/quick-start.md index fbb59bac..02aeb778 100644 --- a/docs/source/get-started/quick-start.md +++ b/docs/source/get-started/quick-start.md @@ -20,8 +20,10 @@ Edit `deploy/.env` and set at minimum: ```bash NVIDIA_API_KEY=nvapi-... TAVILY_API_KEY=tvly-... -# Or, to use Exa instead of Tavily for web search: +# Or, after updating your config to use another web-search provider: # EXA_API_KEY=... +# BRAVE_API_KEY=... +# TINYFISH_API_KEY=... 
``` ## Step 2: Choose a Mode diff --git a/docs/source/resources/faq.md b/docs/source/resources/faq.md index 20916c41..9448e200 100644 --- a/docs/source/resources/faq.md +++ b/docs/source/resources/faq.md @@ -44,6 +44,8 @@ If `enable_escalation: true` in the workflow config, the orchestrator evaluates - **Tavily Web Search** — General web search (requires `TAVILY_API_KEY`) - **Exa Web Search** — General web search via Exa (requires `EXA_API_KEY`) +- **Brave Web Search** — General web search via Brave Search API (requires `BRAVE_API_KEY`) +- **TinyFish Web Search** — General web search via TinyFish Search API (requires `TINYFISH_API_KEY`) - **Google Scholar Paper Search** — Academic paper search (requires `SERPER_API_KEY`) - **Knowledge Layer** — Document retrieval from local or hosted vector stores diff --git a/docs/source/resources/troubleshooting.md b/docs/source/resources/troubleshooting.md index c4dbedbd..710be1b2 100644 --- a/docs/source/resources/troubleshooting.md +++ b/docs/source/resources/troubleshooting.md @@ -25,6 +25,8 @@ Common issues and solutions for the AI-Q blueprint. 
| `Gateway timeout (504)` | Model endpoint overloaded or unavailable | Retry, or switch to a different model in config | | Tavily search returns empty | Invalid `TAVILY_API_KEY` | Verify key at [tavily.com](https://tavily.com) | | Exa search returns empty or 401 | Invalid or missing `EXA_API_KEY` | Verify key at [exa.ai](https://exa.ai) | +| Brave search returns empty or 401 | Invalid or missing `BRAVE_API_KEY` | Verify key at [api.search.brave.com](https://api.search.brave.com/) | +| TinyFish search returns empty or 401 | Invalid or missing `TINYFISH_API_KEY` | Verify key at [agent.tinyfish.ai](https://agent.tinyfish.ai/api-keys) | | Serper search fails | Missing `SERPER_API_KEY` | Set key or remove `paper_search_tool` from config | ## Runtime Issues diff --git a/pyproject.toml b/pyproject.toml index e08479b3..ea61d2f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -190,6 +190,8 @@ dev = [ "google-scholar-paper-search", "tavily-web-search", "exa-web-search", + "brave-web-search", + "tinyfish-web-search", "knowledge-layer[all]", "aiq-api", "aiq-research-cli", @@ -231,6 +233,8 @@ aiq-agent = { workspace = true } google-scholar-paper-search = { workspace = true } tavily-web-search = { workspace = true } exa-web-search = { workspace = true } +brave-web-search = { workspace = true } +tinyfish-web-search = { workspace = true } knowledge-layer = { workspace = true } aiq-api = { workspace = true } aiq-research-cli = { workspace = true } diff --git a/sources/brave_web_search/README.md b/sources/brave_web_search/README.md new file mode 100644 index 00000000..e151e81c --- /dev/null +++ b/sources/brave_web_search/README.md @@ -0,0 +1,16 @@ +# Brave Web Search + +NAT tool package for the Brave Search API web search endpoint. + +## Configuration + +```yaml +functions: + web_search_tool: + _type: brave_web_search + max_results: 5 + country: US + search_lang: en +``` + +Set `BRAVE_API_KEY` in the environment, or provide `api_key` in the workflow config. 
diff --git a/sources/brave_web_search/pyproject.toml b/sources/brave_web_search/pyproject.toml new file mode 100644 index 00000000..e9ac2766 --- /dev/null +++ b/sources/brave_web_search/pyproject.toml @@ -0,0 +1,36 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[build-system] +build-backend = "setuptools.build_meta" +requires = ["setuptools >= 64", "setuptools-scm>=8"] + +[tool.setuptools] +packages = ["brave_web_search"] +package-dir = {"brave_web_search" = "src"} + +[project] +name = "brave-web-search" +version = "1.0.0" +description = "NAT-based Brave Search API web search tool" +readme = "README.md" +requires-python = ">=3.11,<3.14" +license = {text = "Apache-2.0"} +dependencies = [ + "pydantic>=2.0.0", +] + +[project.entry-points."nat.plugins"] +brave_web_search = "brave_web_search.register" diff --git a/sources/brave_web_search/src/__init__.py b/sources/brave_web_search/src/__init__.py new file mode 100644 index 00000000..da93dbb8 --- /dev/null +++ b/sources/brave_web_search/src/__init__.py @@ -0,0 +1,20 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .register import brave_web_search # noqa: F401 + +__all__ = [ + "brave_web_search", +] diff --git a/sources/brave_web_search/src/register.py b/sources/brave_web_search/src/register.py new file mode 100644 index 00000000..4b345d29 --- /dev/null +++ b/sources/brave_web_search/src/register.py @@ -0,0 +1,212 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import asyncio +import json +import logging +import os +import urllib.error +import urllib.parse +import urllib.request +from collections.abc import AsyncGenerator +from typing import Literal + +from pydantic import Field +from pydantic import SecretStr + +from nat.builder.builder import Builder +from nat.builder.function_info import FunctionInfo +from nat.cli.register_workflow import register_function +from nat.data_models.function import FunctionBaseConfig + +logger = logging.getLogger(__name__) + +BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/web/search" + +_missing_key_warned = False + + +class BraveWebSearchToolConfig(FunctionBaseConfig, name="brave_web_search"): + """ + Tool that retrieves relevant contexts from web search (using Brave Search) for the given question. + Requires a BRAVE_API_KEY environment variable or api_key config. + """ + + max_results: int = Field(default=5, ge=1, le=20, description="Maximum number of web results to return") + api_key: SecretStr | None = Field(default=None, description="The subscription token for Brave Search API") + max_retries: int = Field(default=3, ge=1, description="Maximum number of retries for the search request") + country: str = Field(default="US", description="Two-character country code for search results") + search_lang: str = Field(default="en", description="Language code for search results") + safesearch: Literal["off", "moderate", "strict"] = Field( + default="moderate", + description="Adult-content filtering mode", + ) + freshness: str | None = Field( + default=None, + description="Optional page-age filter such as pd, pw, pm, py, or YYYY-MM-DDtoYYYY-MM-DD", + ) + timeout: float = Field(default=20.0, gt=0, description="HTTP request timeout in seconds") + max_content_length: int | None = Field( + default=10000, + description="Max characters per result snippet. 
If set, truncates each result to reduce token usage.", + ) + + +def _http_get_json(url: str, headers: dict[str, str], timeout: float) -> dict: + request = urllib.request.Request(url, headers=headers, method="GET") + try: + with urllib.request.urlopen(request, timeout=timeout) as response: + body = response.read().decode("utf-8") + except urllib.error.HTTPError as e: + error_body = e.read().decode("utf-8", errors="replace") + raise RuntimeError(f"{e.code} {e.reason}: {error_body}") from e + except urllib.error.URLError as e: + raise RuntimeError(str(e.reason)) from e + + try: + payload = json.loads(body) + except json.JSONDecodeError as e: + raise ValueError("Search returned invalid JSON") from e + + if not isinstance(payload, dict): + raise ValueError(f"Search returned unexpected response type: {type(payload).__name__}") + return payload + + +def _truncate_content(content: str, max_content_length: int | None) -> str: + if max_content_length and len(content) > max_content_length: + return content[: max_content_length - 3] + "..." 
+ return content + + +def _render_document(result: dict, max_content_length: int | None) -> str: + url = result.get("url", "") or "" + title = result.get("title", "") or "" + snippets: list[str] = [] + description = result.get("description") or result.get("snippet") or "" + if description: + snippets.append(str(description)) + extra_snippets = result.get("extra_snippets") or [] + if isinstance(extra_snippets, list): + snippets.extend(str(snippet) for snippet in extra_snippets if snippet) + body = _truncate_content("\n".join(snippets), max_content_length) + return f'<Document href="{url}">\n<title>\n{title}\n</title>\n{body}\n</Document>' + + +@register_function(config_type=BraveWebSearchToolConfig) +async def brave_web_search( + tool_config: BraveWebSearchToolConfig, + builder: Builder, +) -> AsyncGenerator[FunctionInfo, None]: + """Register the Brave Search API web search tool with NAT.""" + + if not os.environ.get("BRAVE_API_KEY") and tool_config.api_key: + os.environ["BRAVE_API_KEY"] = tool_config.api_key.get_secret_value() + + if not os.environ.get("BRAVE_API_KEY"): + global _missing_key_warned + if not _missing_key_warned: + logger.warning( + "BRAVE_API_KEY not found. The Brave web search tool will be registered but will " + "return an error when called. To enable: set BRAVE_API_KEY in your environment, " + ".env file, or specify api_key in your workflow config." + ) + _missing_key_warned = True + + async def _brave_web_search_stub(question: str) -> str: + """Brave web search tool (unavailable - missing BRAVE_API_KEY).""" + return ( + "Error: Brave web search is unavailable because BRAVE_API_KEY is not set.\n" + "To enable this tool:\n" + "1. Get a subscription token from https://api.search.brave.com/\n" + "2. Set the API key in your environment or in your .env file\n" + "3. 
Restart the application" + ) + + yield FunctionInfo.from_fn( + _brave_web_search_stub, + description=_brave_web_search_stub.__doc__, + ) + return + + async def _brave_web_search(question: str) -> str: + """Retrieves relevant contexts from web search (using Brave Search) for the given question. + + Args: + question (str): The question to be answered. Will be truncated to 400 characters if longer. + + Returns: + str: The web search results containing relevant documents and their URLs. + """ + if len(question) > 400: + question = question[:397] + "..." + + params = { + "q": question, + "count": str(tool_config.max_results), + "country": tool_config.country, + "search_lang": tool_config.search_lang, + "safesearch": tool_config.safesearch, + "text_decorations": "false", + "spellcheck": "true", + } + if tool_config.freshness: + params["freshness"] = tool_config.freshness + + url = f"{BRAVE_SEARCH_URL}?{urllib.parse.urlencode(params)}" + headers = { + "Accept": "application/json", + "User-Agent": "aiq-brave-web-search/1.0", + "X-Subscription-Token": os.environ["BRAVE_API_KEY"], + } + + for attempt in range(tool_config.max_retries): + try: + payload = await asyncio.to_thread(_http_get_json, url, headers, tool_config.timeout) + web_payload = payload.get("web") or {} + results = web_payload.get("results") if isinstance(web_payload, dict) else None + if not isinstance(results, list): + raise ValueError("Search returned no web results") + if not results: + raise ValueError("Search returned no results") + + web_search_results = "\n\n---\n\n".join( + _render_document(result, tool_config.max_content_length) + for result in results[: tool_config.max_results] + if isinstance(result, dict) + ) + return web_search_results if web_search_results else "Search returned no results" + + except Exception as e: + if attempt == tool_config.max_retries - 1: + error_msg = str(e) + if isinstance(e, ValueError): + return error_msg + if "401" in error_msg or "Unauthorized" in error_msg: + return ( 
+ "Error: Brave web search failed due to invalid API key (401 Unauthorized).\n" + "Please check your BRAVE_API_KEY and ensure it is valid.\n" + ) + if "429" in error_msg: + return "Error: Brave web search failed because the Brave Search API rate limit was exceeded.\n" + return f"Error: Brave web search failed - {error_msg}" + await asyncio.sleep(2**attempt) + + return "Error: Search failed after all retries" + + yield FunctionInfo.from_fn( + _brave_web_search, + description=_brave_web_search.__doc__, + ) diff --git a/sources/brave_web_search/tests/test_brave_register.py b/sources/brave_web_search/tests/test_brave_register.py new file mode 100644 index 00000000..9a70efb1 --- /dev/null +++ b/sources/brave_web_search/tests/test_brave_register.py @@ -0,0 +1,244 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for the brave_web_search NAT registration.""" + +import os +import urllib.parse +from unittest.mock import MagicMock + +from brave_web_search.register import BRAVE_SEARCH_URL +from brave_web_search.register import BraveWebSearchToolConfig +from brave_web_search.register import brave_web_search +from pydantic import SecretStr + + +def _search_payload(results=None): + return { + "type": "search", + "web": { + "results": results if results is not None else [], + }, + } + + +def _parse_query(url): + parsed = urllib.parse.urlparse(url) + return urllib.parse.parse_qs(parsed.query) + + +async def _no_sleep(_): + return None + + +class TestBraveWebSearchToolConfig: + def test_defaults(self): + config = BraveWebSearchToolConfig() + assert config.max_results == 5 + assert config.api_key is None + assert config.max_retries == 3 + assert config.country == "US" + assert config.search_lang == "en" + assert config.safesearch == "moderate" + assert config.freshness is None + assert config.timeout == 20.0 + assert config.max_content_length == 10000 + + def test_all_fields(self): + config = BraveWebSearchToolConfig( + max_results=10, + api_key=SecretStr("brave-token"), + max_retries=1, + country="GB", + search_lang="en", + safesearch="strict", + freshness="pw", + timeout=5.0, + max_content_length=50, + ) + assert config.max_results == 10 + assert config.api_key.get_secret_value() == "brave-token" + assert config.max_retries == 1 + assert config.country == "GB" + assert config.safesearch == "strict" + assert config.freshness == "pw" + assert config.timeout == 5.0 + assert config.max_content_length == 50 + + def test_inherits_from_function_base_config(self): + from nat.data_models.function import FunctionBaseConfig + + assert issubclass(BraveWebSearchToolConfig, FunctionBaseConfig) + + +class TestBraveWebSearchStub: + async def test_stub_when_no_api_key(self, monkeypatch): + import brave_web_search.register as reg + + reg._missing_key_warned = False + 
monkeypatch.delenv("BRAVE_API_KEY", raising=False) + config = BraveWebSearchToolConfig() + builder = MagicMock() + + async with brave_web_search(config, builder) as info: + result = await info.single_fn("anything") + + assert "BRAVE_API_KEY" in result + assert "unavailable" in result.lower() + + +class TestBraveWebSearchLive: + async def test_api_key_from_config_sets_env(self, monkeypatch): + fake_request = MagicMock( + return_value=_search_payload( + [ + { + "url": "https://a.example", + "title": "A", + "description": "body a", + } + ] + ) + ) + monkeypatch.delenv("BRAVE_API_KEY", raising=False) + monkeypatch.setattr("brave_web_search.register._http_get_json", fake_request) + + config = BraveWebSearchToolConfig(api_key=SecretStr("token-from-config")) + builder = MagicMock() + async with brave_web_search(config, builder) as info: + out = await info.single_fn("question") + + assert os.environ.get("BRAVE_API_KEY") == "token-from-config" + assert "https://a.example" in out + assert "body a" in out + + async def test_successful_search_formats_documents(self, monkeypatch): + fake_request = MagicMock( + return_value=_search_payload( + [ + { + "url": "https://a.example", + "title": "Title A", + "description": "Body A", + }, + { + "url": "https://b.example", + "title": "Title B", + "description": "Body B", + "extra_snippets": ["Extra B"], + }, + ] + ) + ) + monkeypatch.setenv("BRAVE_API_KEY", "brave-env") + monkeypatch.setattr("brave_web_search.register._http_get_json", fake_request) + + config = BraveWebSearchToolConfig(max_results=2, country="US", search_lang="en", freshness="pw") + builder = MagicMock() + async with brave_web_search(config, builder) as info: + out = await info.single_fn("query") + + assert "Title A" in out + assert "Title B" in out + assert "Body A" in out + assert "Extra B" in out + assert "---" in out + + request_url, headers, timeout = fake_request.call_args.args + assert request_url.startswith(BRAVE_SEARCH_URL) + params = _parse_query(request_url) + 
assert params["q"] == ["query"] + assert params["count"] == ["2"] + assert params["country"] == ["US"] + assert params["search_lang"] == ["en"] + assert params["safesearch"] == ["moderate"] + assert params["freshness"] == ["pw"] + assert params["text_decorations"] == ["false"] + assert headers["X-Subscription-Token"] == "brave-env" + assert timeout == 20.0 + + async def test_truncates_long_query(self, monkeypatch): + fake_request = MagicMock(return_value=_search_payload([{"url": "u", "title": "t", "description": "body"}])) + monkeypatch.setenv("BRAVE_API_KEY", "brave-env") + monkeypatch.setattr("brave_web_search.register._http_get_json", fake_request) + + config = BraveWebSearchToolConfig() + builder = MagicMock() + async with brave_web_search(config, builder) as info: + await info.single_fn("x" * 500) + + params = _parse_query(fake_request.call_args.args[0]) + assert len(params["q"][0]) == 400 + assert params["q"][0].endswith("...") + + async def test_truncates_content(self, monkeypatch): + fake_request = MagicMock( + return_value=_search_payload([{"url": "u", "title": "t", "description": "abcdefghijklmnop"}]) + ) + monkeypatch.setenv("BRAVE_API_KEY", "brave-env") + monkeypatch.setattr("brave_web_search.register._http_get_json", fake_request) + + config = BraveWebSearchToolConfig(max_content_length=8) + builder = MagicMock() + async with brave_web_search(config, builder) as info: + out = await info.single_fn("q") + + assert "abcde..." 
in out + assert "abcdefghi" not in out + + async def test_empty_results_returns_error(self, monkeypatch): + fake_request = MagicMock(return_value=_search_payload([])) + monkeypatch.setenv("BRAVE_API_KEY", "brave-env") + monkeypatch.setattr("brave_web_search.register._http_get_json", fake_request) + + config = BraveWebSearchToolConfig(max_retries=1) + builder = MagicMock() + async with brave_web_search(config, builder) as info: + out = await info.single_fn("q") + + assert "no results" in out.lower() + + async def test_retries_then_succeeds(self, monkeypatch): + fake_request = MagicMock( + side_effect=[ + RuntimeError("transient"), + _search_payload([{"url": "u", "title": "t", "description": "ok"}]), + ] + ) + monkeypatch.setenv("BRAVE_API_KEY", "brave-env") + monkeypatch.setattr("brave_web_search.register._http_get_json", fake_request) + monkeypatch.setattr("brave_web_search.register.asyncio.sleep", _no_sleep) + + config = BraveWebSearchToolConfig(max_retries=3) + builder = MagicMock() + async with brave_web_search(config, builder) as info: + out = await info.single_fn("q") + + assert "ok" in out + assert fake_request.call_count == 2 + + async def test_401_returns_friendly_message(self, monkeypatch): + fake_request = MagicMock(side_effect=RuntimeError("401 Unauthorized")) + monkeypatch.setenv("BRAVE_API_KEY", "brave-env") + monkeypatch.setattr("brave_web_search.register._http_get_json", fake_request) + monkeypatch.setattr("brave_web_search.register.asyncio.sleep", _no_sleep) + + config = BraveWebSearchToolConfig(max_retries=2) + builder = MagicMock() + async with brave_web_search(config, builder) as info: + out = await info.single_fn("q") + + assert "401" in out + assert "BRAVE_API_KEY" in out diff --git a/sources/tinyfish_web_search/README.md b/sources/tinyfish_web_search/README.md new file mode 100644 index 00000000..b8f5eaa6 --- /dev/null +++ b/sources/tinyfish_web_search/README.md @@ -0,0 +1,16 @@ +# TinyFish Web Search + +NAT tool package for the TinyFish 
Search API. + +## Configuration + +```yaml +functions: + web_search_tool: + _type: tinyfish_web_search + max_results: 5 + location: US + language: en +``` + +Set `TINYFISH_API_KEY` in the environment, or provide `api_key` in the workflow config. diff --git a/sources/tinyfish_web_search/pyproject.toml b/sources/tinyfish_web_search/pyproject.toml new file mode 100644 index 00000000..57cc11f6 --- /dev/null +++ b/sources/tinyfish_web_search/pyproject.toml @@ -0,0 +1,36 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +[build-system] +build-backend = "setuptools.build_meta" +requires = ["setuptools >= 64", "setuptools-scm>=8"] + +[tool.setuptools] +packages = ["tinyfish_web_search"] +package-dir = {"tinyfish_web_search" = "src"} + +[project] +name = "tinyfish-web-search" +version = "1.0.0" +description = "NAT-based TinyFish Search API web search tool" +readme = "README.md" +requires-python = ">=3.11,<3.14" +license = {text = "Apache-2.0"} +dependencies = [ + "pydantic>=2.0.0", +] + +[project.entry-points."nat.plugins"] +tinyfish_web_search = "tinyfish_web_search.register" diff --git a/sources/tinyfish_web_search/src/__init__.py b/sources/tinyfish_web_search/src/__init__.py new file mode 100644 index 00000000..ad78cc5b --- /dev/null +++ b/sources/tinyfish_web_search/src/__init__.py @@ -0,0 +1,20 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .register import tinyfish_web_search # noqa: F401 + +__all__ = [ + "tinyfish_web_search", +] diff --git a/sources/tinyfish_web_search/src/register.py b/sources/tinyfish_web_search/src/register.py new file mode 100644 index 00000000..639d521e --- /dev/null +++ b/sources/tinyfish_web_search/src/register.py @@ -0,0 +1,194 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import asyncio +import json +import logging +import os +import urllib.error +import urllib.parse +import urllib.request +from collections.abc import AsyncGenerator + +from pydantic import Field +from pydantic import SecretStr + +from nat.builder.builder import Builder +from nat.builder.function_info import FunctionInfo +from nat.cli.register_workflow import register_function +from nat.data_models.function import FunctionBaseConfig + +logger = logging.getLogger(__name__) + +TINYFISH_SEARCH_URL = "https://api.search.tinyfish.ai" + +_missing_key_warned = False + + +class TinyfishWebSearchToolConfig(FunctionBaseConfig, name="tinyfish_web_search"): + """ + Tool that retrieves relevant contexts from web search (using TinyFish Search) for the given question. + Requires a TINYFISH_API_KEY environment variable or api_key config. 
+ """ + + max_results: int = Field(default=5, ge=1, description="Maximum number of search results to return") + api_key: SecretStr | None = Field(default=None, description="The API key for the TinyFish service") + max_retries: int = Field(default=3, ge=1, description="Maximum number of retries for the search request") + location: str = Field(default="US", description="Country code for geo-targeted results") + language: str = Field(default="en", description="Language code for result language") + page: int = Field(default=0, ge=0, le=10, description="Search result page number, starting from 0") + timeout: float = Field(default=20.0, gt=0, description="HTTP request timeout in seconds") + max_content_length: int | None = Field( + default=10000, + description="Max characters per result snippet. If set, truncates each result to reduce token usage.", + ) + + +def _http_get_json(url: str, headers: dict[str, str], timeout: float) -> dict: + request = urllib.request.Request(url, headers=headers, method="GET") + try: + with urllib.request.urlopen(request, timeout=timeout) as response: + body = response.read().decode("utf-8") + except urllib.error.HTTPError as e: + error_body = e.read().decode("utf-8", errors="replace") + raise RuntimeError(f"{e.code} {e.reason}: {error_body}") from e + except urllib.error.URLError as e: + raise RuntimeError(str(e.reason)) from e + + try: + payload = json.loads(body) + except json.JSONDecodeError as e: + raise ValueError("Search returned invalid JSON") from e + + if not isinstance(payload, dict): + raise ValueError(f"Search returned unexpected response type: {type(payload).__name__}") + return payload + + +def _truncate_content(content: str, max_content_length: int | None) -> str: + if max_content_length and len(content) > max_content_length: + return content[: max_content_length - 3] + "..." 
+ return content + + +def _render_document(result: dict, max_content_length: int | None) -> str: + url = result.get("url", "") or "" + title = result.get("title", "") or "" + site_name = result.get("site_name") or "" + snippet = result.get("snippet") or result.get("description") or "" + body_parts = [str(part) for part in (site_name, snippet) if part] + body = _truncate_content("\n".join(body_parts), max_content_length) + return f'\n\n{title}\n\n{body}\n' + + +@register_function(config_type=TinyfishWebSearchToolConfig) +async def tinyfish_web_search( + tool_config: TinyfishWebSearchToolConfig, + builder: Builder, +) -> AsyncGenerator[FunctionInfo, None]: + """Register the TinyFish Search API web search tool with NAT.""" + + if not os.environ.get("TINYFISH_API_KEY") and tool_config.api_key: + os.environ["TINYFISH_API_KEY"] = tool_config.api_key.get_secret_value() + + if not os.environ.get("TINYFISH_API_KEY"): + global _missing_key_warned + if not _missing_key_warned: + logger.warning( + "TINYFISH_API_KEY not found. The TinyFish web search tool will be registered but will " + "return an error when called. To enable: set TINYFISH_API_KEY in your environment, " + ".env file, or specify api_key in your workflow config." + ) + _missing_key_warned = True + + async def _tinyfish_web_search_stub(question: str) -> str: + """TinyFish web search tool (unavailable - missing TINYFISH_API_KEY).""" + return ( + "Error: TinyFish web search is unavailable because TINYFISH_API_KEY is not set.\n" + "To enable this tool:\n" + "1. Get an API key from https://agent.tinyfish.ai/api-keys\n" + "2. Set the API key in your environment or in your .env file\n" + "3. Restart the application" + ) + + yield FunctionInfo.from_fn( + _tinyfish_web_search_stub, + description=_tinyfish_web_search_stub.__doc__, + ) + return + + async def _tinyfish_web_search(question: str) -> str: + """Retrieves relevant contexts from web search (using TinyFish Search) for the given question. 
+ + Args: + question (str): The question to be answered. + + Returns: + str: The web search results containing relevant documents and their URLs. + """ + params = { + "query": question, + "location": tool_config.location, + "language": tool_config.language, + "page": str(tool_config.page), + } + + url = f"{TINYFISH_SEARCH_URL}?{urllib.parse.urlencode(params)}" + headers = { + "Accept": "application/json", + "User-Agent": "aiq-tinyfish-web-search/1.0", + "X-API-Key": os.environ["TINYFISH_API_KEY"], + } + + for attempt in range(tool_config.max_retries): + try: + payload = await asyncio.to_thread(_http_get_json, url, headers, tool_config.timeout) + results = payload.get("results") + if not isinstance(results, list): + raise ValueError("Search returned no results") + if not results: + raise ValueError("Search returned no results") + + web_search_results = "\n\n---\n\n".join( + _render_document(result, tool_config.max_content_length) + for result in results[: tool_config.max_results] + if isinstance(result, dict) + ) + return web_search_results if web_search_results else "Search returned no results" + + except Exception as e: + if attempt == tool_config.max_retries - 1: + error_msg = str(e) + if isinstance(e, ValueError): + return error_msg + if "401" in error_msg or "Unauthorized" in error_msg: + return ( + "Error: TinyFish web search failed due to invalid API key (401 Unauthorized).\n" + "Please check your TINYFISH_API_KEY and ensure it is valid.\n" + ) + if "429" in error_msg: + return ( + "Error: TinyFish web search failed because the TinyFish Search API " + "rate limit was exceeded.\n" + ) + return f"Error: TinyFish web search failed - {error_msg}" + await asyncio.sleep(2**attempt) + + return "Error: Search failed after all retries" + + yield FunctionInfo.from_fn( + _tinyfish_web_search, + description=_tinyfish_web_search.__doc__, + ) diff --git a/sources/tinyfish_web_search/tests/test_tinyfish_register.py 
b/sources/tinyfish_web_search/tests/test_tinyfish_register.py new file mode 100644 index 00000000..ed835976 --- /dev/null +++ b/sources/tinyfish_web_search/tests/test_tinyfish_register.py @@ -0,0 +1,246 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for the tinyfish_web_search NAT registration.""" + +import os +import urllib.parse +from unittest.mock import MagicMock + +from pydantic import SecretStr +from tinyfish_web_search.register import TINYFISH_SEARCH_URL +from tinyfish_web_search.register import TinyfishWebSearchToolConfig +from tinyfish_web_search.register import tinyfish_web_search + + +def _search_payload(results=None): + return { + "query": "query", + "results": results if results is not None else [], + "total_results": len(results or []), + "page": 0, + } + + +def _parse_query(url): + parsed = urllib.parse.urlparse(url) + return urllib.parse.parse_qs(parsed.query) + + +async def _no_sleep(_): + return None + + +class TestTinyfishWebSearchToolConfig: + def test_defaults(self): + config = TinyfishWebSearchToolConfig() + assert config.max_results == 5 + assert config.api_key is None + assert config.max_retries == 3 + assert config.location == "US" + assert config.language == "en" + assert config.page == 0 + assert config.timeout == 20.0 + assert config.max_content_length == 10000 + + def 
test_all_fields(self): + config = TinyfishWebSearchToolConfig( + max_results=10, + api_key=SecretStr("tinyfish-key"), + max_retries=1, + location="FR", + language="fr", + page=2, + timeout=5.0, + max_content_length=50, + ) + assert config.max_results == 10 + assert config.api_key.get_secret_value() == "tinyfish-key" + assert config.max_retries == 1 + assert config.location == "FR" + assert config.language == "fr" + assert config.page == 2 + assert config.timeout == 5.0 + assert config.max_content_length == 50 + + def test_inherits_from_function_base_config(self): + from nat.data_models.function import FunctionBaseConfig + + assert issubclass(TinyfishWebSearchToolConfig, FunctionBaseConfig) + + +class TestTinyfishWebSearchStub: + async def test_stub_when_no_api_key(self, monkeypatch): + import tinyfish_web_search.register as reg + + reg._missing_key_warned = False + monkeypatch.delenv("TINYFISH_API_KEY", raising=False) + config = TinyfishWebSearchToolConfig() + builder = MagicMock() + + async with tinyfish_web_search(config, builder) as info: + result = await info.single_fn("anything") + + assert "TINYFISH_API_KEY" in result + assert "unavailable" in result.lower() + + +class TestTinyfishWebSearchLive: + async def test_api_key_from_config_sets_env(self, monkeypatch): + fake_request = MagicMock( + return_value=_search_payload( + [ + { + "url": "https://a.example", + "title": "A", + "snippet": "body a", + } + ] + ) + ) + monkeypatch.delenv("TINYFISH_API_KEY", raising=False) + monkeypatch.setattr("tinyfish_web_search.register._http_get_json", fake_request) + + config = TinyfishWebSearchToolConfig(api_key=SecretStr("key-from-config")) + builder = MagicMock() + async with tinyfish_web_search(config, builder) as info: + out = await info.single_fn("question") + + assert os.environ.get("TINYFISH_API_KEY") == "key-from-config" + assert "https://a.example" in out + assert "body a" in out + + async def test_successful_search_formats_documents(self, monkeypatch): + fake_request 
= MagicMock( + return_value=_search_payload( + [ + { + "url": "https://a.example", + "title": "Title A", + "snippet": "Body A", + "site_name": "a.example", + }, + { + "url": "https://b.example", + "title": "Title B", + "snippet": "Body B", + "site_name": "b.example", + }, + ] + ) + ) + monkeypatch.setenv("TINYFISH_API_KEY", "tinyfish-env") + monkeypatch.setattr("tinyfish_web_search.register._http_get_json", fake_request) + + config = TinyfishWebSearchToolConfig(max_results=2, location="FR", language="fr", page=2) + builder = MagicMock() + async with tinyfish_web_search(config, builder) as info: + out = await info.single_fn("query") + + assert "Title A" in out + assert "Title B" in out + assert "Body A" in out + assert "a.example" in out + assert "---" in out + + request_url, headers, timeout = fake_request.call_args.args + assert request_url.startswith(TINYFISH_SEARCH_URL) + params = _parse_query(request_url) + assert params["query"] == ["query"] + assert params["location"] == ["FR"] + assert params["language"] == ["fr"] + assert params["page"] == ["2"] + assert headers["X-API-Key"] == "tinyfish-env" + assert timeout == 20.0 + + async def test_limits_results_to_max_results(self, monkeypatch): + fake_request = MagicMock( + return_value=_search_payload( + [ + {"url": "https://a.example", "title": "A", "snippet": "A body"}, + {"url": "https://b.example", "title": "B", "snippet": "B body"}, + ] + ) + ) + monkeypatch.setenv("TINYFISH_API_KEY", "tinyfish-env") + monkeypatch.setattr("tinyfish_web_search.register._http_get_json", fake_request) + + config = TinyfishWebSearchToolConfig(max_results=1) + builder = MagicMock() + async with tinyfish_web_search(config, builder) as info: + out = await info.single_fn("q") + + assert "A body" in out + assert "B body" not in out + + async def test_truncates_content(self, monkeypatch): + fake_request = MagicMock( + return_value=_search_payload([{"url": "u", "title": "t", "snippet": "abcdefghijklmnop"}]) + ) + 
monkeypatch.setenv("TINYFISH_API_KEY", "tinyfish-env") + monkeypatch.setattr("tinyfish_web_search.register._http_get_json", fake_request) + + config = TinyfishWebSearchToolConfig(max_content_length=8) + builder = MagicMock() + async with tinyfish_web_search(config, builder) as info: + out = await info.single_fn("q") + + assert "abcde..." in out + assert "abcdefghi" not in out + + async def test_empty_results_returns_error(self, monkeypatch): + fake_request = MagicMock(return_value=_search_payload([])) + monkeypatch.setenv("TINYFISH_API_KEY", "tinyfish-env") + monkeypatch.setattr("tinyfish_web_search.register._http_get_json", fake_request) + + config = TinyfishWebSearchToolConfig(max_retries=1) + builder = MagicMock() + async with tinyfish_web_search(config, builder) as info: + out = await info.single_fn("q") + + assert "no results" in out.lower() + + async def test_retries_then_succeeds(self, monkeypatch): + fake_request = MagicMock( + side_effect=[ + RuntimeError("transient"), + _search_payload([{"url": "u", "title": "t", "snippet": "ok"}]), + ] + ) + monkeypatch.setenv("TINYFISH_API_KEY", "tinyfish-env") + monkeypatch.setattr("tinyfish_web_search.register._http_get_json", fake_request) + monkeypatch.setattr("tinyfish_web_search.register.asyncio.sleep", _no_sleep) + + config = TinyfishWebSearchToolConfig(max_retries=3) + builder = MagicMock() + async with tinyfish_web_search(config, builder) as info: + out = await info.single_fn("q") + + assert "ok" in out + assert fake_request.call_count == 2 + + async def test_401_returns_friendly_message(self, monkeypatch): + fake_request = MagicMock(side_effect=RuntimeError("401 Unauthorized")) + monkeypatch.setenv("TINYFISH_API_KEY", "tinyfish-env") + monkeypatch.setattr("tinyfish_web_search.register._http_get_json", fake_request) + monkeypatch.setattr("tinyfish_web_search.register.asyncio.sleep", _no_sleep) + + config = TinyfishWebSearchToolConfig(max_retries=2) + builder = MagicMock() + async with 
tinyfish_web_search(config, builder) as info: + out = await info.single_fn("q") + + assert "401" in out + assert "TINYFISH_API_KEY" in out diff --git a/uv.lock b/uv.lock index bbffebe9..1cf92d34 100644 --- a/uv.lock +++ b/uv.lock @@ -19,12 +19,14 @@ members = [ "aiq-api", "aiq-debug", "aiq-research-cli", + "brave-web-search", "deepsearch-qa-evaluator", "exa-web-search", "freshqa-eval", "google-scholar-paper-search", "knowledge-layer", "tavily-web-search", + "tinyfish-web-search", ] overrides = [ { name = "authlib", specifier = ">=1.6.11,<2" }, @@ -271,6 +273,7 @@ dev = [ { name = "aiq-api" }, { name = "aiq-debug" }, { name = "aiq-research-cli" }, + { name = "brave-web-search" }, { name = "dask", extra = ["distributed"] }, { name = "exa-web-search" }, { name = "google-scholar-paper-search" }, @@ -283,6 +286,7 @@ dev = [ { name = "pytest-env" }, { name = "ruff" }, { name = "tavily-web-search" }, + { name = "tinyfish-web-search" }, { name = "yapf" }, ] @@ -331,6 +335,7 @@ dev = [ { name = "aiq-api", editable = "frontends/aiq_api" }, { name = "aiq-debug", editable = "frontends/debug" }, { name = "aiq-research-cli", editable = "frontends/cli" }, + { name = "brave-web-search", editable = "sources/brave_web_search" }, { name = "dask", extras = ["distributed"], specifier = ">=2024.1.0" }, { name = "exa-web-search", editable = "sources/exa_web_search" }, { name = "google-scholar-paper-search", editable = "sources/google_scholar_paper_search" }, @@ -343,6 +348,7 @@ dev = [ { name = "pytest-env", specifier = ">=1.1.0" }, { name = "ruff", specifier = "~=0.15.1" }, { name = "tavily-web-search", editable = "sources/tavily_web_search" }, + { name = "tinyfish-web-search", editable = "sources/tinyfish_web_search" }, { name = "yapf", specifier = ">=0.40.0" }, ] @@ -729,6 +735,17 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9d/2a/9186535ce58db529927f6cf5990a849aa9e052eea3e2cfefe20b9e1802da/bracex-2.6-py3-none-any.whl", hash = 
"sha256:0b0049264e7340b3ec782b5cb99beb325f36c3782a32e36e876452fd49a09952", size = 11508, upload-time = "2025-06-22T19:12:29.781Z" }, ] +[[package]] +name = "brave-web-search" +version = "1.0.0" +source = { editable = "sources/brave_web_search" } +dependencies = [ + { name = "pydantic" }, +] + +[package.metadata] +requires-dist = [{ name = "pydantic", specifier = ">=2.0.0" }] + [[package]] name = "build" version = "1.4.0" @@ -6014,6 +6031,17 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/93/e0/6cc82a562bc6365785a3ff0af27a2a092d57c47d7a81d9e2295d8c36f011/tiktoken-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dc2dd125a62cb2b3d858484d6c614d136b5b848976794edfb63688d539b8b93f", size = 878777, upload-time = "2025-10-06T20:22:18.036Z" }, ] +[[package]] +name = "tinyfish-web-search" +version = "1.0.0" +source = { editable = "sources/tinyfish_web_search" } +dependencies = [ + { name = "pydantic" }, +] + +[package.metadata] +requires-dist = [{ name = "pydantic", specifier = ">=2.0.0" }] + [[package]] name = "tokenizers" version = "0.22.2" From 42e9a22262d637847e7fca838da15d21c717c61b Mon Sep 17 00:00:00 2001 From: Jithendra Nara Date: Sat, 23 May 2026 18:54:12 -0400 Subject: [PATCH 2/4] chore: install new web search providers in setup Signed-off-by: Jithendra Nara --- scripts/setup.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/setup.sh b/scripts/setup.sh index b0992871..72dcef72 100755 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -66,6 +66,8 @@ echo "" echo "Installing data sources..." 
"${UV_BIN}" pip install -e ./sources/tavily_web_search "${UV_BIN}" pip install -e ./sources/exa_web_search +"${UV_BIN}" pip install -e ./sources/brave_web_search +"${UV_BIN}" pip install -e ./sources/tinyfish_web_search "${UV_BIN}" pip install -e ./sources/google_scholar_paper_search "${UV_BIN}" pip install -e "./sources/knowledge_layer[llamaindex,foundational_rag]" echo "Data Sources installed" From 2ffb6967a2e481e15cc0e1abaf24f5a2b801eb2c Mon Sep 17 00:00:00 2001 From: Jithendra Nara Date: Sat, 23 May 2026 20:37:50 -0400 Subject: [PATCH 3/4] fix: address web search provider review feedback Signed-off-by: Jithendra Nara --- .../customization/configuration-reference.md | 2 +- sources/brave_web_search/src/register.py | 16 +++-- .../tests/test_brave_register.py | 54 ++++++++++++++++ sources/tinyfish_web_search/src/register.py | 20 +++--- .../tests/test_tinyfish_register.py | 62 +++++++++++++++++++ 5 files changed, 140 insertions(+), 14 deletions(-) diff --git a/docs/source/customization/configuration-reference.md b/docs/source/customization/configuration-reference.md index cafc76fa..893c82fc 100644 --- a/docs/source/customization/configuration-reference.md +++ b/docs/source/customization/configuration-reference.md @@ -237,7 +237,7 @@ functions: | Parameter | Type | Default | Description | |-----------|------|---------|-------------| -| `max_results` | `int` | `5` | Maximum number of search results to return from the response. | +| `max_results` | `int` | `5` | Maximum number of search results to return from the response, up to `20`. | | `api_key` | `str` | `None` | TinyFish API key. Falls back to `TINYFISH_API_KEY` environment variable. | | `max_retries` | `int` | `3` | Number of retry attempts on search failure. | | `location` | `str` | `"US"` | Country code for geo-targeted results. 
| diff --git a/sources/brave_web_search/src/register.py b/sources/brave_web_search/src/register.py index 4b345d29..eb9cfdf5 100644 --- a/sources/brave_web_search/src/register.py +++ b/sources/brave_web_search/src/register.py @@ -14,6 +14,7 @@ # limitations under the License. import asyncio +import html import json import logging import os @@ -60,6 +61,7 @@ class BraveWebSearchToolConfig(FunctionBaseConfig, name="brave_web_search"): timeout: float = Field(default=20.0, gt=0, description="HTTP request timeout in seconds") max_content_length: int | None = Field( default=10000, + ge=1, description="Max characters per result snippet. If set, truncates each result to reduce token usage.", ) @@ -86,14 +88,16 @@ def _http_get_json(url: str, headers: dict[str, str], timeout: float) -> dict: def _truncate_content(content: str, max_content_length: int | None) -> str: - if max_content_length and len(content) > max_content_length: - return content[: max_content_length - 3] + "..." - return content + if max_content_length is None or len(content) <= max_content_length: + return content + if max_content_length <= 3: + return "." * max_content_length + return content[: max_content_length - 3] + "..." 
def _render_document(result: dict, max_content_length: int | None) -> str: - url = result.get("url", "") or "" - title = result.get("title", "") or "" + url = html.escape(str(result.get("url", "") or ""), quote=True) + title = html.escape(str(result.get("title", "") or "")) snippets: list[str] = [] description = result.get("description") or result.get("snippet") or "" if description: @@ -101,7 +105,7 @@ def _render_document(result: dict, max_content_length: int | None) -> str: extra_snippets = result.get("extra_snippets") or [] if isinstance(extra_snippets, list): snippets.extend(str(snippet) for snippet in extra_snippets if snippet) - body = _truncate_content("\n".join(snippets), max_content_length) + body = html.escape(_truncate_content("\n".join(snippets), max_content_length)) return f'<Document href="{url}">\n<title>\n{title}\n</title>\n{body}\n</Document>' diff --git a/sources/brave_web_search/tests/test_brave_register.py b/sources/brave_web_search/tests/test_brave_register.py index 9a70efb1..f05eba8b 100644 --- a/sources/brave_web_search/tests/test_brave_register.py +++ b/sources/brave_web_search/tests/test_brave_register.py @@ -198,6 +198,46 @@ async def test_truncates_content(self, monkeypatch): assert "abcde..."
in out assert "abcdefghi" not in out + async def test_small_content_limit_does_not_exceed_requested_length(self, monkeypatch): + fake_request = MagicMock( + return_value=_search_payload([{"url": "u", "title": "t", "description": "abcdefghijklmnop"}]) + ) + monkeypatch.setenv("BRAVE_API_KEY", "brave-env") + monkeypatch.setattr("brave_web_search.register._http_get_json", fake_request) + + config = BraveWebSearchToolConfig(max_content_length=2) + builder = MagicMock() + async with brave_web_search(config, builder) as info: + out = await info.single_fn("q") + + assert "\n..\n" in out + assert "abc" not in out + + async def test_escapes_document_fields(self, monkeypatch): + fake_request = MagicMock( + return_value=_search_payload( + [ + { + "url": 'https://a.example/?q="x"&n=1', + "title": "<Title & One>", + "description": "Body <tag> & value", + } + ] + ) + ) + monkeypatch.setenv("BRAVE_API_KEY", "brave-env") + monkeypatch.setattr("brave_web_search.register._http_get_json", fake_request) + + config = BraveWebSearchToolConfig() + builder = MagicMock() + async with brave_web_search(config, builder) as info: + out = await info.single_fn("q") + + assert 'href="https://a.example/?q=&quot;x&quot;&amp;n=1"' in out + assert "&lt;Title &amp; One&gt;" in out + assert "Body &lt;tag&gt; &amp; value" in out + assert "<Title & One>" not in out + async def test_empty_results_returns_error(self, monkeypatch): fake_request = MagicMock(return_value=_search_payload([])) monkeypatch.setenv("BRAVE_API_KEY", "brave-env") @@ -242,3 +282,17 @@ async def test_401_returns_friendly_message(self, monkeypatch): assert "401" in out assert "BRAVE_API_KEY" in out + + async def test_429_returns_rate_limit_message(self, monkeypatch): + fake_request = MagicMock(side_effect=RuntimeError("429 Too Many Requests")) + monkeypatch.setenv("BRAVE_API_KEY", "brave-env") + monkeypatch.setattr("brave_web_search.register._http_get_json", fake_request) + monkeypatch.setattr("brave_web_search.register.asyncio.sleep", _no_sleep) + + config =
BraveWebSearchToolConfig(max_retries=2) + builder = MagicMock() + async with brave_web_search(config, builder) as info: + out = await info.single_fn("q") + + assert "rate limit" in out.lower() + assert "Brave" in out diff --git a/sources/tinyfish_web_search/src/register.py b/sources/tinyfish_web_search/src/register.py index 639d521e..5ad9e3b8 100644 --- a/sources/tinyfish_web_search/src/register.py +++ b/sources/tinyfish_web_search/src/register.py @@ -14,6 +14,7 @@ # limitations under the License. import asyncio +import html import json import logging import os @@ -43,7 +44,7 @@ class TinyfishWebSearchToolConfig(FunctionBaseConfig, name="tinyfish_web_search" Requires a TINYFISH_API_KEY environment variable or api_key config. """ - max_results: int = Field(default=5, ge=1, description="Maximum number of search results to return") + max_results: int = Field(default=5, ge=1, le=20, description="Maximum number of search results to return") api_key: SecretStr | None = Field(default=None, description="The API key for the TinyFish service") max_retries: int = Field(default=3, ge=1, description="Maximum number of retries for the search request") location: str = Field(default="US", description="Country code for geo-targeted results") @@ -52,6 +53,7 @@ class TinyfishWebSearchToolConfig(FunctionBaseConfig, name="tinyfish_web_search" timeout: float = Field(default=20.0, gt=0, description="HTTP request timeout in seconds") max_content_length: int | None = Field( default=10000, + ge=1, description="Max characters per result snippet. If set, truncates each result to reduce token usage.", ) @@ -78,18 +80,20 @@ def _http_get_json(url: str, headers: dict[str, str], timeout: float) -> dict: def _truncate_content(content: str, max_content_length: int | None) -> str: - if max_content_length and len(content) > max_content_length: - return content[: max_content_length - 3] + "..." 
- return content + if max_content_length is None or len(content) <= max_content_length: + return content + if max_content_length <= 3: + return "." * max_content_length + return content[: max_content_length - 3] + "..." def _render_document(result: dict, max_content_length: int | None) -> str: - url = result.get("url", "") or "" - title = result.get("title", "") or "" + url = html.escape(str(result.get("url", "") or ""), quote=True) + title = html.escape(str(result.get("title", "") or "")) site_name = result.get("site_name") or "" snippet = result.get("snippet") or result.get("description") or "" body_parts = [str(part) for part in (site_name, snippet) if part] - body = _truncate_content("\n".join(body_parts), max_content_length) + body = html.escape(_truncate_content("\n".join(body_parts), max_content_length)) return f'<Document href="{url}">\n<title>\n{title}\n</title>\n{body}\n</Document>' @@ -145,6 +149,8 @@ async def _tinyfish_web_search(question: str) -> str: "page": str(tool_config.page), } + # TinyFish documents pagination but not a per-request result-count parameter. + # Apply max_results when rendering below.
url = f"{TINYFISH_SEARCH_URL}?{urllib.parse.urlencode(params)}" headers = { "Accept": "application/json", diff --git a/sources/tinyfish_web_search/tests/test_tinyfish_register.py b/sources/tinyfish_web_search/tests/test_tinyfish_register.py index ed835976..a1d047f3 100644 --- a/sources/tinyfish_web_search/tests/test_tinyfish_register.py +++ b/sources/tinyfish_web_search/tests/test_tinyfish_register.py @@ -19,7 +19,9 @@ import urllib.parse from unittest.mock import MagicMock +import pytest from pydantic import SecretStr +from pydantic import ValidationError from tinyfish_web_search.register import TINYFISH_SEARCH_URL from tinyfish_web_search.register import TinyfishWebSearchToolConfig from tinyfish_web_search.register import tinyfish_web_search @@ -75,6 +77,10 @@ def test_all_fields(self): assert config.timeout == 5.0 assert config.max_content_length == 50 + def test_max_results_has_upper_bound(self): + with pytest.raises(ValidationError): + TinyfishWebSearchToolConfig(max_results=21) + def test_inherits_from_function_base_config(self): from nat.data_models.function import FunctionBaseConfig @@ -200,6 +206,48 @@ async def test_truncates_content(self, monkeypatch): assert "abcde..." 
in out assert "abcdefghi" not in out + async def test_small_content_limit_does_not_exceed_requested_length(self, monkeypatch): + fake_request = MagicMock( + return_value=_search_payload([{"url": "u", "title": "t", "snippet": "abcdefghijklmnop"}]) + ) + monkeypatch.setenv("TINYFISH_API_KEY", "tinyfish-env") + monkeypatch.setattr("tinyfish_web_search.register._http_get_json", fake_request) + + config = TinyfishWebSearchToolConfig(max_content_length=2) + builder = MagicMock() + async with tinyfish_web_search(config, builder) as info: + out = await info.single_fn("q") + + assert "\n..\n" in out + assert "abc" not in out + + async def test_escapes_document_fields(self, monkeypatch): + fake_request = MagicMock( + return_value=_search_payload( + [ + { + "url": 'https://a.example/?q="x"&n=1', + "title": "<Title & One>", + "snippet": "Body <tag> & value", + "site_name": "a.example & docs", + } + ] + ) + ) + monkeypatch.setenv("TINYFISH_API_KEY", "tinyfish-env") + monkeypatch.setattr("tinyfish_web_search.register._http_get_json", fake_request) + + config = TinyfishWebSearchToolConfig() + builder = MagicMock() + async with tinyfish_web_search(config, builder) as info: + out = await info.single_fn("q") + + assert 'href="https://a.example/?q=&quot;x&quot;&amp;n=1"' in out + assert "&lt;Title &amp; One&gt;" in out + assert "a.example &amp; docs" in out + assert "Body &lt;tag&gt; &amp; value" in out + assert "<Title & One>" not in out + async def test_empty_results_returns_error(self, monkeypatch): fake_request = MagicMock(return_value=_search_payload([])) monkeypatch.setenv("TINYFISH_API_KEY", "tinyfish-env") @@ -244,3 +292,17 @@ async def test_401_returns_friendly_message(self, monkeypatch): assert "401" in out assert "TINYFISH_API_KEY" in out + + async def test_429_returns_rate_limit_message(self, monkeypatch): + fake_request = MagicMock(side_effect=RuntimeError("429 Too Many Requests")) + monkeypatch.setenv("TINYFISH_API_KEY", "tinyfish-env") + monkeypatch.setattr("tinyfish_web_search.register._http_get_json", fake_request) +
monkeypatch.setattr("tinyfish_web_search.register.asyncio.sleep", _no_sleep) + + config = TinyfishWebSearchToolConfig(max_retries=2) + builder = MagicMock() + async with tinyfish_web_search(config, builder) as info: + out = await info.single_fn("q") + + assert "rate limit" in out.lower() + assert "TinyFish" in out From 3363e92dc6972705d399411944ebd25a09544def Mon Sep 17 00:00:00 2001 From: Jithendra Nara <jnara01@indianatech.net> Date: Wed, 27 May 2026 08:28:01 -0400 Subject: [PATCH 4/4] fix: resolve CI link and secrets checks Signed-off-by: Jithendra Nara <jnara01@indianatech.net> --- .secrets.baseline | 6 +++--- README.md | 6 +++--- docs/source/get-started/installation.md | 2 +- docs/source/resources/troubleshooting.md | 2 +- sources/brave_web_search/tests/test_brave_register.py | 6 ++++++ 5 files changed, 14 insertions(+), 8 deletions(-) diff --git a/.secrets.baseline b/.secrets.baseline index 30bc721c..02826db8 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -133,7 +133,7 @@ "filename": "README.md", "hashed_secret": "73140b88094aaf220a03532196b27b58a03c9b09", "is_verified": false, - "line_number": 302 + "line_number": 323 } ], "deploy/.env.example": [ @@ -142,7 +142,7 @@ "filename": "deploy/.env.example", "hashed_secret": "9d4e1e23bd5b727046a9e3b4b7db57bd8d6ee684", "is_verified": false, - "line_number": 30 + "line_number": 33 } ], "deploy/compose/README.md": [ @@ -290,5 +290,5 @@ } ] }, - "generated_at": "2026-05-12T22:24:46Z" + "generated_at": "2026-05-27T12:27:09Z" } diff --git a/README.md b/README.md index f23826dc..c0ff0d24 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ limitations under the License. 
> **🏆 BENCHMARK NOTE 🏆** > -> To obtain results consistent with the **nvidia-aiq** [DeepResearch Bench](https://huggingface.co/spaces/muset-ai/DeepResearch-Bench-Leaderboard) and [DeepResearch Bench II](https://agentresearchlab.com/benchmarks/deepresearch-bench-ii/index.html#leaderboard) leaderboard results, please use the [`drb1`](https://github.com/NVIDIA-AI-Blueprints/aiq/tree/drb1) and [`drb2`](https://github.com/NVIDIA-AI-Blueprints/aiq/tree/drb2) branches, respectively. +> To obtain results consistent with the **nvidia-aiq** [DeepResearch Bench](https://huggingface.co/spaces/muset-ai/DeepResearch-Bench-Leaderboard) leaderboard and [DeepResearch Bench II](https://github.com/imlrz/DeepResearch-Bench-II) benchmark repository results, please use the [`drb1`](https://github.com/NVIDIA-AI-Blueprints/aiq/tree/drb1) and [`drb2`](https://github.com/NVIDIA-AI-Blueprints/aiq/tree/drb2) branches, respectively. ## Table of Contents @@ -219,8 +219,8 @@ uv pip install -e "./sources/knowledge_layer[llamaindex,foundational_rag]" #### Obtain a TinyFish API Key -1. Sign in to [TinyFish](https://agent.tinyfish.ai/api-keys) -2. Generate an API key from your dashboard +1. Review the [TinyFish Search API docs](https://docs.tinyfish.ai/search-api) +2. Generate an API key from your TinyFish dashboard #### Obtain a Serper API Key diff --git a/docs/source/get-started/installation.md b/docs/source/get-started/installation.md index 093111c8..4a13c7cb 100644 --- a/docs/source/get-started/installation.md +++ b/docs/source/get-started/installation.md @@ -135,7 +135,7 @@ Then edit `deploy/.env` and fill in your keys. 
| `TAVILY_API_KEY` | [Tavily](https://tavily.com/) | Web search (Tavily provider) | | `EXA_API_KEY` | [Exa](https://exa.ai/) | Web search (Exa provider) | | `BRAVE_API_KEY` | [Brave Search API](https://api.search.brave.com/) | Web search (Brave provider) | -| `TINYFISH_API_KEY` | [TinyFish](https://agent.tinyfish.ai/api-keys) | Web search (TinyFish provider) | +| `TINYFISH_API_KEY` | [TinyFish](https://docs.tinyfish.ai/search-api) | Web search (TinyFish provider) | | `SERPER_API_KEY` | [Serper](https://serper.dev/) | Academic paper search (Google Scholar). To enable, uncomment `paper_search_tool` in your config file | At minimum, you need `NVIDIA_API_KEY` for LLM inference and one configured web-search provider key (`TAVILY_API_KEY`, `EXA_API_KEY`, `BRAVE_API_KEY`, or `TINYFISH_API_KEY`) for web search. Paper search (`SERPER_API_KEY`) is disabled by default in the shipped configs -- refer to the comments in your config file to enable it. diff --git a/docs/source/resources/troubleshooting.md b/docs/source/resources/troubleshooting.md index 710be1b2..e3825835 100644 --- a/docs/source/resources/troubleshooting.md +++ b/docs/source/resources/troubleshooting.md @@ -26,7 +26,7 @@ Common issues and solutions for the AI-Q blueprint. 
| Tavily search returns empty | Invalid `TAVILY_API_KEY` | Verify key at [tavily.com](https://tavily.com) | | Exa search returns empty or 401 | Invalid or missing `EXA_API_KEY` | Verify key at [exa.ai](https://exa.ai) | | Brave search returns empty or 401 | Invalid or missing `BRAVE_API_KEY` | Verify key at [api.search.brave.com](https://api.search.brave.com/) | -| TinyFish search returns empty or 401 | Invalid or missing `TINYFISH_API_KEY` | Verify key at [agent.tinyfish.ai](https://agent.tinyfish.ai/api-keys) | +| TinyFish search returns empty or 401 | Invalid or missing `TINYFISH_API_KEY` | Verify key using the [TinyFish Search API docs](https://docs.tinyfish.ai/search-api) | | Serper search fails | Missing `SERPER_API_KEY` | Set key or remove `paper_search_tool` from config | ## Runtime Issues diff --git a/sources/brave_web_search/tests/test_brave_register.py b/sources/brave_web_search/tests/test_brave_register.py index f05eba8b..d2bcfa94 100644 --- a/sources/brave_web_search/tests/test_brave_register.py +++ b/sources/brave_web_search/tests/test_brave_register.py @@ -19,10 +19,12 @@ import urllib.parse from unittest.mock import MagicMock +import pytest from brave_web_search.register import BRAVE_SEARCH_URL from brave_web_search.register import BraveWebSearchToolConfig from brave_web_search.register import brave_web_search from pydantic import SecretStr +from pydantic import ValidationError def _search_payload(results=None): @@ -77,6 +79,10 @@ def test_all_fields(self): assert config.timeout == 5.0 assert config.max_content_length == 50 + def test_max_results_has_upper_bound(self): + with pytest.raises(ValidationError): + BraveWebSearchToolConfig(max_results=21) + def test_inherits_from_function_base_config(self): from nat.data_models.function import FunctionBaseConfig