NVIDIA-AI-Blueprints · rhossi · May 4, 2026 · May 4, 2026 · May 4, 2026 · May 4, 2026
diff --git a/configs/config_web_opensearch.yml b/configs/config_web_opensearch.yml
@@ -0,0 +1,207 @@
+# This is a Web mode configuration for OpenSearch vector retrieval.
+# It has the following features:
+# - Web search enabled by default
+# - Knowledge retrieval using the built-in OpenSearch backend.
+# - Supports self-hosted OpenSearch and Amazon OpenSearch Serverless via SigV4.
+
+general:
+  use_uvloop: true
+  telemetry:
+    logging:
+      console:
+        _type: console
+        level: INFO
+    # tracing:
+    #   langsmith: # Optional: LangSmith tracing - requires langsmith API key. Set using `export LANGSMITH_API_KEY=<your-langsmith-api-key>`
+    #     _type: langsmith
+    #     project: nvidia-aiq
+
+  front_end:
+    _type: aiq_api
+    runner_class: aiq_api.plugin.AIQAPIWorker
+    # =========================================================================
+    # Knowledge API is automatically enabled when knowledge_retrieval function
+    # is configured
+    # =========================================================================
+    # Async Job API Settings
+    # =========================================================================
+    # Async job infrastructure database (NAT JobStore + EventStore)
+    # Used by: /v1/jobs/async routes, SSE streaming, job status persistence
+    # Requires async driver for SQLite (aiosqlite) or PostgreSQL (asyncpg)
+    # Environment overrides:
+    # - NAT_JOB_STORE_DB_URL (direct override)
+    # - NAT_JOB_STORE_DB_URL_DEV / NAT_JOB_STORE_DB_URL_PROD (via NAT_ENV)
+    db_url: ${NAT_JOB_STORE_DB_URL:-sqlite+aiosqlite:///./jobs.db}
+    # Job expiry - how long completed jobs stay in database before cleanup
+    expiry_seconds: 86400  # 24 hours (min: 600, max: 604800/7 days)
+    cors:  # browser access is limited to local origins (localhost / 127.0.0.1, any port)
+      allow_origin_regex: 'http://localhost(:\d+)?|http://127\.0\.0\.1(:\d+)?'  # dots escaped so "." only matches a literal dot
+      allow_methods:
+        - GET
+        - POST
+        - DELETE
+        - OPTIONS
+      allow_headers:
+        - "*"
+      allow_credentials: true
+      expose_headers:  # NOTE(review): browsers treat "*" literally on credentialed requests; list headers explicitly if clients read any - confirm
+        - "*"
+
+llms:  # named LLM endpoints; agents under `functions` reference these keys
+  nemotron_llm_intent:  # used by intent_classifier (llm)
+    _type: nim
+    model_name: nvidia/nemotron-3-nano-30b-a3b
+    base_url: "https://integrate.api.nvidia.com/v1"
+    temperature: 0.5
+    top_p: 0.9
+    max_tokens: 4096
+    num_retries: 5
+    chat_template_kwargs:
+      enable_thinking: true
+
+  nemotron_nano_llm:  # used by clarifier_agent, shallow_research_agent, and deep_research_agent (researcher_llm)
+    _type: nim
+    model_name: nvidia/nemotron-3-nano-30b-a3b  # same model as nemotron_llm_intent, with lower temperature/top_p and a larger max_tokens
+    base_url: "https://integrate.api.nvidia.com/v1"
+    temperature: 0.1
+    top_p: 0.3
+    max_tokens: 16384
+    num_retries: 5
+    chat_template_kwargs:
+      enable_thinking: true
+
+  gpt_oss_llm:  # used by deep_research_agent (orchestrator_llm and planner_llm)
+    _type: nim
+    model_name: openai/gpt-oss-120b
+    base_url: https://integrate.api.nvidia.com/v1
+    temperature: 1.0
+    top_p: 1.0
+    max_tokens: 256000
+    api_key: ${NVIDIA_API_KEY}  # NOTE(review): the only entry that sets api_key explicitly - confirm how the others obtain it
+    max_retries: 10  # NOTE(review): the other entries set num_retries instead - confirm max_retries is the intended key
+
+  # Nemotron Super is compatible and tested with AIQ but has limited availability
+  # on the Build API due to high demand.
+  # Uncomment nemotron_super_llm below if the endpoint is accessible.
+  # nemotron_super_llm:
+  #   _type: nim
+  #   model_name: nvidia/nemotron-3-super-120b-a12b
+  #   base_url: "https://integrate.api.nvidia.com/v1"
+  #   temperature: 1.0
+  #   top_p: 1.0
+  #   max_tokens: 128000
+  #   num_retries: 5
+  #   chat_template_kwargs:
+  #     enable_thinking: true
+
+functions:
+  # =========================================================================
+  # Data Source Registry
+  # =========================================================================
+  # Central registry that controls:
+  #   1. UI toggles — each source appears as an on/off switch in the frontend
+  #   2. Per-message filtering — users can select active sources per request
+  #   3. Tool auto-inheritance — agents with no explicit `tools` list receive
+  #      every tool listed here (use `exclude_tools` on agents to specialize)
+  #
+  # Source entry fields:
+  #   id, name, description, tools, requires_auth (default: false),
+  #   default_enabled (default: true)
+  #
+  # See docs/source/customization/tools-and-sources.md for full details.
+  # =========================================================================
+  data_sources:  # registry of user-selectable sources and the tools each one contributes
+    _type: data_source_registry
+    sources:  # requires_auth and default_enabled are omitted, so the documented defaults (false / true) apply
+      - id: web_search
+        name: "Web Search"
+        description: "Search the web for real-time information."
+        tools:
+          - web_search_tool  # dropped by deep_research_agent via exclude_tools
+          - advanced_web_search_tool  # dropped by shallow_research_agent via exclude_tools
+      - id: knowledge_layer
+        name: "Knowledge Base"
+        description: "Search uploaded documents and files."
+        tools:
+          - knowledge_search  # the knowledge_retrieval function defined in this file (backend: opensearch)
+
+  web_search_tool:  # the only web tool left to shallow_research_agent after its exclude_tools
+    _type: tavily_web_search
+    max_results: 5
+    max_content_length: 1000
+
+  advanced_web_search_tool:  # the only web tool left to deep_research_agent after its exclude_tools
+    _type: tavily_web_search
+    max_results: 2  # NOTE(review): no max_content_length here, unlike web_search_tool - confirm the default is intended
+    advanced_search: true
+
+  # Knowledge Retrieval via OpenSearch; every setting except backend and top_k can be overridden by env var (see sources/knowledge_layer/KNOWLEDGE-LAYER-SETUP.md)
+  knowledge_search:
+    _type: knowledge_retrieval
+    backend: opensearch
+    collection_name: ${COLLECTION_NAME:-test_collection}  # falls back to test_collection when COLLECTION_NAME is unset
+    top_k: 5
+    opensearch_url: ${OPENSEARCH_URL:-http://localhost:9200}  # default targets a local node over plain HTTP
+    opensearch_auth_type: ${OPENSEARCH_AUTH_TYPE:-none}  # default is none; deploy/helm/README.md sets sigv4 for Amazon OpenSearch Serverless
+    opensearch_aws_region: ${AWS_REGION:-us-east-1}  # read from AWS_REGION, not an OPENSEARCH_-prefixed variable
+    opensearch_aws_service: ${OPENSEARCH_AWS_SERVICE:-aoss}  # presumably the SigV4 signing service name (aoss = OpenSearch Serverless) - confirm
+    opensearch_index_prefix: ${OPENSEARCH_INDEX_PREFIX:-aiq}
+    opensearch_embedding_dim: ${OPENSEARCH_EMBEDDING_DIM:-2048}  # NOTE(review): presumably must match embed_model's output dimension - confirm when changing either
+    opensearch_ingestion_mode: ${OPENSEARCH_INGESTION_MODE:-auto}
+    opensearch_dask_scheduler_address: ${NAT_DASK_SCHEDULER_ADDRESS:-}  # empty when NAT_DASK_SCHEDULER_ADDRESS is unset
+    opensearch_dask_file_transfer: ${OPENSEARCH_DASK_FILE_TRANSFER:-bytes}
+    embed_model: ${AIQ_EMBED_MODEL:-nvidia/llama-nemotron-embed-vl-1b-v2}
+    embed_base_url: ${AIQ_EMBED_BASE_URL:-https://integrate.api.nvidia.com/v1}
+
+  # Paper Search (optional - requires SERPER_API_KEY)
+  # Uncomment the block below and set SERPER_API_KEY to enable academic paper search.
+  # paper_search_tool:
+  #   _type: paper_search
+  #   max_results: 5
+  #   serper_api_key: ${SERPER_API_KEY}
+
+  # =========================================================================
+  # Agents — inherit all registry tools; use exclude_tools to specialize
+  # =========================================================================
+  intent_classifier:
+    _type: intent_classifier
+    llm: nemotron_llm_intent  # defined under llms (temperature 0.5, max_tokens 4096)
+    # tools: omitted -> inherits all registry tools (web_search_tool, advanced_web_search_tool, knowledge_search)
+    # exclude_tools: []
+
+  clarifier_agent:  # turned on by workflow.enable_clarifier
+    _type: clarifier_agent
+    llm: nemotron_nano_llm  # replace with nemotron_super_llm if available
+    planner_llm: nemotron_nano_llm  # replace with nemotron_super_llm if available
+    # tools: omitted -> inherits all registry tools (web_search_tool, advanced_web_search_tool, knowledge_search)
+    # exclude_tools: []
+    max_turns: 3
+    enable_plan_approval: true
+    log_response_max_chars: 2000
+    verbose: true
+
+  shallow_research_agent:
+    _type: shallow_research_agent
+    llm: nemotron_nano_llm
+    # tools: omitted -> inherits registry tools; after exclude_tools that leaves web_search_tool and knowledge_search
+    exclude_tools:
+      - advanced_web_search_tool
+    max_llm_turns: 10
+    max_tool_iterations: 5
+
+  deep_research_agent:
+    _type: deep_research_agent
+    orchestrator_llm: gpt_oss_llm
+    researcher_llm: nemotron_nano_llm  # replace with nemotron_super_llm if available
+    planner_llm: gpt_oss_llm
+    # tools: omitted -> inherits registry tools; after exclude_tools that leaves advanced_web_search_tool and knowledge_search
+    exclude_tools:
+      - web_search_tool
+    max_loops: 2
+
+workflow:  # top-level entry point; the agents it coordinates are declared under `functions`
+  _type: chat_deepresearcher_agent
+  enable_escalation: true  # NOTE(review): presumably allows hand-off from shallow to deep research - confirm
+  enable_clarifier: true  # clarifier_agent is configured under functions
+  use_async_deep_research: true  # NOTE(review): presumably relies on the async job store at general.front_end.db_url - confirm
+  checkpoint_db: ${AIQ_CHECKPOINT_DB:-./checkpoints.db}  # falls back to ./checkpoints.db when AIQ_CHECKPOINT_DB is unset
diff --git a/deploy/helm/README.md b/deploy/helm/README.md
@@ -128,6 +128,45 @@ To see what values the chart supports before installing:
 helm show values aiq2-web-2.0.0.tgz
 ```
 
+### Amazon OpenSearch Serverless
+
+The backend image can be overridden through values without forking the chart:
+
+```yaml
+aiq:
+  apps:
+    backend:
+      image:
+        repository: <registry>/<aiq-agent-image>
+        tag: <tag>
+```
+
+For Amazon OpenSearch Serverless, set the backend workflow config to `configs/config_web_opensearch.yml` and configure
+SigV4 through environment values:
+
+```yaml
+aiq:
+  apps:
+    backend:
+      env:
+        CONFIG_FILE: configs/config_web_opensearch.yml
+        OPENSEARCH_URL: https://abc123.us-west-2.aoss.amazonaws.com
+        OPENSEARCH_AUTH_TYPE: sigv4
+        OPENSEARCH_AWS_SERVICE: aoss
+        OPENSEARCH_INDEX_PREFIX: aiq
+        AWS_REGION: us-west-2
+        OPENSEARCH_INGESTION_MODE: auto
+        OPENSEARCH_DASK_FILE_TRANSFER: bytes
+```
+
+A complete example is available at
+[`deploy/helm/examples/aws-opensearch-serverless-values.yaml`](examples/aws-opensearch-serverless-values.yaml).
+
+For EKS Pod Identity, associate the IAM role with the backend service account for this release. With the default chart
+names, the namespace is `ns-aiq` and the backend service account is `aiq-backend`. Pod Identity associations are created
+through EKS (console, CLI, or API), not by annotating the service account. The role also needs an IAM policy that allows
+OpenSearch Serverless data-plane access (`aoss:APIAccessAll`) and an AOSS data access policy for the target collection and indexes.
+
 ### Verify
 
 ```bash

diff --git a/deploy/helm/examples/aws-opensearch-serverless-values.yaml b/deploy/helm/examples/aws-opensearch-serverless-values.yaml
@@ -0,0 +1,32 @@
+# Example values for AI-Q with Amazon OpenSearch Serverless.
+#
+# Before applying this file:
+# - Create an OpenSearch Serverless vector collection and note its data endpoint.
+# - Create an AOSS data access policy for the service account IAM role.
+# - Create an EKS Pod Identity association that maps the backend service account
+#   for this release (default name: aiq-backend in namespace ns-aiq) to that role.
+
+aiq:
+  apps:
+    backend:
+      image:
+        # Override this when testing a custom image that includes unreleased changes.
+        repository: nvcr.io/nvidia/blueprint/aiq-agent
+        tag: "2.0.0"
+        pullPolicy: IfNotPresent
+      imagePullSecrets:
+        - name: ngc-image-pull-secret
+      secretEnv:
+        NVIDIA_API_KEY: NVIDIA_API_KEY  # NOTE(review): presumably maps the env var name to a key in the chart's secret - confirm against the chart
+      env:
+        CONFIG_FILE: configs/config_web_opensearch.yml  # the OpenSearch web workflow config
+        COLLECTION_NAME: default_collection  # overrides the config's fallback value, test_collection
+        OPENSEARCH_URL: https://abc123.us-west-2.aoss.amazonaws.com  # placeholder; replace with your collection's data endpoint
+        OPENSEARCH_AUTH_TYPE: sigv4  # the config's fallback is none, so this must be set for AOSS
+        OPENSEARCH_AWS_SERVICE: aoss  # same as the config's fallback
+        OPENSEARCH_INDEX_PREFIX: aiq  # same as the config's fallback
+        AWS_REGION: us-west-2  # keep in sync with the region embedded in OPENSEARCH_URL
+        OPENSEARCH_INGESTION_MODE: auto  # same as the config's fallback
+        OPENSEARCH_DASK_FILE_TRANSFER: bytes  # same as the config's fallback
+        DASK_NWORKERS: "1"  # quoted so the value stays a string
+        DASK_NTHREADS: "4"  # quoted so the value stays a string