rh-ai-quickstart · sauagarwa · Mar 11, 2026 · Feb 17, 2026 · Feb 17, 2026 · Feb 24, 2026
diff --git a/.gitignore b/.gitignore
@@ -184,3 +184,5 @@ deploy/helm/secrets
 # User-specific configuration file (contains sensitive data)
 deploy/helm/rag-values.yaml
 deploy/helm/rag/Chart.lock
+deploy/helm/values.yaml
+deploy/helm/bootstrap-values.yaml
diff --git a/README.md b/README.md
@@ -100,14 +100,16 @@ This QuickStart allows users to explore the capabilities of RAG by:
 | Generation  | `meta-llama/Meta-Llama-3-70B-Instruct` | A100 x2/HPU | p4d.24xlarge
 | Safety      | `meta-llama/Llama-Guard-3-8B`          | L4/HPU      | g6.2xlarge
 
-- Note: Developers can also configure a remote LLM by modifying the `rag_values.yml` file, which gives you complete control over all parameter settings.
+- Note: Developers can also use a remote LLM via the command line (see [Remote LLM Deployment](#remote-llm-deployment-example)) or by modifying the `rag-values.yaml` file directly:
 
 ```yaml
-  remote-llm:
-    id: llama-3-3-70b-instruct-w8a8
-    url: https://somedomain.com/v1
-    apiToken: fake-token
-    enabled: true
+  global:
+    models:
+      remote-llm:
+        id: meta-llama/Llama-3.3-70B-Instruct
+        url: https://somedomain.com/v1
+        apiToken: fake-token
+        enabled: true
 ```
 
 Note: the 70B model is NOT required for initial testing of this example. The safety/shield model `Llama-Guard-3-8B` is also optional.
@@ -251,6 +253,25 @@ make install NAMESPACE=llama-stack-rag LLM=llama-3-2-3b-instruct SAFETY=llama-gu
 
 ```
 
+**Remote LLM Deployment Example:**
+
+To connect to a remote LLM endpoint instead of deploying a local model, set `LLM=remote-llm` and pass the endpoint details with `LLM_URL` and `LLM_API_TOKEN`:
+
+```bash
+make install NAMESPACE=llama-stack-rag \
+  LLM=remote-llm \
+  LLM_URL=https://my-model-endpoint.example.com/v1 \
+  LLM_API_TOKEN=my-api-token
+```
+
+| Parameter | Description |
+|-----------|-------------|
+| `LLM=remote-llm` | Indicates a remote model (no local vLLM deployment) |
+| `LLM_URL` | The base URL of the remote model endpoint |
+| `LLM_API_TOKEN` | Authentication token for the remote endpoint |
+
+This skips local model deployment and configures LlamaStack to use the remote inference endpoint directly. No GPU or HF token is required for the LLM.
+
 When prompted, enter your **[Hugging Face Token](https://huggingface.co/settings/tokens)**.
 
 Note: This process may take 10 to 30 minutes depending on the number and size of models to be downloaded.

diff --git a/deploy/helm/Makefile b/deploy/helm/Makefile
@@ -26,6 +26,7 @@ POSTGRES_DBNAME ?= rag_blueprint
 MINIO_USER ?= minio_rag_user
 MINIO_PASSWORD ?= minio_rag_password
 # HF_TOKEN will be managed by the get_hf_token helper function
+INTERACTIVE ?= true
 RAG_CHART := rag
 TOLERATIONS_TEMPLATE=[{"key":"$(1)","effect":"NoSchedule","operator":"Exists"}]
 
@@ -40,6 +41,7 @@ define check_values_file
 			echo -e "$(YELLOW)[INFO]$(NC) Values file not found. Creating from example..."; \
 			cp "$(VALUES_EXAMPLE)" "$(VALUES_FILE)"; \
 			if [ -z "$(LLM)" ]; then \
+			if [ "$(INTERACTIVE)" = "true" ]; then \
 				echo -e ""; \
 				echo -e "$(GREEN)╔══════════════════════════════════════════════════════════════════════════╗$(NC)"; \
 				echo -e "$(GREEN)║                                                                          ║$(NC)"; \
@@ -69,6 +71,9 @@ define check_values_file
 				read -p "" continue; \
 				echo -e "$(BLUE)[INFO]$(NC) Continuing with installation..."; \
 				echo -e ""; \
+			else \
+				echo -e "$(YELLOW)[INFO]$(NC) Non-interactive mode: skipping configuration prompt. Using $(VALUES_FILE) as-is."; \
+			fi; \
 			else \
 				echo -e "$(GREEN)[SUCCESS]$(NC) Using LLM model from command line: $(LLM)"; \
 				echo -e ""; \
@@ -103,33 +108,41 @@ define validate_values_file
 	LLAMA_STACK_TAVILY=$$(grep "TAVILY_SEARCH_API_KEY:" "$(VALUES_FILE)" 2>/dev/null | sed 's/.*TAVILY_SEARCH_API_KEY: *//' | tr -d '"' | tr -d ' '); \
 	\
 	if [ -z "$$HF_TOKEN_FILE" ] || [ "$$HF_TOKEN_FILE" = "" ]; then \
-		echo -e "$(YELLOW)[WARNING]$(NC) Hugging Face token is not set. Model downloads may fail."; \
-		echo -e "$(BLUE)[INFO]$(NC) Get your token from: https://huggingface.co/settings/tokens"; \
-		echo -e ""; \
-		read -p "Enter your Hugging Face token (or press Enter to skip): " hf_input; \
-		if [ -n "$$hf_input" ]; then \
-			sed -i.bak "/^llm-service:/,/^[^ ]/ s|hf_token:.*|hf_token: \"$$hf_input\"|" "$(VALUES_FILE)"; \
-			echo -e "$(GREEN)[SUCCESS]$(NC) Hugging Face token saved to configuration."; \
-			UPDATED=1; \
+		if [ "$(INTERACTIVE)" = "true" ]; then \
+			echo -e "$(YELLOW)[WARNING]$(NC) Hugging Face token is not set. Model downloads may fail."; \
+			echo -e "$(BLUE)[INFO]$(NC) Get your token from: https://huggingface.co/settings/tokens"; \
+			echo -e ""; \
+			read -p "Enter your Hugging Face token (or press Enter to skip): " hf_input; \
+			if [ -n "$$hf_input" ]; then \
+				sed -i.bak "/^llm-service:/,/^[^ ]/ s|hf_token:.*|hf_token: \"$$hf_input\"|" "$(VALUES_FILE)"; \
+				echo -e "$(GREEN)[SUCCESS]$(NC) Hugging Face token saved to configuration."; \
+				UPDATED=1; \
+			else \
+				echo -e "$(YELLOW)[WARNING]$(NC) Hugging Face token not provided. Model downloads may fail."; \
+			fi; \
+			echo -e ""; \
 		else \
-			echo -e "$(YELLOW)[WARNING]$(NC) Hugging Face token not provided. Model downloads may fail."; \
+			echo -e "$(YELLOW)[WARNING]$(NC) Hugging Face token is not set. Use HF_TOKEN env var or set it in $(VALUES_FILE)."; \
 		fi; \
-		echo -e ""; \
 	fi; \
 	\
 	if [ -z "$$LLAMA_STACK_TAVILY" ] || [ "$$LLAMA_STACK_TAVILY" = "Paste-your-key-here" ]; then \
-		echo -e "$(YELLOW)[WARNING]$(NC) TAVILY search API key is not set. Web search will be disabled."; \
-		echo -e "$(BLUE)[INFO]$(NC) Get your key from: https://tavily.com/"; \
-		echo -e ""; \
-		read -p "Enter your TAVILY search API key (or press Enter to skip): " tavily_input; \
-		if [ -n "$$tavily_input" ]; then \
-			sed -i.bak "s/TAVILY_SEARCH_API_KEY:.*/TAVILY_SEARCH_API_KEY: \"$$tavily_input\"/" "$(VALUES_FILE)"; \
-			echo -e "$(GREEN)[SUCCESS]$(NC) TAVILY search API key saved to configuration."; \
-			UPDATED=1; \
+		if [ "$(INTERACTIVE)" = "true" ]; then \
+			echo -e "$(YELLOW)[WARNING]$(NC) TAVILY search API key is not set. Web search will be disabled."; \
+			echo -e "$(BLUE)[INFO]$(NC) Get your key from: https://tavily.com/"; \
+			echo -e ""; \
+			read -p "Enter your TAVILY search API key (or press Enter to skip): " tavily_input; \
+			if [ -n "$$tavily_input" ]; then \
+				sed -i.bak "s/TAVILY_SEARCH_API_KEY:.*/TAVILY_SEARCH_API_KEY: \"$$tavily_input\"/" "$(VALUES_FILE)"; \
+				echo -e "$(GREEN)[SUCCESS]$(NC) TAVILY search API key saved to configuration."; \
+				UPDATED=1; \
+			else \
+				echo -e "$(YELLOW)[WARNING]$(NC) TAVILY search API key not provided. Web search will be disabled."; \
+			fi; \
+			echo -e ""; \
 		else \
-			echo -e "$(YELLOW)[WARNING]$(NC) TAVILY search API key not provided. Web search will be disabled."; \
+			echo -e "$(YELLOW)[WARNING]$(NC) TAVILY search API key is not set. Use TAVILY_API_KEY env var or set it in $(VALUES_FILE)."; \
 		fi; \
-		echo -e ""; \
 	fi; \
 	\
 	if [ $$UPDATED -eq 1 ]; then \
@@ -212,7 +225,8 @@ help: ## Show this help message
 	@echo -e "  health        - Check health of all services"
 	@echo -e ""
 	@echo -e "$(GREEN)Development Targets:$(NC)"
-	@echo -e "  dev           - Start development environment"
+	@echo -e "  dev-start     - Install (if needed), port-forward, and start Streamlit locally"
+	@echo -e "  dev-stop      - Stop Streamlit & port-forwards, optionally uninstall"
 	@echo -e "  validate      - Validate Helm chart"
 	@echo -e "  validate-config - Validate configuration values"
 	@echo -e "  monitor       - Monitor deployment status"
@@ -244,15 +258,19 @@ help: ## Show this help message
 	@echo -e "  SAFETY_URL    - URL for remote safety service"
 	@echo -e "  LLM_API_TOKEN - API token for remote LLM service"
 	@echo -e "  SAFETY_API_TOKEN - API token for remote safety service"
+	@echo -e "  INTERACTIVE   - Enable interactive prompts (default: true, set false to skip)"
 	@echo -e ""
 	@echo -e "$(GREEN)Quick Start:$(NC)"
 	@echo -e "  $(BLUE)Option 1:$(NC) Using values file (recommended)"
 	@echo -e "    1. make init-config  # Create configuration from example"
 	@echo -e "    2. make configure-keys  # Set up API keys interactively"
 	@echo -e "    3. make install NAMESPACE=my-rag  # Install with your config"
 	@echo -e ""
-	@echo -e "  $(BLUE)Option 2:$(NC) Using command-line parameters"
+	@echo -e "  $(BLUE)Option 2:$(NC) Using command-line parameters with LLM provisioning on OpenShift AI"
 	@echo -e "    make install NAMESPACE=my-rag LLM=llama-3-2-3b-instruct LLM_TOLERATION=\"nvidia.com/gpu\""
+	@echo -e ""
+	@echo -e "  $(BLUE)Option 3:$(NC) Using command-line parameters with remote LLM"
+	@echo -e "    make install NAMESPACE=my-rag LLM=remote-llm LLM_URL=https://<<llm-url>>/v1 LLM_API_TOKEN=<<llm-api-token>>"
 
 # Dependency checks
 .PHONY: check-deps
@@ -337,15 +355,80 @@ namespace:
 	@echo -e "$(GREEN)[SUCCESS]$(NC) Namespace $(NAMESPACE) is ready"
 
 # Development targets
-.PHONY: dev
-dev: ## Start development environment
+FRONTEND_DIR := ../../frontend
+LLAMASTACK_LOCAL_PORT ?= 8321
+PGVECTOR_LOCAL_PORT ?= 5432
+
+.PHONY: dev-start
+dev-start: ## Install (if needed), port-forward, and start Streamlit locally
 	@if [ -z "$(NAMESPACE)" ]; then echo -e "$(RED)[ERROR]$(NC) NAMESPACE is required for dev"; exit 1; fi
 	@$(MAKE) check-deps
-	@$(MAKE) validate-config
-	@echo -e "$(BLUE)[INFO]$(NC) Starting RAG development environment..."
-	@$(call check_values_file)
-	@$(MAKE) install
-	@echo -e "$(GREEN)[SUCCESS]$(NC) Development environment started"
+	@if [ ! -d "$(FRONTEND_DIR)" ]; then echo -e "$(RED)[ERROR]$(NC) Frontend directory not found at $(FRONTEND_DIR)"; exit 1; fi
+	@if helm status $(RAG_CHART) -n $(NAMESPACE) >/dev/null 2>&1; then \
+		echo -e "$(YELLOW)[INFO]$(NC) RAG is already installed in namespace $(NAMESPACE)."; \
+		if [ "$(INTERACTIVE)" = "true" ]; then \
+			read -p "Do you want to reinstall? [y/N] " answer; \
+			if [ "$$answer" = "y" ] || [ "$$answer" = "Y" ]; then \
+				echo -e "$(BLUE)[INFO]$(NC) Reinstalling..."; \
+				$(MAKE) install NAMESPACE=$(NAMESPACE); \
+			else \
+				echo -e "$(BLUE)[INFO]$(NC) Skipping reinstall. Using existing deployment."; \
+			fi; \
+		else \
+			echo -e "$(BLUE)[INFO]$(NC) Non-interactive mode: skipping reinstall."; \
+		fi; \
+	else \
+		echo -e "$(BLUE)[INFO]$(NC) RAG is not installed in namespace $(NAMESPACE). Installing..."; \
+		$(MAKE) install NAMESPACE=$(NAMESPACE); \
+	fi
+	@echo -e ""
+	@echo -e "$(BLUE)[INFO]$(NC) Setting up port-forwards..."
+	@-pkill -f "[o]c port-forward.*svc/llamastack" 2>/dev/null ||:
+	@-pkill -f "[o]c port-forward.*svc/pgvector" 2>/dev/null ||:
+	@sleep 1 # the bracketed first letter in the pkill patterns keeps pkill -f from matching its own recipe shell
+	@echo -e "$(BLUE)[INFO]$(NC) Port-forwarding llamastack (localhost:$(LLAMASTACK_LOCAL_PORT) -> 8321)..."
+	@oc port-forward svc/llamastack $(LLAMASTACK_LOCAL_PORT):8321 -n $(NAMESPACE) >/dev/null 2>&1 &
+	@echo -e "$(BLUE)[INFO]$(NC) Port-forwarding pgvector (localhost:$(PGVECTOR_LOCAL_PORT) -> 5432)..."
+	@oc port-forward svc/pgvector $(PGVECTOR_LOCAL_PORT):5432 -n $(NAMESPACE) >/dev/null 2>&1 &
+	@sleep 2 # give the backgrounded port-forwards a moment to bind before Streamlit connects
+	@echo -e "$(GREEN)[SUCCESS]$(NC) Port-forwards active:"
+	@echo -e "  llama-stack: http://localhost:$(LLAMASTACK_LOCAL_PORT)"
+	@echo -e "  pgvector:    localhost:$(PGVECTOR_LOCAL_PORT)"
+	@echo -e ""
+	@echo -e "$(BLUE)[INFO]$(NC) Loading suggested questions from ConfigMap..."
+	@SUGGESTED_QUESTIONS_JSON=$$(oc get configmap "$(RAG_CHART)-suggested-questions" -n $(NAMESPACE) -o jsonpath='{.data.RAG_QUESTION_SUGGESTIONS}' 2>/dev/null || true); \
+	if [ -n "$$SUGGESTED_QUESTIONS_JSON" ]; then \
+		echo -e "$(GREEN)[SUCCESS]$(NC) Loaded RAG_QUESTION_SUGGESTIONS from ConfigMap."; \
+	else \
+		echo -e "$(YELLOW)[WARNING]$(NC) Suggested questions ConfigMap not found; using empty suggestions."; \
+		SUGGESTED_QUESTIONS_JSON='{}'; \
+	fi; \
+	echo -e "$(BLUE)[INFO]$(NC) Starting Streamlit application..."; \
+	cd $(FRONTEND_DIR) && \
+	LLAMA_STACK_ENDPOINT=http://localhost:$(LLAMASTACK_LOCAL_PORT) \
+	RAG_QUESTION_SUGGESTIONS="$$SUGGESTED_QUESTIONS_JSON" \
+	bash start.sh
+
+.PHONY: dev-stop
+dev-stop: ## Stop Streamlit, port-forwards, and optionally uninstall
+	@echo -e "$(BLUE)[INFO]$(NC) Stopping Streamlit application..." # bracketed first letters in the pkill patterns below keep pkill -f from matching its own recipe shell
+	@-pkill -f "[s]treamlit run" 2>/dev/null ||:
+	@echo -e "$(BLUE)[INFO]$(NC) Stopping port-forwards..."
+	@-pkill -f "[o]c port-forward.*svc/llamastack" 2>/dev/null ||:
+	@-pkill -f "[o]c port-forward.*svc/pgvector" 2>/dev/null ||:
+	@echo -e "$(GREEN)[SUCCESS]$(NC) Local dev environment stopped."
+	@if [ -n "$(NAMESPACE)" ] && helm status $(RAG_CHART) -n $(NAMESPACE) >/dev/null 2>&1; then \
+		if [ "$(INTERACTIVE)" = "true" ]; then \
+			read -p "Do you want to uninstall RAG from namespace $(NAMESPACE)? [y/N] " answer; \
+			if [ "$$answer" = "y" ] || [ "$$answer" = "Y" ]; then \
+				$(MAKE) uninstall NAMESPACE=$(NAMESPACE); \
+			else \
+				echo -e "$(BLUE)[INFO]$(NC) RAG deployment left running in $(NAMESPACE)."; \
+			fi; \
+		else \
+			echo -e "$(BLUE)[INFO]$(NC) Non-interactive mode: RAG deployment left running in $(NAMESPACE)."; \
+		fi; \
+	fi
 
 # Validation targets
 .PHONY: validate

diff --git a/deploy/helm/bootstrap/Chart.yaml b/deploy/helm/bootstrap/Chart.yaml
@@ -0,0 +1,24 @@
+apiVersion: v2
+name: bootstrap
+description: Bootstrap chart that renders the per-user RAG Argo CD ApplicationSet and the RBAC for the Argo CD application controller
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: 0.1.0
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+# It is recommended to use it with quotes.
+appVersion: "1.0.0"
diff --git a/deploy/helm/bootstrap/templates/applicationset-rag.yaml b/deploy/helm/bootstrap/templates/applicationset-rag.yaml
@@ -0,0 +1,44 @@
+{{- if .Values.rag -}}
+---
+apiVersion: argoproj.io/v1alpha1
+kind: ApplicationSet
+metadata:
+  name: rag
+  namespace: {{ .Values.gitops.namespace }}
+spec:
+  generators:
+  - list:
+      elements:  # one element per user: prefix1 .. prefixN, where N is user.count
+{{- $userCount := int .Values.user.count }}
+{{- range $index := until $userCount }}
+      - user: {{ $.Values.user.prefix }}{{ add $index 1}}
+{{- end }}
+  template:  # the quoted brace literals below emit the ApplicationSet user placeholder without Helm expanding it
+    metadata:
+      name: rag-{{- "{{" }} user {{- " }}" }}
+      namespace: {{ .Values.gitops.namespace }}
+    spec:
+      project: default
+      source:
+        repoURL: {{ .Values.rag.git.url }}
+        targetRevision: {{ .Values.rag.git.revision | quote }}  # quoted so a numeric-looking revision stays a string
+        path: {{ .Values.rag.git.path }}
+        helm:
+          values: |
+{{- toYaml .Values.rag.values | nindent 12 }}
+      destination:
+        server: https://kubernetes.default.svc
+        namespace: '{{ .Values.rag.namespace }}-{{- "{{" }} user {{- " }}" }}-{{ .Values.guid }}'
+      syncPolicy:
+        automated:
+          prune: false
+          selfHeal: false
+        syncOptions:
+          - CreateNamespace=true
+        retry:
+          limit: 30
+          backoff:
+            duration: "5s"
+            factor: 2
+            maxDuration: "5m"
+{{- end }}
diff --git a/deploy/helm/bootstrap/templates/rbac-argocd-controller.yaml b/deploy/helm/bootstrap/templates/rbac-argocd-controller.yaml
@@ -0,0 +1,45 @@
+{{- if .Values.rbac.enabled -}}
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: {{ .Values.rbac.clusterRoleName }}
+rules:  # cluster-scoped grants; bound to a single service account by the ClusterRoleBinding in this file
+  - apiGroups: [""]
+    resources: ["namespaces"]
+    verbs: ["get", "list", "watch", "create"]  # namespaces: read and create only, no update/patch/delete
+  - apiGroups: [""]
+    resources: ["configmaps", "persistentvolumeclaims", "secrets", "serviceaccounts", "services"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]  # full CRUD, as for every rule below
+  - apiGroups: ["apps"]
+    resources: ["deployments", "statefulsets"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
+  - apiGroups: ["batch"]
+    resources: ["jobs"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
+  - apiGroups: ["rbac.authorization.k8s.io"]
+    resources: ["roles", "rolebindings"]  # namespaced RBAC objects only; no clusterroles/clusterrolebindings
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
+  - apiGroups: ["route.openshift.io"]
+    resources: ["routes"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
+  - apiGroups: ["kubeflow.org"]
+    resources: ["notebooks"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
+  - apiGroups: ["datasciencepipelinesapplications.opendatahub.io"]
+    resources: ["datasciencepipelinesapplications"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: {{ .Values.rbac.clusterRoleName }}-binding
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: {{ .Values.rbac.clusterRoleName }}  # must match the ClusterRole name defined above
+subjects:
+  - kind: ServiceAccount
+    name: {{ .Values.gitops.applicationControllerServiceAccount }}  # presumably the Argo CD application controller SA; confirm in values
+    namespace: {{ .Values.gitops.namespace }}
+{{- end }}