rh-ai-quickstart · sauagarwa · Feb 16, 2026 · Dec 15, 2025 · Dec 15, 2025 · Dec 15, 2025
diff --git a/.github/workflows/e2e-tests.yaml b/.github/workflows/e2e-tests.yaml
@@ -629,17 +629,23 @@ jobs:
             deployment/llamastack -n rag-e2e-ui
           kubectl wait --for=condition=available --timeout=300s \
             deployment/rag -n rag-e2e-ui
+
+          echo "Waiting for pods to be ready..."
+          kubectl wait --for=condition=ready --timeout=600s \
+            pod -l app.kubernetes.io/name=llamastack -n rag-e2e-ui
+          kubectl wait --for=condition=ready --timeout=300s \
+            pod -l app.kubernetes.io/name=rag -n rag-e2e-ui
+
+          echo "✅ All pods are ready"
+          kubectl get pods -n rag-e2e-ui
 
       - name: Expose services via NodePort
         run: |
           kubectl patch service rag -n rag-e2e-ui -p '{"spec":{"type":"NodePort","ports":[{"port":8501,"nodePort":30080}]}}'
           kubectl patch service llamastack -n rag-e2e-ui -p '{"spec":{"type":"NodePort","ports":[{"port":8321,"nodePort":30081}]}}'
-
-      - name: Port forward services
-        run: |
-          kubectl port-forward -n rag-e2e-ui svc/rag 8501:8501 &
-          kubectl port-forward -n rag-e2e-ui svc/llamastack 8321:8321 &
-          sleep 10
+
+          # Verify services
+          kubectl get services -n rag-e2e-ui
 
       - name: Run UI E2E tests with Playwright
         env:
@@ -649,10 +655,74 @@ jobs:
           MAAS_MODEL_ID: ${{ env.MAAS_MODEL_ID }}
           SKIP_MODEL_TESTS: "false"  # Enable MaaS inference tests in UI
         run: |
+          echo "Starting port forwarding and running tests..."
+
+          # Start both port-forwards in the background; they must stay up for the whole step
+          echo "Starting port forwarding for RAG UI..."
+          kubectl port-forward -n rag-e2e-ui svc/rag 8501:8501 > /tmp/rag-portforward.log 2>&1 &
+          RAG_PF_PID=$!
+          echo "RAG port-forward PID: $RAG_PF_PID"
+
+          echo "Starting port forwarding for LlamaStack..."
+          kubectl port-forward -n rag-e2e-ui svc/llamastack 8321:8321 > /tmp/llamastack-portforward.log 2>&1 &
+          LLAMASTACK_PF_PID=$!
+          echo "LlamaStack port-forward PID: $LLAMASTACK_PF_PID"
+
+          # Install the cleanup trap before the readiness loop so its `exit 1` path also kills the port-forwards
+          trap "echo 'Cleaning up port forwarding...'; kill $RAG_PF_PID $LLAMASTACK_PF_PID 2>/dev/null || true" EXIT
+
+          # Succeeds only when a TCP connection can be opened to both forwarded local ports
+          check_port_forwarding() {
+            (timeout 2 bash -c 'cat < /dev/null > /dev/tcp/localhost/8501' 2>/dev/null) && \
+            (timeout 2 bash -c 'cat < /dev/null > /dev/tcp/localhost/8321' 2>/dev/null)
+          }
+
+          # Succeeds only when both kubectl port-forward processes are still alive (kill -0 sends no signal)
+          check_processes() {
+            kill -0 $RAG_PF_PID 2>/dev/null && kill -0 $LLAMASTACK_PF_PID 2>/dev/null
+          }
+
+          # Poll up to 30 times, 2 seconds apart, for the port-forwards to accept connections
+          echo "Waiting for port forwarding to be ready..."
+          for i in {1..30}; do
+            if check_port_forwarding && check_processes; then
+              echo "✅ Port forwarding is working! (attempt $i)"
+              break
+            fi
+            if [ $i -eq 30 ]; then
+              echo "❌ Port forwarding failed to establish after 30 attempts"
+              echo "RAG port-forward log:"
+              cat /tmp/rag-portforward.log || true
+              echo "LlamaStack port-forward log:"
+              cat /tmp/llamastack-portforward.log || true
+              echo "Checking port-forward processes:"
+              ps aux | grep "kubectl port-forward" || true
+              echo "Checking if ports are listening:"
+              ss -tlnp | grep -E ':(8501|8321)' || netstat -tlnp 2>/dev/null | grep -E ':(8501|8321)' || true
+              exit 1
+            fi
+            echo "Waiting for port forwarding... (attempt $i/30)"
+            sleep 2
+          done
+
           echo "Running UI E2E tests with MaaS integration..."
           echo "MaaS Endpoint: ${MAAS_ENDPOINT}"
           echo "MaaS Model ID: ${MAAS_MODEL_ID}"
-          pytest tests/e2e_ui/ -v --tb=short --browser chromium
+
+          # Run tests; capture a failing exit code instead of aborting so the diagnostics below still run
+          pytest tests/e2e_ui/ -v --tb=short --browser chromium || TEST_EXIT_CODE=$?
+
+          # Warn (without failing the step) if the port-forwards no longer accept connections after the tests
+          if ! check_port_forwarding; then
+            echo "⚠️  Warning: Port forwarding stopped working during tests"
+            echo "RAG port-forward log:"
+            cat /tmp/rag-portforward.log || true
+            echo "LlamaStack port-forward log:"
+            cat /tmp/llamastack-portforward.log || true
+          fi
+
+          # Propagate pytest's exit code (0 when the tests passed)
+          exit ${TEST_EXIT_CODE:-0}
 
       - name: Upload Playwright test results
         if: always()

diff --git a/README.md b/README.md
@@ -267,7 +267,7 @@ Watch for all pods to reach Running or Completed status. Key pods to watch inclu
 oc get pods -l component=predictor
 ```
 
-Look for **3/3** under the Ready column.
+Look for **2/2** under the Ready column (or **3/3** when `RAW_DEPLOYMENT=false`).
 
 8. **Verify Installation**
 

diff --git a/deploy/helm/Makefile b/deploy/helm/Makefile
@@ -85,10 +85,24 @@ endef
 # Helper function to validate values file
 define validate_values_file
 	echo -e "$(BLUE)[INFO]$(NC) Validating configuration values..."; \
-	HF_TOKEN=$$(grep -A 2 "^llm-service:" "$(VALUES_FILE)" | grep "hf_token:" | sed 's/.*hf_token: *//' | tr -d '"' | tr -d ' '); \
-	LLAMA_STACK_TAVILY=$$(grep "TAVILY_SEARCH_API_KEY:" "$(VALUES_FILE)" 2>/dev/null | sed 's/.*TAVILY_SEARCH_API_KEY: *//' | tr -d '"' | tr -d ' '); \
 	UPDATED=0; \
-	if [ -z "$$HF_TOKEN" ] || [ "$$HF_TOKEN" = "" ]; then \
+	\
+	if [ -n "$$HF_TOKEN" ]; then \
+		echo -e "$(GREEN)[SUCCESS]$(NC) Using HF_TOKEN from environment variable."; \
+		sed -i.bak "/^llm-service:/,/^[^ ]/ s|hf_token:.*|hf_token: \"$$HF_TOKEN\"|" "$(VALUES_FILE)"; \
+		UPDATED=1; \
+	fi; \
+	\
+	if [ -n "$$TAVILY_API_KEY" ]; then \
+		echo -e "$(GREEN)[SUCCESS]$(NC) Using TAVILY_API_KEY from environment variable."; \
+		sed -i.bak "s/TAVILY_SEARCH_API_KEY:.*/TAVILY_SEARCH_API_KEY: \"$$TAVILY_API_KEY\"/" "$(VALUES_FILE)"; \
+		UPDATED=1; \
+	fi; \
+	\
+	HF_TOKEN_FILE=$$(grep -A 2 "^llm-service:" "$(VALUES_FILE)" | grep "hf_token:" | sed 's/.*hf_token: *//' | tr -d '"' | tr -d ' '); \
+	LLAMA_STACK_TAVILY=$$(grep "TAVILY_SEARCH_API_KEY:" "$(VALUES_FILE)" 2>/dev/null | sed 's/.*TAVILY_SEARCH_API_KEY: *//' | tr -d '"' | tr -d ' '); \
+	\
+	if [ -z "$$HF_TOKEN_FILE" ] || [ "$$HF_TOKEN_FILE" = "" ]; then \
 		echo -e "$(YELLOW)[WARNING]$(NC) Hugging Face token is not set. Model downloads may fail."; \
 		echo -e "$(BLUE)[INFO]$(NC) Get your token from: https://huggingface.co/settings/tokens"; \
 		echo -e ""; \
@@ -102,6 +116,7 @@ define validate_values_file
 		fi; \
 		echo -e ""; \
 	fi; \
+	\
 	if [ -z "$$LLAMA_STACK_TAVILY" ] || [ "$$LLAMA_STACK_TAVILY" = "Paste-your-key-here" ]; then \
 		echo -e "$(YELLOW)[WARNING]$(NC) TAVILY search API key is not set. Web search will be disabled."; \
 		echo -e "$(BLUE)[INFO]$(NC) Get your key from: https://tavily.com/"; \
@@ -116,6 +131,7 @@ define validate_values_file
 		fi; \
 		echo -e ""; \
 	fi; \
+	\
 	if [ $$UPDATED -eq 1 ]; then \
 		echo -e "$(GREEN)[SUCCESS]$(NC) Configuration updated. Proceeding with installation..."; \
 		echo -e ""; \
@@ -312,7 +328,7 @@ show-config: ## Show configuration file contents
 # Create namespace and deploy
 namespace:
 	@echo -e "$(BLUE)[INFO]$(NC) Creating namespace $(NAMESPACE)..."
-	@oc create namespace $(NAMESPACE) &> /dev/null && oc label namespace $(NAMESPACE) modelmesh-enabled=false ||:
+	@oc new-project $(NAMESPACE) &> /dev/null && oc label namespace $(NAMESPACE) modelmesh-enabled=false &> /dev/null ||:
 	@oc project $(NAMESPACE) &> /dev/null ||:
 	@echo -e "$(GREEN)[SUCCESS]$(NC) Namespace $(NAMESPACE) is ready"
 
@@ -479,11 +495,11 @@ install: ## Install the RAG deployment
 		fi; \
 		if [ -n "$(LLM_URL)" ]; then \
 			echo -e "$(BLUE)[INFO]$(NC) Setting LLM URL: $(LLM_URL)"; \
-			HELM_ARGS="$$HELM_ARGS --set global.models.$(LLM).url='$(LLM_URL)'"; \
+			HELM_ARGS="$$HELM_ARGS --set global.models.$(LLM).url=$(LLM_URL)"; \
 		fi; \
 		if [ -n "$(LLM_API_TOKEN)" ]; then \
 			echo -e "$(BLUE)[INFO]$(NC) Setting LLM API token"; \
-			HELM_ARGS="$$HELM_ARGS --set global.models.$(LLM).apiToken='$(LLM_API_TOKEN)'"; \
+			HELM_ARGS="$$HELM_ARGS --set global.models.$(LLM).apiToken=$(LLM_API_TOKEN)"; \
 		fi; \
 	fi; \
 	if [ -n "$(SAFETY)" ]; then \

diff --git a/deploy/helm/rag/Chart.lock b/deploy/helm/rag/Chart.lock
@@ -1,21 +1,21 @@
 dependencies:
 - name: pgvector
   repository: https://rh-ai-quickstart.github.io/ai-architecture-charts
-  version: 0.5.1
+  version: 0.5.5
 - name: llm-service
   repository: https://rh-ai-quickstart.github.io/ai-architecture-charts
-  version: 0.5.2
+  version: 0.5.9
 - name: configure-pipeline
   repository: https://rh-ai-quickstart.github.io/ai-architecture-charts
-  version: 0.5.4
+  version: 0.5.6
 - name: ingestion-pipeline
   repository: https://rh-ai-quickstart.github.io/ai-architecture-charts
-  version: 0.5.1
+  version: 0.6.6
 - name: llama-stack
   repository: https://rh-ai-quickstart.github.io/ai-architecture-charts
-  version: 0.5.2
+  version: 0.6.11
 - name: mcp-servers
   repository: https://rh-ai-quickstart.github.io/ai-architecture-charts
-  version: 0.5.7
-digest: sha256:d7abd4b5f5c4080a241c567f0bde351f927a5ac0d95fea4bbdf8f364f7a92866
-generated: "2025-12-05T10:53:08.788253807-05:00"
+  version: 0.5.15
+digest: sha256:1065a9cbf8dfb460fd9c9a6d3571fdfc33e3693503aae6535e07e75919a6c9f2
+generated: "2026-02-13T13:19:24.192726731-05:00"
diff --git a/deploy/helm/rag/Chart.yaml b/deploy/helm/rag/Chart.yaml
@@ -2,31 +2,31 @@ apiVersion: v2
 name: rag
 description: A Helm chart for Kubernetes
 type: application
-version: 0.2.31
-appVersion: "0.2.31"
+version: 0.2.32
+appVersion: "0.2.32"
 
 dependencies:
   - name: pgvector
-    version: 0.5.1
+    version: 0.5.5
     repository: https://rh-ai-quickstart.github.io/ai-architecture-charts
     condition: pgvector.enabled
   - name: llm-service
-    version: 0.5.2
+    version: 0.5.9
     repository: https://rh-ai-quickstart.github.io/ai-architecture-charts
     condition: llm-service.enabled
   - name: configure-pipeline
-    version: 0.5.4
+    version: 0.5.6
     repository: https://rh-ai-quickstart.github.io/ai-architecture-charts
     condition: configure-pipeline.enabled
   - name: ingestion-pipeline
-    version: 0.5.1
+    version: 0.6.6
     repository: https://rh-ai-quickstart.github.io/ai-architecture-charts
     condition: ingestion-pipeline.enabled
   - name: llama-stack
-    version: 0.5.2
+    version: 0.6.11
     repository: https://rh-ai-quickstart.github.io/ai-architecture-charts
     condition: llama-stack.enabled
   - name: mcp-servers
-    version: 0.5.7
+    version: 0.5.15
     repository: https://rh-ai-quickstart.github.io/ai-architecture-charts
     condition: mcp-servers.enabled
diff --git a/deploy/helm/rag/templates/deployment.yaml b/deploy/helm/rag/templates/deployment.yaml
@@ -40,6 +40,21 @@ spec:
             - name: TAVILY_SEARCH_API_KEY
               value: {{ (index .Values "llama-stack").secrets.TAVILY_SEARCH_API_KEY | quote }}
             {{- end }}
+            {{- /* Emit PGVECTOR_* only when pgvector.secret is set; dereferencing a missing map would abort rendering. */}}
+            {{- if .Values.pgvector }}
+            {{- with .Values.pgvector.secret }}
+            - name: PGVECTOR_HOST
+              value: {{ .host | quote }}
+            - name: PGVECTOR_PORT
+              value: {{ .port | quote }}
+            - name: PGVECTOR_USER
+              value: {{ .user | quote }}
+            - name: PGVECTOR_PASSWORD
+              value: {{ .password | quote }}
+            - name: PGVECTOR_DB
+              value: {{ .dbname | quote }}
+            {{- end }}
+            {{- end }}
             {{- if .Values.suggestedQuestions }}
             - name: RAG_QUESTION_SUGGESTIONS
               valueFrom:

diff --git a/deploy/helm/rag/templates/route.yaml b/deploy/helm/rag/templates/route.yaml
@@ -4,6 +4,9 @@ metadata:
   name: {{ include "rag.fullname" . }}
   labels:
     {{- include "rag.labels" . | nindent 4 }}
+  annotations:
+    # 10 minute timeout for large document uploads
+    haproxy.router.openshift.io/timeout: 600s
 spec:
   to:
     kind: Service

diff --git a/deploy/helm/rag/values.yaml b/deploy/helm/rag/values.yaml
@@ -3,7 +3,7 @@ replicaCount: 1
 image:
   repository: quay.io/rh-ai-quickstart/llamastack-dist-ui
   pullPolicy: Always
-  tag: 0.2.31
+  tag: 0.2.32
 
 service:
   type: ClusterIP

diff --git a/frontend/llama_stack_ui/distribution/ui/app.py b/frontend/llama_stack_ui/distribution/ui/app.py
@@ -3,9 +3,17 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+import logging
+
 import streamlit as st
 
+
 def main():
+    # Configure logging to show DEBUG messages by default
+    logging.basicConfig(
+        level=logging.DEBUG,
+        format='[%(levelname)s] %(name)s: %(message)s'
+    )
     # Define available pages: path and icon
     pages = {
         "Chat": ("page/playground/chat.py", "💬"),
@@ -15,7 +23,7 @@ def main():
 
     # Build navigation items dynamically
     nav_items = [
-        st.Page(path, title=name, icon=icon, default=(name == "Chat"))
+        st.Page(path, title=name, icon=icon, default=name == "Chat")
         for name, (path, icon) in pages.items()
     ]
     # Render navigation
@@ -25,4 +33,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-
diff --git a/frontend/llama_stack_ui/distribution/ui/modules/api.py b/frontend/llama_stack_ui/distribution/ui/modules/api.py
@@ -11,9 +11,13 @@
 
 class LlamaStackApi:
     def __init__(self):
+        # Request timeout in seconds; override with the LLAMA_STACK_TIMEOUT environment variable.
+        # Defaults to 600 (10 minutes) because the default of 60 seconds is too short for large PDF uploads.
+        timeout = float(os.environ.get("LLAMA_STACK_TIMEOUT", "600"))
+
         self.client = LlamaStackClient(
             base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:8321"),
-
+            timeout=timeout,
             provider_data={
                 "fireworks_api_key": os.environ.get("FIREWORKS_API_KEY", ""),
                 "together_api_key": os.environ.get("TOGETHER_API_KEY", ""),