From 9a365dc84c67f741cf41360d315238f56c17729e Mon Sep 17 00:00:00 2001 From: Ryan Johnson Date: Mon, 13 Apr 2026 17:07:37 -0400 Subject: [PATCH 1/4] fix issues with ui --- README.md | 34 +++++++++---------- deploy/helm/Makefile | 2 +- deploy/helm/rag-values.yaml.example | 2 +- .../distribution/ui/page/playground/chat.py | 4 +-- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 9aeca12..7889674 100644 --- a/README.md +++ b/README.md @@ -100,18 +100,6 @@ This QuickStart allows users to explore the capabilities of RAG by: | Generation | `meta-llama/Meta-Llama-3-70B-Instruct` | A100 x2/HPU | p4d.24xlarge | Safety | `meta-llama/Llama-Guard-3-8B` | L4/HPU | g6.2xlarge -- Note: Developers can also use a remote LLM via the command line (see [Remote LLM Deployment](#remote-llm-deployment-example)) or by modifying the `rag-values.yaml` file directly: - -```yaml - global: - models: - remote-llm: - id: meta-llama/Llama-3.3-70B-Instruct - url: https://somedomain.com/v1 - apiToken: fake-token - enabled: true -``` - Note: the 70B model is NOT required for initial testing of this example. The safety/shield model `Llama-Guard-3-8B` is also optional. 
### Installation Steps @@ -250,7 +238,6 @@ make install NAMESPACE=llama-stack-rag LLM=llama-3-2-3b-instruct SAFETY=llama-gu # Xeon deployment make install NAMESPACE=llama-stack-rag LLM=llama-3-2-3b-instruct SAFETY=llama-guard-3-8b DEVICE=xeon - ``` **Remote LLM Deployment Example:** @@ -259,16 +246,17 @@ To connect to a remote LLM endpoint instead of deploying a local model, use `LLM ```bash make install NAMESPACE=llama-stack-rag \ - LLM=remote-llm \ + LLM=remotellm \ LLM_URL=https://my-model-endpoint.example.com/v1 \ - LLM_API_TOKEN=my-api-token + LLM_API_TOKEN=my-api-token \ + LLM_ID=llm_model_id ``` - | Parameter | Description | |-----------|-------------| -| `LLM=remote-llm` | Indicates a remote model (no local vLLM deployment) | +| `LLM=remotellm` | Indicates a remote model (no local vLLM deployment) | | `LLM_URL` | The base URL of the remote model endpoint | | `LLM_API_TOKEN` | Authentication token for the remote endpoint | +| `LLM_ID` | The ID of the model served by the remote endpoint that you want to use | This skips local model deployment and configures LlamaStack to use the remote inference endpoint directly. No GPU or HF token is required for the LLM. @@ -276,6 +264,18 @@ When prompted, enter your **[Hugging Face Token](https://huggingface.co/settings Note: This process may take 10 to 30 minutes depending on the number and size of models to be downloaded. +- Note: Developers can also use a remote LLM via the helm chart (see [Remote LLM Deployment](#remote-llm-deployment-example)) or by modifying the `rag-values.yaml` file directly: + +```yaml + global: + models: + remotellm: + id: meta-llama/Llama-3.3-70B-Instruct + url: https://llm-gateway.com/v1 + apiToken: api-token + enabled: true +``` + 7. 
**Monitor Deployment** ```bash diff --git a/deploy/helm/Makefile b/deploy/helm/Makefile index eef2ddc..f5cff07 100644 --- a/deploy/helm/Makefile +++ b/deploy/helm/Makefile @@ -270,7 +270,7 @@ help: ## Show this help message @echo -e " make install NAMESPACE=my-rag LLM=llama-3-2-3b-instruct LLM_TOLERATION=\"nvidia.com/gpu\"" @echo -e "" @echo -e " $(BLUE)Option 3:$(NC) Using command-line parameters with remote LLM" - @echo -e " make install NAMESPACE=my-rag LLM=remote-llm LLM_URL=https://<>/v1 LLM_API_TOKEN=<>" + @echo -e " make install NAMESPACE=my-rag LLM=remotellm LLM_URL=https://<>/v1 LLM_API_TOKEN=<>" # Dependency checks .PHONY: check-deps diff --git a/deploy/helm/rag-values.yaml.example b/deploy/helm/rag-values.yaml.example index d2fa2cb..2a46dab 100644 --- a/deploy/helm/rag-values.yaml.example +++ b/deploy/helm/rag-values.yaml.example @@ -103,7 +103,7 @@ global: # To configure LlamaStack with remote llm, replace the id, # url and apiToken value and set enabled to true - # remote-llm: + # remotellm: # id: custom-model-id # url: https://custom-server-url/v1 # apiToken: fake-token diff --git a/frontend/llama_stack_ui/distribution/ui/page/playground/chat.py b/frontend/llama_stack_ui/distribution/ui/page/playground/chat.py index 2da2155..25c7058 100644 --- a/frontend/llama_stack_ui/distribution/ui/page/playground/chat.py +++ b/frontend/llama_stack_ui/distribution/ui/page/playground/chat.py @@ -99,8 +99,8 @@ def _get_model_type(model): return meta.get("model_type") model_list = [ - _get_model_id(model) for model in models - if _get_model_type(model) == "llm" and _get_model_id(model) not in shields_set + model.id for model in models + if model.custom_metadata.get("model_type") == "llm" and model.id not in shields_set ] # Fetch and categorize toolgroups From 403aef2ec1b021dce17226a25a46b53bb378e69a Mon Sep 17 00:00:00 2001 From: Ryan Johnson Date: Mon, 13 Apr 2026 17:08:39 -0400 Subject: [PATCH 2/4] fix issues with ui --- 
.../llama_stack_ui/distribution/ui/page/playground/chat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/llama_stack_ui/distribution/ui/page/playground/chat.py b/frontend/llama_stack_ui/distribution/ui/page/playground/chat.py index 25c7058..a7836c4 100644 --- a/frontend/llama_stack_ui/distribution/ui/page/playground/chat.py +++ b/frontend/llama_stack_ui/distribution/ui/page/playground/chat.py @@ -99,8 +99,8 @@ def _get_model_type(model): return meta.get("model_type") model_list = [ - model.id for model in models - if model.custom_metadata.get("model_type") == "llm" and model.id not in shields_set +_get_model_id(model) for model in models + if _get_model_type(model) == "llm" and _get_model_id(model) not in shields_set ] # Fetch and categorize toolgroups From 6887e7084b90c9e66a93375a6242ccdc623d1767 Mon Sep 17 00:00:00 2001 From: Ryan Johnson Date: Mon, 13 Apr 2026 17:09:57 -0400 Subject: [PATCH 3/4] fix issues with ui --- frontend/llama_stack_ui/distribution/ui/page/playground/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/llama_stack_ui/distribution/ui/page/playground/chat.py b/frontend/llama_stack_ui/distribution/ui/page/playground/chat.py index a7836c4..2da2155 100644 --- a/frontend/llama_stack_ui/distribution/ui/page/playground/chat.py +++ b/frontend/llama_stack_ui/distribution/ui/page/playground/chat.py @@ -99,7 +99,7 @@ def _get_model_type(model): return meta.get("model_type") model_list = [ -_get_model_id(model) for model in models + _get_model_id(model) for model in models if _get_model_type(model) == "llm" and _get_model_id(model) not in shields_set ] From 573b27cd4a0b36e3dfe40bd966095bc0e004a468 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 14 Apr 2026 13:27:47 +0000 Subject: [PATCH 4/4] chore: bump version to 0.2.42 --- deploy/helm/rag/Chart.yaml | 4 ++-- deploy/helm/rag/values.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/deploy/helm/rag/Chart.yaml b/deploy/helm/rag/Chart.yaml index 7b5d394..280f3cd 100644 --- a/deploy/helm/rag/Chart.yaml +++ b/deploy/helm/rag/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: rag description: A Helm chart for Kubernetes type: application -version: 0.2.38 -appVersion: "0.2.38" +version: 0.2.42 +appVersion: "0.2.42" dependencies: - name: pgvector diff --git a/deploy/helm/rag/values.yaml b/deploy/helm/rag/values.yaml index 2863531..a1903be 100644 --- a/deploy/helm/rag/values.yaml +++ b/deploy/helm/rag/values.yaml @@ -3,7 +3,7 @@ replicaCount: 1 image: repository: quay.io/rh-ai-quickstart/llamastack-dist-ui pullPolicy: Always - tag: latest-dev + tag: 0.2.42 service: type: ClusterIP