Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .env.gui
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Version components read by .github/workflows/ci-build-image.yml, which
# exports them into $GITHUB_ENV and tags the GUI Docker image as
# ghcr.io/<repo>:<RELEASE>-<VERSION>.<BUILD>.<FIX>.
# Lines starting with '#' are ignored by the workflow (grep -v '^#').
RELEASE=test
VERSION=1
BUILD=1
FIX=0
43 changes: 43 additions & 0 deletions .github/workflows/ci-build-image.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Build the GUI Docker image and publish it to GitHub Container Registry.
# Runs only when the version file .env.gui changes on the `wip` branch;
# the image tag is derived from RELEASE/VERSION/BUILD/FIX in that file.
name: Build and publish GUI

on:
  push:
    branches:
      - wip
    paths:
      - '.env.gui'

jobs:
  PackageDeploy:
    runs-on: ubuntu-22.04

    steps:
      - uses: actions/checkout@v2

      - name: Docker Setup BuildX
        uses: docker/setup-buildx-action@v2

      - name: Load environment variables and set them
        run: |
          # Read RELEASE/VERSION/BUILD/FIX from .env.gui (skipping '#' comment
          # lines) and promote them to workflow-level env vars via $GITHUB_ENV.
          if [ -f .env.gui ]; then
            export $(grep -v '^#' .env.gui | xargs)
          fi
          echo "RELEASE=$RELEASE" >> $GITHUB_ENV
          echo "VERSION=$VERSION" >> $GITHUB_ENV
          echo "BUILD=$BUILD" >> $GITHUB_ENV
          echo "FIX=$FIX" >> $GITHUB_ENV

      - name: Set repo
        run: |
          # ghcr.io rejects upper-case repository names, so lower-case it first.
          LOWER_CASE_GITHUB_REPOSITORY=$(echo $GITHUB_REPOSITORY | tr '[:upper:]' '[:lower:]')
          echo "DOCKER_TAG_CUSTOM=ghcr.io/${LOWER_CASE_GITHUB_REPOSITORY}:$RELEASE-$VERSION.$BUILD.$FIX" >> $GITHUB_ENV
          # Debug aid: print the path of the runner's env file.
          echo "$GITHUB_ENV"

      - name: Docker Build
        run: |
          cd GUI
          docker image build --tag $DOCKER_TAG_CUSTOM -f Dockerfile.dev .

      - name: Log in to GitHub container registry
        # Fix: the original passed the literal string "$" as the username
        # (`-u $`); use the triggering actor, the documented ghcr.io login.
        run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin

      - name: Push Docker image to ghcr
        run: docker push $DOCKER_TAG_CUSTOM
10 changes: 5 additions & 5 deletions docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md
Original file line number Diff line number Diff line change
Expand Up @@ -244,9 +244,9 @@ intent_result = intent_module.forward(...)

# After LLM call
usage_info = get_lm_usage_since(history_length_before)
costs_dict["intent_detection"] = usage_info
costs_metric["intent_detection"] = usage_info

# Later: orchestration_service.log_costs(costs_dict)
# Later: orchestration_service.log_costs(costs_metric)
```

---
Expand Down Expand Up @@ -557,14 +557,14 @@ Service workflow tracks LLM costs following the RAG workflow pattern:

```python
# Create costs dict at workflow level
costs_dict: Dict[str, Dict[str, Any]] = {}
costs_metric: Dict[str, Dict[str, Any]] = {}

# Intent detection captures costs
intent_result, intent_usage = await _detect_service_intent(...)
costs_dict["intent_detection"] = intent_usage
costs_metric["intent_detection"] = intent_usage

# Log costs after workflow completes
orchestration_service.log_costs(costs_dict)
orchestration_service.log_costs(costs_metric)
```

**Cost Breakdown Logged:**
Expand Down
38 changes: 19 additions & 19 deletions docs/TOOL_CLASSIFIER_EXTENSION_SPEC.md
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ formatted_content = format_service_response(service_response)
# Apply output guardrails
if guardrails_adapter:
output_check = await guardrails_adapter.check_output_async(formatted_content)
costs_dict["output_guardrails"] = output_check.usage
costs_metric["output_guardrails"] = output_check.usage

if not output_check.allowed:
logger.warning(f"Service response blocked by guardrails: {output_check.reason}")
Expand All @@ -449,7 +449,7 @@ formatted_content = format_service_response(service_response)
# Apply output guardrails validation
if guardrails_adapter:
output_check = await guardrails_adapter.check_output_async(formatted_content)
costs_dict["output_guardrails"] = output_check.usage
costs_metric["output_guardrails"] = output_check.usage

if not output_check.allowed:
logger.warning(f"Service response blocked by guardrails")
Expand Down Expand Up @@ -791,7 +791,7 @@ async def execute_context_workflow(
request: OrchestrationRequest,
llm_manager: LLMManager,
guardrails_adapter: Optional[NeMoRailsAdapter],
costs_dict: Dict
costs_metric: Dict
) -> Optional[OrchestrationResponse]:
"""
Execute context-based response workflow with output guardrails.
Expand All @@ -807,7 +807,7 @@ async def execute_context_workflow(
)

# Track costs
costs_dict["context_check"] = get_lm_usage_since(history_before)
costs_metric["context_check"] = get_lm_usage_since(history_before)

if (context_result.is_greeting or context_result.can_answer_from_context) and context_result.answer:
logger.info(
Expand All @@ -820,7 +820,7 @@ async def execute_context_workflow(
output_check = await guardrails_adapter.check_output_async(
context_result.answer
)
costs_dict["output_guardrails"] = output_check.usage
costs_metric["output_guardrails"] = output_check.usage

if not output_check.allowed:
logger.warning(
Expand Down Expand Up @@ -852,7 +852,7 @@ async def execute_context_workflow_streaming(
request: OrchestrationRequest,
llm_manager: LLMManager,
guardrails_adapter: Optional[NeMoRailsAdapter],
costs_dict: Dict
costs_metric: Dict
) -> Optional[AsyncIterator[str]]:
"""
Execute context workflow with streaming support and output guardrails.
Expand All @@ -871,7 +871,7 @@ async def execute_context_workflow_streaming(
)

# Track costs
costs_dict["context_check"] = get_lm_usage_since(history_before)
costs_metric["context_check"] = get_lm_usage_since(history_before)

if (context_result.is_greeting or context_result.can_answer_from_context) and context_result.answer:
logger.info(
Expand All @@ -884,7 +884,7 @@ async def execute_context_workflow_streaming(
output_check = await guardrails_adapter.check_output_async(
context_result.answer
)
costs_dict["output_guardrails"] = output_check.usage
costs_metric["output_guardrails"] = output_check.usage

if not output_check.allowed:
logger.warning(
Expand Down Expand Up @@ -941,17 +941,17 @@ def split_into_tokens(text: str, chunk_size: int = 5) -> List[str]:
```python
try:
result = await execute_context_workflow(
request, llm_manager, guardrails_adapter, costs_dict
request, llm_manager, guardrails_adapter, costs_metric
)
if result:
return result # Context-based answer (validated)
else:
# Move to Layer 3 (RAG)
return await execute_rag_workflow(request, components, costs_dict)
return await execute_rag_workflow(request, components, costs_metric)
except Exception as e:
logger.error(f"Context workflow failed: {e}")
# Fallback to RAG workflow
return await execute_rag_workflow(request, components, costs_dict)
return await execute_rag_workflow(request, components, costs_metric)
```

**Guardrail Violation Fallback:**
Expand All @@ -963,7 +963,7 @@ if not output_check.allowed:
# Option 2: Fallback to RAG (alternative approach)
if not output_check.allowed:
logger.warning("Context response blocked, trying RAG workflow")
return await execute_rag_workflow(request, components, costs_dict)
return await execute_rag_workflow(request, components, costs_metric)
```

---
Expand All @@ -978,7 +978,7 @@ if not output_check.allowed:
```python
# Reuse existing RAG pipeline
return self._execute_orchestration_pipeline(
request, components, costs_dict, timing_dict
request, components, costs_metric, time_metric
)
```

Expand Down Expand Up @@ -1121,7 +1121,7 @@ if context_result.can_answer_from_context:
- **Pre-validation**: Get complete response → Validate → Stream to client
- **Complete response**: Already have full text before streaming starts
- **Uni-directional**: Simply chunk and send validated response
- **Cost**: Separate validation call tracked in `costs_dict["output_guardrails"]`
- **Cost**: Separate validation call tracked in `costs_metric["output_guardrails"]`
- **UX Consistency**: Simulates streaming to match RAG workflow behavior

### Why Different Approaches?
Expand Down Expand Up @@ -1601,15 +1601,15 @@ CREATE INDEX idx_classifier_decisions_workflow

**Add tracking for new LLM calls:**
# Service workflow - intent detection
costs_dict["intent_detection"] = {
costs_metric["intent_detection"] = {
"total_prompt_tokens": usage.prompt_tokens,
"total_completion_tokens": usage.completion_tokens,
"total_cost": calculate_cost(usage)
}

# Context workflow - context availability check
costs_dict["context_check"] = {
costs_metric["context_check"] = {
"total_prompt_tokens": usage.prompt_tokens,
"total_completion_tokens": usage.completion_tokens,
"total_cost": calculate_cost(usage)
Expand Down Expand Up @@ -1663,7 +1663,7 @@ async def stream_validated_response(
response_text: str,
guardrails_adapter: NeMoRailsAdapter,
request: OrchestrationRequest,
costs_dict: Dict
costs_metric: Dict
) -> AsyncIterator[str]:
"""
Apply output guardrails and stream validated response.
Expand All @@ -1677,7 +1677,7 @@ async def stream_validated_response(
output_check = await guardrails_adapter.check_output_async(response_text)

# Track costs
costs_dict["output_guardrails"] = output_check.usage
costs_metric["output_guardrails"] = output_check.usage

if not output_check.allowed:
logger.warning(f"[{request.chatId}] Output blocked by guardrails")
Expand Down
2 changes: 1 addition & 1 deletion src/contextual_retrieval/contextual_retrieval.md
Original file line number Diff line number Diff line change
Expand Up @@ -788,7 +788,7 @@ def _initialize_contextual_retriever(
#### 2. Request Processing
```python
# Main orchestration pipeline
def _execute_orchestration_pipeline(self, request, components, costs_dict):
def _execute_orchestration_pipeline(self, request, components, costs_metric):
# Step 1: Refine user prompt
refined_output = self._refine_user_prompt(...)

Expand Down
2 changes: 1 addition & 1 deletion src/guardrails/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ result.usage = usage_info # Contains: total_cost, tokens, num_calls
### Modified Pipeline in `llm_orchestration_service.py`

```python
costs_dict = {
costs_metric = {
"input_guardrails": {...}, # Step 1
"prompt_refiner": {...}, # Step 2
"response_generator": {...}, # Step 4
Expand Down
Loading