Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .env.gui
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Version components read by .github/workflows/ci-build-image.yml, which
# exports them into $GITHUB_ENV and tags the GUI Docker image as
# ghcr.io/<repo>:<RELEASE>-<VERSION>.<BUILD>.<FIX>.
# Lines starting with '#' are ignored by the workflow (grep -v '^#').
RELEASE=test
VERSION=1
BUILD=1
FIX=0
43 changes: 43 additions & 0 deletions .github/workflows/ci-build-image.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Build the GUI Docker image and publish it to GitHub Container Registry.
# Runs only when the version file .env.gui changes on the `wip` branch;
# the image tag is derived from RELEASE/VERSION/BUILD/FIX in that file.
name: Build and publish GUI

on:
  push:
    branches:
      - wip
    paths:
      - '.env.gui'

jobs:
  PackageDeploy:
    runs-on: ubuntu-22.04

    steps:
      - uses: actions/checkout@v2

      - name: Docker Setup BuildX
        uses: docker/setup-buildx-action@v2

      - name: Load environment variables and set them
        run: |
          # Read RELEASE/VERSION/BUILD/FIX from .env.gui (skipping '#' comment
          # lines) and promote them to workflow-level env vars via $GITHUB_ENV.
          if [ -f .env.gui ]; then
            export $(grep -v '^#' .env.gui | xargs)
          fi
          echo "RELEASE=$RELEASE" >> $GITHUB_ENV
          echo "VERSION=$VERSION" >> $GITHUB_ENV
          echo "BUILD=$BUILD" >> $GITHUB_ENV
          echo "FIX=$FIX" >> $GITHUB_ENV

      - name: Set repo
        run: |
          # ghcr.io rejects upper-case repository names, so lower-case it first.
          LOWER_CASE_GITHUB_REPOSITORY=$(echo $GITHUB_REPOSITORY | tr '[:upper:]' '[:lower:]')
          echo "DOCKER_TAG_CUSTOM=ghcr.io/${LOWER_CASE_GITHUB_REPOSITORY}:$RELEASE-$VERSION.$BUILD.$FIX" >> $GITHUB_ENV
          # Debug aid: print the path of the runner's env file.
          echo "$GITHUB_ENV"

      - name: Docker Build
        run: |
          cd GUI
          docker image build --tag $DOCKER_TAG_CUSTOM -f Dockerfile.dev .

      - name: Log in to GitHub container registry
        # Fix: the original passed the literal string "$" as the username
        # (`-u $`); use the triggering actor, the documented ghcr.io login.
        run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin

      - name: Push Docker image to ghcr
        run: docker push $DOCKER_TAG_CUSTOM
10 changes: 5 additions & 5 deletions docs/TOOL_CLASSIFIER_AND_SERVICE_WORKFLOW.md
Original file line number Diff line number Diff line change
Expand Up @@ -244,9 +244,9 @@ intent_result = intent_module.forward(...)

# After LLM call
usage_info = get_lm_usage_since(history_length_before)
costs_dict["intent_detection"] = usage_info
costs_metric["intent_detection"] = usage_info

# Later: orchestration_service.log_costs(costs_dict)
# Later: orchestration_service.log_costs(costs_metric)
```

---
Expand Down Expand Up @@ -557,14 +557,14 @@ Service workflow tracks LLM costs following the RAG workflow pattern:

```python
# Create costs dict at workflow level
costs_dict: Dict[str, Dict[str, Any]] = {}
costs_metric: Dict[str, Dict[str, Any]] = {}

# Intent detection captures costs
intent_result, intent_usage = await _detect_service_intent(...)
costs_dict["intent_detection"] = intent_usage
costs_metric["intent_detection"] = intent_usage

# Log costs after workflow completes
orchestration_service.log_costs(costs_dict)
orchestration_service.log_costs(costs_metric)
```

**Cost Breakdown Logged:**
Expand Down
38 changes: 19 additions & 19 deletions docs/TOOL_CLASSIFIER_EXTENSION_SPEC.md
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ formatted_content = format_service_response(service_response)
# Apply output guardrails
if guardrails_adapter:
output_check = await guardrails_adapter.check_output_async(formatted_content)
costs_dict["output_guardrails"] = output_check.usage
costs_metric["output_guardrails"] = output_check.usage

if not output_check.allowed:
logger.warning(f"Service response blocked by guardrails: {output_check.reason}")
Expand All @@ -449,7 +449,7 @@ formatted_content = format_service_response(service_response)
# Apply output guardrails validation
if guardrails_adapter:
output_check = await guardrails_adapter.check_output_async(formatted_content)
costs_dict["output_guardrails"] = output_check.usage
costs_metric["output_guardrails"] = output_check.usage

if not output_check.allowed:
logger.warning(f"Service response blocked by guardrails")
Expand Down Expand Up @@ -791,7 +791,7 @@ async def execute_context_workflow(
request: OrchestrationRequest,
llm_manager: LLMManager,
guardrails_adapter: Optional[NeMoRailsAdapter],
costs_dict: Dict
costs_metric: Dict
) -> Optional[OrchestrationResponse]:
"""
Execute context-based response workflow with output guardrails.
Expand All @@ -807,7 +807,7 @@ async def execute_context_workflow(
)

# Track costs
costs_dict["context_check"] = get_lm_usage_since(history_before)
costs_metric["context_check"] = get_lm_usage_since(history_before)

if (context_result.is_greeting or context_result.can_answer_from_context) and context_result.answer:
logger.info(
Expand All @@ -820,7 +820,7 @@ async def execute_context_workflow(
output_check = await guardrails_adapter.check_output_async(
context_result.answer
)
costs_dict["output_guardrails"] = output_check.usage
costs_metric["output_guardrails"] = output_check.usage

if not output_check.allowed:
logger.warning(
Expand Down Expand Up @@ -852,7 +852,7 @@ async def execute_context_workflow_streaming(
request: OrchestrationRequest,
llm_manager: LLMManager,
guardrails_adapter: Optional[NeMoRailsAdapter],
costs_dict: Dict
costs_metric: Dict
) -> Optional[AsyncIterator[str]]:
"""
Execute context workflow with streaming support and output guardrails.
Expand All @@ -871,7 +871,7 @@ async def execute_context_workflow_streaming(
)

# Track costs
costs_dict["context_check"] = get_lm_usage_since(history_before)
costs_metric["context_check"] = get_lm_usage_since(history_before)

if (context_result.is_greeting or context_result.can_answer_from_context) and context_result.answer:
logger.info(
Expand All @@ -884,7 +884,7 @@ async def execute_context_workflow_streaming(
output_check = await guardrails_adapter.check_output_async(
context_result.answer
)
costs_dict["output_guardrails"] = output_check.usage
costs_metric["output_guardrails"] = output_check.usage

if not output_check.allowed:
logger.warning(
Expand Down Expand Up @@ -941,17 +941,17 @@ def split_into_tokens(text: str, chunk_size: int = 5) -> List[str]:
```python
try:
result = await execute_context_workflow(
request, llm_manager, guardrails_adapter, costs_dict
request, llm_manager, guardrails_adapter, costs_metric
)
if result:
return result # Context-based answer (validated)
else:
# Move to Layer 3 (RAG)
return await execute_rag_workflow(request, components, costs_dict)
return await execute_rag_workflow(request, components, costs_metric)
except Exception as e:
logger.error(f"Context workflow failed: {e}")
# Fallback to RAG workflow
return await execute_rag_workflow(request, components, costs_dict)
return await execute_rag_workflow(request, components, costs_metric)
```

**Guardrail Violation Fallback:**
Expand All @@ -963,7 +963,7 @@ if not output_check.allowed:
# Option 2: Fallback to RAG (alternative approach)
if not output_check.allowed:
logger.warning("Context response blocked, trying RAG workflow")
return await execute_rag_workflow(request, components, costs_dict)
return await execute_rag_workflow(request, components, costs_metric)
```

---
Expand All @@ -978,7 +978,7 @@ if not output_check.allowed:
```python
# Reuse existing RAG pipeline
return self._execute_orchestration_pipeline(
request, components, costs_dict, timing_dict
request, components, costs_metric, time_metric
)
```

Expand Down Expand Up @@ -1121,7 +1121,7 @@ if context_result.can_answer_from_context:
- **Pre-validation**: Get complete response → Validate → Stream to client
- **Complete response**: Already have full text before streaming starts
- **Uni-directional**: Simply chunk and send validated response
- **Cost**: Separate validation call tracked in `costs_dict["output_guardrails"]`
- **Cost**: Separate validation call tracked in `costs_metric["output_guardrails"]`
- **UX Consistency**: Simulates streaming to match RAG workflow behavior

### Why Different Approaches?
Expand Down Expand Up @@ -1601,15 +1601,15 @@ CREATE INDEX idx_classifier_decisions_workflow

**Add tracking for new LLM calls:**
# Service workflow - intent detection
costs_dict["intent_detection"] = {
costs_metric["intent_detection"] = {
"total_prompt_tokens": usage.prompt_tokens,
"total_completion_tokens": usage.completion_tokens,
"total_cost": calculate_cost(usage)
}

# Context workflow - context availability check
costs_dict["context_check"] = {
costs_metric["context_check"] = {
"total_prompt_tokens": usage.prompt_tokens,
"total_completion_tokens": usage.completion_tokens,
"total_cost": calculate_cost(usage)
Expand Down Expand Up @@ -1663,7 +1663,7 @@ async def stream_validated_response(
response_text: str,
guardrails_adapter: NeMoRailsAdapter,
request: OrchestrationRequest,
costs_dict: Dict
costs_metric: Dict
) -> AsyncIterator[str]:
"""
Apply output guardrails and stream validated response.
Expand All @@ -1677,7 +1677,7 @@ async def stream_validated_response(
output_check = await guardrails_adapter.check_output_async(response_text)

# Track costs
costs_dict["output_guardrails"] = output_check.usage
costs_metric["output_guardrails"] = output_check.usage

if not output_check.allowed:
logger.warning(f"[{request.chatId}] Output blocked by guardrails")
Expand Down
2 changes: 1 addition & 1 deletion src/contextual_retrieval/contextual_retrieval.md
Original file line number Diff line number Diff line change
Expand Up @@ -788,7 +788,7 @@ def _initialize_contextual_retriever(
#### 2. Request Processing
```python
# Main orchestration pipeline
def _execute_orchestration_pipeline(self, request, components, costs_dict):
def _execute_orchestration_pipeline(self, request, components, costs_metric):
# Step 1: Refine user prompt
refined_output = self._refine_user_prompt(...)

Expand Down
2 changes: 1 addition & 1 deletion src/guardrails/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ result.usage = usage_info # Contains: total_cost, tokens, num_calls
### Modified Pipeline in `llm_orchestration_service.py`

```python
costs_dict = {
costs_metric = {
"input_guardrails": {...}, # Step 1
"prompt_refiner": {...}, # Step 2
"response_generator": {...}, # Step 4
Expand Down
Loading