From 6c4a82a2c81d7290621683fdfa97a8decc86da57 Mon Sep 17 00:00:00 2001
From: JustAGhosT <smit.jurie@gmail.com>
Date: Sun, 15 Mar 2026 13:29:57 +0200
Subject: [PATCH 1/8] docs: update architecture documentation with telemetry
 and model details

- Add C3 connection in container architecture flowchart
- Rename "Azure Monitor" to "Application Insights" in telemetry diagram
- Add Prometheus as a telemetry sink with implementation details
- Document retention policies for Application Insights
- Update matrix gateway JSON example with new field names
- Fix confidence threshold in matrix-gateway from 0.70 to 0.75
- Update SOP suggestion threshold from 0.78 to 0.8
- Fix code block formatting in multiple files
---
 .../architecture/02-container-architecture.md |  1 +
 .../04-observability-telemetry.md             | 27 ++++++++++++++++++-
 docs/architecture/reference/matrix-gateway.md | 10 +++----
 .../architecture/reference/matrix-rooivalk.md |  4 +--
 .../reference/slm-management-plan.md          |  2 +-
 5 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/docs/architecture/02-container-architecture.md b/docs/architecture/02-container-architecture.md
index 2b66950..b34021d 100644
--- a/docs/architecture/02-container-architecture.md
+++ b/docs/architecture/02-container-architecture.md
@@ -57,6 +57,7 @@ flowchart TB
 
     C1 --> G1
     C2 --> G1
+    C3 --> G1
     C4 --> G1
 
     G1 --> G2
diff --git a/docs/architecture/04-observability-telemetry.md b/docs/architecture/04-observability-telemetry.md
index 3afe313..79e7f9f 100644
--- a/docs/architecture/04-observability-telemetry.md
+++ b/docs/architecture/04-observability-telemetry.md
@@ -34,8 +34,9 @@ flowchart TB
 
     subgraph Ingest
         I1[OpenTelemetry]
-        I2[Azure Monitor]
+        I2[Application Insights]
         I3[Blob Export]
+        I4[Prometheus]
     end
 
     subgraph Analytics
@@ -62,15 +63,39 @@ flowchart TB
     S4 --> I1
     S5 --> I2
     S6 --> I3
+    S5 --> I4
 
     I1 --> A1
     I2 --> A1
     I3 --> A1
+    I4 --> V1
 
     A1 --> V1
     V1 --> V2
 ```
 
+### Telemetry Sinks
+
+LiteLLM enables Prometheus metrics via `success_callback` and `failure_callback` containing "prometheus". The Prometheus exporter exposes a `/metrics` endpoint that scrapes application metrics. See `infra/modules/aigateway_aca/main.tf:95-113` for the container configuration.
+
+The primary telemetry sinks are:
+
+- **OpenTelemetry**: Traces and spans
+- **Application Insights**: Azure Monitor implementation using `APPLICATIONINSIGHTS_CONNECTION_STRING` env var for OTEL exporter
+- **Blob Export**: Raw event storage
+- **Prometheus**: Application metrics via `/metrics` endpoint
+
+## Retention Policies
+
+Application Insights retention defaults:
+
+- **Production**: 90 days
+- **Non-production (dev/staging)**: 30 days
+
+These are environment-specific settings configured in the Application Insights resource. Operators can adjust retention in the Azure Portal under Application Insights resource settings.
+
+Include retention expectations in operational runbooks to align cost and data availability expectations.
+
 ## Key Metrics
 
 ### Gateway
diff --git a/docs/architecture/reference/matrix-gateway.md b/docs/architecture/reference/matrix-gateway.md
index 4551887..55b8dbb 100644
--- a/docs/architecture/reference/matrix-gateway.md
+++ b/docs/architecture/reference/matrix-gateway.md
@@ -41,12 +41,12 @@ flowchart TD
 
 ```json
 {
-  "intent": "code_review",
+  "request_id": "req_abc123",
+  "label": "code_review",
   "complexity": "medium",
   "tool_candidate": true,
-  "recommended_target": "codeflow-engine",
-  "recommended_model_tier": "small",
-  "escalation_required": false,
+  "recommended_tier": "slm",
+  "cacheable": true,
   "confidence": 0.93
 }
 ```
@@ -91,7 +91,7 @@ interface PolicyScreenOutput {
 | Condition                        | Action                 |
 | -------------------------------- | ---------------------- |
 | `policy-screen.allowed == false` | Block or redact        |
-| `confidence < 0.70`              | Escalate to LLM        |
+| `confidence < 0.75`              | Escalate to LLM        |
 | Tool suggested but no mapping    | Send to general LLM    |
 | Tagging fails                    | Mark telemetry partial |
 
diff --git a/docs/architecture/reference/matrix-rooivalk.md b/docs/architecture/reference/matrix-rooivalk.md
index 29f20da..147c7b0 100644
--- a/docs/architecture/reference/matrix-rooivalk.md
+++ b/docs/architecture/reference/matrix-rooivalk.md
@@ -24,7 +24,7 @@ flowchart TD
 
 ## CRITICAL: SLM is for Reporting Only
 
-```
+```text
 ┌─────────────────────────────────────────────────────────┐
 │                   IMPORTANT - SAFETY BOUNDARY            │
 ├─────────────────────────────────────────────────────────┤
@@ -109,7 +109,7 @@ interface SuggestSopOutput {
 ```typescript
 const DEFAULT_THRESHOLDS = {
   operator_summary: { direct_use: 0.8, facts_only: 0.65 },
-  sop_suggestion: { direct_suggest: 0.78, manual_lookup: 0.65 },
+  sop_suggestion: { direct_suggest: 0.8, manual_lookup: 0.65 },
 };
 ```
 
diff --git a/docs/architecture/reference/slm-management-plan.md b/docs/architecture/reference/slm-management-plan.md
index 7c116c5..d2b0f2e 100644
--- a/docs/architecture/reference/slm-management-plan.md
+++ b/docs/architecture/reference/slm-management-plan.md
@@ -40,7 +40,7 @@ Maintain a tiered model portfolio:
 
 Implement cost controls at each layer:
 
-```
+```text
 Cost Control Layers
 ┌─────────────────────────────────────┐
 │ 1. Budget caps per project          │

From 0e69a2f3d6622f207bf6c20878d945c40e9b1c31 Mon Sep 17 00:00:00 2001
From: JustAGhosT <smit.jurie@gmail.com>
Date: Sun, 15 Mar 2026 14:14:45 +0200
Subject: [PATCH 2/8] feat(workflows): add GitHub environment support to
 deployment workflows

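Add an `environment` input to the reusable deploy-environment.yaml workflow to
specify the GitHub environment, improving deployment control and security. The
`deploy` job now uses `inputs.environment`, falling back to `inputs.env_name`
when it is empty. The hardcoded `environment:` keys are removed from the caller
jobs in deploy.yaml (a job that calls a reusable workflow cannot set
`environment:` itself); the callers pass the value through the new input.

Also:

- Declare the Azure/Terraform backend secrets and the optional state-service,
  dashboard and Grafana secrets on `workflow_call`, and pass them explicitly
  from deploy.yaml.
- Replace the `if(...)` expression for `models_wait_attempts`, which is not a
  valid Actions expression function, with the `cond && a || b` form.
- Mark unlabeled documentation code fences as `text` and update various
  documentation details.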
---
 .github/workflows/deploy-environment.yaml     | 43 ++++++++++++++++++-
 .github/workflows/deploy.yaml                 | 42 ++++++++++++++++--
 .../reference/slm-implementation-matrix.md    |  8 ++--
 .../reference/slm-management-plan.md          |  6 +--
 .../strategic/07-deployment-model.md          | 16 +++----
 docs/architecture/systems/agentkit-forge.md   |  2 +-
 docs/architecture/systems/ai-gateway.md       | 30 ++++++++++++-
 docs/architecture/systems/codeflow-engine.md  |  4 +-
 docs/architecture/systems/cognitive-mesh.md   |  4 +-
 docs/architecture/systems/phoenix-rooivalk.md |  2 +-
 docs/planning/request_to_token_attribution.md | 12 +++---
 11 files changed, 136 insertions(+), 33 deletions(-)

diff --git a/.github/workflows/deploy-environment.yaml b/.github/workflows/deploy-environment.yaml
index 1485e2e..c35a239 100644
--- a/.github/workflows/deploy-environment.yaml
+++ b/.github/workflows/deploy-environment.yaml
@@ -38,7 +38,26 @@ on:
         type: boolean
         default: false
         description: Include AOAI endpoint host validation
+      environment:
+        required: false
+        type: string
+        default: ""
+        description: GitHub environment to use
     secrets:
+      AZURE_CLIENT_ID:
+        required: true
+      AZURE_TENANT_ID:
+        required: true
+      AZURE_SUBSCRIPTION_ID:
+        required: true
+      TF_BACKEND_RG:
+        required: true
+      TF_BACKEND_SA:
+        required: true
+      TF_BACKEND_CONTAINER:
+        required: true
+      EXPECTED_AOAI_ENDPOINT_HOST:
+        required: false
       AZURE_OPENAI_ENDPOINT:
         required: true
       AZURE_OPENAI_API_KEY:
@@ -49,8 +68,25 @@ on:
         required: true
       AIGATEWAY_KEY:
         required: true
+      STATE_SERVICE_CONTAINER_IMAGE:
+        required: false
+      STATE_SERVICE_SHARED_TOKEN:
+        required: false
+      STATE_SERVICE_REGISTRY_PASSWORD:
+        required: false
+      DASHBOARD_CONTAINER_IMAGE:
+        required: false
+      GRAFANA_URL:
+        required: false
 
 env:
+  AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
+  AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
+  AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+  TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
+  TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
+  TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}
+  EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }}
   TF_VAR_env: ${{ inputs.env_name }}
   TF_VAR_projname: "aigateway"
   TF_VAR_location: "southafricanorth"
@@ -64,10 +100,15 @@ env:
   TF_VAR_codex_api_version: ${{ inputs.codex_api_version }}
   TF_VAR_embedding_deployment: "text-embedding-3-large"
   TF_VAR_embeddings_api_version: "2024-02-01"
+  TF_VAR_state_service_container_image: ${{ secrets.STATE_SERVICE_CONTAINER_IMAGE }}
+  TF_VAR_secrets_expiration_date: "2027-03-31T00:00:00Z"
+  TF_VAR_dashboard_container_image: ${{ secrets.DASHBOARD_CONTAINER_IMAGE || 'ghcr.io/phoenixvc/ai-gateway-dashboard:latest' }}
+  TF_VAR_grafana_url: ${{ secrets.GRAFANA_URL }}
 
 jobs:
   deploy:
     runs-on: ubuntu-latest
+    environment: ${{ inputs.environment || inputs.env_name }}
     defaults:
       run:
         working-directory: ${{ inputs.terraform_working_directory }}
@@ -208,7 +249,7 @@ jobs:
           aoai_api_key: ${{ env.TF_VAR_azure_openai_api_key }}
           max_attempts: "3"
           retry_sleep: ${{ inputs.smoke_retry_sleep }}
-          models_wait_attempts: ${{ if(inputs.env_name == 'prod', '3', '1') }}
+          models_wait_attempts: ${{ inputs.env_name == 'prod' && '3' || '1' }}
           models_wait_sleep: ${{ inputs.smoke_models_wait_sleep }}
 
       - name: Smoke test shared state API (dashboard proxy)
diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml
index 59ece73..97c8e2d 100644
--- a/.github/workflows/deploy.yaml
+++ b/.github/workflows/deploy.yaml
@@ -150,7 +150,6 @@ jobs:
     name: Deploy dev
     needs: plan
     if: github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'dev'
-    environment: dev
     uses: ./.github/workflows/deploy-environment.yaml
     with:
       env_name: dev
@@ -161,18 +160,30 @@ jobs:
       smoke_retry_sleep: "10"
       smoke_models_wait_sleep: "15"
       include_aoai_host_check: false
+      environment: dev
     secrets:
+      AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
+      AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
+      AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+      TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
+      TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
+      TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}
+      EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }}
       AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
       AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
       AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }}
       AZURE_OPENAI_EMBEDDING_API_KEY: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }}
       AIGATEWAY_KEY: ${{ secrets.AIGATEWAY_KEY }}
+      STATE_SERVICE_CONTAINER_IMAGE: ${{ vars.STATE_SERVICE_CONTAINER_IMAGE }}
+      STATE_SERVICE_SHARED_TOKEN: ${{ secrets.STATE_SERVICE_SHARED_TOKEN }}
+      STATE_SERVICE_REGISTRY_PASSWORD: ${{ secrets.STATE_SERVICE_REGISTRY_PASSWORD }}
+      DASHBOARD_CONTAINER_IMAGE: ${{ vars.DASHBOARD_CONTAINER_IMAGE }}
+      GRAFANA_URL: ${{ secrets.GRAFANA_URL }}
 
   deploy-staging:
     name: Deploy staging
     needs: plan
     if: github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'main' && contains(join(github.event.pull_request.labels.*.name, ','), 'run-staging')
-    environment: staging
     uses: ./.github/workflows/deploy-environment.yaml
     with:
       env_name: staging
@@ -183,18 +194,30 @@ jobs:
       smoke_retry_sleep: "10"
       smoke_models_wait_sleep: "15"
       include_aoai_host_check: false
+      environment: staging
     secrets:
+      AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
+      AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
+      AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+      TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
+      TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
+      TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}
+      EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }}
       AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
       AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
       AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }}
       AZURE_OPENAI_EMBEDDING_API_KEY: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }}
       AIGATEWAY_KEY: ${{ secrets.AIGATEWAY_KEY }}
+      STATE_SERVICE_CONTAINER_IMAGE: ${{ vars.STATE_SERVICE_CONTAINER_IMAGE }}
+      STATE_SERVICE_SHARED_TOKEN: ${{ secrets.STATE_SERVICE_SHARED_TOKEN }}
+      STATE_SERVICE_REGISTRY_PASSWORD: ${{ secrets.STATE_SERVICE_REGISTRY_PASSWORD }}
+      DASHBOARD_CONTAINER_IMAGE: ${{ vars.DASHBOARD_CONTAINER_IMAGE }}
+      GRAFANA_URL: ${{ secrets.GRAFANA_URL }}
 
   deploy-prod:
     name: Deploy prod
     needs: plan
     if: github.event_name == 'workflow_dispatch' || (github.event_name == 'push' && github.ref == 'refs/heads/main')
-    environment: prod
     uses: ./.github/workflows/deploy-environment.yaml
     with:
       env_name: prod
@@ -205,11 +228,24 @@ jobs:
       smoke_retry_sleep: "15"
       smoke_models_wait_sleep: "30"
       include_aoai_host_check: true
+      environment: prod
     secrets:
+      AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
+      AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
+      AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+      TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
+      TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
+      TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}
+      EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }}
       AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
       AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
       AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }}
       AZURE_OPENAI_EMBEDDING_API_KEY: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }}
       AIGATEWAY_KEY: ${{ secrets.AIGATEWAY_KEY }}
+      STATE_SERVICE_CONTAINER_IMAGE: ${{ vars.STATE_SERVICE_CONTAINER_IMAGE }}
+      STATE_SERVICE_SHARED_TOKEN: ${{ secrets.STATE_SERVICE_SHARED_TOKEN }}
+      STATE_SERVICE_REGISTRY_PASSWORD: ${{ secrets.STATE_SERVICE_REGISTRY_PASSWORD }}
+      DASHBOARD_CONTAINER_IMAGE: ${{ vars.DASHBOARD_CONTAINER_IMAGE }}
+      GRAFANA_URL: ${{ secrets.GRAFANA_URL }}
 
   # Legacy inline deployments removed - see deploy-environment.yaml
diff --git a/docs/architecture/reference/slm-implementation-matrix.md b/docs/architecture/reference/slm-implementation-matrix.md
index eb1fc6e..ff50192 100644
--- a/docs/architecture/reference/slm-implementation-matrix.md
+++ b/docs/architecture/reference/slm-implementation-matrix.md
@@ -15,13 +15,13 @@ This document provides a repo-by-repo implementation matrix showing SLM endpoint
 
 ## Documentation Structure
 
-```
+```text
 reference/
 ├── slm-implementation-matrix.md      # This file
 ├── matrix-gateway.md                  # AI Gateway details
 ├── matrix-cognitive-mesh.md          # Cognitive Mesh details
-├── matrix-codeflow.md                # CodeFlow Engine details
-├── matrix-agentkit.md                # AgentKit Forge details
+├── matrix-codeflow.md                 # CodeFlow Engine details
+├── matrix-agentkit.md                 # AgentKit Forge details
 ├── matrix-rooivalk.md                # PhoenixRooivalk details
 └── matrix-mystira.md                 # Mystira details
 ```
@@ -251,7 +251,7 @@ This is a practical role map, not a vendor mandate.
 
 ### Standard Fallback Pattern
 
-```
+```text
 1. SLM timeout → Deterministic rules
 2. Low confidence → LLM escalation
 3. Safety critical → Block immediately
diff --git a/docs/architecture/reference/slm-management-plan.md b/docs/architecture/reference/slm-management-plan.md
index d2b0f2e..92c164d 100644
--- a/docs/architecture/reference/slm-management-plan.md
+++ b/docs/architecture/reference/slm-management-plan.md
@@ -172,7 +172,7 @@ async def security_pipeline(request: Request) -> SecurityResult:
 
 ### Fallback Hierarchy
 
-```
+```text
 Request
    │
    ▼ Primary SLM
@@ -216,7 +216,7 @@ Request
 
 ### Model Lifecycle
 
-```
+```text
 Discovery → Testing → Staging → Production → Deprecated → Retired
     │           │         │          │            │
     ▼           ▼         ▼          ▼            ▼
@@ -271,4 +271,4 @@ Discovery → Testing → Staging → Production → Deprecated → Retired
 6. [ ] Define fallback hierarchies
 7. [ ] Implement observability stack
 8. [ ] Document model lifecycle process
-9. [ ] **Add explicit safety boundary for PhoenixRooivalk**
+9. [x] Add explicit safety boundary for PhoenixRooivalk
diff --git a/docs/architecture/reference/strategic/07-deployment-model.md b/docs/architecture/reference/strategic/07-deployment-model.md
index e4e6f8d..7e03967 100644
--- a/docs/architecture/reference/strategic/07-deployment-model.md
+++ b/docs/architecture/reference/strategic/07-deployment-model.md
@@ -26,14 +26,14 @@ flowchart TD
 
 ## Decision Matrix
 
-| System          | Best SLM Jobs              | Less Suitable                  |
-| --------------- | -------------------------- | ------------------------------ |
-| AI Gateway      | routing, screening, cost   | Nuanced synthesis              |
-| Cognitive Mesh  | routing, decomposition     | Final judgment                 |
-| CodeFlow        | PR triage, log analysis    | Root cause across dependencies |
-| AgentKit        | tool selection, extraction | Multi-step planning            |
-| PhoenixRooivalk | summaries, alerts          | Sole threat authority          |
-| Mystira         | safety, continuity         | Rich narrative                 |
+| System          | Best SLM Jobs                                                 | Less Suitable                  |
+| --------------- | ------------------------------------------------------------- | ------------------------------ |
+| AI Gateway      | routing, screening, cost                                      | Nuanced synthesis              |
+| Cognitive Mesh  | routing, decomposition                                        | Final judgment                 |
+| CodeFlow        | PR classification, CI failure triage, release-note extraction | Root cause across dependencies |
+| AgentKit        | tool selection, extraction                                    | Multi-step planning            |
+| PhoenixRooivalk | summaries, alerts                                             | Sole threat authority          |
+| Mystira         | safety, continuity                                            | Rich narrative                 |
 
 ## Practical Gateway Flow
 
diff --git a/docs/architecture/systems/agentkit-forge.md b/docs/architecture/systems/agentkit-forge.md
index c84a182..e61cf67 100644
--- a/docs/architecture/systems/agentkit-forge.md
+++ b/docs/architecture/systems/agentkit-forge.md
@@ -4,7 +4,7 @@ AgentKit Forge builds AI agents and orchestration workflows. SLMs help when agen
 
 ## Architecture
 
-```
+```text
 Agent Task
       │
       ▼
diff --git a/docs/architecture/systems/ai-gateway.md b/docs/architecture/systems/ai-gateway.md
index 5a288f0..751f9fc 100644
--- a/docs/architecture/systems/ai-gateway.md
+++ b/docs/architecture/systems/ai-gateway.md
@@ -4,7 +4,7 @@ AI Gateway sits between applications and multiple AI providers. The SLM acts as
 
 ## Architecture
 
-```
+```text
 Client Request
       │
       ▼
@@ -144,3 +144,31 @@ Track per routing decision:
 - [ ] Add security prefiltering (injection, PII, secrets)
 - [ ] Set up cost tracking per tier
 - [ ] Configure latency alerts
+
+## v1 API Routing
+
+The gateway routes `/v1/responses` and `/v1/embeddings` requests to Azure OpenAI via LiteLLM provider configuration.
+
+### Routing Rules
+
+| Endpoint         | Provider               | Notes                     |
+| ---------------- | ---------------------- | ------------------------- |
+| `/v1/responses`  | LiteLLM → Azure OpenAI | Responses API requests    |
+| `/v1/embeddings` | LiteLLM → Azure OpenAI | Text embedding generation |
+
+### Example LiteLLM Config
+
+```yaml
+model_list:
+  - model_name: gpt-4.1
+    litellm_params:
+      model: azure/gpt-4.1
+      api_base: https://<resource>.openai.azure.com
+      api_key: os.environ/AZURE_OPENAI_API_KEY
+      api_version: "2025-04-01-preview"
+```
+
+### Response vs Embeddings Handling
+
+- **Responses**: Model selection based on complexity/classification; supports streaming
+- **Embeddings**: Batched processing; fixed deployment mapping
diff --git a/docs/architecture/systems/codeflow-engine.md b/docs/architecture/systems/codeflow-engine.md
index 30f5dfd..c1de01e 100644
--- a/docs/architecture/systems/codeflow-engine.md
+++ b/docs/architecture/systems/codeflow-engine.md
@@ -4,7 +4,7 @@ CodeFlow Engine is a DevOps and CI/CD intelligence system. **This is one of the
 
 ## Architecture
 
-```
+```text
 Git Push / PR Event
       │
       ▼
@@ -96,7 +96,7 @@ async def select_tests(change_type: str, impacted_files: list[str]) -> TestPlan:
 Type: {change_type}
 Files: {', '.join(impacted_files)}
 
-Output: { "run_unit": bool, "run_integration": bool, "run_e2e": bool, "skip_reason": str|null }"""
+Output: {{ "run_unit": bool, "run_integration": bool, "run_e2e": bool, "skip_reason": str|null }}"""
 
     return await slm_completion(prompt)
 ```
diff --git a/docs/architecture/systems/cognitive-mesh.md b/docs/architecture/systems/cognitive-mesh.md
index d4f2c96..dedc502 100644
--- a/docs/architecture/systems/cognitive-mesh.md
+++ b/docs/architecture/systems/cognitive-mesh.md
@@ -4,7 +4,7 @@ Cognitive Mesh architectures orchestrate multiple AI agents and tools. The SLM i
 
 ## Architecture
 
-```
+```text
 User Query
       │
       ▼
@@ -143,7 +143,7 @@ async def compress_context(messages: list[Message]) -> Compressed:
 
 | Pros                            | Cons                                            |
 | ------------------------------- | ----------------------------------------------- |
-| Very large token savings        | Decomposition quality can bottleneck workflow   |
+| Large token savings             | Decomposition quality can bottleneck workflow   |
 | Better determinism              | Brittle routing if taxonomy is poor             |
 | Easier specialist orchestration | Harder debugging if confidence handling is weak |
 | Improved auditability           |                                                 |
diff --git a/docs/architecture/systems/phoenix-rooivalk.md b/docs/architecture/systems/phoenix-rooivalk.md
index dedaf96..5c0e0a7 100644
--- a/docs/architecture/systems/phoenix-rooivalk.md
+++ b/docs/architecture/systems/phoenix-rooivalk.md
@@ -4,7 +4,7 @@ PhoenixRooivalk is an edge AI counter-UAS (Unmanned Aerial System) system. **SLM
 
 ## Architecture
 
-```
+```text
 Sensors
   │
   ▼
diff --git a/docs/planning/request_to_token_attribution.md b/docs/planning/request_to_token_attribution.md
index d5790c1..b0b5cd0 100644
--- a/docs/planning/request_to_token_attribution.md
+++ b/docs/planning/request_to_token_attribution.md
@@ -122,9 +122,9 @@ Start with downstream aggregation in pvc-costops-analytics - the cheapest and fa
 
 ### 1. cognitive-mesh (Upstream Caller)
 
-**Required:** Pass correlation metadata in request body when calling gateway. There are two methods:
+**Recommended:** Pass correlation metadata in request body when calling gateway. There are two methods:
 
-**Method A: Via Request Metadata (Recommended)**
+**Method A: Via Request Metadata (Preferred)**
 Pass correlation IDs in the request body `metadata` field:
 
 ```json
@@ -142,7 +142,7 @@ Pass correlation IDs in the request body `metadata` field:
 }
 ```
 
-**Method B: Via HTTP Headers**
+**Method B: Via HTTP Headers** (alternative - requires additional LiteLLM configuration or middleware)
 
 - x-request-id
 - x-session-id
@@ -151,8 +151,6 @@ Pass correlation IDs in the request body `metadata` field:
 - x-stage-name
 - x-user-id
 
-_Note: Method B requires additional LiteLLM configuration or middleware._
-
 ### 2. pvc-costops-analytics (Downstream Analytics)
 
 **Required:** KQL queries and dashboards to:
@@ -218,7 +216,7 @@ _Note: Method B requires additional LiteLLM configuration or middleware._
 
 - cognitive-mesh: Pass correlation metadata in request body
 - pvc-costops-analytics: Must create KQL queries for new event shape
-- infra: Application Insights resource + APPLICATIONINSIGHTS_CONNECTION_STRING wiring added; trace export requires custom LiteLLM image (with azure-monitor-opentelemetry) or explicit OTEL_EXPORTER_OTLP_ENDPOINT configuration (currently empty by default)
+- infra: Application Insights resource created; APPLICATIONINSIGHTS_CONNECTION_STRING stored in Key Vault and wired to container app via secret reference; trace export requires custom LiteLLM image (with azure-monitor-opentelemetry) or explicit OTEL_EXPORTER_OTLP_ENDPOINT configuration (currently empty by default)
 
 ## Action Items
 
@@ -226,7 +224,7 @@ _Note: Method B requires additional LiteLLM configuration or middleware._
 
 1. ✅ ai-gateway: Add OTEL callback for token telemetry (Phase 1)
 2. ✅ ai-gateway: Document correlation ID requirements (Phase 2)
-3. ✅ ai-gateway: Add Application Insights connection string wiring (Phase 1b - trace export requires custom image or OTLP collector)
+3. ✅ ai-gateway: Add Application Insights connection string wiring via Key Vault (Phase 1b - trace export requires custom image or OTLP collector)
 
 ### Pending
 

From 02a8bb7dac0b386c21ef2c663cb48afcab1e77a6 Mon Sep 17 00:00:00 2001
From: JustAGhosT <smit.jurie@gmail.com>
Date: Sun, 15 Mar 2026 14:17:57 +0200
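Subject: [PATCH 3/8] fix(workflows): replace smoke_models_wait_sleep input
 with smoke_models_wait_attempts

- Rename the reusable-workflow input and change its default from "15" to "1".
- Drive `models_wait_attempts` from the new input instead of the inline
  `env_name == 'prod'` expression.
- Pass `smoke_models_wait_attempts` from deploy.yaml ("1" for dev and staging,
  "3" for prod).

Note: `models_wait_sleep` still reads `inputs.smoke_models_wait_sleep`, which
this commit removes from the declared inputs while deploy.yaml keeps passing
it. The input is re-added in PATCH 5/8.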

---
 .github/workflows/deploy-environment.yaml | 8 ++++----
 .github/workflows/deploy.yaml             | 3 +++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/deploy-environment.yaml b/.github/workflows/deploy-environment.yaml
index c35a239..2a1d739 100644
--- a/.github/workflows/deploy-environment.yaml
+++ b/.github/workflows/deploy-environment.yaml
@@ -28,11 +28,11 @@ on:
         type: string
         default: "10"
         description: Retry sleep for smoke tests
-      smoke_models_wait_sleep:
+      smoke_models_wait_attempts:
         required: false
         type: string
-        default: "15"
-        description: Wait sleep for model registration
+        default: "1"
+        description: Number of attempts to wait for models to become available
       include_aoai_host_check:
         required: false
         type: boolean
@@ -249,7 +249,7 @@ jobs:
           aoai_api_key: ${{ env.TF_VAR_azure_openai_api_key }}
           max_attempts: "3"
           retry_sleep: ${{ inputs.smoke_retry_sleep }}
-          models_wait_attempts: ${{ inputs.env_name == 'prod' && '3' || '1' }}
+          models_wait_attempts: ${{ inputs.smoke_models_wait_attempts }}
           models_wait_sleep: ${{ inputs.smoke_models_wait_sleep }}
 
       - name: Smoke test shared state API (dashboard proxy)
diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml
index 97c8e2d..7877e43 100644
--- a/.github/workflows/deploy.yaml
+++ b/.github/workflows/deploy.yaml
@@ -159,6 +159,7 @@ jobs:
       terraform_working_directory: infra/env/dev
       smoke_retry_sleep: "10"
       smoke_models_wait_sleep: "15"
+      smoke_models_wait_attempts: "1"
       include_aoai_host_check: false
       environment: dev
     secrets:
@@ -193,6 +194,7 @@ jobs:
       terraform_working_directory: infra/env/staging
       smoke_retry_sleep: "10"
       smoke_models_wait_sleep: "15"
+      smoke_models_wait_attempts: "1"
       include_aoai_host_check: false
       environment: staging
     secrets:
@@ -227,6 +229,7 @@ jobs:
       terraform_working_directory: infra/env/prod
       smoke_retry_sleep: "15"
       smoke_models_wait_sleep: "30"
+      smoke_models_wait_attempts: "3"
       include_aoai_host_check: true
       environment: prod
     secrets:

From 0e4fff23b4f93cedb6453fe65538f0d226847fd7 Mon Sep 17 00:00:00 2001
From: JustAGhosT <smit.jurie@gmail.com>
Date: Sun, 15 Mar 2026 14:19:12 +0200
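Subject: [PATCH 4/8] fix(infra): replace Application Insights connection
 string output with resource name

# Pull Request Checklist

## Summary

- What changed? The sensitive `application_insights_connection_string` output of the `aigateway_aca` module is replaced by an `application_insights_name` output.
- Why was it needed? The connection string is retrieved from the Key Vault secret `appinsights-connection-string`, so it no longer needs to be exposed as a Terraform output.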

## Validation

- [ ] Local checks run (if applicable)
- [ ] Relevant workflow/jobs observed

## Deployment Notes

- [ ] No environment/config changes required
- [ ] Environment/config changes required (describe below)

## Staging Toggle (PRs to `main`)

- Add label `run-staging` to this PR to enable staging deployment (`deploy-staging`).
- Remove label `run-staging` to skip staging deployment.

## Risk / Rollback

- Risk level: low / medium / high
- Rollback plan:
---
 infra/modules/aigateway_aca/outputs.tf | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/infra/modules/aigateway_aca/outputs.tf b/infra/modules/aigateway_aca/outputs.tf
index a8dfe6b..e6b8ff3 100644
--- a/infra/modules/aigateway_aca/outputs.tf
+++ b/infra/modules/aigateway_aca/outputs.tf
@@ -30,8 +30,7 @@ output "container_app_environment_id" {
   value       = azurerm_container_app_environment.cae.id
 }
 
-output "application_insights_connection_string" {
-  value       = azurerm_application_insights.ai.connection_string
-  description = "Application Insights connection string for OTEL export."
-  sensitive   = true
+output "application_insights_name" {
+  description = "Application Insights resource name. Retrieve connection string from Key Vault secret 'appinsights-connection-string'."
+  value       = azurerm_application_insights.ai.name
 }

From 27e172419cad954e2b8e41ae8c1728e80cf56e10 Mon Sep 17 00:00:00 2001
From: JustAGhosT <smit.jurie@gmail.com>
Date: Sun, 15 Mar 2026 14:47:47 +0200
Subject: [PATCH 5/8] feat(workflow): add smoke_models_wait_sleep parameter and
 update docs

- Add new workflow parameter to control sleep between model availability checks
- Update container architecture diagram to include Webhook Auth component
- Improve Prometheus metrics documentation clarity
- Add migration note for Matrix Gateway response contract changes in v1.0.0
---
 .github/workflows/deploy-environment.yaml       |  5 +++++
 docs/architecture/02-container-architecture.md  |  4 +++-
 docs/architecture/04-observability-telemetry.md |  2 +-
 docs/architecture/reference/matrix-gateway.md   | 10 ++++++++++
 4 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/deploy-environment.yaml b/.github/workflows/deploy-environment.yaml
index 2a1d739..5d39d8e 100644
--- a/.github/workflows/deploy-environment.yaml
+++ b/.github/workflows/deploy-environment.yaml
@@ -28,6 +28,11 @@ on:
         type: string
         default: "10"
         description: Retry sleep for smoke tests
+      smoke_models_wait_sleep:
+        required: false
+        type: string
+        default: "15"
+        description: Sleep seconds between model availability checks
       smoke_models_wait_attempts:
         required: false
         type: string
diff --git a/docs/architecture/02-container-architecture.md b/docs/architecture/02-container-architecture.md
index b34021d..544639c 100644
--- a/docs/architecture/02-container-architecture.md
+++ b/docs/architecture/02-container-architecture.md
@@ -27,6 +27,7 @@ flowchart TB
         G4[Budget Router]
         G5[Semantic Cache]
         G6[Escalation Judge]
+        W[Webhook Auth]
     end
 
     subgraph Mesh
@@ -57,7 +58,8 @@ flowchart TB
 
     C1 --> G1
     C2 --> G1
-    C3 --> G1
+    C3 --> W
+    W --> G1
     C4 --> G1
 
     G1 --> G2
diff --git a/docs/architecture/04-observability-telemetry.md b/docs/architecture/04-observability-telemetry.md
index 79e7f9f..8c095da 100644
--- a/docs/architecture/04-observability-telemetry.md
+++ b/docs/architecture/04-observability-telemetry.md
@@ -76,7 +76,7 @@ flowchart TB
 
 ### Telemetry Sinks
 
-LiteLLM enables Prometheus metrics via `success_callback` and `failure_callback` containing "prometheus". The Prometheus exporter exposes a `/metrics` endpoint that scrapes application metrics. See `infra/modules/aigateway_aca/main.tf:95-113` for the container configuration.
+LiteLLM enables Prometheus metrics via `success_callback` and `failure_callback` containing "prometheus". The Prometheus exporter exposes a `/metrics` endpoint which is scraped by Prometheus for application metrics collection. See `infra/modules/aigateway_aca/main.tf:95-113` for the container configuration.
 
 The primary telemetry sinks are:
 
diff --git a/docs/architecture/reference/matrix-gateway.md b/docs/architecture/reference/matrix-gateway.md
index 55b8dbb..395719c 100644
--- a/docs/architecture/reference/matrix-gateway.md
+++ b/docs/architecture/reference/matrix-gateway.md
@@ -51,6 +51,16 @@ flowchart TD
 }
 ```
 
+> **Migration Note (v1.0.0)**: The response contract has been updated. Legacy field names `intent`, `recommended_target`, `recommended_model_tier`, and `escalation_required` are deprecated. Update clients to use the new fields:
+>
+> - `intent` → `label`
+> - `recommended_target` → removed (use `recommended_tier` for routing)
+> - `recommended_model_tier` → `recommended_tier`
+> - `escalation_required` → derive from `confidence < 0.75` threshold
+> - `cacheable` is a new field (previously not returned)
+>
+> **Deprecation window**: Legacy fields will be removed in v1.2.0. Clients should update by then. For backwards compatibility, implement fallback logic checking both old and new field names.
+
 ## Contract Shapes
 
 ```typescript

From 19759df0ac7763e852cefe1d7ee487409c3553a0 Mon Sep 17 00:00:00 2001
From: JustAGhosT <smit.jurie@gmail.com>
Date: Sun, 15 Mar 2026 15:00:41 +0200
Subject: [PATCH 6/8] fix(infra): use trimspace() instead of trim() in Terraform locals

---
 infra/modules/dashboard_aca/main.tf     | 2 +-
 infra/modules/state_service_aca/main.tf | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/infra/modules/dashboard_aca/main.tf b/infra/modules/dashboard_aca/main.tf
index 65f86c8..ce740f4 100644
--- a/infra/modules/dashboard_aca/main.tf
+++ b/infra/modules/dashboard_aca/main.tf
@@ -12,7 +12,7 @@ terraform {
 locals {
   prefix           = "pvc-${var.env}-${var.projname}"
   ca_name          = "${local.prefix}-dashboard-${var.location_short}"
-  use_shared_token = trim(var.state_service_shared_token) != ""
+  use_shared_token = trimspace(var.state_service_shared_token) != ""
 
   tags = merge({
     env     = var.env
diff --git a/infra/modules/state_service_aca/main.tf b/infra/modules/state_service_aca/main.tf
index da86391..d768198 100644
--- a/infra/modules/state_service_aca/main.tf
+++ b/infra/modules/state_service_aca/main.tf
@@ -13,7 +13,7 @@ locals {
   prefix            = "pvc-${var.env}-${var.projname}"
   ca_name           = "${local.prefix}-state-${var.location_short}"
   use_registry_auth = var.registry_username != "" && var.registry_password != ""
-  use_shared_token  = trim(var.state_service_shared_token) != ""
+  use_shared_token  = trimspace(var.state_service_shared_token) != ""
 
   tags = merge({
     env     = var.env

From 9be91e54da5bf43f57dda3bddbe523ee51000501 Mon Sep 17 00:00:00 2001
From: JustAGhosT <smit.jurie@gmail.com>
Date: Sun, 15 Mar 2026 16:37:06 +0200
Subject: [PATCH 7/8] docs: add AGENTS.md with guidance for AI coding agents

Add comprehensive documentation for AI coding agents working in this repository, including:
- Project overview and tech stack
- Build/lint/test commands for each component
- Code style guidelines for Python, JavaScript, and Terraform
- Architecture overview and key files
- Prerequisites and pre-commit checks
---
 AGENTS.md | 227 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 227 insertions(+)
 create mode 100644 AGENTS.md

diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..3fd1618
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,227 @@
+# AGENTS.md - Guidance for AI Coding Agents
+
+This file provides guidance for AI coding agents operating in this repository.
+
+## Project Overview
+
+**ai-gateway** — OpenAI-compatible AI gateway built on LiteLLM, deployed to Azure Container Apps. Routes `/v1/responses` and `/v1/embeddings` to Azure OpenAI.
+
+### Tech Stack
+
+- **Gateway**: LiteLLM (Python)
+- **Dashboard**: Node.js/pnpm (in `dashboard/`)
+- **Infrastructure**: Terraform (>= 1.14.0) in `infra/`
+- **State Service**: Python/FastAPI in `state-service/`
+- **Type Checking**: mypy
+- **Scripts**: Deployment/setup scripts in `scripts/`
+
+---
+
+## Build / Lint / Test Commands
+
+### Dashboard (Node.js/pnpm)
+
+```bash
+cd dashboard
+pnpm install          # Install dependencies
+pnpm dev              # Start dev server
+pnpm format           # Format code with prettier
+pnpm format:check     # Check formatting only
+pnpm lint             # Run format check
+```
+
+### Python (State Service)
+
+```bash
+# Type checking
+mypy .                # Run mypy on entire project
+
+# Running a single Python test (if tests exist)
+python -m pytest scripts/test_specific.py::TestClass::test_method
+
+# Individual script execution
+python scripts/integration_test.py
+python scripts/check_aoai_embeddings.py
+```
+
+### Terraform (Infrastructure)
+
+```bash
+cd infra
+
+# Initialize and plan
+terraform init
+terraform plan
+
+# Format check
+terraform fmt -check -recursive
+
+# Apply
+terraform apply
+```
+
+### Combined Checks
+
+```bash
+# Run all checks (format + terraform)
+pnpm check
+```
+
+---
+
+## Code Style Guidelines
+
+### Python (state-service/)
+
+**Imports**
+
+- Use relative imports within packages: `from .routes import router`
+- Group imports: stdlib → third-party → local
+- Use `import os`, `from typing import Optional`, etc.
+
+**Formatting**
+
+- Follow PEP 8
+- Use 4 spaces for indentation
+- Maximum line length: 100 characters
+
+**Types (mypy)**
+
+- Python version: 3.13 (see `mypy.ini`)
+- Use type hints for function parameters and return values
+- Run `mypy .` before committing
+
+**Naming**
+
+- Variables/functions: `snake_case`
+- Classes: `PascalCase`
+- Constants: `UPPER_SNAKE_CASE`
+- Private members: prefix with `_`
+
+**Error Handling**
+
+- Use custom exceptions with descriptive names
+- Catch specific exceptions, not bare `except:`
+- Include context in error messages
+
+```python
+def selection_key(user_id: str) -> str:
+    if not user_id or not user_id.strip():
+        raise ValueError("user_id must be a non-empty string")
+    # ...
+```
+
+### JavaScript (dashboard/)
+
+**Formatting**
+
+- Use Prettier for formatting (configured in `package.json`)
+- Run `pnpm format` before committing
+
+**Naming**
+
+- Variables/functions: `camelCase`
+- Constants: `UPPER_SNAKE_CASE` or `camelCase` with const
+- Classes: `PascalCase`
+
+**General JS Style**
+
+- Use `const` by default, `let` when reassignment needed
+- Prefer template literals over string concatenation
+- Use strict equality (`===`) not loose equality (`==`)
+
+```javascript
+const MAX_POINTS = 20;
+const reqHistory = { labels: [], datasets: [...] };
+```
+
+### Terraform (infra/)
+
+**Formatting**
+
+- Use `terraform fmt` to format files
+- Run `terraform fmt -check -recursive` in CI
+
+**Naming**
+
+- Resources: `snake_case`
+- Variables: `snake_case`
+- Outputs: `snake_case`
+
+**General**
+
+- Use local values for repeated expressions
+- Tag all resources with `env`, `project`
+- Constrain provider versions with a minimum bound: `version = ">= 4.62.0"`
+
+### GitHub Actions (`.github/workflows/`)
+
+**Formatting**
+
+- Use Prettier for YAML files
+- Run `pnpm format` to format workflow files
+
+**Naming**
+
+- Job names: descriptive, lowercase with hyphens
+- Step names: descriptive
+
+### Documentation (docs/)
+
+**Formatting**
+
+- Use Prettier for Markdown files
+- Run `pnpm format` to format docs
+
+**General**
+
+- Use ATX-style headers (`#`, `##`, etc.)
+- Keep lines under 100 characters when practical
+- Include code blocks with language identifiers
+
+---
+
+## Architecture Overview
+
+```text
+docs/architecture/
+├── systems/          # Individual system documentation
+├── reference/        # Reference and planning docs
+│   └── strategic/    # Strategic guidance
+└── 01-*-*.md         # ADR-style documents
+
+dashboard/            # Admin UI (Node.js/pnpm)
+infra/                # Terraform IaC
+scripts/              # Deployment automation
+state-service/        # FastAPI state service
+```
+
+---
+
+## Key Files
+
+| File                                  | Purpose               |
+| ------------------------------------- | --------------------- |
+| `CLAUDE.md`                           | Claude Code guidance  |
+| `dashboard/app.js`                    | Dashboard UI          |
+| `infra/modules/aigateway_aca/main.tf` | Main infrastructure   |
+| `state-service/state_service/`        | FastAPI state service |
+| `.github/workflows/deploy.yaml`       | CI/CD pipeline        |
+
+---
+
+## Prerequisites
+
+- Azure CLI (`az login`)
+- Terraform >= 1.14.0
+- Node.js + pnpm
+- Python 3.13+
+
+---
+
+## Before Committing
+
+1. Run formatting: `pnpm format`
+2. Run type checks: `mypy .` (if Python changed)
+3. Run terraform fmt: `terraform fmt -check -recursive`
+4. Test locally if possible

From 72c004c37cf053f732173cc5c093db1291917a20 Mon Sep 17 00:00:00 2001
From: JustAGhosT <smit.jurie@gmail.com>
Date: Sun, 15 Mar 2026 17:23:25 +0200
Subject: [PATCH 8/8] feat(infra): add state service configuration to dev
 environment

---
 infra/env/dev/terraform.tfvars | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/infra/env/dev/terraform.tfvars b/infra/env/dev/terraform.tfvars
index aaec5e8..e2d08c8 100644
--- a/infra/env/dev/terraform.tfvars
+++ b/infra/env/dev/terraform.tfvars
@@ -23,3 +23,8 @@ tags = {
 }
 
 enable_redis_cache = true
+
+# State Service
+state_service_container_image   = "ghcr.io/phoenixvc/ai-gateway-state-service:latest"
+state_service_registry_username = "phoenixvc"
+state_service_registry_password = "ghp_xxx"