diff --git a/.github/workflows/deploy-environment.yaml b/.github/workflows/deploy-environment.yaml
index 1485e2e..5d39d8e 100644
--- a/.github/workflows/deploy-environment.yaml
+++ b/.github/workflows/deploy-environment.yaml
@@ -32,13 +32,37 @@ on:
         required: false
         type: string
         default: "15"
-        description: Wait sleep for model registration
+        description: Sleep seconds between model availability checks
+      smoke_models_wait_attempts:
+        required: false
+        type: string
+        default: "1"
+        description: Number of attempts to wait for models to become available
       include_aoai_host_check:
         required: false
         type: boolean
         default: false
         description: Include AOAI endpoint host validation
+      environment:
+        required: false
+        type: string
+        default: ""
+        description: GitHub environment to use
     secrets:
+      AZURE_CLIENT_ID:
+        required: true
+      AZURE_TENANT_ID:
+        required: true
+      AZURE_SUBSCRIPTION_ID:
+        required: true
+      TF_BACKEND_RG:
+        required: true
+      TF_BACKEND_SA:
+        required: true
+      TF_BACKEND_CONTAINER:
+        required: true
+      EXPECTED_AOAI_ENDPOINT_HOST:
+        required: false
       AZURE_OPENAI_ENDPOINT:
         required: true
       AZURE_OPENAI_API_KEY:
@@ -49,8 +73,25 @@ on:
         required: true
       AIGATEWAY_KEY:
         required: true
+      STATE_SERVICE_CONTAINER_IMAGE:
+        required: false
+      STATE_SERVICE_SHARED_TOKEN:
+        required: false
+      STATE_SERVICE_REGISTRY_PASSWORD:
+        required: false
+      DASHBOARD_CONTAINER_IMAGE:
+        required: false
+      GRAFANA_URL:
+        required: false
 
 env:
+  AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
+  AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
+  AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+  TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
+  TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
+  TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}
+  EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }}
   TF_VAR_env: ${{ inputs.env_name }}
   TF_VAR_projname: "aigateway"
   TF_VAR_location: "southafricanorth"
@@ -64,10 +105,15 @@ env:
   TF_VAR_codex_api_version: ${{ inputs.codex_api_version }}
   TF_VAR_embedding_deployment: "text-embedding-3-large"
   TF_VAR_embeddings_api_version: "2024-02-01"
+  TF_VAR_state_service_container_image: ${{ secrets.STATE_SERVICE_CONTAINER_IMAGE }}
+  TF_VAR_secrets_expiration_date: "2027-03-31T00:00:00Z"
+  TF_VAR_dashboard_container_image: ${{ secrets.DASHBOARD_CONTAINER_IMAGE || 'ghcr.io/phoenixvc/ai-gateway-dashboard:latest' }}  # falls back to the :latest image when the optional secret is empty
+  TF_VAR_grafana_url: ${{ secrets.GRAFANA_URL }}
 
 jobs:
   deploy:
     runs-on: ubuntu-latest
+    environment: ${{ inputs.environment || inputs.env_name }}  # the `environment` input defaults to "", in which case env_name is used
     defaults:
       run:
         working-directory: ${{ inputs.terraform_working_directory }}
@@ -208,7 +254,7 @@ jobs:
           aoai_api_key: ${{ env.TF_VAR_azure_openai_api_key }}
           max_attempts: "3"
           retry_sleep: ${{ inputs.smoke_retry_sleep }}
-          models_wait_attempts: ${{ if(inputs.env_name == 'prod', '3', '1') }}
+          models_wait_attempts: ${{ inputs.smoke_models_wait_attempts }}
           models_wait_sleep: ${{ inputs.smoke_models_wait_sleep }}
 
       - name: Smoke test shared state API (dashboard proxy)
diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml
index 59ece73..7877e43 100644
--- a/.github/workflows/deploy.yaml
+++ b/.github/workflows/deploy.yaml
@@ -150,7 +150,6 @@ jobs:
     name: Deploy dev
     needs: plan
     if: github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'dev'
-    environment: dev
     uses: ./.github/workflows/deploy-environment.yaml
     with:
       env_name: dev
@@ -160,19 +159,32 @@ jobs:
       terraform_working_directory: infra/env/dev
       smoke_retry_sleep: "10"
       smoke_models_wait_sleep: "15"
+      smoke_models_wait_attempts: "1"
       include_aoai_host_check: false
+      environment: dev
     secrets:
+      AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
+      AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
+      AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+      TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
+      TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
+      TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}
+      EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }}
       AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
       AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
       AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }}
       AZURE_OPENAI_EMBEDDING_API_KEY: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }}
       AIGATEWAY_KEY: ${{ secrets.AIGATEWAY_KEY }}
+      STATE_SERVICE_CONTAINER_IMAGE: ${{ vars.STATE_SERVICE_CONTAINER_IMAGE }}
+      STATE_SERVICE_SHARED_TOKEN: ${{ secrets.STATE_SERVICE_SHARED_TOKEN }}
+      STATE_SERVICE_REGISTRY_PASSWORD: ${{ secrets.STATE_SERVICE_REGISTRY_PASSWORD }}
+      DASHBOARD_CONTAINER_IMAGE: ${{ vars.DASHBOARD_CONTAINER_IMAGE }}
+      GRAFANA_URL: ${{ secrets.GRAFANA_URL }}
 
   deploy-staging:
     name: Deploy staging
     needs: plan
     if: github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'main' && contains(join(github.event.pull_request.labels.*.name, ','), 'run-staging')
-    environment: staging
     uses: ./.github/workflows/deploy-environment.yaml
     with:
       env_name: staging
@@ -182,19 +194,32 @@ jobs:
       terraform_working_directory: infra/env/staging
       smoke_retry_sleep: "10"
       smoke_models_wait_sleep: "15"
+      smoke_models_wait_attempts: "1"
       include_aoai_host_check: false
+      environment: staging
     secrets:
+      AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
+      AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
+      AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+      TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
+      TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
+      TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}
+      EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }}
       AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
       AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
       AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }}
       AZURE_OPENAI_EMBEDDING_API_KEY: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }}
       AIGATEWAY_KEY: ${{ secrets.AIGATEWAY_KEY }}
+      STATE_SERVICE_CONTAINER_IMAGE: ${{ vars.STATE_SERVICE_CONTAINER_IMAGE }}
+      STATE_SERVICE_SHARED_TOKEN: ${{ secrets.STATE_SERVICE_SHARED_TOKEN }}
+      STATE_SERVICE_REGISTRY_PASSWORD: ${{ secrets.STATE_SERVICE_REGISTRY_PASSWORD }}
+      DASHBOARD_CONTAINER_IMAGE: ${{ vars.DASHBOARD_CONTAINER_IMAGE }}
+      GRAFANA_URL: ${{ secrets.GRAFANA_URL }}
 
   deploy-prod:
     name: Deploy prod
     needs: plan
     if: github.event_name == 'workflow_dispatch' || (github.event_name == 'push' && github.ref == 'refs/heads/main')
-    environment: prod
     uses: ./.github/workflows/deploy-environment.yaml
     with:
       env_name: prod
@@ -204,12 +229,26 @@ jobs:
       terraform_working_directory: infra/env/prod
       smoke_retry_sleep: "15"
       smoke_models_wait_sleep: "30"
+      smoke_models_wait_attempts: "3"
       include_aoai_host_check: true
+      environment: prod
     secrets:
+      AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
+      AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
+      AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+      TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }}
+      TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }}
+      TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }}
+      EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }}
       AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
       AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
       AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }}
       AZURE_OPENAI_EMBEDDING_API_KEY: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }}
       AIGATEWAY_KEY: ${{ secrets.AIGATEWAY_KEY }}
+      STATE_SERVICE_CONTAINER_IMAGE: ${{ vars.STATE_SERVICE_CONTAINER_IMAGE }}
+      STATE_SERVICE_SHARED_TOKEN: ${{ secrets.STATE_SERVICE_SHARED_TOKEN }}
+      STATE_SERVICE_REGISTRY_PASSWORD: ${{ secrets.STATE_SERVICE_REGISTRY_PASSWORD }}
+      DASHBOARD_CONTAINER_IMAGE: ${{ vars.DASHBOARD_CONTAINER_IMAGE }}
+      GRAFANA_URL: ${{ secrets.GRAFANA_URL }}
 
   # Legacy inline deployments removed - see deploy-environment.yaml
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..3fd1618
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,227 @@
+# AGENTS.md - Guidance for AI Coding Agents
+
+This file provides guidance for AI coding agents operating in this repository.
+
+## Project Overview
+
+**ai-gateway** — OpenAI-compatible AI gateway built on LiteLLM, deployed to Azure Container Apps. Routes `/v1/responses` and `/v1/embeddings` to Azure OpenAI.
+
+### Tech Stack
+
+- **Gateway**: LiteLLM (Python)
+- **Dashboard**: Node.js/pnpm (in `dashboard/`)
+- **Infrastructure**: Terraform (>= 1.14.0) in `infra/`
+- **State Service**: Python/FastAPI in `state-service/`
+- **Type Checking**: mypy
+- **Scripts**: Deployment/setup scripts in `scripts/`
+
+---
+
+## Build / Lint / Test Commands
+
+### Dashboard (Node.js/pnpm)
+
+```bash
+cd dashboard
+pnpm install          # Install dependencies
+pnpm dev              # Start dev server
+pnpm format           # Format code with prettier
+pnpm format:check    # Check formatting only
+pnpm lint             # Run format check
+```
+
+### Python (State Service)
+
+```bash
+# Type checking
+mypy .                # Run mypy on entire project
+
+# Running a single Python test (if tests exist)
+python -m pytest scripts/test_specific.py::TestClass::test_method
+
+# Individual script execution
+python scripts/integration_test.py
+python scripts/check_aoai_embeddings.py
+```
+
+### Terraform (Infrastructure)
+
+```bash
+cd infra
+
+# Initialize and plan
+terraform init
+terraform plan
+
+# Format check
+terraform fmt -check -recursive
+
+# Apply
+terraform apply
+```
+
+### Combined Checks
+
+```bash
+# Run all checks (format + terraform)
+pnpm check
+```
+
+---
+
+## Code Style Guidelines
+
+### Python (state-service/)
+
+**Imports**
+
+- Use explicit relative imports within packages: `from .routes import router`
+- Group imports: stdlib → third-party → local
+- Use `import os`, `from typing import Optional`, etc.
+
+**Formatting**
+
+- Follow PEP 8
+- Use 4 spaces for indentation
+- Maximum line length: 100 characters
+
+**Types (mypy)**
+
+- Python version: 3.13 (see `mypy.ini`)
+- Use type hints for function parameters and return values
+- Run `mypy .` before committing
+
+**Naming**
+
+- Variables/functions: `snake_case`
+- Classes: `PascalCase`
+- Constants: `UPPER_SNAKE_CASE`
+- Private members: prefix with `_`
+
+**Error Handling**
+
+- Use custom exceptions with descriptive names
+- Catch specific exceptions, not bare `except:`
+- Include context in error messages
+
+```python
+def selection_key(user_id: str) -> str:
+    if not user_id or not user_id.strip():
+        raise ValueError("user_id must be a non-empty string")
+    # ...
+```
+
+### JavaScript (dashboard/)
+
+**Formatting**
+
+- Use Prettier for formatting (configured in `package.json`)
+- Run `pnpm format` before committing
+
+**Naming**
+
+- Variables/functions: `camelCase`
+- Constants: `UPPER_SNAKE_CASE` or `camelCase` with const
+- Classes: `PascalCase`
+
+**General JS Style**
+
+- Use `const` by default, `let` when reassignment needed
+- Prefer template literals over string concatenation
+- Use strict equality (`===`) not loose equality (`==`)
+
+```javascript
+const MAX_POINTS = 20;
+const reqHistory = { labels: [], datasets: [...] };
+```
+
+### Terraform (infra/)
+
+**Formatting**
+
+- Use `terraform fmt` to format files
+- Run `terraform fmt -check -recursive` in CI
+
+**Naming**
+
+- Resources: `snake_case`
+- Variables: `snake_case`
+- Outputs: `snake_case`
+
+**General**
+
+- Use local values for repeated expressions
+- Tag all resources with `env`, `project`
+- Constrain provider versions with a minimum: `version = ">= 4.62.0"`
+
+### GitHub Actions (`.github/workflows/`)
+
+**Formatting**
+
+- Use Prettier for YAML files
+- Run `pnpm format` to format workflow files
+
+**Naming**
+
+- Job names: descriptive, lowercase with hyphens
+- Step names: descriptive
+
+### Documentation (docs/)
+
+**Formatting**
+
+- Use Prettier for Markdown files
+- Run `pnpm format` to format docs
+
+**General**
+
+- Use ATX-style headers (`#`, `##`, etc.)
+- Keep lines under 100 characters when practical
+- Include code blocks with language identifiers
+
+---
+
+## Architecture Overview
+
+```
+docs/architecture/
+├── systems/          # Individual system documentation
+├── reference/        # Reference and planning docs
+│   └── strategic/   # Strategic guidance
+└── 01-*-*.md       # ADR-style documents
+
+dashboard/           # Admin UI (Node.js/pnpm)
+infra/              # Terraform IaC
+scripts/            # Deployment automation
+state-service/      # FastAPI state service
+```
+
+---
+
+## Key Files
+
+| File                                  | Purpose               |
+| ------------------------------------- | --------------------- |
+| `CLAUDE.md`                           | Claude Code guidance  |
+| `dashboard/app.js`                    | Dashboard UI          |
+| `infra/modules/aigateway_aca/main.tf` | Main infrastructure   |
+| `state-service/state_service/`        | FastAPI state service |
+| `.github/workflows/deploy.yaml`       | CI/CD pipeline        |
+
+---
+
+## Prerequisites
+
+- Azure CLI (`az login`)
+- Terraform >= 1.14.0
+- Node.js + pnpm
+- Python 3.13+
+
+---
+
+## Before Committing
+
+1. Run formatting: `pnpm format`
+2. Run type checks: `mypy .` (if Python changed)
+3. Run terraform fmt: `terraform fmt -check -recursive`
+4. Test locally if possible
diff --git a/docs/architecture/02-container-architecture.md b/docs/architecture/02-container-architecture.md
index 2b66950..544639c 100644
--- a/docs/architecture/02-container-architecture.md
+++ b/docs/architecture/02-container-architecture.md
@@ -27,6 +27,7 @@ flowchart TB
         G4[Budget Router]
         G5[Semantic Cache]
         G6[Escalation Judge]
+        W[Webhook Auth]
     end
 
     subgraph Mesh
@@ -57,6 +58,8 @@ flowchart TB
 
     C1 --> G1
     C2 --> G1
+    C3 --> W
+    W --> G1
     C4 --> G1
 
     G1 --> G2
diff --git a/docs/architecture/04-observability-telemetry.md b/docs/architecture/04-observability-telemetry.md
index 3afe313..8c095da 100644
--- a/docs/architecture/04-observability-telemetry.md
+++ b/docs/architecture/04-observability-telemetry.md
@@ -34,8 +34,9 @@ flowchart TB
 
     subgraph Ingest
         I1[OpenTelemetry]
-        I2[Azure Monitor]
+        I2[Application Insights]
         I3[Blob Export]
+        I4[Prometheus]
     end
 
     subgraph Analytics
@@ -62,15 +63,39 @@ flowchart TB
     S4 --> I1
     S5 --> I2
     S6 --> I3
+    S5 --> I4
 
     I1 --> A1
     I2 --> A1
     I3 --> A1
+    I4 --> V1
 
     A1 --> V1
     V1 --> V2
 ```
 
+### Telemetry Sinks
+
+LiteLLM enables Prometheus metrics via `success_callback` and `failure_callback` containing "prometheus". The Prometheus exporter exposes a `/metrics` endpoint which is scraped by Prometheus for application metrics collection. See `infra/modules/aigateway_aca/main.tf:95-113` for the container configuration.
+
+The primary telemetry sinks are:
+
+- **OpenTelemetry**: Traces and spans
+- **Application Insights**: Azure Monitor implementation using `APPLICATIONINSIGHTS_CONNECTION_STRING` env var for OTEL exporter
+- **Blob Export**: Raw event storage
+- **Prometheus**: Application metrics via `/metrics` endpoint
+
+## Retention Policies
+
+Application Insights retention defaults:
+
+- **Production**: 90 days
+- **Non-production (dev/staging)**: 30 days
+
+These are environment-specific settings configured in the Application Insights resource. Operators can adjust retention in the Azure Portal under Application Insights resource settings.
+
+Include retention expectations in operational runbooks to align cost and data availability expectations.
+
 ## Key Metrics
 
 ### Gateway
diff --git a/docs/architecture/reference/matrix-gateway.md b/docs/architecture/reference/matrix-gateway.md
index 4551887..395719c 100644
--- a/docs/architecture/reference/matrix-gateway.md
+++ b/docs/architecture/reference/matrix-gateway.md
@@ -41,16 +41,26 @@ flowchart TD
 
 ```json
 {
-  "intent": "code_review",
+  "request_id": "req_abc123",
+  "label": "code_review",
   "complexity": "medium",
   "tool_candidate": true,
-  "recommended_target": "codeflow-engine",
-  "recommended_model_tier": "small",
-  "escalation_required": false,
+  "recommended_tier": "slm",
+  "cacheable": true,
   "confidence": 0.93
 }
 ```
 
+> **Migration Note (v1.0.0)**: The response contract has been updated. Legacy field names `intent`, `recommended_target`, `recommended_model_tier`, and `escalation_required` are deprecated. Update clients to use the new fields:
+>
+> - `intent` → `label`
+> - `recommended_target` → removed (use `recommended_tier` for routing)
+> - `recommended_model_tier` → `recommended_tier`
+> - `escalation_required` → derive from `confidence < 0.75` threshold
+> - `cacheable` is a new field (previously not returned)
+>
+> **Deprecation window**: Legacy fields will be removed in v1.2.0. Clients should update by then. For backwards compatibility, implement fallback logic checking both old and new field names.
+
 ## Contract Shapes
 
 ```typescript
@@ -91,7 +101,7 @@ interface PolicyScreenOutput {
 | Condition                        | Action                 |
 | -------------------------------- | ---------------------- |
 | `policy-screen.allowed == false` | Block or redact        |
-| `confidence < 0.70`              | Escalate to LLM        |
+| `confidence < 0.75`              | Escalate to LLM        |
 | Tool suggested but no mapping    | Send to general LLM    |
 | Tagging fails                    | Mark telemetry partial |
 
diff --git a/docs/architecture/reference/matrix-rooivalk.md b/docs/architecture/reference/matrix-rooivalk.md
index 29f20da..147c7b0 100644
--- a/docs/architecture/reference/matrix-rooivalk.md
+++ b/docs/architecture/reference/matrix-rooivalk.md
@@ -24,7 +24,7 @@ flowchart TD
 
 ## CRITICAL: SLM is for Reporting Only
 
-```
+```text
 ┌─────────────────────────────────────────────────────────┐
 │                   IMPORTANT - SAFETY BOUNDARY            │
 ├─────────────────────────────────────────────────────────┤
@@ -109,7 +109,7 @@ interface SuggestSopOutput {
 ```typescript
 const DEFAULT_THRESHOLDS = {
   operator_summary: { direct_use: 0.8, facts_only: 0.65 },
-  sop_suggestion: { direct_suggest: 0.78, manual_lookup: 0.65 },
+  sop_suggestion: { direct_suggest: 0.8, manual_lookup: 0.65 },
 };
 ```
 
diff --git a/docs/architecture/reference/slm-implementation-matrix.md b/docs/architecture/reference/slm-implementation-matrix.md
index eb1fc6e..ff50192 100644
--- a/docs/architecture/reference/slm-implementation-matrix.md
+++ b/docs/architecture/reference/slm-implementation-matrix.md
@@ -15,13 +15,13 @@ This document provides a repo-by-repo implementation matrix showing SLM endpoint
 
 ## Documentation Structure
 
-```
+```text
 reference/
 ├── slm-implementation-matrix.md      # This file
 ├── matrix-gateway.md                  # AI Gateway details
 ├── matrix-cognitive-mesh.md          # Cognitive Mesh details
-├── matrix-codeflow.md                # CodeFlow Engine details
-├── matrix-agentkit.md                # AgentKit Forge details
+├── matrix-codeflow.md                 # CodeFlow Engine details
+├── matrix-agentkit.md                 # AgentKit Forge details
 ├── matrix-rooivalk.md                # PhoenixRooivalk details
 └── matrix-mystira.md                 # Mystira details
 ```
@@ -251,7 +251,7 @@ This is a practical role map, not a vendor mandate.
 
 ### Standard Fallback Pattern
 
-```
+```text
 1. SLM timeout → Deterministic rules
 2. Low confidence → LLM escalation
 3. Safety critical → Block immediately
diff --git a/docs/architecture/reference/slm-management-plan.md b/docs/architecture/reference/slm-management-plan.md
index 7c116c5..92c164d 100644
--- a/docs/architecture/reference/slm-management-plan.md
+++ b/docs/architecture/reference/slm-management-plan.md
@@ -40,7 +40,7 @@ Maintain a tiered model portfolio:
 
 Implement cost controls at each layer:
 
-```
+```text
 Cost Control Layers
 ┌─────────────────────────────────────┐
 │ 1. Budget caps per project          │
@@ -172,7 +172,7 @@ async def security_pipeline(request: Request) -> SecurityResult:
 
 ### Fallback Hierarchy
 
-```
+```text
 Request
    │
    ▼ Primary SLM
@@ -216,7 +216,7 @@ Request
 
 ### Model Lifecycle
 
-```
+```text
 Discovery → Testing → Staging → Production → Deprecated → Retired
     │           │         │          │            │
     ▼           ▼         ▼          ▼            ▼
@@ -271,4 +271,4 @@ Discovery → Testing → Staging → Production → Deprecated → Retired
 6. [ ] Define fallback hierarchies
 7. [ ] Implement observability stack
 8. [ ] Document model lifecycle process
-9. [ ] **Add explicit safety boundary for PhoenixRooivalk**
+9. [x] Add explicit safety boundary for PhoenixRooivalk
diff --git a/docs/architecture/reference/strategic/07-deployment-model.md b/docs/architecture/reference/strategic/07-deployment-model.md
index e4e6f8d..7e03967 100644
--- a/docs/architecture/reference/strategic/07-deployment-model.md
+++ b/docs/architecture/reference/strategic/07-deployment-model.md
@@ -26,14 +26,14 @@ flowchart TD
 
 ## Decision Matrix
 
-| System          | Best SLM Jobs              | Less Suitable                  |
-| --------------- | -------------------------- | ------------------------------ |
-| AI Gateway      | routing, screening, cost   | Nuanced synthesis              |
-| Cognitive Mesh  | routing, decomposition     | Final judgment                 |
-| CodeFlow        | PR triage, log analysis    | Root cause across dependencies |
-| AgentKit        | tool selection, extraction | Multi-step planning            |
-| PhoenixRooivalk | summaries, alerts          | Sole threat authority          |
-| Mystira         | safety, continuity         | Rich narrative                 |
+| System          | Best SLM Jobs                                                 | Less Suitable                  |
+| --------------- | ------------------------------------------------------------- | ------------------------------ |
+| AI Gateway      | routing, screening, cost                                      | Nuanced synthesis              |
+| Cognitive Mesh  | routing, decomposition                                        | Final judgment                 |
+| CodeFlow        | PR classification, CI failure triage, release-note extraction | Root cause across dependencies |
+| AgentKit        | tool selection, extraction                                    | Multi-step planning            |
+| PhoenixRooivalk | summaries, alerts                                             | Sole threat authority          |
+| Mystira         | safety, continuity                                            | Rich narrative                 |
 
 ## Practical Gateway Flow
 
diff --git a/docs/architecture/systems/agentkit-forge.md b/docs/architecture/systems/agentkit-forge.md
index c84a182..e61cf67 100644
--- a/docs/architecture/systems/agentkit-forge.md
+++ b/docs/architecture/systems/agentkit-forge.md
@@ -4,7 +4,7 @@ AgentKit Forge builds AI agents and orchestration workflows. SLMs help when agen
 
 ## Architecture
 
-```
+```text
 Agent Task
       │
       ▼
diff --git a/docs/architecture/systems/ai-gateway.md b/docs/architecture/systems/ai-gateway.md
index 5a288f0..751f9fc 100644
--- a/docs/architecture/systems/ai-gateway.md
+++ b/docs/architecture/systems/ai-gateway.md
@@ -4,7 +4,7 @@ AI Gateway sits between applications and multiple AI providers. The SLM acts as
 
 ## Architecture
 
-```
+```text
 Client Request
       │
       ▼
@@ -144,3 +144,31 @@ Track per routing decision:
 - [ ] Add security prefiltering (injection, PII, secrets)
 - [ ] Set up cost tracking per tier
 - [ ] Configure latency alerts
+
+## v1 API Routing
+
+The gateway routes `/v1/responses` and `/v1/embeddings` requests to Azure OpenAI via LiteLLM provider configuration.
+
+### Routing Rules
+
+| Endpoint         | Provider               | Notes                     |
+| ---------------- | ---------------------- | ------------------------- |
+| `/v1/responses`  | LiteLLM → Azure OpenAI | Responses API requests    |
+| `/v1/embeddings` | LiteLLM → Azure OpenAI | Text embedding generation |
+
+### Example LiteLLM Config
+
+```yaml
+model_list:
+  - model_name: gpt-4.1
+    litellm_params:
+      model: azure/gpt-4.1
+      api_base: https://<resource>.openai.azure.com
+      api_key: os.environ/AZURE_OPENAI_API_KEY
+      api_version: "2025-04-01-preview"
+```
+
+### Response vs Embeddings Handling
+
+- **Responses**: Model selection based on complexity/classification; supports streaming
+- **Embeddings**: Batched processing; fixed deployment mapping
diff --git a/docs/architecture/systems/codeflow-engine.md b/docs/architecture/systems/codeflow-engine.md
index 30f5dfd..c1de01e 100644
--- a/docs/architecture/systems/codeflow-engine.md
+++ b/docs/architecture/systems/codeflow-engine.md
@@ -4,7 +4,7 @@ CodeFlow Engine is a DevOps and CI/CD intelligence system. **This is one of the
 
 ## Architecture
 
-```
+```text
 Git Push / PR Event
       │
       ▼
@@ -96,7 +96,7 @@ async def select_tests(change_type: str, impacted_files: list[str]) -> TestPlan:
 Type: {change_type}
 Files: {', '.join(impacted_files)}
 
-Output: { "run_unit": bool, "run_integration": bool, "run_e2e": bool, "skip_reason": str|null }"""
+Output: {{ "run_unit": bool, "run_integration": bool, "run_e2e": bool, "skip_reason": str|null }}"""
 
     return await slm_completion(prompt)
 ```
diff --git a/docs/architecture/systems/cognitive-mesh.md b/docs/architecture/systems/cognitive-mesh.md
index d4f2c96..dedc502 100644
--- a/docs/architecture/systems/cognitive-mesh.md
+++ b/docs/architecture/systems/cognitive-mesh.md
@@ -4,7 +4,7 @@ Cognitive Mesh architectures orchestrate multiple AI agents and tools. The SLM i
 
 ## Architecture
 
-```
+```text
 User Query
       │
       ▼
@@ -143,7 +143,7 @@ async def compress_context(messages: list[Message]) -> Compressed:
 
 | Pros                            | Cons                                            |
 | ------------------------------- | ----------------------------------------------- |
-| Very large token savings        | Decomposition quality can bottleneck workflow   |
+| Large token savings             | Decomposition quality can bottleneck workflow   |
 | Better determinism              | Brittle routing if taxonomy is poor             |
 | Easier specialist orchestration | Harder debugging if confidence handling is weak |
 | Improved auditability           |                                                 |
diff --git a/docs/architecture/systems/phoenix-rooivalk.md b/docs/architecture/systems/phoenix-rooivalk.md
index dedaf96..5c0e0a7 100644
--- a/docs/architecture/systems/phoenix-rooivalk.md
+++ b/docs/architecture/systems/phoenix-rooivalk.md
@@ -4,7 +4,7 @@ PhoenixRooivalk is an edge AI counter-UAS (Unmanned Aerial System) system. **SLM
 
 ## Architecture
 
-```
+```text
 Sensors
   │
   ▼
diff --git a/docs/planning/request_to_token_attribution.md b/docs/planning/request_to_token_attribution.md
index d5790c1..b0b5cd0 100644
--- a/docs/planning/request_to_token_attribution.md
+++ b/docs/planning/request_to_token_attribution.md
@@ -122,9 +122,9 @@ Start with downstream aggregation in pvc-costops-analytics - the cheapest and fa
 
 ### 1. cognitive-mesh (Upstream Caller)
 
-**Required:** Pass correlation metadata in request body when calling gateway. There are two methods:
+**Recommended:** Pass correlation metadata in request body when calling gateway. There are two methods:
 
-**Method A: Via Request Metadata (Recommended)**
+**Method A: Via Request Metadata (Preferred)**
 Pass correlation IDs in the request body `metadata` field:
 
 ```json
@@ -142,7 +142,7 @@ Pass correlation IDs in the request body `metadata` field:
 }
 ```
 
-**Method B: Via HTTP Headers**
+**Method B: Via HTTP Headers** (alternative - requires additional LiteLLM configuration or middleware)
 
 - x-request-id
 - x-session-id
@@ -151,8 +151,6 @@ Pass correlation IDs in the request body `metadata` field:
 - x-stage-name
 - x-user-id
 
-_Note: Method B requires additional LiteLLM configuration or middleware._
-
 ### 2. pvc-costops-analytics (Downstream Analytics)
 
 **Required:** KQL queries and dashboards to:
@@ -218,7 +216,7 @@ _Note: Method B requires additional LiteLLM configuration or middleware._
 
 - cognitive-mesh: Pass correlation metadata in request body
 - pvc-costops-analytics: Must create KQL queries for new event shape
-- infra: Application Insights resource + APPLICATIONINSIGHTS_CONNECTION_STRING wiring added; trace export requires custom LiteLLM image (with azure-monitor-opentelemetry) or explicit OTEL_EXPORTER_OTLP_ENDPOINT configuration (currently empty by default)
+- infra: Application Insights resource created; APPLICATIONINSIGHTS_CONNECTION_STRING stored in Key Vault and wired to container app via secret reference; trace export requires custom LiteLLM image (with azure-monitor-opentelemetry) or explicit OTEL_EXPORTER_OTLP_ENDPOINT configuration (currently empty by default)
 
 ## Action Items
 
@@ -226,7 +224,7 @@ _Note: Method B requires additional LiteLLM configuration or middleware._
 
 1. ✅ ai-gateway: Add OTEL callback for token telemetry (Phase 1)
 2. ✅ ai-gateway: Document correlation ID requirements (Phase 2)
-3. ✅ ai-gateway: Add Application Insights connection string wiring (Phase 1b - trace export requires custom image or OTLP collector)
+3. ✅ ai-gateway: Add Application Insights connection string wiring via Key Vault (Phase 1b - trace export requires custom image or OTLP collector)
 
 ### Pending
 
diff --git a/infra/env/dev/terraform.tfvars b/infra/env/dev/terraform.tfvars
index aaec5e8..e2d08c8 100644
--- a/infra/env/dev/terraform.tfvars
+++ b/infra/env/dev/terraform.tfvars
@@ -23,3 +23,8 @@ tags = {
 }
 
 enable_redis_cache = true
+
+# State Service (public image reference and registry user only; no credentials in this file)
+state_service_container_image   = "ghcr.io/phoenixvc/ai-gateway-state-service:latest"
+state_service_registry_username = "phoenixvc"
+# state_service_registry_password is intentionally not set here: never commit tokens, and a terraform.tfvars value would override one supplied via TF_VAR_state_service_registry_password.
diff --git a/infra/modules/aigateway_aca/outputs.tf b/infra/modules/aigateway_aca/outputs.tf
index a8dfe6b..e6b8ff3 100644
--- a/infra/modules/aigateway_aca/outputs.tf
+++ b/infra/modules/aigateway_aca/outputs.tf
@@ -30,8 +30,7 @@ output "container_app_environment_id" {
   value       = azurerm_container_app_environment.cae.id
 }
 
-output "application_insights_connection_string" {
-  value       = azurerm_application_insights.ai.connection_string
-  description = "Application Insights connection string for OTEL export."
-  sensitive   = true
+output "application_insights_name" {
+  description = "Application Insights resource name. Retrieve connection string from Key Vault secret 'appinsights-connection-string'."
+  value       = azurerm_application_insights.ai.name
 }
diff --git a/infra/modules/dashboard_aca/main.tf b/infra/modules/dashboard_aca/main.tf
index 65f86c8..ce740f4 100644
--- a/infra/modules/dashboard_aca/main.tf
+++ b/infra/modules/dashboard_aca/main.tf
@@ -12,7 +12,7 @@ terraform {
 locals {
   prefix           = "pvc-${var.env}-${var.projname}"
   ca_name          = "${local.prefix}-dashboard-${var.location_short}"
-  use_shared_token = trim(var.state_service_shared_token) != ""
+  use_shared_token = trimspace(var.state_service_shared_token) != ""
 
   tags = merge({
     env     = var.env
diff --git a/infra/modules/state_service_aca/main.tf b/infra/modules/state_service_aca/main.tf
index da86391..d768198 100644
--- a/infra/modules/state_service_aca/main.tf
+++ b/infra/modules/state_service_aca/main.tf
@@ -13,7 +13,7 @@ locals {
   prefix            = "pvc-${var.env}-${var.projname}"
   ca_name           = "${local.prefix}-state-${var.location_short}"
   use_registry_auth = var.registry_username != "" && var.registry_password != ""
-  use_shared_token  = trim(var.state_service_shared_token) != ""
+  use_shared_token  = trimspace(var.state_service_shared_token) != ""
 
   tags = merge({
     env     = var.env