diff --git a/.github/workflows/deploy-environment.yaml b/.github/workflows/deploy-environment.yaml index 1485e2e..5d39d8e 100644 --- a/.github/workflows/deploy-environment.yaml +++ b/.github/workflows/deploy-environment.yaml @@ -32,13 +32,37 @@ on: required: false type: string default: "15" - description: Wait sleep for model registration + description: Sleep seconds between model availability checks + smoke_models_wait_attempts: + required: false + type: string + default: "1" + description: Number of attempts to wait for models to become available include_aoai_host_check: required: false type: boolean default: false description: Include AOAI endpoint host validation + environment: + required: false + type: string + default: "" + description: GitHub environment to use secrets: + AZURE_CLIENT_ID: + required: true + AZURE_TENANT_ID: + required: true + AZURE_SUBSCRIPTION_ID: + required: true + TF_BACKEND_RG: + required: true + TF_BACKEND_SA: + required: true + TF_BACKEND_CONTAINER: + required: true + EXPECTED_AOAI_ENDPOINT_HOST: + required: false AZURE_OPENAI_ENDPOINT: required: true AZURE_OPENAI_API_KEY: @@ -49,8 +73,25 @@ on: required: true AIGATEWAY_KEY: required: true + STATE_SERVICE_CONTAINER_IMAGE: + required: false + STATE_SERVICE_SHARED_TOKEN: + required: false + STATE_SERVICE_REGISTRY_PASSWORD: + required: false + DASHBOARD_CONTAINER_IMAGE: + required: false + GRAFANA_URL: + required: false env: + AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} + AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }} + AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }} + TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }} + TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }} + EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }} TF_VAR_env: ${{ inputs.env_name }} TF_VAR_projname: "aigateway" TF_VAR_location: "southafricanorth" @@ -64,10 +105,15 @@ env: TF_VAR_codex_api_version: ${{ inputs.codex_api_version }} 
TF_VAR_embedding_deployment: "text-embedding-3-large" TF_VAR_embeddings_api_version: "2024-02-01" + TF_VAR_state_service_container_image: ${{ secrets.STATE_SERVICE_CONTAINER_IMAGE }} + TF_VAR_secrets_expiration_date: "2027-03-31T00:00:00Z" + TF_VAR_dashboard_container_image: ${{ secrets.DASHBOARD_CONTAINER_IMAGE || 'ghcr.io/phoenixvc/ai-gateway-dashboard:latest' }} + TF_VAR_grafana_url: ${{ secrets.GRAFANA_URL }} jobs: deploy: runs-on: ubuntu-latest + environment: ${{ inputs.environment || inputs.env_name }} defaults: run: working-directory: ${{ inputs.terraform_working_directory }} @@ -208,7 +254,7 @@ jobs: aoai_api_key: ${{ env.TF_VAR_azure_openai_api_key }} max_attempts: "3" retry_sleep: ${{ inputs.smoke_retry_sleep }} - models_wait_attempts: ${{ if(inputs.env_name == 'prod', '3', '1') }} + models_wait_attempts: ${{ inputs.smoke_models_wait_attempts }} models_wait_sleep: ${{ inputs.smoke_models_wait_sleep }} - name: Smoke test shared state API (dashboard proxy) diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 59ece73..7877e43 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -150,7 +150,6 @@ jobs: name: Deploy dev needs: plan if: github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'dev' - environment: dev uses: ./.github/workflows/deploy-environment.yaml with: env_name: dev @@ -160,19 +159,32 @@ jobs: terraform_working_directory: infra/env/dev smoke_retry_sleep: "10" smoke_models_wait_sleep: "15" + smoke_models_wait_attempts: "1" include_aoai_host_check: false + environment: dev secrets: + AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} + AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }} + AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }} + TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }} + TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }} + EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }} 
AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }} AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }} AZURE_OPENAI_EMBEDDING_API_KEY: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }} AIGATEWAY_KEY: ${{ secrets.AIGATEWAY_KEY }} + STATE_SERVICE_CONTAINER_IMAGE: ${{ vars.STATE_SERVICE_CONTAINER_IMAGE }} + STATE_SERVICE_SHARED_TOKEN: ${{ secrets.STATE_SERVICE_SHARED_TOKEN }} + STATE_SERVICE_REGISTRY_PASSWORD: ${{ secrets.STATE_SERVICE_REGISTRY_PASSWORD }} + DASHBOARD_CONTAINER_IMAGE: ${{ vars.DASHBOARD_CONTAINER_IMAGE }} + GRAFANA_URL: ${{ secrets.GRAFANA_URL }} deploy-staging: name: Deploy staging needs: plan if: github.event_name == 'pull_request' && github.event.pull_request.base.ref == 'main' && contains(join(github.event.pull_request.labels.*.name, ','), 'run-staging') - environment: staging uses: ./.github/workflows/deploy-environment.yaml with: env_name: staging @@ -182,19 +194,32 @@ jobs: terraform_working_directory: infra/env/staging smoke_retry_sleep: "10" smoke_models_wait_sleep: "15" + smoke_models_wait_attempts: "1" include_aoai_host_check: false + environment: staging secrets: + AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} + AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }} + AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }} + TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }} + TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }} + EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }} AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }} AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }} AZURE_OPENAI_EMBEDDING_API_KEY: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }} AIGATEWAY_KEY: ${{ secrets.AIGATEWAY_KEY }} + STATE_SERVICE_CONTAINER_IMAGE: ${{ vars.STATE_SERVICE_CONTAINER_IMAGE }} + 
STATE_SERVICE_SHARED_TOKEN: ${{ secrets.STATE_SERVICE_SHARED_TOKEN }} + STATE_SERVICE_REGISTRY_PASSWORD: ${{ secrets.STATE_SERVICE_REGISTRY_PASSWORD }} + DASHBOARD_CONTAINER_IMAGE: ${{ vars.DASHBOARD_CONTAINER_IMAGE }} + GRAFANA_URL: ${{ secrets.GRAFANA_URL }} deploy-prod: name: Deploy prod needs: plan if: github.event_name == 'workflow_dispatch' || (github.event_name == 'push' && github.ref == 'refs/heads/main') - environment: prod uses: ./.github/workflows/deploy-environment.yaml with: env_name: prod @@ -204,12 +229,26 @@ jobs: terraform_working_directory: infra/env/prod smoke_retry_sleep: "15" smoke_models_wait_sleep: "30" + smoke_models_wait_attempts: "3" include_aoai_host_check: true + environment: prod secrets: + AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} + AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }} + AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + TF_BACKEND_RG: ${{ secrets.TF_BACKEND_RG }} + TF_BACKEND_SA: ${{ secrets.TF_BACKEND_SA }} + TF_BACKEND_CONTAINER: ${{ secrets.TF_BACKEND_CONTAINER }} + EXPECTED_AOAI_ENDPOINT_HOST: ${{ secrets.EXPECTED_AOAI_ENDPOINT_HOST }} AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }} AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }} AZURE_OPENAI_EMBEDDING_ENDPOINT: ${{ secrets.AZURE_OPENAI_EMBEDDING_ENDPOINT }} AZURE_OPENAI_EMBEDDING_API_KEY: ${{ secrets.AZURE_OPENAI_EMBEDDING_API_KEY }} AIGATEWAY_KEY: ${{ secrets.AIGATEWAY_KEY }} + STATE_SERVICE_CONTAINER_IMAGE: ${{ vars.STATE_SERVICE_CONTAINER_IMAGE }} + STATE_SERVICE_SHARED_TOKEN: ${{ secrets.STATE_SERVICE_SHARED_TOKEN }} + STATE_SERVICE_REGISTRY_PASSWORD: ${{ secrets.STATE_SERVICE_REGISTRY_PASSWORD }} + DASHBOARD_CONTAINER_IMAGE: ${{ vars.DASHBOARD_CONTAINER_IMAGE }} + GRAFANA_URL: ${{ secrets.GRAFANA_URL }} # Legacy inline deployments removed - see deploy-environment.yaml diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..3fd1618 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,227 @@ +# AGENTS.md - Guidance for AI 
Coding Agents + +This file provides guidance for AI coding agents operating in this repository. + +## Project Overview + +**ai-gateway** — OpenAI-compatible AI gateway built on LiteLLM, deployed to Azure Container Apps. Routes `/v1/responses` and `/v1/embeddings` to Azure OpenAI. + +### Tech Stack + +- **Gateway**: LiteLLM (Python) +- **Dashboard**: Node.js/pnpm (in `dashboard/`) +- **Infrastructure**: Terraform (>= 1.14.0) in `infra/` +- **State Service**: Python/FastAPI in `state-service/` +- **Type Checking**: mypy +- **Scripts**: Deployment/setup scripts in `scripts/` + +--- + +## Build / Lint / Test Commands + +### Dashboard (Node.js/pnpm) + +```bash +cd dashboard +pnpm install # Install dependencies +pnpm dev # Start dev server +pnpm format # Format code with prettier +pnpm format:check # Check formatting only +pnpm lint # Run format check +``` + +### Python (State Service) + +```bash +# Type checking +mypy . # Run mypy on entire project + +# Running a single Python test (if tests exist) +python -m pytest scripts/test_specific.py::TestClass::test_method + +# Individual script execution +python scripts/integration_test.py +python scripts/check_aoai_embeddings.py +``` + +### Terraform (Infrastructure) + +```bash +cd infra + +# Initialize and plan +terraform init +terraform plan + +# Format check +terraform fmt -check -recursive + +# Apply +terraform apply +``` + +### Combined Checks + +```bash +# Run all checks (format + terraform) +pnpm check +``` + +--- + +## Code Style Guidelines + +### Python (state-service/) + +**Imports** + +- Use relative imports within packages: `from .routes import router` +- Group imports: stdlib → third-party → local +- Use `import os`, `from typing import Optional`, etc. 
+ +**Formatting** + +- Follow PEP 8 +- Use 4 spaces for indentation +- Maximum line length: 100 characters + +**Types (mypy)** + +- Python version: 3.13 (see `mypy.ini`) +- Use type hints for function parameters and return values +- Run `mypy .` before committing + +**Naming** + +- Variables/functions: `snake_case` +- Classes: `PascalCase` +- Constants: `UPPER_SNAKE_CASE` +- Private members: prefix with `_` + +**Error Handling** + +- Use custom exceptions with descriptive names +- Catch specific exceptions, not bare `except:` +- Include context in error messages + +```python +def selection_key(user_id: str) -> str: + if not user_id or not user_id.strip(): + raise ValueError("user_id must be a non-empty string") + # ... +``` + +### JavaScript (dashboard/) + +**Formatting** + +- Use Prettier for formatting (configured in `package.json`) +- Run `pnpm format` before committing + +**Naming** + +- Variables/functions: `camelCase` +- Constants: `UPPER_SNAKE_CASE` or `camelCase` with const +- Classes: `PascalCase` + +**General JS Style** + +- Use `const` by default, `let` when reassignment needed +- Prefer template literals over string concatenation +- Use strict equality (`===`) not loose equality (`==`) + +```javascript +const MAX_POINTS = 20; +const reqHistory = { labels: [], datasets: [...] 
}; +``` + +### Terraform (infra/) + +**Formatting** + +- Use `terraform fmt` to format files +- Run `terraform fmt -check -recursive` in CI + +**Naming** + +- Resources: `snake_case` +- Variables: `snake_case` +- Outputs: `snake_case` + +**General** + +- Use local values for repeated expressions +- Tag all resources with `env`, `project` +- Pin provider versions: `version = ">= 4.62.0"` + +### GitHub Actions (`.github/workflows/`) + +**Formatting** + +- Use Prettier for YAML files +- Run `pnpm format` to format workflow files + +**Naming** + +- Job names: descriptive, lowercase with hyphens +- Step names: descriptive + +### Documentation (docs/) + +**Formatting** + +- Use Prettier for Markdown files +- Run `pnpm format` to format docs + +**General** + +- Use ATX-style headers (`#`, `##`, etc.) +- Keep lines under 100 characters when practical +- Include code blocks with language identifiers + +--- + +## Architecture Overview + +``` +docs/architecture/ +├── systems/ # Individual system documentation +├── reference/ # Reference and planning docs +│ └── strategic/ # Strategic guidance +├── 01-*-*.md # ADR-style documents + +dashboard/ # Admin UI (Node.js/pnpm) +infra/ # Terraform IaC +scripts/ # Deployment automation +state-service/ # FastAPI state service +``` + +--- + +## Key Files + +| File | Purpose | +| ------------------------------------- | --------------------- | +| `CLAUDE.md` | Claude Code guidance | +| `dashboard/app.js` | Dashboard UI | +| `infra/modules/aigateway_aca/main.tf` | Main infrastructure | +| `state-service/state_service/` | FastAPI state service | +| `.github/workflows/deploy.yaml` | CI/CD pipeline | + +--- + +## Prerequisites + +- Azure CLI (`az login`) +- Terraform >= 1.14.0 +- Node.js + pnpm +- Python 3.13+ + +--- + +## Before Committing + +1. Run formatting: `pnpm format` +2. Run type checks: `mypy .` (if Python changed) +3. Run terraform fmt: `terraform fmt -check -recursive` +4. 
Test locally if possible diff --git a/docs/architecture/02-container-architecture.md b/docs/architecture/02-container-architecture.md index 2b66950..544639c 100644 --- a/docs/architecture/02-container-architecture.md +++ b/docs/architecture/02-container-architecture.md @@ -27,6 +27,7 @@ flowchart TB G4[Budget Router] G5[Semantic Cache] G6[Escalation Judge] + W[Webhook Auth] end subgraph Mesh @@ -57,6 +58,8 @@ flowchart TB C1 --> G1 C2 --> G1 + C3 --> W + W --> G1 C4 --> G1 G1 --> G2 diff --git a/docs/architecture/04-observability-telemetry.md b/docs/architecture/04-observability-telemetry.md index 3afe313..8c095da 100644 --- a/docs/architecture/04-observability-telemetry.md +++ b/docs/architecture/04-observability-telemetry.md @@ -34,8 +34,9 @@ flowchart TB subgraph Ingest I1[OpenTelemetry] - I2[Azure Monitor] + I2[Application Insights] I3[Blob Export] + I4[Prometheus] end subgraph Analytics @@ -62,15 +63,39 @@ flowchart TB S4 --> I1 S5 --> I2 S6 --> I3 + S5 --> I4 I1 --> A1 I2 --> A1 I3 --> A1 + I4 --> V1 A1 --> V1 V1 --> V2 ``` +### Telemetry Sinks + +LiteLLM enables Prometheus metrics via `success_callback` and `failure_callback` containing "prometheus". The Prometheus exporter exposes a `/metrics` endpoint which is scraped by Prometheus for application metrics collection. See `infra/modules/aigateway_aca/main.tf:95-113` for the container configuration. + +The primary telemetry sinks are: + +- **OpenTelemetry**: Traces and spans +- **Application Insights**: Azure Monitor implementation using `APPLICATIONINSIGHTS_CONNECTION_STRING` env var for OTEL exporter +- **Blob Export**: Raw event storage +- **Prometheus**: Application metrics via `/metrics` endpoint + +## Retention Policies + +Application Insights retention defaults: + +- **Production**: 90 days +- **Non-production (dev/staging)**: 30 days + +These are environment-specific settings configured in the Application Insights resource. 
Operators can adjust retention in the Azure Portal under Application Insights resource settings. + +Include retention expectations in operational runbooks to align cost and data availability expectations. + ## Key Metrics ### Gateway diff --git a/docs/architecture/reference/matrix-gateway.md b/docs/architecture/reference/matrix-gateway.md index 4551887..395719c 100644 --- a/docs/architecture/reference/matrix-gateway.md +++ b/docs/architecture/reference/matrix-gateway.md @@ -41,16 +41,26 @@ flowchart TD ```json { - "intent": "code_review", + "request_id": "req_abc123", + "label": "code_review", "complexity": "medium", "tool_candidate": true, - "recommended_target": "codeflow-engine", - "recommended_model_tier": "small", - "escalation_required": false, + "recommended_tier": "slm", + "cacheable": true, "confidence": 0.93 } ``` +> **Migration Note (v1.0.0)**: The response contract has been updated. Legacy field names `intent`, `recommended_target`, `recommended_model_tier`, and `escalation_required` are deprecated. Update clients to use the new fields: +> +> - `intent` → `label` +> - `recommended_target` → removed (use `recommended_tier` for routing) +> - `recommended_model_tier` → `recommended_tier` +> - `escalation_required` → derive from `confidence < 0.75` threshold +> - `cacheable` is a new field (previously not returned) +> +> **Deprecation window**: Legacy fields will be removed in v1.2.0. Clients should update by then. For backwards compatibility, implement fallback logic checking both old and new field names. 
+ ## Contract Shapes ```typescript @@ -91,7 +101,7 @@ interface PolicyScreenOutput { | Condition | Action | | -------------------------------- | ---------------------- | | `policy-screen.allowed == false` | Block or redact | -| `confidence < 0.70` | Escalate to LLM | +| `confidence < 0.75` | Escalate to LLM | | Tool suggested but no mapping | Send to general LLM | | Tagging fails | Mark telemetry partial | diff --git a/docs/architecture/reference/matrix-rooivalk.md b/docs/architecture/reference/matrix-rooivalk.md index 29f20da..147c7b0 100644 --- a/docs/architecture/reference/matrix-rooivalk.md +++ b/docs/architecture/reference/matrix-rooivalk.md @@ -24,7 +24,7 @@ flowchart TD ## CRITICAL: SLM is for Reporting Only -``` +```text ┌─────────────────────────────────────────────────────────┐ │ IMPORTANT - SAFETY BOUNDARY │ ├─────────────────────────────────────────────────────────┤ @@ -109,7 +109,7 @@ interface SuggestSopOutput { ```typescript const DEFAULT_THRESHOLDS = { operator_summary: { direct_use: 0.8, facts_only: 0.65 }, - sop_suggestion: { direct_suggest: 0.78, manual_lookup: 0.65 }, + sop_suggestion: { direct_suggest: 0.8, manual_lookup: 0.65 }, }; ``` diff --git a/docs/architecture/reference/slm-implementation-matrix.md b/docs/architecture/reference/slm-implementation-matrix.md index eb1fc6e..ff50192 100644 --- a/docs/architecture/reference/slm-implementation-matrix.md +++ b/docs/architecture/reference/slm-implementation-matrix.md @@ -15,13 +15,13 @@ This document provides a repo-by-repo implementation matrix showing SLM endpoint ## Documentation Structure -``` +```text reference/ ├── slm-implementation-matrix.md # This file ├── matrix-gateway.md # AI Gateway details ├── matrix-cognitive-mesh.md # Cognitive Mesh details -├── matrix-codeflow.md # CodeFlow Engine details -├── matrix-agentkit.md # AgentKit Forge details +├── matrix-codeflow.md # CodeFlow Engine details +├── matrix-agentkit.md # AgentKit Forge details ├── matrix-rooivalk.md # PhoenixRooivalk 
details └── matrix-mystira.md # Mystira details ``` @@ -251,7 +251,7 @@ This is a practical role map, not a vendor mandate. ### Standard Fallback Pattern -``` +```text 1. SLM timeout → Deterministic rules 2. Low confidence → LLM escalation 3. Safety critical → Block immediately diff --git a/docs/architecture/reference/slm-management-plan.md b/docs/architecture/reference/slm-management-plan.md index 7c116c5..92c164d 100644 --- a/docs/architecture/reference/slm-management-plan.md +++ b/docs/architecture/reference/slm-management-plan.md @@ -40,7 +40,7 @@ Maintain a tiered model portfolio: Implement cost controls at each layer: -``` +```text Cost Control Layers ┌─────────────────────────────────────┐ │ 1. Budget caps per project │ @@ -172,7 +172,7 @@ async def security_pipeline(request: Request) -> SecurityResult: ### Fallback Hierarchy -``` +```text Request │ ▼ Primary SLM @@ -216,7 +216,7 @@ Request ### Model Lifecycle -``` +```text Discovery → Testing → Staging → Production → Deprecated → Retired │ │ │ │ │ ▼ ▼ ▼ ▼ ▼ @@ -271,4 +271,4 @@ Discovery → Testing → Staging → Production → Deprecated → Retired 6. [ ] Define fallback hierarchies 7. [ ] Implement observability stack 8. [ ] Document model lifecycle process -9. [ ] **Add explicit safety boundary for PhoenixRooivalk** +9. 
[x] Add explicit safety boundary for PhoenixRooivalk diff --git a/docs/architecture/reference/strategic/07-deployment-model.md b/docs/architecture/reference/strategic/07-deployment-model.md index e4e6f8d..7e03967 100644 --- a/docs/architecture/reference/strategic/07-deployment-model.md +++ b/docs/architecture/reference/strategic/07-deployment-model.md @@ -26,14 +26,14 @@ flowchart TD ## Decision Matrix -| System | Best SLM Jobs | Less Suitable | -| --------------- | -------------------------- | ------------------------------ | -| AI Gateway | routing, screening, cost | Nuanced synthesis | -| Cognitive Mesh | routing, decomposition | Final judgment | -| CodeFlow | PR triage, log analysis | Root cause across dependencies | -| AgentKit | tool selection, extraction | Multi-step planning | -| PhoenixRooivalk | summaries, alerts | Sole threat authority | -| Mystira | safety, continuity | Rich narrative | +| System | Best SLM Jobs | Less Suitable | +| --------------- | ------------------------------------------------------------- | ------------------------------ | +| AI Gateway | routing, screening, cost | Nuanced synthesis | +| Cognitive Mesh | routing, decomposition | Final judgment | +| CodeFlow | PR classification, CI failure triage, release-note extraction | Root cause across dependencies | +| AgentKit | tool selection, extraction | Multi-step planning | +| PhoenixRooivalk | summaries, alerts | Sole threat authority | +| Mystira | safety, continuity | Rich narrative | ## Practical Gateway Flow diff --git a/docs/architecture/systems/agentkit-forge.md b/docs/architecture/systems/agentkit-forge.md index c84a182..e61cf67 100644 --- a/docs/architecture/systems/agentkit-forge.md +++ b/docs/architecture/systems/agentkit-forge.md @@ -4,7 +4,7 @@ AgentKit Forge builds AI agents and orchestration workflows. 
SLMs help when agen ## Architecture -``` +```text Agent Task │ ▼ diff --git a/docs/architecture/systems/ai-gateway.md b/docs/architecture/systems/ai-gateway.md index 5a288f0..751f9fc 100644 --- a/docs/architecture/systems/ai-gateway.md +++ b/docs/architecture/systems/ai-gateway.md @@ -4,7 +4,7 @@ AI Gateway sits between applications and multiple AI providers. The SLM acts as ## Architecture -``` +```text Client Request │ ▼ @@ -144,3 +144,31 @@ Track per routing decision: - [ ] Add security prefiltering (injection, PII, secrets) - [ ] Set up cost tracking per tier - [ ] Configure latency alerts + +## v1 API Routing + +The gateway routes `/v1/responses` and `/v1/embeddings` requests to Azure OpenAI via LiteLLM provider configuration. + +### Routing Rules + +| Endpoint | Provider | Notes | +| ---------------- | ---------------------- | ------------------------- | +| `/v1/responses` | LiteLLM → Azure OpenAI | Standard chat completions | +| `/v1/embeddings` | LiteLLM → Azure OpenAI | Text embedding generation | + +### Example LiteLLM Config + +```yaml +model_list: + - model_name: gpt-4.1 + litellm_params: + model: azure/gpt-4.1 + api_base: https://<your-resource-name>.openai.azure.com + api_key: os.environ/AZURE_OPENAI_API_KEY + api_version: "2025-04-01-preview" +``` + +### Response vs Embeddings Handling + +- **Responses**: Model selection based on complexity/classification; supports streaming +- **Embeddings**: Batched processing; fixed deployment mapping diff --git a/docs/architecture/systems/codeflow-engine.md b/docs/architecture/systems/codeflow-engine.md index 30f5dfd..c1de01e 100644 --- a/docs/architecture/systems/codeflow-engine.md +++ b/docs/architecture/systems/codeflow-engine.md @@ -4,7 +4,7 @@ CodeFlow Engine is a DevOps and CI/CD intelligence system. 
**This is one of the ## Architecture -``` +```text Git Push / PR Event │ ▼ @@ -96,7 +96,7 @@ async def select_tests(change_type: str, impacted_files: list[str]) -> TestPlan: Type: {change_type} Files: {', '.join(impacted_files)} -Output: { "run_unit": bool, "run_integration": bool, "run_e2e": bool, "skip_reason": str|null }""" +Output: {{ "run_unit": bool, "run_integration": bool, "run_e2e": bool, "skip_reason": str|null }}""" return await slm_completion(prompt) ``` diff --git a/docs/architecture/systems/cognitive-mesh.md b/docs/architecture/systems/cognitive-mesh.md index d4f2c96..dedc502 100644 --- a/docs/architecture/systems/cognitive-mesh.md +++ b/docs/architecture/systems/cognitive-mesh.md @@ -4,7 +4,7 @@ Cognitive Mesh architectures orchestrate multiple AI agents and tools. The SLM i ## Architecture -``` +```text User Query │ ▼ @@ -143,7 +143,7 @@ async def compress_context(messages: list[Message]) -> Compressed: | Pros | Cons | | ------------------------------- | ----------------------------------------------- | -| Very large token savings | Decomposition quality can bottleneck workflow | +| Large token savings | Decomposition quality can bottleneck workflow | | Better determinism | Brittle routing if taxonomy is poor | | Easier specialist orchestration | Harder debugging if confidence handling is weak | | Improved auditability | | diff --git a/docs/architecture/systems/phoenix-rooivalk.md b/docs/architecture/systems/phoenix-rooivalk.md index dedaf96..5c0e0a7 100644 --- a/docs/architecture/systems/phoenix-rooivalk.md +++ b/docs/architecture/systems/phoenix-rooivalk.md @@ -4,7 +4,7 @@ PhoenixRooivalk is an edge AI counter-UAS (Unmanned Aerial System) system. 
**SLM ## Architecture -``` +```text Sensors │ ▼ diff --git a/docs/planning/request_to_token_attribution.md b/docs/planning/request_to_token_attribution.md index d5790c1..b0b5cd0 100644 --- a/docs/planning/request_to_token_attribution.md +++ b/docs/planning/request_to_token_attribution.md @@ -122,9 +122,9 @@ Start with downstream aggregation in pvc-costops-analytics - the cheapest and fa ### 1. cognitive-mesh (Upstream Caller) -**Required:** Pass correlation metadata in request body when calling gateway. There are two methods: +**Recommended:** Pass correlation metadata in request body when calling gateway. There are two methods: -**Method A: Via Request Metadata (Recommended)** +**Method A: Via Request Metadata (Preferred)** Pass correlation IDs in the request body `metadata` field: ```json @@ -142,7 +142,7 @@ Pass correlation IDs in the request body `metadata` field: } ``` -**Method B: Via HTTP Headers** +**Method B: Via HTTP Headers** (alternative - requires additional LiteLLM configuration or middleware) - x-request-id - x-session-id @@ -151,8 +151,6 @@ Pass correlation IDs in the request body `metadata` field: - x-stage-name - x-user-id -_Note: Method B requires additional LiteLLM configuration or middleware._ - ### 2. 
pvc-costops-analytics (Downstream Analytics) **Required:** KQL queries and dashboards to: @@ -218,7 +216,7 @@ _Note: Method B requires additional LiteLLM configuration or middleware._ - cognitive-mesh: Pass correlation metadata in request body - pvc-costops-analytics: Must create KQL queries for new event shape -- infra: Application Insights resource + APPLICATIONINSIGHTS_CONNECTION_STRING wiring added; trace export requires custom LiteLLM image (with azure-monitor-opentelemetry) or explicit OTEL_EXPORTER_OTLP_ENDPOINT configuration (currently empty by default) +- infra: Application Insights resource created; APPLICATIONINSIGHTS_CONNECTION_STRING stored in Key Vault and wired to container app via secret reference; trace export requires custom LiteLLM image (with azure-monitor-opentelemetry) or explicit OTEL_EXPORTER_OTLP_ENDPOINT configuration (currently empty by default) ## Action Items @@ -226,7 +224,7 @@ _Note: Method B requires additional LiteLLM configuration or middleware._ 1. ✅ ai-gateway: Add OTEL callback for token telemetry (Phase 1) 2. ✅ ai-gateway: Document correlation ID requirements (Phase 2) -3. ✅ ai-gateway: Add Application Insights connection string wiring (Phase 1b - trace export requires custom image or OTLP collector) +3. 
✅ ai-gateway: Add Application Insights connection string wiring via Key Vault (Phase 1b - trace export requires custom image or OTLP collector) ### Pending diff --git a/infra/env/dev/terraform.tfvars b/infra/env/dev/terraform.tfvars index aaec5e8..e2d08c8 100644 --- a/infra/env/dev/terraform.tfvars +++ b/infra/env/dev/terraform.tfvars @@ -23,3 +23,8 @@ tags = { } enable_redis_cache = true + +# State Service +state_service_container_image = "ghcr.io/phoenixvc/ai-gateway-state-service:latest" +state_service_registry_username = "phoenixvc" +state_service_registry_password = "ghp_xxx" diff --git a/infra/modules/aigateway_aca/outputs.tf b/infra/modules/aigateway_aca/outputs.tf index a8dfe6b..e6b8ff3 100644 --- a/infra/modules/aigateway_aca/outputs.tf +++ b/infra/modules/aigateway_aca/outputs.tf @@ -30,8 +30,7 @@ output "container_app_environment_id" { value = azurerm_container_app_environment.cae.id } -output "application_insights_connection_string" { - value = azurerm_application_insights.ai.connection_string - description = "Application Insights connection string for OTEL export." - sensitive = true +output "application_insights_name" { + description = "Application Insights resource name. Retrieve connection string from Key Vault secret 'appinsights-connection-string'." 
+ value = azurerm_application_insights.ai.name } diff --git a/infra/modules/dashboard_aca/main.tf b/infra/modules/dashboard_aca/main.tf index 65f86c8..ce740f4 100644 --- a/infra/modules/dashboard_aca/main.tf +++ b/infra/modules/dashboard_aca/main.tf @@ -12,7 +12,7 @@ terraform { locals { prefix = "pvc-${var.env}-${var.projname}" ca_name = "${local.prefix}-dashboard-${var.location_short}" - use_shared_token = trim(var.state_service_shared_token) != "" + use_shared_token = trimspace(var.state_service_shared_token) != "" tags = merge({ env = var.env diff --git a/infra/modules/state_service_aca/main.tf b/infra/modules/state_service_aca/main.tf index da86391..d768198 100644 --- a/infra/modules/state_service_aca/main.tf +++ b/infra/modules/state_service_aca/main.tf @@ -13,7 +13,7 @@ locals { prefix = "pvc-${var.env}-${var.projname}" ca_name = "${local.prefix}-state-${var.location_short}" use_registry_auth = var.registry_username != "" && var.registry_password != "" - use_shared_token = trim(var.state_service_shared_token) != "" + use_shared_token = trimspace(var.state_service_shared_token) != "" tags = merge({ env = var.env