diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index e2e26d5..25e9d42 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -139,6 +139,13 @@ jobs: printf '%s' "${{ secrets.KUBECONFIG }}" > ~/.kube/config chmod 600 ~/.kube/config + - name: Verify cluster auth + run: | + kubectl get --raw /apis >/dev/null || { + echo "::error::KUBECONFIG appears invalid/expired (system:unauthenticated) — rotate the secret." + exit 1 + } + - name: Create/refresh genai-env secret env: GENAI_ENV_CONTENT: ${{ secrets.GENAI_ENV_CONTENT }} @@ -146,7 +153,7 @@ jobs: printf '%s\n' "$GENAI_ENV_CONTENT" > /tmp/genai.env kubectl -n "$NAMESPACE" create secret generic genai-env \ --from-env-file=/tmp/genai.env \ - --dry-run=client -o yaml | kubectl apply -f - + --dry-run=client -o yaml | kubectl apply --validate=false -f - - name: Unlock stuck Helm release (if any) run: | diff --git a/README.md b/README.md index bd858db..d6b4875 100644 --- a/README.md +++ b/README.md @@ -150,11 +150,11 @@ cd infra && docker compose down -v # wipes the postgres volume too ## Production Deployment -The stack runs on a single Azure VM in **UAE North**, fronted by Traefik with a +The stack runs on a single Azure VM in **Poland Central**, fronted by Traefik with a real TLS certificate from Let's Encrypt (production CA). Everything is automated; no manual VM access is required for normal deploys. -**Live URL:** https://team-devoops.uaenorth.cloudapp.azure.com +**Live URL:** https://team-devoops.polandcentral.cloudapp.azure.com ### Infrastructure stack @@ -163,7 +163,7 @@ automated; no manual VM access is required for normal deploys. 
| Provisioning | **Terraform** (AzureRM ~> 4.0) | Resource group, VNet, NSG (22/80/443), static public IP + free Azure FQDN, Ubuntu 24.04 VM | | Configuration | **Ansible** | Installs Docker, clones repo, writes `.env`, runs `docker compose up` | | CI/CD | **GitHub Actions** (OIDC, no client secrets) | `infra.yml` (manual: plan/apply/destroy) and `cd.yml` (auto on push to `main`) | -| Remote state | **Azure Blob Storage** (`stteamdevoopstfstate/tfstate`) | Shared, locked Terraform state — survives between CI runs | +| Remote state | **Azure Blob Storage** (`stdevoops26tfstate/tfstate`) | Shared, locked Terraform state — survives between CI runs | | TLS | **Let's Encrypt** (HTTP-01 via Traefik) | Cert persisted in a Docker volume; auto-renewed | ### GitHub Actions workflows @@ -186,7 +186,7 @@ automated; no manual VM access is required for normal deploys. The OIDC service principal needs `Contributor` on the subscription (to manage resources in `rg-team-devoops`) and `Storage Blob Data Contributor` on the -state account `stteamdevoopstfstate` (to read/write tfstate). +state account `stdevoops26tfstate` (to read/write tfstate). ### Typical workflow @@ -280,7 +280,7 @@ The web client redirects to Keycloak automatically (`login-required` strategy). ### Production admin console -Keycloak is publicly accessible via Traefik at https://team-devoops.uaenorth.cloudapp.azure.com/auth. Admin console: `/auth/admin`. +Keycloak is publicly accessible via Traefik at https://team-devoops.polandcentral.cloudapp.azure.com/auth. Admin console: `/auth/admin`. 
### Spring services — JWT validation diff --git a/infra/docker-compose.yml b/infra/docker-compose.yml index 178c724..9b62758 100644 --- a/infra/docker-compose.yml +++ b/infra/docker-compose.yml @@ -11,7 +11,7 @@ services: app-database: condition: service_healthy environment: - - KEYCLOAK_ISSUER_URL=https://team-devoops.uaenorth.cloudapp.azure.com/auth/realms/devops + - KEYCLOAK_ISSUER_URL=https://team-devoops.polandcentral.cloudapp.azure.com/auth/realms/devops - KEYCLOAK_JWKS_URL=http://keycloak:8080/auth/realms/devops/protocol/openid-connect/certs - SPRING_DATASOURCE_URL=jdbc:postgresql://app-database:5432/app_db - SPRING_DATASOURCE_USERNAME=reports_user @@ -21,7 +21,7 @@ services: labels: - "traefik.enable=true" - "traefik.http.routers.py-genai-helper.entrypoints=websecure" - - "traefik.http.routers.py-genai-helper.rule=Host(`team-devoops.uaenorth.cloudapp.azure.com`) && PathPrefix(`/api/v1/helper`)" + - "traefik.http.routers.py-genai-helper.rule=Host(`team-devoops.polandcentral.cloudapp.azure.com`) && PathPrefix(`/api/v1/helper`)" - "traefik.http.routers.py-genai-helper.tls=true" - "traefik.http.routers.py-genai-helper.tls.certresolver=le" - "traefik.http.middlewares.helper-stripprefix.stripprefix.prefixes=/api/v1/helper" @@ -44,11 +44,11 @@ services: - SPRING_DATASOURCE_URL=jdbc:postgresql://app-database:5432/app_db - SPRING_DATASOURCE_USERNAME=organization_user - SPRING_DATASOURCE_PASSWORD=organization_password - - SPRING_SECURITY_OAUTH2_RESOURCESERVER_JWT_ISSUER_URI=https://team-devoops.uaenorth.cloudapp.azure.com/auth/realms/devops + - SPRING_SECURITY_OAUTH2_RESOURCESERVER_JWT_ISSUER_URI=https://team-devoops.polandcentral.cloudapp.azure.com/auth/realms/devops labels: - "traefik.enable=true" - "traefik.http.routers.organization-service.entrypoints=websecure" - - "traefik.http.routers.organization-service.rule=Host(`team-devoops.uaenorth.cloudapp.azure.com`) && PathPrefix(`/api/v1/organization`)" + - 
"traefik.http.routers.organization-service.rule=Host(`team-devoops.polandcentral.cloudapp.azure.com`) && PathPrefix(`/api/v1/organization`)" - "traefik.http.routers.organization-service.tls=true" - "traefik.http.routers.organization-service.tls.certresolver=le" - "traefik.http.middlewares.organization-stripprefix.stripprefix.prefixes=/api/v1" @@ -71,11 +71,11 @@ services: - SPRING_DATASOURCE_URL=jdbc:postgresql://app-database:5432/app_db - SPRING_DATASOURCE_USERNAME=member_user - SPRING_DATASOURCE_PASSWORD=member_password - - SPRING_SECURITY_OAUTH2_RESOURCESERVER_JWT_ISSUER_URI=https://team-devoops.uaenorth.cloudapp.azure.com/auth/realms/devops + - SPRING_SECURITY_OAUTH2_RESOURCESERVER_JWT_ISSUER_URI=https://team-devoops.polandcentral.cloudapp.azure.com/auth/realms/devops labels: - "traefik.enable=true" - "traefik.http.routers.member-service.entrypoints=websecure" - - "traefik.http.routers.member-service.rule=Host(`team-devoops.uaenorth.cloudapp.azure.com`) && PathPrefix(`/api/v1/members`)" + - "traefik.http.routers.member-service.rule=Host(`team-devoops.polandcentral.cloudapp.azure.com`) && PathPrefix(`/api/v1/members`)" - "traefik.http.routers.member-service.tls=true" - "traefik.http.routers.member-service.tls.certresolver=le" - "traefik.http.middlewares.member-stripprefix.stripprefix.prefixes=/api/v1" @@ -98,11 +98,11 @@ services: - SPRING_DATASOURCE_URL=jdbc:postgresql://app-database:5432/app_db - SPRING_DATASOURCE_USERNAME=event_user - SPRING_DATASOURCE_PASSWORD=event_password - - SPRING_SECURITY_OAUTH2_RESOURCESERVER_JWT_ISSUER_URI=https://team-devoops.uaenorth.cloudapp.azure.com/auth/realms/devops + - SPRING_SECURITY_OAUTH2_RESOURCESERVER_JWT_ISSUER_URI=https://team-devoops.polandcentral.cloudapp.azure.com/auth/realms/devops labels: - "traefik.enable=true" - "traefik.http.routers.event-service.entrypoints=websecure" - - "traefik.http.routers.event-service.rule=Host(`team-devoops.uaenorth.cloudapp.azure.com`) && PathPrefix(`/api/v1/events`)" + - 
"traefik.http.routers.event-service.rule=Host(`team-devoops.polandcentral.cloudapp.azure.com`) && PathPrefix(`/api/v1/events`)" - "traefik.http.routers.event-service.tls=true" - "traefik.http.routers.event-service.tls.certresolver=le" - "traefik.http.middlewares.event-stripprefix.stripprefix.prefixes=/api/v1" @@ -125,11 +125,11 @@ services: - SPRING_DATASOURCE_URL=jdbc:postgresql://app-database:5432/app_db - SPRING_DATASOURCE_USERNAME=feedback_user - SPRING_DATASOURCE_PASSWORD=feedback_password - - SPRING_SECURITY_OAUTH2_RESOURCESERVER_JWT_ISSUER_URI=https://team-devoops.uaenorth.cloudapp.azure.com/auth/realms/devops + - SPRING_SECURITY_OAUTH2_RESOURCESERVER_JWT_ISSUER_URI=https://team-devoops.polandcentral.cloudapp.azure.com/auth/realms/devops labels: - "traefik.enable=true" - "traefik.http.routers.feedback-service.entrypoints=websecure" - - "traefik.http.routers.feedback-service.rule=Host(`team-devoops.uaenorth.cloudapp.azure.com`) && PathPrefix(`/api/v1/feedback`)" + - "traefik.http.routers.feedback-service.rule=Host(`team-devoops.polandcentral.cloudapp.azure.com`) && PathPrefix(`/api/v1/feedback`)" - "traefik.http.routers.feedback-service.tls=true" - "traefik.http.routers.feedback-service.tls.certresolver=le" - "traefik.http.middlewares.feedback-stripprefix.stripprefix.prefixes=/api/v1" @@ -152,11 +152,11 @@ services: - SPRING_DATASOURCE_URL=jdbc:postgresql://app-database:5432/app_db - SPRING_DATASOURCE_USERNAME=finance_user - SPRING_DATASOURCE_PASSWORD=finance_password - - SPRING_SECURITY_OAUTH2_RESOURCESERVER_JWT_ISSUER_URI=https://team-devoops.uaenorth.cloudapp.azure.com/auth/realms/devops + - SPRING_SECURITY_OAUTH2_RESOURCESERVER_JWT_ISSUER_URI=https://team-devoops.polandcentral.cloudapp.azure.com/auth/realms/devops labels: - "traefik.enable=true" - "traefik.http.routers.finance-service.entrypoints=websecure" - - "traefik.http.routers.finance-service.rule=Host(`team-devoops.uaenorth.cloudapp.azure.com`) && PathPrefix(`/api/v1/finance`)" + - 
"traefik.http.routers.finance-service.rule=Host(`team-devoops.polandcentral.cloudapp.azure.com`) && PathPrefix(`/api/v1/finance`)" - "traefik.http.routers.finance-service.tls=true" - "traefik.http.routers.finance-service.tls.certresolver=le" - "traefik.http.middlewares.finance-stripprefix.stripprefix.prefixes=/api/v1" @@ -179,11 +179,11 @@ services: - SPRING_DATASOURCE_URL=jdbc:postgresql://app-database:5432/app_db - SPRING_DATASOURCE_USERNAME=letter_user - SPRING_DATASOURCE_PASSWORD=letter_password - - SPRING_SECURITY_OAUTH2_RESOURCESERVER_JWT_ISSUER_URI=https://team-devoops.uaenorth.cloudapp.azure.com/auth/realms/devops + - SPRING_SECURITY_OAUTH2_RESOURCESERVER_JWT_ISSUER_URI=https://team-devoops.polandcentral.cloudapp.azure.com/auth/realms/devops labels: - "traefik.enable=true" - "traefik.http.routers.letter-service.entrypoints=websecure" - - "traefik.http.routers.letter-service.rule=Host(`team-devoops.uaenorth.cloudapp.azure.com`) && PathPrefix(`/api/v1/letters`)" + - "traefik.http.routers.letter-service.rule=Host(`team-devoops.polandcentral.cloudapp.azure.com`) && PathPrefix(`/api/v1/letters`)" - "traefik.http.routers.letter-service.tls=true" - "traefik.http.routers.letter-service.tls.certresolver=le" - "traefik.http.middlewares.letter-stripprefix.stripprefix.prefixes=/api/v1" @@ -206,7 +206,7 @@ services: labels: - "traefik.enable=true" - "traefik.http.routers.api-docs.entrypoints=websecure" - - "traefik.http.routers.api-docs.rule=Host(`team-devoops.uaenorth.cloudapp.azure.com`) && PathPrefix(`/docs`)" + - "traefik.http.routers.api-docs.rule=Host(`team-devoops.polandcentral.cloudapp.azure.com`) && PathPrefix(`/docs`)" - "traefik.http.routers.api-docs.tls=true" - "traefik.http.routers.api-docs.tls.certresolver=le" - "traefik.http.services.api-docs.loadbalancer.server.port=8080" @@ -217,7 +217,7 @@ services: build: context: ../web-client/ args: - VITE_KEYCLOAK_URL: https://team-devoops.uaenorth.cloudapp.azure.com/auth + VITE_KEYCLOAK_URL: 
https://team-devoops.polandcentral.cloudapp.azure.com/auth container_name: web-client expose: - 8080 @@ -232,7 +232,7 @@ services: labels: - "traefik.enable=true" - "traefik.http.routers.web-client.entrypoints=websecure" - - "traefik.http.routers.web-client.rule=Host(`team-devoops.uaenorth.cloudapp.azure.com`)" + - "traefik.http.routers.web-client.rule=Host(`team-devoops.polandcentral.cloudapp.azure.com`)" - "traefik.http.routers.web-client.tls=true" - "traefik.http.routers.web-client.tls.certresolver=le" - "traefik.http.routers.web-client.middlewares=forward-auth@file" @@ -279,14 +279,14 @@ services: container_name: traefik-forward-auth environment: - DEFAULT_PROVIDER=oidc - - PROVIDERS_OIDC_ISSUER_URL=https://team-devoops.uaenorth.cloudapp.azure.com/auth/realms/devops + - PROVIDERS_OIDC_ISSUER_URL=https://team-devoops.polandcentral.cloudapp.azure.com/auth/realms/devops - PROVIDERS_OIDC_CLIENT_ID=traefik-forward-auth - PROVIDERS_OIDC_CLIENT_SECRET=traefik-forward-auth-secret - SECRET=a-random-32-char-secret-changeme! 
- INSECURE_COOKIE=false - LOG_LEVEL=debug extra_hosts: - - "team-devoops.uaenorth.cloudapp.azure.com:host-gateway" + - "team-devoops.polandcentral.cloudapp.azure.com:host-gateway" labels: - "traefik.enable=false" depends_on: @@ -313,12 +313,12 @@ services: KC_HEALTH_ENABLED: "true" KC_HTTP_RELATIVE_PATH: /auth KC_HTTP_MANAGEMENT_RELATIVE_PATH: / - KC_HOSTNAME: https://team-devoops.uaenorth.cloudapp.azure.com/auth + KC_HOSTNAME: https://team-devoops.polandcentral.cloudapp.azure.com/auth KC_PROXY_HEADERS: xforwarded labels: - "traefik.enable=true" - "traefik.http.routers.keycloak.entrypoints=websecure" - - "traefik.http.routers.keycloak.rule=Host(`team-devoops.uaenorth.cloudapp.azure.com`) && PathPrefix(`/auth`)" + - "traefik.http.routers.keycloak.rule=Host(`team-devoops.polandcentral.cloudapp.azure.com`) && PathPrefix(`/auth`)" - "traefik.http.routers.keycloak.tls=true" - "traefik.http.routers.keycloak.tls.certresolver=le" - "traefik.http.services.keycloak.loadbalancer.server.port=8080" diff --git a/infra/helm/team-devoops/files/realm-config.json b/infra/helm/team-devoops/files/realm-config.json index 5a9fbe6..78694eb 100644 --- a/infra/helm/team-devoops/files/realm-config.json +++ b/infra/helm/team-devoops/files/realm-config.json @@ -117,12 +117,12 @@ "standardFlowEnabled": true, "directAccessGrantsEnabled": false, "redirectUris": [ - "https://team-devoops.uaenorth.cloudapp.azure.com/_oauth", + "https://team-devoops.polandcentral.cloudapp.azure.com/_oauth", "https://ge83mom-devops26.stud.k8s.aet.cit.tum.de/oauth2/callback", "http://localhost/_oauth" ], "webOrigins": [ - "https://team-devoops.uaenorth.cloudapp.azure.com", + "https://team-devoops.polandcentral.cloudapp.azure.com", "https://ge83mom-devops26.stud.k8s.aet.cit.tum.de", "http://localhost" ] diff --git a/infra/helm/team-devoops/templates/deployment.yaml b/infra/helm/team-devoops/templates/deployment.yaml index 65214fc..2ecb93e 100644 --- a/infra/helm/team-devoops/templates/deployment.yaml +++ 
b/infra/helm/team-devoops/templates/deployment.yaml @@ -8,6 +8,7 @@ metadata: {{- include "team-devoops.labels" (dict "name" $name "root" $root) | nindent 4 }} spec: replicas: {{ $svc.replicas | default 1 }} + progressDeadlineSeconds: {{ $svc.progressDeadlineSeconds | default 600 }} strategy: {{- toYaml $root.Values.strategy | nindent 4 }} selector: diff --git a/infra/keycloak/realm-config.json b/infra/keycloak/realm-config.json index 5a9fbe6..78694eb 100644 --- a/infra/keycloak/realm-config.json +++ b/infra/keycloak/realm-config.json @@ -117,12 +117,12 @@ "standardFlowEnabled": true, "directAccessGrantsEnabled": false, "redirectUris": [ - "https://team-devoops.uaenorth.cloudapp.azure.com/_oauth", + "https://team-devoops.polandcentral.cloudapp.azure.com/_oauth", "https://ge83mom-devops26.stud.k8s.aet.cit.tum.de/oauth2/callback", "http://localhost/_oauth" ], "webOrigins": [ - "https://team-devoops.uaenorth.cloudapp.azure.com", + "https://team-devoops.polandcentral.cloudapp.azure.com", "https://ge83mom-devops26.stud.k8s.aet.cit.tum.de", "http://localhost" ] diff --git a/infra/terraform/main.tf b/infra/terraform/main.tf index 40ade52..08d6657 100644 --- a/infra/terraform/main.tf +++ b/infra/terraform/main.tf @@ -12,7 +12,7 @@ terraform { # Bootstrap (run once, manually) — see infra/README or commit history for details. backend "azurerm" { resource_group_name = "rg-team-devoops-tfstate" - storage_account_name = "stteamdevoopstfstate" + storage_account_name = "stdevoops26tfstate" container_name = "tfstate" key = "team-devoops.tfstate" use_oidc = true @@ -53,6 +53,13 @@ resource "azurerm_virtual_network" "main" { resource_group_name = azurerm_resource_group.main.name address_space = ["10.0.0.0/16"] tags = local.tags + + # Subnets are managed via the separate azurerm_subnet.main resource below. + # Without this, the provider's computed `subnet` list can drift and attempt + # to delete the in-use subnet on unrelated applies. 
+ lifecycle { + ignore_changes = [subnet] + } } resource "azurerm_subnet" "main" { diff --git a/infra/terraform/variables.tf b/infra/terraform/variables.tf index cae8a0a..d28168c 100644 --- a/infra/terraform/variables.tf +++ b/infra/terraform/variables.tf @@ -7,13 +7,13 @@ variable "resource_group_name" { variable "location" { description = "Azure region for all resources" type = string - default = "uaenorth" + default = "polandcentral" } variable "vm_size" { description = "Azure VM size" type = string - default = "Standard_D2_v4" + default = "Standard_D2as_v4" } variable "admin_username" { diff --git a/services/py-genai-helper/conftest.py b/services/py-genai-helper/conftest.py index ad41bdf..1885a64 100644 --- a/services/py-genai-helper/conftest.py +++ b/services/py-genai-helper/conftest.py @@ -1,8 +1,8 @@ """Test setup shared across the suite. -Importing ``app`` pulls in ``service``/``rag``, which at import time build an LLM agent and a FAISS -vector store from the bundled PDFs (real OpenAI calls). Tests stub ``service`` out before ``app`` is -imported, and neutralise the startup DB initialisation so no live database is required. +Importing ``app`` pulls in ``service``/``rag``, which at import time construct an LLM agent client +(no network calls). Tests still stub ``service`` out before ``app`` is imported to avoid requiring +an API key, and neutralise the startup DB initialisation so no live database is required. 
""" import sys diff --git a/services/py-genai-helper/rag.py b/services/py-genai-helper/rag.py index 903d516..6c0ca18 100644 --- a/services/py-genai-helper/rag.py +++ b/services/py-genai-helper/rag.py @@ -1,3 +1,4 @@ +from functools import lru_cache from pathlib import Path from dotenv import load_dotenv @@ -29,10 +30,17 @@ def _load_pdfs() -> FAISS | None: return FAISS.from_documents(docs, embedding=embeddings) -vector_store = _load_pdfs() +@lru_cache(maxsize=1) +def _get_vector_store() -> FAISS | None: + # Built lazily on first RAG request rather than at import time: embedding the PDFs is a + # real OpenAI API call per chunk, which previously ran during gunicorn worker boot and + # blocked /health from responding until it finished (minutes, sometimes exceeding the + # Kubernetes rollout deadline). + return _load_pdfs() def get_rag_agent(): + vector_store = _get_vector_store() if vector_store is None: raise RuntimeError("No PDFs found in file-storage/")