diff --git a/.github/workflows/deploy-k8s.yml b/.github/workflows/deploy-k8s.yml
index 7084737..8be3bee 100644
--- a/.github/workflows/deploy-k8s.yml
+++ b/.github/workflows/deploy-k8s.yml
@@ -106,6 +106,22 @@ jobs:
           kubectl apply -f infra/k8s/web-client/
           kubectl apply -f infra/k8s/ingress.yaml
 
+      # The password reaches the script through the environment instead of
+      # being expanded into it as a workflow expression, so quotes, `$` or
+      # backticks in the secret can neither break nor inject into the command.
+      # `create --dry-run=client | apply` turns the step into an upsert.
+      # NOTE(review): assumes the `monitoring` namespace already exists.
+      - name: Upsert Grafana secret
+        env:
+          GRAFANA_ADMIN_PASSWORD: ${{ secrets.GRAFANA_ADMIN_PASSWORD }}
+        run: |
+          kubectl create secret generic grafana-secret -n monitoring \
+            --from-literal=admin-password="$GRAFANA_ADMIN_PASSWORD" \
+            --dry-run=client -o yaml | kubectl apply -f -
+
+      - name: Deploy monitoring
+        run: kubectl apply -f infra/k8s/monitoring/
+
       - name: Restart deployments to pull latest images
         if: github.event_name != 'push'
         run: |
diff --git a/README.md b/README.md
index 26bf412..195081f 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,8 @@ Azure deployment (Docker Compose): http://135.116.196.120/
 Coverage reports: https://aet-devops26.github.io/team-devsecops/
 
 API scheme (Swagger UI): https://devsecops.stud.k8s.aet.cit.tum.de/swagger-ui/index.html
+
+Monitoring (Grafana): https://devsecops.stud.k8s.aet.cit.tum.de/grafana
 
 ## Local development
 The full stack runs under Docker Compose with live-reload:
diff --git a/infra/k8s/monitoring/grafana-ingress.yaml b/infra/k8s/monitoring/grafana-ingress.yaml
new file mode 100644
index 0000000..fc99932
--- /dev/null
+++ b/infra/k8s/monitoring/grafana-ingress.yaml
@@ -0,0 +1,29 @@
+---
+# Exposes Grafana at https://devsecops.stud.k8s.aet.cit.tum.de/grafana/
+# Grafana is configured with GF_SERVER_SERVE_FROM_SUB_PATH=true so it handles
+# the /grafana/ prefix itself — no rewrite-target needed.
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: grafana-ingress
+  namespace: monitoring
+  annotations:
+    cert-manager.io/cluster-issuer: letsencrypt-prod
+    nginx.ingress.kubernetes.io/proxy-read-timeout: "60"
+spec:
+  ingressClassName: nginx
+  tls:
+    - hosts:
+        - devsecops.stud.k8s.aet.cit.tum.de
+      secretName: grafana-tls-cert
+  rules:
+    - host: devsecops.stud.k8s.aet.cit.tum.de
+      http:
+        paths:
+          - path: /grafana
+            pathType: Prefix
+            backend:
+              service:
+                name: grafana
+                port:
+                  number: 3000
diff --git a/infra/k8s/monitoring/grafana.yaml b/infra/k8s/monitoring/grafana.yaml
new file mode 100644
index 0000000..0d9b19b
--- /dev/null
+++ b/infra/k8s/monitoring/grafana.yaml
@@ -0,0 +1,96 @@
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: grafana-datasources
+  namespace: monitoring
+data:
+  prometheus.yaml: |
+    apiVersion: 1
+    datasources:
+      - name: Prometheus
+        type: prometheus
+        url: http://prometheus.monitoring.svc.cluster.local:9090
+        isDefault: true
+        access: proxy
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: grafana-data
+  namespace: monitoring
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 1Gi
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: grafana
+  namespace: monitoring
+spec:
+  replicas: 1
+  # The volume mounted at /var/lib/grafana is ReadWriteOnce: replace the pod
+  # instead of surging a second one that would compete for the same volume.
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: grafana
+  template:
+    metadata:
+      labels:
+        app: grafana
+    spec:
+      securityContext:
+        fsGroup: 472
+      containers:
+        - name: grafana
+          image: grafana/grafana:11.6.1
+          env:
+            - name: GF_SECURITY_ADMIN_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: grafana-secret
+                  key: admin-password
+            - name: GF_SERVER_ROOT_URL
+              value: https://devsecops.stud.k8s.aet.cit.tum.de/grafana/
+            - name: GF_SERVER_SERVE_FROM_SUB_PATH
+              value: "true"
+          ports:
+            - containerPort: 3000
+          volumeMounts:
+            - name: datasources
+              mountPath: /etc/grafana/provisioning/datasources
+            - name: data
+              mountPath: /var/lib/grafana
+          resources:
+            requests:
+              cpu: 100m
+              memory: 128Mi
+            limits:
+              cpu: 500m
+              memory: 256Mi
+      volumes:
+        - name: datasources
+          configMap:
+            name: grafana-datasources
+        - name: data
+          persistentVolumeClaim:
+            claimName: grafana-data
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: grafana
+  namespace: monitoring
+spec:
+  selector:
+    app: grafana
+  ports:
+    - name: http
+      port: 3000
+      targetPort: 3000
diff --git a/infra/k8s/monitoring/prometheus.yaml b/infra/k8s/monitoring/prometheus.yaml
new file mode 100644
index 0000000..9cf83a0
--- /dev/null
+++ b/infra/k8s/monitoring/prometheus.yaml
@@ -0,0 +1,106 @@
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: prometheus-config
+  namespace: monitoring
+data:
+  prometheus.yml: |
+    global:
+      scrape_interval: 15s
+      evaluation_interval: 15s
+
+    scrape_configs:
+      - job_name: spring-api
+        metrics_path: /actuator/prometheus
+        static_configs:
+          - targets: ['spring-api.app.svc.cluster.local:8080']
+            labels:
+              service: spring-api
+
+      - job_name: py-help-service
+        static_configs:
+          - targets: ['py-help-service.app.svc.cluster.local:8080']
+            labels:
+              service: py-help-service
+
+      - job_name: py-recipe-service
+        static_configs:
+          - targets: ['py-recipe-service.app.svc.cluster.local:8080']
+            labels:
+              service: py-recipe-service
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: prometheus-data
+  namespace: monitoring
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 5Gi
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: prometheus
+  namespace: monitoring
+spec:
+  replicas: 1
+  # The TSDB volume mounted at /prometheus is ReadWriteOnce: replace the pod
+  # instead of surging a second one that would compete for the same volume.
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: prometheus
+  template:
+    metadata:
+      labels:
+        app: prometheus
+    spec:
+      securityContext:
+        fsGroup: 65534
+      containers:
+        - name: prometheus
+          image: prom/prometheus:v3.4.1
+          args:
+            - --config.file=/etc/prometheus/prometheus.yml
+            - --storage.tsdb.path=/prometheus
+            - --storage.tsdb.retention.time=7d
+          ports:
+            - containerPort: 9090
+          volumeMounts:
+            - name: config
+              mountPath: /etc/prometheus
+            - name: data
+              mountPath: /prometheus
+          resources:
+            requests:
+              cpu: 100m
+              memory: 256Mi
+            limits:
+              cpu: 500m
+              memory: 512Mi
+      volumes:
+        - name: config
+          configMap:
+            name: prometheus-config
+        - name: data
+          persistentVolumeClaim:
+            claimName: prometheus-data
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: prometheus
+  namespace: monitoring
+spec:
+  selector:
+    app: prometheus
+  ports:
+    - name: http
+      port: 9090
+      targetPort: 9090
diff --git a/services/py-help-service/main.py b/services/py-help-service/main.py
index 7abcdfe..cab1ef7 100644
--- a/services/py-help-service/main.py
+++ b/services/py-help-service/main.py
@@ -10,6 +10,7 @@ from langchain.chat_models import init_chat_model
 from langchain_core.messages import HumanMessage, SystemMessage
 
 
+from prometheus_fastapi_instrumentator import Instrumentator
 from client.cooking_assistant_gen_ai_services_api_internal_client.models.help_request_forwarded import (
     HelpRequestForwarded,
 )
@@ -21,6 +22,7 @@ load_dotenv()
 
 
 app = FastAPI(title="Cooking Assistant GenAI Service")
+Instrumentator().instrument(app).expose(app)
 
 
 @app.exception_handler(HTTPException)
diff --git a/services/py-help-service/requirements.txt b/services/py-help-service/requirements.txt
index 68d4b58..e05b4a3 100644
--- a/services/py-help-service/requirements.txt
+++ b/services/py-help-service/requirements.txt
@@ -8,3 +8,4 @@ pydantic==2.7.4
 python-dotenv==1.0.1
 attrs==23.2.0
 python-dateutil==2.9.0.post0
+prometheus-fastapi-instrumentator==7.1.0
diff --git a/services/py-recipe-service/main.py b/services/py-recipe-service/main.py
index 57f8b24..c116a87 100644
--- a/services/py-recipe-service/main.py
+++ b/services/py-recipe-service/main.py
@@ -12,6 +12,7 @@ from langchain.chat_models import init_chat_model
 from langchain_core.messages import HumanMessage, SystemMessage
 
 
+from prometheus_fastapi_instrumentator import Instrumentator
 from client.cooking_assistant_gen_ai_services_api_internal_client.models.recipe_request_forwarded import (
     RecipeRequestForwarded,
 )
@@ -20,6 +21,7 @@ load_dotenv()
 
 
 app = FastAPI(title="Cooking Assistant GenAI Service")
+Instrumentator().instrument(app).expose(app)
 
 
 @app.exception_handler(HTTPException)
diff --git a/services/py-recipe-service/requirements.txt b/services/py-recipe-service/requirements.txt
index 68d4b58..e05b4a3 100644
--- a/services/py-recipe-service/requirements.txt
+++ b/services/py-recipe-service/requirements.txt
@@ -8,3 +8,4 @@ pydantic==2.7.4
 python-dotenv==1.0.1
 attrs==23.2.0
 python-dateutil==2.9.0.post0
+prometheus-fastapi-instrumentator==7.1.0
diff --git a/services/spring-api/build.gradle.kts b/services/spring-api/build.gradle.kts
index 834f40a..8ef7394 100644
--- a/services/spring-api/build.gradle.kts
+++ b/services/spring-api/build.gradle.kts
@@ -20,35 +20,35 @@ repositories {
 }
 
 kotlin {
-	compilerOptions {
-		jvmTarget.set(JvmTarget.JVM_17)
-		freeCompilerArgs.add("-Xannotation-default-target=param-property")
-	}
-	
-	sourceSets {
-		getByName("test") {
-			kotlin.srcDir("src/test/kotlin")
-		}
-	}
+    compilerOptions {
+        jvmTarget.set(JvmTarget.JVM_17)
+        freeCompilerArgs.add("-Xannotation-default-target=param-property")
+    }
+
+    sourceSets {
+        getByName("test") {
+            kotlin.srcDir("src/test/kotlin")
+        }
+    }
 }
 
 tasks.test {
-	useJUnitPlatform()
-	
-	jvmArgs("-XX:+EnableDynamicAgentLoading")
-	
-	reports {
-		junitXml.required.set(true)
-		html.required.set(true)
-	}
-	
-	testLogging {
-		events("passed", "skipped", "failed")
-		exceptionFormat = org.gradle.api.tasks.testing.logging.TestExceptionFormat.FULL
-		showExceptions = true
-		showCauses = true
-	}
-	finalizedBy(tasks.jacocoTestReport)
+    useJUnitPlatform()
+
+    jvmArgs("-XX:+EnableDynamicAgentLoading")
+
+    reports {
+        junitXml.required.set(true)
+        html.required.set(true)
+    }
+
+    testLogging {
+        events("passed", "skipped", "failed")
+        exceptionFormat = org.gradle.api.tasks.testing.logging.TestExceptionFormat.FULL
+        showExceptions = true
+        showCauses = true
+    }
+    finalizedBy(tasks.jacocoTestReport)
 }
 
 tasks.jacocoTestReport {
@@ -87,6 +87,7 @@ dependencies {
 
     // Observability
     implementation("org.springframework.boot:spring-boot-starter-actuator")
+    runtimeOnly("io.micrometer:micrometer-registry-prometheus")
 
     // Auth
     implementation("org.springframework.boot:spring-boot-starter-security")
@@ -94,24 +95,28 @@ dependencies {
     runtimeOnly("io.jsonwebtoken:jjwt-impl:0.12.6")
     runtimeOnly("io.jsonwebtoken:jjwt-jackson:0.12.6")
 
-	// Tests
-	testImplementation("org.jetbrains.kotlin:kotlin-test-junit5")
-	testImplementation("org.springframework.boot:spring-boot-starter-test") {
-		exclude(module = "junit")
-	}
-	testImplementation("org.mockito.kotlin:mockito-kotlin:5.4.0")
-	testImplementation("org.springframework.boot:spring-boot-webmvc-test")
-	testImplementation("org.springframework.security:spring-security-test")
-	
-	// Retrofit
-	implementation("com.squareup.retrofit2:retrofit:2.11.0")
-	implementation("com.squareup.retrofit2:converter-jackson:2.11.0")
-	
-	// OkHttp
-	implementation("com.squareup.okhttp3:okhttp:4.12.0")
-	
-	// JSON multiplatform runtime
-	implementation("com.squareup.moshi:moshi:1.15.1")
-	implementation("com.squareup.moshi:moshi-kotlin:1.15.1")
-	implementation("com.squareup.retrofit2:converter-moshi:2.11.0")
+    // Tests
+    testImplementation("org.jetbrains.kotlin:kotlin-test-junit5")
+    testImplementation("org.springframework.boot:spring-boot-starter-test") {
+        exclude(module = "junit")
+    }
+    testImplementation("org.mockito.kotlin:mockito-kotlin:5.4.0")
+    testImplementation("org.springframework.boot:spring-boot-webmvc-test")
+    testImplementation("org.springframework.security:spring-security-test")
+
+    // Retrofit
+    implementation("com.squareup.retrofit2:retrofit:2.11.0")
+    implementation("com.squareup.retrofit2:converter-jackson:2.11.0")
+
+    // OkHttp
+    implementation("com.squareup.okhttp3:okhttp:4.12.0")
+
+    // JSON multiplatform runtime
+    implementation("com.squareup.moshi:moshi:1.15.1")
+    implementation("com.squareup.moshi:moshi-kotlin:1.15.1")
+    implementation("com.squareup.retrofit2:converter-moshi:2.11.0")
+}
+
+springBoot {
+    buildInfo()
 }
diff --git a/services/spring-api/src/main/kotlin/org/openapitools/security/SecurityConfig.kt b/services/spring-api/src/main/kotlin/org/openapitools/security/SecurityConfig.kt
index 26053c1..04ea3ba 100644
--- a/services/spring-api/src/main/kotlin/org/openapitools/security/SecurityConfig.kt
+++ b/services/spring-api/src/main/kotlin/org/openapitools/security/SecurityConfig.kt
@@ -64,6 +64,9 @@ class SecurityConfig(
                     "/swagger-ui/**",
                     "/v3/api-docs/**",
                     "/actuator/health/**",
+                    // Scraped without credentials by the in-cluster Prometheus.
+                    // NOTE(review): confirm the public ingress does not route /actuator/**.
+                    "/actuator/prometheus",
                 ).permitAll()
                 .anyRequest()
                 .authenticated() // all other routes require a valid JWT
diff --git a/services/spring-api/src/main/resources/application.yaml b/services/spring-api/src/main/resources/application.yaml
index 406dbf5..d506b0d 100644
--- a/services/spring-api/src/main/resources/application.yaml
+++ b/services/spring-api/src/main/resources/application.yaml
@@ -42,7 +42,12 @@ management:
   endpoints:
     web:
       exposure:
-        include: health # only /actuator/health is exposed, not info/metrics/env/etc.
+        # Only these actuator endpoints are exposed over HTTP:
+        # health (probes), prometheus (metrics scrape), info (build metadata).
+        include: health,prometheus,info
+  info:
+    build:
+      enabled: true
   endpoint:
     health:
       probes: