From 78d1094c1ce9eb389a6bbd14b13f4e904bc1ad05 Mon Sep 17 00:00:00 2001
From: Deepak Tiwari
Date: Tue, 9 Jun 2026 15:00:32 +0530
Subject: [PATCH] feat: add PrometheusRule manifests for operational alerting

Add a PrometheusRule to the Helm chart (gated by prometheusRule.enabled and
by the presence of the PrometheusRule API on the cluster) and an equivalent
static manifest under deploy/k8s. Both define four alerts:
BPFProgramsNotLoaded, CollectorErrorsHigh, OOMKillsDetected and
HighMemoryPressure.

---
Note: hunk line counts and the diffstat reflect the revised content; the
blob ids on the "index" lines were not regenerated.

 .../helm/kerno/templates/prometheusrule.yaml | 59 +++++++++++++++++++
 deploy/helm/kerno/values.yaml                |  7 +++
 deploy/k8s/prometheusrule.yaml               | 54 +++++++++++++++++
 3 files changed, 120 insertions(+)
 create mode 100644 deploy/helm/kerno/templates/prometheusrule.yaml
 create mode 100644 deploy/k8s/prometheusrule.yaml

diff --git a/deploy/helm/kerno/templates/prometheusrule.yaml b/deploy/helm/kerno/templates/prometheusrule.yaml
new file mode 100644
index 0000000..4a1ea31
--- /dev/null
+++ b/deploy/helm/kerno/templates/prometheusrule.yaml
@@ -0,0 +1,59 @@
+{{- /*
+PrometheusRule carrying Kerno's operational alerts.
+Rendered only when prometheusRule.enabled is true and the API server serves the
+monitoring.coreos.com/v1 PrometheusRule kind (a Prometheus Operator CRD).
+*/ -}}
+{{- if and .Values.prometheusRule.enabled (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/PrometheusRule") }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: kerno
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "kerno.labels" . | nindent 4 }}
+spec:
+  groups:
+    - name: kerno.rules
+      rules:
+        # kerno_bpf_programs_loaded has been 0 for 5 minutes.
+        - alert: BPFProgramsNotLoaded
+          expr: kerno_bpf_programs_loaded == 0
+          for: 5m
+          labels:
+            severity: warning
+          annotations:
+            summary: No eBPF programs loaded
+            description: Kerno has no loaded eBPF programs for more than 5 minutes.
+
+        # More than 10 collector errors per 5m window, sustained for 5 minutes.
+        - alert: CollectorErrorsHigh
+          expr: increase(kerno_collector_errors_total[5m]) > 10
+          for: 5m
+          labels:
+            severity: warning
+          annotations:
+            summary: High collector error rate
+            description: Collector errors exceeded 10 in the last 5 minutes.
+
+        # Any increase in kerno_oom_kills_total over 5m, sustained for 1 minute.
+        - alert: OOMKillsDetected
+          expr: increase(kerno_oom_kills_total[5m]) > 0
+          for: 1m
+          labels:
+            severity: critical
+          annotations:
+            summary: OOM kill detected
+            description: One or more processes were killed by the OOM killer.
+
+        # kerno_cgroup_memory_pressure_pct above 90 for 5 minutes.
+        # NOTE(review): the description says "memory usage" while the metric
+        # name says "memory pressure" - confirm the wording.
+        - alert: HighMemoryPressure
+          expr: kerno_cgroup_memory_pressure_pct > 90
+          for: 5m
+          labels:
+            severity: warning
+          annotations:
+            summary: High memory pressure
+            description: Container memory usage exceeded 90 percent.
+{{- end }}
diff --git a/deploy/helm/kerno/values.yaml b/deploy/helm/kerno/values.yaml
index 6726657..ff33eed 100644
--- a/deploy/helm/kerno/values.yaml
+++ b/deploy/helm/kerno/values.yaml
@@ -160,6 +160,13 @@ serviceMonitor:
   # scrapeTimeout must be less than interval.
   scrapeTimeout: 10s
 
+# ── PrometheusRule ────────────────────────────────────────────────────────────
+# Alerting rules rendered from templates/prometheusrule.yaml.
+# The rule is skipped when the cluster does not serve the PrometheusRule kind
+# (for offline rendering pass --api-versions monitoring.coreos.com/v1/PrometheusRule).
+prometheusRule:
+  enabled: true
+
 # ── Tolerations ───────────────────────────────────────────────────────────────
 # By default kerno tolerates all taints so it runs on every node, including
 # control-plane nodes tainted with node-role.kubernetes.io/control-plane:NoSchedule.
diff --git a/deploy/k8s/prometheusrule.yaml b/deploy/k8s/prometheusrule.yaml
new file mode 100644
index 0000000..1c360f8
--- /dev/null
+++ b/deploy/k8s/prometheusrule.yaml
@@ -0,0 +1,54 @@
+# PrometheusRule carrying Kerno's operational alerts.
+# Requires the Prometheus Operator CRDs (monitoring.coreos.com/v1).
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: kerno
+  namespace: kerno-system
+  labels:
+    app.kubernetes.io/name: kerno
+spec:
+  groups:
+    - name: kerno.rules
+      rules:
+        # kerno_bpf_programs_loaded has been 0 for 5 minutes.
+        - alert: BPFProgramsNotLoaded
+          expr: kerno_bpf_programs_loaded == 0
+          for: 5m
+          labels:
+            severity: warning
+          annotations:
+            summary: No eBPF programs loaded
+            description: Kerno has no loaded eBPF programs for more than 5 minutes.
+
+        # More than 10 collector errors per 5m window, sustained for 5 minutes.
+        - alert: CollectorErrorsHigh
+          expr: increase(kerno_collector_errors_total[5m]) > 10
+          for: 5m
+          labels:
+            severity: warning
+          annotations:
+            summary: High collector error rate
+            description: Collector errors exceeded 10 in the last 5 minutes.
+
+        # Any increase in kerno_oom_kills_total over 5m, sustained for 1 minute.
+        - alert: OOMKillsDetected
+          expr: increase(kerno_oom_kills_total[5m]) > 0
+          for: 1m
+          labels:
+            severity: critical
+          annotations:
+            summary: OOM kill detected
+            description: One or more processes were killed by the OOM killer.
+
+        # kerno_cgroup_memory_pressure_pct above 90 for 5 minutes.
+        # NOTE(review): the description says "memory usage" while the metric
+        # name says "memory pressure" - confirm the wording.
+        - alert: HighMemoryPressure
+          expr: kerno_cgroup_memory_pressure_pct > 90
+          for: 5m
+          labels:
+            severity: warning
+          annotations:
+            summary: High memory pressure
+            description: Container memory usage exceeded 90 percent.