diff --git a/deploy/helm/kerno/templates/prometheusrule.yaml b/deploy/helm/kerno/templates/prometheusrule.yaml
new file mode 100644
index 0000000..4a1ea31
--- /dev/null
+++ b/deploy/helm/kerno/templates/prometheusrule.yaml
@@ -0,0 +1,60 @@
+{{- /*
+PrometheusRule carrying kerno's default alerts.
+Rendered only when .Values.prometheusRule.enabled is true.
+*/ -}}
+{{- if .Values.prometheusRule.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: kerno
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    {{- include "kerno.labels" . | nindent 4 }}
+spec:
+  groups:
+    - name: kerno.rules
+      rules:
+        # Fires when kerno_bpf_programs_loaded has been 0 for 5 minutes.
+        - alert: BPFProgramsNotLoaded
+          expr: kerno_bpf_programs_loaded == 0
+          for: 5m
+          labels:
+            severity: warning
+          annotations:
+            summary: No eBPF programs loaded
+            description: Kerno has no loaded eBPF programs for more than 5 minutes.
+
+        # Fires when kerno_collector_errors_total grew by more than 10 over a
+        # 5-minute window, sustained for 5 minutes.
+        - alert: CollectorErrorsHigh
+          expr: increase(kerno_collector_errors_total[5m]) > 10
+          for: 5m
+          labels:
+            severity: warning
+          annotations:
+            summary: High collector error rate
+            description: Collector errors exceeded 10 in the last 5 minutes.
+
+        # Fires when kerno_oom_kills_total increased over a 5-minute window,
+        # sustained for 1 minute.
+        - alert: OOMKillsDetected
+          expr: increase(kerno_oom_kills_total[5m]) > 0
+          for: 1m
+          labels:
+            severity: critical
+          annotations:
+            summary: OOM kill detected
+            description: One or more processes were killed by the OOM killer.
+
+        # Fires when kerno_cgroup_memory_pressure_pct has been above 90 for
+        # 5 minutes. NOTE(review): the metric name says "pressure" while the
+        # description says "usage" - confirm which one is meant.
+        - alert: HighMemoryPressure
+          expr: kerno_cgroup_memory_pressure_pct > 90
+          for: 5m
+          labels:
+            severity: warning
+          annotations:
+            summary: High memory pressure
+            description: Container memory usage exceeded 90 percent.
+{{- end }}
diff --git a/deploy/helm/kerno/values.yaml b/deploy/helm/kerno/values.yaml
index 6726657..ff33eed 100644
--- a/deploy/helm/kerno/values.yaml
+++ b/deploy/helm/kerno/values.yaml
@@ -160,6 +160,12 @@ serviceMonitor:
   # scrapeTimeout must be less than interval.
   scrapeTimeout: 10s
 
+# ── PrometheusRule ────────────────────────────────────────────────────────────
+# Renders a PrometheusRule with kerno's default alerts. Requires the
+# monitoring.coreos.com/v1 CRDs (Prometheus Operator) in the cluster.
+prometheusRule:
+  enabled: true
+
 # ── Tolerations ───────────────────────────────────────────────────────────────
 # By default kerno tolerates all taints so it runs on every node, including
 # control-plane nodes tainted with node-role.kubernetes.io/control-plane:NoSchedule.
diff --git a/deploy/k8s/prometheusrule.yaml b/deploy/k8s/prometheusrule.yaml
new file mode 100644
index 0000000..1c360f8
--- /dev/null
+++ b/deploy/k8s/prometheusrule.yaml
@@ -0,0 +1,55 @@
+# Static PrometheusRule manifest with kerno's default alerts.
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: kerno
+  namespace: kerno-system
+  labels:
+    app.kubernetes.io/name: kerno
+spec:
+  groups:
+    - name: kerno.rules
+      rules:
+        # Fires when kerno_bpf_programs_loaded has been 0 for 5 minutes.
+        - alert: BPFProgramsNotLoaded
+          expr: kerno_bpf_programs_loaded == 0
+          for: 5m
+          labels:
+            severity: warning
+          annotations:
+            summary: No eBPF programs loaded
+            description: Kerno has no loaded eBPF programs for more than 5 minutes.
+
+        # Fires when kerno_collector_errors_total grew by more than 10 over a
+        # 5-minute window, sustained for 5 minutes.
+        - alert: CollectorErrorsHigh
+          expr: increase(kerno_collector_errors_total[5m]) > 10
+          for: 5m
+          labels:
+            severity: warning
+          annotations:
+            summary: High collector error rate
+            description: Collector errors exceeded 10 in the last 5 minutes.
+
+        # Fires when kerno_oom_kills_total increased over a 5-minute window,
+        # sustained for 1 minute.
+        - alert: OOMKillsDetected
+          expr: increase(kerno_oom_kills_total[5m]) > 0
+          for: 1m
+          labels:
+            severity: critical
+          annotations:
+            summary: OOM kill detected
+            description: One or more processes were killed by the OOM killer.
+
+        # Fires when kerno_cgroup_memory_pressure_pct has been above 90 for
+        # 5 minutes. NOTE(review): the metric name says "pressure" while the
+        # description says "usage" - confirm which one is meant.
+        - alert: HighMemoryPressure
+          expr: kerno_cgroup_memory_pressure_pct > 90
+          for: 5m
+          labels:
+            severity: warning
+          annotations:
+            summary: High memory pressure
+            description: Container memory usage exceeded 90 percent.