diff --git a/docs/conf.py b/docs/conf.py index a713d7a09..c533376c2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -109,10 +109,15 @@ "how-it-works/privacy-and-security.html": "/master/setup-robusta/privacy-and-security.html", "how-it-works/index.html": "/master/playbook-reference/what-are-playbooks.html", "playbook-reference/examples.html": "/master/playbook-reference/prometheus-examples/index.html", - "tutorials/playbook-track-changes.html": "/master/playbook-reference/kubernetes-examples/playbook-failed-liveness.html", - "tutorials/playbook-job-failure.html": "/master/playbook-reference/kubernetes-examples/playbook-job-failure.html", - "tutorials/playbook-failed-liveness.html": "/master/playbook-reference/kubernetes-examples/playbook-failed-liveness.html", - "tutorials/playbook-track-secrets.html": "/master/playbook-reference/kubernetes-examples//playbook-track-secrets.html", + "tutorials/playbook-track-changes.html": "/master/playbook-reference/kubernetes-examples/kubernetes-change-notifications.html", + "tutorials/playbook-job-failure.html": "/master/playbook-reference/kubernetes-examples/kubernetes-change-notifications.html", + "tutorials/playbook-failed-liveness.html": "/master/playbook-reference/kubernetes-examples/kubernetes-change-notifications.html", + "tutorials/playbook-track-secrets.html": "/master/playbook-reference/kubernetes-examples/kubernetes-change-notifications.html", + "playbook-reference/kubernetes-examples/playbook-failed-liveness.html": "/master/playbook-reference/kubernetes-examples/kubernetes-change-notifications.html", + "playbook-reference/kubernetes-examples/playbook-job-failure.html": "/master/playbook-reference/kubernetes-examples/kubernetes-change-notifications.html", + "playbook-reference/kubernetes-examples/playbook-track-changes.html": "/master/playbook-reference/kubernetes-examples/kubernetes-change-notifications.html", + "playbook-reference/kubernetes-examples/playbook-track-secrets.html": 
"/master/playbook-reference/kubernetes-examples/kubernetes-change-notifications.html", + "playbook-reference/kubernetes-examples/track-kubernetes-changes.html": "/master/playbook-reference/kubernetes-examples/kubernetes-change-notifications.html", "tutorials/alert-remediation.html": "/master/playbook-reference/prometheus-examples/alert-remediation.html", "tutorials/alert-custom-enrichment.html": "/master/playbook-reference/prometheus-examples/alert-custom-enrichment.html", "catalog/sinks/slack.html": "/master/configuration/sinks/slack.html", @@ -201,7 +206,16 @@ "user-guide/robusta-cli.html": "/master/setup-robusta/installation/index.html", "advanced/index.html": "/master/setup-robusta/installation/index.html", "configuration/exporting/exporting-data.html": "/master/configuration/exporting/send-alerts-api.html", - "configuration/alertmanager-integration/troubleshooting-alertmanager.html": "/master/configuration/exporting/send-alerts-api.html" + "configuration/alertmanager-integration/troubleshooting-alertmanager.html": "/master/configuration/exporting/send-alerts-api.html", + "configuration/alertmanager-integration/grafana-alert-manager.html": "/master/configuration/alertmanager-integration/grafana-self-hosted.html", + "configuration/alertmanager-integration/grafana-cloud-mimir.html": "/master/configuration/alertmanager-integration/grafana-cloud.html", + "playbook-reference/what-are-playbooks.html": "/master/playbook-reference/overview.html", + "how-it-works/alert-builtin-enrichment.html": "/master/playbook-reference/builtin-alert-enrichment.html", + "setup-robusta/installation/extend-prometheus-installation.html": "/master/setup-robusta/installation/standalone-installation.html", + "playbook-reference/defining-playbooks/index.html": "/master/playbook-reference/index.html", + "configuration/alertmanager-integration/alert-custom-prometheus.html": "/master/configuration/alertmanager-integration/embedded-prometheus.html#creating-custom-prometheus-alerts", + 
"configuration/alertmanager-integration/index.html": "/master/configuration/index.html", + "notification-routing/notification-routing-examples.html": "/master/notification-routing/index.html" } diff --git a/docs/configuration/alertmanager-integration/_prometheus_flags_check.rst b/docs/configuration/alertmanager-integration/_prometheus_flags_check.rst deleted file mode 100644 index c4b251074..000000000 --- a/docs/configuration/alertmanager-integration/_prometheus_flags_check.rst +++ /dev/null @@ -1,8 +0,0 @@ -Robusta utilizes the flags API to retrieve data from Prometheus-style metric stores. However, some platforms like Google Managed Prometheus, Azure Managed Prometheus etc, do not implement the flags API. - -You can disable the Prometheus flags API check by setting the following option to false. - -.. code-block:: yaml - - globalConfig: - check_prometheus_flags: true/false \ No newline at end of file diff --git a/docs/configuration/alertmanager-integration/alert-manager.rst b/docs/configuration/alertmanager-integration/alert-manager.rst index 3ab76349d..b3039a9f2 100644 --- a/docs/configuration/alertmanager-integration/alert-manager.rst +++ b/docs/configuration/alertmanager-integration/alert-manager.rst @@ -1,9 +1,9 @@ -In-cluster AlertManager Integration -**************************************** +AlertManager - in-cluster +************************** This guide shows how to send alerts from an existing AlertManager to Robusta in the same cluster. -If your AlertManager is in a different cluster, refer to :ref:`External Prometheus`. +If your AlertManager is in a different cluster, refer to :doc:`AlertManager - external <outofcluster-prometheus>`. 
Send Alerts to Robusta ============================ diff --git a/docs/configuration/alertmanager-integration/customize-labels-priorities.rst b/docs/configuration/alertmanager-integration/customize-labels-priorities.rst new file mode 100644 index 000000000..8380bd926 --- /dev/null +++ b/docs/configuration/alertmanager-integration/customize-labels-priorities.rst @@ -0,0 +1,97 @@ +Customize Labels and Priorities +================================= + +Relabel Prometheus Alerts +-------------------------- + +When sending Prometheus alerts to Robusta, alerts are mapped onto related Kubernetes resources, when possible. This mapping relies on the alerts having the following labels: + ++---------------------------+-------------------------------------------+ +| Kubernetes Resource | Alert Labels | ++===========================+===========================================+ +| Deployment | deployment, namespace | ++---------------------------+-------------------------------------------+ +| DaemonSet | daemonset, namespace | ++---------------------------+-------------------------------------------+ +| StatefulSet | statefulset, namespace | ++---------------------------+-------------------------------------------+ +| Job | job_name, namespace | ++---------------------------+-------------------------------------------+ +| Pod | pod, namespace | ++---------------------------+-------------------------------------------+ +| HorizontalPodAutoscaler | horizontalpodautoscaler, namespace | ++---------------------------+-------------------------------------------+ +| Node | node or instance (fallback if node | +| | doesn't exist) | ++---------------------------+-------------------------------------------+ + +If your alerts have different labels, you can change the mapping with the ``alertRelabel`` helm value. 
+ +A relabeling has 3 attributes: + +* ``source``: The label's name on your alerts (which differs from the expected value in the above table) +* ``target``: The standard label name that Robusta expects (a value from the table above) +* ``operation``: Either ``add`` (default) or ``replace``. If ``add``, your custom mapping will be recognized in addition to Robusta's default mapping. + +For example: + +.. code-block:: yaml + + alertRelabel: + - source: "pod_name" + target: "pod" + operation: "add" + - source: "deployment_name" + target: "deployment" + operation: "replace" + - source: "job_name" + target: "job" + +Mapping Custom Alert Severity +------------------------------ + +To help you prioritize alerts from different sources, Robusta maps alert severity to four standard levels: + +* **HIGH** - requires your immediate attention - may indicate a service outage +* **LOW** - minor problems and areas for improvement (e.g. performance) - to be reviewed periodically on a weekly or bi-weekly cadence +* **INFO** - you probably want to be aware of these, but do not necessarily need to take action +* **DEBUG** - debug only - can be ignored unless you're actively debugging an issue + +You are free to interpret these levels differently, but the above is a good starting point for most companies. 
+ +Prometheus alerts are normalized to the above levels as follows: + ++----------------------+--------------------+ +| Prometheus Severity | Robusta Severity | ++======================+====================+ +| critical | HIGH | ++----------------------+--------------------+ +| high | HIGH | ++----------------------+--------------------+ +| medium | HIGH | ++----------------------+--------------------+ +| error | HIGH | ++----------------------+--------------------+ +| warning | LOW | ++----------------------+--------------------+ +| low | LOW | ++----------------------+--------------------+ +| info | INFO | ++----------------------+--------------------+ +| debug | DEBUG | ++----------------------+--------------------+ + +Prometheus alerts with a severity not in the above list are mapped to Robusta's INFO level. + +You can map your own Prometheus severities, using the ``custom_severity_map`` Helm value. For example: + +.. code-block:: yaml + + globalConfig: + custom_severity_map: + # maps a p1 value on your own alerts to Robusta's HIGH value + p1: high + # maps a p2 value on your own alerts to Robusta's LOW value + p2: low + +The mapped values must be one of: ``high``, ``low``, ``info``, and ``debug``. diff --git a/docs/configuration/alertmanager-integration/dynatrace.rst b/docs/configuration/alertmanager-integration/dynatrace.rst index 9c6070937..8bda1b668 100644 --- a/docs/configuration/alertmanager-integration/dynatrace.rst +++ b/docs/configuration/alertmanager-integration/dynatrace.rst @@ -35,13 +35,13 @@ Step 2: Create a Dynatrace Problems Webhook 5. Set the **Custom payload** to the Dynatrace macro: - .. code-block:: json + .. code-block:: text {ProblemDetailsJSONv2} 6. Add the following **HTTP headers**: - .. code-block:: http + .. 
code-block:: text Authorization: Bearer account-id: diff --git a/docs/configuration/alertmanager-integration/eks-managed-prometheus.rst b/docs/configuration/alertmanager-integration/eks-managed-prometheus.rst index 8c363ad50..77f2411f5 100644 --- a/docs/configuration/alertmanager-integration/eks-managed-prometheus.rst +++ b/docs/configuration/alertmanager-integration/eks-managed-prometheus.rst @@ -18,7 +18,7 @@ Since AWS Managed Prometheus doesn't have a built-in AlertManager, you'll need t 1. Set up Amazon Managed Grafana with your AMP workspace 2. Configure Grafana alerts to send to Robusta -3. See :doc:`grafana-alert-manager` for detailed Grafana alerting setup +3. See :doc:`grafana-self-hosted` for detailed Grafana alerting setup Configure Metric Querying ========================= diff --git a/docs/configuration/alertmanager-integration/embedded-prometheus.rst b/docs/configuration/alertmanager-integration/embedded-prometheus.rst index da46dad65..7acd76744 100644 --- a/docs/configuration/alertmanager-integration/embedded-prometheus.rst +++ b/docs/configuration/alertmanager-integration/embedded-prometheus.rst @@ -51,6 +51,76 @@ To allow the Grafana dashboard to persist after the Grafana instance restarts, y Apply the change by performing a :ref:`Helm Upgrade `. +Creating Custom Prometheus Alerts +---------------------------------- + +Prometheus Alerts are defined on Kubernetes using the PrometheusRule CRD. + +Prerequisites +^^^^^^^^^^^^^ + +Enable global rule selection for the Prometheus operator. Add the following config to your ``generated_values.yaml``. (By default Prometheus Operator picks up only certain new alerts, here we tell it to pick up all new alerts) + +.. code-block:: yaml + + kube-prometheus-stack: + prometheus: + prometheusSpec: + ruleNamespaceSelector: {} # (1) + ruleSelector: {} # (2) + ruleSelectorNilUsesHelmValues: false # (3) + +.. code-annotations:: + 1. Add a namespace if you want Prometheus to identify rules created in specific namespaces. 
Leave ``{}`` to detect rules from any namespace. + 2. Add a label if you want Prometheus to detect rules with a specific selector. Leave ``{}`` to detect rules with any label. + 3. When set to ``false``, Prometheus detects rules that are created directly, not just rules created using a Helm values file. + +Defining an Alert +^^^^^^^^^^^^^^^^^ + +As an example, we'll define an alert to find Pods with CPU usage over their request. + +Save the following YAML into ``my_alert.yaml`` and run ``kubectl apply -f my_alert.yaml``. + +.. code-block:: yaml + + apiVersion: monitoring.coreos.com/v1 + kind: PrometheusRule + metadata: + name: container-cpu-alert + labels: + prometheus: kube-prometheus + role: alert-rules + spec: + groups: + - name: container-cpu-usage + rules: + - alert: KubeContainerCPURequestAlert + expr: | + (rate(container_cpu_usage_seconds_total{container="stress"}[5m]) / + on (container) kube_pod_container_resource_requests{resource="cpu", container="stress"}) > 0.75 + for: 1m + labels: + severity: warning + annotations: + summary: "Container CPU usage is above 75% of request for 5 minutes" + description: "The container is using more than 75% of its requested CPU for 5 minutes." + +Testing the Alert +^^^^^^^^^^^^^^^^^ + +To test the alert, deploy a pod that uses more CPU than its request. + +.. code-block:: bash + + kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/cpu_throttling/throttling.yaml + +You will know the alert was defined successfully when Prometheus fires an alert and you receive a notification in all configured sinks. + +.. 
image:: /images/container_cpu_request_alert.png + :width: 600 + :align: center + Troubleshooting --------------------- diff --git a/docs/configuration/alertmanager-integration/grafana-cloud-mimir.rst b/docs/configuration/alertmanager-integration/grafana-cloud-mimir.rst deleted file mode 100644 index 459bd34ec..000000000 --- a/docs/configuration/alertmanager-integration/grafana-cloud-mimir.rst +++ /dev/null @@ -1,188 +0,0 @@ -Grafana Cloud (Mimir) -******************************** - -This guide walks you through integrating Robusta with Grafana Cloud, enabling both the Robusta runner and Holmes to query metrics from Mimir and receive alerts from Grafana Cloud AlertManager. - -Prerequisites -============= - -Before starting, ensure you have: - -* A Grafana Cloud account with a configured instance -* Prometheus and AlertManager datasources configured in Grafana Cloud -* Access to create service accounts and API tokens in Grafana Cloud -* Your Robusta ``account_id`` and ``signing_key`` from ``generated_values.yaml`` - -Step 1: Gather Grafana Cloud Information -========================================= - -Find Your Grafana Instance Details -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -1. Log into your Grafana Cloud portal -2. Note your Grafana instance URL (e.g., ``https://YOUR-INSTANCE.grafana.net``) - -Create API Keys -^^^^^^^^^^^^^^^ - -You'll need credentials for Grafana API Access (used by both Robusta Runner and Holmes): - -1. Go to **Administration → Service accounts** -2. Create a new service account named "robusta-integration" -3. Generate a service account token -4. Save the token (starts with ``glsa_``) - -Find Your Cluster Name -^^^^^^^^^^^^^^^^^^^^^^ - -If your grafana setup covers multiple clusters, the cluster name is required and used to -identify your specific cluster in Prometheus queries: - -1. Go to Grafana → Explore -2. Run query: ``up{cluster!=""}`` -3. Check the cluster label values -4. 
This value will be set to ``cluster_name`` in your ``generated_values.yaml`` - -Find Datasource UIDs -^^^^^^^^^^^^^^^^^^^^ - -Using the Grafana API, list your datasources: - -.. code-block:: bash - - curl -H "Authorization: Bearer YOUR_GLSA_TOKEN" \ - "https://YOUR-INSTANCE.grafana.net/api/datasources" | jq - -Note the UID for Prometheus datasource UID (typically ``grafanacloud-prom``) - -Step 2: Configure Robusta Runner -================================= - -Update Robusta Values -^^^^^^^^^^^^^^^^^^^^^ - -Add the following to your ``generated_values.yaml``: - -.. code-block:: yaml - - globalConfig: - # Your Robusta account details (should already exist) - account_id: YOUR_ROBUSTA_ACCOUNT_ID - signing_key: YOUR_ROBUSTA_SIGNING_KEY - - # Grafana Cloud Prometheus Configuration (via proxy) - prometheus_url: https://YOUR-INSTANCE.grafana.net/api/datasources/proxy/uid/PROMETHEUS_DATASOURCE_UID - prometheus_auth: Bearer YOUR_GLSA_TOKEN - - # Grafana Cloud AlertManager Configuration - alertmanager_url: https://YOUR-INSTANCE.grafana.net - alertmanager_flavor: grafana - alertmanager_auth: Bearer YOUR_GLSA_TOKEN - - # Grafana API Key for enrichments - grafana_api_key: YOUR_GLSA_TOKEN - - # Cluster identification (required) - cluster_name: YOUR_CLUSTER_NAME - -.. note:: - - The ``prometheus_url`` uses Grafana's proxy endpoint format which handles authentication and routing to Mimir automatically. - -Apply Configuration -^^^^^^^^^^^^^^^^^^^ - -Apply the configuration changes: - -.. code-block:: bash - - helm upgrade robusta robusta/robusta -f generated_values.yaml -n default - -Restart Robusta Runner -^^^^^^^^^^^^^^^^^^^^^^ - -Ensure the changes take effect: - -.. code-block:: bash - - kubectl rollout restart deployment/robusta-runner -n default - -Step 3: Configure Holmes Prometheus Toolset -============================================ - -Holmes requires additional configuration to work with Grafana Cloud's Mimir backend. 
- -For detailed instructions on configuring Holmes with Grafana Cloud, see the **Grafana Cloud (Mimir) Configuration** section in :doc:`/configuration/holmesgpt/toolsets/prometheus`. - -The key configuration points for Grafana Cloud are: - -* Use the proxy endpoint URL format: ``https://YOUR-INSTANCE.grafana.net/api/datasources/proxy/uid/PROMETHEUS_DATASOURCE_UID`` -* Set ``fetch_labels_with_labels_api: false`` (important for Mimir compatibility) -* Set ``fetch_metadata_with_series_api: true`` (important for Mimir compatibility) -* Use Bearer authentication with your service account token - -After updating your ``generated_values.yaml`` with the Holmes configuration, apply the changes: - -.. code-block:: bash - - helm upgrade robusta robusta/robusta -f generated_values.yaml -n default - kubectl rollout restart deployment/robusta-holmes -n default - -Step 4: Configure Alert Routing (Optional) -=========================================== - -To send alerts from Grafana Cloud to Robusta's timeline, follow the alert configuration steps in :doc:`grafana-alert-manager`. - -The key differences for Grafana Cloud are: - -1. Use your Grafana Cloud instance URL -2. Use the service account token (``glsa_`` token) for authentication -3. Ensure your alerts include the ``cluster`` label matching your configured ``cluster_name`` - -Verification -============ - -Verify Metrics Integration -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -1. Open any application in the Robusta UI -2. Check if CPU and memory graphs are displayed -3. If graphs are shown, the metrics integration is working correctly - -Verify Holmes Integration -^^^^^^^^^^^^^^^^^^^^^^^^^ - -1. Trigger a test alert or wait for an actual alert -2. In the Robusta UI, click on "Investigate with Holmes" -3. 
Verify that Holmes can query metrics and provide analysis - -Troubleshooting -=============== - -Common Issues -^^^^^^^^^^^^^ - -**Metrics not showing in Robusta UI:** - -* Verify the ``prometheus_url`` includes the correct datasource UID -* Check that the service account token has not expired -* Ensure the token has appropriate permissions to query metrics - -**Holmes unable to query metrics:** - -* Verify ``fetch_metadata_with_series_api`` is set to ``true`` -* Check that the Holmes deployment has restarted after configuration changes -* Review Holmes logs for authentication errors: ``kubectl logs -n default deployment/robusta-holmes`` - -**Authentication errors:** - -* Regenerate the service account token if expired -* Ensure the token is correctly formatted with ``Bearer `` prefix -* Verify the token has the necessary permissions in Grafana Cloud - -Additional Resources -==================== - -* :doc:`grafana-alert-manager` - For configuring Grafana alerts -* :doc:`/configuration/holmesgpt/toolsets/prometheus` - For advanced Holmes configuration -* `Grafana Cloud Documentation `_ \ No newline at end of file diff --git a/docs/configuration/alertmanager-integration/grafana-cloud.rst b/docs/configuration/alertmanager-integration/grafana-cloud.rst new file mode 100644 index 000000000..1f7e00a54 --- /dev/null +++ b/docs/configuration/alertmanager-integration/grafana-cloud.rst @@ -0,0 +1,46 @@ +Grafana Cloud +************* + +This guide shows how to send alerts from Grafana Cloud's AlertManager to Robusta. + +For configuring metric querying and advanced settings, see :doc:`/configuration/metric-providers-grafana-cloud`. + +Send Alerts to Robusta +======================= + +Configure Grafana Cloud to forward alerts to Robusta: + +1. **Get Your Webhook URL**: + + - Log into the Robusta UI + - Navigate to ``Settings`` > ``Advanced`` + - In the Grafana webhook section, click ``Generate URL`` + - Save the generated webhook URL + +2. 
**Create Contact Point in Grafana Cloud**: + + - Log into your Grafana Cloud instance + - Go to ``Alerting`` > ``Contact points`` + - Click ``Add contact point`` + - Set **Name**: ``robusta`` + - Set **Integration**: ``Webhook`` + - Set **URL**: Paste the webhook URL from step 1 + - Click ``Test`` to verify connectivity + - Click ``Save contact point`` + +3. **Configure Notification Policy**: + + - Go to ``Alerting`` > ``Notification policies`` + - Edit the default policy or create a new one + - Set the contact point to ``robusta`` + - Save the policy + +.. details:: Why do I see a banner in the UI that "Alerts won't show up"? + :class: warning + + This notification is displayed until the first alert arrives at Robusta. + +Configure Metric Querying +========================== + +To enable Robusta to pull metrics from Grafana Cloud's Mimir, see :doc:`/configuration/metric-providers-grafana-cloud`. diff --git a/docs/configuration/alertmanager-integration/grafana-alert-manager.rst b/docs/configuration/alertmanager-integration/grafana-self-hosted.rst similarity index 52% rename from docs/configuration/alertmanager-integration/grafana-alert-manager.rst rename to docs/configuration/alertmanager-integration/grafana-self-hosted.rst index 011c60c95..28630d6d1 100644 --- a/docs/configuration/alertmanager-integration/grafana-alert-manager.rst +++ b/docs/configuration/alertmanager-integration/grafana-self-hosted.rst @@ -1,5 +1,5 @@ -Grafana Alerts -************** +Grafana - Self-Hosted +********************* Grafana can send alerts to the Robusta timeline for visualization and AI investigation. @@ -9,17 +9,14 @@ Grafana can send alerts to the Robusta timeline for visualization and AI investi .. note:: - **Using Grafana Cloud with Mimir?** For complete integration including metrics querying and Holmes configuration, see :doc:`grafana-cloud-mimir`. + **Using Grafana Cloud?** See the :doc:`Grafana Cloud <grafana-cloud>` guide. 
-This guide covers sending alerts from Grafana Alerting to the Robusta timeline. -For metrics integration with self-hosted Grafana, refer to :ref:`metrics-integration docs for Prometheus `. -For Grafana Cloud metrics integration, see :doc:`grafana-cloud-mimir`. - - -Send Alerts to Robusta's Timeline +Option 1: Send Alerts to Robusta's Timeline =========================================== -This integration lets you send Grafana alerts to Robusta's Timeline. To configure it: +Send Grafana alerts to Robusta's Timeline for visualization and AI investigation. + +To configure it: 1. Get your Robusta ``account_id`` from your ``generated_values.yaml`` file. It appears under the ``globalConfig`` section. @@ -86,20 +83,21 @@ That's it! You can now see your Grafana alerts in the Robusta Timeline, and use AI to analyze it. +Correlating Alerts with Kubernetes Resources +---------------------------------------------- -Kubernetes Alerts -================================= -In case your alerts are from a Kubernetes cluster monitored by Robusta, and your alerts has a ``cluster`` label, make sure it matches the ``cluster_name`` that appears in Robusta ``generated_values.yaml``. +To enable Robusta to correlate your Grafana alerts with the specific Kubernetes resources they're related to (pods, deployments, etc.), make sure the ``cluster`` label in your alerts matches ``clusterName`` in Robusta's ``generated_values.yaml``. -**This is optional - you can send any alert to the Robusta timeline!** +.. note:: + This is only required for Kubernetes alerts. You can send any alert to the Robusta timeline, including non-Kubernetes alerts. -Send Alerts to Robusta for enrichments -=================================================================== +Option 2: Inline Alert Enrichment and Routing +============================================= -You can use Robusta to enrich alerts with extra context, and to route it to other systems as well. 
+Use Robusta to enrich alerts inline with extra context and route them to :doc:`other systems ` (Slack, Microsoft Teams, etc.). Learn more about :doc:`alert routing `. -If you'd like to do that, this integration is for you. +This is an alternative to Option 1, where alerts are only sent to Robusta's Timeline without inline enrichment or routing to other destinations. To configure it: @@ -140,57 +138,3 @@ If successful, you will receive a notification in the Robusta UI, Slack or any o :align: center 6. Finally, click "Save contact point" to complete the Robusta integration. - - -Configure Silencing -================================================= - -Modify and add the following config to ``generated_values.yaml`` and :ref:`update Robusta `. - -.. code-block:: yaml - - globalConfig: # this line should already exist - # add the lines below - grafana_url: "https://.grafana.net" - # Create alert silencing when using Grafana alerts - grafana_api_key: - alertmanager_flavor: grafana # (1) - - # alertmanager_url: "https://alertmanager.grafana.net" - # prometheus_url: "https://prometheus.grafana.net/api/prom" - - # Add any labels that are relevant to the specific cluster (optional) - # prometheus_additional_labels: - # cluster: 'CLUSTER_NAME_HERE' - - # If using a multi-tenant prometheus or alertmanager, pass the org id to all queries - # prometheus_additional_headers: - # X-Scope-OrgID: - # alertmanager_additional_headers: - # X-Scope-OrgID: - -.. code-annotations:: - 1. This is necessary for Robusta to create silences when using Grafana Alerts, because of minor API differences in the AlertManager embedded in Grafana. - -.. note:: - - The Grafana API key must have the ``Editor`` role in order to create silences. - - -You can optionally set up authentication, SSL verification, and other parameters described below. - -Verify it Works -^^^^^^^^^^^^^^^^^ -Open any application in the Robusta UI. If CPU and memory graphs are shown, everything is working. 
- -Alternatively, trigger a `demo OOMKill alert `_ and confirm that Robusta sends a Slack/Teams message with a memory graph. This indicates proper configuration. - - -Optional Settings -============================= - -For authentication and SSL configuration when querying metrics from Grafana's backend Prometheus, see the relevant metric provider documentation: - -- :doc:`/configuration/metric-providers-in-cluster` for in-cluster Prometheus -- :doc:`/configuration/metric-providers-external` for external Prometheus -- Or the appropriate cloud provider metric documentation diff --git a/docs/configuration/alertmanager-integration/index.rst b/docs/configuration/alertmanager-integration/index.rst deleted file mode 100644 index d36278696..000000000 --- a/docs/configuration/alertmanager-integration/index.rst +++ /dev/null @@ -1,81 +0,0 @@ -:hide-toc: - - -Prometheus & AlertManager -========================= -.. toctree:: - :hidden: - :maxdepth: 1 - - alert-manager - outofcluster-prometheus - azure-managed-prometheus - eks-managed-prometheus - coralogix_managed_prometheus - google-managed-prometheus - victoria-metrics - grafana-alert-manager - grafana-cloud-mimir - embedded-prometheus - - -Connect Robusta to your Prometheus setup to get enriched alerts with logs, events, and metrics. - -**Already using Robusta's embedded Prometheus?** No setup needed - skip this page. - -**Choose your setup:** - -.. grid:: 1 1 2 3 - :gutter: 3 - - - .. grid-item-card:: In-cluster Prometheus - :class-card: sd-bg-light sd-bg-text-light - :link: alert-manager - :link-type: doc - - .. grid-item-card:: External Prometheus - :class-card: sd-bg-light sd-bg-text-light - :link: outofcluster-prometheus - :link-type: doc - - .. grid-item-card:: Azure Managed Prometheus - :class-card: sd-bg-light sd-bg-text-light - :link: azure-managed-prometheus - :link-type: doc - - .. 
grid-item-card:: AWS Managed Prometheus - :class-card: sd-bg-light sd-bg-text-light - :link: eks-managed-prometheus - :link-type: doc - - .. grid-item-card:: Coralogix - :class-card: sd-bg-light sd-bg-text-light - :link: coralogix_managed_prometheus - :link-type: doc - - .. grid-item-card:: Google Managed Prometheus - :class-card: sd-bg-light sd-bg-text-light - :link: google-managed-prometheus - :link-type: doc - - .. grid-item-card:: VictoriaMetrics - :class-card: sd-bg-light sd-bg-text-light - :link: victoria-metrics - :link-type: doc - - .. grid-item-card:: Grafana Alerts - :class-card: sd-bg-light sd-bg-text-light - :link: grafana-alert-manager - :link-type: doc - - .. grid-item-card:: Grafana Cloud (Mimir) - :class-card: sd-bg-light sd-bg-text-light - :link: grafana-cloud-mimir - :link-type: doc - - .. grid-item-card:: Install Prometheus with Robusta - :class-card: sd-bg-light sd-bg-text-light - :link: embedded-prometheus - :link-type: doc - diff --git a/docs/configuration/alertmanager-integration/launchdarkly.rst b/docs/configuration/alertmanager-integration/launchdarkly.rst index a6165457c..2bd3b4633 100644 --- a/docs/configuration/alertmanager-integration/launchdarkly.rst +++ b/docs/configuration/alertmanager-integration/launchdarkly.rst @@ -58,7 +58,7 @@ In LaunchDarkly: Other resource types other than **FLAG** are not verified to work though they might function without additional configuration. Alternative: Using Headers Instead of URL Parameters --------------------------------------------------- +----------------------------------------------------- Including API keys in URLs can expose them in logs, browser history, and monitoring tools. A more secure approach is to send the key in the request headers whenever possible. @@ -67,7 +67,7 @@ If you’re using a third-party service that supports custom headers, configure - **URL**: ``https://api.robusta.dev/integrations/generic/launchdarkly?account_id=YOUR_ACCOUNT_ID_HERE`` - **Headers**: - .. 
code-block:: http + .. code-block:: text Authorization: Bearer @@ -117,7 +117,7 @@ Troubleshooting For additional support, check the Robusta logs for any LaunchDarkly webhook processing errors. Holmes Configuration -------------------- +-------------------- To enable Holmes to pull LaunchDarkly changes into the AI assistant, add the following configuration to your ``generated_values.yaml`` file and upgrade the Robusta Helm chart: diff --git a/docs/configuration/alertmanager-integration/nagios.rst b/docs/configuration/alertmanager-integration/nagios.rst index 31f48ab0d..90c672c0f 100644 --- a/docs/configuration/alertmanager-integration/nagios.rst +++ b/docs/configuration/alertmanager-integration/nagios.rst @@ -44,7 +44,7 @@ Step 2: Define the Robusta Contact Insert the following contact definition into your Nagios configuration to register Robusta as a notification target. Be sure to replace the placeholders for `_account_id` and `_robusta_api_token` with your actual values. -.. code-block:: nagios +.. code-block:: text define contact { contact_name robusta @@ -65,7 +65,7 @@ Step 3: Add Robusta to a Contact Group Ensure Robusta is part of a contact group or explicitly included in your alert definitions: -.. code-block:: nagios +.. code-block:: text define contactgroup { contactgroup_name all_contacts @@ -144,7 +144,7 @@ Step 5: Define Robusta Notification Commands Before proceeding, if your Nagios setup monitors multiple clusters, you can optionally set the `cluster_name` variable per host using custom `_cluster_name` properties: -.. code-block:: nagios +.. code-block:: text define host { host_name order-management @@ -164,7 +164,7 @@ Before proceeding, if your Nagios setup monitors multiple clusters, you can opti Now, define the notification commands and replace `PATH_TO_SCRIPT_HERE` with the actual path to your Bash script (`notify-robusta.sh`). -.. code-block:: nagios +.. 
code-block:: text define command { command_name notify-service-by-robusta diff --git a/docs/configuration/alertmanager-integration/newrelic.rst b/docs/configuration/alertmanager-integration/newrelic.rst index 849db3fa4..5e0b4d7e6 100644 --- a/docs/configuration/alertmanager-integration/newrelic.rst +++ b/docs/configuration/alertmanager-integration/newrelic.rst @@ -64,7 +64,7 @@ Webhook Payload Template (JSON) Paste this into the **Template** field for the webhook action. Replace ``ACCOUNT_ID_HERE`` with your actual account ID. -.. code-block:: json +.. code-block:: text { "account_id": "ACCOUNT_ID_HERE", diff --git a/docs/configuration/alertmanager-integration/outofcluster-prometheus.rst b/docs/configuration/alertmanager-integration/outofcluster-prometheus.rst index 9870fdd06..62f10fe58 100644 --- a/docs/configuration/alertmanager-integration/outofcluster-prometheus.rst +++ b/docs/configuration/alertmanager-integration/outofcluster-prometheus.rst @@ -1,11 +1,11 @@ -External Prometheus -************************************** +AlertManager - external +************************ Follow this guide to connect Robusta to a central Prometheus (e.g. Thanos/Mimir), running outside the cluster monitored by Robusta. .. note:: - **Using Grafana Cloud?** For Grafana Cloud with Mimir, see the dedicated guide: :doc:`grafana-cloud-mimir` + **Using Grafana Cloud?** For Grafana Cloud with Mimir, see the dedicated guide: :doc:`grafana-cloud` You will need to configure two integrations: one to send alerts to Robusta and another to let Robusta query metrics and create silences. diff --git a/docs/configuration/exporting/namespace-resources-api.rst b/docs/configuration/exporting/namespace-resources-api.rst index 0e4b66d1d..8e4141197 100644 --- a/docs/configuration/exporting/namespace-resources-api.rst +++ b/docs/configuration/exporting/namespace-resources-api.rst @@ -95,7 +95,7 @@ Success Response If the request is successful, the API returns the following structure: -.. 
code-block:: json +.. code-block:: text { "cluster": "your-cluster-name", diff --git a/docs/configuration/exporting/robusta-pro-features.rst b/docs/configuration/exporting/robusta-pro-features.rst index 6be112cfa..4eae61798 100644 --- a/docs/configuration/exporting/robusta-pro-features.rst +++ b/docs/configuration/exporting/robusta-pro-features.rst @@ -11,7 +11,7 @@ AI Analysis Automatically investigate and resolve issues with AI-powered analysis. -:doc:`AI Analysis (HolmesGPT) <../holmesgpt/index>` +:doc:`AI Analysis (HolmesGPT) <../holmesgpt/main-features>` Automatically analyze Kubernetes alerts, logs, and metrics. Get potential root causes and remediation suggestions. Custom Alert Ingestion diff --git a/docs/configuration/exporting/send-alerts-api.rst b/docs/configuration/exporting/send-alerts-api.rst index bfd026941..77c6bb65e 100644 --- a/docs/configuration/exporting/send-alerts-api.rst +++ b/docs/configuration/exporting/send-alerts-api.rst @@ -20,7 +20,7 @@ Integration Methods There are two main ways to send alerts to Robusta: -1. **Pre-built Integrations**: Use our existing integrations for AlertManager, Nagios, SolarWinds, and other monitoring systems. See :doc:`Alert Sources <../index>`. +1. **Pre-built Integrations**: Use our existing integrations for AlertManager, Nagios, SolarWinds, and other monitoring systems. See :doc:`Send Alerts <../index>`. 2. **Programmatic API**: Send alerts directly using our REST API (detailed below). diff --git a/docs/configuration/holmesgpt/holmesgpt-docs.rst b/docs/configuration/holmesgpt/holmesgpt-docs.rst index ddbd2c7e6..b5861ee8d 100644 --- a/docs/configuration/holmesgpt/holmesgpt-docs.rst +++ b/docs/configuration/holmesgpt/holmesgpt-docs.rst @@ -3,25 +3,4 @@ HolmesGPT Documentation For comprehensive HolmesGPT documentation, please visit the official HolmesGPT documentation site at `holmesgpt.dev `_. 
-Configuring HolmesGPT with Robusta ------------------------------------ - -When configuring HolmesGPT with Robusta, follow the **Robusta Helm Chart configuration method** described in the HolmesGPT documentation rather than standalone CLI installation. - -Key points: - -* Use the ``enableHolmesGPT: true`` setting in your Robusta Helm values -* Configure data sources and advanced settings using the Helm chart configuration examples provided in the HolmesGPT docs -* Refer to the `Helm Configuration Reference `_ for advanced HolmesGPT settings specific to Robusta deployments - -Quick Links ------------ - -* `HolmesGPT Main Documentation `_ -* `Data Sources Configuration `_ -* `Helm Chart Configuration Reference `_ - -For Robusta-specific HolmesGPT setup instructions, see: - -* :doc:`getting-started` - Quick setup guide for HolmesGPT with Robusta -* :doc:`main-features` - Overview of AI analysis features \ No newline at end of file +For Robusta-specific setup instructions, see :doc:`getting-started`. \ No newline at end of file diff --git a/docs/configuration/holmesgpt/main-features.rst b/docs/configuration/holmesgpt/main-features.rst index c039daf07..4611a4510 100644 --- a/docs/configuration/holmesgpt/main-features.rst +++ b/docs/configuration/holmesgpt/main-features.rst @@ -3,27 +3,38 @@ Main Features Robusta integrates `HolmesGPT `_ to provide AI-powered root cause analysis for Kubernetes alerts and issues. 
-What HolmesGPT Does -------------------- - -**Automatic Investigation** - When alerts fire, HolmesGPT automatically: - - * Analyzes pod logs and events - * Examines resource metrics and limits - * Checks recent deployments and changes - * Investigates related resources and dependencies - * Provides actionable recommendations - -**Multi-Source Analysis** - HolmesGPT pulls data from: - - * Kubernetes API (pods, nodes, events, logs) - * Prometheus metrics - * Cloud provider APIs (AWS, Azure, GCP) - * Application monitoring (Datadog, New Relic) - * Log aggregation systems (Loki, OpenSearch, Coralogix) - * And more via `extensible toolsets `_ +See HolmesGPT in Action +----------------------- + +.. tab-set:: + + .. tab-item:: AWS Troubleshooting + + .. raw:: html + +
+ +
+ + .. tab-item:: CPU Spike Investigation + + .. raw:: html + +
+ +
How to Use It ------------- @@ -36,40 +47,11 @@ How to Use It **Via @holmes in Slack** Ask natural language questions about your clusters: - + * ``@holmes what apps are crashing in prod-cluster?`` * ``@holmes why is my alert firing on staging?`` * ``@holmes investigate high memory usage in dev-cluster`` -Example Investigation ---------------------- - -Here's what HolmesGPT found for a CrashLoopBackOff alert: - -.. image:: /images/AI_Analysis_demo2.png - :width: 1000px - -The AI identified: -- The exact error from pod logs -- The root cause (missing environment variable) -- Recommended fix with example YAML -- Related configuration issues - -Why Use HolmesGPT? ------------------- - -**Save Time** - Stop manually checking logs, metrics, and events across multiple tools. HolmesGPT does it in seconds. - -**Reduce MTTR** - Get to root cause faster with AI that understands Kubernetes patterns and common issues. - -**Learn as You Go** - Each investigation explains what was checked and why, helping your team learn Kubernetes troubleshooting. - -**24/7 Coverage** - AI investigations run automatically on every alert, even at 3 AM. - Next Steps ---------- diff --git a/docs/configuration/index.rst b/docs/configuration/index.rst index 18db403e1..db1bdda22 100644 --- a/docs/configuration/index.rst +++ b/docs/configuration/index.rst @@ -1,62 +1,109 @@ :hide-toc: -Alert Sources -============= +Send Alerts to Robusta +======================= Connect your monitoring system to Robusta, to enrich alerts and apply automation rules. -**Choose your setup:** +Prometheus & AlertManager +-------------------------- .. grid:: 1 1 2 3 :gutter: 3 - .. grid-item-card:: :octicon:`pulse;1em;` Prometheus & AlertManager + .. grid-item-card:: :octicon:`pulse;1em;` AlertManager - external :class-card: sd-bg-light sd-bg-text-light - :link: alertmanager-integration/index + :link: alertmanager-integration/outofcluster-prometheus :link-type: doc - Any Prometheus-compatible stack + .. 
grid-item-card:: :octicon:`pulse;1em;` AlertManager - in-cluster + :class-card: sd-bg-light sd-bg-text-light + :link: alertmanager-integration/alert-manager + :link-type: doc - .. grid-item-card:: :octicon:`bell;1em;` Nagios + .. grid-item-card:: :octicon:`pulse;1em;` AWS Managed Prometheus :class-card: sd-bg-light sd-bg-text-light - :link: alertmanager-integration/nagios + :link: alertmanager-integration/eks-managed-prometheus :link-type: doc - Forward Nagios alerts by webhook + .. grid-item-card:: :octicon:`pulse;1em;` Azure Managed Prometheus + :class-card: sd-bg-light sd-bg-text-light + :link: alertmanager-integration/azure-managed-prometheus + :link-type: doc - .. grid-item-card:: :octicon:`bell;1em;` NewRelic + .. grid-item-card:: :octicon:`pulse;1em;` Coralogix :class-card: sd-bg-light sd-bg-text-light - :link: alertmanager-integration/newrelic + :link: alertmanager-integration/coralogix_managed_prometheus :link-type: doc - Forward NewRelic alerts by webhook + .. grid-item-card:: :octicon:`pulse;1em;` Embedded Prometheus Stack + :class-card: sd-bg-light sd-bg-text-light + :link: alertmanager-integration/embedded-prometheus + :link-type: doc - .. grid-item-card:: :octicon:`bell;1em;` SolarWinds + .. grid-item-card:: :octicon:`pulse;1em;` Google Managed Prometheus :class-card: sd-bg-light sd-bg-text-light - :link: alertmanager-integration/solarwinds + :link: alertmanager-integration/google-managed-prometheus :link-type: doc - Forward SolarWinds alerts by webhook + .. grid-item-card:: :octicon:`pulse;1em;` Grafana - Self-Hosted + :class-card: sd-bg-light sd-bg-text-light + :link: alertmanager-integration/grafana-self-hosted + :link-type: doc - .. grid-item-card:: :octicon:`bell;1em;` Dynatrace + .. grid-item-card:: :octicon:`pulse;1em;` Grafana Cloud + :class-card: sd-bg-light sd-bg-text-light + :link: alertmanager-integration/grafana-cloud + :link-type: doc + + .. 
grid-item-card:: :octicon:`pulse;1em;` VictoriaMetrics + :class-card: sd-bg-light sd-bg-text-light + :link: alertmanager-integration/victoria-metrics + :link-type: doc + +Other +------ + +.. grid:: 1 1 2 3 + :gutter: 3 + + .. grid-item-card:: :octicon:`pulse;1em;` Dynatrace :class-card: sd-bg-light sd-bg-text-light :link: alertmanager-integration/dynatrace :link-type: doc - Forward Dynatrace alerts by webhook + .. grid-item-card:: :octicon:`pulse;1em;` Nagios + :class-card: sd-bg-light sd-bg-text-light + :link: alertmanager-integration/nagios + :link-type: doc + + .. grid-item-card:: :octicon:`pulse;1em;` New Relic + :class-card: sd-bg-light sd-bg-text-light + :link: alertmanager-integration/newrelic + :link-type: doc - .. grid-item-card:: :octicon:`bell;1em;` Pagerduty + .. grid-item-card:: :octicon:`pulse;1em;` PagerDuty :class-card: sd-bg-light sd-bg-text-light :link: alertmanager-integration/pagerduty-alerting :link-type: doc - Forward PagerDuty incidents and alerts by webhook + .. grid-item-card:: :octicon:`pulse;1em;` SolarWinds + :class-card: sd-bg-light sd-bg-text-light + :link: alertmanager-integration/solarwinds + :link-type: doc - .. grid-item-card:: :octicon:`bell;1em;` LaunchDarkly +Advanced +--------- + +.. grid:: 1 1 2 3 + :gutter: 3 + + .. grid-item-card:: :octicon:`tools;1em;` Customize Labels & Priorities :class-card: sd-bg-light sd-bg-text-light - :link: alertmanager-integration/launchdarkly + :link: alertmanager-integration/customize-labels-priorities :link-type: doc - Forward LaunchDarkly changes by webhook - .. -**Have alerts elsewhere?** Send alerts via the generic :doc:`HTTP webhook endpoint `. + .. 
grid-item-card:: :octicon:`plug;1em;` Other - Custom Webhooks + :class-card: sd-bg-light sd-bg-text-light + :link: exporting/custom-webhooks + :link-type: doc diff --git a/docs/configuration/metric-providers-grafana-cloud.rst b/docs/configuration/metric-providers-grafana-cloud.rst new file mode 100644 index 000000000..37dff3cfe --- /dev/null +++ b/docs/configuration/metric-providers-grafana-cloud.rst @@ -0,0 +1,88 @@ +Grafana Cloud (Mimir) +===================== + +Configure Robusta to use Grafana Cloud's managed Prometheus (Mimir) for querying metrics. + +Prerequisites +------------- + +* A Grafana Cloud account with a configured instance +* Prometheus datasource configured in Grafana Cloud +* Access to create service accounts and API tokens + +Quick Start +----------- + +1. **Find Your Grafana Instance URL**: + + Log into your Grafana Cloud portal and note your instance URL (e.g., ``https://YOUR-INSTANCE.grafana.net``). + +2. **Create API Token**: + + - Go to **Administration → Service accounts** + - Create a new service account named "robusta-integration" + - Generate a service account token + - Save the token (starts with ``glsa_``) + +3. **Find Datasource UID**: + + Using the Grafana API, list your datasources: + + .. code-block:: bash + + curl -H "Authorization: Bearer YOUR_GLSA_TOKEN" \ + "https://YOUR-INSTANCE.grafana.net/api/datasources" | jq + + Note the UID for your Prometheus datasource (typically ``grafanacloud-prom``). + +4. **Find Your Cluster Name** (if using multi-cluster setup): + + - Go to Grafana → Explore + - Run query: ``up{cluster!=""}`` + - Note the cluster label value + +5. **Add to your** ``generated_values.yaml``: + + .. 
code-block:: yaml + + globalConfig: + # Grafana Cloud Prometheus Configuration (via proxy) + prometheus_url: https://YOUR-INSTANCE.grafana.net/api/datasources/proxy/uid/PROMETHEUS_DATASOURCE_UID + prometheus_auth: Bearer YOUR_GLSA_TOKEN + + # Grafana API Key for enrichments and silencing + grafana_api_key: YOUR_GLSA_TOKEN + grafana_url: https://YOUR-INSTANCE.grafana.net + + # Grafana Cloud AlertManager Configuration + alertmanager_url: https://YOUR-INSTANCE.grafana.net + alertmanager_flavor: grafana + alertmanager_auth: Bearer YOUR_GLSA_TOKEN + + # Cluster identification (required for multi-cluster) + clusterName: YOUR_CLUSTER_NAME + + .. note:: + + **Optional:** To create silences from Robusta, the Grafana API key must have the ``Editor`` role. + +6. :ref:`Update Robusta ` + +Multi-cluster Setup +------------------- + +Make sure ``clusterName`` in Robusta Helm's values matches the ``cluster`` label in Grafana. + +.. code-block:: yaml + + clusterName: "production-us-east" + +HolmesGPT Configuration +----------------------- + +Give HolmesGPT - Robusta's AI Agent - read access to metrics. See the `Grafana Cloud (Mimir) Configuration `_ guide. + +Next Steps +---------- + +- :doc:`Send alerts from Grafana Cloud to Robusta ` diff --git a/docs/configuration/metric-providers.rst b/docs/configuration/metric-providers.rst index f3846cabd..c38e6cb16 100644 --- a/docs/configuration/metric-providers.rst +++ b/docs/configuration/metric-providers.rst @@ -70,3 +70,10 @@ Supported Providers :link-type: doc VictoriaMetrics time-series database + + .. 
grid-item-card:: :octicon:`organization;1em;` Grafana Cloud (Mimir) + :class-card: sd-bg-light sd-bg-text-light + :link: metric-providers-grafana-cloud + :link-type: doc + + Grafana Cloud managed Prometheus (Mimir) diff --git a/docs/how-it-works/alert-builtin-enrichment.rst b/docs/how-it-works/alert-builtin-enrichment.rst deleted file mode 100644 index 32e0720aa..000000000 --- a/docs/how-it-works/alert-builtin-enrichment.rst +++ /dev/null @@ -1,77 +0,0 @@ -.. _builtin-alert-enrichment: - -Enhanced Prometheus Alerts -######################################## - -Robusta takes Prometheus to the next level by correlating alerts with other observability data. - -Robusta has two primary sources of alerts: - -* Prometheus alerts, forwarded by AlertManager to Robusta -* APIServer Alerts, generated by Robusta itself (e.g. for OOMKilled pods) - -Let's see each type of alert in action. - -.. Prerequisites -.. --------------- - -.. Either of the following: - -.. * :ref:`Robusta installed with embedded Prometheus ` -.. * :ref:`Robusta integrated with an external Prometheus ` - -Testing out Prometheus alerts -********************************* -1. Deploy a broken pod that will be stuck in pending state: - -.. code-block:: bash - :name: cb-apply-pendingpod - - kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/pending_pods/pending_pod_resources.yaml - -2. Trigger a Prometheus alert immediately, skipping the normal delays: - -.. code-block:: bash - :name: cb-trigger-prometheus-alert - - robusta playbooks trigger prometheus_alert alert_name=KubePodCrashLooping namespace=default pod_name=example-pod - -.. admonition:: Example Slack Message - - .. image:: /images/simulatedprometheusalert.png - - -Testing out APIServer alerts -********************************** - -Let's deploy a crashing pod: - -.. 
code-block:: bash - :name: cb-apply-crashpod-apiserver - - kubectl apply -f https://gist.githubusercontent.com/robusta-lab/283609047306dc1f05cf59806ade30b6/raw - -Verify that the pod is actually crashing: - -.. code-block:: console - :name: cb-verify-crash-pod-crashing - - $ kubectl get pods -A | grep crashpod - NAME READY STATUS RESTARTS AGE - crashpod-64d8fbfd-s2dvn 0/1 CrashLoopBackOff 1 7s - -Once the pod has reached two restarts, you'll get notified in Slack (or whatever alternative integration you configured): - -.. admonition:: Example Slack Message - - .. image:: /images/crash-report.png - - -Now open the `Robusta UI `_ and look for the same message there. - -Finally, clean up the crashing pod: - -.. code-block:: bash - :name: cb-delete-crashpod - - kubectl delete deployment crashpod diff --git a/docs/how-it-works/architecture.rst b/docs/how-it-works/architecture.rst index e5c0dba71..d4f461841 100644 --- a/docs/how-it-works/architecture.rst +++ b/docs/how-it-works/architecture.rst @@ -28,7 +28,7 @@ Data Flow 5. **Routing**: Enriched alerts are routed to configured sinks (Slack, Teams, etc.) based on routing rules Extended Architecture -^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^ **AI Analysis with HolmesGPT** Robusta's AI engine automatically investigates alerts by analyzing logs, events, and cluster state to provide root cause analysis and remediation suggestions. See :ref:`AI Analysis ` for configuration details. diff --git a/docs/index.rst b/docs/index.rst index bf2c623b4..249113936 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -2,54 +2,80 @@ .. toctree:: :maxdepth: 1 - :caption: 📖 Overview + :caption: Overview :hidden: self how-it-works/architecture how-it-works/oss-vs-saas how-it-works/usage-faq - how-it-works/alert-builtin-enrichment .. toctree:: :maxdepth: 4 - :caption: 🚀 Installation + :caption: Installation :hidden: setup-robusta/index .. 
toctree:: :maxdepth: 4 - :caption: 🚨 Alert Sources + :caption: AI Analysis + :hidden: + + configuration/holmesgpt/main-features + configuration/holmesgpt/getting-started + HolmesGPT Docs + +.. toctree:: + :maxdepth: 4 + :caption: Send Alerts :hidden: Overview - Prometheus & AlertManager + AlertManager - external + AlertManager - in-cluster + AWS Managed Prometheus + Azure Managed Prometheus + Coralogix + Dynatrace + Embedded Prometheus Stack + Google Managed Prometheus + Grafana - Self-Hosted + Grafana Cloud + Nagios New Relic - SolarWinds PagerDuty - Dynatrace + SolarWinds + VictoriaMetrics + Customize Labels & Priorities + Other - Custom Webhooks + +.. toctree:: + :maxdepth: 4 + :caption: Track Config Changes + :hidden: + + track-changes/kubernetes-changes LaunchDarkly - Nagios - Custom Webhooks .. toctree:: :maxdepth: 4 - :caption: 📊 Metric Providers + :caption: Connect Metrics :hidden: General Settings - In-cluster Prometheus - External Prometheus + Prometheus - in-cluster + Prometheus - external Azure Managed AWS Managed Google Managed Coralogix VictoriaMetrics + Grafana Cloud (Mimir) .. toctree:: :maxdepth: 4 - :caption: 🔔 Notification Sinks + :caption: Notification Sinks :hidden: Overview @@ -80,7 +106,7 @@ .. toctree:: :maxdepth: 4 - :caption: 🔀 Alert Routing + :caption: Alert Routing :hidden: Overview @@ -99,26 +125,7 @@ .. toctree:: :maxdepth: 4 - :caption: ⚙️ Automation - :hidden: - - playbook-reference/index - configuration/alertmanager-integration/alert-custom-prometheus - Cost Savings - KRR - K8s Misconfigurations - Popeye - -.. toctree:: - :maxdepth: 4 - :caption: 🤖 AI Analysis - :hidden: - - configuration/holmesgpt/main-features - configuration/holmesgpt/getting-started - HolmesGPT Docs - -.. toctree:: - :maxdepth: 4 - :caption: 💼 Robusta Pro Features + :caption: Robusta Pro Features :hidden: configuration/exporting/robusta-pro-features @@ -132,7 +139,18 @@ .. 
toctree:: :maxdepth: 4 - :caption: ❓ Help + :caption: Advanced - Playbooks + :hidden: + + playbook-reference/index + Builtin Alert Enrichment + Custom Alert Enrichment + Kubernetes Change Notifications + Cost Savings - KRR + +.. toctree:: + :maxdepth: 4 + :caption: Help :hidden: help @@ -142,29 +160,30 @@ Welcome to Robusta ==================== -Robusta transforms basic Prometheus alerts into actionable insights with full Kubernetes context, and magical automation. - -.. grid:: 1 1 1 2 - :margin: 0 - :padding: 0 - :gutter: 3 - - .. grid-item:: - - **How Robusta Improves Alerts:** +Robusta is an SRE agent that transforms alerts into actionable insights using LLMs combined with a rules (playbooks) engine. - * **Self-Healing** - Define auto-remediation rules for faster fixes - * **Smart Grouping** - Reduce notification spam - * **AI Investigation** - Kickstart alert investigation with AI - * **Alert Enrichment** - Pod logs, events and more alongside alerts +Robusta is available in open-source and commercial versions: +.. list-table:: + :widths: 30 35 35 + :header-rows: 1 - Connect to your existing Prometheus or install our all-in-one bundle (based on kube-prometheus-stack). Need to go beyond Kubernetes? `Try Robusta Pro `_. + * - **Version** + - **Cloud Environments** + - **Alert Sources** + * - `Robusta Open Source `_ + - Kubernetes + - Prometheus + * - `Robusta Pro `_ + - Kubernetes and non-Kubernetes environments + - Prometheus, DataDog, NewRelic, and more - .. grid-item:: +**Key Features:** - .. image:: /images/prometheus-alert-with-robusta.png - :width: 400px +* **Smart Grouping** - Reduce notification spam +* **AI Investigation** - Find the root cause with AI +* **Alert Enrichment** - Correlate alerts with logs, k8s events, and more +* **Auto-Remediation** - Define self-healing rules for faster fixes Ready to get started? 
--------------------- diff --git a/docs/notification-routing/notification-routing-examples.rst b/docs/notification-routing/notification-routing-examples.rst deleted file mode 100644 index 8338094d0..000000000 --- a/docs/notification-routing/notification-routing-examples.rst +++ /dev/null @@ -1,79 +0,0 @@ -:hide-toc: - -Routing Cookbook -=================================== - -.. toctree:: - :maxdepth: 1 - :hidden: - - routing-by-namespace - routing-by-type - implementing-monitoring-shifts - routing-to-multiple-slack-channels - routing-exclusion - routing-by-severity - excluding-resolved - disable-oomkill-notifications - -In this section you'll find example configurations for common routing patterns. - - -.. grid:: 1 1 2 3 - :gutter: 3 - - .. grid-item-card:: :octicon:`book;1em;` Routing by Namespace - :class-card: sd-bg-light sd-bg-text-light - :link: routing-by-namespace - :link-type: doc - - Route notifications based on Kubernetes namespaces. - - .. grid-item-card:: :octicon:`book;1em;` Routing by Alert Name - :class-card: sd-bg-light sd-bg-text-light - :link: routing-by-type - :link-type: doc - - Route notifications based on alert types. - - .. grid-item-card:: :octicon:`book;1em;` Route by Time of Day - :class-card: sd-bg-light sd-bg-text-light - :link: implementing-monitoring-shifts - :link-type: doc - - Implement monitoring shifts for better alert management. - - .. grid-item-card:: :octicon:`book;1em;` Routing to Multiple Slack Channels - :class-card: sd-bg-light sd-bg-text-light - :link: routing-to-multiple-slack-channels - :link-type: doc - - Send notifications to multiple Slack channels. - - .. grid-item-card:: :octicon:`book;1em;` Routing Exclusion - :class-card: sd-bg-light sd-bg-text-light - :link: routing-exclusion - :link-type: doc - - Exclude specific alerts from being routed. - - .. 
grid-item-card:: :octicon:`book;1em;` Dropping Specific Alerts - :class-card: sd-bg-light sd-bg-text-light - :link: routing-by-severity - :link-type: doc - - Route notifications based on alert severity. - - .. grid-item-card:: :octicon:`book;1em;` Excluding "Resolved" Notifications - :class-card: sd-bg-light sd-bg-text-light - :link: excluding-resolved - :link-type: doc - - Exclude resolved alerts from notifications. - - .. grid-item-card:: :octicon:`book;1em;` Disable "OOMKill" Notifications - :class-card: sd-bg-light sd-bg-text-light - :link: disable-oomkill-notifications - :link-type: doc - - Disable notifications for OOMKill events. \ No newline at end of file diff --git a/docs/notification-routing/routing-with-scopes.rst b/docs/notification-routing/routing-with-scopes.rst index c0d223900..b74ebd5c1 100644 --- a/docs/notification-routing/routing-with-scopes.rst +++ b/docs/notification-routing/routing-with-scopes.rst @@ -324,7 +324,7 @@ Here is the complete list of attributes that can be used in ``include`` / ``excl +---------------------+-----------------------------------------------------------+------------------------------------------+ | ``annotations`` | Same as Kubernetes selectors: a comma-separated list of | Can refer to both Kubernetes resource | | | ``key=val`` pairs with AND between them. e.g. | annotations and Prometheus alert | -| | ``app.kubernetes.io/name=prometheus``. Supports regex in | annotations. Prometheus values | +| | ``app.kubernetes.io/name=prometheus``. Supports regex in | annotations. Prometheus values | | | the value. | are prioritized when both exist. | +---------------------+-----------------------------------------------------------+------------------------------------------+ | ``namespace_labels``| Labels on the Kubernetes namespace containing this object.| Same matching syntax as ``labels``. 
For | diff --git a/docs/playbook-reference/actions/scans.rst b/docs/playbook-reference/actions/scans.rst index fede98412..691955a29 100644 --- a/docs/playbook-reference/actions/scans.rst +++ b/docs/playbook-reference/actions/scans.rst @@ -9,7 +9,4 @@ These actions can be triggered: * On demand, via the Robusta UI. * On demand, via :ref:`cli command `. -There are two built-in KRR and Popeye, you can find more details below. - -* :ref:`Cost Savings (KRR)` -* :ref:`Kubernetes Misconfigurations (Popeye)` +There are two built-in scans: KRR and Popeye. diff --git a/docs/playbook-reference/builtin-alert-enrichment.rst b/docs/playbook-reference/builtin-alert-enrichment.rst new file mode 100644 index 000000000..adbc038d1 --- /dev/null +++ b/docs/playbook-reference/builtin-alert-enrichment.rst @@ -0,0 +1,26 @@ +.. _builtin-alert-enrichment: + +Builtin Alert Enrichment +######################################## + +Robusta takes Prometheus to the next level by correlating alerts with other observability data. + +Testing out Prometheus alerts +********************************* +1. Deploy a broken pod that will be stuck in pending state: + +.. code-block:: bash + :name: cb-apply-pendingpod + + kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/pending_pods/pending_pod_resources.yaml + +2. Trigger a Prometheus alert immediately, skipping the normal delays: + +.. code-block:: bash + :name: cb-trigger-prometheus-alert + + robusta playbooks trigger prometheus_alert alert_name=KubePodCrashLooping namespace=default pod_name=example-pod + +.. admonition:: Example Slack Message + + .. 
image:: /images/simulatedprometheusalert.png diff --git a/docs/playbook-reference/defining-playbooks/builtin-playbooks.rst b/docs/playbook-reference/defining-playbooks/builtin-playbooks.rst index 4d413a821..f26d6c100 100644 --- a/docs/playbook-reference/defining-playbooks/builtin-playbooks.rst +++ b/docs/playbook-reference/defining-playbooks/builtin-playbooks.rst @@ -25,7 +25,7 @@ The following default playbook handles all Prometheus alerts that Robusta receiv There are additional enrichments for specific alerts. For example: -To define additional playbooks for your own alerts, refer to the :ref:`Enhanced Prometheus Alerts` tutorial. +To define additional playbooks for your own alerts, refer to the :doc:`Custom Alert Enrichment ` guide. Default Prometheus Silencing -------------------------------- @@ -34,4 +34,4 @@ Robusta uses *silencer* actions to flag false positive alerts and prevent them f Silencers are just regular actions that call a special silencing API in their code. -To define your own silencers, refer to the :ref:`Silencing Prometheus Alerts` tutorial. +To define your own silencers, refer to the playbook actions documentation. diff --git a/docs/playbook-reference/defining-playbooks/playbook-basics.rst b/docs/playbook-reference/defining-playbooks/playbook-basics.rst index 4e925bb06..cd839c190 100644 --- a/docs/playbook-reference/defining-playbooks/playbook-basics.rst +++ b/docs/playbook-reference/defining-playbooks/playbook-basics.rst @@ -5,7 +5,7 @@ Playbook Basics A playbook is an automation rule for detecting, investigating, or fixing problems in your cluster. 
-For a gentle introduction, see :ref:`What are Playbooks?` +For a gentle introduction, see :doc:`Playbook Overview ` Overview ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/playbook-reference/index.rst b/docs/playbook-reference/index.rst index 702ece5c0..232b8caf6 100644 --- a/docs/playbook-reference/index.rst +++ b/docs/playbook-reference/index.rst @@ -5,11 +5,12 @@ :hidden: overview - what-are-playbooks - defining-playbooks/index + Playbook Basics + Creating Notifications + Advanced Playbook Techniques + Matching Actions to Triggers + Loading External Actions ⚡️ Triggers 💥 Actions automatic-remediation-examples/index - prometheus-examples/index - kubernetes-examples/index Log Based Alerting diff --git a/docs/playbook-reference/kubernetes-examples/index.rst b/docs/playbook-reference/kubernetes-examples/index.rst deleted file mode 100644 index 04a6a9d1f..000000000 --- a/docs/playbook-reference/kubernetes-examples/index.rst +++ /dev/null @@ -1,19 +0,0 @@ -:hide-toc: - -Kubernetes Change Tracking -========================================== - -Robusta allows you to track and respond to Kubernetes changes. - -You can use this to get notified about problems in your cluster without relying on Prometheus metrics and defining complex PromQL alerts. -Robusta listens to the API Server directly and triggers playbooks when changes occur. - -Another use case is **automatic remediation** of Kubernetes problems - Refer to :ref:`Remediation` for more information. - -.. 
toctree:: - :maxdepth: 1 - - playbook-failed-liveness - playbook-job-failure - playbook-track-changes - playbook-track-secrets diff --git a/docs/playbook-reference/kubernetes-examples/kubernetes-change-notifications.rst b/docs/playbook-reference/kubernetes-examples/kubernetes-change-notifications.rst new file mode 100644 index 000000000..e4673df99 --- /dev/null +++ b/docs/playbook-reference/kubernetes-examples/kubernetes-change-notifications.rst @@ -0,0 +1,364 @@ +Kubernetes Change Notifications +################################ + +You can configure Robusta to send push notifications when Kubernetes resources change or become unhealthy. This is done by listening to API Server changes with `kubewatch `_ and then filtering the stream of events in a Robusta playbook. + +Notifications are sent to configured :ref:`Sinks ` like Slack or MSTeams. You can also :ref:`route notifications to specific sinks`. + +Pod Health Tracking +=================== + +Failed Liveness Probes +---------------------- + +Get notified when liveness probes fail. + +Add the following YAML to the ``customPlaybooks`` Helm value: + +.. code-block:: yaml + + customPlaybooks: + - triggers: + - on_kubernetes_warning_event_create: + include: ["Liveness"] + actions: + - create_finding: + aggregation_key: "Failed Liveness Probe" + severity: HIGH + title: "Failed liveness probe: $name" + - event_resource_events: {} + +Then do a :ref:`Helm Upgrade `. + +.. details:: Testing + + Apply the following command to create a failing liveness probe: + + .. code-block:: bash + + kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/liveness_probe_fail/failing_liveness_probe.yaml + + You should get a notification in your configured sink. + + .. image:: /images/failedlivenessprobe.png + :alt: Failed liveness probe notification on Slack + :align: center + +.. 
details:: How it Works + + This playbook uses the :ref:`on_kubernetes_warning_event_create` trigger, which fires for Liveness probe failures in your cluster. + + It uses the :ref:`create_finding ` action to generate a notification message, and the :ref:`event_resource_events ` action to gather all other events on the same resource in the recent past. + + +Job Tracking +============ + +Failed Kubernetes Jobs +----------------------- + +Get notified about failed Kubernetes Jobs. + +.. image:: /images/failingjobs.png + :alt: Failing Kubernetes jobs notification on Slack + :align: center + +.. admonition:: Avoid Duplicate Alerts + + If you installed Robusta with the embedded Prometheus stack, you don't need to configure this playbook. It's configured by default. + +Add the following YAML to the ``customPlaybooks`` Helm value: + +.. code-block:: yaml + + customPlaybooks: + - triggers: + - on_job_failure: {} + actions: + - create_finding: + title: "Job Failed" + aggregation_key: "JobFailure" + - job_info_enricher: {} + - job_events_enricher: {} + - job_pod_enricher: {} + +Then do a :ref:`Helm Upgrade `. + +.. details:: Testing + + Deploy a failing job. The job will fail after 60 seconds, then attempt to run again. After two attempts, it will fail for good. + + .. code-block:: bash + + kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/job_failure/job_crash.yaml + +.. details:: How it Works + + * :ref:`on_job_failure` fires once for each failed Kubernetes Job + * :ref:`create_finding` generates a notification message + * :ref:`job_info_enricher` fetches the Job's status and information + * :ref:`job_events_enricher` runs ``kubectl get events``, finds Events related to the Job, and attaches them + * :ref:`job_pod_enricher` finds Pods that were part of the Job. It attaches Pod-level information like Pod logs + + + +Workload Change Tracking +========================= + +Track changes to Deployments and other workload resources. 
You can filter specific YAML fields to avoid noise - for example, ignoring ``spec.replicas`` changes from autoscaling. + +Deployment Image Changes +------------------------ + +Get notified when a Deployment strategy or container details change. + +Add the following YAML to the ``customPlaybooks`` Helm value: + +.. code-block:: yaml + + customPlaybooks: + - triggers: + - on_deployment_update: + change_filters: + ignore: + - status + - metadata.generation + - metadata.resourceVersion + - metadata.managedFields + - spec.replicas + include: + - spec.template.spec.containers[0] + - spec.strategy + actions: + - resource_babysitter: {} + - customise_finding: + severity: HIGH + title: "New changes in $kind/$namespace/$name" + sinks: + - some_sink_name # Optional + +Then perform a :ref:`Helm Upgrade `. + +**Note**: You can also use :ref:`Sink Matchers` to route notifications instead of explicitly specifying a sink in the playbook. + +.. details:: Testing + + Modify the image of a deployment in your cluster. + + Run the following YAML files to simulate a deployment image change: + + .. code-block:: bash + + kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/deployment_image_change/before_image_change.yaml + kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/deployment_image_change/after_image_change.yaml + + A Robusta notification will arrive in your configured :ref:`sinks `, showing exactly what changed in the deployment. + + .. image:: /images/deployment-image-change.png + :width: 600 + :align: center + +.. details:: How it Works + + 1. The ``on_deployment_update`` trigger monitors deployment changes + 2. ``change_filters`` specify which fields to monitor, ignoring noisy fields like ``spec.replicas`` that change due to autoscaling + 3. 
Optionally route notifications to specific sinks + + +Deployment Manifest on Image Change +------------------------------------ + +Get the full Deployment manifest sent to a webhook each time the image changes. + +Add the following YAML to the ``customPlaybooks`` Helm value: + +.. code-block:: yaml + + customPlaybooks: + - triggers: + - on_deployment_update: + change_filters: + include: + - image + actions: + - json_change_tracker: + url: "https://SOME-WEBHOOK-URL" + +This playbook doesn't use a Sink - it sends the manifest as JSON to the webhook URL specified in the action parameters. + +Then perform a :ref:`Helm Upgrade `. + +.. details:: Testing + + Modify a Deployment image in your cluster. + + A notification with the Deployment manifest, as JSON, should be sent to the webhook URL. + + +Network Resource Tracking +========================== + +Ingress Changes +--------------- + +Get notified when Ingress rules or TLS details change. + +Add the following YAML to the ``customPlaybooks`` Helm value: + +.. code-block:: yaml + + customPlaybooks: + - triggers: + - on_ingress_all_changes: + change_filters: + ignore: + - status + - metadata.generation + - metadata.resourceVersion + - metadata.managedFields + - spec.replicas + include: + - spec.rules + - spec.tls + actions: + - resource_babysitter: {} + sinks: + - some_sink_name # Optional + +Then perform a :ref:`Helm Upgrade `. + +**Note**: You can also use :ref:`Sink Matchers` to route notifications instead of explicitly specifying a sink in the playbook. + +.. details:: Testing + + Create, modify, or delete an ingress in your cluster. + + Run the following commands to simulate ingress changes: + + .. 
code-block:: bash + + kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/ingress_port_path_change/before_port_path_change.yaml + kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/ingress_port_path_change/after_port_path_change.yaml + + A Robusta notification will arrive in your configured :ref:`sinks `, showing exactly what changed in the ingress. + + .. image:: /images/ingress-image-change.png + :width: 600 + :align: center + +.. details:: How it Works + + 1. The ``on_ingress_all_changes`` trigger monitors all ingress changes + 2. ``change_filters`` specify to only notify on ``spec.rules`` and ``spec.tls`` changes + 3. Optionally route notifications to specific sinks + + +Secret Tracking +=============== + +.. _track-secrets-overview: + +Track Kubernetes Secret Changes +-------------------------------- + +By default Robusta is not configured to track secret changes. To enable secret tracking, you need to grant permissions to Robusta and configure kubewatch. + +**Setup Steps**: + +1. **Grant Permissions to Robusta**: By default, Robusta does not have permission to read Secrets +2. **Configure Kubewatch**: Set up Kubewatch to monitor Secret resources +3. **Create Custom Playbook**: Define notification rules + +**1. Grant Permissions to Robusta** + +Create a YAML file named ``kubewatch-secret-permissions.yaml`` with the following content: + +.. 
code-block:: yaml + + apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRole + metadata: + namespace: your-namespace + name: read-secrets-role + rules: + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "list", "watch"] + --- + apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRoleBinding + metadata: + name: read-secrets-role-binding + subjects: + - kind: ServiceAccount + name: robusta-forwarder-service-account + namespace: your-namespace + roleRef: + kind: ClusterRole + name: read-secrets-role + apiGroup: rbac.authorization.k8s.io + +Apply the permissions: + +.. code-block:: bash + + kubectl apply -f kubewatch-secret-permissions.yaml + +**2. Configure Kubewatch to Monitor Secrets** + +Add the following to the ``kubewatch`` section in your ``generated_values.yaml``: + +.. code-block:: yaml + + kubewatch: + config: + namespace: your-namespace + resource: + secret: true + +**3. Create Custom Playbook** + +Add the following to the ``customPlaybooks`` section in your ``generated_values.yaml``: + +.. code-block:: yaml + + customPlaybooks: + - triggers: + - on_secret_all_changes: {} + actions: + - create_finding: + title: "Secret $name in namespace $namespace was changed" + aggregation_key: SecretModified + +Then perform a :ref:`Helm Upgrade `. + +**Note**: You can also use :ref:`Sink Matchers` to route notifications instead of explicitly specifying a sink in the playbook. + +.. details:: Testing + + 1. **Create a Test Secret**: + + .. code-block:: bash + + kubectl create secret generic test-secret --from-literal=key1=value1 + + 2. **Modify the Secret**: + + .. code-block:: bash + + kubectl patch secret test-secret -p '{"stringData":{"key1":"newvalue"}}' + + 3. **Delete the Secret**: + + .. code-block:: bash + + kubectl delete secret test-secret + + A Robusta notification will arrive in your configured :ref:`sinks `, indicating that the Secret was created, modified, or deleted. + +.. details:: How it Works + + 1. 
**Grant Permissions**: The RBAC YAML grants Robusta the necessary permissions to read Secrets + 2. **Configure Kubewatch**: The ``kubewatch`` configuration tells Robusta to monitor Secret resources + 3. **Set Up the Trigger**: The ``on_secret_all_changes`` trigger ensures you'll receive notifications for all Secret changes + 4. **Create the Notification**: The ``create_finding`` action generates a notification with a custom title diff --git a/docs/playbook-reference/kubernetes-examples/playbook-failed-liveness.rst b/docs/playbook-reference/kubernetes-examples/playbook-failed-liveness.rst deleted file mode 100644 index b1b6c6952..000000000 --- a/docs/playbook-reference/kubernetes-examples/playbook-failed-liveness.rst +++ /dev/null @@ -1,52 +0,0 @@ -Track Failed Liveness Probes -############################## - -Lets track failed Liveness Probes and notify the user. Notifications will be sent to configured :ref:`Sinks ` -like Slack or MSTeams. It is also possible to :ref:`route notifications to specific sinks`. - -Define a Playbook to Track Liveness Probes ------------------------------------------------------ - -Add the following YAML to the ``customPlaybooks`` Helm value: - -.. code-block:: yaml - - customPlaybooks: - - triggers: - - on_kubernetes_warning_event_create: - include: ["Liveness"] # fires on failed Liveness probes - actions: - - create_finding: - aggregation_key: "Failed Liveness Probe" - severity: HIGH - title: "Failed liveness probe: $name" - - event_resource_events: {} - -Then do a :ref:`Helm Upgrade `. - -Testing Your Playbook ------------------------------------------- - -Apply the following command the create a failing liveness probe. - -.. code-block:: yaml - - kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/liveness_probe_fail/failing_liveness_probe.yaml - - -You should get a notification in your configured sink. - -.. details:: Example Slack Notification - - .. 
image:: /images/failedlivenessprobe.png - :alt: Failed liveness probe notification on Slack - :align: center - -How it Works -------------- - -This playbook uses the :ref:`on_kubernetes_warning_event_create` trigger, which fires for Liveness probe failures in your cluster. - -It uses the :ref:`create_finding ` action to generate a notification message, and :ref:`event_resource_events ` action to gather all other events on the same resource in the near past. - -.. improve based on comments at https://github.com/robusta-dev/robusta/issues/799#event-8873234835 diff --git a/docs/playbook-reference/kubernetes-examples/playbook-job-failure.rst b/docs/playbook-reference/kubernetes-examples/playbook-job-failure.rst deleted file mode 100644 index 46e02fe4e..000000000 --- a/docs/playbook-reference/kubernetes-examples/playbook-job-failure.rst +++ /dev/null @@ -1,55 +0,0 @@ -Track Failed Kubernetes Jobs -############################## - -Notify about failed Kubernetes Jobs in Slack, MSTeams or other :ref:`Sinks `. - -.. image:: /images/failingjobs.png - :alt: Failing Kubernetes jobs notification on Slack - :align: center - -.. admonition:: Avoid Duplicate Alerts - - If you installed Robusta with the embedded Prometheus stack, you don't need to configure this playbook. It's configured by default. - -Defining a Playbook to Track Failed Jobs ------------------------------------------- - -Add the following YAML to the ``customPlaybooks`` Helm value: - -.. code-block:: yaml - - customPlaybooks: - - triggers: - - on_job_failure: {} # (1) - actions: - - create_finding: # (2) - title: "Job Failed" - aggregation_key: "JobFailure" - - job_info_enricher: {} # (3) - - job_events_enricher: {} # (4) - - job_pod_enricher: {} # (5) - -.. code-annotations:: - 1. :ref:`on_job_failure` fires once for each failed Kubernetes Job - 2. :ref:`create_finding` generates a notification message - 3. :ref:`job_info_enricher` fetches the Jobs status and information - 4. 
:ref:`job_events_enricher` runs ``kubectl get events``, finds Events related to the Job, and attaches them - 5. :ref:`job_pod_enricher` finds Pods that were part of the Job. It attaches Pod-level information like Pod logs - -Then do a :ref:`Helm Upgrade `. - -Testing Your Playbook ------------------------------------------- - -Deploy a failing job. The job will fail after 60 seconds, then attempt to run again. After two attempts, it will fail for good. - -.. code-block:: yaml - - kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/job_failure/job_crash.yaml - -Tips and Tricks ----------------- - -Route failed Jobs to specific Slack channels -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Refer to :ref:`docs on notification routing`. diff --git a/docs/playbook-reference/kubernetes-examples/playbook-track-changes.rst b/docs/playbook-reference/kubernetes-examples/playbook-track-changes.rst deleted file mode 100644 index 5d503bfe2..000000000 --- a/docs/playbook-reference/kubernetes-examples/playbook-track-changes.rst +++ /dev/null @@ -1,244 +0,0 @@ -Track Kubernetes Changes -############################################ - -Robusta lets you get notifications when Kubernetes resources are updated. Users can set up personalized notifications for any Deployment, ReplicaSet, or other resource, ensuring you get notified when new versions are rolled out or other engineers change something important in the cluster. This feature is especially useful for various roles: - -* **DevOps and Platform Teams** can track all changes to Ingresses and other sensitive cluster resources. -* **Developers** can receive notifications each time their application is deployed to production. -* **Security and DevSecOps** professionals can track changes to ClusterRoles or ServiceAccounts. - -How to Track Changes in Kubernetes Resources ------------------------------------------------- -1. 
**Create Custom Playbook**: Start by defining a personalized template that specifies when you should be notified and what data you'd like to see. This is your "custom playbook." -2. **Select Kubernetes Object**: In your custom playbook, specify which Kubernetes resource you want to monitor, such as Deployment or ReplicaSet. -3. **Filter YAML Fields**: To avoid unnecessary notifications, select specific YAML field. For example, when tracking an autoscaled Deployment, you can filter out notifications related to `Deployment.spec.replicas`, as this field is automatically updated by the Horizontal Pod Autoscaler (HPA) regularly. -4. **Set Up Change Detection**: Configure your playbook to send a 'diff' that shows exactly what changed in the selected Kubernetes object. -5. **Route Alerts (Optional)**: If needed, direct these change notifications to specific destinations, also known as 'Sinks', by adding this information to your custom playbook. - -Kubernetes Change Tracking Use Cases ------------------------------------------ -Let's explore practical use cases for Kubernetes change tracking. - - -Use Case 1: Notification on Deployment Image Change -******************************************************* -**Scenario**: You want to be notified when a Deployment strategy or container details are changed. - -**Implementation**: - -Add the following YAML to the ``customPlaybooks`` Helm value: - -.. code-block:: yaml - - customPlaybooks: - - triggers: - - on_deployment_update: - change_filters: - ignore: # These are ignored by default - - status - - metadata.generation - - metadata.resourceVersion - - metadata.managedFields - - spec.replicas - include: - - spec.template.spec.containers[0] - - spec.strategy - actions: - - resource_babysitter: {} - - customise_finding: - severity: HIGH - title: "New changes in $kind/$namespace/$name" - sinks: - - some_sink_name # Optional - -.. details:: How does it work? - - 1. 
**Initialize Custom Playbook**: Create a custom playbook where you'll outline the rules for when and how you'll be notified. - 2. **Set Up the Deployment Trigger**: In your custom playbook, add the ``on_deployment_update`` trigger. This ensures you'll receive notifications for deployment changes. - 3. **Specify Fields to Monitor**: Add change_filters to your ``on_deployment_update`` trigger to filter which changes you will be notified for. - 4. **Route Notifications (Optional)**: Optionally, specify in your playbook where these notifications should be sent by defining 'sinks'. - -Then perform a :ref:`Helm Upgrade `. - -**Note**: You can also use the :ref:`Sink Matchers` to route notifications instead of explicitly specifying a sink in the playbook. - - -**Testing**: - -Modify the image of a deployment in your cluster. - -Run the following YAML files to simulate a deployment image change - -.. code-block:: yaml - - kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/deployment_image_change/before_image_change.yaml - kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/deployment_image_change/after_image_change.yaml - -A Robusta notification will arrive in your configured :ref:`sinks `, showing exactly what changed in the deployment. - -**Sample Alert**: - -.. image:: /images/deployment-image-change.png - :width: 600 - :align: center - - -Use Case 2: Notification on Ingress Rules Change -***************************************************************** -**Scenario**: You want to be notified when an Ingress rules or tls details are changed. - -**Implementation**: - -Add the following YAML to the ``customPlaybooks`` Helm value: - -.. 
code-block:: yaml - - customPlaybooks: - - triggers: - - on_ingress_all_changes: - change_filters: - ignore: - - status - - metadata.generation - - metadata.resourceVersion - - metadata.managedFields - - spec.replicas - include: - - spec.rules - - spec.tls - actions: - - resource_babysitter: {} - sinks: - - some_sink_name # Optional - -.. details:: How does it work? - - 1. **Initialize Custom Playbook**: Create a custom playbook where you'll outline the rules for when and how you'll be notified. - 2. **Set Up the Ingress Trigger**: In your custom playbook, add the ``on_ingress_all_changes`` trigger. This ensures you'll receive notifications for all ingress changes. - 3. **Specify Fields to Monitor**: Add change_filters to your ``on_ingress_all_changes`` trigger to filter which changes you will be notified for. - 4. **Route Notifications (Optional)**: Optionally, specify in your playbook where these notifications should be sent by defining 'sinks'. - -Then perform a :ref:`Helm Upgrade `. - -**Note**: You can also use the :ref:`Sink Matchers` to route notifications instead of explicitly specifying a sink in the playbook. - -**Testing**: - -Create, modify, or delete an ingress in your cluster. - -Run the following commands to simulate ingress changes: - -.. code-block:: yaml - - kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/ingress_port_path_change/before_port_path_change.yaml - kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/ingress_port_path_change/after_port_path_change.yaml - -A Robusta notification will arrive in your configured :ref:`sinks `, showing exactly what changed in the ingress. - -**Sample Alert**: - -.. 
image:: /images/ingress-image-change.png - :width: 600 - :align: center - -Use Case 3: Notification when a Deployment image change - including the Deployment manifest -******************************************************************************************** -**Scenario**: You want to get the Deployment manifest, each time the image changes - -**Implementation**: - -Add the following YAML to the ``customPlaybooks`` Helm value: - -.. code-block:: yaml - - customPlaybooks: - - triggers: - - on_deployment_update: - change_filters: - include: - - image - actions: - - json_change_tracker: - url: "https://SOME-WEBHOOL-URL" - -.. details:: How does it work? - - 1. **Initialize Custom Playbook**: Create a custom playbook where you'll outline the rules for when and how you'll be notified. - 2. **Set Up the Deployment Trigger**: In your custom playbook, add the ``on_deployment_change`` trigger, with a ``scope`` including only image changes. This ensures you'll receive notifications for deployment image changes. - -This playbook doesn't use a Sink! It sends the to the url specified in the action parameters. - -Then perform a :ref:`Helm Upgrade `. - -**Testing**: - -Modify a Deployment image in your cluster. - -A notification with the Deployment manifest, as json, should be sent to the webhook url - - - -Cleanup ------------------------------- -Remove the playbook you added based on your specific use case from the ``customPlaybooks`` in your ``generated_values.yaml`` file. Then, perform a :ref:`Helm Upgrade `. - -.. Use Case 2: Notification on Kubernetes Job Failure -.. ******************************************************* -.. **Scenario**: You want to be notified when a Kubernetes job is failed. - -.. .. admonition:: Avoid Duplicate Alerts - -.. If you installed Robusta with the embedded Prometheus stack, you don't need to configure this playbook. It's configured by default. - - -.. **Implementation**: - -.. 
Add the following YAML to the ``customPlaybooks`` Helm value: - -.. .. code-block:: yaml - -.. customPlaybooks: -.. - triggers: -.. - on_job_failure: {} # (1) -.. actions: -.. - create_finding: # (2) -.. title: "Job Failed" -.. aggregation_key: "JobFailure" -.. - job_info_enricher: {} # (3) -.. - job_events_enricher: {} # (4) -.. - job_pod_enricher: {} # (5) -.. sinks: -.. - some_sink_name - -.. 1. :ref:`on_job_failure` fires once for each failed Kubernetes Job -.. 2. :ref:`create_finding` generates a notification message -.. 3. :ref:`job_info_enricher` fetches the Jobs status and information -.. 4. :ref:`job_events_enricher` runs ``kubectl get events``, finds Events related to the Job, and attaches them -.. 5. :ref:`job_pod_enricher` finds Pods that were part of the Job. It attaches Pod-level information like Pod logs - -.. .. details:: How does it work? - -.. 1. **Initialize Custom Playbook**: Create a custom playbook where you'll define the rules for when and how you'll be notified. -.. 2. **Set Up the Failure Trigger**: In your custom playbook, add the `on_job_failure` trigger. This will notify you specifically when a job fails. -.. 3. **Configure Notification Creation**: Within the same playbook, use the `create_finding` action and set the title to `Job Failed`. This will generate the actual notification. -.. 4. **Include Additional Information**: Add `job_info_enricher`, `job_events_enricher`, and `job_pod_enricher` to your playbook. These gather more details that will accompany your notification. -.. 5. **Route Notifications (Optional)**: If desired, specify in your playbook where to send these notifications by adding 'sinks'. - - -.. Then do a :ref:`Helm Upgrade `. - -.. **Note**: You can also use the :ref:`Sink Matchers` to route notifications instead of explicitly specifying a sink in the playbook. - -.. **Testing**: -.. Deploy a failing job. The job will fail after 60 seconds, then attempt to run again. After two attempts, it will fail for good. - -.. .. 
code-block:: yaml - -.. kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/job_failure/job_crash.yaml - - -.. **Sample Alert**: - -.. .. image:: /images/failingjobs.png -.. :alt: Failing Kubernetes jobs notification on Slack -.. :align: center diff --git a/docs/playbook-reference/kubernetes-examples/playbook-track-secrets.rst b/docs/playbook-reference/kubernetes-examples/playbook-track-secrets.rst deleted file mode 100644 index 875fc3e6b..000000000 --- a/docs/playbook-reference/kubernetes-examples/playbook-track-secrets.rst +++ /dev/null @@ -1,141 +0,0 @@ -.. _track-secrets-overview: - -Track Kubernetes Secret Changes -############################################ - -By default Robusta is not configured to track secret changes, but it is possible to configure it -by giving permissions to Robusta to read secrets and configuring kubewatch. - -How to Track Changes in Kubernetes Secrets ------------------------------------------------- - -1. **Grant Permissions to Robusta**: By default, Robusta does not have permission to read Secrets. You'll need to grant it the necessary permissions. -2. **Configure Kubewatch**: Set up Kubewatch to monitor Secret resources. -3. **Create Custom Playbook**: Define a playbook that specifies when you should be notified and what data you'd like to see. -4. **Route Alerts (Optional)**: If needed, direct these notifications to specific destinations, also known as 'Sinks', by adding this information to your custom playbook. - -Updating Configurations to track Secret Changes -******************************************************* -**Scenario**: You want to be notified whenever a Secret in your cluster is created, updated, or deleted. - -**Implementation**: - -Add the following configurations to your `generated_values.yaml` file and apply the necessary permissions. - -**1. Grant Permissions to Robusta** - -Create a YAML file named `kubewatch-secret-permissions.yaml` with the following content: - -.. 
code-block:: yaml - - apiVersion: rbac.authorization.k8s.io/v1 - kind: ClusterRole - metadata: - namespace: your-namespace - name: read-secrets-role - rules: - - apiGroups: [""] - resources: ["secrets"] - verbs: ["get", "list", "watch"] - --- - apiVersion: rbac.authorization.k8s.io/v1 - kind: ClusterRoleBinding - metadata: - name: read-secrets-role-binding - subjects: - - kind: ServiceAccount - name: robusta-forwarder-service-account - namespace: your-namespace - roleRef: - kind: ClusterRole - name: read-secrets-role - apiGroup: rbac.authorization.k8s.io - -Apply the permissions: - -.. code-block:: shell - - kubectl apply -f kubewatch-secret-permissions.yaml - -**2. Configure Kubewatch to Monitor Secrets** - -Add the following to the `kubewatch` section in your `generated_values.yaml`: - -.. code-block:: yaml - - kubewatch: - config: - namespace: your-namespace - resource: - secret: true - -**3. Create Custom Playbook** - -Add the following to the `customPlaybooks` section in your `generated_values.yaml`: - -.. code-block:: yaml - - customPlaybooks: - - triggers: - - on_secret_all_changes: {} - actions: - - create_finding: - title: "Secret $name in namespace $namespace was changed" - aggregation_key: SecretModified - -.. details:: How does it work? - - 1. **Grant Permissions**: The first YAML grants Robusta the necessary permissions to read Secrets. - 2. **Configure Kubewatch**: The `kubewatch` configuration tells Robusta to monitor Secret resources. - 3. **Set Up the Trigger**: The `on_secret_all_changes` trigger ensures you'll receive notifications for all Secret changes. - 4. **Create the Notification**: The `create_finding` action generates a notification with a custom title. - -Then perform a :ref:`Helm Upgrade `. - -**Note**: You can also use the :ref:`Sink Matchers` to route notifications instead of explicitly specifying a sink in the playbook. - -**Testing**: - -1. **Create a Test Secret**: - - .. 
code-block:: shell - - kubectl create secret generic test-secret --from-literal=key1=value1 - -2. **Modify the Secret**: - - .. code-block:: shell - - kubectl patch secret test-secret -p '{"stringData":{"key1":"newvalue"}}' - -3. **Delete the Secret**: - - .. code-block:: shell - - kubectl delete secret test-secret - -A Robusta notification will arrive in your configured :ref:`sinks `, indicating that the Secret was created, modified, or deleted. - - -Cleanup ------------------------------- - -To stop monitoring Secret changes: - -1. Remove the playbook you added from the `customPlaybooks` in your `generated_values.yaml` file. -2. Remove the Secret monitoring configuration: - - .. code-block:: yaml - - kubewatch: - config: - resource: - secret: false - -3. Delete the permissions: - - .. code-block:: shell - - kubectl delete -f kubewatch-secret-permissions.yaml - -Then, perform a :ref:`Helm Upgrade `. diff --git a/docs/playbook-reference/overview.rst b/docs/playbook-reference/overview.rst index 5499f6567..711214089 100644 --- a/docs/playbook-reference/overview.rst +++ b/docs/playbook-reference/overview.rst @@ -2,27 +2,11 @@ Overview =========== -With Robusta you can respond to alerts faster and more reliably, by using automation. This is done by defining "playbooks" or automation workflows. +Playbooks are deterministic rules for responding to alerts and unhealthy conditions in a Kubernetes cluster. -Automation for Prometheus Alerts ------------------------------------ +Playbooks are recommended for advanced use cases. Most users should start with :doc:`AI Analysis ` of alerts first, which requires far less configuration. -Robusta can automate the response to Prometheus alerts. 
Here are common automation types: - -* Enrich - add missing context to alerts -* Remediate - automatically fix issues -* Silence - suppress alerts based on complex conditions to reduce noise - -Automation for Kubernetes Changes --------------------------------------------------- - -You can also automate the response to Kubernetes resource changes: - -* Create Finding - generate a notification for a change of interest -* Enrich - add context to notifications generated by "Create Finding" -* Remediate - run a Job in response to a Kubernetes event or change - -How Automations Work +How Playbooks Work --------------------- Automations in Robusta are called playbooks and they are defined in YAML in your Robusta Helm values. diff --git a/docs/playbook-reference/prometheus-examples/index.rst b/docs/playbook-reference/prometheus-examples/index.rst index 85cbe1795..55b7e856a 100644 --- a/docs/playbook-reference/prometheus-examples/index.rst +++ b/docs/playbook-reference/prometheus-examples/index.rst @@ -1,11 +1,15 @@ :hide-toc: -Prometheus Alert Enrichment +Custom Alert Enrichment ============================== -Ever feel overwhelmed by Prometheus alerts that lack context? In this section, you will learn to enrich alerts with critical information using Robusta. +Ever feel overwhelmed by Prometheus alerts that lack context? In this section, you will learn to enrich alerts with deterministic rules using Robusta. -By enriching alerts, you can: +.. note:: + + **Looking for automatic AI enrichment?** Check out :doc:`HolmesGPT ` for zero-configuration AI-powered alert enrichment that automatically investigates alerts and provides root cause analysis. 
+ +By creating custom enrichment rules, you can: * Reduce mean time to resolution (MTTR) by automatically gathering system state and logs when alerts fire * Make faster decisions on which team needs to investigate the alert diff --git a/docs/playbook-reference/triggers/kubernetes.rst b/docs/playbook-reference/triggers/kubernetes.rst index 0c12035ec..2e5753095 100644 --- a/docs/playbook-reference/triggers/kubernetes.rst +++ b/docs/playbook-reference/triggers/kubernetes.rst @@ -9,9 +9,7 @@ These triggers work even when Prometheus is not connected to Robusta. They're tr .. details:: Related Tutorials - * :ref:`Track Failed Kubernetes Jobs` - * :ref:`Track Failed Liveness Probes` - * :ref:`Track Kubernetes Changes` + * :doc:`Kubernetes Change Notifications ` Crashing Pod Triggers @@ -81,7 +79,7 @@ The following triggers are available for crashing Pods: * If A `name` is defined without a `namespace` than all containers with that name prefix will be ignored for this trigger. * If A `namespace` is defined without a `name` than all containers in that namespace will be ignored for this trigger. - An example playbook using :ref:`oomkilled_container_graph_enricher`: + An example playbook using ``oomkilled_container_graph_enricher``: .. code-block:: yaml diff --git a/docs/playbook-reference/triggers/prometheus.rst b/docs/playbook-reference/triggers/prometheus.rst index 12d900eed..a58cc62fe 100644 --- a/docs/playbook-reference/triggers/prometheus.rst +++ b/docs/playbook-reference/triggers/prometheus.rst @@ -1,12 +1,11 @@ Prometheus and AlertManager ############################# -Robusta can :ref:`improve your existing Prometheus alerts `. It can also execute -:ref:`Remediation Actions ` in response to alerts. +Robusta can improve your existing Prometheus alerts. It can also execute automated remediation actions in response to alerts. Prerequisites --------------- -AlertManager must be connected to Robusta. Refer to :ref:`Integrating AlertManager and Prometheus`. 
+AlertManager must be connected to Robusta. Refer to the :doc:`Send Alerts ` documentation. Triggers ----------- diff --git a/docs/playbook-reference/what-are-playbooks.rst b/docs/playbook-reference/what-are-playbooks.rst deleted file mode 100644 index 86b5e3592..000000000 --- a/docs/playbook-reference/what-are-playbooks.rst +++ /dev/null @@ -1,112 +0,0 @@ -.. _how-it-works-index: - -What are Playbooks? -================================================= - -The `Robusta Open Source `_ is a rules-engine for Kubernetes, designed for monitoring and observability use cases. - -In Robusta, rules are called *playbooks*. Every playbook consists of a *trigger* (e.g. a Crashing Pod, a Prometheus Alert, or some other condition) and one or -more *actions*. Actions can enrich alerts, silence them, or remediate problems. - -Conceptually, Robusta does three things: - -1. **Listens passively to various sources:** Robusta monitors Kubernetes events, Prometheus alerts, and other sources to stay informed about your cluster's current state. -2. **Actively collects observability data:** When noteworthy events occur, Robusta actively gathers and correlates information such as logs, graphs, and thread dumps. All according to the playbooks defined in Robusta. -3. **Sends notifications:** Based on your preferences, Robusta notifies in :ref:`sinks ` like Slack, MSTeams, and PagerDuty - -To get a feel for playbooks, let's explore two examples: - -* :ref:`Automatically Investigate a Prometheus Alert` *(Prometheus required)* -* :ref:`Track Failing Kubernetes Jobs` *(No Prometheus required)* - -Example Playbooks -^^^^^^^^^^^^^^^^^^^^^^ - -Automatically Investigate a Prometheus Alert ----------------------------------------------- - -``KubePodCrashLooping`` is a Prometheus alert that identifies crashing pods. It normally looks like this in Slack: - -.. 
image:: /images/prometheus-alert-without-robusta.png - :width: 800px - -While it's clear that a pod is crashing in the cluster, it's not obvious why. With Robusta, the same Slack alert is transformed into this: - -.. image:: /images/prometheus-alert-with-robusta.png - :width: 800px - -Now the alert contains pod logs and rapid-response buttons like "Investigate" and "Silence". - -This enhancement is implemented with 5 lines of YAML in Robusta: - -.. code-block:: yaml - - - triggers: - - on_prometheus_alert: - alert_name: KubePodCrashLooping - actions: - - logs_enricher: {} - -Here's how it works: - -1. A Prometheus alert fires and is sent to Robusta by webhook -2. Robusta evaluates all of the ``on_prometheus_alert`` triggers that are currently loaded. -3. If the alert name is ``KubePodCrashLooping``, there's a match and Robusta runs the above playbook. -4. The Prometheus alert is mapped to a Kubernetes resources (in this case a Pod) using the alert's metadata. -5. All actions in the playbook execute - in this case, a single action called ``logs_enricher``. -6. ``logs_enricher`` is a builtin action that takes a Pod-related event as input and fetch logs. It also builds a notification message. -7. The notification is sent to sinks according to global settings. - -.. admonition:: Do I need to write playbooks to use Robusta? - - Nope, you can get started without writing any YAML. Robusta includes builtin playbooks covering dozens of problems seen on real-world clusters. - -Track Failing Kubernetes Jobs ----------------------------------------- - -Robusta can generate alerts by listening to the APIServer, rather than just improving existing Prometheus alerts. - -This is useful if you don't have Prometheus, and for cases when writing Prometheus alerts is awkward. - -Lets notify in Slack when a Kubernetes Job fails: - -.. image:: /images/on_job_failed_example.png - :width: 800px - -Here is the Robusta rule that generates this notification: - -.. 
code-block:: yaml - - - triggers: - - on_job_failure: {} - actions: - - create_finding: - title: "Job Failed" - aggregation_key: "JobFailure" - - job_info_enricher: {} - - job_events_enricher: {} - - job_pod_enricher: {} - -In this example, the trigger was ``on_job_failure``. Robusta generated a notification using four actions: - -1. ``create_finding`` - create the notification message itself -2. ``job_info_enricher`` - fetch the Job's status and attach it -3. ``job_events_enricher`` run ``kubectl get events`` and attach events related to this Job -4. ``job_pod_enricher`` find the latest Pod in this Job and attach its information - -.. _robusta-or-prometheus-alerts: - -.. admonition:: Should I generate alerts with Robusta or with Prometheus? - - Robusta can respond to Prometheus alerts, or it can generate alerts itself. Most users mix and match these options, depending on their use case. Here are some guidelines: - - * Use Prometheus for alerts involving thresholds and time-series (e.g. Jobs running over 18 hours). - * Use Robusta for alerts involving discrete events (e.g. Jobs failing). - - That said, the choice is yours. Robusta is flexible and supports both approaches. - -Next Steps -^^^^^^^^^^^^^ - -* :ref:`See reference guide on defining playbooks ` -* :ref:`Install Robusta with Helm ` \ No newline at end of file diff --git a/docs/setup-robusta/additional-settings.rst b/docs/setup-robusta/additional-settings.rst index e369b3597..61a833371 100644 --- a/docs/setup-robusta/additional-settings.rst +++ b/docs/setup-robusta/additional-settings.rst @@ -26,180 +26,3 @@ clusters, make sure you change ``cluster_name`` accordingly. The other values sh If you need to generate the secret values yourself, use cryptographically secure strings with at least 128 bits of randomness. - -Relabel Prometheus Alerts ------------------------------ - -In order to enrich alerts, Robusta maps Prometheus alerts to related Kubernetes resources. 
- -The following labels determine which Kubernetes resource relates to an alert: - -.. list-table:: - :header-rows: 1 - - * - Kubernetes Resource - - Alert Labels - * - Deployment - - deployment, namespace - * - DaemonSet - - daemonset, namespace - * - StatefulSet - - statefulset, namespace - * - Job - - job_name, namespace - * - Pod - - pod, namespace - * - HorizontalPodAutoscaler - - horizontalpodautoscaler, namespace - * - Node - - node or instance (used as a fallback if node doesn't exist) - -If your alerts have different labels, you can change the mapping with the ``alertRelabel`` helm value. - -A relabeling has 3 attributes: - -* ``source``: The label's name on your alerts (which differs from the expected value in the above table) -* ``target``: The standard label name that Robusta expects (a value from the table above) -* ``operation``: Either ``add`` (default) or ``replace``. If ``add``, your custom mapping will be recognized *in addition* to Robusta's default mapping. - -For example: - -.. code-block:: yaml - - alertRelabel: - - source: "pod_name" - target: "pod" - operation: "add" - - source: "deployment_name" - target: "deployment" - operation: "replace" - - source: "job_name" - target: "job" - -Mapping Custom Alert Severity ------------------------------------- - -To help you prioritize alerts from different sources, Robusta maps alert severity to four standard levels: - -* HIGH - requires your immediate attention - may indicate a service outage -* LOW - minor problems and areas for improvement (e.g. performance) - to be reviewed periodically on a weekly or bi-weekly cadence -* INFO - you probably want to be aware of these, but do not necessarily need to take action -* DEBUG - debug only - can be ignored unless you're actively debugging an issue - -You are free to interpret these levels differently, but the above is a good starting point for most companies. - -Prometheus alerts are normalized to the above levels as follows: - -.. 
list-table:: - :header-rows: 1 - - * - Prometheus Severity - - Robusta Severity - * - critical - - HIGH - * - high - - HIGH - * - medium - - HIGH - * - error - - HIGH - * - warning - - LOW - * - low - - LOW - * - info - - INFO - * - debug - - DEBUG - -Prometheus alerts with a severity **not in the above list** are mapped to Robusta's INFO level. - -You can map your own Prometheus severities, using the ``custom_severity_map`` Helm value. For example: - -.. code-block:: yaml - - globalConfig: - custom_severity_map: - # maps a p1 value on your own alerts to Robusta's HIGH value - p1: high - # maps a p2 value on your own alerts to Robusta's LOW value - p2: low - -The mapped values must be one of: high, low, info, and debug. - - -Censoring Logs ----------------- - -Pod logs gathered by Robusta can be censored using `Python regular expressions `_. For example, a payment processing pod might have credit card numbers or other sensitive information in its logs. These can be automatically sanitized before they appear in notifications. - -**How to Enable Log Censoring for All Logs** - -To censor sensitive information in all logs, add the following to your Helm values file: - -.. code-block:: yaml - - globalConfig: - regex_replacement_style: SAME_LENGTH_ASTERISKS # Alternative: NAMED - regex_replacer_patterns: - - name: CreditCard - regex: "[0-9]{4}[- ][0-9]{4}[- ][0-9]{4}[- ][0-9]{4}" - - name: Email - regex: "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}" - - name: UUID - regex: "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}" - -After adding these values, perform a Helm upgrade: - -.. code-block:: bash - - helm upgrade robusta robusta/robusta -f values.yaml - -**Example: Before and After Censoring** - -Given the following pod log: - -.. 
code-block:: - - # Original pod log: - 2022-07-28 08:24:45.283 INFO user's uuid: '193836d9-9cce-4df9-a454-c2edcf2e80e5' - 2022-07-28 08:35:00.762 INFO Customer email: user@example.com - 2022-07-28 08:35:01.090 INFO Payment processed with card: 4111-1111-1111-1111 - -The censored output will appear as: - -.. code-block:: - - # Using SAME_LENGTH_ASTERISKS style: - 2022-07-28 08:24:45.283 INFO user's uuid: '************************************' - 2022-07-28 08:35:00.762 INFO Customer email: **************** - 2022-07-28 08:35:01.090 INFO Payment processed with card: ******************* - - # Using NAMED style: - 2022-07-28 08:24:45.283 INFO user's uuid: '[UUID]' - 2022-07-28 08:35:00.762 INFO Customer email: [Email] - 2022-07-28 08:35:01.090 INFO Payment processed with card: [CreditCard] - -**Note:** This censoring applies to logs displayed in Robusta's built-in notifications, including those shown by the following Robusta actions: - -- :code:`logs_enricher` - Shows container logs in various alerts -- :code:`report_crash_loop` - Shows container logs for crashing pods - -For specific actions, you can also override these settings in your playbook definitions if needed. - - -Memory allocation on big clusters ------------------------------------- - -On bigger clusters, increase Robusta's memory ``requests`` and ``limits`` - -Add this to Robusta's Helm values: - -.. 
code-block:: yaml - - runner: - resources: - requests: - memory: 2048Mi - limits: - memory: 2048Mi diff --git a/docs/setup-robusta/index.rst b/docs/setup-robusta/index.rst index de9ca5f7a..f5be5f44d 100644 --- a/docs/setup-robusta/index.rst +++ b/docs/setup-robusta/index.rst @@ -15,6 +15,7 @@ multi-cluster upgrade + tuning-performance configuration-secrets robusta-runner-metrics supported-clusters diff --git a/docs/setup-robusta/installation-faq.rst b/docs/setup-robusta/installation-faq.rst index 9d5f4b72f..7c2ed67b0 100644 --- a/docs/setup-robusta/installation-faq.rst +++ b/docs/setup-robusta/installation-faq.rst @@ -17,7 +17,7 @@ Yes, using the cli is optional. It auto-generates helm values, but you can also helm repo add robusta https://robusta-charts.storage.googleapis.com && helm repo update helm show values robusta/robusta -2. Modify those values to your heart's content. Refer to the :ref:`Integration Guide ` for details. +2. Modify those values to your heart's content. Refer to the :doc:`Send Alerts ` documentation for details. 3. Do a ``helm install``. @@ -52,7 +52,7 @@ Verify success by checking that Robusta pods are running: Does Robusta support Thanos/Cortex/Mimir/VictoriaMetrics? ============================================================ -Any Prometheus-compatible solution is fine. Just follow instructions under :ref:`Integrate with Existing Prometheus` +Any Prometheus-compatible solution is fine. Just follow instructions in the :doc:`Send Alerts ` documentation. Can I use Robusta with DataDog? ============================================================ @@ -64,6 +64,6 @@ It's being planned, speak to us on Slack. Does Robusta replace monitoring tools? ============================================================ -Robusta's :ref:`all-in-one package ` is a complete monitoring and observability solution. +Robusta's :ref:`all-in-one package ` is a complete monitoring and observability solution. 
Alternatively, you can keep your existing tools and add Robusta on top of them. diff --git a/docs/setup-robusta/installation/_helm_install_no_prometheus.inc.rst b/docs/setup-robusta/installation/_helm_install_no_prometheus.inc.rst index 13d081991..9a7d20ab3 100644 --- a/docs/setup-robusta/installation/_helm_install_no_prometheus.inc.rst +++ b/docs/setup-robusta/installation/_helm_install_no_prometheus.inc.rst @@ -44,7 +44,7 @@ On some clusters this can take a while, so don't panic if it appears stuck: .. tab-item:: GKE Autopilot :name: install-gke-autopilot - Due to Autopilot restrictions, some components are disabled for Robusta's bundled Prometheus. Don't worry, everything will still work. + Due to Autopilot restrictions, some components are disabled. Don't worry, everything will still work. .. code-block:: bash :name: cb-helm-install-gke-autopilot diff --git a/docs/setup-robusta/installation/_helm_install_with_prometheus.inc.rst b/docs/setup-robusta/installation/_helm_install_with_prometheus.inc.rst index 718fab8fb..243896776 100644 --- a/docs/setup-robusta/installation/_helm_install_with_prometheus.inc.rst +++ b/docs/setup-robusta/installation/_helm_install_with_prometheus.inc.rst @@ -44,7 +44,7 @@ On some clusters this can take a while, so don't panic if it appears stuck: .. tab-item:: GKE Autopilot :name: install-gke-autopilot - Due to Autopilot restrictions, some components are disabled for Robusta's bundled Prometheus. Don't worry, everything will still work. + Due to Autopilot restrictions, some components are disabled. Don't worry, everything will still work. .. 
code-block:: bash :name: cb-helm-install-gke-autopilot diff --git a/docs/setup-robusta/installation/_see_robusta_in_action.rst b/docs/setup-robusta/installation/_see_robusta_in_action.rst deleted file mode 100644 index bf21d7583..000000000 --- a/docs/setup-robusta/installation/_see_robusta_in_action.rst +++ /dev/null @@ -1,34 +0,0 @@ -See Robusta in action ------------------------------- - -Deploy a crashing pod: - -.. code-block:: bash - :name: cb-apply-crashpod - - kubectl apply -f https://gist.githubusercontent.com/robusta-lab/283609047306dc1f05cf59806ade30b6/raw - -Verify the pod is crashing: - -.. code-block:: console - :name: cb-verify-crash-pod-crashing - - $ kubectl get pods -A | grep crashpod - NAME READY STATUS RESTARTS AGE - crashpod-64d8fbfd-s2dvn 0/1 CrashLoopBackOff 1 7s - -Once the pod restarts twice, you'll get notified in your configured :ref:`sink `. - -.. admonition:: Example Slack Message - - .. image:: /images/crash-report.png - - -Now open the `Robusta UI `_ and look for the same message there. - -Finally, clean up the crashing pod: - -.. code-block:: bash - :name: cb-delete-crashpod - - kubectl delete deployment crashpod diff --git a/docs/setup-robusta/installation/all-in-one-installation.rst b/docs/setup-robusta/installation/all-in-one-installation.rst index 9e41fdd37..ad59856a9 100644 --- a/docs/setup-robusta/installation/all-in-one-installation.rst +++ b/docs/setup-robusta/installation/all-in-one-installation.rst @@ -2,11 +2,11 @@ .. _install-all-in-one: -Monitor Kubernetes from Scratch +Install Robusta + Prometheus #################################### *Estimated time: 5 minutes* -Setup Kubernetes monitoring from scratch. Install Robusta, Prometheus, and Grafana on Kubernetes using Helm. This is the recommended way to monitor your cluster, with an all-in-one package. +Set up Kubernetes monitoring from scratch. Install Robusta, Prometheus, and Grafana on Kubernetes using Helm. 
This is the recommended setup for users that are setting up Kubernetes monitoring from scratch. Prerequisites --------------------- @@ -21,10 +21,7 @@ Prerequisites .. include:: ./_helm_install_with_prometheus.inc.rst -.. include:: ./_see_robusta_in_action.rst - Next Steps --------------------------------- -* :ref:`See how Robusta improves Prometheus ` -* :ref:`Learn about AI-powered alert analysis ` +:doc:`Investigate your alerts with AI ` diff --git a/docs/setup-robusta/installation/extend-prometheus-installation.rst b/docs/setup-robusta/installation/extend-prometheus-installation.rst deleted file mode 100644 index 392474272..000000000 --- a/docs/setup-robusta/installation/extend-prometheus-installation.rst +++ /dev/null @@ -1,34 +0,0 @@ -:tocdepth: 2 - -.. _install-existing-prometheus: - -Integrate with Existing Prometheus -#################################### -*Estimated time: 5 minutes* - -Install Robusta alongside an existing Prometheus. See what Robusta can do. - -Prerequisites ---------------------- - -* A :ref:`supported Kubernetes cluster ` -* A Prometheus installation -* Helm - -.. jinja:: - :inline-ctx: {"gen_config_flags": "--no-enable-prometheus-stack"} - :header_update_levels: - :file: setup-robusta/installation/_generate_config.jinja - -.. include:: ./_helm_install_no_prometheus.inc.rst - -.. 
include:: ./_see_robusta_in_action.rst - -Next Steps ---------------------------------- - -Integrate Robusta with AlertManager: - -* :ref:`Follow a guide to integrate AlertManager ` -* :ref:`See the features you'll gain by integrating AlertManager ` -* :ref:`Configure AI analysis with your Prometheus data ` diff --git a/docs/setup-robusta/installation/index.rst b/docs/setup-robusta/installation/index.rst index db3a71679..04bd3e8e3 100644 --- a/docs/setup-robusta/installation/index.rst +++ b/docs/setup-robusta/installation/index.rst @@ -11,31 +11,23 @@ Installation Guides :maxdepth: 1 :hidden: - all-in-one-installation - extend-prometheus-installation standalone-installation + all-in-one-installation dev-setup .. grid:: 1 1 2 2 :gutter: 2 - .. grid-item-card:: Monitor Kubernetes from Scratch + .. grid-item-card:: Install Robusta :class-card: sd-bg-text-light - :link: all-in-one-installation - :link-type: doc - - Five minute setup. Great default alerts. Powered by Prometheus and Robusta. - - .. grid-item-card:: Add Robusta to Existing Prometheus - :class-card: sd-bg-light sd-bg-text-light - :link: extend-prometheus-installation + :link: standalone-installation :link-type: doc - Make your existing alerts better. Attach pod logs. Automatic alert insights. + Use Robusta's AI Agent alongside DataDog, NewRelic, SolarWinds, and more. - .. grid-item-card:: Use Robusta's AI Agent with other monitoring tools - :class-card: sd-bg-light sd-bg-text-light - :link: standalone-installation + .. grid-item-card:: Install Robusta + Prometheus + :class-card: sd-bg-text-light + :link: all-in-one-installation :link-type: doc - Use Robusta's AI Agent alongside DataDog, NewRelic, SolarWinds, and more. + Five minute setup. Great default alerts. Powered by Prometheus and Robusta. 
diff --git a/docs/setup-robusta/installation/standalone-installation.rst b/docs/setup-robusta/installation/standalone-installation.rst index 08a052025..c2feb08ba 100644 --- a/docs/setup-robusta/installation/standalone-installation.rst +++ b/docs/setup-robusta/installation/standalone-installation.rst @@ -2,18 +2,8 @@ .. _install-barebones: -Use HolmesGPT without Prometheus -#################################### - -*Estimated time: 5 minutes* - -Robusta's AI Agent works with many monitoring tools beyond Prometheus - including Datadog, New Relic, PagerDuty, and more. This installation method configures Robusta AI to analyze alerts and incidents from multiple data sources for comprehensive AI-powered investigation and automation capabilities. - -.. note:: - - This installation method is most relevant for Robusta SaaS users who want to integrate with existing monitoring tools. The AI Agent can analyze alerts from multiple sources and provide intelligent investigation across your entire observability stack. - - If you're looking for standalone open source monitoring, you should install Robusta with Prometheus using the :ref:`all-in-one installation ` instead. +Install Robusta +############### Prerequisites --------------------- @@ -28,10 +18,9 @@ Prerequisites .. include:: ./_helm_install_no_prometheus.inc.rst -.. include:: ./_see_robusta_in_action.rst - Next Steps --------------------------------- -* :ref:`Track Failed Kubernetes Jobs` \ No newline at end of file +1. :doc:`Send alerts to Robusta ` +2. :doc:`Investigate your alerts with AI ` \ No newline at end of file diff --git a/docs/setup-robusta/privacy-and-security.rst b/docs/setup-robusta/privacy-and-security.rst index ee0401587..ac1799774 100644 --- a/docs/setup-robusta/privacy-and-security.rst +++ b/docs/setup-robusta/privacy-and-security.rst @@ -42,7 +42,61 @@ Refer to :ref:`Managing Secrets`. Censoring Sensitive Data ************************* -Pod logs gathered by Robusta can be censored using regexes. 
Refer to the :ref:`Censoring Logs` guide for details. +Pod logs gathered by Robusta can be censored using `Python regular expressions `_. For example, a payment processing pod might have credit card numbers or other sensitive information in its logs. These can be automatically sanitized before they appear in notifications. + +How to Enable Log Censoring for All Logs +----------------------------------------- + +To censor sensitive information in all logs, add the following to your Helm values file: + +.. code-block:: yaml + + globalConfig: + regex_replacement_style: SAME_LENGTH_ASTERISKS # Alternative: NAMED + regex_replacer_patterns: + - name: CreditCard + regex: "[0-9]{4}[- ][0-9]{4}[- ][0-9]{4}[- ][0-9]{4}" + - name: Email + regex: "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}" + - name: UUID + regex: "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}" + +After adding these values, perform a Helm upgrade: + +.. code-block:: bash + + helm upgrade robusta robusta/robusta -f values.yaml + +Example: Before and After Censoring +------------------------------------ + +Given the following pod log: + +.. code-block:: + + # Original pod log: + 2022-07-28 08:24:45.283 INFO user's uuid: '193836d9-9cce-4df9-a454-c2edcf2e80e5' + 2022-07-28 08:35:00.762 INFO Customer email: user@example.com + 2022-07-28 08:35:01.090 INFO Payment processed with card: 4111-1111-1111-1111 + +The censored output will appear as: + +.. 
code-block:: + + # Using SAME_LENGTH_ASTERISKS style: + 2022-07-28 08:24:45.283 INFO user's uuid: '************************************' + 2022-07-28 08:35:00.762 INFO Customer email: **************** + 2022-07-28 08:35:01.090 INFO Payment processed with card: ******************* + + # Using NAMED style: + 2022-07-28 08:24:45.283 INFO user's uuid: '[UUID]' + 2022-07-28 08:35:00.762 INFO Customer email: [Email] + 2022-07-28 08:35:01.090 INFO Payment processed with card: [CreditCard] + +**Note:** This censoring applies to logs displayed in Robusta's built-in notifications, including those shown by the following Robusta actions: + +- :code:`logs_enricher` - Shows container logs in various alerts +- :code:`report_crash_loop` - Shows container logs for crashing pods Limiting Robusta's Access in Your Cluster ******************************************* diff --git a/docs/setup-robusta/tuning-performance.rst b/docs/setup-robusta/tuning-performance.rst new file mode 100644 index 000000000..67fe87175 --- /dev/null +++ b/docs/setup-robusta/tuning-performance.rst @@ -0,0 +1,18 @@ +Monitoring Large Clusters +========================= + +Memory Allocation on Big Clusters +---------------------------------- + +On bigger clusters, increase Robusta's memory ``requests`` and ``limits``. + +Add this to Robusta's Helm values: + +.. code-block:: yaml + + runner: + resources: + requests: + memory: 2048Mi + limits: + memory: 2048Mi diff --git a/docs/track-changes/kubernetes-changes.rst b/docs/track-changes/kubernetes-changes.rst new file mode 100644 index 000000000..90f7f6c48 --- /dev/null +++ b/docs/track-changes/kubernetes-changes.rst @@ -0,0 +1,8 @@ +Kubernetes Changes +================== + +When using Robusta SaaS, Robusta automatically tracks all Kubernetes changes and correlates them with alerts. + +This provides context about recent changes when investigating issues, helping you quickly identify if a deployment, configuration update, or other change caused a problem. 
+ +Looking to get push notifications (e.g. Slack or other sinks) when Kubernetes resources change? See the :doc:`Kubernetes Change Notifications ` guide in the Advanced section. diff --git a/playbooks/robusta_playbooks/kubectl_enrichments.py b/playbooks/robusta_playbooks/kubectl_enrichments.py index a7b8fd60d..6fbb1eeac 100644 --- a/playbooks/robusta_playbooks/kubectl_enrichments.py +++ b/playbooks/robusta_playbooks/kubectl_enrichments.py @@ -19,7 +19,7 @@ class KubectlParams(PodRunningParams): """ - :var kubectl_command: The full kubectl command to run, formatted as a shell command string. + :var command: The full kubectl command to run, formatted as a shell command string. :var description: A description of the command that was run. :var timeout: The maximum time (in seconds) to wait for the kubectl command to complete. Default is 3600 seconds. """ diff --git a/playbooks/robusta_playbooks/sink_enrichments.py b/playbooks/robusta_playbooks/sink_enrichments.py index cce166a5e..8a21450a5 100644 --- a/playbooks/robusta_playbooks/sink_enrichments.py +++ b/playbooks/robusta_playbooks/sink_enrichments.py @@ -25,7 +25,7 @@ class SlackCallbackParams(ActionParams): class OpsGenieAckParams(SlackCallbackParams): """ - :var alertmanager_url: Alternative Alert Manager url to send requests. + :var alert_fingerprint: The fingerprint of the alert to acknowledge. """ alert_fingerprint: str