diff --git a/docs/conf.py b/docs/conf.py index e05cbeb91..1b6fb7abb 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -122,7 +122,35 @@ "tutorials/alert-custom-prometheus.html": "/master/configuration/alertmanager-integration/alert-custom-prometheus.html", "catalog/triggers/prometheus.html": "/master/configuration/alertmanager-integration/index.html", "playbook-reference/prometheus-examples/alert-remediation.html": "/master/playbook-reference/automatic-remediation-examples/index.html", - "configuration/ai-analysis.html": "/master/configuration/holmesgpt/index.html", + "configuration/ai-analysis.html": "/master/configuration/holmesgpt/main-features.html", + "configuration/holmesgpt/index.html": "/master/configuration/holmesgpt/main-features.html", + # AI Analysis pages redirects to holmesgpt.dev (docs have moved there) + "configuration/holmesgpt/builtin_toolsets.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/", + "configuration/holmesgpt/permissions.html": "https://holmesgpt.dev/data-sources/permissions/", + "configuration/holmesgpt/custom_toolsets.html": "https://holmesgpt.dev/data-sources/custom-toolsets/", + "configuration/holmesgpt/remote_mcp_servers.html": "https://holmesgpt.dev/data-sources/remote-mcp-servers/", + # Individual toolset page redirects + "configuration/holmesgpt/toolsets/argocd.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/argocd/", + "configuration/holmesgpt/toolsets/aws.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/aws/", + "configuration/holmesgpt/toolsets/confluence.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/confluence/", + "configuration/holmesgpt/toolsets/coralogix_logs.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/coralogix-logs/", + "configuration/holmesgpt/toolsets/datadog_logs.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/datadog/", + "configuration/holmesgpt/toolsets/datetime.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/datetime/", + 
"configuration/holmesgpt/toolsets/docker.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/docker/", + "configuration/holmesgpt/toolsets/grafanaloki.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/grafanaloki/", + "configuration/holmesgpt/toolsets/grafanatempo.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/grafanatempo/", + "configuration/holmesgpt/toolsets/helm.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/helm/", + "configuration/holmesgpt/toolsets/internet.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/internet/", + "configuration/holmesgpt/toolsets/kafka.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/kafka/", + "configuration/holmesgpt/toolsets/kubernetes.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/kubernetes/", + "configuration/holmesgpt/toolsets/newrelic.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/newrelic/", + "configuration/holmesgpt/toolsets/notion.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/notion/", + "configuration/holmesgpt/toolsets/opensearch_logs.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/opensearch-logs/", + "configuration/holmesgpt/toolsets/opensearch_status.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/opensearch-status/", + "configuration/holmesgpt/toolsets/prometheus.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/prometheus/", + "configuration/holmesgpt/toolsets/rabbitmq.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/rabbitmq/", + "configuration/holmesgpt/toolsets/robusta.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/robusta/", + "configuration/holmesgpt/toolsets/slab.html": "https://holmesgpt.dev/data-sources/builtin-toolsets/slab/", "coverage.html": "/master/how-it-works/coverage.html", "tutorials/python-profiling.html": "/master/playbook-reference/actions/python-troubleshooting.html#python-profiler", "tutorials/more-tutorials.html": 
"/master/community-tutorials.html", diff --git a/docs/configuration/alertmanager-integration/_alertmanager-config.rst b/docs/configuration/alertmanager-integration/_alertmanager-config.rst index 0aabfcc55..59f92578d 100644 --- a/docs/configuration/alertmanager-integration/_alertmanager-config.rst +++ b/docs/configuration/alertmanager-integration/_alertmanager-config.rst @@ -1,63 +1,63 @@ -.. admonition:: AlertManager config for sending alerts to Robusta - - .. tab-set:: - - .. tab-item:: kube-prometheus-stack (Prometheus Operator) - - Add the following to your `AlertManager's config Secret `_ - - Do not apply in other ways, they all `have limitations `_ and won't forward all alerts. - - .. code-block:: yaml - - receivers: - - name: 'robusta' - webhook_configs: - - url: 'http://-runner..svc.cluster.local/api/alerts' - send_resolved: true # (3) - - name: 'default-receiver' - - route: # (1) - routes: - - receiver: 'robusta' - group_by: [ '...' ] - group_wait: 1s - group_interval: 1s - matchers: - - severity =~ ".*" - repeat_interval: 4h - continue: true # (2) - receiver: 'default-receiver' - - .. code-annotations:: - 1. Put Robusta's route as the first route, to guarantee it receives alerts. If you can't do so, you must guarantee all previous routes set ``continue: true`` set. - 2. Keep sending alerts to receivers defined after Robusta. - 3. Important, so Robusta knows when alerts are resolved. - - .. tab-item:: Other Prometheus Installations - - Add the following to your AlertManager configuration, wherever it is defined. - - .. code-block:: yaml - - receivers: - - name: 'robusta' - webhook_configs: - - url: 'http://-runner..svc.cluster.local/api/alerts' - send_resolved: true # (3) - - route: # (1) - routes: - - receiver: 'robusta' - group_by: [ '...' ] - group_wait: 1s - group_interval: 1s - matchers: - - severity =~ ".*" - repeat_interval: 4h - continue: true # (2) - - .. code-annotations:: - 1. 
Put Robusta's route as the first route, to guarantee it receives alerts. If you can't do so, you must guarantee all previous routes set ``continue: true`` set. - 2. Keep sending alerts to receivers defined after Robusta. - 3. Important, so Robusta knows when alerts are resolved. +Configure your AlertManager to send alerts to Robusta: + +.. tab-set:: + + .. tab-item:: kube-prometheus-stack (Prometheus Operator) + + Add the following to your `AlertManager's config Secret `_. + + Do not apply in other ways, they all `have limitations `_ and won't forward all alerts. + + .. code-block:: yaml + + receivers: + - name: 'robusta' + webhook_configs: + - url: 'http://-runner..svc.cluster.local/api/alerts' + send_resolved: true # (3) + - name: 'default-receiver' + + route: # (1) + routes: + - receiver: 'robusta' + group_by: [ '...' ] + group_wait: 1s + group_interval: 1s + matchers: + - severity =~ ".*" + repeat_interval: 4h + continue: true # (2) + receiver: 'default-receiver' + + .. code-annotations:: + 1. Put Robusta's route as the first route, to guarantee it receives alerts. If you can't do so, you must guarantee all previous routes set ``continue: true``. + 2. Keep sending alerts to receivers defined after Robusta. + 3. Important, so Robusta knows when alerts are resolved. + + .. tab-item:: Other Prometheus Installations + + Add the following to your AlertManager configuration, wherever it is defined. + + .. code-block:: yaml + + receivers: + - name: 'robusta' + webhook_configs: + - url: 'http://-runner..svc.cluster.local/api/alerts' + send_resolved: true # (3) + + route: # (1) + routes: + - receiver: 'robusta' + group_by: [ '...' ] + group_wait: 1s + group_interval: 1s + matchers: + - severity =~ ".*" + repeat_interval: 4h + continue: true # (2) + + .. code-annotations:: + 1. Put Robusta's route as the first route, to guarantee it receives alerts. If you can't do so, you must guarantee all previous routes set ``continue: true``. + 2. 
Keep sending alerts to receivers defined after Robusta. + 3. Important, so Robusta knows when alerts are resolved. \ No newline at end of file diff --git a/docs/configuration/alertmanager-integration/_prometheus_flags_check.rst b/docs/configuration/alertmanager-integration/_prometheus_flags_check.rst index 959def18d..c4b251074 100644 --- a/docs/configuration/alertmanager-integration/_prometheus_flags_check.rst +++ b/docs/configuration/alertmanager-integration/_prometheus_flags_check.rst @@ -1,9 +1,8 @@ - Robusta utilizes the flags API to retrieve data from Prometheus-style metric stores. However, some platforms like Google Managed Prometheus, Azure Managed Prometheus etc, do not implement the flags API. -You can disable the Prometheus flags API check by setting the following option to ``false``. +You can disable the Prometheus flags API check by setting the following option to false. .. code-block:: yaml - globalConfig: - check_prometheus_flags: true/false + globalConfig: + check_prometheus_flags: true/false \ No newline at end of file diff --git a/docs/configuration/alertmanager-integration/_pull_integration.rst b/docs/configuration/alertmanager-integration/_pull_integration.rst index 35aec4e0a..aa5dc5778 100644 --- a/docs/configuration/alertmanager-integration/_pull_integration.rst +++ b/docs/configuration/alertmanager-integration/_pull_integration.rst @@ -1,81 +1,10 @@ Configure Metric Querying ==================================== -Metrics querying lets Robusta pull metrics and create silences. +To enable Robusta to pull metrics and create silences, you need to configure Prometheus and AlertManager URLs. -If Robusta fails to auto-detect the Prometheus and Alertmanager urls - and you see related connection errors in the logs - configure the ``prometheus_url`` and ``alertmanager_url`` in your Helm values and :ref:`update Robusta ` +See :doc:`Prometheus and metrics configuration ` for detailed instructions. -.. code-block:: yaml +.. 
note:: - globalConfig: # This line should already exist - # Add the lines below - alertmanager_url: "http://ALERT_MANAGER_SERVICE_NAME.NAMESPACE.svc.cluster.local:9093" # (1) - prometheus_url: "http://PROMETHEUS_SERVICE_NAME.NAMESPACE.svc.cluster.local:9090" # (2) - - # If Prometheus has data for multiple clusters, tell Robusta how to query data for this cluster only - # prometheus_additional_labels: - # cluster: 'CLUSTER_NAME_HERE' - - # If using Grafana alerts, add this too - # grafana_api_key: # (3) - # alertmanager_flavor: grafana - - # If necessary, see docs below - # prometheus_auth: ... - # alertmanager_auth: ... - - # If using a multi-tenant prometheus or alertmanager, pass the org id to all queries - # prometheus_additional_headers: - # X-Scope-OrgID: - # alertmanager_additional_headers: - # X-Scope-OrgID: - -.. code-annotations:: - 1. Example: http://alertmanager-Helm_release_name-kube-prometheus-alertmanager.default.svc.cluster.local:9093. - 2. Example: http://Helm_Release_Name-kube-prometheus-prometheus.default.svc.cluster.local:9090 - 3. This is necessary for Robusta to create silences when using Grafana Alerts, because of minor API differences in the AlertManager embedded in Grafana. - -You can optionally setup authentication, SSL verification, and other parameters described below. - -Verify it Works -^^^^^^^^^^^^^^^^^ -Open any application in the Robusta UI. If CPU and memory graphs are shown, everything is working. - -If you don't use the Robusta UI, trigger a `demo OOMKill alert `_, -and verify that Robusta sends a Slack/Teams message with a memory graph included. If so, everything is configured properly. - -Optional Settings -============================= - -Authentication Headers -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If Prometheus and/or AlertManager require authentication, add the following to ``generated_values.yaml``: - -.. 
code-block:: yaml - - globalConfig: - prometheus_auth: Bearer # Replace with your actual token or use any other auth header as needed - alertmanager_auth: Basic # Replace with your actual credentials, base64-encoded, or use any other auth header as needed - -These settings may be configured independently. - -SSL Verification -^^^^^^^^^^^^^^^^^^^^ -By default, Robusta does not verify the SSL certificate of the Prometheus server. - -To enable SSL verification, add the following to Robusta's ``generated_values.yaml``: - -.. code-block:: yaml - - runner: - additional_env_vars: - - name: PROMETHEUS_SSL_ENABLED - value: "true" - -If you have a custom Certificate Authority (CA) certificate, add one more setting: - -.. code-block:: yaml - - runner: - certificate: "" # base64-encoded certificate value + Robusta will attempt to auto-detect Prometheus and AlertManager URLs in your cluster. Manual configuration is only needed if auto-detection fails. \ No newline at end of file diff --git a/docs/configuration/alertmanager-integration/_testing_integration.rst b/docs/configuration/alertmanager-integration/_testing_integration.rst index 41bfda27b..20b1f831f 100644 --- a/docs/configuration/alertmanager-integration/_testing_integration.rst +++ b/docs/configuration/alertmanager-integration/_testing_integration.rst @@ -56,4 +56,4 @@ If everything is setup properly, this alert will reach Robusta. It will show up Robusta enriches alerts with Kubernetes and log data using Prometheus labels for mapping. Standard label names are used by default. If your setup differs, you can - :ref:`customize this mapping ` to fit your environment. + `customize this mapping `_ to fit your environment. 
diff --git a/docs/configuration/alertmanager-integration/alert-manager.rst b/docs/configuration/alertmanager-integration/alert-manager.rst index cc5ffa4cd..3ab76349d 100644 --- a/docs/configuration/alertmanager-integration/alert-manager.rst +++ b/docs/configuration/alertmanager-integration/alert-manager.rst @@ -1,13 +1,9 @@ -In-cluster Prometheus +In-cluster AlertManager Integration **************************************** -Here's how to integrate an existing Prometheus with Robusta in the same cluster: +This guide shows how to send alerts from an existing AlertManager to Robusta in the same cluster. -* Send alerts to Robusta by adding a receiver to AlertManager -* Point Robusta at Prometheus so it can query metrics and silence alerts - * Robusta will attempt auto-detection, so this is not always necessary! - -If your Prometheus is in a different cluster, refer to :ref:`External Prometheus`. +If your AlertManager is in a different cluster, refer to :ref:`External Prometheus`. Send Alerts to Robusta ============================ diff --git a/docs/configuration/alertmanager-integration/azure-managed-prometheus.rst b/docs/configuration/alertmanager-integration/azure-managed-prometheus.rst index 7bee765d7..ce6af2d32 100644 --- a/docs/configuration/alertmanager-integration/azure-managed-prometheus.rst +++ b/docs/configuration/alertmanager-integration/azure-managed-prometheus.rst @@ -1,7 +1,9 @@ -Azure managed Prometheus -************************* +Azure Managed Prometheus Alerts +********************************* -This guide walks you through integrating your Azure managed Prometheus with Robusta. You will need to configure two integrations: one to send alerts to Robusta and another to let Robusta query metrics and create silences. +This guide shows how to send alerts from Azure Managed Prometheus to Robusta. + +For configuring metric querying and advanced settings, see :doc:`/configuration/metric-providers-azure`. 
Send Alerts to Robusta =============================== @@ -22,107 +24,4 @@ This integration sends Azure Managed Prometheus alerts to Robusta. To configure Configure Metric Querying =============================== -Metrics querying lets Robusta pull metrics from Azure Managed Prometheus. - -This can be configured either of two ways: - -.. details:: Option #1: Create an Azure Active Directory authentication app - - **Pros:** - - Quick setup. Just need to create an app, get the credentials and add them to the manifests - - Other pods can't use the Service Principal without having the secret - **Cons:** - - Requires a service principal (Azure AD permission) - - Need the client secret in the kubernetes manifests - - Client secret expires, you need to manage its rotation - -.. details:: Option #2: Use kubelet Managed Identity - - **Pros:** - * Quick setup. Get the Managed Identity Client ID and add them to the manifests - * No need to manage secrets. Removing the password element decreases the risk of the credentials being compromised - **Cons:** - * Managed Identity is bound to the entire VMSS, which means that other pods can use it if they have the client ID - -Retrieve the Azure Prometheus query endpoint -============================================== - -Whichever method you choose, you will need an Azure Prometheus query endpoint: - -1. Go to `Azure Monitor workspaces `_ and choose your monitored workspace. -2. In your monitored workspace, `overview`, find the ``Query endpoint`` and copy it. -3. In your `generated_values.yaml` file add the query endpoint URL under ``globalConfig`` with a 443 port: - -.. code-block:: yaml - - globalConfig: # this line should already exist - prometheus_url: ":443" - -Option #1: Create an Azure authentication app -============================================== - -Create an Azure authentication app and get credentials for Robusta to access Prometheus data: - -1. 
Follow the Azure guide to `register an app with Azure Active Directory `_ - -2. In your generated_values.yaml file add environment variables from the previous step. - -.. code-block:: yaml - - runner: - additional_env_vars: - - name: PROMETHEUS_SSL_ENABLED - value: "true" - - name: AZURE_CLIENT_ID - value: "" - - name: AZURE_TENANT_ID - value: "" - - name: AZURE_CLIENT_SECRET - value: "" - -3. Complete the step `allow your app access to your workspace `_, so your app can query data from your Azure Monitor workspace. - -Option #2: Use Kubelet's Managed Identity -============================================== - -Instead of creating an Azure authentication app, you can use kubelet's Managed Identity to access Prometheus. -(As a variation on this, you can also create a new User Assigned Managed Identity and bind it to the underlying VMSS.) - -1. Get the AKS kubelet's Managed Identity Client ID: - -.. code-block:: bash - - az aks show -g -n --query identityProfile.kubeletidentity.clientId -o tsv - -2. In your generated_values.yaml file add the following environment variables from the previous step. - -.. code-block:: yaml - - runner: - additional_env_vars: - - name: PROMETHEUS_SSL_ENABLED - value: "true" - - name: AZURE_USE_MANAGED_ID - value: "true" - - name: AZURE_CLIENT_ID - value: "" - - name: AZURE_TENANT_ID - value: "" - -3. Give access to your Managed Identity on your workspace: - - a. Open the Access Control (IAM) page for your Azure Monitor workspace in the Azure portal. - b. Select Add role assignment. - c. Select Monitoring Data Reader and select Next. - d. For Assign access to, select Managed identity. - e. Select + Select members. - f. Select the Managed Identity you got from step 1. - g. Select Review + assign to save the configuration. - - -Optional Settings -================== - -**Prometheus flags checks** - -.. 
include:: ./_prometheus_flags_check.rst +To enable Robusta to pull metrics from Azure Managed Prometheus, see :doc:`/configuration/metric-providers-azure`. diff --git a/docs/configuration/alertmanager-integration/coralogix_managed_prometheus.rst b/docs/configuration/alertmanager-integration/coralogix_managed_prometheus.rst index 593349b77..a9ff600b1 100644 --- a/docs/configuration/alertmanager-integration/coralogix_managed_prometheus.rst +++ b/docs/configuration/alertmanager-integration/coralogix_managed_prometheus.rst @@ -1,7 +1,9 @@ -Coralogix Managed Prometheus -******************************** +Coralogix Alerts +***************** -This guide walks you through integrating your Coralogix managed Prometheus with Robusta. You will need to configure two integrations: one to send alerts to Robusta and another to let Robusta query metrics and create silences. +This guide shows how to send alerts from Coralogix to Robusta. + +For configuring metric querying from Coralogix Prometheus, see :doc:`/configuration/metric-providers-coralogix`. Send Alerts to Robusta =============================== @@ -10,7 +12,7 @@ This integration lets you send Coralogix alerts to Robusta. To configure it: -1. In the Coralogix site go to Data Flow and in the Webhook section click ``Webhook``. +1. In the Coralogix site go to Data Flow, then Outbound Webhooks, and click ``Generic webhook``. 2. In the url insert: .. code-block:: @@ -57,48 +59,11 @@ To configure it: } -6. Click the 'Test Config' button and check your robusta sinks that you received an alert +6. Click the 'Test Config' button and check your Robusta sink for a "Test configuration" alert. 7. Click Save Configure Metric Querying ============================== -Metrics querying lets Robusta pull metrics from Coralogix Managed Prometheus. - -1. Go to `Coralogix Documentation `_ and choose the relevant 'PromQL Endpoint' from their table. -2. In your `generated_values.yaml` file add the endpoint url: - -.. 
code-block:: yaml - - # this line should already exist - globalConfig: - prometheus_url: "" #for example https://prom-api.coralogix.com - # To add any labels that are relevant to the specific cluster uncomment and change the lines below (optional) - # prometheus_additional_labels: - # cluster: 'CLUSTER_NAME_HERE' - - -.. code-annotations:: - 1. This is necessary for Robusta to create silences when using Grafana Alerts, because of minor API differences in the AlertManager embedded in Grafana. - - -3. On the Coralogix site, go to Data Flow -> Api Keys and copy the 'Logs Query Key' - -.. note:: If one does not exist you will have to generate a new one by clicking 'GENERATE NEW API KEY' - -4. Create a secret in your cluster with your key logs_query_key and the value as the key you just copied - -5. In your generated_values.yaml file add the following environment variables from the previous step replacing MY_CORLOGIX_SECRET with your secret name. - -.. code-block:: yaml - - runner: - additional_env_vars: - - name: PROMETHEUS_SSL_ENABLED - value: "true" - - name: CORALOGIX_PROMETHEUS_TOKEN - valueFrom: - secretKeyRef: - name: MY_CORALOGIX_SECRET - key: logs_query_key +To enable Robusta to pull metrics from Coralogix Prometheus, see the metrics provider settings in :doc:`/configuration/metric-providers-coralogix`. diff --git a/docs/configuration/alertmanager-integration/eks-managed-prometheus.rst b/docs/configuration/alertmanager-integration/eks-managed-prometheus.rst index 4329a5485..8c363ad50 100644 --- a/docs/configuration/alertmanager-integration/eks-managed-prometheus.rst +++ b/docs/configuration/alertmanager-integration/eks-managed-prometheus.rst @@ -1,60 +1,26 @@ -AWS Managed Prometheus -************************* +AWS Managed Prometheus Alerts +****************************** +.. 
warning:: -You'll need to configure two integrations: one to send alerts to Robusta and another to let Robusta query metrics and create silences. This guide only covers the integration to query metrics. + Due to updates in the AWS Managed Prometheus API, these instructions may be outdated. + Please contact our team for support on Slack (https://bit.ly/robusta-slack) or by email (support@robusta.dev). + We're working on updating the documentation. -Configure Metric Querying -=============================== - -Metrics querying lets Robusta pull metrics from AWS Managed Prometheus. - -1. Create an AWS access key, `See guide here `_. - -2. In your cluster, create a secret with your access key and secret access key, named `aws-secret-key`. - -3. Collect the URL for your AWS Managed Prometheus workspace. +AWS Managed Prometheus uses Amazon Managed Grafana for alerting. To send alerts to Robusta, configure your Grafana instance to forward alerts. -4. Append the following to your `generated_values.yaml` file. +For configuring metric querying from AWS Managed Prometheus, see :doc:`/configuration/metric-providers-aws`. -.. code-block:: yaml +Send Alerts to Robusta +====================== - globalConfig: - ... - prometheus_url: AWS_PROMETHEUS_URL +Since AWS Managed Prometheus doesn't have a built-in AlertManager, you'll need to: - # Create silences when using Grafana alerts (optional) - # grafana_api_key: # (1) - # alertmanager_flavor: grafana +1. Set up Amazon Managed Grafana with your AMP workspace +2. Configure Grafana alerts to send to Robusta +3. 
See :doc:`grafana-alert-manager` for detailed Grafana alerting setup - runner: - additional_env_vars: - - name: PROMETHEUS_SSL_ENABLED - value: "true" - - name: AWS_ACCESS_KEY - value: - - name: AWS_ACCESS_KEY - valueFrom: - secretKeyRef: - name: aws-secret-key - key: - - name: AWS_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - name: aws-secret-key - key: - - name: AWS_SERVICE_NAME - value: "aps" # , it is usually aps - - name: AWS_REGION - value: - -.. code-annotations:: - 1. This is necessary for Robusta to create silences when using Grafana Alerts, because of minor API differences in the AlertManager embedded in Grafana. - -Optional Settings -================== - -**Prometheus flags checks** +Configure Metric Querying +========================= -.. include:: ./_prometheus_flags_check.rst +To enable Robusta to pull metrics from AWS Managed Prometheus, see :doc:`/configuration/metric-providers-aws`. diff --git a/docs/configuration/alertmanager-integration/google-managed-prometheus.rst b/docs/configuration/alertmanager-integration/google-managed-prometheus.rst index 326056395..c8fa5e42a 100644 --- a/docs/configuration/alertmanager-integration/google-managed-prometheus.rst +++ b/docs/configuration/alertmanager-integration/google-managed-prometheus.rst @@ -1,9 +1,15 @@ -Google Managed Prometheus -========================== +Google Managed Prometheus Alerts +================================= -This guide walks you through integrating your `Google Managed Prometheus `_ with Robusta. +.. warning:: -You will need to configure two integrations: one to send alerts to Robusta and another to let Robusta query metrics and create silences. + Due to updates in the Google Managed Prometheus API, these instructions may be outdated. + Please contact our team for support on Slack (https://bit.ly/robusta-slack) or by email (support@robusta.dev). + We're working on updating the documentation. + +This guide shows how to send alerts from `Google Managed Prometheus `_ to Robusta. 
+ +For configuring metric querying from Google Managed Prometheus, see :doc:`/configuration/metric-providers-google`. Prerequisites **************** @@ -62,23 +68,4 @@ You know it works if you receive an alert from Robusta. Configure Metric Querying ****************************** -A pull integration lets Robusta pull metrics and create silences. - -Add the following to Robusta's configuration(``generated_values.yaml``) and :ref:`update Robusta `. - -.. code-block:: yaml - - globalConfig: # this line should already exist - prometheus_url: "http://frontend.default.svc.cluster.local:9090" - alertmanager_url: "http://alertmanager.gmp-system.svc.cluster.local:9093" - - -Verify it Works ---------------------- -Run the following command to create a Pod that triggers an OOMKilled alert - -.. code-block:: yaml - - kubectl apply -f https://raw.githubusercontent.com/robusta-dev/kubernetes-demos/main/oomkill/oomkill_job.yaml - -You know it works if you receive an alert from Robusta with a graph. +To enable Robusta to pull metrics from Google Managed Prometheus, see :doc:`/configuration/metric-providers-google`. diff --git a/docs/configuration/alertmanager-integration/grafana-alert-manager.rst b/docs/configuration/alertmanager-integration/grafana-alert-manager.rst index 7e5b0a0c5..011c60c95 100644 --- a/docs/configuration/alertmanager-integration/grafana-alert-manager.rst +++ b/docs/configuration/alertmanager-integration/grafana-alert-manager.rst @@ -1,5 +1,5 @@ -Grafana AlertManager -**************************************** +Grafana Alerts +************** Grafana can send alerts to the Robusta timeline for visualization and AI investigation. @@ -91,7 +91,7 @@ Kubernetes Alerts ================================= In case your alerts are from a Kubernetes cluster monitored by Robusta, and your alerts has a ``cluster`` label, make sure it matches the ``cluster_name`` that appears in Robusta ``generated_values.yaml``. 
-** This is optional - you can send any alert to the Robusta timeline! ** +**This is optional - you can send any alert to the Robusta timeline!** Send Alerts to Robusta for enrichments @@ -189,35 +189,8 @@ Alternatively, trigger a `demo OOMKill alert # Replace with your actual token or use any other auth header as needed - alertmanager_auth: Basic # Replace with your actual credentials, base64-encoded, or use any other auth header as needed - -These settings may be configured independently. - -SSL Verification -^^^^^^^^^^^^^^^^^^^^ -By default, Robusta does not verify the SSL certificate of the Prometheus server. - -To enable SSL verification, add the following to Robusta's ``generated_values.yaml``: - -.. code-block:: yaml - - runner: - additional_env_vars: - - name: PROMETHEUS_SSL_ENABLED - value: "true" - -If you have a custom Certificate Authority (CA) certificate, add one more setting: - -.. code-block:: yaml - - runner: - certificate: "" # base64-encoded certificate value +- :doc:`/configuration/metric-providers-in-cluster` for in-cluster Prometheus +- :doc:`/configuration/metric-providers-external` for external Prometheus +- Or the appropriate cloud provider metric documentation diff --git a/docs/configuration/alertmanager-integration/outofcluster-prometheus.rst b/docs/configuration/alertmanager-integration/outofcluster-prometheus.rst index 3d001e78a..9870fdd06 100644 --- a/docs/configuration/alertmanager-integration/outofcluster-prometheus.rst +++ b/docs/configuration/alertmanager-integration/outofcluster-prometheus.rst @@ -62,4 +62,4 @@ You can do so with the ``prometheus_url_query_string`` parameter, shown below: globalConfig: # Additional query string parameters to be appended to the Prometheus connection URL (optional) - prometheus_url_query_string: "cluster=prod1&x=y" + prometheus_url_query_string: "cluster=prod1&x=y" \ No newline at end of file diff --git a/docs/configuration/alertmanager-integration/troubleshooting-alertmanager.rst 
b/docs/configuration/alertmanager-integration/troubleshooting-alertmanager.rst index ce098eacd..76ea56fd9 100644 --- a/docs/configuration/alertmanager-integration/troubleshooting-alertmanager.rst +++ b/docs/configuration/alertmanager-integration/troubleshooting-alertmanager.rst @@ -1,13 +1,13 @@ -Integrating AlertManager with the UI -************************************************* +Sending Alerts to the Robusta UI +================================= Why Send Your Alerts to Robusta? ---------------------------------------- +--------------------------------- Benefits include: * Persistent alert history on a filterable timeline -* Centralized view of alerts from all sources and AlertManager instances +* Centralized view of alerts from all your monitoring systems (multiple Prometheus instances, cloud services, custom tools) * AI investigation of alerts * Correlations between alerts and Kubernetes deploys * and more! @@ -15,26 +15,75 @@ Benefits include: .. image:: /images/robusta-ui-timeline.png :alt: Prometheus Alert History -How to Send Your Alerts To Robusta ---------------------------------------- +Setting Up Alert Integration +----------------------------- -Choose one of the following options: +To configure alert integration with your monitoring system, see :doc:`Alert Sources `. -1. :ref:`Enable Robusta's embedded kube-prometheus-stack stack ` -2. :ref:`Add a webhook to your existing AlertManager (or equivalent integration) `. +Common Troubleshooting Scenarios +--------------------------------- -Troubleshooting the embedded kube-prometheus-stack ------------------------------------------------------ +.. tab-set:: -1. Did you install Robusta in the last 10 minutes? If so, wait 10 minutes and see if the problem resolves on its own. -2. Check if all Prometheus and AlertManager related pods are running and healthy -3. If you see OOMKills, increase the memory limits for the relevant pods. -4. 
If you are still having trouble, please reach out on our `Slack community `_. + .. tab-item:: General Issues -Troubleshooting an external AlertManager webhook -------------------------------------------------------- + **Not receiving alerts in Robusta UI?** -1. Are there errors in your AlertManager logs? -2. Are there errors in the Prometheus Operator logs (if relevant)? -3. Is Robusta the first receiver in your AlertManager configuration? If not, are all previous receivers configured with ``continue: true``? -4. If you are still having trouble, please reach out on our `Slack community `_. + 1. **Just installed?** Wait 10 minutes after installation for all components to initialize + 2. **Check your specific integration:** Each alert source has its own troubleshooting guide on its documentation page + 3. **Verify authentication:** Ensure API keys and webhook URLs are correctly configured + + **Need to test your integration?** + + Refer to your specific alert source documentation for testing procedures. + + .. tab-item:: AlertManager + + **Not receiving alerts?** + + 1. **Verify routing configuration:** + + - Ensure Robusta is the first receiver in your AlertManager configuration, or + - All previous receivers have ``continue: true`` set + - See configuration examples in your specific alert source documentation + + 2. **Check logs for errors:** + + - Review AlertManager logs for webhook errors + - Check Prometheus Operator logs (if using kube-prometheus-stack) + - Look for errors in Robusta runner logs + + 3. **Check pod health (embedded Prometheus stack):** + + - Verify all Prometheus and AlertManager pods are running + - Look for OOMKills and increase memory limits if needed + - See :doc:`Embedded Prometheus troubleshooting ` + + 4. 
**Verify network connectivity (external AlertManager):** + + - Test connectivity to Robusta webhook endpoint + - Check firewall rules and network policies + - Ensure AlertManager can resolve DNS names + + **Alerts arriving but missing Kubernetes context?** + + Check :doc:`Alert Label Mapping ` to customize how Prometheus labels map to Kubernetes resources. + + +Testing Your Integration +------------------------ + +Each alert source has specific testing methods: + +* **Standard AlertManager**: Use ``robusta demo-alert`` command +* **Cloud Services**: Check the specific service's documentation for test procedures +* **Custom Systems**: Use the test features built into your monitoring platform + +Refer to your specific integration documentation for detailed testing steps. + +Need More Help? +--------------- + +* Check your specific alert source documentation for detailed troubleshooting +* Review logs in AlertManager, Prometheus Operator (if applicable), and Robusta runner +* Join our `Slack community `_ for direct support \ No newline at end of file diff --git a/docs/configuration/alertmanager-integration/victoria-metrics.rst b/docs/configuration/alertmanager-integration/victoria-metrics.rst index 2d8f9bd48..8aca0c1fc 100644 --- a/docs/configuration/alertmanager-integration/victoria-metrics.rst +++ b/docs/configuration/alertmanager-integration/victoria-metrics.rst @@ -1,12 +1,12 @@ -Victoria Metrics -******************** +VictoriaMetrics Alerts +======================= -This guide walks you through configuring `Victoria Metrics `_ with Robusta. +This guide shows how to send alerts from `VictoriaMetrics `_ to Robusta. -You will need to configure two integrations: one to send alerts to Robusta and another to let Robusta query metrics and create silences. +For configuring metric querying from VictoriaMetrics, see :doc:`/configuration/metric-providers-victoria`. 
Send Alerts to Robusta -============================ +-------------------------- Add the following to your Victoria Metrics Alertmanager configuration (e.g., Helm values file or VMAlertmanagerConfig CRD): @@ -38,41 +38,6 @@ Add the following to your Victoria Metrics Alertmanager configuration (e.g., Hel .. include:: ./_testing_integration.rst Configure Metrics Querying -==================================== +----------------------------- -Robusta can query metrics and create silences using Victoria Metrics. If both are in the same Kubernetes cluster, Robusta can auto-detect the Victoria Metrics service. To verify, go to the "Apps" tab in Robusta, select an application, and check for usage graphs. - -If auto-detection fails you must add the ``prometheus_url`` parameter and :ref:`update Robusta `. - -.. code-block:: yaml - - globalConfig: # this line should already exist - # add the lines below - alertmanager_url: "http://..svc.cluster.local:9093" # Example:"http://vmalertmanager-victoria-metrics-vm.default.svc.cluster.local:9093/" - prometheus_url: "http://VM_Metrics_SERVICE_NAME.NAMESPACE.svc.cluster.local:8429" # Example:"http://vmsingle-vmks-victoria-metrics-k8s-stack.default.svc.cluster.local:8429" - # Add any labels that are relevant to the specific cluster (optional) - # prometheus_additional_labels: - # cluster: 'CLUSTER_NAME_HERE' - - # Additional query string parameters to be appended to the Prometheus connection URL (optional) - # prometheus_url_query_string: "demo-query=example-data&another-query=value" - - # Create alert silencing when using Grafana alerts (optional) - # grafana_api_key: # (1) - # alertmanager_flavor: grafana - - # If using a multi-tenant prometheus or alertmanager, pass the org id to all queries - # prometheus_additional_headers: - # X-Scope-OrgID: - # alertmanager_additional_headers: - # X-Scope-OrgID: - -.. code-annotations:: - 1. 
This is necessary for Robusta to create silences when using Grafana Alerts, because of minor API differences in the AlertManager embedded in Grafana. - -Optional Settings -================== - -**Prometheus flags checks** - -.. include:: ./_prometheus_flags_check.rst +To enable Robusta to query metrics from VictoriaMetrics, see the metrics provider documentation: :doc:`/configuration/metric-providers-victoria`. \ No newline at end of file diff --git a/docs/configuration/exporting/alert-export-api.rst b/docs/configuration/exporting/alert-export-api.rst new file mode 100644 index 000000000..7ea4bbc19 --- /dev/null +++ b/docs/configuration/exporting/alert-export-api.rst @@ -0,0 +1,159 @@ +Alert Export API +============================================== + +.. note:: + This feature is available with the Robusta SaaS platform and self-hosted commercial plans. It is not available in the open-source version. + +Use this endpoint to export alert history data. You can filter the results based on specific criteria using query parameters such as ``alert_name``, ``account_id``, and time range. + +.. _alert-export-api: + +GET https://api.robusta.dev/api/query/alerts +------------------------------------------------------ + +Query Parameters +^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 20 10 70 10 + :header-rows: 1 + + * - Parameter + - Type + - Description + - Required + * - ``account_id`` + - string + - The unique account identifier (found in your ``generated_values.yaml`` file). + - Yes + * - ``start_ts`` + - string + - Start timestamp for the alert history query (in ISO 8601 format, e.g., ``2024-09-02T04:02:05.032Z``). + - Yes + * - ``end_ts`` + - string + - End timestamp for the alert history query (in ISO 8601 format, e.g., ``2024-09-17T05:02:05.032Z``). + - Yes + * - ``alert_name`` + - string + - The name of the alert to filter by (e.g., ``CrashLoopBackoff``). 
+ - No + +Example Request +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following ``curl`` command demonstrates how to export alert history data for the ``CrashLoopBackoff`` alert: + +.. code-block:: bash + + curl --location 'https://api.robusta.dev/api/query/alerts?alert_name=CrashLoopBackoff&account_id=ACCOUNT_ID&start_ts=2024-09-02T04%3A02%3A05.032Z&end_ts=2024-09-17T05%3A02%3A05.032Z' \ + --header 'Authorization: Bearer API-KEY' + +In the command, make sure to replace the following placeholders: + +- ``ACCOUNT_ID``: Your account ID, which can be found in your ``generated_values.yaml`` file. +- ``API-KEY``: Your API Key for authentication. You can generate this token in the platform by navigating to **Settings** -> **API Keys** -> **New API Key**, and creating a key with the "Read Alerts" permission. + +Request Headers +^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 30 70 + :header-rows: 1 + + * - Header + - Description + * - ``Authorization`` + - Bearer token for authentication (e.g., ``Bearer TOKEN_HERE``). The token must have "Read Alerts" permission. + +Response Format +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The API will return a list of alerts in JSON format. Each alert object contains detailed information about the alert, including the name, priority, source, and related resource information. + +Example Response +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
code-block:: json + + [ + { + "alert_name": "CrashLoopBackoff", + "title": "Crashing pod api-gateway-123abc in namespace prod", + "description": null, + "source": "kubernetes_api_server", + "priority": "HIGH", + "started_at": "2024-09-03T04:09:31.342818+00:00", + "resolved_at": null, + "cluster": "prod-cluster-1", + "namespace": "prod", + "app": "api-gateway", + "kind": null, + "resource_name": "api-gateway-123abc", + "resource_node": "gke-prod-cluster-1-node-1" + }, + { + "alert_name": "CrashLoopBackoff", + "title": "Crashing pod billing-service-xyz789 in namespace billing", + "description": null, + "source": "kubernetes_api_server", + "priority": "HIGH", + "started_at": "2024-09-03T04:09:31.496713+00:00", + "resolved_at": null, + "cluster": "prod-cluster-2", + "namespace": "billing", + "app": "billing-service", + "kind": null, + "resource_name": "billing-service-xyz789", + "resource_node": "gke-prod-cluster-2-node-3" + } + ] + +Response Fields +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 25 10 70 + :header-rows: 1 + + * - Field + - Type + - Description + * - ``alert_name`` + - string + - Name of the alert (e.g., ``CrashLoopBackoff``). + * - ``title`` + - string + - A brief description of the alert event. + * - ``source`` + - string + - Source of the alert (e.g., ``kubernetes_api_server``). + * - ``priority`` + - string + - Priority level of the alert (e.g., ``HIGH``). + * - ``started_at`` + - string + - Timestamp when the alert was triggered, in ISO 8601 format. + * - ``resolved_at`` + - string + - Timestamp when the alert was resolved, or ``null`` if still unresolved. + * - ``cluster`` + - string + - The cluster where the alert originated. + * - ``namespace`` + - string + - Namespace where the alert occurred. + * - ``app`` + - string + - The application that triggered the alert. + * - ``resource_name`` + - string + - Name of the resource that caused the alert. + * - ``resource_node`` + - string + - The node where the resource is located. 
+ +Quick Start Example +^^^^^^^^^^^^^^^^^^^ + +There is a quick-start `Prometheus report-generator `_ on GitHub that demonstrates how to use the export APIs. \ No newline at end of file diff --git a/docs/configuration/exporting/alert-statistics-api.rst b/docs/configuration/exporting/alert-statistics-api.rst new file mode 100644 index 000000000..9e79fd7b1 --- /dev/null +++ b/docs/configuration/exporting/alert-statistics-api.rst @@ -0,0 +1,132 @@ +Alert Statistics API +============================================== + +.. note:: + This feature is available with the Robusta SaaS platform and self-hosted commercial plans. It is not available in the open-source version. + +Use this endpoint to retrieve aggregated alert data, including the count of each type of alert during a specified time range. Filters can be applied using query parameters such as ``account_id`` and the time range. + +.. _alert-reporting-api: + +GET https://api.robusta.dev/api/query/report +------------------------------------------------------------ + +Query Parameters +^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 20 10 70 10 + :header-rows: 1 + + * - Parameter + - Type + - Description + - Required + * - ``account_id`` + - string + - The unique account identifier (found in your ``generated_values.yaml`` file). + - Yes + * - ``start_ts`` + - string + - Start timestamp for the query (in ISO 8601 format, e.g., ``2024-10-27T04:02:05.032Z``). + - Yes + * - ``end_ts`` + - string + - End timestamp for the query (in ISO 8601 format, e.g., ``2024-11-27T05:02:05.032Z``). + - Yes + + +Example Request +^^^^^^^^^^^^^^^^^^^^^^^ + +The following ``curl`` command demonstrates how to query aggregated alert data for a specified time range: + +.. 
code-block:: bash + + curl --location 'https://api.robusta.dev/api/query/report?account_id=XXXXXX-XXXX_XXXX_XXXXX7&start_ts=2024-10-27T04:02:05.032Z&end_ts=2024-11-27T05:02:05.032Z' \ + --header 'Authorization: Bearer API-KEY' + + +In the command, make sure to replace the following placeholders: + +- ``account_id``: Your account ID, which can be found in your ``generated_values.yaml`` file. +- ``API-KEY``: Your API Key for authentication. Generate this token in the platform by navigating to **Settings** -> **API Keys** -> **New API Key**, and creating a key with the "Read Alerts" permission. + + + +Request Headers +^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 30 70 + :header-rows: 1 + + * - Header + - Description + * - ``Authorization`` + - Bearer token for authentication (e.g., ``Bearer TOKEN_HERE``). The token must have "Read Alerts" permission. + +Response Format +^^^^^^^^^^^^^^^^^^^^ + +The API will return a JSON array of aggregated alerts, with each object containing: + +- ``aggregation_key``: The unique identifier of the alert type (e.g., ``KubeJobFailed``). +- ``alert_count``: The total count of occurrences of this alert type within the specified time range. + +Example Response +^^^^^^^^^^^^^^^^^^^^^^^^^ +.. 
code-block:: json + + [ + {"aggregation_key": "KubeJobFailed", "alert_count": 17413}, + {"aggregation_key": "KubePodNotReady", "alert_count": 11893}, + {"aggregation_key": "KubeDeploymentReplicasMismatch", "alert_count": 2410}, + {"aggregation_key": "KubeDeploymentRolloutStuck", "alert_count": 923}, + {"aggregation_key": "KubePodCrashLooping", "alert_count": 921}, + {"aggregation_key": "KubeContainerWaiting", "alert_count": 752}, + {"aggregation_key": "PrometheusRuleFailures", "alert_count": 188}, + {"aggregation_key": "KubeMemoryOvercommit", "alert_count": 187}, + {"aggregation_key": "PrometheusOperatorRejectedResources", "alert_count": 102}, + {"aggregation_key": "KubeletTooManyPods", "alert_count": 94}, + {"aggregation_key": "NodeMemoryHighUtilization", "alert_count": 23}, + {"aggregation_key": "TargetDown", "alert_count": 19}, + {"aggregation_key": "test123", "alert_count": 7}, + {"aggregation_key": "KubeAggregatedAPIDown", "alert_count": 4}, + {"aggregation_key": "KubeAggregatedAPIErrors", "alert_count": 4}, + {"aggregation_key": "KubeMemoryOvercommitTEST2", "alert_count": 1}, + {"aggregation_key": "TestAlert", "alert_count": 1}, + {"aggregation_key": "TestAlert2", "alert_count": 1}, + {"aggregation_key": "dsafd", "alert_count": 1}, + {"aggregation_key": "KubeMemoryOvercommitTEST", "alert_count": 1}, + {"aggregation_key": "vfd", "alert_count": 1} + ] + + + +Response Fields +^^^^^^^^^^^^^^^^^^^^ +.. list-table:: + :widths: 25 10 70 + :header-rows: 1 + + * - Field + - Type + - Description + * - ``aggregation_key`` + - string + - The unique key representing the type of alert (e.g., ``KubeJobFailed``). + * - ``alert_count`` + - integer + - The number of times this alert occurred within the specified time range. + +Notes +^^^^^^^^^^^^^^^ + +- Ensure that the ``start_ts`` and ``end_ts`` parameters are in ISO 8601 format and are correctly set to cover the desired time range. +- Use the correct ``Authorization`` token with sufficient permissions to access the alert data. 
+ +Quick Start Example +^^^^^^^^^^^^^^^^^^^ + +There is a quick-start `Prometheus report-generator `_ on GitHub that demonstrates how to use the export APIs. \ No newline at end of file diff --git a/docs/configuration/exporting/configuration-changes-api.rst b/docs/configuration/exporting/configuration-changes-api.rst new file mode 100644 index 000000000..b8259a502 --- /dev/null +++ b/docs/configuration/exporting/configuration-changes-api.rst @@ -0,0 +1,190 @@ +Configuration Changes API +============================================== + +.. note:: + This feature is available with the Robusta SaaS platform and self-hosted commercial plans. It is not available in the open-source version. + +Use this endpoint to send configuration changes to Robusta. You can send up to 1000 configuration changes in a single request. + +.. _send-configuration-changes-api: + +POST https://api.robusta.dev/api/config-changes +-------------------------------------------------------------------- + +Request Body Schema +^^^^^^^^^^^^^^^^^^^ + +The request body must include the following fields: + +.. list-table:: + :widths: 25 10 70 10 + :header-rows: 1 + + * - Field + - Type + - Description + - Required + * - ``account_id`` + - string + - The unique account identifier. + - Yes + * - ``config_changes`` + - list + - A list of configuration changes. + - Yes + +Configuration Change Schema +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Each configuration change in the ``config_changes`` list must follow the specific schema, which includes the following fields: + +.. list-table:: + :widths: 25 10 70 10 + :header-rows: 1 + + * - Field + - Type + - Description + - Required + * - ``title`` + - string + - A short description of the configuration change. + - Yes + * - ``old_config`` + - string + - The previous configuration value. + - Yes + * - ``new_config`` + - string + - The new configuration value. + - Yes + * - ``resource_name`` + - string + - The name of the resource affected by the configuration change. 
+ - Yes + * - ``description`` + - string + - A detailed description of the configuration change (optional). + - No + * - ``source`` + - string + - The source of the configuration change (default: ``external``). + - No + * - ``cluster`` + - string + - The cluster where the configuration change occurred (default: ``external``). + - No + * - ``labels`` + - dict + - Extra labels for the configuration change (optional). + - No + * - ``annotations`` + - dict + - Extra annotations for the configuration change (optional). + - No + * - ``subject_name`` + - string + - The name of the subject related to the configuration change (optional). + - No + * - ``subject_namespace`` + - string + - The namespace of the subject related to the configuration change (optional). + - No + * - ``subject_node`` + - string + - The node where the subject related to the configuration change is located (optional). + - No + * - ``subject_type`` + - string + - The type of subject related to the configuration change (optional). + - No + * - ``service_key`` + - string + - A key identifying the service related to the configuration change (optional). + - No + * - ``fingerprint`` + - string + - A unique identifier for the configuration change (optional). + - No + +Example Request +^^^^^^^^^^^^^^^^^^^^ + +Here is an example of a ``POST`` request to send a list of configuration changes: + +.. 
code-block:: bash + + curl --location --request POST 'https://api.robusta.dev/api/config-changes' \ + --header 'Authorization: Bearer API-KEY' \ + --header 'Content-Type: application/json' \ + --data-raw '{ + "account_id": "ACCOUNT_ID", + "config_changes": [ + { + "title": "Updated test-service deployment", + "old_config": "apiVersion: apps/v1\nkind: Deployment\n....", + "new_config": "apiVersion: apps/v1...", + "resource_name": "test-service", + "description": "Changed deployment", + "source": "test-service", + "cluster": "prod-cluster-1", + "labels": { + "environment": "production" + }, + "annotations": { + "env1": "true" + }, + "subject_namespace": "prod", + "subject_node": "gke-prod-cluster-1-node-1" + } + ] + }' + +In this request, replace the following placeholders: + +- ``ACCOUNT_ID``: Your account ID, which can be found in your ``generated_values.yaml`` file. +- ``API-KEY``: Your API Key for authentication. You can generate this token by navigating to **Settings** -> **API Keys** -> **New API Key**. + +Request Headers +^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 30 70 + :header-rows: 1 + + * - Header + - Description + * - ``Authorization`` + - Bearer token for authentication (e.g., ``Bearer TOKEN_HERE``). The token must have the necessary permissions to submit configuration changes. + * - ``Content-Type`` + - Must be set to ``application/json``. + +Response Format +^^^^^^^^^^^^^^^^^^^^ + +Success Response +"""""""""""""""" + +If the request is successful, the API will return the following response: + +.. code-block:: json + + { + "success": true + } + +- **Status Code**: ``200 OK`` + +Error Response +"""""""""""""" + +If there is an error in processing the request, the API will return the following format: + +.. code-block:: json + + { + "msg": "Error message here", + "error_code": 123 + } + +- **Status Code**: Varies based on the error (e.g., ``400 Bad Request``, ``500 Internal Server Error``). 
\ No newline at end of file diff --git a/docs/configuration/exporting/custom-webhooks.rst b/docs/configuration/exporting/custom-webhooks.rst index 265c76570..4a87695f4 100644 --- a/docs/configuration/exporting/custom-webhooks.rst +++ b/docs/configuration/exporting/custom-webhooks.rst @@ -28,7 +28,7 @@ You'll need your API key and account ID: 1. **Account ID**: Found in your ``generated_values.yaml`` file 2. **API Key**: Generate this in the Robusta platform under **Settings** → **API Keys** → **New API Key** -For detailed API documentation including request format, authentication, and examples, see :doc:`Alert History Import and Export API `. +For detailed API documentation including request format, authentication, and examples, see :doc:`Send Alerts API `. Quick Example ------------- @@ -56,4 +56,4 @@ Here's a simple example of sending a custom alert: Next Steps ---------- -For complete API documentation including all available fields and response formats, see :doc:`Alert History Import and Export API `. \ No newline at end of file +For complete API documentation including all available fields and response formats, see :doc:`Send Alerts API `. \ No newline at end of file diff --git a/docs/configuration/exporting/exporting-data.rst b/docs/configuration/exporting/exporting-data.rst index 296686caf..72b6a2f3c 100644 --- a/docs/configuration/exporting/exporting-data.rst +++ b/docs/configuration/exporting/exporting-data.rst @@ -1,778 +1,31 @@ -Alert History Import and Export API +Robusta API Reference ============================================== .. note:: - This feature is available with the Robusta SaaS platform and self-hosted commercial plans. It is not available in the open-source version. + These features are available with the Robusta SaaS platform and self-hosted commercial plans. They are not available in the open-source version. 
-The Robusta SaaS platform exposes several HTTP APIs for exporting data and sending alerts: +The Robusta platform exposes HTTP APIs for exporting data, sending alerts, and managing resources. -* :ref:`API to export alerts ` - Export historical alert data -* :ref:`API to fetch aggregate alert statistics ` - Get aggregated alert statistics -* :ref:`API to send alerts ` - Send custom alerts programmatically -* :ref:`API to send configuration changes ` - Track configuration changes +.. toctree:: + :maxdepth: 1 + + send-alerts-api + configuration-changes-api + alert-export-api + alert-statistics-api + namespace-resources-api -For a simpler webhook integration guide, see :doc:`Custom Webhooks `. +Getting Started +--------------- -There is an quick-start `Prometheus report-generator `_ on GitHub that demonstrates how to use the export APIs. +All APIs require authentication using an API key. Generate API keys in the Robusta UI: -.. _alert-export-api: +**Settings** → **API Keys** → **New API Key** -GET https://api.robusta.dev/api/query/alerts ------------------------------------------------------- +Assign appropriate permissions to your API key based on the APIs you plan to use. -Use this endpoint to export alert history data. You can filter the results based on specific criteria using query parameters such as ``alert_name``, ``account_id``, and time range. +Related Resources +----------------- -Query Parameters -^^^^^^^^^^^^^^^^^^^^^^ - -.. list-table:: - :widths: 20 10 70 10 - :header-rows: 1 - - * - Parameter - - Type - - Description - - Required - * - ``account_id`` - - string - - The unique account identifier (found in your ``generated_values.yaml`` file). - - Yes - * - ``start_ts`` - - string - - Start timestamp for the alert history query (in ISO 8601 format, e.g., ``2024-09-02T04:02:05.032Z``). - - Yes - * - ``end_ts`` - - string - - End timestamp for the alert history query (in ISO 8601 format, e.g., ``2024-09-17T05:02:05.032Z``). 
- - Yes - * - ``alert_name`` - - string - - The name of the alert to filter by (e.g., ``CrashLoopBackoff``). - - No - -Example Request -^^^^^^^^^^^^^^^^^^^^^^^^^ - -The following ``curl`` command demonstrates how to export alert history data for the ``CrashLoopBackoff`` alert: - -.. code-block:: bash - - curl --location 'https://api.robusta.dev/api/query/alerts?alert_name=CrashLoopBackoff&account_id=ACCOUNT_ID&start_ts=2024-09-02T04%3A02%3A05.032Z&end_ts=2024-09-17T05%3A02%3A05.032Z' \ - --header 'Authorization: Bearer API-KEY' - -In the command, make sure to replace the following placeholders: - -- ``ACCOUNT_ID``: Your account ID, which can be found in your ``generated_values.yaml`` file. -- ``API-KEY``: Your API Key for authentication. You can generate this token in the platform by navigating to **Settings** -> **API Keys** -> **New API Key**, and creating a key with the "Read Alerts" permission. - -Request Headers -^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. list-table:: - :widths: 30 70 - :header-rows: 1 - - * - Header - - Description - * - ``Authorization`` - - Bearer token for authentication (e.g., ``Bearer TOKEN_HERE``). The token must have "Read Alerts" permission. - -Response Format -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The API will return a list of alerts in JSON format. Each alert object contains detailed information about the alert, including the name, priority, source, and related resource information. - -Example Response -^^^^^^^^^^^^^^^^^^^^^^^^ - -.. 
code-block:: json - - [ - { - "alert_name": "CrashLoopBackoff", - "title": "Crashing pod api-gateway-123abc in namespace prod", - "description": null, - "source": "kubernetes_api_server", - "priority": "HIGH", - "started_at": "2024-09-03T04:09:31.342818+00:00", - "resolved_at": null, - "cluster": "prod-cluster-1", - "namespace": "prod", - "app": "api-gateway", - "kind": null, - "resource_name": "api-gateway-123abc", - "resource_node": "gke-prod-cluster-1-node-1" - }, - { - "alert_name": "CrashLoopBackoff", - "title": "Crashing pod billing-service-xyz789 in namespace billing", - "description": null, - "source": "kubernetes_api_server", - "priority": "HIGH", - "started_at": "2024-09-03T04:09:31.496713+00:00", - "resolved_at": null, - "cluster": "prod-cluster-2", - "namespace": "billing", - "app": "billing-service", - "kind": null, - "resource_name": "billing-service-xyz789", - "resource_node": "gke-prod-cluster-2-node-3" - } - ] - -Response Fields -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. list-table:: - :widths: 25 10 70 - :header-rows: 1 - - * - Field - - Type - - Description - * - ``alert_name`` - - string - - Name of the alert (e.g., ``CrashLoopBackoff``). - * - ``title`` - - string - - A brief description of the alert event. - * - ``source`` - - string - - Source of the alert (e.g., ``kubernetes_api_server``). - * - ``priority`` - - string - - Priority level of the alert (e.g., ``HIGH``). - * - ``started_at`` - - string - - Timestamp when the alert was triggered, in ISO 8601 format. - * - ``resolved_at`` - - string - - Timestamp when the alert was resolved, or ``null`` if still unresolved. - * - ``cluster`` - - string - - The cluster where the alert originated. - * - ``namespace`` - - string - - Namespace where the alert occurred. - * - ``app`` - - string - - The application that triggered the alert. - * - ``resource_name`` - - string - - Name of the resource that caused the alert. - * - ``resource_node`` - - string - - The node where the resource is located. - -.. 
_alert-reporting-api: - -GET `https://api.robusta.dev/api/query/report` ------------------------------------------------------------- - -Use this endpoint to retrieve aggregated alert data, including the count of each type of alert during a specified time range. Filters can be applied using query parameters such as `account_id` and the time range. - - -Query Parameters -^^^^^^^^^^^^^^^^^^^^ - -.. list-table:: - :widths: 20 10 70 10 - :header-rows: 1 - - * - Parameter - - Type - - Description - - Required - * - ``account_id`` - - string - - The unique account identifier (found in your ``generated_values.yaml`` file). - - Yes - * - ``start_ts`` - - string - - Start timestamp for the query (in ISO 8601 format, e.g., ``2024-10-27T04:02:05.032Z``). - - Yes - * - ``end_ts`` - - string - - End timestamp for the query (in ISO 8601 format, e.g., ``2024-11-27T05:02:05.032Z``). - - Yes - - -Example Request -^^^^^^^^^^^^^^^^^^^^^^^ - -The following `curl` command demonstrates how to query aggregated alert data for a specified time range: - -.. code-block:: bash - - curl --location 'https://api.robusta.dev/api/query/report?account_id=XXXXXX-XXXX_XXXX_XXXXX7&start_ts=2024-10-27T04:02:05.032Z&end_ts=2024-11-27T05:02:05.032Z' \ - --header 'Authorization: Bearer API-KEY' - - -In the command, make sure to replace the following placeholders: - -- `account_id`: Your account ID, which can be found in your `generated_values.yaml` file. -- `API-KEY`: Your API Key for authentication. Generate this token in the platform by navigating to **Settings** -> **API Keys** -> **New API Key**, and creating a key with the "Read Alerts" permission. - - - -Request Headers -^^^^^^^^^^^^^^^^^^^^ - -.. list-table:: - :widths: 30 70 - :header-rows: 1 - - * - Header - - Description - * - ``Authorization`` - - Bearer token for authentication (e.g., ``Bearer TOKEN_HERE``). The token must have "Read Alerts" permission. 
- -Response Format -^^^^^^^^^^^^^^^^^^^^ - -The API will return a JSON array of aggregated alerts, with each object containing: - -- **`aggregation_key`**: The unique identifier of the alert type (e.g., `KubeJobFailed`). -- **`alert_count`**: The total count of occurrences of this alert type within the specified time range. - -Example Response -^^^^^^^^^^^^^^^^^^^^^^^^^ -.. code-block:: json - - [ - {"aggregation_key": "KubeJobFailed", "alert_count": 17413}, - {"aggregation_key": "KubePodNotReady", "alert_count": 11893}, - {"aggregation_key": "KubeDeploymentReplicasMismatch", "alert_count": 2410}, - {"aggregation_key": "KubeDeploymentRolloutStuck", "alert_count": 923}, - {"aggregation_key": "KubePodCrashLooping", "alert_count": 921}, - {"aggregation_key": "KubeContainerWaiting", "alert_count": 752}, - {"aggregation_key": "PrometheusRuleFailures", "alert_count": 188}, - {"aggregation_key": "KubeMemoryOvercommit", "alert_count": 187}, - {"aggregation_key": "PrometheusOperatorRejectedResources", "alert_count": 102}, - {"aggregation_key": "KubeletTooManyPods", "alert_count": 94}, - {"aggregation_key": "NodeMemoryHighUtilization", "alert_count": 23}, - {"aggregation_key": "TargetDown", "alert_count": 19}, - {"aggregation_key": "test123", "alert_count": 7}, - {"aggregation_key": "KubeAggregatedAPIDown", "alert_count": 4}, - {"aggregation_key": "KubeAggregatedAPIErrors", "alert_count": 4}, - {"aggregation_key": "KubeMemoryOvercommitTEST2", "alert_count": 1}, - {"aggregation_key": "TestAlert", "alert_count": 1}, - {"aggregation_key": "TestAlert2", "alert_count": 1}, - {"aggregation_key": "dsafd", "alert_count": 1}, - {"aggregation_key": "KubeMemoryOvercommitTEST", "alert_count": 1}, - {"aggregation_key": "vfd", "alert_count": 1} - ] - - - -Response Fields -^^^^^^^^^^^^^^^^^^^^ -.. 
list-table:: - :widths: 25 10 70 - :header-rows: 1 - - * - Field - - Type - - Description - * - ``aggregation_key`` - - string - - The unique key representing the type of alert (e.g., ``KubeJobFailed``). - * - ``alert_count`` - - integer - - The number of times this alert occurred within the specified time range. - -Notes -^^^^^^^^^^^^^^^ - -- Ensure that the `start_ts` and `end_ts` parameters are in ISO 8601 format and are correctly set to cover the desired time range. -- Use the correct `Authorization` token with sufficient permissions to access the alert data. - -.. _send-alerts-api: - -POST https://api.robusta.dev/api/alerts ----------------------------------------------------- -Use this endpoint to send alert data to Robusta. You can send up to 1000 alerts in a single request. - -Request Body Schema -^^^^^^^^^^^^^^^^^^^^^^^^ - -The request body must include the following fields: - -.. list-table:: - :widths: 25 10 70 10 - :header-rows: 1 - - * - Field - - Type - - Description - - Required - * - ``account_id`` - - string - - The unique account identifier. - - Yes - * - ``alerts`` - - list - - A list of alerts to be sent. - - Yes - -Each alert in the ``alerts`` list must follow the specific schema, which includes the following fields: - -.. list-table:: - :widths: 20 10 70 10 - :header-rows: 1 - - * - Field - - Type - - Description - - Required - * - ``title`` - - string - - A short description of the alert. - - Yes - * - ``description`` - - string - - A detailed description of the alert - - Yes - * - ``source`` - - string - - The source of the alert. - - Yes - * - ``priority`` - - string (one of: ``critical``, ``high``, ``medium``, ``error``, ``warning``, ``info``, ``low``, ``debug``) - - The priority level of the alert. - - Yes - * - ``aggregation_key`` - - string - - A key to group alerts that are related. - - Yes - * - ``failure`` - - boolean - - Indicates whether the alert represents a failure (default: ``false``). 
- - No - * - ``starts_at`` - - string (ISO 8601 timestamp) - - The timestamp when the alert started (optional). - - No - * - ``ends_at`` - - string (ISO 8601 timestamp) - - The timestamp when the alert ended (optional). - - No - * - ``labels`` - - dict - - Extra labels for the alert (optional). - - No - * - ``annotations`` - - dict - - Extra annotations for the alert (optional). - - No - * - ``cluster`` - - string - - Alert's cluster (default: ``external``) - - No - * - ``service_key`` - - string - - A key identifying the service related to the alert (optional). - - No - * - ``subject_type`` - - string - - The type of subject related to the alert (optional). - - No - * - ``subject_name`` - - string - - The name of the subject related to the alert (optional) - - No - * - ``subject_namespace`` - - string - - The namespace of the subject related to the alert (optional). - - No - * - ``subject_node`` - - string - - The node where the subject related to the alert is located (optional). - - No - * - ``fingerprint`` - - string - - A unique identifier for the alert (optional). - - No - -Example Request -^^^^^^^^^^^^^^^ - -Here is an example of a ``POST`` request to send a list of alerts: - -.. 
code-block:: bash - - curl --location --request POST 'https://api.robusta.dev/api/alerts' \ - --header 'Authorization: Bearer API-KEY' \ - --header 'Content-Type: application/json' \ - --data-raw '{ - "account_id": "ACCOUNT_ID", - "alerts": [ - { - "title": "Test Service Down", - "description": "The Test Service is not responding.", - "source": "monitoring-system", - "priority": "high", - "aggregation_key": "test-service-issues", - "failure": true, - "starts_at": "2024-10-07T10:00:00Z", - "labels": { - "environment": "production" - }, - "annotations": { - "env1": "true" - }, - "cluster": "prod-cluster-1", - "subject_namespace": "prod", - "subject_node": "gke-prod-cluster-1-node-1" - } - ] - }' - -In this request, replace the following placeholders: - -- ``ACCOUNT_ID``: Your account ID, which can be found in your ``generated_values.yaml`` file. -- ``API-KEY``: Your API Key for authentication. You can generate this token by navigating to **Settings** -> **API Keys** -> **New API Key**. - -Request Headers -^^^^^^^^^^^^^^^^^^^^ - -.. list-table:: - :widths: 30 70 - :header-rows: 1 - - * - Header - - Description - * - ``Authorization`` - - Bearer token for authentication (e.g., ``Bearer TOKEN_HERE``). The token must have the necessary permissions to submit alerts. - * - ``Content-Type`` - - Must be set to ``application/json``. - -Response Format -^^^^^^^^^^^^^^^^^^^^ - -*Success Response* - -If the request is successful, the API will return the following response: - -.. code-block:: json - - { - "success": true - } - -- **Status Code**: `200 OK` - -*Error Response* - -If there is an error in processing the request, the API will return the following format: - -.. code-block:: json - - { - "msg": "Error message here", - "error_code": 123 - } - -- **Status Code**: Varies based on the error (e.g., `400 Bad Request`, `500 Internal Server Error`). - -.. 
_send-configuration-changes-api: - -POST https://api.robusta.dev/api/config-changes --------------------------------------------------------------------- - -Use this endpoint to send configuration changes to Robusta. You can send up to 1000 configuration changes in a single request. - -Request Body Schema -^^^^^^^^^^^^^^^^^^^ - -The request body must include the following fields: - -.. list-table:: - :widths: 25 10 70 10 - :header-rows: 1 - - * - Field - - Type - - Description - - Required - * - ``account_id`` - - string - - The unique account identifier. - - Yes - * - ``config_changes`` - - list - - A list of configuration changes. - - Yes - -Each configuration change in the ``config_changes`` list must follow the specific schema, which includes the following fields: - -.. list-table:: - :widths: 25 10 70 10 - :header-rows: 1 - - * - Field - - Type - - Description - - Required - * - ``title`` - - string - - A short description of the configuration change. - - Yes - * - ``old_config`` - - string - - The previous configuration value. - - Yes - * - ``new_config`` - - string - - The new configuration value. - - Yes - * - ``resource_name`` - - string - - The name of the resource affected by the configuration change. - - Yes - * - ``description`` - - string - - A detailed description of the configuration change (optional). - - No - * - ``source`` - - string - - The source of the configuration change (default: ``external``). - - No - * - ``cluster`` - - string - - The cluster where the configuration change occurred (default: ``external``). - - No - * - ``labels`` - - dict - - Extra labels for the alert (optional). - - No - * - ``annotations`` - - dict - - Extra annotations for the configuration change (optional). - - No - * - ``subject_name`` - - string - - The name of the subject related to the configuration change (optional). - - No - * - ``subject_namespace`` - - string - - The namespace of the subject related to the configuration change (optional). 
- - No - * - ``subject_node`` - - string - - The node where the subject related to the configuration change is located (optional). - - No - * - ``subject_type`` - - string - - The type of subject related to the configuration change (optional). - - No - * - ``service_key`` - - string - - A key identifying the service related to the configuration change (optional). - - No - * - ``fingerprint`` - - string - - A unique identifier for the configuration change (optional). - - No - -Example Request -^^^^^^^^^^^^^^^^^^^^ - -Here is an example of a ``POST`` request to send a list of configuration changes: - -.. code-block:: bash - - curl --location --request POST 'https://api.robusta.dev/api/config-changes' \ - --header 'Authorization: Bearer API-KEY' \ - --header 'Content-Type: application/json' \ - --data-raw '{ - "account_id": "ACCOUNT_ID", - "config_changes": [ - { - "title": "Updated test-service deployment", - "old_config": "apiVersion: apps/v1\nkind: Deployment\n....", - "new_config": "apiVersion: apps/v1...", - "resource_name": "test sercvice", - "description": "Changed deployemnt", - "source": "test-service", - "cluster": "prod-cluster-1", - "labels": { - "environment": "production" - }, - "annotations": { - "env1": "true" - }, - "subject_namespace": "prod", - "subject_node": "gke-prod-cluster-1-node-1" - } - ] - }' - -In this request, replace the following placeholders: - -- ``ACCOUNT_ID``: Your account ID, which can be found in your ``generated_values.yaml`` file. -- ``API-KEY``: Your API Key for authentication. You can generate this token by navigating to **Settings** -> **API Keys** -> **New API Key**. - -Request Headers -^^^^^^^^^^^^^^^^^^^^ - -.. list-table:: - :widths: 30 70 - :header-rows: 1 - - * - Header - - Description - * - ``Authorization`` - - Bearer token for authentication (e.g., ``Bearer TOKEN_HERE``). The token must have the necessary permissions to submit configuration changes. - * - ``Content-Type`` - - Must be set to ``application/json``. 
- -Response Format -^^^^^^^^^^^^^^^^^^^^ - -*Success Response* - -If the request is successful, the API will return the following response: - -.. code-block:: json - - { - "success": true - } - -- **Status Code**: `200 OK` - -*Error Response* - -If there is an error in processing the request, the API will return the following format: - -.. code-block:: json - - { - "msg": "Error message here", - "error_code": 123 - } - -- **Status Code**: Varies based on the error (e.g., `400 Bad Request`, `500 Internal Server Error`). - -.. _namespaces-resources-api: - -POST https://api.robusta.dev/api/namespaces/resources ------------------------------------------------------- - -Use this endpoint to retrieve an **active count of specific Kubernetes resources** within a namespace. This is the same data displayed in the **Namespaces** tab of the Robusta UI. - -You can specify exactly which resource kinds you want to query in the request. - -This API relies on resource types configured in the Robusta UI sink. -Make sure to configure them as described in :ref:`cb-robusta-ui-sink-namespace-config`. - -Request Body Schema -^^^^^^^^^^^^^^^^^^^ - -The request body must include the following fields: - -.. list-table:: - :widths: 25 10 70 10 - :header-rows: 1 - - * - Field - - Type - - Description - - Required - * - ``namespace`` - - string - - The name of the namespace you want to inspect. - - Yes - * - ``account_id`` - - string - - The unique account identifier. - - Yes - * - ``cluster_name`` - - string - - The name of the cluster where the namespace resides. - - Yes - * - ``resources`` - - list - - A list of resource types to count, each including ``kind``, ``apiGroup``, and ``apiVersion``. 
- - Yes - -Each item in the ``resources`` list must include: - -* ``kind`` (e.g., `Deployments`) -* ``apiGroup`` (e.g., `apps`, or empty string for core group) -* ``apiVersion`` (e.g., `v1`, `v2`) - -Example Request -^^^^^^^^^^^^^^^^^^^^ - -Here is an example of a ``POST`` request to query the resource count in a namespace: - -.. code-block:: bash - - curl --location 'https://api.robusta.dev/api/namespaces/resources' \ - --header 'Authorization: Bearer API-KEY-HERE' \ - --header 'Content-Type: application/json' \ - --data '{ - "namespace": "your-namespace", - "account_id": "your-account-id", - "cluster_name": "your-cluster-name", - "resources": [ - {"kind": "Deployments", "apiGroup": "apps", "apiVersion": "v1"}, - {"kind": "Ingresses", "apiGroup": "networking.k8s.io", "apiVersion": "v1"}, - {"kind": "Services", "apiGroup": "", "apiVersion": "v1"}, - {"kind": "HorizontalPodAutoscalers", "apiGroup": "autoscaling", "apiVersion": "v2"}, - {"kind": "ReplicationControllers", "apiGroup": "", "apiVersion": "v1"} - ] - }' - -Replace: - -- ``API-KEY-HERE`` with your API Key from **Settings → API Keys → New API Key**. - Make sure the key has **Clusters → Read** permissions to access namespace resource data. -- ``your-account-id`` with the ID found in ``generated_values.yaml`` -- ``your-cluster-name`` and ``your-namespace`` accordingly - -Response Format -^^^^^^^^^^^^^^^^^^^^ - -*Success Response* - -If the request is successful, the API returns the following structure: - -.. code-block:: json - - { - "cluster": "your-cluster-name", - "namespace": "your-namespace", - "resources": [ - { - "apiGroup": "apps", - "apiVersion": "v1", - "count": 2, - "kind": "Deployments" - }, - { - "apiGroup": "", - "apiVersion": "v1", - "count": 5, - "kind": "Pods" - }, - ... - ] - } - -- **Status Code**: `200 OK` - -*Error Response* - -If an error occurs, you will receive a response in the following format: - -.. 
code-block:: json - - { - "msg": "Error message here", - "error_code": 456 - } - -- **Status Code**: Varies depending on the error (e.g., `400`, `403`, `500`) +* For webhook integration, see :doc:`Custom Webhooks ` +* Example implementation: `Prometheus report-generator `_ \ No newline at end of file diff --git a/docs/configuration/exporting/namespace-resources-api.rst b/docs/configuration/exporting/namespace-resources-api.rst new file mode 100644 index 000000000..0e4b66d1d --- /dev/null +++ b/docs/configuration/exporting/namespace-resources-api.rst @@ -0,0 +1,140 @@ +Namespace Resources API +============================================== + +.. note:: + This feature is available with the Robusta SaaS platform and self-hosted commercial plans. It is not available in the open-source version. + +Use this endpoint to retrieve an **active count of specific Kubernetes resources** within a namespace. This is the same data displayed in the **Namespaces** tab of the Robusta UI. + +You can specify exactly which resource kinds you want to query in the request. + +.. _namespaces-resources-api: + +POST https://api.robusta.dev/api/namespaces/resources +------------------------------------------------------ + +Prerequisites +^^^^^^^^^^^^^ + +This API relies on resource types configured in the Robusta UI sink. +Make sure to configure all the individual resources you need as described in :ref:`cb-robusta-ui-sink-namespace-config`. + +Request Body Schema +^^^^^^^^^^^^^^^^^^^ + +The request body must include the following fields: + +.. list-table:: + :widths: 25 10 70 10 + :header-rows: 1 + + * - Field + - Type + - Description + - Required + * - ``namespace`` + - string + - The name of the namespace you want to inspect. + - Yes + * - ``account_id`` + - string + - The unique account identifier. + - Yes + * - ``cluster_name`` + - string + - The name of the cluster where the namespace resides. 
+ - Yes + * - ``resources`` + - list + - A list of resource types to count, each including ``kind``, ``apiGroup``, and ``apiVersion``. + - Yes + +Resource Schema +^^^^^^^^^^^^^^^ + +Each item in the ``resources`` list must include: + +* ``kind`` (e.g., `Deployments`) +* ``apiGroup`` (e.g., `apps`, or empty string for core group) +* ``apiVersion`` (e.g., `v1`, `v2`) + +Example Request +^^^^^^^^^^^^^^^^^^^^ + +Here is an example of a ``POST`` request to query the resource count in a namespace: + +.. code-block:: bash + + curl --location 'https://api.robusta.dev/api/namespaces/resources' \ + --header 'Authorization: Bearer API-KEY-HERE' \ + --header 'Content-Type: application/json' \ + --data '{ + "namespace": "your-namespace", + "account_id": "your-account-id", + "cluster_name": "your-cluster-name", + "resources": [ + {"kind": "Deployments", "apiGroup": "apps", "apiVersion": "v1"}, + {"kind": "Services", "apiGroup": "", "apiVersion": "v1"}, + {"kind": "Ingresses", "apiGroup": "networking.k8s.io", "apiVersion": "v1"}, + {"kind": "CronJobs", "apiGroup": "batch", "apiVersion": "v1"} + ] + }' + +Replace: + +- ``API-KEY-HERE`` with your API Key from **Settings → API Keys → New API Key**. + Make sure the key has **Clusters → Read** permissions to access namespace resource data. +- ``your-account-id`` with the ID found in ``generated_values.yaml`` +- ``your-cluster-name`` and ``your-namespace`` accordingly + +Response Format +^^^^^^^^^^^^^^^^^^^^ + +Success Response +"""""""""""""""" + +If the request is successful, the API returns the following structure: + +.. code-block:: json + + { + "cluster": "your-cluster-name", + "namespace": "your-namespace", + "resources": [ + { + "apiGroup": "apps", + "apiVersion": "v1", + "count": 2, + "kind": "Deployments" + }, + { + "apiGroup": "", + "apiVersion": "v1", + "count": 3, + "kind": "Services" + }, + { + "apiGroup": "networking.k8s.io", + "apiVersion": "v1", + "count": 1, + "kind": "Ingresses" + }, + ... 
+ ] + } + +- **Status Code**: `200 OK` + +Error Response +"""""""""""""" + +If an error occurs, you will receive a response in the following format: + +.. code-block:: json + + { + "msg": "Error message here", + "error_code": 456 + } + +- **Status Code**: Varies depending on the error (e.g., `400`, `403`, `500`) \ No newline at end of file diff --git a/docs/configuration/exporting/robusta-pro-features.rst b/docs/configuration/exporting/robusta-pro-features.rst index 6764daf54..2d9ad141d 100644 --- a/docs/configuration/exporting/robusta-pro-features.rst +++ b/docs/configuration/exporting/robusta-pro-features.rst @@ -1,11 +1,19 @@ -Robusta Pro Features -==================== +Overview +======== .. note:: These features are available with the Robusta SaaS platform and self-hosted commercial plans. They are not available in the open-source version. Robusta Pro adds a web UI, additional integrations, and enterprise APIs to the open-source engine. Available as SaaS (we handle hosting) or self-hosted on-premise. +AI Analysis +----------- + +Automatically investigate and resolve issues with AI-powered analysis. + +:doc:`AI Analysis (HolmesGPT) <../holmesgpt/main-features>` + Automatically analyze Kubernetes alerts, logs, and metrics. Get potential root causes and remediation suggestions. Custom Alert Ingestion ----------------------- @@ -30,18 +38,10 @@ Export alert history and generate reports using Robusta's REST APIs. Features include: -* **Alert Export API**: Export historical alert data with filtering by time range, alert name, and account -* **Alert Reporting API**: Get aggregated statistics and counts for different alert types -* **Custom Alert API**: Send alerts programmatically from external systems -* **Configuration Changes API**: Track configuration changes in your environment - -AI Analysis ------------ - -Optional AI-powered alert investigation using HolmesGPT.
- -:doc:`AI Analysis (HolmesGPT) <../holmesgpt/index>` - Automatically analyze Kubernetes alerts, logs, and metrics. Get potential root causes and remediation suggestions. +* :doc:`Alert Export API `: Export historical alert data with filtering by time range, alert name, and account +* :doc:`Alert Reporting API `: Get aggregated statistics and counts for different alert types +* :doc:`Custom Alert API <send-alerts-api>`: Send alerts programmatically from external systems +* :doc:`Configuration Changes API `: Track configuration changes in your environment Additional Pro Features ----------------------- diff --git a/docs/configuration/exporting/send-alerts-api.rst b/docs/configuration/exporting/send-alerts-api.rst new file mode 100644 index 000000000..ef6e4db0a --- /dev/null +++ b/docs/configuration/exporting/send-alerts-api.rst @@ -0,0 +1,199 @@ +Send Alerts API +============================================== + +.. note:: + This feature is available with the Robusta SaaS platform and self-hosted commercial plans. It is not available in the open-source version. + +Use this endpoint to send alert data to Robusta. You can send up to 1000 alerts in a single request. + +.. _send-alerts-api: + +POST https://api.robusta.dev/api/alerts +---------------------------------------------------- + +Request Body Schema +^^^^^^^^^^^^^^^^^^^^^^^^ + +The request body must include the following fields: + +.. list-table:: + :widths: 25 10 70 10 + :header-rows: 1 + + * - Field + - Type + - Description + - Required + * - ``account_id`` + - string + - The unique account identifier. + - Yes + * - ``alerts`` + - list + - A list of alerts to be sent. + - Yes + +Alert Schema +^^^^^^^^^^^^ + +Each alert in the ``alerts`` list must follow the specific schema, which includes the following fields: + +.. list-table:: + :widths: 20 10 70 10 + :header-rows: 1 + + * - Field + - Type + - Description + - Required + * - ``title`` + - string + - A short description of the alert.
+ - Yes + * - ``description`` + - string + - A detailed description of the alert. + - Yes + * - ``source`` + - string + - The source of the alert. + - Yes + * - ``priority`` + - string (one of: ``critical``, ``high``, ``medium``, ``error``, ``warning``, ``info``, ``low``, ``debug``) + - The priority level of the alert. + - Yes + * - ``aggregation_key`` + - string + - A key to group alerts that are related. + - Yes + * - ``failure`` + - boolean + - Indicates whether the alert represents a failure (default: ``false``). + - No + * - ``starts_at`` + - string (ISO 8601 timestamp) + - The timestamp when the alert started (optional). + - No + * - ``ends_at`` + - string (ISO 8601 timestamp) + - The timestamp when the alert ended (optional). + - No + * - ``labels`` + - dict + - Extra labels for the alert (optional). + - No + * - ``annotations`` + - dict + - Extra annotations for the alert (optional). + - No + * - ``cluster`` + - string + - Alert's cluster (default: ``external``). + - No + * - ``service_key`` + - string + - A key identifying the service related to the alert (optional). + - No + * - ``subject_type`` + - string + - The type of subject related to the alert (optional). + - No + * - ``subject_name`` + - string + - The name of the subject related to the alert (optional). + - No + * - ``subject_namespace`` + - string + - The namespace of the subject related to the alert (optional). + - No + * - ``subject_node`` + - string + - The node where the subject related to the alert is located (optional). + - No + * - ``fingerprint`` + - string + - A unique identifier for the alert (optional). + - No + +Example Request +^^^^^^^^^^^^^^^ + +Here is an example of a ``POST`` request to send a list of alerts: + +..
code-block:: bash + + curl --location --request POST 'https://api.robusta.dev/api/alerts' \ + --header 'Authorization: Bearer API-KEY' \ + --header 'Content-Type: application/json' \ + --data-raw '{ + "account_id": "ACCOUNT_ID", + "alerts": [ + { + "title": "Test Service Down", + "description": "The Test Service is not responding.", + "source": "monitoring-system", + "priority": "high", + "aggregation_key": "test-service-issues", + "failure": true, + "starts_at": "2024-10-07T10:00:00Z", + "labels": { + "environment": "production" + }, + "annotations": { + "env1": "true" + }, + "cluster": "prod-cluster-1", + "subject_namespace": "prod", + "subject_node": "gke-prod-cluster-1-node-1" + } + ] + }' + +In this request, replace the following placeholders: + +- ``ACCOUNT_ID``: Your account ID, which can be found in your ``generated_values.yaml`` file. +- ``API-KEY``: Your API Key for authentication. You can generate this token by navigating to **Settings** -> **API Keys** -> **New API Key**. + +Request Headers +^^^^^^^^^^^^^^^^^^^^ + +.. list-table:: + :widths: 30 70 + :header-rows: 1 + + * - Header + - Description + * - ``Authorization`` + - Bearer token for authentication (e.g., ``Bearer TOKEN_HERE``). The token must have the necessary permissions to submit alerts. + * - ``Content-Type`` + - Must be set to ``application/json``. + +Response Format +^^^^^^^^^^^^^^^^^^^^ + +Success Response +"""""""""""""""" + +If the request is successful, the API will return the following response: + +.. code-block:: json + + { + "success": true + } + +- **Status Code**: `200 OK` + +Error Response +"""""""""""""" + +If there is an error in processing the request, the API will return the following format: + +.. code-block:: json + + { + "msg": "Error message here", + "error_code": 123 + } + +- **Status Code**: Varies based on the error (e.g., `400 Bad Request`, `500 Internal Server Error`). 
\ No newline at end of file diff --git a/docs/configuration/holmesgpt/builtin_toolsets.rst b/docs/configuration/holmesgpt/builtin_toolsets.rst deleted file mode 100644 index 70472d035..000000000 --- a/docs/configuration/holmesgpt/builtin_toolsets.rst +++ /dev/null @@ -1,149 +0,0 @@ - -Builtin Toolsets -================ - -.. toctree:: - :hidden: - :maxdepth: 1 - - toolsets/argocd - toolsets/aws - toolsets/confluence - toolsets/coralogix_logs - toolsets/datadog_logs - toolsets/datetime - toolsets/docker - toolsets/grafanaloki - toolsets/grafanatempo - toolsets/helm - toolsets/internet - toolsets/kafka - toolsets/kubernetes - toolsets/newrelic - toolsets/notion - toolsets/opensearch_logs - toolsets/opensearch_status - toolsets/prometheus - toolsets/rabbitmq - toolsets/robusta - toolsets/slab - -Holmes allows you to define and configure integrations (toolsets) that fetch data from external sources. This data -will be automatically used in investigations when relevant. - -You can :doc:`write your own toolset ` or use the default Holmes toolsets listed below. - - -Builtin toolsets -^^^^^^^^^^^^^^^^ -Holmes comes with a set of builtin toolsets. Some of these toolsets are enabled by default, such as toolsets -to read Kubernetes resources and fetch logs. Some builtin toolsets are disabled by default and can be enabled -by the user by providing credentials or API keys to external systems. - -.. grid:: 1 1 2 3 - :gutter: 3 - - .. grid-item-card:: :octicon:`cpu;1em;` ArgoCD - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/argocd - :link-type: doc - - .. grid-item-card:: :octicon:`cpu;1em;` AWS - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/aws - :link-type: doc - - .. grid-item-card:: :octicon:`cpu;1em;` Confluence - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/confluence - :link-type: doc - - .. 
grid-item-card:: :octicon:`cpu;1em;` Coralogix logs - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/coralogix_logs - :link-type: doc - - .. grid-item-card:: :octicon:`cpu;1em;` Datadog logs - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/datadog_logs - :link-type: doc - - .. grid-item-card:: :octicon:`cpu;1em;` Datetime - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/datetime - :link-type: doc - - .. grid-item-card:: :octicon:`cpu;1em;` Docker - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/docker - :link-type: doc - - .. grid-item-card:: :octicon:`cpu;1em;` Grafana Loki - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/grafanaloki - :link-type: doc - - .. grid-item-card:: :octicon:`cpu;1em;` Grafana Tempo - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/grafanatempo - :link-type: doc - - .. grid-item-card:: :octicon:`cpu;1em;` Helm - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/helm - :link-type: doc - - .. grid-item-card:: :octicon:`cpu;1em;` Internet - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/internet - :link-type: doc - - .. grid-item-card:: :octicon:`cpu;1em;` Kafka - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/kafka - :link-type: doc - - .. grid-item-card:: :octicon:`cpu;1em;` Kubernetes - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/kubernetes - :link-type: doc - - .. grid-item-card:: :octicon:`cpu;1em;` New Relic - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/newrelic - :link-type: doc - - .. grid-item-card:: :octicon:`cpu;1em;` Notion - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/notion - :link-type: doc - - .. grid-item-card:: :octicon:`cpu;1em;` OpenSearch logs - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/opensearch_logs - :link-type: doc - - .. 
grid-item-card:: :octicon:`cpu;1em;` OpenSearch status - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/opensearch_status - :link-type: doc - - .. grid-item-card:: :octicon:`cpu;1em;` Prometheus - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/prometheus - :link-type: doc - - .. grid-item-card:: :octicon:`cpu;1em;` RabbitMQ - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/rabbitmq - :link-type: doc - - .. grid-item-card:: :octicon:`cpu;1em;` Robusta - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/robusta - :link-type: doc - - .. grid-item-card:: :octicon:`cpu;1em;` Slab - :class-card: sd-bg-light sd-bg-text-light - :link: toolsets/slab - :link-type: doc diff --git a/docs/configuration/holmesgpt/custom_toolsets.rst b/docs/configuration/holmesgpt/custom_toolsets.rst deleted file mode 100644 index 9b75b158f..000000000 --- a/docs/configuration/holmesgpt/custom_toolsets.rst +++ /dev/null @@ -1,557 +0,0 @@ - -Custom toolsets -=============== - -.. include:: ./toolsets/_custom_toolset_appeal.inc.rst - -Examples --------- - -Below are examples of custom toolsets and how to add them to Holmes: - - -Example 1: Grafana Toolset -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -This toolset lets Holmes view Grafana dashboards and suggest relevant dashboards to the user: - -**Prerequisites:** - -- Grafana URL (e.g. http://localhost:3000 or https://grafana.example.com) -- Grafana service account token with **Basic role -> Viewer** and **Data sources -> Reader** permissions. Check out this `video `_ on creating a Grafana service account token. - -**Configuration:** - -.. md-tab-set:: - - .. md-tab-item:: Robusta Helm Chart - - **Helm Values:** - - .. 
code-block:: yaml - - holmes: - # provide environment variables the toolset needs - can be pulled from secrets or provided in plaintext - additionalEnvVars: - - name: GRAFANA_API_KEY - value: - - name: GRAFANA_URL - value: - - # define the toolset - toolsets: - grafana: - # this tool can only be enabled if these prerequisites are met - prerequisites: - # we need the GRAFANA_URL and GRAFANA_API_KEY environment variables to be set - - env: - - "GRAFANA_URL" - - "GRAFANA_API_KEY" - # curl must be installed - we check by running `curl --version` (if it's not installed, the command will fail) - - command: "curl --version" - - # human-readable description of the toolset (this is not seen by the AI model - its just for users) - description: "Grafana tools" - - # tools (capabilities) that will be provided to HolmesGPT when this toolset is enabled - tools: - - name: "grafana_get_dashboard" - # the LLM sees this description and uses it to decide when to use this tool - description: "Get list of grafana dashboards" - # the command that will be executed when this tool is used - # environment variables like GRAFANA_URL and GRAFANA_API_KEY can be used in the command - # they will not be exposed to the AI model, as the AI model doesn't see the command that was run - command: "curl \"${GRAFANA_URL}/api/search\" -H \"Authorization: Bearer ${GRAFANA_API_KEY}\"" - - - name: "grafana_get_url" - description: "Get the URL of a Grafana dashboard by UID, including the real URL of Grafana" - # in this command we use a variable called `{{ dashboard_uid }}` - # unlike enviroment variables that were provided by the user, variables like `{{ dashboard_uid }}` are provided by the AI model - # the AI model sees the tool description, decides to use this tool, and then provides a value for all {{ template_variables }} to invoke the tool - command: "echo \"${GRAFANA_URL}/d/{{ dashboard_uid }}\"" - - Update your Helm values with the provided YAML configuration, then apply the changes with Helm 
upgrade: - - .. code-block:: bash - - helm upgrade robusta robusta/robusta --values=generated_values.yaml --set clusterName= - - After the deployment is complete, you can open the HolmesGPT chat in the Robusta SaaS UI and ask questions like *what grafana dashboard should I look at to investigate high pod cpu?*. - - **Suggesting relevant dashboards during alert investigations:** Add runbook instructions to your alert in the Robusta UI, instructing Holmes to search for related Grafana dashboards. - - .. image:: /images/custom-grafana-toolset.png - :width: 600 - :align: center - - .. md-tab-item:: Holmes CLI - - **grafana_toolset.yaml:** - - .. code-block:: yaml - - toolsets: - grafana: - # this tool can only be enabled if these prerequisites are met - prerequisites: - # we need the GRAFANA_URL and GRAFANA_API_KEY environment variables to be set - - env: - - "GRAFANA_URL" - - "GRAFANA_API_KEY" - # curl must be installed - we check by running `curl --version` (if it's not installed, the command will fail) - - command: "curl --version" - - # human-readable description of the toolset (this is not seen by the AI model - its just for users) - description: "Grafana tools" - - # tools (capabilities) that will be provided to HolmesGPT when this toolset is enabled - tools: - - name: "grafana_get_dashboard" - # the LLM sees this description and uses it to decide when to use this tool - description: "Get list of grafana dashboards" - # the command that will be executed when this tool is used - # environment variables like GRAFANA_URL and GRAFANA_API_KEY can be used in the command - # they will not be exposed to the AI model, as the AI model doesn't see the command that was run - command: "curl \"${GRAFANA_URL}/api/search\" -H \"Authorization: Bearer ${GRAFANA_API_KEY}\"" - - - name: "grafana_get_url" - description: "Get the URL of a Grafana dashboard by UID, including the real URL of Grafana" - # in this command we use a variable called `{{ dashboard_uid }}` - # unlike 
environment variables that were provided by the user, variables like `{{ dashboard_uid }}` are provided by the AI model - # the AI model sees the tool description, decides to use this tool, and then provides a value for all {{ template_variables }} to invoke the tool - command: "echo \"${GRAFANA_URL}/d/{{ dashboard_uid }}\"" - - Set the appropriate environment variables and run Holmes: - - .. code-block:: bash - - export GRAFANA_API_KEY="" - export GRAFANA_URL="" - - To test, run: - - .. code-block:: bash - - holmes ask -t grafana_toolset.yaml "what grafana dashboard should I look at to investigate high pod cpu?" - -Example 2: Kubernetes Diagnostics Toolset -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -This toolset provides diagnostics for Kubernetes clusters, helping developers identify and resolve issues. - -.. code-block:: yaml - - holmes: - toolsets: - kubernetes/diagnostics: - description: "Advanced diagnostics and troubleshooting tools for Kubernetes clusters" - docs_url: "https://kubernetes.io/docs/home/" - icon_url: "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s" - tags: - - core - - cluster - prerequisites: - - command: "kubectl version --client" - tools: - - - name: "kubectl_node_health" - description: "Check the health status of all nodes in the cluster." - command: "kubectl get nodes -o wide" - - - name: "kubectl_check_resource_quota" - description: "Fetch the resource quota for a specific namespace." - command: "kubectl get resourcequota -n {{ namespace }} -o yaml" - - - name: "kubectl_find_evicted_pods" - description: "List all evicted pods in a specific namespace." - command: "kubectl get pods -n {{ namespace }} --field-selector=status.phase=Failed | grep Evicted" - -Update the ``generated_values.yaml`` file with the provided YAML configuration, then apply the changes by executing the Helm upgrade command: - -.. 
code-block:: bash - - helm upgrade robusta robusta/robusta --values=generated_values.yaml --set clusterName= - -Once deployed, Holmes will have access to advanced diagnostic tools for Kubernetes clusters. For example, you can ask Holmes, ``"Can you do a node health check?"`` and it will automatically use the newly added tools to provide you with the answer. - - -Example 3: GitHub Toolset -^^^^^^^^^^^^^^^^^^^^^^^^^ - -This toolset enables Holmes to fetch information from GitHub repositories. - -First `create a GitHub Personal Access Token with fine-grained permissions `_. For this example, you can leave the default permissions. - -.. md-tab-set:: - - .. md-tab-item:: Robusta Helm Chart - - **Helm Values:** - - .. code-block:: yaml - - holmes: - # provide environment variables the toolset needs - additionalEnvVars: - - name: GITHUB_TOKEN - value: - - # define the toolset itself - toolsets: - github_tools: - description: "Tools for managing GitHub repositories" - tags: - - cli - prerequisites: - - env: - - "GITHUB_TOKEN" - - command: "curl --version" - tools: - - name: "get_recent_commits" - description: "Fetches the most recent commits for a repository" - command: "curl -H 'Authorization: token ${GITHUB_TOKEN}' https://api.github.com/repos/{{ owner }}/{{ repo }}/commits?per_page={{ limit }} " - - - name: "get_repo_details" - description: "Fetches details of a specific repository" - command: "curl -H 'Authorization: token ${GITHUB_TOKEN}' https://api.github.com/repos/{{ owner }}/{{ repo }}" - - # In the above examples, LLM-provided parameters like {{ owner }} are inferred automatically from the command - # you can also define them explicitly - this is useful if: - # - You want to enforce parameter requirements (e.g., `owner` and `repo` are required). - # - You want to provide a default value for optional parameters. - parameters: - owner: - type: "string" - description: "Owner of the repository." 
- required: true - repo: - type: "string" - description: "Name of the repository." - required: true - - Update your Helm values with the provided YAML configuration, then apply the changes with Helm upgrade: - - .. code-block:: bash - - helm upgrade robusta robusta/robusta --values=generated_values.yaml --set clusterName= - - After the deployment is complete, the GitHub toolset will be available. HolmesGPT will be able to use it to interact with GitHub repositories. - For example, you can now open the HolmesGPT chat in the Robusta SaaS UI and ask, *who made the last commit to the robusta-dev/holmesgpt repo on github?*. - - .. image:: /images/custom-github-toolset.png - :width: 600 - :align: center - - .. md-tab-item:: Holmes CLI - - First, add the following environment variables: - - .. code-block:: bash - - export GITHUB_TOKEN="" - - Then, add the following to **~/.holmes/config.yaml**, creating the file if it doesn't exist: - - .. code-block:: yaml - - toolsets: - github_tools: - description: "Tools for managing GitHub repositories" - tags: - - cli - prerequisites: - - env: - - "GITHUB_TOKEN" - - command: "curl --version" - tools: - - name: "get_recent_commits" - description: "Fetches the most recent commits for a repository" - command: "curl -H 'Authorization: token ${GITHUB_TOKEN}' https://api.github.com/repos/{{ owner }}/{{ repo }}/commits?per_page={{ limit }} " - - # In the above examples, LLM-provided parameters like {{ owner }} are inferred automatically from the command - # you can also define them explicitly - this is useful if: - # - You want to enforce parameter requirements (e.g., `owner` and `repo` are required). - # - You want to provide a default value for optional parameters. - parameters: - owner: - type: "string" - description: "Owner of the repository." - required: true - repo: - type: "string" - description: "Name of the repository." - required: true - - To test, run: - - .. 
code-block:: bash - - holmes ask -t github_toolset.yaml "who made the last commit to the robusta-dev/holmesgpt repo on github?" - - -Reference ---------- - -A toolset is defined in your Helm values (``generated_values.yaml``). Each toolset has a unique name and has to contain tools. - - -.. code-block:: yaml - - toolsets: - : - enabled: - name: "" - description: "" - docs_url: "" - icon_url: "" - tags: - - - installation_instructions: "" - prerequisites: - - command: "" - expected_output: "" - - env: - - "" - additional_instructions: "" - tools: - - name: "" - description: "" - command: "" - script: "