From 7de97041082832a8576c556b0c963f8217aa8049 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Tue, 7 Apr 2026 00:08:35 -0500 Subject: [PATCH] feat: add adx-mon Helm chart Adds a Helm chart for deploying adx-mon components (collector, ingestor, operator, alerter) along with their CRDs and supporting resources. The chart is based on the deployment configuration used in production and includes: - Collector, Ingestor, Operator, Alerter, ICM Notifier deployments - CRDs: ADXCluster, Collector, Ingestor, ManagementCommand, MetricsExporter - Namespace creation Functions, SummaryRule, and AlertRule CRDs and resources are intentionally excluded as they are environment-specific and managed separately. --- charts/adx-mon/Chart.yaml | 6 + charts/adx-mon/templates/alerter.yaml | 174 +++++++++++ .../adx-mon/templates/collector-config.yaml | 247 +++++++++++++++ charts/adx-mon/templates/collector.yaml | 268 +++++++++++++++++ .../templates/crds/adxclusters_crd.yaml | 283 ++++++++++++++++++ .../templates/crds/collectors_crd.yaml | 131 ++++++++ .../adx-mon/templates/crds/ingestors_crd.yaml | 196 ++++++++++++ .../crds/managementcommands_crd.yaml | 133 ++++++++ .../templates/crds/metricsexporters_crd.yaml | 188 ++++++++++++ charts/adx-mon/templates/ingestor.yaml | 227 ++++++++++++++ charts/adx-mon/templates/namespace.yaml | 7 + charts/adx-mon/templates/operator.yaml | 230 ++++++++++++++ charts/adx-mon/values.yaml | 47 +++ 13 files changed, 2137 insertions(+) create mode 100644 charts/adx-mon/Chart.yaml create mode 100644 charts/adx-mon/templates/alerter.yaml create mode 100644 charts/adx-mon/templates/collector-config.yaml create mode 100644 charts/adx-mon/templates/collector.yaml create mode 100644 charts/adx-mon/templates/crds/adxclusters_crd.yaml create mode 100644 charts/adx-mon/templates/crds/collectors_crd.yaml create mode 100644 charts/adx-mon/templates/crds/ingestors_crd.yaml create mode 100644 charts/adx-mon/templates/crds/managementcommands_crd.yaml create mode 100644 
charts/adx-mon/templates/crds/metricsexporters_crd.yaml create mode 100644 charts/adx-mon/templates/ingestor.yaml create mode 100644 charts/adx-mon/templates/namespace.yaml create mode 100644 charts/adx-mon/templates/operator.yaml create mode 100644 charts/adx-mon/values.yaml diff --git a/charts/adx-mon/Chart.yaml b/charts/adx-mon/Chart.yaml new file mode 100644 index 000000000..a1a212cae --- /dev/null +++ b/charts/adx-mon/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: adx-mon +description: Deployment of adx-mon +type: application +version: 1.0.0 +appVersion: 1.0.0 diff --git a/charts/adx-mon/templates/alerter.yaml b/charts/adx-mon/templates/alerter.yaml new file mode 100644 index 000000000..4a71c8049 --- /dev/null +++ b/charts/adx-mon/templates/alerter.yaml @@ -0,0 +1,174 @@ +{{- if .Values.alerter.enabled }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: alerter + namespace: {{ .Release.Namespace }} +automountServiceAccountToken: true +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: adx-mon:alerter +rules: + - apiGroups: + - "" + resources: + - namespaces + - pods + verbs: + - get + - list + - watch + - apiGroups: + - adx-mon.azure.com + resources: + - alertrules + - functions + - managementcommands + verbs: + - get + - list + - watch + - create + - update + - patch + - delete + - apiGroups: + - adx-mon.azure.com + resources: + - alertrules/status + - functions/status + verbs: + - update + - patch + - apiGroups: + - adx-mon.azure.com + resources: + - alertrules/finalizers + - functions/finalizers + verbs: + - get + - update +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: adx-mon:alerter +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: adx-mon:alerter +subjects: + - kind: ServiceAccount + name: alerter + namespace: {{ .Release.Namespace }} +--- +apiVersion: v1 +kind: Service +metadata: + name: alerter + namespace: {{ .Release.Namespace }} 
+spec: + ports: + - port: 8080 + protocol: TCP + targetPort: 8080 + selector: + app: alerter + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: alerter + namespace: {{ .Release.Namespace }} +spec: + replicas: {{ .Values.alerter.replicas | default 1 }} + selector: + matchLabels: + app: alerter + template: + metadata: + labels: + app: alerter + annotations: + adx-mon/scrape: "true" + adx-mon/port: "8080" + adx-mon/path: "/metrics" + adx-mon/log-destination: "Logs:Alerter" + adx-mon/log-parsers: json + spec: + serviceAccountName: alerter + automountServiceAccountToken: true + priorityClassName: system-cluster-critical + {{- if .Values.alerter.affinity }} + {{- if .Values.alerter.affinity.required }} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: {{ .Values.alerter.affinity.key | default "agentpool"}} + operator: In + values: + - {{ .Values.alerter.affinity.value | default "system" }} + {{- else }} + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: {{ .Values.alerter.affinity.key | default "agentpool"}} + operator: In + values: + - {{ .Values.alerter.affinity.value | default "system" }} + {{- end }} + {{- end }} + containers: + - name: alerter + image: ghcr.io/azure/adx-mon/alerter:latest + command: + - /alerter + args: + - "--alerter-address=http://icmnotifier.adx-mon.svc.cluster.local:8080" + - "--port=8080" + - "--cloud={{ .Values.cloud }}" + - "--region={{ .Values.region }}" + - "--tag=environment={{ .Values.environment }}" + - "--auth-msi-id={{ .Values.alerter.auth_msi_id }}" + {{- range $key, $value := .Values.alerter.kusto_endpoints }} + - "--kusto-endpoint={{ $key }}={{ $value }}" + {{- end }} + ports: + - containerPort: 8080 + name: http + env: + - name: GODEBUG + value: http2client=0 + volumeMounts: + - mountPath: /etc/ssl/certs + name: ssl-certs + readOnly: true 
+ - mountPath: /etc/pki/ca-trust/extracted + name: etc-pki-ca-certs + readOnly: true + resources: + requests: + cpu: {{ ((.Values.alerter.resources).requests).cpu | default "100m" }} + memory: {{ ((.Values.alerter.resources).requests).memory | default "256Mi" }} + limits: + cpu: {{ ((.Values.alerter.resources).limits).cpu | default "500m" }} + memory: {{ ((.Values.alerter.resources).limits).memory | default "1Gi" }} + volumes: + - name: ssl-certs + hostPath: + path: /etc/ssl/certs + type: Directory + - name: etc-pki-ca-certs + hostPath: + path: /etc/pki/ca-trust/extracted + type: DirectoryOrCreate +{{- end }} diff --git a/charts/adx-mon/templates/collector-config.yaml b/charts/adx-mon/templates/collector-config.yaml new file mode 100644 index 000000000..5bd2e95c3 --- /dev/null +++ b/charts/adx-mon/templates/collector-config.yaml @@ -0,0 +1,247 @@ +{{- if .Values.collector.enabled }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: collector-config + namespace: {{ .Release.Namespace }} +data: + config.toml: | + # Ingestor URL to send collected telemetry; defaults to the ingestor Service DNS name in the release namespace. + endpoint = {{ .Values.collector.ingestor_endpoint | default (printf "https://ingestor.%s.svc.cluster.local" .Release.Namespace) | toToml }} + + # Region is a location identifier + region = '{{ .Values.region }}' + + # Skip TLS verification. + insecure-skip-verify = true + + # Address to listen on for endpoints. + listen-addr = ':8080' + + # Maximum number of connections to accept. + max-connections = 100 + + # Maximum number of samples to send in a single batch. + max-batch-size = 10000 + + # Storage directory for the WAL. + storage-dir = '/mnt/data' + + # Regexes of metrics to drop from all sources. + drop-metrics = {{ .Values.global_drop_metrics | default (list) | toToml }} + keep-metrics = {{ .Values.global_keep_metrics | default (list) | toToml }} + + # Disable metrics forwarding to endpoints. + disable-metrics-forwarding = false + + # WAL flush interval in milliseconds. 
For collector it's lowered to reduce CPU usage since + # fewer metrics are in flight. + wal-flush-interval-ms = 1000 + + # Add target host,cluster and pod information as columns for tables in Metrics + # For metrics, adxmon_* take on the target pod's values + lift-labels = [ + { name = 'host' }, + { name = 'cluster' }, + { name = 'adxmon_pod', column = 'Pod' }, + { name = 'adxmon_namespace', column = 'Namespace' }, + { name = 'adxmon_container', column = 'Container' }, + ] + + # Add target host,cluster and pod information as columns for tables in Logs + lift-resources = [ + { name = 'host' }, + { name = 'cluster' }, + { name = 'pod', column = 'Pod' }, + { name = 'namespace', column = 'Namespace' }, + { name = 'container', column = 'Container' }, + ] + + # Key/value pairs of labels to add to all metrics and logs. + [add-labels] + host = '$(HOSTNAME)' + cluster = '{{ .Values.cluster_name }}' + cloud = '{{ .Values.cloud }}' + region = '{{ .Values.region }}' + environment = '{{ .Values.environment }}' + + # Defines a prometheus scrape endpoint. + [prometheus-scrape] + + # Database to store metrics in. + database = 'Metrics' + + default-drop-metrics = false + + # Defines a static scrape target. + static-scrape-target = [ + # Scrape our own metrics + { host-regex = '.*', url = 'http://$(HOSTNAME):3100/metrics', namespace = 'adx-mon', pod = 'collector', container = 'collector' }, + + # Scrape kubelet metrics + { host-regex = '.*', url = 'https://$(HOSTNAME):10250/metrics/cadvisor', namespace = 'kube-system', pod = 'kubelet', container = 'cadvisor' }, + { host-regex = '.*', url = 'https://$(HOSTNAME):10250/metrics/resource', namespace = 'kube-system', pod = 'kubelet', container = 'resource' }, + ] + + # Scrape interval in seconds. + scrape-interval = 30 + + # Scrape timeout in seconds. + scrape-timeout = 25 + + # Disable metrics forwarding to endpoints. + disable-metrics-forwarding = false + + # Regexes of metrics to keep from scraping source. 
+ keep-metrics = [] + + # Regexes of metrics to drop from scraping source. + drop-metrics = [] + + # Defines a prometheus remote write endpoint. + [[prometheus-remote-write]] + + # Database to store metrics in. + database = 'Metrics' + + # The path to listen on for prometheus remote write requests. Defaults to /receive. + path = '/receive' + + # Regexes of metrics to drop. + drop-metrics = [] + + # Disable metrics forwarding to endpoints. + disable-metrics-forwarding = false + + # Key/value pairs of labels to add to this source. + [prometheus-remote-write.add-labels] + + # Defines an OpenTelemetry log endpoint. + [otel-log] + # Attributes lifted from the Body and added to Attributes. + lift-attributes = ['kusto.database', 'kusto.table'] + + [[otel-metric]] + # Database to store metrics in. + database = 'Metrics' + # The path to listen on for OTLP/HTTP requests. + path = '/v1/metrics' + # The port to listen on for OTLP/gRPC requests. + grpc-port = 4317 + # Regexes of metrics to drop. + drop-metrics = [] + # Regexes of metrics to keep. + keep-metrics = [] + # List of exporter names to forward metrics to. + exporters = [] + + [[host-log]] + parsers = ['json'] + + journal-target = [ + # matches are optional and are parsed like MATCHES in journalctl. + # If different fields are matched, only entries matching all terms are included. + # If the same fields are matched, entries matching any term are included. + # + can be added between to include a disjunction of terms. + # See examples under man 1 journalctl + { matches = [ '_SYSTEMD_UNIT=kubelet.service' ], database = 'Logs', table = 'Kubelet' }, + { matches = [ '_TRANSPORT=kernel' ], database = 'Logs', table = 'Kernel' }, + { matches = [ '_TRANSPORT=syslog' ], database = 'Logs', table = 'Syslog' } + ] + + +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: collector-singleton-config + namespace: {{ .Release.Namespace }} +data: + config.toml: | + # Ingestor URL to send collected telemetry. 
+ endpoint = {{ .Values.collector.ingestor_endpoint | default (printf "https://ingestor.%s.svc.cluster.local" .Release.Namespace) | toToml }} + + # Region is a location identifier + region = '{{ .Values.region }}' + + # Skip TLS verification. + insecure-skip-verify = true + + # Address to listen on for endpoints. + listen-addr = ':8080' + + # Maximum number of connections to accept. + max-connections = 100 + + # Maximum number of samples to send in a single batch. + max-batch-size = 10000 + + # Storage directory for the WAL. + storage-dir = '/mnt/data' + + # Regexes of metrics to drop from all sources. + drop-metrics = [] + + # Disable metrics forwarding to endpoints. + disable-metrics-forwarding = false + + # WAL flush interval in milliseconds. For collector it's lowered to reduce CPU usage since + # fewer metrics are in flight. + wal-flush-interval-ms = 1000 + + lift-labels = [ + { name = 'host' }, + { name = 'cluster' }, + { name = 'adxmon_pod', column = 'Pod' }, + { name = 'adxmon_namespace', column = 'Namespace' }, + { name = 'adxmon_container', column = 'Container' }, + ] + + # Add target host, cluster and pod information as columns for tables in Logs. + lift-resources = [ + { name = 'host' }, + { name = 'cluster' }, + { name = 'adxmon_pod', column = 'Pod' }, + { name = 'adxmon_namespace', column = 'Namespace' }, + { name = 'adxmon_container', column = 'Container' }, + ] + + # Key/value pairs of labels to add to all metrics. + [add-labels] + host = '$(HOSTNAME)' + cluster = '{{ .Values.cluster_name }}' + cloud = '{{ .Values.cloud }}' + region = '{{ .Values.region }}' + environment = '{{ .Values.environment }}' + + # Defines a prometheus scrape endpoint. + [prometheus-scrape] + + # Database to store metrics in. + database = 'Metrics' + + default-drop-metrics = false + + # Defines a static scrape target. 
+ static-scrape-target = [ + # Scrape api server endpoint + { host-regex = '.*', url = 'https://kubernetes.default.svc/metrics', namespace = 'kube-system', pod = 'kube-apiserver', container = 'kube-apiserver' }, + ] + + # Scrape interval in seconds. + scrape-interval = 30 + + # Scrape timeout in seconds. + scrape-timeout = 25 + + # Disable dynamic discovery of scrape targets. + disable-discovery = true + + # Disable metrics forwarding to endpoints. + disable-metrics-forwarding = false + + # Regexes of metrics to drop from, and to keep from, the scraping source. + drop-metrics = {{ .Values.singleton_drop_metrics | default (list) | toToml }} + keep-metrics = {{ .Values.singleton_keep_metrics | default (list) | toToml }} +{{- end }} diff --git a/charts/adx-mon/templates/collector.yaml b/charts/adx-mon/templates/collector.yaml new file mode 100644 index 000000000..912627c55 --- /dev/null +++ b/charts/adx-mon/templates/collector.yaml @@ -0,0 +1,268 @@ +{{- if .Values.collector.enabled }} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: collector + namespace: {{ .Release.Namespace }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: adx-mon:collector +rules: + - apiGroups: + - "" + resources: + - nodes/metrics + - nodes/proxy + verbs: + - get + - apiGroups: + - "" + resources: + - namespaces + - nodes # required if reading node labels into metrics labels and logs attributes + - pods + verbs: + - get + - list + - watch + - nonResourceURLs: + - /metrics + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: adx-mon:collector +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: adx-mon:collector +subjects: + - kind: ServiceAccount + name: collector + namespace: {{ .Release.Namespace }} +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: collector + namespace: {{ .Release.Namespace }} +spec: + selector: + matchLabels: + adxmon: collector + updateStrategy: + type: 
RollingUpdate + rollingUpdate: + maxSurge: 0 + maxUnavailable: 10% + template: + metadata: + labels: + adxmon: collector + annotations: + adx-mon/scrape: "true" + adx-mon/port: "9091" + adx-mon/path: "/metrics" + adx-mon/log-destination: "Logs:Collector" + adx-mon/log-parsers: json + configmap-hash: {{ include (print $.Template.BasePath "/collector-config.yaml") . | sha256sum }} + spec: + tolerations: + - key: CriticalAddonsOnly + operator: Exists + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + serviceAccountName: collector + priorityClassName: system-node-critical + containers: + - name: collector + securityContext: + privileged: true + image: ghcr.io/azure/adx-mon/collector:latest + command: + - /collector + args: + - "--config=/etc/config/config.toml" + - "--hostname=$(HOSTNAME)" + ports: + - containerPort: 8080 + protocol: TCP + hostPort: 3100 + - containerPort: 4317 + protocol: TCP + hostPort: 4317 + env: + - name: LOG_LEVEL + value: INFO + - name: HOSTNAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: "GODEBUG" + value: "http2client=0" + volumeMounts: + - mountPath: /etc/ssl/certs + name: ssl-certs + readOnly: true + - mountPath: /etc/pki/ca-trust/extracted + name: etc-pki-ca-certs + readOnly: true + - name: config-volume + mountPath: /etc/config + - name: storage + mountPath: /mnt/data + - name: varlog + mountPath: /var/log + readOnly: true + - name: runlog + mountPath: /run/log + readOnly: true + - name: varlibdockercontainers + mountPath: /var/lib/docker/containers + readOnly: true + - name: etcmachineid + mountPath: /etc/machine-id + readOnly: true + resources: + requests: + cpu: 50m + memory: 100Mi + limits: + cpu: 500m + memory: 2000Mi + volumes: + - name: ssl-certs + hostPath: + path: /etc/ssl/certs + type: Directory + - name: etc-pki-ca-certs + hostPath: + path: /etc/pki/ca-trust/extracted + type: DirectoryOrCreate + 
- name: config-volume + configMap: + # Provide the name of the ConfigMap containing the files you want + # to add to the container + name: collector-config + - name: storage + hostPath: + path: /mnt/collector + - name: varlog + hostPath: + path: /var/log + - name: runlog # used for non-persistent storage mode of journalctl + hostPath: + path: /run/log + - name: varlibdockercontainers + hostPath: + path: /var/lib/docker/containers + - name: etcmachineid + hostPath: + path: /etc/machine-id + type: File +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: collector-singleton + namespace: {{ .Release.Namespace }} +spec: + replicas: 1 + selector: + matchLabels: + adxmon: collector-singleton # must not overlap the collector DaemonSet selector (adxmon: collector) + template: + metadata: + labels: + adxmon: collector-singleton # keep in sync with spec.selector.matchLabels above + annotations: + adx-mon/scrape: "true" + adx-mon/port: "9091" + adx-mon/path: "/metrics" + adx-mon/log-destination: "Logs:Collector" + adx-mon/log-parsers: json + configmap-hash: {{ include (print $.Template.BasePath "/collector-config.yaml") . | sha256sum }} + spec: + tolerations: + - key: CriticalAddonsOnly + operator: Exists + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + serviceAccountName: collector + priorityClassName: system-node-critical + containers: + - name: collector + image: ghcr.io/azure/adx-mon/collector:latest + command: + - /collector + args: + - "--config=/etc/config/config.toml" + - "--hostname=$(HOSTNAME)" + env: + - name: LOG_LEVEL + value: INFO + - name: HOSTNAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: "GODEBUG" + value: "http2client=0" + volumeMounts: + - mountPath: /etc/ssl/certs + name: ssl-certs + readOnly: true + - mountPath: /etc/pki/ca-trust/extracted + name: etc-pki-ca-certs + readOnly: true + - name: config-volume + mountPath: /etc/config + - name: storage + mountPath: /mnt/data + - name: varlog + mountPath: /var/log + readOnly: true + - name: 
varlibdockercontainers + mountPath: /var/lib/docker/containers + readOnly: true + resources: + requests: + cpu: 50m + memory: 100Mi + limits: + cpu: 500m + memory: 2000Mi + volumes: + - name: ssl-certs + hostPath: + path: /etc/ssl/certs + type: Directory + - name: etc-pki-ca-certs + hostPath: + path: /etc/pki/ca-trust/extracted + type: DirectoryOrCreate + - name: config-volume + configMap: + name: collector-singleton-config + - name: storage + hostPath: + path: /mnt/collector + - name: varlog + hostPath: + path: /var/log + - name: varlibdockercontainers + hostPath: + path: /var/lib/docker/containers +{{- end }} diff --git a/charts/adx-mon/templates/crds/adxclusters_crd.yaml b/charts/adx-mon/templates/crds/adxclusters_crd.yaml new file mode 100644 index 000000000..a850d7ef9 --- /dev/null +++ b/charts/adx-mon/templates/crds/adxclusters_crd.yaml @@ -0,0 +1,283 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.19.0 + name: adxclusters.adx-mon.azure.com +spec: + group: adx-mon.azure.com + names: + kind: ADXCluster + listKind: ADXClusterList + plural: adxclusters + singular: adxcluster + scope: Namespaced + versions: + - name: v1 + schema: + openAPIV3Schema: + description: ADXCluster is the Schema for the adxclusters API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ADXClusterSpec defines the desired state of ADXCluster + properties: + clusterName: + description: ClusterName is the unique, valid name for the ADX cluster. + Must match ^[a-zA-Z0-9-]+$ and be at most 100 characters. Used for + resource identification and naming in Azure. + maxLength: 100 + pattern: ^[a-zA-Z0-9-]+$ + type: string + criteriaExpression: + description: |- + CriteriaExpression is an optional CEL (Common Expression Language) expression evaluated against + operator cluster labels (region, environment, cloud, and any --cluster-labels key/value pairs). + Every label is exposed as a string variable that can be referenced directly, for example: + + criteriaExpression: "region in ['eastus','westus'] && environment == 'prod'" + + Semantics: + * Empty / omitted expression => the ADXCluster always reconciles. + * When specified, the expression must evaluate to true for reconciliation; false skips quietly. + * CEL parse, type-check, or evaluation errors are surfaced via status and block reconciliation + until corrected. + type: string + databases: + description: Databases is an array of ADXClusterDatabaseSpec objects. + Each object defines a database to be created in the ADX cluster. + If not specified, no databases will be created. 
+ items: + properties: + databaseName: + description: ADX valid database name, required + maxLength: 64 + minLength: 1 + pattern: ^[a-zA-Z0-9_]+$ + type: string + retentionInDays: + default: 30 + description: default 30, optional + type: integer + retentionPolicy: + description: ADX retention policy, optional + type: string + telemetryType: + description: 'TelemetryType: Required' + enum: + - Metrics + - Logs + - Traces + type: string + required: + - databaseName + - telemetryType + type: object + type: array + endpoint: + description: 'Endpoint is the URI of an existing ADX cluster. If set, + the operator will use this cluster instead of provisioning a new + one. Example: "https://mycluster.kusto.windows.net"' + format: uri + type: string + federation: + description: Supports cluster partitioning. Only relevant if Role + is set. + properties: + federatedTargets: + description: If role is "Partition", specifies the Federated cluster(s) + details for heartbeating. + items: + properties: + endpoint: + description: 'Endpoint is the URI of the federated ADX cluster. + Example: "https://mycluster.kusto.windows.net"' + type: string + heartbeatDatabase: + pattern: ^[a-zA-Z0-9_]+$ + type: string + heartbeatTable: + pattern: ^[a-zA-Z0-9_]+$ + type: string + managedIdentityClientId: + description: Used for writing logs to the federated cluster. + type: string + required: + - endpoint + - heartbeatDatabase + - heartbeatTable + - managedIdentityClientId + type: object + type: array + heartbeatDatabase: + description: If role is "Federated", specifies the ADX cluster's + heartbeat database. + pattern: ^[a-zA-Z0-9_]+$ + type: string + heartbeatTTL: + default: 1h + description: If role is "Federated", specifies the ADX cluster's + heartbeat table TTL. + pattern: ^(\d+h)?(\d+m)?(\d+s)?$ + type: string + heartbeatTable: + description: If role is "Federated", specifies the ADX cluster's + heartbeat table. 
+ pattern: ^[a-zA-Z0-9_]+$ + type: string + partitioning: + additionalProperties: + type: string + description: If role is "Partition", specifies the ADX cluster's + partition details. Open-ended map/object for partitioning metadata + (geo, location, tenant, etc.). + type: object + type: object + provision: + description: Provision contains optional Azure provisioning details + for the ADX cluster. If omitted, the operator will attempt zero-config + provisioning using Azure IMDS. + properties: + appliedProvisionState: + description: |- + AppliedProvisionState is a JSON-serialized snapshot of the SkuName, Tier, and UserAssignedIdentities + as last applied by the operator. This is set by the operator and is read-only for users. + type: string + autoScale: + description: |- + //+kubebuilder:default=false + AutoScale indicates whether to enable auto-scaling for the ADX cluster. Optional. Defaults to false if not specified. + type: boolean + autoScaleMax: + default: 10 + description: AutoScaleMax is the maximum number of nodes for auto-scaling. + Optional. Defaults to 10 if not specified. + type: integer + autoScaleMin: + default: 2 + description: AutoScaleMin is the minimum number of nodes for auto-scaling. + Optional. Defaults to 2 if not specified. + type: integer + location: + description: Location is the Azure region for the ADX cluster + (e.g., "eastus2"). Optional. If omitted, will be auto-detected. + type: string + resourceGroup: + description: ResourceGroup is the Azure resource group for the + ADX cluster. Optional. If omitted, will be auto-created or detected. + type: string + skuName: + description: SkuName is the Azure SKU for the ADX cluster (e.g., + "Standard_L8as_v3"). Optional. The operator will select a default + if not specified. + type: string + subscriptionId: + description: SubscriptionId is the Azure subscription ID to use + for provisioning. Optional. If omitted, will be auto-detected + in zero-config mode. 
+ type: string + tier: + description: Tier is the Azure ADX tier (e.g., "Standard"). Optional. + Defaults to "Standard" if not specified. + type: string + userAssignedIdentities: + description: |- + UserAssignedIdentities is a list of MSIs that can be attached to the cluster. They should be resource-ids of the form + /subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/{identityName} + items: + type: string + type: array + type: object + role: + description: 'Role specifies the cluster''s role: "Partition" (default, + for data-holding clusters) or "Federated" (for the central federating + cluster).' + enum: + - Partition + - Federated + type: string + required: + - clusterName + type: object + status: + description: ADXClusterStatus defines the observed state of ADXCluster + properties: + conditions: + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. 
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
diff --git a/charts/adx-mon/templates/crds/collectors_crd.yaml b/charts/adx-mon/templates/crds/collectors_crd.yaml
new file mode 100644
index 000000000..fbcc5ae6c
--- /dev/null
+++ b/charts/adx-mon/templates/crds/collectors_crd.yaml
@@ -0,0 +1,131 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.19.0
+  name: collectors.adx-mon.azure.com
+spec:
+  group: adx-mon.azure.com
+  names:
+    kind: Collector
+    listKind: CollectorList
+    plural: collectors
+    singular: collector
+  scope: Namespaced
+  versions:
+  - name: v1
+    schema:
+      openAPIV3Schema:
+        description: Collector is the Schema for the collectors API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: CollectorSpec defines the desired state of Collector
+            properties:
+              criteriaExpression:
+                description: |-
+                  CriteriaExpression is an optional CEL (Common Expression Language) expression evaluated against
+                  operator cluster labels (region, environment, cloud, and any --cluster-labels key/value pairs).
+                  All labels are exposed as string variables. Example:
+
+                    criteriaExpression: "environment == 'prod' && region == 'eastus'"
+
+                  Semantics:
+                    * Empty / omitted expression => the Collector always reconciles.
+                    * When specified, the expression must evaluate to true for reconciliation; false skips quietly.
+                    * CEL parse, type-check, or evaluation errors surface via status and block reconciliation until
+                      corrected.
+                type: string
+              image:
+                description: Image is the container image to use for the collector
+                  component. Optional; if omitted, a default image will be used.
+                type: string
+              ingestorEndpoint:
+                description: IngestorEndpoint is the URI endpoint for the ingestor
+                  service that this collector should send data to. Optional; if omitted,
+                  the operator will configure it automatically.
+                format: uri
+                type: string
+            type: object
+          status:
+            description: CollectorStatus defines the observed state of Collector
+            properties:
+              conditions:
+                items:
+                  description: Condition contains details for one aspect of the current
+                    state of this API Resource.
+                  properties:
+                    lastTransitionTime:
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from one status to another.
+                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                      format: date-time
+                      type: string
+                    message:
+                      description: |-
+                        message is a human readable message indicating details about the transition.
+                        This may be an empty string.
+                      maxLength: 32768
+                      type: string
+                    observedGeneration:
+                      description: |-
+                        observedGeneration represents the .metadata.generation that the condition was set based upon.
+                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                        with respect to the current state of the instance.
+                      format: int64
+                      minimum: 0
+                      type: integer
+                    reason:
+                      description: |-
+                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
diff --git a/charts/adx-mon/templates/crds/ingestors_crd.yaml b/charts/adx-mon/templates/crds/ingestors_crd.yaml
new file mode 100644
index 000000000..a1ab957c9
--- /dev/null
+++ b/charts/adx-mon/templates/crds/ingestors_crd.yaml
@@ -0,0 +1,196 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.19.0
+  name: ingestors.adx-mon.azure.com
+spec:
+  group: adx-mon.azure.com
+  names:
+    kind: Ingestor
+    listKind: IngestorList
+    plural: ingestors
+    singular: ingestor
+  scope: Namespaced
+  versions:
+  - name: v1
+    schema:
+      openAPIV3Schema:
+        description: Ingestor is the Schema for the ingestors API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: IngestorSpec defines the desired state of Ingestor
+            properties:
+              adxClusterSelector:
+                description: ADXClusterSelector is a label selector used to select
+                  the ADXCluster CRDs this ingestor should target. This field is required.
+                properties:
+                  matchExpressions:
+                    description: matchExpressions is a list of label selector requirements.
+                      The requirements are ANDed.
+                    items:
+                      description: |-
+                        A label selector requirement is a selector that contains values, a key, and an operator that
+                        relates the key and values.
+                      properties:
+                        key:
+                          description: key is the label key that the selector applies
+                            to.
+                          type: string
+                        operator:
+                          description: |-
+                            operator represents a key's relationship to a set of values.
+                            Valid operators are In, NotIn, Exists and DoesNotExist.
+                          type: string
+                        values:
+                          description: |-
+                            values is an array of string values. If the operator is In or NotIn,
+                            the values array must be non-empty. If the operator is Exists or DoesNotExist,
+                            the values array must be empty. This array is replaced during a strategic
+                            merge patch.
+                          items:
+                            type: string
+                          type: array
+                          x-kubernetes-list-type: atomic
+                      required:
+                      - key
+                      - operator
+                      type: object
+                    type: array
+                    x-kubernetes-list-type: atomic
+                  matchLabels:
+                    additionalProperties:
+                      type: string
+                    description: |-
+                      matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
+                      map is equivalent to an element of matchExpressions, whose key field is "key", the
+                      operator is "In", and the values array contains only "value". The requirements are ANDed.
+                    type: object
+                type: object
+                x-kubernetes-map-type: atomic
+              appliedProvisionState:
+                description: |-
+                  AppliedProvisionState is a JSON-serialized snapshot of the CRD
+                  as last applied by the operator. This is set by the operator and is read-only for users.
+                type: string
+              criteriaExpression:
+                description: |-
+                  CriteriaExpression is an optional CEL (Common Expression Language) expression evaluated against
+                  operator cluster labels (region, environment, cloud, and any --cluster-labels key/value pairs).
+                  All labels are exposed as string variables. Example:
+
+                    criteriaExpression: "environment == 'prod' && region == 'eastus'"
+
+                  Semantics:
+                    * Empty / omitted expression => the Ingestor always reconciles.
+                    * When specified, the expression must evaluate to true for reconciliation; false skips quietly.
+                    * CEL parse, type-check, or evaluation errors surface via status and block reconciliation until
+                      corrected.
+                type: string
+              endpoint:
+                description: Endpoint is the endpoint to use for the ingestor. If
+                  running in a cluster, this should be the service name; otherwise,
+                  the operator will generate an endpoint. Optional.
+                type: string
+              exposeExternally:
+                default: false
+                description: ExposeExternally indicates if the ingestor should be
+                  exposed externally as reflected in the Endpoint. Optional; defaults
+                  to false.
+                type: boolean
+              image:
+                description: Image is the container image to use for the ingestor
+                  component. Optional; if omitted, a default image will be used.
+                type: string
+              replicas:
+                default: 1
+                description: Replicas is the number of ingestor replicas to run. Optional;
+                  defaults to 1 if omitted.
+                format: int32
+                type: integer
+            required:
+            - adxClusterSelector
+            type: object
+          status:
+            description: IngestorStatus defines the observed state of Ingestor
+            properties:
+              conditions:
+                items:
+                  description: Condition contains details for one aspect of the current
+                    state of this API Resource.
+                  properties:
+                    lastTransitionTime:
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from one status to another.
+                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                      format: date-time
+                      type: string
+                    message:
+                      description: |-
+                        message is a human readable message indicating details about the transition.
+                        This may be an empty string.
+                      maxLength: 32768
+                      type: string
+                    observedGeneration:
+                      description: |-
+                        observedGeneration represents the .metadata.generation that the condition was set based upon.
+                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                        with respect to the current state of the instance.
+                      format: int64
+                      minimum: 0
+                      type: integer
+                    reason:
+                      description: |-
+                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
diff --git a/charts/adx-mon/templates/crds/managementcommands_crd.yaml b/charts/adx-mon/templates/crds/managementcommands_crd.yaml
new file mode 100644
index 000000000..c4be27a46
--- /dev/null
+++ b/charts/adx-mon/templates/crds/managementcommands_crd.yaml
@@ -0,0 +1,133 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.19.0
+  name: managementcommands.adx-mon.azure.com
+spec:
+  group: adx-mon.azure.com
+  names:
+    kind: ManagementCommand
+    listKind: ManagementCommandList
+    plural: managementcommands
+    singular: managementcommand
+  scope: Namespaced
+  versions:
+  - name: v1
+    schema:
+      openAPIV3Schema:
+        description: ManagementCommand is the Schema for the managementcommands API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: ManagementCommandSpec defines the desired state of ManagementCommand
+            properties:
+              body:
+                description: Body is the management command to execute
+                type: string
+              criteriaExpression:
+                description: |-
+                  CriteriaExpression is an optional CEL (Common Expression Language) expression evaluated against
+                  operator / ingestor cluster labels (region, environment, cloud, and any --cluster-labels key/value
+                  pairs). All labels are exposed as string variables. Example:
+
+                    criteriaExpression: "environment == 'prod' && region == 'eastus'"
+
+                  Semantics:
+                    * Empty / omitted expression => the ManagementCommand always executes when selected.
+                    * When specified, the expression must evaluate to true; false skips execution.
+                    * CEL parse, type-check, or evaluation errors surface via status and block execution until
+                      corrected.
+                type: string
+              database:
+                description: |-
+                  Database is the target database for a management command. Not all management commands
+                  are database specific.
+                type: string
+            required:
+            - body
+            type: object
+          status:
+            description: ManagementCommandStatus defines the observed state of ManagementCommand
+            properties:
+              conditions:
+                description: Conditions is a list of conditions that apply to the
+                  ManagementCommand
+                items:
+                  description: Condition contains details for one aspect of the current
+                    state of this API Resource.
+                  properties:
+                    lastTransitionTime:
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from one status to another.
+                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                      format: date-time
+                      type: string
+                    message:
+                      description: |-
+                        message is a human readable message indicating details about the transition.
+                        This may be an empty string.
+                      maxLength: 32768
+                      type: string
+                    observedGeneration:
+                      description: |-
+                        observedGeneration represents the .metadata.generation that the condition was set based upon.
+                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                        with respect to the current state of the instance.
+                      format: int64
+                      minimum: 0
+                      type: integer
+                    reason:
+                      description: |-
+                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
diff --git a/charts/adx-mon/templates/crds/metricsexporters_crd.yaml b/charts/adx-mon/templates/crds/metricsexporters_crd.yaml
new file mode 100644
index 000000000..e9ca02914
--- /dev/null
+++ b/charts/adx-mon/templates/crds/metricsexporters_crd.yaml
@@ -0,0 +1,188 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.19.0
+  name: metricsexporters.adx-mon.azure.com
+spec:
+  group: adx-mon.azure.com
+  names:
+    kind: MetricsExporter
+    listKind: MetricsExporterList
+    plural: metricsexporters
+    singular: metricsexporter
+  scope: Namespaced
+  versions:
+  - name: v1
+    schema:
+      openAPIV3Schema:
+        description: MetricsExporter is the Schema for the metricsexporters API
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: MetricsExporterSpec defines the desired state of MetricsExporter
+            properties:
+              body:
+                description: Body is the KQL query to execute
+                type: string
+              criteria:
+                additionalProperties:
+                  items:
+                    type: string
+                  type: array
+                description: |-
+                  Criteria for cluster-based execution selection (same pattern as SummaryRule)
+                  Key/Value pairs used to determine when a metrics exporter can execute. If empty, always execute. Keys and values
+                  are deployment specific and configured on adxexporter instances. For example, an adxexporter instance may be
+                  started with `--cluster-labels=region=eastus,environment=production`. If a MetricsExporter has `criteria: {region: [eastus], environment: [production]}`, then the rule will only
+                  execute on that adxexporter. Any key/values pairs must match (case-insensitive) for the rule to execute.
+                type: object
+              criteriaExpression:
+                description: |-
+                  CriteriaExpression is an optional CEL (Common Expression Language) expression that provides a richer way
+                  to determine if the metrics exporter should execute. The CEL environment is dynamically constructed from the
+                  exporter's cluster labels (e.g. region, cloud, environment, and any --cluster-labels key/value pairs). All variables
+                  are exposed as strings and can be referenced directly by their label name. For example:
+
+                    criteriaExpression: "region in ['eastus','westus'] && environment == 'prod'"
+
+                  Execution semantics mirror AlertRule:
+                    * If neither criteria nor criteriaExpression are specified, the exporter always executes.
+                    * If only criteriaExpression is specified, it must evaluate to true for execution.
+                    * If only criteria is specified, existing matching behavior (any key/value match) applies.
+                    * If both are specified, BOTH must match (criteria map match AND expression true).
+
+                  An invalid expression (parse or type check error) will be treated as an error and prevent execution.
+                type: string
+              database:
+                description: Database is the name of the database to query
+                type: string
+              interval:
+                description: Interval defines how often to execute the query and refresh
+                  metrics
+                type: string
+              transform:
+                description: Transform defines how to convert query results to metrics
+                properties:
+                  defaultMetricName:
+                    description: DefaultMetricName provides a fallback if MetricNameColumn
+                      is not specified
+                    type: string
+                  labelColumns:
+                    description: LabelColumns specifies columns to use as metric labels
+                    items:
+                      type: string
+                    type: array
+                  metricNameColumn:
+                    description: MetricNameColumn specifies which column contains
+                      the metric name
+                    type: string
+                  metricNamePrefix:
+                    description: MetricNamePrefix provides optional team/project namespacing
+                      for all metrics
+                    type: string
+                  timestampColumn:
+                    description: TimestampColumn specifies which column contains the
+                      timestamp
+                    type: string
+                  valueColumns:
+                    description: ValueColumns specifies columns to use as metric values
+                    items:
+                      type: string
+                    type: array
+                required:
+                - timestampColumn
+                - valueColumns
+                type: object
+            required:
+            - body
+            - database
+            - interval
+            - transform
+            type: object
+          status:
+            description: MetricsExporterStatus defines the observed state of MetricsExporter
+            properties:
+              conditions:
+                description: |-
+                  Conditions is an array of current observed MetricsExporter conditions.
+                  Controllers set at minimum the owner condition (MetricsExporterOwner) and may also
+                  use shared conditions defined in conditions.go: ConditionCriteria, ConditionCompleted, ConditionFailed.
+                items:
+                  description: Condition contains details for one aspect of the current
+                    state of this API Resource.
+                  properties:
+                    lastTransitionTime:
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from one status to another.
+                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                      format: date-time
+                      type: string
+                    message:
+                      description: |-
+                        message is a human readable message indicating details about the transition.
+                        This may be an empty string.
+                      maxLength: 32768
+                      type: string
+                    observedGeneration:
+                      description: |-
+                        observedGeneration represents the .metadata.generation that the condition was set based upon.
+                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                        with respect to the current state of the instance.
+                      format: int64
+                      minimum: 0
+                      type: integer
+                    reason:
+                      description: |-
+                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
diff --git a/charts/adx-mon/templates/ingestor.yaml b/charts/adx-mon/templates/ingestor.yaml
new file mode 100644
index 000000000..a49323830
--- /dev/null
+++ b/charts/adx-mon/templates/ingestor.yaml
@@ -0,0 +1,227 @@
+{{- if .Values.ingestor.enabled }}
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: ingestor
+  namespace: {{ .Release.Namespace }}
+  {{- if .Values.ingestor.client_id }}
+  annotations:
+    azure.workload.identity/client-id: {{ .Values.ingestor.client_id | quote }}
+  {{- end }}
+automountServiceAccountToken: false
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: adx-mon:ingestor
+rules:
+  - apiGroups:
+      - ""
+    resources:
+      - namespaces
+      - pods
+    verbs:
+      - get
+      - list
+      - watch
+      - update
+  - apiGroups:
+      - adx-mon.azure.com
+    resources:
+      - functions
+      - managementcommands
+      - summaryrules
+    verbs:
+      - get
+      - list
+      - update
+      - patch
+  - apiGroups:
+      - adx-mon.azure.com
+    resources:
+      - functions/status
+      - managementcommands/status
+      - summaryrules/status
+    verbs:
+      - update
+      - patch
+  - apiGroups:
+      - adx-mon.azure.com
+    resources:
+      - functions/finalizers
+      - managementcommands/finalizers
+      - summaryrules/finalizers
+    verbs:
+      - get
+      - update
+      - delete
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: adx-mon:ingestor
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: adx-mon:ingestor
+subjects:
+  - kind: ServiceAccount
+    name: ingestor
+    namespace: {{ .Release.Namespace }}
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: ingestor
+  namespace: {{ .Release.Namespace }}
+spec:
+  type: ClusterIP
+  selector:
+    app: ingestor
+  ports:
+    # Clients connect on port 443; traffic is forwarded to the ingestor container port 9090.
+    - port: 443
+      targetPort: 9090
+      # Optional field
+      # By default and for convenience, the Kubernetes control plane will allocate a port from a range (default: 30000-32767)
+      #nodePort: 30007
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: ingestor
+  namespace: {{ .Release.Namespace }}
+spec:
+  serviceName: "adx-mon"
+  replicas: {{ .Values.ingestor.replicas | default 1 }}
+  updateStrategy:
+    type: RollingUpdate
+  selector:
+    matchLabels:
+      app: ingestor
+  template:
+    metadata:
+      labels:
+        app: ingestor
+        {{- if .Values.ingestor.client_id }}
+        azure.workload.identity/use: "true"
+        {{- end }}
+      annotations:
+        adx-mon/scrape: "true"
+        adx-mon/port: "9091"
+        adx-mon/path: "/metrics"
+        adx-mon/log-destination: "Logs:Ingestor"
+        adx-mon/log-parsers: json
+    spec:
+      automountServiceAccountToken: true
+      securityContext: {}
+      serviceAccountName: ingestor
+      priorityClassName: system-node-critical
+      containers:
+        - name: ingestor
+          image: {{ .Values.ingestor.image | default "ghcr.io/azure/adx-mon/ingestor:latest" | quote }}
+          securityContext:
+            allowPrivilegeEscalation: false
+            privileged: false
+            readOnlyRootFilesystem: true
+            capabilities:
+              drop:
+                - ALL
+          ports:
+            - containerPort: 9090
+              name: ingestor
+            - containerPort: 9091
+              name: metrics
+          env:
+            - name: LOG_LEVEL
+              value: INFO
+            - name: "GODEBUG"
+              value: "http2client=0"
+            - name: "AZURE_RESOURCE"
+              value: {{ .Values.adx.url | quote }}
+            - name: "AZURE_CLIENT_ID"
+              value: {{ .Values.ingestor.client_id | quote }}
+          command:
+            - /ingestor
+          args:
+            - "--storage-dir=/mnt/data"
+            - "--max-segment-age=5s"
+            - "--max-disk-usage=21474836480"
+            - "--max-transfer-size=10485760"
+            - "--max-connections=8000"
+            - "--insecure-skip-verify"
+            - "--metrics-kusto-endpoints=Metrics={{ .Values.adx.url }}"
+            - "--logs-kusto-endpoints=Logs={{ .Values.adx.url }}"
+          volumeMounts:
+            - name: metrics
+              mountPath: /mnt/data
+            - mountPath: /etc/pki/ca-trust/extracted
+              name: etc-pki-ca-certs
+              readOnly: true
+            - mountPath: /etc/ssl/certs
+              name: ca-certs
+              readOnly: true
+          readinessProbe:
+            httpGet:
+              path: /readyz
+              port: 9090
+              scheme: HTTPS
+            initialDelaySeconds: 10
+            periodSeconds: 5
+      affinity:
+        podAntiAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            - labelSelector:
+                matchExpressions:
+                  - key: app
+                    operator: In
+                    values:
+                      - ingestor
+              topologyKey: kubernetes.io/hostname
+        {{- if .Values.ingestor.affinity }}
+        {{- if .Values.ingestor.affinity.required }}
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+              - matchExpressions:
+                  - key: {{ .Values.ingestor.affinity.key | default "agentpool"}}
+                    operator: In
+                    values:
+                      - {{ .Values.ingestor.affinity.value | default "system" }}
+        {{- else }}
+        nodeAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 1
+              preference:
+                matchExpressions:
+                  - key: {{ .Values.ingestor.affinity.key | default "agentpool"}}
+                    operator: In
+                    values:
+                      - {{ .Values.ingestor.affinity.value | default "system" }}
+        {{- end }}
+        {{- end }}
+      volumes:
+        - name: ca-certs
+          hostPath:
+            path: /etc/ssl/certs
+            type: Directory
+        - name: etc-pki-ca-certs
+          hostPath:
+            path: /etc/pki/ca-trust/extracted
+            type: DirectoryOrCreate
+        - name: metrics
+          hostPath:
+            path: /mnt/ingestor
+      tolerations:
+        - key: CriticalAddonsOnly
+          operator: Exists
+        - effect: NoExecute
+          key: node.kubernetes.io/not-ready
+          operator: Exists
+          tolerationSeconds: 300
+        - effect: NoExecute
+          key: node.kubernetes.io/unreachable
+          operator: Exists
+          tolerationSeconds: 300
+{{- end }}
diff --git a/charts/adx-mon/templates/namespace.yaml b/charts/adx-mon/templates/namespace.yaml
new file mode 100644
index 000000000..9b2965110
--- /dev/null
+++ b/charts/adx-mon/templates/namespace.yaml
@@ -0,0 +1,7 @@
+{{- if .Values.create_namespace }}
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: {{ .Release.Namespace }}
+{{- end }}
diff --git a/charts/adx-mon/templates/operator.yaml b/charts/adx-mon/templates/operator.yaml
new file mode 100644
index 000000000..915a049d6
--- /dev/null
+++ b/charts/adx-mon/templates/operator.yaml
@@ -0,0 +1,236 @@
+{{- if .Values.operator.enabled }}
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: operator
+  namespace: {{ .Release.Namespace }}
+  {{- if .Values.operator.client_id }}
+  # Emitted only when a client ID is configured: an unset value would render as a null annotation.
+  annotations:
+    azure.workload.identity/client-id: {{ .Values.operator.client_id | quote }}
+  {{- end }}
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: adx-mon:operator
+rules:
+  - apiGroups:
+      - ""
+    resources:
+      - configmaps
+      - endpoints
+      - events
+      - namespaces
+      - pods
+      - secrets
+      - serviceaccounts
+      - services
+    verbs:
+      - get
+      - list
+      - watch
+      - create
+      - update
+      - patch
+      - delete
+  - apiGroups:
+      - apps
+    resources:
+      - deployments
+      - daemonsets
+      - replicasets
+      - statefulsets
+    verbs:
+      - get
+      - list
+      - watch
+      - create
+      - update
+      - patch
+      - delete
+  - apiGroups:
+      - rbac.authorization.k8s.io
+    resources:
+      - roles
+      - rolebindings
+      - clusterroles
+      - clusterrolebindings
+    verbs:
+      - get
+      - list
+      - watch
+      - create
+      - update
+      - patch
+      - delete
+  - apiGroups:
+      - adx-mon.azure.com
+    resources:
+      - adxclusters
+      - alerters
+      - alertrules
+      - collectors
+      - functions
+      - ingestors
+      - managementcommands
+      - metricsexporters
+      - summaryrules
+    verbs:
+      - get
+      - list
+      - watch
+      - create
+      - update
+      - patch
+      - delete
+  - apiGroups:
+      - adx-mon.azure.com
+    resources:
+      - adxclusters/status
+      - alerters/status
+      - alertrules/status
+      - collectors/status
+      - functions/status
+      - ingestors/status
+      - managementcommands/status
+      - metricsexporters/status
+      - summaryrules/status
+    verbs:
+      - get
+      - update
+      - patch
+  - apiGroups:
+      - adx-mon.azure.com
+    resources:
+      - adxclusters/finalizers
+      - alerters/finalizers
+      - alertrules/finalizers
+      - collectors/finalizers
+      - functions/finalizers
+      - ingestors/finalizers
+      - managementcommands/finalizers
+      - metricsexporters/finalizers
+      - summaryrules/finalizers
+    verbs:
+      - get
+      - update
+      - delete
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: adx-mon:operator
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: adx-mon:operator
+subjects:
+  - kind: ServiceAccount
+    name: operator
+    namespace: {{ .Release.Namespace }}
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: operator
+  namespace: {{ .Release.Namespace }}
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: operator
+  template:
+    metadata:
+      annotations:
+        adx-mon/scrape: "true"
+        adx-mon/log-destination: "Logs:Operator"
+        adx-mon/log-parsers: json
+      labels:
+        app: operator
+        {{- if .Values.operator.client_id }}
+        azure.workload.identity/use: "true"
+        {{- end }}
+    spec:
+      automountServiceAccountToken: true
+      serviceAccountName: operator
+      securityContext:
+        runAsNonRoot: true
+        runAsUser: 1000
+        runAsGroup: 3000
+        fsGroup: 65534
+      containers:
+        - name: operator
+          image: {{ .Values.operator.image | default "ghcr.io/azure/adx-mon/operator:latest" | quote }}
+          imagePullPolicy: Always
+          args:
+            - --pprof-bind-address=:6060
+          ports:
+            - containerPort: 6060
+              protocol: TCP
+          securityContext:
+            allowPrivilegeEscalation: false
+            privileged: false
+            readOnlyRootFilesystem: true
+      {{- if .Values.operator.affinity }}
+      {{- if .Values.operator.affinity.required }}
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+              - matchExpressions:
+                  - key: {{ .Values.operator.affinity.key | default "agentpool"}}
+                    operator: In
+                    values:
+                      - {{ .Values.operator.affinity.value | default "system" }}
+      {{- else }}
+      affinity:
+        nodeAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - weight: 1
+              preference:
+                matchExpressions:
+                  - key: {{ .Values.operator.affinity.key | default "agentpool"}}
+                    operator: In
+                    values:
+                      - {{ .Values.operator.affinity.value | default "system" }}
+      {{- end }}
+      {{- end }}
+      tolerations:
+        - key: CriticalAddonsOnly
+          operator: Exists
+        - effect: NoExecute
+          key: node.kubernetes.io/not-ready
+          operator: Exists
+          tolerationSeconds: 300
+        - effect: NoExecute
+          key: node.kubernetes.io/unreachable
+          operator: Exists
+          tolerationSeconds: 300
+---
+apiVersion: adx-mon.azure.com/v1
+kind: ADXCluster
+metadata:
+  name: {{ .Values.adx.name | quote }}
+  namespace: {{ .Release.Namespace }}
+spec:
+  clusterName: {{ .Values.adx.name | quote }}
+  databases:
+    - databaseName: Logs
+      telemetryType: Logs
+    - databaseName: Metrics
+      telemetryType: Metrics
+  endpoint: {{ .Values.adx.url | quote }}
+  federation:
+    # Hub clusters
+    federatedTargets:
+      - endpoint: {{ .Values.hub_url | quote }}
+        heartbeatDatabase: Federation
+        heartbeatTable: Heartbeats
+        managedIdentityClientId: ""
+    heartbeatTTL: 1h
+    partitioning:
+      region: {{ .Values.region | quote }}
+  role: Partition
+{{- end }}
diff --git a/charts/adx-mon/values.yaml b/charts/adx-mon/values.yaml
new file mode 100644
index 000000000..207cb261b
--- /dev/null
+++ b/charts/adx-mon/values.yaml
@@ -0,0 +1,50 @@
+---
+create_namespace: true
+cluster_name: aks
+region: region
+cloud: azure
+environment: environment
+global_drop_metrics: []
+global_keep_metrics: []
+singleton_drop_metrics: []
+singleton_keep_metrics: []
+adx:
+  name: adx-cluster
+  url: https://adx.region.kusto.windows.net
+hub_url: https://adxhub.region.kusto.windows.net
+collector:
+  enabled: true
+  ingestor_endpoint: https://ingestor.adx-mon.svc.cluster.local
+ingestor:
+  enabled: true
+  image: ghcr.io/azure/adx-mon/ingestor:latest  # container image; pin a released tag for production
+  replicas: 1
+  affinity:
+    required: false
+    key: agentpool
+    value: system
+  client_id: client_id
+alerter:
+  enabled: false
+  replicas: 1
+  affinity:
+    required: false
+    key: agentpool
+    value: system
+  auth_msi_id: ""  # to be filled with agent pool managed identity
+  kusto_endpoints: {}  # key: database name, value: adx url (can be same for multiple dbs)
+  resources:
+    requests:
+      cpu: 100m
+      memory: 256Mi
+    limits:
+      cpu: 500m
+      memory: 1Gi
+operator:
+  enabled: false
+  image: ghcr.io/azure/adx-mon/operator:latest  # container image; pin a released tag for production
+  client_id: ""  # workload identity client ID; leave empty to skip the ServiceAccount annotation and pod label
+  affinity:
+    required: false
+    key: agentpool
+    value: system