diff --git a/docker-compose.yml b/docker-compose.yml index 7d107503..4227a014 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -25,7 +25,7 @@ # cerberus via its own Prom/Loki/Tempo heads, populating # the provisioned dashboard within a minute of stack start. # grafana pre-provisioned with cerberus as three datasources -# (Prometheus / Loki / Tempo) and the `cerberus-self` +# (Prometheus / Loki / Tempo) and the `cerberus` # dashboard (self-obs; set as home dashboard) under the # "Cerberus" folder. Provisioning files live under # `test/e2e/grafana/compose/`. @@ -180,10 +180,10 @@ services: GF_AUTH_DISABLE_SIGNOUT_MENU: "true" GF_USERS_DEFAULT_THEME: dark GF_FEATURE_TOGGLES_ENABLE: traceqlSearch - # Land directly on the cerberus-self dashboard instead of an empty + # Land directly on the cerberus dashboard instead of an empty # home page. Path is the in-container mount location of the JSON, # not a Grafana URL or UID. - GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH: /etc/grafana/provisioning/dashboards/cerberus/cerberus-self.json + GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH: /etc/grafana/provisioning/dashboards/cerberus/cerberus.json volumes: - ./test/e2e/grafana/compose/datasources:/etc/grafana/provisioning/datasources:ro # Dashboard provider config tells Grafana to scan the `cerberus` diff --git a/docs/observability.md b/docs/observability.md index 881f1783..f246e1a4 100644 --- a/docs/observability.md +++ b/docs/observability.md @@ -21,7 +21,7 @@ data point, AND log record so a Grafana dashboard can pivot on them across all three signal types. The k3s manifest at `test/e2e/k3s/otel-collector.yaml` and the provisioned -`test/e2e/grafana/dashboards/cerberus-self.json` wire the full export path +`test/e2e/grafana/dashboards/cerberus.json` wire the full export path end-to-end against a running cluster. 
## Logging diff --git a/internal/api/admit/admit.go b/internal/api/admit/admit.go index b798146d..0d1c4bb6 100644 --- a/internal/api/admit/admit.go +++ b/internal/api/admit/admit.go @@ -43,7 +43,7 @@ const meterName = "github.com/tsouza/cerberus/internal/api/admit" // attrQL labels the rejection counter with the query language the // limiter is fronting — "promql" / "logql" / "traceql". Mirrors the // cerberus.ql attribute set by internal/telemetry on the -// per-query counters, so the cerberus-self dashboard's +// per-query counters, so the cerberus dashboard's // `sum by (cerberus_ql, reason) (rate(cerberus_admit_rejected_total[5m]))` // panel resolves consistently across both metric sources. const attrQL = attribute.Key("cerberus.ql") diff --git a/internal/telemetry/metrics.go b/internal/telemetry/metrics.go index cbd639e1..0e0a8d5b 100644 --- a/internal/telemetry/metrics.go +++ b/internal/telemetry/metrics.go @@ -102,7 +102,7 @@ type Instruments struct { // QueryInflight is the count of currently-executing engine // queries — incremented at engine entry, decremented (via defer) // at engine return so panics + early-returns + cancellations - // still balance. Attribute: cerberus.ql. The cerberus-self + // still balance. Attribute: cerberus.ql. The cerberus // dashboard panel queries `sum by (cerberus_ql) // (cerberus_query_inflight)`. QueryInflight metric.Int64UpDownCounter diff --git a/test/e2e/grafana/compose/dashboards-provider.yaml b/test/e2e/grafana/compose/dashboards-provider.yaml index ae21d20b..911b165f 100644 --- a/test/e2e/grafana/compose/dashboards-provider.yaml +++ b/test/e2e/grafana/compose/dashboards-provider.yaml @@ -3,7 +3,7 @@ # for JSON dashboards and load them under the "Cerberus" folder. 
# # One starter dashboard ships with the compose quickstart: -# - cerberus-self.json: cerberus's own observability surface (per-head +# - cerberus.json: cerberus's own observability surface (per-head # request rate, latency quantiles, error rate, in-flight, admission # rejections, recent ERROR logs, slow-query traces). Wired as the # home dashboard via GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH so diff --git a/test/e2e/grafana/compose/dashboards/cerberus-self.json b/test/e2e/grafana/compose/dashboards/cerberus.json similarity index 99% rename from test/e2e/grafana/compose/dashboards/cerberus-self.json rename to test/e2e/grafana/compose/dashboards/cerberus.json index 1ed80c5c..2b37d155 100644 --- a/test/e2e/grafana/compose/dashboards/cerberus-self.json +++ b/test/e2e/grafana/compose/dashboards/cerberus.json @@ -583,7 +583,7 @@ }, "timepicker": {}, "timezone": "", - "title": "Cerberus - self-observability", + "title": "Cerberus", "uid": "cerberus-self", "version": 1, "weekStart": "" diff --git a/test/e2e/grafana/dashboards/cerberus-self.json b/test/e2e/grafana/dashboards/cerberus.json similarity index 99% rename from test/e2e/grafana/dashboards/cerberus-self.json rename to test/e2e/grafana/dashboards/cerberus.json index ce20b4db..aaa75654 100644 --- a/test/e2e/grafana/dashboards/cerberus-self.json +++ b/test/e2e/grafana/dashboards/cerberus.json @@ -583,7 +583,7 @@ }, "timepicker": {}, "timezone": "", - "title": "Cerberus - self-observability", + "title": "Cerberus", "uid": "cerberus-self", "version": 1, "weekStart": "" diff --git a/test/e2e/k3s/cerberus.yaml b/test/e2e/k3s/cerberus.yaml index 63bd2c11..ce9f69a9 100644 --- a/test/e2e/k3s/cerberus.yaml +++ b/test/e2e/k3s/cerberus.yaml @@ -21,7 +21,7 @@ data: # Earlier `CERBERUS_OTEL_*` spellings here were dead config — the binary # read empty values and silently fell back to the noop providers, so # `cerberus_queries_total` / `cerberus_queries_duration_seconds_*` never - # reached CH and the cerberus-self dashboard 
panels collapsed to either + # reached CH and the cerberus dashboard panels collapsed to either # a single anonymous bucket or empty series (#214 / #215 N2 / N5 e2e # partition + histogram-completeness regression class). # diff --git a/test/e2e/k3s/grafana-dashboards.yaml b/test/e2e/k3s/grafana-dashboards.yaml index 7b143e63..8edbffc4 100644 --- a/test/e2e/k3s/grafana-dashboards.yaml +++ b/test/e2e/k3s/grafana-dashboards.yaml @@ -25,15 +25,15 @@ data: foldersFromFilesStructure: false --- # The dashboard JSON itself — sourced from -# test/e2e/grafana/dashboards/cerberus-self.json. Keep the two in sync; the +# test/e2e/grafana/dashboards/cerberus.json. Keep the two in sync; the # repo file is the editable copy, the ConfigMap is the deployed copy. apiVersion: v1 kind: ConfigMap metadata: - name: grafana-dashboard-cerberus-self + name: grafana-dashboard-cerberus namespace: cerberus data: - cerberus-self.json: | + cerberus.json: | { "annotations": { "list": [ @@ -619,7 +619,7 @@ data: }, "timepicker": {}, "timezone": "", - "title": "Cerberus - self-observability", + "title": "Cerberus", "uid": "cerberus-self", "version": 1, "weekStart": "" diff --git a/test/e2e/k3s/grafana.yaml b/test/e2e/k3s/grafana.yaml index 0c1e129c..7c94a6a3 100644 --- a/test/e2e/k3s/grafana.yaml +++ b/test/e2e/k3s/grafana.yaml @@ -156,4 +156,4 @@ spec: name: grafana-dashboard-provider - name: dashboards configMap: - name: grafana-dashboard-cerberus-self + name: grafana-dashboard-cerberus diff --git a/test/e2e/playwright/compose_grafana_smoke.spec.ts b/test/e2e/playwright/compose_grafana_smoke.spec.ts index 91889c67..67dd6079 100644 --- a/test/e2e/playwright/compose_grafana_smoke.spec.ts +++ b/test/e2e/playwright/compose_grafana_smoke.spec.ts @@ -98,7 +98,7 @@ test('compose: home, drilldown app, and every provisioned dashboard load without // — overflow before #630 made every // Prom Explore page 502. 
// - prom-cerberus-metric: the self-telemetry stream the - // cerberus-self dashboard targets; + // cerberus dashboard targets; // catches dotted-vs-underscored regressions. // - loki-allstreams: exercises `/loki/api/v1/query_range` // with a permissive selector; also @@ -335,7 +335,7 @@ test('compose: home, drilldown app, and every provisioned dashboard load without // Accept branch on the cerberus handler; this drill-through // asserts the round trip is clean. // - // We locate the "Slow cerberus traces" panel on cerberus-self, + // We locate the "Slow cerberus traces" panel on the cerberus dashboard, // click the first row's trace-ID link, wait for Grafana's // `/explore` navigation, and re-run the same `/api/ds/query` + // DOM error sweeps over the new view. If no traces exist on the @@ -347,7 +347,7 @@ test('compose: home, drilldown app, and every provisioned dashboard load without // 6. Underscored-OTel-label partition sweep. // - // The cerberus-self dashboard's "Query rate by language" panel + // The cerberus dashboard's "Query rate by language" panel // fires `sum by (cerberus_ql) (rate(cerberus_queries_total[5m]))`. // OTel writes the `cerberus.ql` attribute under the dotted form // in storage; the matcher-side lookup must cross the @@ -380,7 +380,7 @@ test('compose: home, drilldown app, and every provisioned dashboard load without }); /** - * Drive the cerberus-self → "Slow cerberus traces" panel → click a + * Drive the cerberus dashboard → "Slow cerberus traces" panel → click a * trace row → land on /explore drill-through and assert no * ds/query 500, no DOM error banner, no "illegal wireType" text. * @@ -392,7 +392,7 @@ test('compose: home, drilldown app, and every provisioned dashboard load without async function driveTraceClick(page: Page, baseURL: string): Promise { const failures: string[] = []; - // 1. Navigate to the cerberus-self dashboard and wait for panels to settle. + // 1. Navigate to the cerberus dashboard and wait for panels to settle. 
await page.goto(`${baseURL}/d/cerberus-self`, { waitUntil: 'domcontentloaded', timeout: 90_000, @@ -571,7 +571,7 @@ async function driveTraceClick(page: Page, baseURL: string): Promise { } /** - * Drive the cerberus-self → "Query rate by language" panel and assert + * Drive the cerberus dashboard → "Query rate by language" panel and assert * the underscored-matcher → dotted-OTel-attribute fallback emits at * least 2 distinct grouped series. * @@ -590,7 +590,7 @@ async function driveTraceClick(page: Page, baseURL: string): Promise { * to tolerate a stack where a single head momentarily has no traffic. * * If the panel isn't provisioned in the current stack (compose - * variant without the cerberus-self dashboard) the function returns + * variant without the cerberus dashboard) the function returns * cleanly — the dashboard sweep above already covers the "panel * exists" case. */ diff --git a/test/e2e/playwright/helpers/README.md b/test/e2e/playwright/helpers/README.md index 4ece8684..449690f6 100644 --- a/test/e2e/playwright/helpers/README.md +++ b/test/e2e/playwright/helpers/README.md @@ -12,7 +12,7 @@ land in subsequent PRs. | `dashboard.ts` | Enumerate provisioned dashboards via `/api/search` + `/api/dashboards/uid/`, flatten rows, expose `Dashboard` + `Panel` types. | | `query-shape.ts` | Regex-based target classification + rewriting: `extractByKeys`, `extractWithoutKeys`, `expectedByKeys`, `isHistogramQuantile`, `extractHistogramName`, `addLabelFilter`, `expressionHasMatcherFor`. | | `assertions.ts` | Per-shape assertions over the Grafana `/api/ds/query` envelope (`assertLabelShape` / `assertLabelAbsent` / histogram pair / `assertSubsetByCount`) + the zero-404 gate (`assertNon200ResponseClass`). | -| `sweep.ts` | `generateSelfTraffic` — pre-step that fires self-traffic against cerberus so the cerberus-self dashboards have data to render. 
| +| `sweep.ts` | `generateSelfTraffic` — pre-step that fires self-traffic against cerberus so the cerberus dashboards have data to render. | | `drilldown.ts` | Drilldown-app catalogue (4 built-in apps) + `drillTwoLevels` gesture driver + `isAppInstalled` (`/api/plugins//settings` probe so the iteration handles apps that aren't provisioned). | | `dom.ts` | Browser-side helpers: console-error capture, `role="alert"` banner read, kiosk repaint-flicker tolerance. | | `probes.ts` | `fetchAndAssert200` (the zero-404 gate on direct HTTP probes) + `extractDataSourceProxyURL` (panel → datasource proxy path). | diff --git a/test/e2e/playwright/helpers/sweep.ts b/test/e2e/playwright/helpers/sweep.ts index ecf98364..212ae0ae 100644 --- a/test/e2e/playwright/helpers/sweep.ts +++ b/test/e2e/playwright/helpers/sweep.ts @@ -1,7 +1,7 @@ /** * Self-traffic generator. * - * Several Grafana dashboards (notably cerberus-self) only render + * Several Grafana dashboards (notably cerberus) only render * meaningfully when cerberus has just served real queries — the * `cerberus_queries_total` counter, the `cerberus_query_duration_*` * histogram, and the by-language partition all stay flat at 0 on a diff --git a/test/e2e/playwright/iterate-all-dashboards.spec.ts b/test/e2e/playwright/iterate-all-dashboards.spec.ts index ec6d066a..f38e6886 100644 --- a/test/e2e/playwright/iterate-all-dashboards.spec.ts +++ b/test/e2e/playwright/iterate-all-dashboards.spec.ts @@ -68,7 +68,7 @@ import { import { generateSelfTraffic } from './helpers/index.js'; -// Self-traffic warmup so cerberus-self panels have populated counters +// Self-traffic warmup so cerberus panels have populated counters // before we sweep them. Same value the other phase specs use. 
const SEED_TRAFFIC_SECONDS = 30; diff --git a/test/e2e/playwright/iterate-histogram-completeness.spec.ts b/test/e2e/playwright/iterate-histogram-completeness.spec.ts index cfe9025e..f8c1eb85 100644 --- a/test/e2e/playwright/iterate-histogram-completeness.spec.ts +++ b/test/e2e/playwright/iterate-histogram-completeness.spec.ts @@ -13,7 +13,7 @@ * response": * * - N5 (`_bucket` series MUST exist when the panel is meant - * to render). The cerberus-self "P95 latency by language" panel + * to render). The cerberus dashboard's "P95 latency by language" panel * went flat at 0 because the underlying bucket series were * emitted under a sibling metric root (cerberus_pipeline vs * cerberus_queries_duration_seconds_bucket), and @@ -189,7 +189,7 @@ test('histogram-completeness: every histogram_quantile panel has its _bucket / _ process.env.GRAFANA_BASE_URL ?? 'http://localhost:3000'; - // Seed traffic so cerberus-self's histogram panels have something + // Seed traffic so the cerberus dashboard's histogram panels have something // to render. generateSelfTraffic swallows individual request errors // — this is a nudge, not an assertion. await generateSelfTraffic(request, SEED_TRAFFIC_SECONDS); diff --git a/test/e2e/playwright/iterate-metrics-explorer.spec.ts b/test/e2e/playwright/iterate-metrics-explorer.spec.ts index 3a9a044f..e28de3b5 100644 --- a/test/e2e/playwright/iterate-metrics-explorer.spec.ts +++ b/test/e2e/playwright/iterate-metrics-explorer.spec.ts @@ -320,7 +320,7 @@ test.describe('iterate-metrics-explorer: Drilldown-Metrics + label chips', () => test.describe.configure({ mode: 'serial' }); test.beforeAll(async ({ request }) => { - // Warmup so the cerberus-self metrics show populated values. + // Warmup so the cerberus self-telemetry metrics show populated values. await generateSelfTraffic(request, SEED_TRAFFIC_SECONDS); // Allow OTLP push + CH insert flush to settle. 
See the comment on // POST_WARMUP_FLUSH_SECONDS above — without this, /api/v1/series diff --git a/test/e2e/playwright/iterate-panel-kiosk.spec.ts b/test/e2e/playwright/iterate-panel-kiosk.spec.ts index 1ef81770..4eef7770 100644 --- a/test/e2e/playwright/iterate-panel-kiosk.spec.ts +++ b/test/e2e/playwright/iterate-panel-kiosk.spec.ts @@ -65,11 +65,11 @@ import { tolerateRepaintFlicker, } from './helpers/index.js'; -// Self-traffic warmup. Mirrors the phase-1/2 specs so cerberus-self +// Self-traffic warmup. Mirrors the phase-1/2 specs so cerberus // dashboards have populated panels by the time kiosk view opens — // otherwise a panel legitimately empty due to "no traffic yet" would // false-positive the visible-body assertion. 30s is the low end of -// "long enough to populate the cerberus_self panels". +// "long enough to populate the cerberus panels". const SEED_TRAFFIC_SECONDS = 30; // Substrings on a `role="alert"` banner that count as an error-state @@ -150,7 +150,7 @@ test('panel-kiosk: every panel renders cleanly in single-panel kiosk view + back process.env.GRAFANA_BASE_URL ?? 'http://localhost:3000'; - // Seed traffic so cerberus-self panels have something to render + // Seed traffic so cerberus panels have something to render // when kiosk mode re-mounts them. Without this, a panel that's // legitimately empty (no traffic yet) would trip the visible-body // assertion below. diff --git a/test/e2e/playwright/iterate-panel-shape.spec.ts b/test/e2e/playwright/iterate-panel-shape.spec.ts index 5ee9827c..2aacbdab 100644 --- a/test/e2e/playwright/iterate-panel-shape.spec.ts +++ b/test/e2e/playwright/iterate-panel-shape.spec.ts @@ -127,7 +127,7 @@ test('panel-shape: every aggregating panel surfaces its by(...) and respects its process.env.GRAFANA_BASE_URL ?? 'http://localhost:3000'; - // Seed traffic so cerberus-self panels have something to render. + // Seed traffic so cerberus dashboard panels have something to render. 
// generateSelfTraffic swallows individual request errors — this is // a nudge, not an assertion. await generateSelfTraffic(request, SEED_TRAFFIC_SECONDS); @@ -243,7 +243,7 @@ test('panel-shape: every aggregating panel surfaces its by(...) and respects its // "Value" frame). A truly empty response — zero frames — // is the N5-class shape and is out of scope here: the // compose stack ships placeholder panels backed by - // metrics that don't exist yet (the cerberus-self + // metrics that don't exist yet (the cerberus // dashboard's In-flight / Admission-rejections panels are // labelled "declarative until the admission middleware // exports it" in their description text) and has a diff --git a/test/e2e/playwright/iterate-time-ranges.spec.ts b/test/e2e/playwright/iterate-time-ranges.spec.ts index 415105b1..a82515b9 100644 --- a/test/e2e/playwright/iterate-time-ranges.spec.ts +++ b/test/e2e/playwright/iterate-time-ranges.spec.ts @@ -40,7 +40,7 @@ * until the flake rate is observed < 1% over a fix cycle. * * Flake handling: empty frames are an ANNOTATION, not a failure (the - * same gating pattern phase-1 uses for cerberus-self placeholder + * same gating pattern phase-1 uses for cerberus placeholder * panels). Errors — non-2xx, malformed body, label-shape regression on * a populated frame, histogram fabricated-value on a populated frame — * are still hard failures. @@ -173,7 +173,7 @@ test('time-ranges: every aggregating / histogram panel re-asserts under (range, request, }, testInfo) => { // Matrix is up to 4 ranges × 3 steps × N panels per dashboard. On - // the compose stack with the cerberus-self dashboard (~15 panels), + // the compose stack with the cerberus dashboard (~15 panels), // expect ~50-100 query_range fires. 15 minutes covers the seed + // the full matrix even on a slow CI runner. 
testInfo.setTimeout(15 * 60_000); diff --git a/test/regression/k3s_env_test.go b/test/regression/k3s_env_test.go index 99c8d360..1c49610c 100644 --- a/test/regression/k3s_env_test.go +++ b/test/regression/k3s_env_test.go @@ -19,7 +19,7 @@ import ( // CERBERUS_OTLP_ENDPOINT="", telemetry.New short-circuited to the noop // MeterProvider, and `cerberus_queries_total` / `cerberus_queries_ // duration_seconds_*` never reached the otel-collector → ClickHouse -// pipeline. The cerberus-self dashboard's "Query rate by language" +// pipeline. The cerberus dashboard's "Query rate by language" // panel then collapsed to a single anonymous bucket because no rows // existed (the lower correctly emitted `sum by (cerberus_ql) // (rate(cerberus_queries_total[5m]))` but the query matched zero CH @@ -76,7 +76,7 @@ func TestK3sCerberusManifestUsesOTLPEnvNames(t *testing.T) { // renaming it) sees a loud failure too. for _, want := range []string{"CERBERUS_OTLP_ENDPOINT:", "CERBERUS_OTLP_INSECURE:"} { if !strings.Contains(string(buf), want) { - t.Errorf("test/e2e/k3s/cerberus.yaml missing required ConfigMap key %q — the k3d cerberus deployment needs the OTLP exporter wired or the cerberus-self dashboard panels return empty matrices", want) + t.Errorf("test/e2e/k3s/cerberus.yaml missing required ConfigMap key %q — the k3d cerberus deployment needs the OTLP exporter wired or the cerberus dashboard panels return empty matrices", want) } } } diff --git a/test/spec/logql/matcher_self_service.txtar b/test/spec/logql/matcher_self_service.txtar index 45340ffa..8f5701ae 100644 --- a/test/spec/logql/matcher_self_service.txtar +++ b/test/spec/logql/matcher_self_service.txtar @@ -2,7 +2,7 @@ {service_name="cerberus"} -- seed -- -- ResourceAttributes + ServiceName are MATERIALIZED on Body so the --- four storage shapes (cerberus-self, seed-underscored, seed-dotted, +-- four storage shapes (self-row, seed-underscored, seed-dotted, -- negative) are encoded by one Body value each. 
MATERIALIZED columns -- are excluded from `SELECT *` but remain queryable in WHERE, so the -- runner's `SELECT * FROM otel_logs WHERE ...` returns just the two diff --git a/test/spec/promql/sum_by_rate_dotted_source.txtar b/test/spec/promql/sum_by_rate_dotted_source.txtar index a56bd395..522266b1 100644 --- a/test/spec/promql/sum_by_rate_dotted_source.txtar +++ b/test/spec/promql/sum_by_rate_dotted_source.txtar @@ -1,4 +1,4 @@ -# Regression pin for the cerberus-self "Query rate by language" panel +# Regression pin for the cerberus dashboard's "Query rate by language" panel # (compose_grafana_smoke.spec.ts ≥ 2 partition assertion). The panel # fires `sum by (cerberus_ql) (rate(cerberus_queries_total[5m]))` and # the underlying OTel-CH rows store the data-point attribute under its diff --git a/test/spec/promql/sum_by_rate_range_step.txtar b/test/spec/promql/sum_by_rate_range_step.txtar index d94dae48..c28122d6 100644 --- a/test/spec/promql/sum_by_rate_range_step.txtar +++ b/test/spec/promql/sum_by_rate_range_step.txtar @@ -5,7 +5,7 @@ # outer SELECT only surfaced `anchor_ts`. The fix projects an additional # `anchor_ts AS TimeUnix` alias so the wrapping Aggregate's per-step # GROUP BY (ColumnRef{TimestampColumn}, see internal/promql/lower.go -# `bucket_ts` branch) resolves. The cerberus-self dashboard's +# `bucket_ts` branch) resolves. The cerberus dashboard's # request-rate panel exercises exactly this shape. -- query.promql --