diff --git a/compatibility/prometheus/cerberus-test-queries.yml b/compatibility/prometheus/cerberus-test-queries.yml index cc3f43cd..8b9b7ff8 100644 --- a/compatibility/prometheus/cerberus-test-queries.yml +++ b/compatibility/prometheus/cerberus-test-queries.yml @@ -19,10 +19,10 @@ # `demo_batch_last_success_timestamp_seconds`, `demo_intermittent_metric`). # When both ref Prom and cerberus return empty results for a missing-metric # query, the tester records that as PASS (zero diff) — acceptable noise -# for the RC1 baseline. +# for the compatibility baseline. # # Pointer-to-upstream: re-sync this file by re-copying the upstream and -# re-applying the edit above. Track in a follow-up M1.x ticket so this +# re-applying the edit above. Track in a follow-up issue so this # divergence stays auditable. test_cases: @@ -38,7 +38,7 @@ test_cases: - query: 'NaN' # Vector selectors. - # TODO: Add tests for staleness support. + # TODO(upstream): Add tests for staleness support. - query: 'demo_memory_usage_bytes' - query: '{__name__="demo_memory_usage_bytes"}' - query: 'demo_memory_usage_bytes{type="free"}' @@ -110,7 +110,7 @@ test_cases: # Check that vector-scalar binops set output timestamps correctly. - query: 'timestamp(demo_memory_usage_bytes * 1)' # Check that unary minus sets timestamps correctly. - # TODO: Check this more systematically for every node type? + # TODO(upstream): Check this more systematically for every node type? 
- query: 'timestamp(-demo_memory_usage_bytes)' - query: 'demo_memory_usage_bytes {{.binOp}} on(instance, job, type) demo_memory_usage_bytes' variant_args: ['binOp'] @@ -125,8 +125,8 @@ test_cases: - query: 'sum without(job) (demo_memory_usage_bytes) / on(instance, type) group_left(job) demo_memory_usage_bytes' - query: 'demo_memory_usage_bytes / on(instance, job) group_left demo_num_cpus' - query: 'demo_memory_usage_bytes / on(instance, type, job, non_existent) demo_memory_usage_bytes' - # TODO: Add non-explicit many-to-one / one-to-many that errors. - # TODO: Add many-to-many match that errors. + # TODO(upstream): Add non-explicit many-to-one / one-to-many that errors. + # TODO(upstream): Add many-to-many match that errors. # NaN/Inf/-Inf support. - query: 'demo_num_cpus * Inf' diff --git a/docs/test-strategy.md b/docs/test-strategy.md index 9e3cb9f1..f3eda689 100644 --- a/docs/test-strategy.md +++ b/docs/test-strategy.md @@ -198,6 +198,29 @@ needs strengthening, (b) a functionally-equivalent mutation (`<` vs `append` regrows past), or (c) a missing test. The gremlins JSON artifact on each run names the file + line + mutation kind. +### Surviving-mutant policy + +When a mutant survives the phase threshold, pick the remedy in this +order — the goal is to keep production code clear and let the test +suite carry the discipline: + +1. **PREFERRED — prove equivalent.** Add a comment in the source + explaining why the mutated branch is semantically identical to the + original, then drop the phase efficacy threshold in `.gremlins.yaml` + by 1 percentage point to absorb the equivalent mutant. The mutation + count is now defensible and the source stays clear. +2. **ACCEPTABLE — add a distinguishing test.** Write a unit / property + test whose output differs between the original and the mutated + branch. This is the right call when the mutation reveals real + under-tested behaviour. +3. 
**REJECTED — refactor production code to make the mutant + distinguishable.** This is pattern #11 (DEFEAT-MUTANT) — the + codebase loses clarity to satisfy the mutation tool. Don't do it. + +Prior PRs #504 and #664 carry remedy-#3 (pattern #11) refactors. They are +not reverted (their diffs are now load-bearing for the published +thresholds), but new surviving mutants should be handled with remedy #1 or #2. + ## Regression meta-tests `test/regression/` pins past CI failures so they can't silently diff --git a/internal/chsql/builder_test.go b/internal/chsql/builder_test.go index 40b88610..a4f0843b 100644 --- a/internal/chsql/builder_test.go +++ b/internal/chsql/builder_test.go @@ -893,7 +893,7 @@ func TestBuilder_Expr(t *testing.T) { } } -// --- typed operator / punctuation Frag constructors (R6.11a) ----------- +// --- typed operator / punctuation Frag constructors ------------------- // TestOperatorFrags_BinaryOps — each comparison + arithmetic operator // renders " " with single spaces around the op token, and diff --git a/internal/optimizer/property_test.go b/internal/optimizer/property_test.go index 1e1a36d0..e0e8b5f7 100644 --- a/internal/optimizer/property_test.go +++ b/internal/optimizer/property_test.go @@ -190,7 +190,7 @@ func TestPropertyOptimizerSemanticEquivalence(t *testing.T) { // // Depth budget: at depth 0 the generator picks any node type; once // depth ≥ 3 it bottoms out into a Scan to keep trees small. This -// covers the three RC1-relevant shapes: +// covers the three plan shapes the optimizer is expected to handle: // // Scan(table) // Filter(, Scan(table)) diff --git a/lefthook.yml b/lefthook.yml index 2e93e023..5e8b6cd0 100644 --- a/lefthook.yml +++ b/lefthook.yml @@ -104,6 +104,19 @@ pre-push: tags: discipline run: | scripts/test-forbid-skip.sh + # Mirrors the CI `forbid-skip` job's `Guard new should_skip entries` + # step. Rejects net-new `should_skip:` entries in compatibility + # overlays that lack a tracking ref (jira / link / #NNN in reason). 
+ # Background: PRs #429 + #537 added skip rows to silence failing + # cases instead of fixing them, and the skips lingered for weeks. + # Local guard catches the anti-pattern before CI does. + forbid-should-skip-additions: + tags: discipline + env: + BASE_REF: origin/main + run: | + scripts/check-skip-additions.sh --self-test + scripts/check-skip-additions.sh forbid-soft-assert: tags: discipline run: | diff --git a/test/spec/logql/binop_vv_compare_filter.txtar b/test/spec/logql/binop_vv_compare_filter.txtar index 5920a924..335980d3 100644 --- a/test/spec/logql/binop_vv_compare_filter.txtar +++ b/test/spec/logql/binop_vv_compare_filter.txtar @@ -1,3 +1,7 @@ +# empty: rate(...) > rate(...) on the same selector compares each +# series against itself, so the strict greater-than yields zero +# samples — every join pair has lhs == rhs. + -- query.logql -- rate({service_name="api"}[5m]) > rate({service_name="api"}[5m]) -- seed -- diff --git a/test/spec/promql/binop_vv_on_compare_lt.txtar b/test/spec/promql/binop_vv_on_compare_lt.txtar index 5309ccb8..61316742 100644 --- a/test/spec/promql/binop_vv_on_compare_lt.txtar +++ b/test/spec/promql/binop_vv_on_compare_lt.txtar @@ -1,3 +1,7 @@ +# empty: vector-vs-vector `<` joined on identical label sets compares +# each series against itself, so the strict less-than yields no true +# samples — every join pair has lhs == rhs. + -- query.promql -- demo_memory_usage_bytes < on(instance, job, type) demo_memory_usage_bytes -- seed -- diff --git a/test/spec/traceql/count.txtar b/test/spec/traceql/count.txtar index bf1003bb..07a3039d 100644 --- a/test/spec/traceql/count.txtar +++ b/test/spec/traceql/count.txtar @@ -1,3 +1,7 @@ +# empty: seeded spans all carry service.name in {backend, db, cache}; +# the resource.service.name = "frontend" matcher selects zero traces, +# so the count() > 0 trace-level filter drops every candidate. 
+ -- query.traceql -- { resource.service.name = "frontend" } | count() > 0 -- seed -- diff --git a/test/spec/traceql/count_eq_zero.txtar b/test/spec/traceql/count_eq_zero.txtar index d8a88a91..8860e8b5 100644 --- a/test/spec/traceql/count_eq_zero.txtar +++ b/test/spec/traceql/count_eq_zero.txtar @@ -1,3 +1,8 @@ +# empty: traces t1+t2 contain frontend spans (count >= 1) and t3 has +# none, but the inner matcher drops t3 entirely before grouping; so +# every surviving trace has count >= 1 and the = 0 filter excludes +# them all. + -- query.traceql -- { resource.service.name = "frontend" } | count() = 0 -- seed -- diff --git a/test/spec/traceql/structural_descendant_of_intersect.txtar b/test/spec/traceql/structural_descendant_of_intersect.txtar index 0d9e3d45..9e016502 100644 --- a/test/spec/traceql/structural_descendant_of_intersect.txtar +++ b/test/spec/traceql/structural_descendant_of_intersect.txtar @@ -1,3 +1,8 @@ +# empty: the intersect (service.name == a AND service.name == b on the +# same span) selects zero spans because resource.service.name is +# single-valued per span, so no descendant chain can be rooted in the +# intersect set. + -- query.traceql -- ({ resource.service.name = "a" } && { resource.service.name = "b" }) >> { resource.service.name = "c" } -- sql --