Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
491 changes: 491 additions & 0 deletions deploy/otel/grafana/dashboards/helix-internals.json

Large diffs are not rendered by default.

129 changes: 45 additions & 84 deletions deploy/otel/grafana/dashboards/helix-pipeline-observatory.json
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
"mode": "off"
}
},
"unit": "s"
"unit": "bytes"
},
"overrides": []
},
Expand All @@ -93,16 +93,17 @@
"sort": "none"
}
},
"title": "process_memory (seconds) - pid (pid) in my heaps",
"title": "Knowledge-store size (raw vs compressed bytes)",
"type": "timeseries",
"targets": [
{
"datasource": "Prometheus",
"expr": "process_resident_memory_bytes{job=\"helix\"}",
"legendFormat": "pid {{pid}}",
"expr": "helix_genome_size_bytes",
"legendFormat": "{{kind}}",
"refId": "A"
}
]
],
"description": "helix_genome_size_bytes, refreshed on each /stats poll. Legacy bio term: genome size."
},
{
"datasource": "Prometheus",
Expand Down Expand Up @@ -142,7 +143,7 @@
"mode": "off"
}
},
"unit": "percentunit"
"unit": "percent"
},
"overrides": []
},
Expand All @@ -164,46 +165,17 @@
"sort": "none"
}
},
"title": "tier estimations (%) status (stem tree csr ring)",
"title": "Tier activation share (%)",
"type": "timeseries",
"targets": [
{
"datasource": "Prometheus",
"expr": "helix_tier_estimation_percent{job=\"helix\", status=\"bulk\"}",
"legendFormat": "bulk",
"expr": "100 * sum by (tier) (rate(helix_tier_fired_total[5m])) / ignoring(tier) group_left sum(rate(helix_tier_fired_total[5m]))",
"legendFormat": "{{tier}}",
"refId": "A"
},
{
"datasource": "Prometheus",
"expr": "helix_tier_estimation_percent{job=\"helix\", status=\"documents\"}",
"legendFormat": "documents",
"refId": "B"
},
{
"datasource": "Prometheus",
"expr": "helix_tier_estimation_percent{job=\"helix\", status=\"tx_active\"}",
"legendFormat": "tx_active",
"refId": "C"
},
{
"datasource": "Prometheus",
"expr": "helix_tier_estimation_percent{job=\"helix\", status=\"guests\"}",
"legendFormat": "guests",
"refId": "D"
},
{
"datasource": "Prometheus",
"expr": "helix_tier_estimation_percent{job=\"helix\", status=\"log_event\"}",
"legendFormat": "log_event",
"refId": "E"
},
{
"datasource": "Prometheus",
"expr": "helix_tier_estimation_percent{job=\"helix\", status=\"log_write\"}",
"legendFormat": "log_write",
"refId": "F"
}
]
],
"description": "Share of retrieval tier activations per tier \u2014 100 * rate(helix_tier_fired_total) by tier over the all-tier total."
},
{
"datasource": "Prometheus",
Expand Down Expand Up @@ -265,16 +237,17 @@
"sort": "none"
}
},
"title": "Per tier readable time in histogram (cross magnitude x tier)",
"title": "Retrieval signal latency p95 (per signal)",
"type": "timeseries",
"targets": [
{
"datasource": "Prometheus",
"expr": "rate(helix_tier_readable_time_bucket{job=\"helix\"}[5m])",
"legendFormat": "{{le}}",
"expr": "histogram_quantile(0.95, sum by (le, signal) (rate(helix_genome_signal_seconds_bucket[5m])))",
"legendFormat": "{{signal}}",
"refId": "A"
}
]
],
"description": "histogram_quantile over helix_genome_signal_seconds \u2014 per-signal SQLite timing inside query_genes (fts5, splade, dense, tag_*, pki, harmonic, sr)."
},
{
"collapsed": false,
Expand All @@ -285,7 +258,7 @@
"y": 17
},
"id": 101,
"title": "CRDTs Latent Clock",
"title": "CWoLa Label Clock",
"type": "row"
},
{
Expand Down Expand Up @@ -347,16 +320,17 @@
"sort": "none"
}
},
"title": "CRDTs bucket accumulation",
"title": "CWoLa bucket accumulation",
"type": "timeseries",
"targets": [
{
"datasource": "Prometheus",
"expr": "helix_crdt_bucket_accumulation{job=\"helix\"}",
"expr": "sum by (bucket) (increase(helix_cwola_bucket_total[1h]))",
"legendFormat": "{{bucket}}",
"refId": "A"
}
]
],
"description": "helix_cwola_bucket_total \u2014 CWoLa log rows by bucket (A/B/pending), 1h increase."
},
{
"datasource": "Prometheus",
Expand Down Expand Up @@ -409,16 +383,17 @@
"wideLayout": true
},
"pluginVersion": "11.0.0",
"title": "p99_rq = (1.0 + 1.97) ditto-genes",
"title": "p99 /context latency",
"type": "stat",
"targets": [
{
"datasource": "Prometheus",
"expr": "histogram_quantile(0.99, rate(helix_rq_duration_seconds_bucket{job=\"helix\"}[5m]))",
"expr": "histogram_quantile(0.99, sum by (le) (rate(helix_context_latency_seconds_bucket[5m])))",
"legendFormat": "p99",
"refId": "A"
}
]
],
"description": "histogram_quantile(0.99) over helix_context_latency_seconds \u2014 end-to-end /context build time."
},
{
"collapsed": false,
Expand Down Expand Up @@ -491,16 +466,17 @@
"sort": "none"
}
},
"title": "bit keying - ring edges by provenance",
"title": "Co-activation edges by provenance",
"type": "timeseries",
"targets": [
{
"datasource": "Prometheus",
"expr": "helix_ring_edges_by_provenance{job=\"helix\"}",
"legendFormat": "rk_{{provenance}}",
"expr": "helix_harmonic_edges_total",
"legendFormat": "{{source}}",
"refId": "A"
}
]
],
"description": "helix_harmonic_edges_total by source (seeded / co_retrieved / cwola_validated). Legacy bio term: harmonic_links."
},
{
"datasource": "Prometheus",
Expand Down Expand Up @@ -546,16 +522,17 @@
"sort": "none"
}
},
"title": "Chroni-join state distribution",
"title": "Lifecycle tier distribution",
"type": "piechart",
"targets": [
{
"datasource": "Prometheus",
"expr": "helix_chroni_join_state{job=\"helix\"}",
"expr": "helix_chromatin_state_total",
"legendFormat": "{{state}}",
"refId": "A"
}
]
],
"description": "helix_chromatin_state_total by state (open / euchromatin / heterochromatin). Legacy bio term: chromatin."
},
{
"datasource": "Prometheus",
Expand Down Expand Up @@ -607,16 +584,17 @@
"wideLayout": true
},
"pluginVersion": "11.0.0",
"title": "cost concentration ratio (top ~1% majority / mean)",
"title": "Hub concentration ratio (top 1% mean / mean)",
"type": "stat",
"targets": [
{
"datasource": "Prometheus",
"expr": "helix_cost_concentration_ratio{job=\"helix\"}",
"legendFormat": "",
"expr": "helix_hub_concentration_ratio",
"legendFormat": "ratio",
"refId": "A"
}
]
],
"description": "helix_hub_concentration_ratio \u2014 co-activation inbound-degree top-1% mean over overall mean. Healthy below ~10x."
},
{
"datasource": "Prometheus",
Expand Down Expand Up @@ -677,34 +655,17 @@
"sort": "none"
}
},
"title": "resolve degree distribution (r=1, j=r1, j=r2, j mean)",
"title": "Hub inbound-degree distribution",
"type": "timeseries",
"targets": [
{
"datasource": "Prometheus",
"expr": "helix_resolve_degree_distribution{job=\"helix\", r=\"1\"}",
"legendFormat": "r=1",
"expr": "helix_hub_inbound_degree",
"legendFormat": "{{stat}}",
"refId": "A"
},
{
"datasource": "Prometheus",
"expr": "helix_resolve_degree_distribution{job=\"helix\", j=\"r1\"}",
"legendFormat": "j=r1",
"refId": "B"
},
{
"datasource": "Prometheus",
"expr": "helix_resolve_degree_distribution{job=\"helix\", j=\"r2\"}",
"legendFormat": "j=r2",
"refId": "C"
},
{
"datasource": "Prometheus",
"expr": "avg(helix_resolve_degree_distribution{job=\"helix\"})",
"legendFormat": "mean",
"refId": "D"
}
]
],
"description": "helix_hub_inbound_degree summary stats (max / p99 / p95 / p50 / mean) over co-activation inbound degree."
}
],
"refresh": "10s",
Expand Down
Loading
Loading