Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions controllers/dashboards/persesdashboard_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ func (r *PersesDashboardReconciler) Reconcile(ctx context.Context, req ctrl.Requ
if r.ReconciliationTracker != nil {
r.ReconciliationTracker.ForgetObject(objKey)
}
if r.Metrics != nil {
r.Metrics.ForgetObject(objKey)
}
return subreconciler.Evaluate(r.deleteDashboardInAllInstances(ctx, req, req.Namespace, req.Name))
}
log.WithError(err).Error("Failed to get perses dashboard")
Expand Down
3 changes: 3 additions & 0 deletions controllers/datasources/persesdatasource_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ func (r *PersesDatasourceReconciler) Reconcile(ctx context.Context, req ctrl.Req
if r.ReconciliationTracker != nil {
r.ReconciliationTracker.ForgetObject(objKey)
}
if r.Metrics != nil {
r.Metrics.ForgetObject(objKey)
}
return subreconciler.Evaluate(r.deleteDatasourceInAllInstances(ctx, req.Namespace, req.Name))
}
log.WithError(err).Error("Failed to get perses datasource")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ func (r *PersesGlobalDatasourceReconciler) Reconcile(ctx context.Context, req ct
if r.ReconciliationTracker != nil {
r.ReconciliationTracker.ForgetObject(objKey)
}
if r.Metrics != nil {
r.Metrics.ForgetObject(objKey)
}
return subreconciler.Evaluate(r.deleteGlobalDatasourceInAllInstances(ctx, req.Name))
}
log.WithError(err).Error("Failed to get perses globaldatasource")
Expand Down
26 changes: 17 additions & 9 deletions controllers/perses/perses_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,13 @@ type Config struct {
// PersesReconciler reconciles a Perses object
type PersesReconciler struct {
client.Client
APIReader client.Reader // uncached reader for Secret data (cached client strips Data via Transform)
Scheme *runtime.Scheme
Recorder record.EventRecorder
Config Config
Metrics *operatormetrics.Metrics
ReconciliationTracker *operatormetrics.ReconciliationTracker
APIReader client.Reader // uncached reader for Secret data (cached client strips Data via Transform)
Scheme *runtime.Scheme
Recorder record.EventRecorder
Config Config
Metrics *operatormetrics.Metrics
ReconciliationTracker *operatormetrics.ReconciliationTracker
ClientCacheInvalidator common.PersesClientCacheInvalidator
}

var log = logger.WithField("module", "perses_controller")
Expand All @@ -97,6 +98,13 @@ func (r *PersesReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
if r.ReconciliationTracker != nil {
r.ReconciliationTracker.ForgetObject(objKey)
}
if r.Metrics != nil {
r.Metrics.ForgetObject(objKey)
r.Metrics.DeletePersesInstance(req.Namespace, req.Name)
}
if r.ClientCacheInvalidator != nil {
r.ClientCacheInvalidator.ForgetInstance(fmt.Sprintf("%s/%s", req.Namespace, req.Name))
}
return subreconciler.Evaluate(subreconciler.DoNotRequeue())
}
log.WithError(err).Error("Failed to get perses")
Expand All @@ -106,9 +114,9 @@ func (r *PersesReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
return subreconciler.Evaluate(subreconciler.RequeueWithError(err))
}

// Update Perses instance count
if r.Metrics != nil {
r.Metrics.PersesInstances(perses.Namespace).Set(1)
// Update Perses instance count (skip for objects being deleted)
if r.Metrics != nil && perses.GetDeletionTimestamp() == nil {
r.Metrics.PersesInstances(perses.Namespace, perses.Name).Set(1)
}

// Store perses in context for all sub-reconcilers
Expand Down
25 changes: 21 additions & 4 deletions internal/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ func NewMetrics() *Metrics {
Name: "perses_operator_managed_perses_instances",
Help: "Number of Perses instances managed by the operator",
},
[]string{"resource_namespace"},
[]string{"resource_namespace", "resource_name"},
),
ready: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Expand Down Expand Up @@ -124,9 +124,15 @@ func (m *Metrics) ReconcileErrors(controller, reason string) prometheus.Counter
return m.reconcileErrors.With(prometheus.Labels{"controller": controller, "reason": reason})
}

// PersesInstances returns a gauge to track Perses instance count.
func (m *Metrics) PersesInstances(namespace string) prometheus.Gauge {
return m.persesInstances.With(prometheus.Labels{"resource_namespace": namespace})
// PersesInstances returns the gauge from the persesInstances vector that is
// labelled with the given Perses instance namespace and name.
func (m *Metrics) PersesInstances(namespace, name string) prometheus.Gauge {
	labels := prometheus.Labels{
		"resource_namespace": namespace,
		"resource_name":      name,
	}
	return m.persesInstances.With(labels)
}

// DeletePersesInstance removes the gauge entry for the given Perses instance.
// It should be called when a Perses instance is deleted to clean up stale label sets.
//
// The series is addressed by label name, exactly as PersesInstances does, so
// the lookup does not depend on the order in which the labels were declared
// on the vector. Deleting an entry that does not exist is a no-op.
func (m *Metrics) DeletePersesInstance(namespace, name string) {
	m.persesInstances.Delete(prometheus.Labels{
		"resource_namespace": namespace,
		"resource_name":      name,
	})
}

// Ready returns a gauge to track operator readiness for the given controller.
Expand All @@ -144,6 +150,17 @@ func (m *Metrics) SetFailedResources(objKey, resource string, v int) {
m.setResources(objKey, resourceKey{resource: resource, state: failed}, v)
}

// ForgetObject drops objKey from every per-resource map held in m.resources.
// Controllers invoke it once they observe that the object no longer exists.
// The per-resource maps themselves are kept, even when they become empty.
func (m *Metrics) ForgetObject(objKey string) {
	m.mtx.Lock()
	defer m.mtx.Unlock()

	for _, perObject := range m.resources {
		delete(perObject, objKey)
	}
}

func (m *Metrics) setResources(objKey string, resKey resourceKey, v int) {
m.mtx.Lock()
defer m.mtx.Unlock()
Expand Down
132 changes: 125 additions & 7 deletions internal/metrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func TestNewMetrics(t *testing.T) {
Name: "perses_operator_managed_perses_instances",
Help: "Number of Perses instances managed by the operator",
},
[]string{"resource_namespace"},
[]string{"resource_namespace", "resource_name"},
),
ready: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Expand All @@ -64,7 +64,7 @@ func TestNewMetrics(t *testing.T) {
// Set some values so metrics appear in output
m.reconcileOperations.WithLabelValues("test").Add(1)
m.reconcileErrors.WithLabelValues("test", "test_reason").Add(0)
m.persesInstances.WithLabelValues("test-ns").Set(1)
m.persesInstances.WithLabelValues("test-ns", "test-perses").Set(1)
m.Ready("test").Set(1)

// Verify metrics are registered
Expand Down Expand Up @@ -159,22 +159,22 @@ func TestPersesInstancesGauge(t *testing.T) {
Name: "perses_operator_managed_perses_instances",
Help: "Number of Perses instances managed by the operator",
},
[]string{"resource_namespace"},
[]string{"resource_namespace", "resource_name"},
),
resources: make(map[resourceKey]map[string]int),
}
reg.MustRegister(m.persesInstances)

// Set instance counts
m.PersesInstances("perses-dev").Set(1)
m.PersesInstances("production").Set(3)
m.PersesInstances("perses-dev", "perses-1").Set(1)
m.PersesInstances("production", "perses-prod").Set(1)

// Verify values
expected := `
# HELP perses_operator_managed_perses_instances Number of Perses instances managed by the operator
# TYPE perses_operator_managed_perses_instances gauge
perses_operator_managed_perses_instances{resource_namespace="perses-dev"} 1
perses_operator_managed_perses_instances{resource_namespace="production"} 3
perses_operator_managed_perses_instances{resource_name="perses-1",resource_namespace="perses-dev"} 1
perses_operator_managed_perses_instances{resource_name="perses-prod",resource_namespace="production"} 1
`
err := testutil.CollectAndCompare(m.persesInstances, strings.NewReader(expected))
assert.NoError(t, err)
Expand Down Expand Up @@ -270,6 +270,124 @@ func TestResourceStateString(t *testing.T) {
}
}

// TestDeletePersesInstance checks that DeletePersesInstance removes only the
// targeted series from the persesInstances gauge vector.
func TestDeletePersesInstance(t *testing.T) {
	instances := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "perses_operator_managed_perses_instances",
			Help: "Number of Perses instances managed by the operator",
		},
		[]string{"resource_namespace", "resource_name"},
	)
	m := &Metrics{
		persesInstances: instances,
		resources:       make(map[resourceKey]map[string]int),
	}
	reg := prometheus.NewRegistry()
	reg.MustRegister(m.persesInstances)

	// Seed one instance in each of two namespaces.
	m.PersesInstances("perses-dev", "perses-1").Set(1)
	m.PersesInstances("production", "perses-prod").Set(1)

	// Drop the dev instance; the production one must survive.
	m.DeletePersesInstance("perses-dev", "perses-1")

	want := `
# HELP perses_operator_managed_perses_instances Number of Perses instances managed by the operator
# TYPE perses_operator_managed_perses_instances gauge
perses_operator_managed_perses_instances{resource_name="perses-prod",resource_namespace="production"} 1
`
	assert.NoError(t, testutil.CollectAndCompare(m.persesInstances, strings.NewReader(want)))

	// Removing an unknown label set must not panic.
	m.DeletePersesInstance("nonexistent", "nonexistent")
}

// TestForgetObject checks that ForgetObject removes a single object key from
// every resource/state map while leaving other objects untouched.
func TestForgetObject(t *testing.T) {
	m := &Metrics{resources: make(map[resourceKey]map[string]int)}

	// Seed synced and failed entries across several objects.
	m.SetSyncedResources("ns1/resource1", "dashboard", 1)
	m.SetSyncedResources("ns1/resource2", "dashboard", 1)
	m.SetFailedResources("ns1/resource1", "dashboard", 1)
	m.SetSyncedResources("ns2/resource3", "datasource", 1)

	m.ForgetObject("ns1/resource1")

	var (
		syncedDashboard  = resourceKey{resource: "dashboard", state: synced}
		failedDashboard  = resourceKey{resource: "dashboard", state: failed}
		syncedDatasource = resourceKey{resource: "datasource", state: synced}
	)

	// Inspect the internal maps under the read lock.
	m.mtx.RLock()
	defer m.mtx.RUnlock()

	assert.Len(t, m.resources[syncedDashboard], 1, "Should have 1 synced dashboard after forget")
	assert.Len(t, m.resources[failedDashboard], 0, "Should have 0 failed dashboards after forget")
	assert.Len(t, m.resources[syncedDatasource], 1, "Datasource should be unaffected")

	// resource2 is still tracked with its original value.
	assert.Equal(t, 1, m.resources[syncedDashboard]["ns1/resource2"])

	// resource1 no longer appears in the synced map.
	_, stillThere := m.resources[syncedDashboard]["ns1/resource1"]
	assert.False(t, stillThere, "resource1 should be removed from synced map")
}

func TestForgetObjectCollectOutput(t *testing.T) {
reg := prometheus.NewRegistry()
m := &Metrics{
resources: make(map[resourceKey]map[string]int),
}
reg.MustRegister(m)

m.SetSyncedResources("ns1/resource1", "dashboard", 1)
m.SetSyncedResources("ns1/resource2", "dashboard", 1)
m.SetFailedResources("ns1/resource3", "dashboard", 1)

// Forget resource1 and resource3
m.ForgetObject("ns1/resource1")
m.ForgetObject("ns1/resource3")

// Collect and verify totals
expected := `
# HELP perses_operator_managed_resources Number of resources managed by the operator per state (synced/failed)
# TYPE perses_operator_managed_resources gauge
perses_operator_managed_resources{resource="dashboard",state="synced"} 1
perses_operator_managed_resources{resource="dashboard",state="failed"} 0
`
err := testutil.CollectAndCompare(m, strings.NewReader(expected))
assert.NoError(t, err)
}

// TestForgetObjectConcurrency runs ForgetObject concurrently with the
// Set*Resources helpers on one shared object key. It is most useful under
// `go test -race`.
//
// Every goroutine finishes with ForgetObject, so once all of them have
// completed the shared key must be absent from every resource map. The test
// asserts that, rather than only checking that the outer map is non-nil
// (which can never fail).
func TestForgetObjectConcurrency(t *testing.T) {
	const (
		key     = "test/resource"
		workers = 10
	)

	m := &Metrics{
		resources: make(map[resourceKey]map[string]int),
	}

	done := make(chan struct{})
	for i := 0; i < workers; i++ {
		go func(id int) {
			m.SetSyncedResources(key, "dashboard", id)
			m.SetFailedResources(key, "datasource", id)
			m.ForgetObject(key)
			done <- struct{}{}
		}(i)
	}

	// Wait for every worker before inspecting state.
	for i := 0; i < workers; i++ {
		<-done
	}

	m.mtx.RLock()
	defer m.mtx.RUnlock()

	assert.NotNil(t, m.resources)
	for rKey, perObject := range m.resources {
		_, exists := perObject[key]
		assert.False(t, exists, "%q should have been forgotten for %+v", key, rKey)
	}
}

func TestMetricsConcurrency(t *testing.T) {
m := &Metrics{
resources: make(map[resourceKey]map[string]int),
Expand Down
Loading
Loading