38 changes: 34 additions & 4 deletions internal/controllers/reconciliation/controller.go
@@ -165,10 +165,7 @@ func (c *Controller) Reconcile(ctx context.Context, req resource.Request) (ctrl.
return ctrl.Result{Requeue: true}, nil
}

deleted := current == nil ||
(current.GetDeletionTimestamp() != nil && !snap.ForegroundDeletion) ||
(snap.Deleted() && (snap.Orphan || snap.Disable || failingOpen)) // orphaning should be reflected on the status.

deleted := shouldMarkResourceDeleted(snap, current, failingOpen)
c.writeBuffer.PatchStatusAsync(ctx, &resource.ManifestRef, patchResourceState(deleted, ready))
return c.requeue(logger, snap, ready)
}
@@ -177,6 +174,39 @@ func (c *Controller) shouldFailOpen(resource *resource.Resource) bool {
return (resource.FailOpen == nil && c.failOpen) || (resource.FailOpen != nil && *resource.FailOpen)
}

// shouldMarkResourceDeleted returns true when the resource should be treated
// as deleted for status-reporting purposes.
func shouldMarkResourceDeleted(snap *resource.Snapshot, current *unstructured.Unstructured, failingOpen bool) bool {
// Resource does not exist - it is definitely deleted.
if current == nil {
return true
}

// Resource is being deleted by the kube-apiserver (deletionTimestamp set).
// For foreground deletion we must wait until the object is fully gone,
// so we only report deleted here for non-foreground cases.
if current.GetDeletionTimestamp() != nil && !snap.ForegroundDeletion {
return true
}

// Orphan & Disabled should always be reflected on the status.
// FailOpen should also be reflected — *unless* we are doing a foreground
// deletion that is still in progress, because marking a resource deleted
// prematurely would break the foreground ordering guarantee.
if snap.Deleted() {
if snap.Orphan || snap.Disable {
return true
}

if failingOpen {
// Do not prematurely report deleted during an active foreground deletion.
pendingForegroundDeletion := snap.ForegroundDeletion && current.GetDeletionTimestamp() != nil
return !pendingForegroundDeletion
}
}
return false
}

func (c *Controller) reconcileResource(ctx context.Context, comp *apiv1.Composition, prev *resource.Resource, resource *resource.Resource) (snap *resource.Snapshot, current *unstructured.Unstructured, ready *metav1.Time, modified bool, err error) {
logger := logr.FromContextOrDiscard(ctx)

81 changes: 81 additions & 0 deletions internal/controllers/reconciliation/controller_test.go
@@ -15,6 +15,7 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -249,3 +250,83 @@ func TestRequeueDoesNotPanic(t *testing.T) {
}).
Evaluate(t)
}

func TestShouldMarkResourceDeleted(t *testing.T) {
now := metav1.Now()
existing := &unstructured.Unstructured{}
deleting := &unstructured.Unstructured{}
deleting.SetDeletionTimestamp(&now)

tests := []struct {
name string
snap *resource.Snapshot
current *unstructured.Unstructured
failOpen bool
wantDelete bool
}{
{
name: "current is nil — resource does not exist",
snap: &resource.Snapshot{Resource: &resource.Resource{}},
current: nil,
wantDelete: true,
},
{
name: "deletionTimestamp set, non-foreground",
snap: &resource.Snapshot{Resource: &resource.Resource{}},
current: deleting,
wantDelete: true,
},
{
name: "deletionTimestamp set, foreground — wait for full removal",
snap: &resource.Snapshot{Resource: &resource.Resource{}, ForegroundDeletion: true},
current: deleting,
wantDelete: false,
},
{
name: "deleted + orphan — always report deleted",
snap: &resource.Snapshot{Resource: &resource.Resource{}, Disable: true, Orphan: true},
current: existing,
wantDelete: true,
},
{
name: "deleted + disable — always report deleted",
snap: &resource.Snapshot{Resource: &resource.Resource{}, Disable: true},
current: existing,
wantDelete: true,
},
{
name: "deleted + failOpen, no foreground — report deleted",
snap: &resource.Snapshot{Resource: &resource.Resource{}, Disable: true},
current: existing,
failOpen: true,
wantDelete: true,
},
{
name: "deleted + failOpen + foreground + deletionTimestamp — do NOT prematurely report deleted",
snap: &resource.Snapshot{Resource: &resource.Resource{}, Disable: true, ForegroundDeletion: true},
current: deleting,
failOpen: true,
wantDelete: true, // Disable branch returns true before reaching failOpen
},
{
name: "not deleted, resource exists — no deletion",
snap: &resource.Snapshot{Resource: &resource.Resource{}},
current: existing,
wantDelete: false,
},
{
name: "not deleted, resource exists, failOpen — no deletion",
snap: &resource.Snapshot{Resource: &resource.Resource{}},
current: existing,
failOpen: true,
wantDelete: false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := shouldMarkResourceDeleted(tt.snap, tt.current, tt.failOpen)
assert.Equal(t, tt.wantDelete, got)
})
}
}
8 changes: 3 additions & 5 deletions internal/resource/cache.go
@@ -109,20 +109,18 @@ func (c *Cache) Fill(ctx context.Context, comp *apiv1.Composition, synUUID strin
logger.Error(err, "invalid resource - cannot load into cache", "resourceSliceName", slice.Name, "resourceIndex", i)
return
}

isGhostResource := false
if c.ResourceFilter != nil {
matches, err := c.evaluateResourceFilter(ctx, comp, res)
if err != nil {
logger.Error(err, "failed to evaluate resource filter", "resourceKind", res.Ref.Kind, "resourceName", res.Ref.Name)
resourceFilterErrors.Inc()
continue
}
if !matches {
continue
}
isGhostResource = !matches
}

builder.Add(res)
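// Non-matching resources are now added as "ghosts": they still participate in the dependency
// graph so that readiness/deletion ordering across reconcilers is preserved, but this
// reconciler never acts on them.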
builder.Add(res, isGhostResource)
}
}
tree := builder.Build()
108 changes: 108 additions & 0 deletions internal/resource/cache_test.go
@@ -413,6 +413,114 @@ func TestCacheResourceFilterAlwaysTrue(t *testing.T) {
assert.True(t, found)
}

// TestCacheGhostResourceCrossReconcilerDeletionOrder simulates two eno-reconcilers (A and B)
// sharing the same composition. Reconciler A manages a CRD-like resource (deletion-group -1)
// and reconciler B manages a Deployment-like resource (deletion-group 0).
// The test proves that B's resource is blocked until A's resource is confirmed deleted,
// even though A's resource is a ghost (filtered out) in B's cache.
func TestCacheGhostResourceCrossReconcilerDeletionOrder(t *testing.T) {
ctx := context.Background()

// Shared composition with DeletionTimestamp set to activate deletion-group logic
now := metav1.Now()
comp := &apiv1.Composition{}
comp.Name = "test-comp"
comp.Namespace = "test-ns"
comp.DeletionTimestamp = &now

// Two resources in the same slice:
// - "crd-resource" with deletion-group -1, label role=crd (managed by reconciler A)
// - "deployment-resource" with deletion-group 0, label role=deployment (managed by reconciler B)
slices := []apiv1.ResourceSlice{{
ObjectMeta: metav1.ObjectMeta{Name: "slice-1", Namespace: "test-ns"},
Spec: apiv1.ResourceSliceSpec{
Resources: []apiv1.Manifest{
{Manifest: `{"apiVersion": "v1", "kind": "ConfigMap", "metadata": {"name": "crd-resource", "namespace": "default", "labels": {"role": "crd"}, "annotations": {"eno.azure.io/deletion-group": "-1"}}}`},
{Manifest: `{"apiVersion": "v1", "kind": "ConfigMap", "metadata": {"name": "deployment-resource", "namespace": "default", "labels": {"role": "deployment"}, "annotations": {"eno.azure.io/deletion-group": "0"}}}`},
},
},
}}

const synUUID = "test-syn"
crdRef := Ref{Name: "crd-resource", Namespace: "default", Kind: "ConfigMap"}
deployRef := Ref{Name: "deployment-resource", Namespace: "default", Kind: "ConfigMap"}

// --- Reconciler A: manages role=crd resources ---
filterA, err := enocel.Parse("self.metadata.labels.role == 'crd'")
require.NoError(t, err)
var cacheA Cache
queueA := workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[Request]())
cacheA.SetQueue(queueA)
cacheA.ResourceFilter = filterA

cacheA.Fill(ctx, comp, synUUID, slices)
cacheA.Visit(ctx, comp, synUUID, slices)
dumpQueue(queueA) // drain

// Reconciler A can see the crd-resource (it's its own)
res, visible, found := cacheA.Get(ctx, synUUID, crdRef)
assert.NotNil(t, res, "reconciler A should find its own resource")
assert.True(t, found)
assert.True(t, visible, "crd-resource (deletion-group -1) has no dependencies, should be visible")

// Reconciler A cannot see deployment-resource (it's a ghost)
res, visible, found = cacheA.Get(ctx, synUUID, deployRef)
assert.Nil(t, res)
assert.False(t, found, "deployment-resource is a ghost in reconciler A's cache")
assert.False(t, visible)

// --- Reconciler B: manages role=deployment resources ---
filterB, err := enocel.Parse("self.metadata.labels.role == 'deployment'")
require.NoError(t, err)
var cacheB Cache
queueB := workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[Request]())
cacheB.SetQueue(queueB)
cacheB.ResourceFilter = filterB

cacheB.Fill(ctx, comp, synUUID, slices)
cacheB.Visit(ctx, comp, synUUID, slices)
dumpQueue(queueB) // drain

// Reconciler B cannot see crd-resource (it's a ghost)
res, visible, found = cacheB.Get(ctx, synUUID, crdRef)
assert.Nil(t, res)
assert.False(t, found, "crd-resource is a ghost in reconciler B's cache")
assert.False(t, visible)

// Reconciler B can find deployment-resource but it should NOT be visible yet
// because it depends on crd-resource (deletion-group -1 < 0) which hasn't been deleted
res, visible, found = cacheB.Get(ctx, synUUID, deployRef)
assert.NotNil(t, res, "reconciler B should find its own resource")
assert.True(t, found)
assert.False(t, visible, "deployment-resource should be blocked by ghost crd-resource in deletion-group -1")

// --- Simulate reconciler A deleting the crd-resource ---
// Reconciler A writes Deleted: true to the resource slice status for the crd-resource (index 0)
slicesWithStatus := []apiv1.ResourceSlice{{
ObjectMeta: metav1.ObjectMeta{Name: "slice-1", Namespace: "test-ns"},
Spec: slices[0].Spec,
Status: apiv1.ResourceSliceStatus{
Resources: []apiv1.ResourceState{
{Deleted: true}, // crd-resource at index 0 is deleted
{}, // deployment-resource at index 1 is not yet deleted
},
},
}}

// Reconciler B's informer picks up the status change and calls Visit
cacheB.Visit(ctx, comp, synUUID, slicesWithStatus)
enqueuedB := dumpQueue(queueB)

// The deployment-resource should have been enqueued because its dependency was cleared
assert.Contains(t, enqueuedB, deployRef.String(), "deployment-resource should be enqueued after ghost dependency is cleared")

// Now deployment-resource should be visible — the ghost crd-resource dependency is satisfied
res, visible, found = cacheB.Get(ctx, synUUID, deployRef)
assert.NotNil(t, res)
assert.True(t, found)
assert.True(t, visible, "deployment-resource should now be visible after crd-resource ghost is deleted")
}

func dumpQueue(q workqueue.TypedRateLimitingInterface[Request]) (slice []string) {
for {
if q.Len() == 0 {
18 changes: 12 additions & 6 deletions internal/resource/tree.go
@@ -10,8 +10,11 @@ import (
)

type indexedResource struct {
Resource *Resource
Seen bool
Resource *Resource
Seen bool
// GhostResource indicates that the resource is managed by another eno-reconciler; it only
// participates in the dependency graph so that ordering (readiness-group/deletion-group) works as expected.
GhostResource bool
PendingDependencies map[Ref]struct{}
Dependents map[Ref]*indexedResource
}
@@ -54,7 +57,7 @@ func (b *treeBuilder) init() {
}
}

func (b *treeBuilder) Add(resource *Resource) {
func (b *treeBuilder) Add(resource *Resource, ghostResource bool) {
b.init()

// Handle conflicting refs deterministically
@@ -65,6 +68,7 @@ func (b *treeBuilder) Add(resource *Resource) {
// Index the resource into the builder
idx := &indexedResource{
Resource: resource,
GhostResource: ghostResource,
PendingDependencies: map[Ref]struct{}{},
Dependents: map[Ref]*indexedResource{},
}
@@ -136,7 +140,7 @@ type tree struct {
// Get returns the resource and determines if it's visible based on the state of its dependencies.
func (t *tree) Get(key Ref) (res *Resource, visible bool, found bool) {
idx, ok := t.byRef[key]
if !ok {
if !ok || idx.GhostResource { // no need to act on a ghost resource - it is managed by another eno-reconciler
return nil, false, false
}
//TODO: debug logging on what we're blocked on might help future issues.
@@ -152,8 +156,10 @@ func (t *tree) UpdateState(ref ManifestRef, state *apiv1.ResourceState, enqueue

// Requeue self when the state has changed
lastKnown := idx.Resource.latestKnownState.Swap(state)
if (!idx.Seen && lastKnown == nil) || !lastKnown.Equal(state) {
enqueue(idx.Resource.Ref)
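// Ghost resources are owned by another eno-reconciler, so they are never enqueued here;
// their state update is still processed so that dependents tracked by this cache can unblock.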
if !idx.GhostResource {
if (!idx.Seen && lastKnown == nil) || !lastKnown.Equal(state) {
enqueue(idx.Resource.Ref)
}
}
idx.Seen = true
