diff --git a/internal/operator-controller/controllers/clusterextension_controller_test.go b/internal/operator-controller/controllers/clusterextension_controller_test.go index 6aa55c5f9..8240352c6 100644 --- a/internal/operator-controller/controllers/clusterextension_controller_test.go +++ b/internal/operator-controller/controllers/clusterextension_controller_test.go @@ -680,10 +680,18 @@ func TestClusterExtensionBoxcutterApplierFailsDoesNotLeakDeprecationErrors(t *te cl, reconciler := newClientAndReconciler(t, func(d *deps) { // Boxcutter keeps a rolling revision when apply fails. We mirror that state so the test uses - // the same inputs the runtime would see. + // the same inputs the runtime would see. The revision metadata must match the spec so the + // controller recognizes this as a still-valid rollout (not a spec change). d.RevisionStatesGetter = &MockRevisionStatesGetter{ RevisionStates: &controllers.RevisionStates{ - RollingOut: []*controllers.RevisionMetadata{{}}, + RollingOut: []*controllers.RevisionMetadata{{ + Package: "prometheus", + Image: "quay.io/operatorhubio/prometheus@fake1.0.0", + BundleMetadata: ocv1.BundleMetadata{ + Name: "prometheus.v1.0.0", + Version: "1.0.0", + }, + }}, }, } d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { @@ -768,6 +776,609 @@ func TestClusterExtensionBoxcutterApplierFailsDoesNotLeakDeprecationErrors(t *te require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{})) } +// TestClusterExtensionSpecMismatchWhileRollingOutReResolvesBundle verifies that when a +// ClusterExtension's spec requests a different version than the revision currently rolling out, +// the controller re-resolves the bundle from the catalog instead of continuing to use the stale revision. +// +// Scenario: +// - A revision for v1.0.2 is rolling out (e.g., stuck due to deployment probe failure) +// - The ClusterExtension spec already requests v1.0.3 at reconcile time (e.g., previously updated by the user) +// - The controller detects the spec/revision mismatch and re-resolves from the catalog +// - The resolver returns v1.0.3, which is used for the new rollout +func TestClusterExtensionSpecMismatchWhileRollingOutReResolvesBundle(t *testing.T) { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=true", features.BoxcutterRuntime))) + t.Cleanup(func() { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=false", features.BoxcutterRuntime))) + }) + + resolverCalled := false + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + Installed: &controllers.RevisionMetadata{ + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.1-nginxolm88138", + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.1", + Version: "1.0.1", + }, + }, + RollingOut: []*controllers.RevisionMetadata{{ + RevisionName: "extension-88138-2", + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.2-nginx88138", + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.2", + Version: "1.0.2", + }, + }}, + }, + } + d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { + resolverCalled = true + v := bundle.VersionRelease{ + Version: bsemver.MustParse("1.0.3"), + } + return &declcfg.Bundle{ + Name: "nginx88138.v1.0.3", + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.3-nginxolm88138", + }, &v, nil, nil + }) + d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}} + d.Applier = &MockApplier{installCompleted: true} + }) + + ctx := context.Background() + extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} + + clusterExtension := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "nginx88138", + Version: "1.0.3", + }, + }, + Namespace: "default", + ServiceAccount: ocv1.ServiceAccountReference{ + Name: "default", + }, + }, + } + require.NoError(t, cl.Create(ctx, clusterExtension)) + + res, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.Equal(t, ctrl.Result{}, res) + require.NoError(t, err) + + require.True(t, resolverCalled, + "resolver should be called because the rolling out revision (v1.0.2) does not match the spec (v1.0.3)") + + require.NoError(t, cl.Get(ctx, extKey, clusterExtension)) + + installedCond := apimeta.FindStatusCondition(clusterExtension.Status.Conditions, ocv1.TypeInstalled) + require.NotNil(t, installedCond) + require.Equal(t, metav1.ConditionTrue, installedCond.Status) + require.Contains(t, installedCond.Message, "1.0.3") + + require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{})) +} + +// TestClusterExtensionRollingOutRevisionMatchesSpecNoReResolve verifies that when a +// revision is rolling out and the spec hasn't changed, the controller uses the existing +// rolling out revision without re-resolving from the catalog. +func TestClusterExtensionRollingOutRevisionMatchesSpecNoReResolve(t *testing.T) { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=true", features.BoxcutterRuntime))) + t.Cleanup(func() { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=false", features.BoxcutterRuntime))) + }) + + resolverCalled := false + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + RollingOut: []*controllers.RevisionMetadata{{ + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.2-nginx88138", + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.2", + Version: "1.0.2", + }, + }}, + }, + } + d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { + resolverCalled = true + v := bundle.VersionRelease{ + Version: bsemver.MustParse("1.0.2"), + } + return &declcfg.Bundle{ + Name: "nginx88138.v1.0.2", + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.2-nginx88138", + }, &v, nil, nil + }) + d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}} + d.Applier = &MockApplier{installCompleted: true} + }) + + ctx := context.Background() + extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} + + clusterExtension := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "nginx88138", + Version: "1.0.2", + }, + }, + Namespace: "default", + ServiceAccount: ocv1.ServiceAccountReference{ + Name: "default", + }, + }, + } + require.NoError(t, cl.Create(ctx, clusterExtension)) + + _, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.NoError(t, err) + + require.False(t, resolverCalled, + "resolver should NOT be called because the rolling out revision (v1.0.2) matches the spec (v1.0.2)") + + require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{})) +} + +// TestClusterExtensionMultipleRollingOutRevisionsUsesLatestMatch verifies that when multiple +// revisions are rolling out (e.g., a stuck revision and a new one created after re-resolution), +// the controller picks the latest revision that matches the spec. +// +// This covers the lifecycle after re-resolution: the stuck revision (v1.0.2) is still active, +// a new revision (v1.0.1) was created. The revision controller will archive the old one once +// the new one succeeds, but in the meantime both are in the RollingOut list. +func TestClusterExtensionMultipleRollingOutRevisionsUsesLatestMatch(t *testing.T) { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=true", features.BoxcutterRuntime))) + t.Cleanup(func() { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=false", features.BoxcutterRuntime))) + }) + + resolverCalled := false + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + Installed: &controllers.RevisionMetadata{ + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.0-nginxolm88138", + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.0", + Version: "1.0.0", + }, + }, + RollingOut: []*controllers.RevisionMetadata{ + { + RevisionName: "extension-88138-2", + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.2-nginx88138", + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.2", + Version: "1.0.2", + }, + }, + { + RevisionName: "extension-88138-3", + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.1-nginxolm88138", + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.1", + Version: "1.0.1", + }, + }, + }, + }, + } + d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { + resolverCalled = true + return nil, nil, nil, fmt.Errorf("should not be called") + }) + d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}} + d.Applier = &MockApplier{installCompleted: true} + }) + + ctx := context.Background() + extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} + + clusterExtension := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "nginx88138", + Version: "1.0.1", + }, + }, + Namespace: "default", + ServiceAccount: ocv1.ServiceAccountReference{ + Name: "default", + }, + }, + } + require.NoError(t, cl.Create(ctx, clusterExtension)) + + _, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.NoError(t, err) + + require.False(t, resolverCalled, + "resolver should NOT be called: the latest rolling-out revision (v1.0.1) matches the spec, "+ + "even though an older stuck revision (v1.0.2) does not match") + + require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{})) +} + +// TestClusterExtensionReResolvesWithInstalledAsFromVersion verifies that when re-resolving +// during a stuck rollout, the resolver receives the Installed revision (A) as the +// installedBundle parameter — not the rolling-out revision (B). This ensures upgrade +// constraints are checked against the last known-good state. +func TestClusterExtensionReResolvesWithInstalledAsFromVersion(t *testing.T) { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=true", features.BoxcutterRuntime))) + t.Cleanup(func() { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=false", features.BoxcutterRuntime))) + }) + + var capturedInstalledBundle *ocv1.BundleMetadata + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + Installed: &controllers.RevisionMetadata{ + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.0-nginxolm88138", + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.0", + Version: "1.0.0", + }, + }, + RollingOut: []*controllers.RevisionMetadata{{ + RevisionName: "extension-88138-2", + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.2-nginx88138", + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.2", + Version: "1.0.2", + }, + }}, + }, + } + d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { + capturedInstalledBundle = installedBundle + v := bundle.VersionRelease{ + Version: bsemver.MustParse("1.0.1"), + } + return &declcfg.Bundle{ + Name: "nginx88138.v1.0.1", + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.1-nginxolm88138", + }, &v, nil, nil + }) + d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}} + d.Applier = &MockApplier{installCompleted: true} + }) + + ctx := context.Background() + extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} + + clusterExtension := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "nginx88138", + Version: "1.0.1", + }, + }, + Namespace: "default", + ServiceAccount: ocv1.ServiceAccountReference{ + Name: "default", + }, + }, + } + require.NoError(t, cl.Create(ctx, clusterExtension)) + + _, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.NoError(t, err) + + require.NotNil(t, capturedInstalledBundle, + "resolver should receive an installedBundle") + require.Equal(t, "nginx88138.v1.0.0", capturedInstalledBundle.Name, + "resolver should receive the Installed version (v1.0.0), not the rolling-out version (v1.0.2)") + require.Equal(t, "1.0.0", capturedInstalledBundle.Version) + + require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{})) +} + +// TestClusterExtensionProgressDeadlineExceededReResolvesBundle verifies that when the +// latest rolling-out revision has exceeded its progression deadline, the controller +// re-resolves from the catalog — even if the spec still matches the rolling-out version. +// This is the automatic recovery trigger for stuck rollouts. +func TestClusterExtensionProgressDeadlineExceededReResolvesBundle(t *testing.T) { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=true", features.BoxcutterRuntime))) + t.Cleanup(func() { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=false", features.BoxcutterRuntime))) + }) + + resolverCalled := false + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + Installed: &controllers.RevisionMetadata{ + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.0-nginxolm88138", + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.0", + Version: "1.0.0", + }, + }, + RollingOut: []*controllers.RevisionMetadata{{ + RevisionName: "extension-88138-2", + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.2-nginx88138", + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.2", + Version: "1.0.2", + }, + Conditions: []metav1.Condition{{ + Type: ocv1.ClusterExtensionRevisionTypeProgressing, + Status: metav1.ConditionFalse, + Reason: ocv1.ReasonProgressDeadlineExceeded, + }}, + }}, + }, + } + d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { + resolverCalled = true + v := bundle.VersionRelease{ + Version: bsemver.MustParse("1.0.2"), + } + return &declcfg.Bundle{ + Name: "nginx88138.v1.0.2", + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.2-nginx88138", + }, &v, nil, nil + }) + d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}} + d.Applier = &MockApplier{installCompleted: true} + }) + + ctx := context.Background() + extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} + + clusterExtension := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "nginx88138", + Version: "1.0.2", + }, + }, + Namespace: "default", + ServiceAccount: ocv1.ServiceAccountReference{ + Name: "default", + }, + }, + } + require.NoError(t, cl.Create(ctx, clusterExtension)) + + _, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.NoError(t, err) + + require.True(t, resolverCalled, + "resolver should be called because the rolling out revision exceeded its progression deadline, "+ + "even though the spec (v1.0.2) matches the rolling-out version (v1.0.2)") + + require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{})) +} + +// TestClusterExtensionNoInstalledRevisionSpecMismatchReResolves verifies that when there +// is no installed revision (first install attempt got stuck) and the spec changes, +// the controller re-resolves with nil as the installedBundle. +func TestClusterExtensionNoInstalledRevisionSpecMismatchReResolves(t *testing.T) { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=true", features.BoxcutterRuntime))) + t.Cleanup(func() { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=false", features.BoxcutterRuntime))) + }) + + resolverCalled := false + var capturedInstalledBundle *ocv1.BundleMetadata + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + RollingOut: []*controllers.RevisionMetadata{{ + RevisionName: "extension-1", + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.2-nginx88138", + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.2", + Version: "1.0.2", + }, + }}, + }, + } + d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { + resolverCalled = true + capturedInstalledBundle = installedBundle + v := bundle.VersionRelease{ + Version: bsemver.MustParse("1.0.1"), + } + return &declcfg.Bundle{ + Name: "nginx88138.v1.0.1", + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.1-nginxolm88138", + }, &v, nil, nil + }) + d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}} + d.Applier = &MockApplier{installCompleted: true} + }) + + ctx := context.Background() + extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} + + clusterExtension := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "nginx88138", + Version: "1.0.1", + }, + }, + Namespace: "default", + ServiceAccount: ocv1.ServiceAccountReference{ + Name: "default", + }, + }, + } + require.NoError(t, cl.Create(ctx, clusterExtension)) + + _, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.NoError(t, err) + + require.True(t, resolverCalled, + "resolver should be called: first install stuck (v1.0.2) and spec requests v1.0.1") + require.Nil(t, capturedInstalledBundle, + "installedBundle should be nil since no revision has been successfully installed yet") + + require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{})) +} + +// TestClusterExtensionRollingOutEmptyVersionSpecMatchesAny verifies that when the spec +// has no version constraint, any rolling-out revision matches and is reused. +func TestClusterExtensionRollingOutEmptyVersionSpecMatchesAny(t *testing.T) { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=true", features.BoxcutterRuntime))) + t.Cleanup(func() { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=false", features.BoxcutterRuntime))) + }) + + resolverCalled := false + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + RollingOut: []*controllers.RevisionMetadata{{ + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.2-nginx88138", + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.2", + Version: "1.0.2", + }, + }}, + }, + } + d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { + resolverCalled = true + return nil, nil, nil, fmt.Errorf("should not be called") + }) + d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}} + d.Applier = &MockApplier{installCompleted: true} + }) + + ctx := context.Background() + extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} + + clusterExtension := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "nginx88138", + }, + }, + Namespace: "default", + ServiceAccount: ocv1.ServiceAccountReference{ + Name: "default", + }, + }, + } + require.NoError(t, cl.Create(ctx, clusterExtension)) + + _, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.NoError(t, err) + + require.False(t, resolverCalled, + "resolver should NOT be called: spec has no version constraint, so any rolling-out revision matches") + + require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{})) +} + +// TestClusterExtensionRollingOutVersionRangeMatchReuses verifies that when the spec has +// a version range constraint and the rolling-out revision's version falls within that range, +// the controller reuses the revision without re-resolving. +func TestClusterExtensionRollingOutVersionRangeMatchReuses(t *testing.T) { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=true", features.BoxcutterRuntime))) + t.Cleanup(func() { + require.NoError(t, features.OperatorControllerFeatureGate.Set(fmt.Sprintf("%s=false", features.BoxcutterRuntime))) + }) + + resolverCalled := false + cl, reconciler := newClientAndReconciler(t, func(d *deps) { + d.RevisionStatesGetter = &MockRevisionStatesGetter{ + RevisionStates: &controllers.RevisionStates{ + RollingOut: []*controllers.RevisionMetadata{{ + Package: "nginx88138", + Image: "quay.io/olmqe/nginxolm-operator-bundle:v1.0.2-nginx88138", + BundleMetadata: ocv1.BundleMetadata{ + Name: "nginx88138.v1.0.2", + Version: "1.0.2", + }, + }}, + }, + } + d.Resolver = resolve.Func(func(ctx context.Context, ext *ocv1.ClusterExtension, installedBundle *ocv1.BundleMetadata) (*declcfg.Bundle, *bundle.VersionRelease, *declcfg.Deprecation, error) { + resolverCalled = true + return nil, nil, nil, fmt.Errorf("should not be called") + }) + d.ImagePuller = &imageutil.MockPuller{ImageFS: fstest.MapFS{}} + d.Applier = &MockApplier{installCompleted: true} + }) + + ctx := context.Background() + extKey := types.NamespacedName{Name: fmt.Sprintf("cluster-extension-test-%s", rand.String(8))} + + clusterExtension := &ocv1.ClusterExtension{ + ObjectMeta: metav1.ObjectMeta{Name: extKey.Name}, + Spec: ocv1.ClusterExtensionSpec{ + Source: ocv1.SourceConfig{ + SourceType: "Catalog", + Catalog: &ocv1.CatalogFilter{ + PackageName: "nginx88138", + Version: ">=1.0.0, <2.0.0", + }, + }, + Namespace: "default", + ServiceAccount: ocv1.ServiceAccountReference{ + Name: "default", + }, + }, + } + require.NoError(t, cl.Create(ctx, clusterExtension)) + + _, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: extKey}) + require.NoError(t, err) + + require.False(t, resolverCalled, + "resolver should NOT be called: rolling-out version (v1.0.2) is within spec range (>=1.0.0, <2.0.0)") + + require.NoError(t, cl.DeleteAllOf(ctx, &ocv1.ClusterExtension{})) +} + func TestValidateClusterExtension(t *testing.T) { tests := []struct { name string diff --git a/internal/operator-controller/controllers/clusterextension_reconcile_steps.go b/internal/operator-controller/controllers/clusterextension_reconcile_steps.go index 14ad71785..1ae41ae8a 100644 --- a/internal/operator-controller/controllers/clusterextension_reconcile_steps.go +++ b/internal/operator-controller/controllers/clusterextension_reconcile_steps.go @@ -21,6 +21,7 @@ import ( "errors" "fmt" + bsemver "github.com/blang/semver/v4" apierrors "k8s.io/apimachinery/pkg/api/errors" apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -32,6 +33,7 @@ import ( ocv1 "github.com/operator-framework/operator-controller/api/v1" "github.com/operator-framework/operator-controller/internal/operator-controller/bundleutil" + "github.com/operator-framework/operator-controller/internal/operator-controller/catalogmetadata/compare" "github.com/operator-framework/operator-controller/internal/operator-controller/labels" "github.com/operator-framework/operator-controller/internal/operator-controller/resolve" imageutil "github.com/operator-framework/operator-controller/internal/shared/util/image" @@ -140,14 +142,20 @@ func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc { return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) { l := log.FromContext(ctx) - // If already rolling out, use existing revision and set deprecation to Unknown (no catalog check) - if len(state.revisionStates.RollingOut) > 0 { - installedBundleName := "" - if state.revisionStates.Installed != nil { - installedBundleName = state.revisionStates.Installed.Name - } - SetDeprecationStatus(ext, installedBundleName, nil, false) - state.resolvedRevisionMetadata = state.revisionStates.RollingOut[0] + // When revisions are actively rolling out, we use two triggers to decide + // whether to re-engage the resolver: + // + // 1. Progression deadline exceeded: The latest rolling-out revision has been + // marked ProgressDeadlineExceeded by the revision controller. This provides + // automatic recovery from stuck rollouts even if the admin hasn't changed + // the spec. + // + // 2. Spec change: No rolling-out revision matches the current spec. The admin + // has explicitly requested a different version, so we honor it immediately. + // + // If neither trigger fires, we reuse the latest matching rolling-out revision + // to avoid unnecessary catalog lookups and provide resilience during catalog outages. + if reuseRollingOutRevision(ctx, state, ext) { return nil, nil } @@ -199,6 +207,108 @@ func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc { } } +// reuseRollingOutRevision checks whether an active rollout should continue using its +// current revision. Returns true if a matching revision was found and set on the state +// (caller should return early). Returns false if the resolver should be called instead. +// +// Two triggers cause this to return false (meaning: re-resolve): +// 1. The latest rolling-out revision exceeded its progression deadline. +// 2. No rolling-out revision matches the current spec (admin changed it). +func reuseRollingOutRevision(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) bool { + if len(state.revisionStates.RollingOut) == 0 { + return false + } + + l := log.FromContext(ctx) + latestRollingOut := state.revisionStates.RollingOut[len(state.revisionStates.RollingOut)-1] + + // Trigger 1: progression deadline exceeded — re-resolve regardless of spec match. + if hasProgressDeadlineExceeded(latestRollingOut) { + l.Info("progression deadline exceeded for rolling out revision, re-resolving bundle", + "revision", latestRollingOut.RevisionName, + "version", latestRollingOut.Version, + ) + return false + } + + // Check if any rolling-out revision matches the current spec. + // If so, reuse it — no need to call the resolver. + for i := len(state.revisionStates.RollingOut) - 1; i >= 0; i-- { + rollingOut := state.revisionStates.RollingOut[i] + if rollingOutRevisionMatchesSpec(rollingOut, ext) { + installedBundleName := "" + if state.revisionStates.Installed != nil { + installedBundleName = state.revisionStates.Installed.Name + } + SetDeprecationStatus(ext, installedBundleName, nil, false) + state.resolvedRevisionMetadata = rollingOut + return true + } + } + + // Trigger 2: no rolling-out revision matches the current spec (spec changed or constraints are invalid). + specVersion := "" + if ext.Spec.Source.Catalog != nil { + specVersion = ext.Spec.Source.Catalog.Version + } + l.Info("no rolling-out revision matches current spec or spec constraints, re-resolving bundle", + "rollingOutCount", len(state.revisionStates.RollingOut), + "specVersion", specVersion, + ) + return false +} + +// rollingOutRevisionMatchesSpec checks whether a rolling out revision is still compatible +// with the current ClusterExtension spec. It compares the revision's package name and +// version against the spec's constraints. +// +// Returns true if the revision matches the spec (meaning we should continue rolling it out). +// Returns false if the revision does not match the current spec or if the spec's version +// constraints are invalid/unparseable, in which case we should re-resolve from the catalog. +func rollingOutRevisionMatchesSpec(rm *RevisionMetadata, ext *ocv1.ClusterExtension) bool { + if ext.Spec.Source.Catalog == nil { + return false + } + if rm.Package != ext.Spec.Source.Catalog.PackageName { + return false + } + return versionMatchesSpec(rm.Version, ext) +} + +// versionMatchesSpec checks whether a given version string satisfies the version constraint +// in the ClusterExtension spec. Returns true if the spec has no version constraint, or if +// the version falls within the specified range. Returns false if there is a mismatch or +// if either the constraint or the version string is unparseable. +func versionMatchesSpec(version string, ext *ocv1.ClusterExtension) bool { + if ext.Spec.Source.Catalog == nil { + return true + } + specVersion := ext.Spec.Source.Catalog.Version + if specVersion == "" { + return true + } + if version == "" { + return false + } + versionRange, err := compare.NewVersionRange(specVersion) + if err != nil { + return false + } + v, err := bsemver.Parse(version) + if err != nil { + return false + } + return versionRange(v) +} + +// hasProgressDeadlineExceeded checks whether a rolling-out revision has exceeded +// its progression deadline. This is one of the triggers for re-engaging the resolver +// during an active rollout. +func hasProgressDeadlineExceeded(rm *RevisionMetadata) bool { + cnd := apimeta.FindStatusCondition(rm.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing) + return cnd != nil && cnd.Status == metav1.ConditionFalse && cnd.Reason == ocv1.ReasonProgressDeadlineExceeded +} + // handleResolutionError handles the case when bundle resolution fails. // // Decision logic (evaluated in order): @@ -224,15 +334,15 @@ func handleResolutionError(ctx context.Context, c client.Client, state *reconcil return nil, err } - // Check if the spec is requesting a specific version that differs from installed - specVersion := "" - if ext.Spec.Source.Catalog != nil { - specVersion = ext.Spec.Source.Catalog.Version - } - installedVersion := state.revisionStates.Installed.Version - - // If spec requests a different version, we cannot fall back - must fail and retry - if specVersion != "" && specVersion != installedVersion { + // Check if the spec is requesting a version that differs from what's installed. + // Uses the same semver range matching as reuseRollingOutRevision so that ranges + // like ">=1.0.0, <2.0.0" are correctly recognized as satisfied by "1.0.0". + if !versionMatchesSpec(state.revisionStates.Installed.Version, ext) { + specVersion := "" + if ext.Spec.Source.Catalog != nil { + specVersion = ext.Spec.Source.Catalog.Version + } + installedVersion := state.revisionStates.Installed.Version msg := fmt.Sprintf("unable to upgrade to version %s: %v (currently installed: %s)", specVersion, err, installedVersion) l.Error(err, "resolution failed and spec requests version change - cannot fall back", "requestedVersion", specVersion, diff --git a/test/e2e/features/update.feature b/test/e2e/features/update.feature index e1b4becca..dedcac668 100644 --- a/test/e2e/features/update.feature +++ b/test/e2e/features/update.feature @@ -243,3 +243,69 @@ Feature: Update ClusterExtension And ClusterExtensionRevision "${NAME}-2" reports Progressing as True with Reason RollingOut And ClusterExtensionRevision "${NAME}-2" reports Available as False with Reason ProbeFailure + @BoxcutterRuntime + Scenario: Version change while a revision is stuck rolling out re-resolves from catalog + Given ClusterExtension is applied + """ + apiVersion: olm.operatorframework.io/v1 + kind: ClusterExtension + metadata: + name: ${NAME} + spec: + namespace: ${TEST_NAMESPACE} + serviceAccount: + name: olm-sa + source: + sourceType: Catalog + catalog: + packageName: test + selector: + matchLabels: + "olm.operatorframework.io/metadata.name": test-catalog + version: 1.0.2 + upgradeConstraintPolicy: SelfCertified + """ + And ClusterExtensionRevision "${NAME}-1" reports Available as False with Reason ProbeFailure + When ClusterExtension is updated to version "1.0.1" + Then ClusterExtension is rolled out + And ClusterExtension is available + And bundle "test-operator.1.0.1" is installed in version "1.0.1" + + # Note: This test changes the version after the deadline fires. Without the version change, + # re-resolving with the same spec produces the same revision content — the applier patches + # in-place and no new revision is created, making re-resolution unobservable in e2e. + # The deadline trigger is tested in isolation by the unit test + # TestClusterExtensionProgressDeadlineExceededReResolvesBundle. + @BoxcutterRuntime + @ProgressDeadline + Scenario: Progression deadline triggers re-resolution when rollout is stuck + Given min value for ClusterExtension .spec.progressDeadlineMinutes is set to 1 + And min value for ClusterExtensionRevision .spec.progressDeadlineMinutes is set to 1 + And ClusterExtension is applied + """ + apiVersion: olm.operatorframework.io/v1 + kind: ClusterExtension + metadata: + name: ${NAME} + spec: + namespace: ${TEST_NAMESPACE} + serviceAccount: + name: olm-sa + source: + sourceType: Catalog + catalog: + packageName: test + selector: + matchLabels: + "olm.operatorframework.io/metadata.name": test-catalog + version: 1.0.2 + upgradeConstraintPolicy: SelfCertified + progressDeadlineMinutes: 1 + """ + And ClusterExtensionRevision "${NAME}-1" reports Available as False with Reason ProbeFailure + And ClusterExtensionRevision "${NAME}-1" reports Progressing as False with Reason ProgressDeadlineExceeded + When ClusterExtension is updated to version "1.0.1" + Then ClusterExtension is rolled out + And ClusterExtension is available + And bundle "test-operator.1.0.1" is installed in version "1.0.1" +