diff --git a/api/aicr/v1/server.yaml b/api/aicr/v1/server.yaml index 6f66667d8..cdc691070 100644 --- a/api/aicr/v1/server.yaml +++ b/api/aicr/v1/server.yaml @@ -214,13 +214,15 @@ paths: os: any componentRefs: - name: gpu-operator + type: Helm version: v25.3.3 - order: 1 - repository: https://helm.ngc.nvidia.com/nvidia + source: https://helm.ngc.nvidia.com/nvidia + chart: gpu-operator - name: network-operator + type: Helm version: v25.4.0 - order: 2 - repository: https://helm.ngc.nvidia.com/nvidia + source: https://helm.ngc.nvidia.com/nvidia + chart: network-operator constraints: driver: version: "580.82.07" @@ -947,9 +949,10 @@ paths: kind: Recipe componentRefs: - name: gpu-operator + type: Helm version: v25.3.3 - type: helm - repository: https://helm.ngc.nvidia.com/nvidia + source: https://helm.ngc.nvidia.com/nvidia + chart: gpu-operator fullRecipe: summary: Full recipe with metadata value: @@ -979,9 +982,10 @@ paths: os: any componentRefs: - name: gpu-operator + type: Helm version: v25.3.3 - order: 1 - repository: https://helm.ngc.nvidia.com/nvidia + source: https://helm.ngc.nvidia.com/nvidia + chart: gpu-operator responses: "200": description: Zip archive containing generated bundles @@ -1457,19 +1461,40 @@ components: type: array items: type: object + required: [name] properties: name: type: string description: Component name + type: + type: string + enum: [Helm, Kustomize] + description: >- + Deployment type; send the canonical value `Helm` or + `Kustomize` (the normative contract). A Helm ref must not carry + `tag`/`path`; a Kustomize ref requires `path` (and, when it + sets `tag`, a `source`). Incoherent combinations are rejected + with 400. For back-compatibility the server back-fills a + missing type on an enabled ref from the registry when the + component is known (a missing type on an unknown component is + rejected with 400), and accepts other casings (e.g. `helm`) + and normalizes them — but clients should send the canonical + values above. version: type: string - description: Component version - order: - type: integer - description: Deployment order - repository: + description: Chart version (Helm) + tag: + type: string + description: Git tag/ref (Kustomize); only meaningful with a source + source: + type: string + description: Chart repository URL (Helm) or OCI/Git source reference (Kustomize) + chart: + type: string + description: Helm chart name + path: type: string - description: Helm repository URL + description: Path within the source to the kustomization (Kustomize) description: List of component references with deployment order constraints: type: object diff --git a/docs/user/cli-reference.md b/docs/user/cli-reference.md index 13335257e..20c10b5b3 100644 --- a/docs/user/cli-reference.md +++ b/docs/user/cli-reference.md @@ -492,6 +492,7 @@ criteria: os: any componentRefs: - name: gpu-operator + type: Helm version: vXX.Y.Z # illustrative; see Component Catalog for current pins source: https://helm.ngc.nvidia.com/nvidia deploymentOrder: diff --git a/pkg/bundler/bundler.go b/pkg/bundler/bundler.go index d47e157e7..c5345f2b6 100644 --- a/pkg/bundler/bundler.go +++ b/pkg/bundler/bundler.go @@ -219,6 +219,21 @@ func (b *DefaultBundler) Make(ctx context.Context, recipeResult *recipe.RecipeRe "recipe must contain at least one component reference") } + // Reject incoherent component refs (e.g. a Helm ref that also carries a + // Kustomize tag/path, which the deployers silently build as Kustomize) + // before generating anything. DefaultBundler.Make is a public entry point + // (docs/integrator/public-api.md) reachable without the CLI/server + // validation boundaries, so it must apply the same coherence gate. Validate + // a provider-preserving defensive copy — a struct copy keeps the bound + // provider, and a fresh ComponentRefs slice keeps type back-fill/ + // canonicalization from mutating the caller's RecipeResult. See #1584. + validated := *recipeResult + validated.ComponentRefs = append([]recipe.ComponentRef(nil), recipeResult.ComponentRefs...) + if err := validated.PrepareAndValidate(); err != nil { + return nil, err + } + recipeResult = &validated + enabledRefs, filteredOrder, filterErr := b.filterEnabledComponents(recipeResult) if filterErr != nil { return nil, filterErr diff --git a/pkg/bundler/bundler_test.go b/pkg/bundler/bundler_test.go index 530722f23..3e3ea2b29 100644 --- a/pkg/bundler/bundler_test.go +++ b/pkg/bundler/bundler_test.go @@ -3162,3 +3162,34 @@ func TestDynamicTolerationPathExcludedFromBakeIn(t *testing.T) { t.Errorf("daemonsets.tolerations should not be baked in when declared dynamic, got: %v", val) } } + +// TestMake_RejectsIncoherentRef verifies the public DefaultBundler.Make entry +// point rejects an incoherent ref (a Helm component carrying a Kustomize path, +// which the deployers would silently build as Kustomize) rather than producing +// a mismatched bundle. Pins issue #1584 at the direct-bundler boundary. +func TestMake_RejectsIncoherentRef(t *testing.T) { + bundler, err := New() + if err != nil { + t.Fatalf("New() error = %v", err) + } + recipeResult := &recipe.RecipeResult{ + ComponentRefs: []recipe.ComponentRef{ + {Name: "gpu-operator", Type: recipe.ComponentTypeHelm, Version: "v1", Path: "deploy"}, + }, + } + _, err = bundler.Make(context.Background(), recipeResult, t.TempDir()) + if err == nil { + t.Fatal("expected Make to reject an incoherent Helm+path ref, got nil") + } + var se *errors.StructuredError + if !stderrors.As(err, &se) { + t.Fatalf("expected *errors.StructuredError, got %T: %v", err, err) + } + if se.Code != errors.ErrCodeInvalidRequest { + t.Errorf("expected ErrCodeInvalidRequest, got %s: %v", se.Code, err) + } + // The caller's RecipeResult must not have been mutated by validation. + if got := recipeResult.ComponentRefs[0].Type; got != recipe.ComponentTypeHelm { + t.Errorf("caller's ref was mutated: type=%q", got) + } +} diff --git a/pkg/client/v1/aicr_internal_test.go b/pkg/client/v1/aicr_internal_test.go index 16364fb1b..d39418628 100644 --- a/pkg/client/v1/aicr_internal_test.go +++ b/pkg/client/v1/aicr_internal_test.go @@ -1046,3 +1046,160 @@ func TestClient_NoCacheGrowthAcrossManyCloseCycles(t *testing.T) { } } } + +// TestAdoptRecipe_RejectsIncoherentRef pins issue #1584 at the REST/adopt +// boundary: POST /v1/bundle decodes a RecipeResult and calls adoptRecipe, +// which never runs the resolver — so coherence must be enforced here. An +// incoherent ref (Helm carrying a Kustomize tag) is rejected, and a coherent +// lowercase-typed ref (the OpenAPI wire form) is accepted case-insensitively. +func TestAdoptRecipe_RejectsIncoherentRef(t *testing.T) { + t.Parallel() + + client, err := NewClient(WithRecipeSource(EmbeddedSource())) + if err != nil { + t.Fatalf("NewClient: %v", err) + } + t.Cleanup(func() { _ = client.Close() }) + + base := func(refs []recipe.ComponentRef) *recipe.RecipeResult { + return &recipe.RecipeResult{ + Kind: recipe.RecipeResultKind, + APIVersion: recipe.RecipeAPIVersion, + Criteria: &recipe.Criteria{Service: recipe.CriteriaServiceEKS}, + ComponentRefs: refs, + } + } + + // Incoherent: Helm ref carrying a Kustomize tag -> rejected. + _, err = client.adoptRecipe(t.Context(), base([]recipe.ComponentRef{ + {Name: "gpu-operator", Type: recipe.ComponentTypeHelm, Version: "v1", Tag: "v2"}, + })) + if err == nil { + t.Fatal("adoptRecipe accepted an incoherent Helm+tag ref; want ErrCodeInvalidRequest") + } + var se *aicrerrors.StructuredError + if !stderrors.As(err, &se) { + t.Fatalf("expected *aicrerrors.StructuredError, got %T: %v", err, err) + } + if se.Code != aicrerrors.ErrCodeInvalidRequest { + t.Errorf("expected ErrCodeInvalidRequest, got %s", se.Code) + } + + // Coherent, lowercase type (OpenAPI wire form) -> accepted AND canonicalized + // so downstream deployers see the canonical constant. + res, err := client.adoptRecipe(t.Context(), base([]recipe.ComponentRef{ + {Name: "gpu-operator", Type: recipe.ComponentType("helm"), Version: "v1"}, + })) + if err != nil { + t.Fatalf("adoptRecipe rejected a coherent lowercase-typed ref: %v", err) + } + if got := res.internal.ComponentRefs[0].Type; got != recipe.ComponentTypeHelm { + t.Errorf("adopted lowercase type not canonicalized: got %q, want %q", got, recipe.ComponentTypeHelm) + } + + // A type-less ref for a registry component (valid before #1584 — the + // deployers derive the type from fields) must be back-filled from the + // registry, not rejected. + res2, err := client.adoptRecipe(t.Context(), base([]recipe.ComponentRef{ + {Name: "gpu-operator", Version: "v1"}, + })) + if err != nil { + t.Fatalf("adoptRecipe rejected a type-less registry ref instead of back-filling: %v", err) + } + if got := res2.internal.ComponentRefs[0].Type; got != recipe.ComponentTypeHelm { + t.Errorf("type-less registry ref not back-filled: got %q, want %q", got, recipe.ComponentTypeHelm) + } +} + +// blockingReadFileProvider parks ReadFile of a target file (e.g. registry.yaml) +// on a signal, so a test can deterministically pin adoptRecipe inside its +// registry-backed type back-fill while a second goroutine calls Close. +type blockingReadFileProvider struct { + underlying recipe.DataProvider + target string + readStarted chan struct{} + readUnblock chan struct{} +} + +func (b *blockingReadFileProvider) ReadFile(ctx context.Context, path string) ([]byte, error) { + if strings.HasSuffix(path, b.target) { + select { + case <-b.readStarted: + default: + close(b.readStarted) + } + <-b.readUnblock + } + return b.underlying.ReadFile(ctx, path) +} + +func (b *blockingReadFileProvider) WalkDir(ctx context.Context, root string, fn fs.WalkDirFunc) error { + return b.underlying.WalkDir(ctx, root, fn) +} + +func (b *blockingReadFileProvider) Source(path string) string { return b.underlying.Source(path) } + +// TestClient_CloseDrainsInflightAdopt pins the inflight registration added for +// adoptRecipe (issue #1584): PrepareAndValidate reads the component registry to +// back-fill a missing type, so a concurrent Close must drain the adopt before +// evicting caches. A type-less gpu-operator ref forces the registry ReadFile, +// which parks; Close must block on inflight.Wait until the adopt completes. +func TestClient_CloseDrainsInflightAdopt(t *testing.T) { + t.Parallel() + + embedded := recipe.NewEmbeddedDataProvider(recipe.GetEmbeddedFS(), ".") + blockedDP := &blockingReadFileProvider{ + underlying: embedded, + target: "registry.yaml", + readStarted: make(chan struct{}), + readUnblock: make(chan struct{}), + } + c := &Client{ + builder: recipe.NewBuilder(recipe.WithDataProvider(blockedDP)), + dp: blockedDP, + } + + adoptDone := make(chan struct{}) + go func() { + defer close(adoptDone) + _, _ = c.adoptRecipe(context.Background(), &recipe.RecipeResult{ + Kind: recipe.RecipeResultKind, + APIVersion: recipe.RecipeAPIVersion, + Criteria: &recipe.Criteria{Service: recipe.CriteriaServiceEKS}, + ComponentRefs: []recipe.ComponentRef{ + {Name: "gpu-operator", Version: "v1"}, // type-less -> forces registry back-fill + }, + }) + }() + + select { + case <-blockedDP.readStarted: + case <-time.After(5 * time.Second): + t.Fatal("adoptRecipe never read registry.yaml within 5s") + } + + closeDone := make(chan struct{}) + go func() { + defer close(closeDone) + _ = c.Close() + }() + + select { + case <-closeDone: + t.Fatal("Close returned before in-flight adoptRecipe drained; inflight registration is missing") + case <-time.After(100 * time.Millisecond): + // Expected: Close parked on inflight.Wait(). + } + + close(blockedDP.readUnblock) + select { + case <-adoptDone: + case <-time.After(10 * time.Second): + t.Fatal("adoptRecipe did not complete within 10s after unblock") + } + select { + case <-closeDone: + case <-time.After(10 * time.Second): + t.Fatal("Close did not complete within 10s after adopt drained") + } +} diff --git a/pkg/client/v1/bundle.go b/pkg/client/v1/bundle.go index 01a6b6c4c..35d77e1d1 100644 --- a/pkg/client/v1/bundle.go +++ b/pkg/client/v1/bundle.go @@ -94,10 +94,11 @@ type BundleOptions struct { // takes an already-decoded RecipeResult and binds the same provider. // // Synchronization mirrors LoadRecipe: snapshot the per-Client provider under -// the read lock so a concurrent Close can't race the read. Unlike the -// resolve/load paths it does not register in the inflight WaitGroup — it does -// no cache-using work itself (the subsequent MakeBundle call does, and -// registers there). +// the read lock and register in the inflight WaitGroup so Close drains before +// evicting the cache. PrepareAndValidate reads the component registry (to +// back-fill missing types), so — unlike before #1584 — this path does do +// cache-using work of its own and must register, not only rely on the +// subsequent MakeBundle call. // // Errors: // - ErrCodeInvalidRequest when the Client, ctx, or recipe is nil, or when @@ -113,13 +114,21 @@ func (c *Client) adoptRecipe(ctx context.Context, rec *recipe.RecipeResult) (*Re return nil, errors.New(errors.ErrCodeInvalidRequest, "nil RecipeResult") } + // Snapshot the provider and register as an in-flight cache user under the + // read lock: PrepareAndValidate below reads the component registry (to + // back-fill missing types), so Close must drain this before evicting the + // cache — otherwise a concurrent Close could evict, this call repopulate, + // and we would return a result owned by a closed Client. Same protocol as + // the resolve/load paths. c.mu.RLock() if c.builder == nil { c.mu.RUnlock() return nil, errors.New(errors.ErrCodeInvalidRequest, "aicr client not initialized (or already closed)") } dp := c.dp + c.inflight.Add(1) c.mu.RUnlock() + defer c.inflight.Done() // Deep-copy the caller-supplied recipe BEFORE binding the provider. // BindDataProvider mutates the receiver's unexported provider field, and @@ -131,6 +140,19 @@ func (c *Client) adoptRecipe(ctx context.Context, rec *recipe.RecipeResult) (*Re cp := rec.DeepCopy() cp.BindDataProvider(dp) + // An adopted RecipeResult is decoded from an external source (e.g. the + // POST /v1/bundle body) and never passes through the resolver, so + // canonicalize its component types and validate coherence here — otherwise + // an incoherent ref (e.g. Helm + Kustomize tag/path) would deploy as a + // different type than declared and mismatch the attestation BOM, and a + // lowercase type would deploy inconsistently downstream. PrepareAndValidate + // back-fills missing types from the registry first (this boundary does not + // run ApplyRegistryDefaults) so a type-less registry ref — valid before + // #1584 — still resolves rather than being rejected. See issue #1584. + if err := cp.PrepareAndValidate(); err != nil { + return nil, err + } + result, err := loadedResultFromInternal(cp) if err != nil { return nil, err diff --git a/pkg/recipe/componentref_coherence_test.go b/pkg/recipe/componentref_coherence_test.go new file mode 100644 index 000000000..cc67f28d5 --- /dev/null +++ b/pkg/recipe/componentref_coherence_test.go @@ -0,0 +1,235 @@ +// Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package recipe + +import ( + "context" + stderrors "errors" + "io/fs" + "strings" + "testing" + + "github.com/NVIDIA/aicr/pkg/errors" +) + +func TestComponentRefCoherenceProblem(t *testing.T) { + tests := []struct { + name string + ref ComponentRef + wantBad bool + }{ + { + name: "helm coherent", + ref: ComponentRef{Name: "a", Type: ComponentTypeHelm, Source: "https://charts", Chart: "a", Version: "v1"}, + }, + { + name: "helm carries kustomize tag", + ref: ComponentRef{Name: "a", Type: ComponentTypeHelm, Version: "v1", Tag: "v2"}, + wantBad: true, + }, + { + name: "helm carries kustomize path", + ref: ComponentRef{Name: "a", Type: ComponentTypeHelm, Version: "v1", Path: "deploy"}, + wantBad: true, + }, + { + name: "kustomize coherent local path", + ref: ComponentRef{Name: "a", Type: ComponentTypeKustomize, Path: "deploy"}, + }, + { + name: "kustomize coherent git tag+source+path", + ref: ComponentRef{Name: "a", Type: ComponentTypeKustomize, Source: "git://x", Tag: "v1", Path: "deploy"}, + }, + { + name: "kustomize missing path", + ref: ComponentRef{Name: "a", Type: ComponentTypeKustomize, Source: "git://x", Tag: "v1"}, + wantBad: true, + }, + { + name: "kustomize tag without source", + ref: ComponentRef{Name: "a", Type: ComponentTypeKustomize, Tag: "v1", Path: "deploy"}, + wantBad: true, + }, + { + name: "kustomize with post-manifests", + ref: ComponentRef{Name: "a", Type: ComponentTypeKustomize, Path: "deploy", ManifestFiles: []string{"extra.yaml"}}, + wantBad: true, + }, + { + name: "kustomize with pre-manifests is allowed", + ref: ComponentRef{Name: "a", Type: ComponentTypeKustomize, Path: "deploy", PreManifestFiles: []string{"ns.yaml"}}, + }, + { + name: "empty type", + ref: ComponentRef{Name: "a", Version: "v1"}, + wantBad: true, + }, + { + name: "unknown type", + ref: ComponentRef{Name: "a", Type: ComponentType("flux")}, + wantBad: true, + }, + { + // The REST wire format / OpenAPI example uses lowercase; it must be + // accepted case-insensitively, not rejected as an unsupported type. + name: "lowercase helm is accepted", + ref: ComponentRef{Name: "a", Type: ComponentType("helm"), Version: "v1"}, + }, + { + name: "lowercase kustomize is accepted", + ref: ComponentRef{Name: "a", Type: ComponentType("kustomize"), Path: "deploy"}, + }, + { + // Case-insensitivity does not weaken the rules: a lowercase Helm ref + // still may not carry Kustomize fields. + name: "lowercase helm still rejects tag", + ref: ComponentRef{Name: "a", Type: ComponentType("helm"), Version: "v1", Tag: "v2"}, + wantBad: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := tt.ref.coherenceProblem() + if (got != "") != tt.wantBad { + t.Fatalf("coherenceProblem() = %q, wantBad=%v", got, tt.wantBad) + } + }) + } +} + +func TestRecipeResultValidateCoherence(t *testing.T) { + // Coherent set → no error. + ok := &RecipeResult{ComponentRefs: []ComponentRef{ + {Name: "h", Type: ComponentTypeHelm, Version: "v1"}, + {Name: "k", Type: ComponentTypeKustomize, Path: "deploy"}, + }} + if err := ok.ValidateCoherence(); err != nil { + t.Fatalf("coherent refs returned error: %v", err) + } + + // Two incoherent refs → single ErrCodeInvalidRequest naming both. + bad := &RecipeResult{ComponentRefs: []ComponentRef{ + {Name: "h", Type: ComponentTypeHelm, Tag: "v2"}, + {Name: "k", Type: ComponentTypeKustomize, Source: "git://x", Tag: "v1"}, // no path + }} + err := bad.ValidateCoherence() + if err == nil { + t.Fatal("expected error for incoherent refs, got nil") + } + if !stderrors.Is(err, errors.New(errors.ErrCodeInvalidRequest, "")) { + t.Errorf("want ErrCodeInvalidRequest, got %v", err) + } + for _, want := range []string{"\"h\"", "\"k\""} { + if !strings.Contains(err.Error(), want) { + t.Errorf("error %q should name offending ref %s", err.Error(), want) + } + } + + // A DISABLED incoherent ref is skipped (excluded from the bundle). + disabled := &RecipeResult{ComponentRefs: []ComponentRef{ + {Name: "h", Type: ComponentTypeHelm, Tag: "v2", Overrides: map[string]any{"enabled": false}}, + }} + if err := disabled.ValidateCoherence(); err != nil { + t.Errorf("disabled incoherent ref should be skipped, got: %v", err) + } + + // nil receiver is safe. + var nilResult *RecipeResult + if err := nilResult.ValidateCoherence(); err != nil { + t.Errorf("nil RecipeResult should validate clean, got: %v", err) + } +} + +// failingRegistryProvider errors on the registry.yaml read, to verify that a +// registry load failure during type back-fill is propagated (as ErrCodeInternal) +// rather than swallowed into a misleading "unsupported type" INVALID_REQUEST. +type failingRegistryProvider struct{} + +func (failingRegistryProvider) ReadFile(_ context.Context, path string) ([]byte, error) { + return nil, errors.New(errors.ErrCodeInternal, "boom: cannot read "+path) +} +func (failingRegistryProvider) WalkDir(_ context.Context, _ string, _ fs.WalkDirFunc) error { + return errors.New(errors.ErrCodeInternal, "boom: walk") +} +func (failingRegistryProvider) Source(path string) string { return path } + +func TestPrepareAndValidate_PropagatesRegistryError(t *testing.T) { + // A type-less ref needs the registry; if the registry read fails, the + // caller must see the underlying (retryable) internal error, not a + // non-retryable "unsupported type" rejection. + r := &RecipeResult{ + provider: failingRegistryProvider{}, + ComponentRefs: []ComponentRef{{Name: "gpu-operator", Version: "v1"}}, // type-less + } + err := r.PrepareAndValidate() + if err == nil { + t.Fatal("expected an error when the registry read fails during back-fill") + } + var se *errors.StructuredError + if !stderrors.As(err, &se) { + t.Fatalf("expected *errors.StructuredError, got %T: %v", err, err) + } + if se.Code != errors.ErrCodeInternal { + t.Errorf("expected ErrCodeInternal (registry failure), got %s: %v", se.Code, err) + } + + // When no ref needs back-fill, the registry is never touched, so a failing + // provider does not cause an error. + ok := &RecipeResult{ + provider: failingRegistryProvider{}, + ComponentRefs: []ComponentRef{{Name: "gpu-operator", Type: ComponentTypeHelm, Version: "v1"}}, + } + if err := ok.PrepareAndValidate(); err != nil { + t.Errorf("no back-fill needed, but got error (registry should not be read): %v", err) + } + + // A type-less DISABLED stub must not force a registry load: ValidateCoherence + // skips disabled refs, so a registry failure caused solely by an irrelevant + // disabled ref must not fail an otherwise-usable recipe. + disabledStub := &RecipeResult{ + provider: failingRegistryProvider{}, + ComponentRefs: []ComponentRef{ + {Name: "enabled-helm", Type: ComponentTypeHelm, Version: "v1"}, + {Name: "legacy-stub", Overrides: map[string]any{"enabled": false}}, // type-less + disabled + }, + } + if err := disabledStub.PrepareAndValidate(); err != nil { + t.Errorf("disabled type-less stub should not trigger registry back-fill, got: %v", err) + } +} + +// TestPrepareAndValidate_BackfillLoopSkipsDisabled exercises the back-fill LOOP +// (not just the pre-scan): with a working registry, an enabled type-less ref is +// back-filled while a disabled type-less ref in the same result is left +// untouched and does not cause a rejection. +func TestPrepareAndValidate_BackfillLoopSkipsDisabled(t *testing.T) { + dp := NewEmbeddedDataProvider(GetEmbeddedFS(), ".") + r := &RecipeResult{ + provider: dp, + ComponentRefs: []ComponentRef{ + {Name: "gpu-operator", Version: "v1"}, // enabled + type-less -> back-filled + {Name: "network-operator", Overrides: map[string]any{"enabled": false}}, // disabled + type-less registry component -> loop must skip + }, + } + if err := r.PrepareAndValidate(); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got := r.ComponentRefs[0].Type; got != ComponentTypeHelm { + t.Errorf("enabled type-less ref not back-filled: got %q, want %q", got, ComponentTypeHelm) + } + if got := r.ComponentRefs[1].Type; got != "" { + t.Errorf("disabled type-less ref should be left untouched, got type %q", got) + } +} diff --git a/pkg/recipe/loader.go b/pkg/recipe/loader.go index 32ceef79b..3c064a3ae 100644 --- a/pkg/recipe/loader.go +++ b/pkg/recipe/loader.go @@ -106,5 +106,16 @@ func LoadFromFileWithProvider(ctx context.Context, path, kubeconfig, version str inputAPIVersion, header.GroupVersion)) } + // Back-fill missing types from the registry (this boundary does not run + // ApplyRegistryDefaults), canonicalize case, then reject incoherent refs. A + // hydrated RecipeResult read from disk bypasses finalizeRecipeResult, so + // without this a hand-authored recipe with e.g. a Helm ref carrying a + // Kustomize tag/path would reach the bundler/attestation unchecked, and a + // lowercase type would deploy inconsistently — while a type-less registry + // ref (valid before #1584) must still resolve, not be rejected. See #1584. + if err := rec.PrepareAndValidate(); err != nil { + return nil, err + } + return rec, nil } diff --git a/pkg/recipe/metadata.go b/pkg/recipe/metadata.go index 295e5bd48..e01ba17af 100644 --- a/pkg/recipe/metadata.go +++ b/pkg/recipe/metadata.go @@ -217,6 +217,183 @@ func (ref *ComponentRef) ApplyRegistryDefaults(config *ComponentConfig) { // DataProvider is available. See issue #1219. } +// coherenceProblem reports why a resolved ComponentRef's deployment-shape +// fields are internally inconsistent, or "" if the ref is coherent. The rules +// mirror what the deployers enforce so an incoherent ref is rejected at +// resolution rather than silently deploying as a different type (or producing +// a signed attestation whose metadata does not match what deploys): +// +// - the deployers do not trust the declared Type — Helm/Helmfile/ArgoCD drop +// it, and localformat classifies any ref carrying a Tag or Path as +// Kustomize (see pkg/bundler/deployer/localformat classify/write) — so a +// Helm ref must not carry Kustomize fields; +// - a Kustomize ref needs a Path to build from; and +// - a Tag is only meaningful with a Source (git repo / OCI ref). +// +// Keep these in lockstep with the localformat rules. +func (ref *ComponentRef) coherenceProblem() string { + hasTag, hasPath := ref.Tag != "", ref.Path != "" + // Match the type case-insensitively: the resolver emits the canonical + // ComponentType ("Helm"/"Kustomize"), but the REST wire format and + // hand-authored recipes may use lowercase (see the /v1/bundle OpenAPI + // example), and the deployers classify by tag/path rather than by this + // field — so "helm" and "Helm" must be treated the same here. + switch { + case strings.EqualFold(string(ref.Type), string(ComponentTypeHelm)): + if hasTag || hasPath { + return fmt.Sprintf("component %q is Helm but carries Kustomize field(s) (tag=%q, path=%q); "+ + "the deployers classify any ref with a tag or path as Kustomize, so it would deploy as a "+ + "different type than declared", ref.Name, ref.Tag, ref.Path) + } + case strings.EqualFold(string(ref.Type), string(ComponentTypeKustomize)): + if !hasPath { + return fmt.Sprintf("component %q is Kustomize but has no path; a path is required to build from", ref.Name) + } + if hasTag && ref.Source == "" { + return fmt.Sprintf("component %q is Kustomize with tag %q but no source; a tag is only "+ + "meaningful with a git source", ref.Name, ref.Tag) + } + // A Kustomize ref wraps a single primary source; the deployers reject a + // ref that also carries post-manifests (ManifestFiles). PreManifestFiles + // are pre-injected separately and remain supported. + if len(ref.ManifestFiles) > 0 { + return fmt.Sprintf("component %q is Kustomize but also declares manifestFiles; a component may "+ + "declare either Kustomize (tag/path) or raw manifest files, not both", ref.Name) + } + default: + // After ApplyRegistryDefaults every registry-backed ref has a supported + // Type; an empty or unknown Type here means an externally-supplied ref + // the registry did not populate. The deployers classify by tag/path, not + // by this field, so an unsupported Type would deploy ambiguously — fail + // closed rather than silently accept it. + return fmt.Sprintf("component %q has unsupported type %q; expected %q or %q", + ref.Name, ref.Type, ComponentTypeHelm, ComponentTypeKustomize) + } + return "" +} + +// canonicalizeComponentTypes normalizes each ref's case-insensitively-matched +// Type to the canonical ComponentType constant ("helm" -> "Helm"), so registry +// defaulting (which switches on the exact constant) and the deployers (Flux +// rejects a lowercase "helm", ArgoCD-Helm mis-handles a lowercase "kustomize") +// all see a consistent value. Unknown types are left unchanged for the +// coherence check to reject. Call this at every boundary that produces a +// RecipeResult, before defaulting and before returning the result. +func canonicalizeComponentTypes(refs []ComponentRef) { + for i := range refs { + switch { + case strings.EqualFold(string(refs[i].Type), string(ComponentTypeHelm)): + refs[i].Type = ComponentTypeHelm + case strings.EqualFold(string(refs[i].Type), string(ComponentTypeKustomize)): + refs[i].Type = ComponentTypeKustomize + } + } +} + +// backfillComponentTypes sets ref.Type from the registry component's type for +// each ENABLED ref that has no explicit type, using this result's bound +// DataProvider's registry. It mirrors what ApplyRegistryDefaults does on the +// resolve path, so a hand-authored or hydrated recipe that omits `type` — +// valid before #1584, since the deployers derive the type from the ref's +// fields — is not rejected by ValidateCoherence. It is the first step of +// PrepareAndValidate (the load and adopt boundaries do not run +// ApplyRegistryDefaults). Disabled refs are ignored (they are excluded from the +// bundle and skipped by ValidateCoherence, so a disabled type-less stub must +// not trigger a registry load), and the registry is not consulted at all when +// no enabled ref needs a type. Non-registry components are left untouched +// (their empty type still fails closed). +func (r *RecipeResult) backfillComponentTypes() error { + if r == nil { + return nil + } + // Only touch the registry if an ENABLED ref actually needs a type. This + // avoids a spurious registry load (and its potential error) for the common + // case where every ref already declares a type — and, critically, for a + // disabled legacy stub with an empty type: ValidateCoherence skips disabled + // refs, so back-filling one must not be able to fail the whole recipe on a + // registry error when no enabled ref needs it. + needsBackfill := false + for i := range r.ComponentRefs { + if r.ComponentRefs[i].Type == "" && r.ComponentRefs[i].IsEnabled() { + needsBackfill = true + break + } + } + if !needsBackfill { + return nil + } + // A type-less ref needs the registry to resolve its type; propagate a + // load/parse/timeout failure as-is rather than swallowing it and letting + // ValidateCoherence report a misleading, non-retryable "unsupported type". + registry, err := GetComponentRegistryFor(r.provider) + if err != nil { + return errors.PropagateOrWrap(err, errors.ErrCodeInternal, + "failed to load component registry to back-fill component types") + } + for i := range r.ComponentRefs { + if r.ComponentRefs[i].Type != "" || !r.ComponentRefs[i].IsEnabled() { + continue + } + if cfg := registry.Get(r.ComponentRefs[i].Name); cfg != nil { + r.ComponentRefs[i].Type = cfg.GetType() + } + } + return nil +} + +// PrepareAndValidate normalizes a RecipeResult's component refs and rejects +// incoherent ones, in the required order: back-fill missing types on enabled +// refs from the registry, canonicalize the type casing, then validate +// coherence (which itself only inspects enabled refs). Boundaries +// that produce a RecipeResult WITHOUT running ApplyRegistryDefaults — file load +// (LoadFromFileWithProvider) and external adoption (client adoptRecipe) — call +// this single method so the three steps cannot drift or be partially applied +// (e.g. validating before canonicalizing would reject legitimate lowercase +// types; skipping the back-fill would reject type-less registry refs). The +// resolve path (finalizeRecipeResult) instead back-fills via ApplyRegistryDefaults +// and canonicalizes before defaulting, so it calls ValidateCoherence directly. +func (r *RecipeResult) PrepareAndValidate() error { + if r == nil { + return nil + } + if err := r.backfillComponentTypes(); err != nil { + return err + } + canonicalizeComponentTypes(r.ComponentRefs) + return r.ValidateCoherence() +} + +// ValidateCoherence rejects enabled ComponentRefs whose deployment-shape fields +// are internally inconsistent (see coherenceProblem), aggregating every +// offender into one ErrCodeInvalidRequest so the author sees all problems at +// once. Disabled refs are skipped: they are excluded from the bundle, so their +// shape never reaches a deployer. +// +// It is invoked at every boundary that produces a RecipeResult — criteria +// resolution (finalizeRecipeResult), file load (LoadFromFileWithProvider), and +// external adoption (client adoptRecipe / POST /v1/bundle) — so an incoherent +// ref cannot slip in via a hand-authored or decoded hydrated recipe. +func (r *RecipeResult) ValidateCoherence() error { + if r == nil { + return nil + } + var problems []string + for i := range r.ComponentRefs { + if !r.ComponentRefs[i].IsEnabled() { + continue + } + if p := r.ComponentRefs[i].coherenceProblem(); p != "" { + problems = append(problems, p) + } + } + if len(problems) == 0 { + return nil + } + sort.Strings(problems) + return errors.New(errors.ErrCodeInvalidRequest, + "recipe has incoherent component ref(s): "+strings.Join(problems, "; ")) +} + // ExpectedResource represents a Kubernetes resource that should exist after deployment. type ExpectedResource struct { // Kind is the resource kind (e.g., "Deployment", "DaemonSet"). diff --git a/pkg/recipe/metadata_store.go b/pkg/recipe/metadata_store.go index cb983bed3..57f187b87 100644 --- a/pkg/recipe/metadata_store.go +++ b/pkg/recipe/metadata_store.go @@ -935,6 +935,11 @@ func finalizeRecipeResult(provider DataProvider, criteria *Criteria, mergedSpec return nil, aicrerrors.Wrap(aicrerrors.ErrCodeInternal, "failed to compute deployment order", err) } + // Canonicalize case-insensitively-matched types before defaulting, so the + // exact-type switch in ApplyRegistryDefaults (and downstream deployers) see + // the canonical constant. See issue #1584. + canonicalizeComponentTypes(mergedSpec.ComponentRefs) + if err := applyRegistryDefaults(provider, mergedSpec.ComponentRefs); err != nil { return nil, err } @@ -951,6 +956,16 @@ func finalizeRecipeResult(provider DataProvider, criteria *Criteria, mergedSpec } result.Metadata.AppliedOverlays = appliedOverlays + // Reject refs whose deployment-shape fields are incoherent (e.g. a Helm ref + // that also carries a Kustomize tag/path), after defaults populate Type. + // The same check also runs at the load and adopt boundaries (see + // RecipeResult.ValidateCoherence callers) so externally-supplied hydrated + // RecipeResults — the bundle/validate -r and POST /v1/bundle paths — are + // covered too, not only criteria-resolved recipes. See issue #1584. + if err := result.ValidateCoherence(); err != nil { + return nil, err + } + return result, nil } diff --git a/pkg/server/bundle_handler_test.go b/pkg/server/bundle_handler_test.go index 45b654901..98412eda2 100644 --- a/pkg/server/bundle_handler_test.go +++ b/pkg/server/bundle_handler_test.go @@ -69,3 +69,22 @@ func TestBundleHandler_EmptyComponentRefs(t *testing.T) { t.Errorf("status = %d, want %d. Body: %s", w.Code, http.StatusBadRequest, w.Body.String()) } } + +// TestBundleHandler_IncoherentComponentRef verifies the HTTP decode-to-bundle +// path rejects an incoherent ref (a Helm component carrying a Kustomize tag) +// with 400 rather than producing a mismatched bundle. Pins issue #1584 at the +// POST /v1/bundle boundary. +func TestBundleHandler_IncoherentComponentRef(t *testing.T) { + t.Parallel() + h := newTestBundleHandler(t) + + body := `{"apiVersion": "aicr.run/v1alpha2", "kind": "Recipe", "componentRefs": [` + + `{"name": "gpu-operator", "type": "Helm", "version": "v1", "tag": "v2"}]}` + req := httptest.NewRequest(http.MethodPost, "/v1/bundle", strings.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + h.HandleBundles(w, req) + if w.Code != http.StatusBadRequest { + t.Errorf("status = %d, want %d. Body: %s", w.Code, http.StatusBadRequest, w.Body.String()) + } +}