diff --git a/pipelines/types/dunder.go b/pipelines/types/dunder.go new file mode 100644 index 0000000..2e04a1b --- /dev/null +++ b/pipelines/types/dunder.go @@ -0,0 +1,115 @@ +// Copyright 2025 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package types + +import ( + "fmt" + "reflect" + "strings" + + "github.com/Azure/ARO-Tools/config/types" +) + +// PlaceholderGenerator returns, for a given dotted-path key and the leaf's +// reflect.Type, a pair of strings used by EV2Mapping: +// +// - flattenedKey: the value used as a map key in the flat "placeholder -> +// dotted path" output map. Most generators set this equal to replaceVar. +// - replaceVar: the string substituted into the deep-copied configuration +// tree at the leaf's position. +// +// The leaf type is provided so type-aware generators (for example a Bicep +// parameter generator that must wrap non-strings) can branch on Kind. A nil +// reflect.Type means the leaf's runtime type was not recoverable; generators +// must tolerate this. +type PlaceholderGenerator func(key []string, valueType reflect.Type) (flattenedKey string, replaceVar string) + +// NewDunderPlaceholders returns a PlaceholderGenerator that produces the +// "dunder" placeholder format used by sdp-pipelines for pipeline.yaml schema +// validation: each leaf is replaced with a string of the form "____". +// +// The output format is intentionally consistent with DefaultPlaceholderPattern +// ("^__.+__$"), so EV2Mapping(typedCfg, NewDunderPlaceholders(), nil) produces +// a Configuration that ValidatePipelineSchemaWithOptions(content, +// WithAllowPlaceholders("")) will accept on every non-string scalar field. +// +// Example: +// +// key := []string{"foo", "bar"} +// flattenedKey, replaceVar := NewDunderPlaceholders()(key, nil) +// // flattenedKey and replaceVar will both be "__foo.bar__" +func NewDunderPlaceholders() PlaceholderGenerator { + return func(key []string, _ reflect.Type) (flattenedKey string, replaceVar string) { + flattenedKey = fmt.Sprintf("__%s__", strings.Join(key, ".")) + replaceVar = flattenedKey + return + } +} + +// EV2Mapping walks a nested configuration tree and returns: +// +// 1. a flat map of placeholder -> dotted key path (e.g. "__foo.bar__" -> "foo.bar"), +// 2. a deep copy of the input tree where each scalar leaf has been replaced +// by the placeholder string produced by placeholderGenerator. +// +// The walker descends into nested map[string]any values. All other values — +// including []any — are treated as scalar leaves and replaced by a single +// placeholder. This is deliberate: per ARO-HCP configuration policy, arrays in +// per-region configuration are not supported (see +// https://github.com/Azure/ARO-HCP/blob/main/docs/configuration.md#limitations +// — "Avoid using arrays in configuration. Instead, represent arrays as a list +// of comma separated values"). Treating slices as opaque scalars keeps the +// dunder output consistent with Ev2's static-manifest-up-front model. +// +// The prefix argument seeds the dotted-path key for the recursion (callers +// typically pass nil). +// +// Example: +// +// input := types.Configuration{ +// "ev2": map[string]any{"replicas": 3, "name": "svc"}, +// "registries": "quay.io,registry.redhat.io", // CSV per the array-policy +// } +// flat, dunder := EV2Mapping(input, NewDunderPlaceholders(), nil) +// // flat == map[string]string{ +// // "__ev2.replicas__": "ev2.replicas", +// // "__ev2.name__": "ev2.name", +// // "__registries__": "registries", +// // } +// // dunder == map[string]any{ +// // "ev2": map[string]any{"replicas": "__ev2.replicas__", "name": "__ev2.name__"}, +// // "registries": "__registries__", +// // } +func EV2Mapping(input types.Configuration, placeholderGenerator PlaceholderGenerator, prefix []string) (map[string]string, map[string]interface{}) { + output := map[string]string{} + replaced := map[string]interface{}{} + for key, value := range input { + nestedKey := make([]string, 0, len(prefix)+1) + nestedKey = append(nestedKey, prefix...) + nestedKey = append(nestedKey, key) + if typed, ok := value.(map[string]any); ok { + flattened, replacement := EV2Mapping(typed, placeholderGenerator, nestedKey) + for index, what := range flattened { + output[index] = what + } + replaced[key] = replacement + continue + } + flattenedKey, replaceVar := placeholderGenerator(nestedKey, reflect.TypeOf(value)) + output[flattenedKey] = strings.Join(nestedKey, ".") + replaced[key] = replaceVar + } + return output, replaced +} diff --git a/pipelines/types/dunder_test.go b/pipelines/types/dunder_test.go new file mode 100644 index 0000000..d631fd1 --- /dev/null +++ b/pipelines/types/dunder_test.go @@ -0,0 +1,251 @@ +// Copyright 2025 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package types + +import ( + "reflect" + "regexp" + "testing" + + "github.com/google/go-cmp/cmp" + + "github.com/Azure/ARO-Tools/config/types" +) + +// TestNewDunderPlaceholders pins the dunder generator's output format. The +// flattened-key and replace-variable are both "____"; the leaf +// reflect.Type is ignored. +func TestNewDunderPlaceholders(t *testing.T) { + cases := []struct { + name string + key []string + valueType reflect.Type + want string + }{ + { + name: "single segment", + key: []string{"foo"}, + valueType: nil, + want: "__foo__", + }, + { + name: "two segments", + key: []string{"foo", "bar"}, + valueType: nil, + want: "__foo.bar__", + }, + { + name: "deep path", + key: []string{"a", "b", "c", "d"}, + valueType: nil, + want: "__a.b.c.d__", + }, + { + name: "value type does not change output", + key: []string{"foo", "bar"}, + valueType: reflect.TypeOf(42), + want: "__foo.bar__", + }, + } + + gen := NewDunderPlaceholders() + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + flattenedKey, replaceVar := gen(tc.key, tc.valueType) + if flattenedKey != tc.want { + t.Errorf("flattenedKey: got %q, want %q", flattenedKey, tc.want) + } + if replaceVar != tc.want { + t.Errorf("replaceVar: got %q, want %q", replaceVar, tc.want) + } + }) + } +} + +// TestDunderMatchesDefaultPattern is the regex-contract test: every placeholder +// emitted by NewDunderPlaceholders must match DefaultPlaceholderPattern, so +// that a Configuration produced by EV2Mapping(_, NewDunderPlaceholders(), nil) +// is accepted by ValidatePipelineSchemaWithOptions(_, WithAllowPlaceholders("")). +// This test fails loudly if the producer or the validator's default pattern +// ever drift apart. +func TestDunderMatchesDefaultPattern(t *testing.T) { + re, err := regexp.Compile(DefaultPlaceholderPattern) + if err != nil { + t.Fatalf("DefaultPlaceholderPattern is not a valid regexp: %v", err) + } + + gen := NewDunderPlaceholders() + keys := [][]string{ + {"top"}, + {"top", "nested"}, + {"top", "nested", "deeper"}, + {"with_underscore", "and.dots", "trailing"}, + } + for _, k := range keys { + _, v := gen(k, nil) + if !re.MatchString(v) { + t.Errorf("NewDunderPlaceholders(%v) produced %q which does not match DefaultPlaceholderPattern %q", + k, v, DefaultPlaceholderPattern) + } + } +} + +// TestEV2Mapping pins the map-only walker behavior. Scalar leaves (string, +// number, bool) are replaced with a placeholder; nested map[string]any values +// are recursed into; non-map non-string values (including []any) are treated +// as opaque scalars per the array-policy described on EV2Mapping's doc +// comment. +func TestEV2Mapping(t *testing.T) { + tests := []struct { + name string + input types.Configuration + expectedFlattened map[string]string + expectedReplace map[string]interface{} + }{ + { + name: "flat scalars", + input: types.Configuration{ + "key1": "value1", + "key2": 42, + "key3": true, + }, + expectedFlattened: map[string]string{ + "__key1__": "key1", + "__key2__": "key2", + "__key3__": "key3", + }, + expectedReplace: map[string]interface{}{ + "key1": "__key1__", + "key2": "__key2__", + "key3": "__key3__", + }, + }, + { + name: "nested maps recurse", + input: types.Configuration{ + "parent": map[string]interface{}{ + "nested": "nestedvalue", + "nestedInt": 42, + "deeper": map[string]interface{}{ + "deepest": "deepestvalue", + }, + }, + }, + expectedFlattened: map[string]string{ + "__parent.nested__": "parent.nested", + "__parent.nestedInt__": "parent.nestedInt", + "__parent.deeper.deepest__": "parent.deeper.deepest", + }, + expectedReplace: map[string]interface{}{ + "parent": map[string]interface{}{ + "nested": "__parent.nested__", + "nestedInt": "__parent.nestedInt__", + "deeper": map[string]interface{}{ + "deepest": "__parent.deeper.deepest__", + }, + }, + }, + }, + { + name: "mixed flat and nested", + input: types.Configuration{ + "key1": "value1", + "parent": map[string]interface{}{ + "nested": "nestedvalue", + }, + }, + expectedFlattened: map[string]string{ + "__key1__": "key1", + "__parent.nested__": "parent.nested", + }, + expectedReplace: map[string]interface{}{ + "key1": "__key1__", + "parent": map[string]interface{}{ + "nested": "__parent.nested__", + }, + }, + }, + { + name: "slice values are treated as opaque scalars (no per-element placeholders)", + input: types.Configuration{ + "registries": []any{"quay.io", "registry.redhat.io"}, + }, + expectedFlattened: map[string]string{ + "__registries__": "registries", + }, + expectedReplace: map[string]interface{}{ + "registries": "__registries__", + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + flattened, replace := EV2Mapping(tc.input, NewDunderPlaceholders(), nil) + if diff := cmp.Diff(tc.expectedFlattened, flattened); diff != "" { + t.Errorf("flattened mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(tc.expectedReplace, replace); diff != "" { + t.Errorf("replace mismatch (-want +got):\n%s", diff) + } + }) + } +} + +// TestEV2MappingPrefix verifies that a non-nil prefix seeds the dotted path +// for every emitted placeholder. +func TestEV2MappingPrefix(t *testing.T) { + input := types.Configuration{ + "key": "value", + } + flattened, replace := EV2Mapping(input, NewDunderPlaceholders(), []string{"root", "branch"}) + wantFlat := map[string]string{ + "__root.branch.key__": "root.branch.key", + } + wantReplace := map[string]interface{}{ + "key": "__root.branch.key__", + } + if diff := cmp.Diff(wantFlat, flattened); diff != "" { + t.Errorf("flattened mismatch (-want +got):\n%s", diff) + } + if diff := cmp.Diff(wantReplace, replace); diff != "" { + t.Errorf("replace mismatch (-want +got):\n%s", diff) + } +} + +// TestEV2MappingDoesNotMutatePrefix guards against a regression where the +// recursive walker appends to a shared backing array, leaking sibling-key +// suffixes into nested placeholders. With map-iteration order randomised, a +// shared-prefix bug would surface as flaky test failures rather than a +// deterministic one — running the walker many times against a single tree +// makes a regression overwhelmingly likely to be caught. +func TestEV2MappingDoesNotMutatePrefix(t *testing.T) { + input := types.Configuration{ + "alpha": "a", + "beta": "b", + "gamma": map[string]any{"delta": "d"}, + } + want := map[string]string{ + "__alpha__": "alpha", + "__beta__": "beta", + "__gamma.delta__": "gamma.delta", + } + for i := 0; i < 32; i++ { + got, _ := EV2Mapping(input, NewDunderPlaceholders(), nil) + if diff := cmp.Diff(want, got); diff != "" { + t.Fatalf("iteration %d: flattened mismatch (-want +got):\n%s", i, diff) + } + } +} diff --git a/pipelines/types/integration_test.go b/pipelines/types/integration_test.go new file mode 100644 index 0000000..e1711e5 --- /dev/null +++ b/pipelines/types/integration_test.go @@ -0,0 +1,165 @@ +// Copyright 2025 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package types + +import ( + "strings" + "testing" + + "github.com/Azure/ARO-Tools/config" + "github.com/Azure/ARO-Tools/config/types" +) + +// pipelineTemplateWithScalarRefs is a pipeline.yaml template that exercises +// every non-string scalar field that pipeline.schema.v1 declares on a Shell +// step (omitFromServiceGroupCompletion: bool, automatedRetry.maximumRetryCount: +// integer) plus a string field. It is rendered twice in the end-to-end test: +// once with a "dunder configuration" produced by EV2Mapping(NewDunderPlaceholders), +// once with the real typed configuration, so the placeholder + strict modes +// can be compared head-to-head. +const pipelineTemplateWithScalarRefs = `$schema: pipeline.schema.v1 +serviceGroup: Microsoft.Azure.ARO.Test +rolloutName: Test Rollout +resourceGroups: +- name: test + resourceGroup: {{ .rg.name }} + subscription: {{ .rg.sub }} + steps: + - name: deploy + action: Shell + command: echo hello + omitFromServiceGroupCompletion: {{ .step.omit }} + automatedRetry: + maximumRetryCount: {{ .step.retry }} +` + +// TestEndToEndPlaceholderValidation is the producer + validator integration +// test asked for in the PR #237 review. It demonstrates the contract that +// EV2Mapping(NewDunderPlaceholders) (producer, moved into this package by +// this PR) and ValidatePipelineSchemaWithOptions(WithAllowPlaceholders) +// (validator, added by this PR) agree on the same placeholder convention. +// +// The flow tested mirrors the sdp-pipelines dunder precompile pass: +// +// 1. A typed configuration (real values) is run through EV2Mapping with the +// dunder generator to produce a dunder configuration whose every leaf is +// replaced with a "__path__" placeholder string. +// 2. A pipeline.yaml template referencing scalar fields (string, bool, +// integer) is rendered with that dunder configuration; the resulting YAML +// carries placeholder strings in fields the schema declares as +// non-string scalars. +// 3. ValidatePipelineSchemaWithOptions(rendered, WithAllowPlaceholders("")) +// accepts that YAML. +// +// The test also pins two regression guards: strict-mode validation must +// reject the same dunderized YAML (so placeholder mode never becomes the +// default), and rendering the same template with the real typed +// configuration must always pass strict-mode validation (so the EV2Mapping +// move-in did not change non-placeholder rendering behavior). +// +// Scope note: this test exercises the schema-validation surface only, which +// is the exact surface PR #237 changes. NewPipelineFromBytes additionally +// unmarshals the rendered YAML into the typed *Pipeline struct, and that +// unmarshal step is unchanged by this PR — callers that pass placeholder +// strings into non-string Go fields will still see unmarshal errors. The +// sdp-pipelines call site that opts into placeholder mode must therefore use +// the rendered-bytes form (ValidatePipelineSchemaWithOptions, exercised +// here) rather than the typed-struct form (NewPipelineFromBytes) for its +// dunder precompile pass. The real-config strict path through +// NewPipelineFromBytes is exercised below to confirm ordinary (non-dunder) +// usage remains intact. +// +// Per ARO-HCP configuration policy +// (https://github.com/Azure/ARO-HCP/blob/main/docs/configuration.md#limitations), +// arrays are not supported in per-region configuration, so the fixture uses +// scalar leaves only. +func TestEndToEndPlaceholderValidation(t *testing.T) { + typedConfig := types.Configuration{ + "rg": map[string]any{ + "name": "test-rg", + "sub": "test-sub", + }, + "step": map[string]any{ + "omit": true, + "retry": 3, + }, + } + + flat, dunderRaw := EV2Mapping(typedConfig, NewDunderPlaceholders(), nil) + + // Producer self-check: every leaf in the typed configuration must produce + // a corresponding entry in the flat map, including the non-string-scalar + // leaves that placeholder mode exists to admit. + for _, want := range []string{"__rg.name__", "__rg.sub__", "__step.omit__", "__step.retry__"} { + if _, ok := flat[want]; !ok { + t.Fatalf("EV2Mapping flat map missing %q; got keys %v", want, keysOf(flat)) + } + } + + dunderCfg := types.Configuration(dunderRaw) + + dunderBytes, err := config.PreprocessContent([]byte(pipelineTemplateWithScalarRefs), dunderCfg) + if err != nil { + t.Fatalf("PreprocessContent(dunderCfg) must succeed, got: %v", err) + } + + // Validator self-check: the dunder-rendered YAML must actually contain + // placeholder strings in non-string scalar positions. Otherwise the + // subsequent placeholder/strict assertions wouldn't distinguish the two + // modes. + dunderYAML := string(dunderBytes) + for _, want := range []string{"omitFromServiceGroupCompletion: __step.omit__", "maximumRetryCount: __step.retry__"} { + if !strings.Contains(dunderYAML, want) { + t.Fatalf("expected dunder-rendered YAML to contain %q; got:\n%s", want, dunderYAML) + } + } + + realBytes, err := config.PreprocessContent([]byte(pipelineTemplateWithScalarRefs), typedConfig) + if err != nil { + t.Fatalf("PreprocessContent(typedConfig) must succeed, got: %v", err) + } + + // 1) Placeholder mode accepts the dunder-rendered YAML. + if err := ValidatePipelineSchemaWithOptions(dunderBytes, WithAllowPlaceholders("")); err != nil { + t.Errorf("placeholder mode must accept dunder-rendered YAML, got: %v", err) + } + + // 2) Strict mode rejects the same dunder-rendered YAML (regression guard + // so placeholder mode never becomes the default). + if err := ValidatePipelineSchemaWithOptions(dunderBytes); err == nil { + t.Errorf("strict mode must reject placeholder strings on non-string scalar fields") + } + + // 3) Strict mode accepts the typed-rendered YAML (regression guard so the + // EV2Mapping move-in did not change non-placeholder behavior). + if err := ValidatePipelineSchemaWithOptions(realBytes); err != nil { + t.Errorf("strict mode must accept typed-rendered YAML, got: %v", err) + } + + // 4) Full NewPipelineFromBytes flow (validate + unmarshal + Validate) is + // intact for the typed-configuration path — proves the move-in did + // not regress ordinary production rendering. + if _, err := NewPipelineFromBytes([]byte(pipelineTemplateWithScalarRefs), typedConfig); err != nil { + t.Errorf("NewPipelineFromBytes(typedConfig) must succeed, got: %v", err) + } +} + +func keysOf(m map[string]string) []string { + out := make([]string, 0, len(m)) + for k := range m { + out = append(out, k) + } + return out +} diff --git a/pipelines/types/options.go b/pipelines/types/options.go new file mode 100644 index 0000000..8604882 --- /dev/null +++ b/pipelines/types/options.go @@ -0,0 +1,72 @@ +// Copyright 2025 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package types + +// DefaultPlaceholderPattern is the regular expression applied to placeholder +// values when WithAllowPlaceholders is used without an explicit pattern. It +// matches strings of the form "____" (dunder-wrapped), which is the +// output format of the sdp-pipelines dunder configuration used during schema +// validation of pipeline.yaml templates. +const DefaultPlaceholderPattern = "^__.+__$" + +// ValidateOption configures the behavior of ValidatePipelineSchemaWithOptions +// (and, by extension, NewPipelineFromFile / NewPipelineFromBytes when called +// with options). +type ValidateOption func(*validateOptions) + +// validateOptions holds the resolved configuration produced by applying a set +// of ValidateOption values. It is intentionally unexported: callers always +// configure validation through the With* option constructors. +type validateOptions struct { + // allowPlaceholders, when non-empty, enables placeholder-mode validation. + // The string is a regular expression that placeholder values must match. + allowPlaceholders string +} + +// newValidateOptions builds a validateOptions by applying the given option +// functions. Options applied later override earlier ones. +func newValidateOptions(opts ...ValidateOption) *validateOptions { + o := &validateOptions{} + for _, opt := range opts { + if opt != nil { + opt(o) + } + } + return o +} + +// WithAllowPlaceholders enables an opt-in placeholder mode for the pipeline +// schema validator. When this option is supplied, the schema is widened +// in-memory so that every non-string scalar field (boolean / integer / number, +// and pure non-string enums) also accepts a string matching pattern. +// +// This is intended for callers that validate pipeline.yaml content with a +// "dunder configuration" — every leaf rendered as "____" — where +// real boolean/integer/number fields would otherwise be rejected. The default +// strict validation is unchanged for all other callers, including production +// EV2 manifest generation. +// +// An empty pattern is treated as DefaultPlaceholderPattern ("^__.+__$"), which +// matches sdp-pipelines' dunder output. Callers that template placeholders +// with a different convention may supply their own regular expression (for +// example, "^<<.+>>$"). +func WithAllowPlaceholders(pattern string) ValidateOption { + return func(o *validateOptions) { + if pattern == "" { + pattern = DefaultPlaceholderPattern + } + o.allowPlaceholders = pattern + } +} diff --git a/pipelines/types/pipeline.go b/pipelines/types/pipeline.go index d8650ce..3289fb4 100644 --- a/pipelines/types/pipeline.go +++ b/pipelines/types/pipeline.go @@ -54,22 +54,28 @@ type BuildStep struct { // - A pointer to a new Pipeline instance if successful. // - An error if there was a problem preprocessing the file, validating the schema, // unmarshaling the pipeline, or validating the pipeline instance. -func NewPipelineFromFile(pipelineFilePath string, cfg types2.Configuration) (*Pipeline, error) { +func NewPipelineFromFile(pipelineFilePath string, cfg types2.Configuration, opts ...ValidateOption) (*Pipeline, error) { content, err := os.ReadFile(pipelineFilePath) if err != nil { return nil, fmt.Errorf("failed to read file %s: %w", pipelineFilePath, err) } - return NewPipelineFromBytes(content, cfg) + return NewPipelineFromBytes(content, cfg, opts...) } -func NewPipelineFromBytes(pipelineBytes []byte, cfg types2.Configuration) (*Pipeline, error) { +// NewPipelineFromBytes preprocesses pipelineBytes with the supplied +// configuration, validates the rendered YAML against the pipeline schema, and +// unmarshals it into a *Pipeline. opts is forwarded to +// ValidatePipelineSchemaWithOptions; in particular, WithAllowPlaceholders +// loosens schema validation for non-string scalar fields. Without opts, +// validation is strict. +func NewPipelineFromBytes(pipelineBytes []byte, cfg types2.Configuration, opts ...ValidateOption) (*Pipeline, error) { bytes, err := config.PreprocessContent(pipelineBytes, cfg) if err != nil { return nil, fmt.Errorf("failed to preprocess pipeline file: %w", err) } - if err := ValidatePipelineSchema(bytes); err != nil { + if err := ValidatePipelineSchemaWithOptions(bytes, opts...); err != nil { return nil, fmt.Errorf("failed to validate pipeline schema: %w", err) } diff --git a/pipelines/types/util.go b/pipelines/types/util.go index 86bc1b2..36d7040 100644 --- a/pipelines/types/util.go +++ b/pipelines/types/util.go @@ -36,59 +36,92 @@ func getSchemaForPipeline(pipelineMap map[string]interface{}) (pipelineSchema *j } func getSchemaForRef(schemaRef string) (*jsonschema.Schema, string, error) { + return getSchemaForRefWithOptions(schemaRef, nil) +} + +// getSchemaForRefWithOptions resolves a schema reference like getSchemaForRef +// does, but threads validateOptions through so that placeholder-mode widening +// can be applied between decoding and compilation. +func getSchemaForRefWithOptions(schemaRef string, opts *validateOptions) (*jsonschema.Schema, string, error) { if schemaRef == "" { schemaRef = defaultSchemaRef } switch schemaRef { case pipelineSchemaV1Ref: - pipelineSchema, err := compileSchema(schemaRef, pipelineSchemaV1Content) + schemaMap, err := decodeSchemaMap(pipelineSchemaV1Content) + if err != nil { + return nil, schemaRef, err + } + if opts != nil && opts.allowPlaceholders != "" { + widenScalarsForPlaceholders(schemaMap, opts.allowPlaceholders) + } + pipelineSchema, err := compileSchemaFromMap(schemaRef, schemaMap) return pipelineSchema, schemaRef, err default: return nil, "", fmt.Errorf("unsupported schema reference: %s", schemaRef) } } +// ValidatePipelineSchema validates pipelineContent against the schema declared +// by its "$schema" key (or the default v1 schema if none is set). Strict +// validation: every type must match exactly. To allow opt-in placeholder +// strings on non-string scalar fields, use ValidatePipelineSchemaWithOptions +// together with WithAllowPlaceholders. func ValidatePipelineSchema(pipelineContent []byte) error { - // unmarshal pipeline content + return ValidatePipelineSchemaWithOptions(pipelineContent) +} + +// ValidatePipelineSchemaWithOptions validates pipelineContent against the +// declared (or default) schema. When opts include WithAllowPlaceholders, the +// schema is widened in-memory so that non-string scalar fields also accept +// strings matching the configured placeholder pattern. The schema asset on +// disk and the strict default validation paths are not affected. +// +// This is intended for callers that validate pipeline.yaml templates against +// a fully dunderized configuration (e.g. sdp-pipelines). EV2 manifest +// generation and other strict callers should continue to use +// ValidatePipelineSchema. +func ValidatePipelineSchemaWithOptions(pipelineContent []byte, opts ...ValidateOption) error { + resolved := newValidateOptions(opts...) + pipelineMap := make(map[string]interface{}) - err := yaml.Unmarshal(pipelineContent, &pipelineMap) - if err != nil { + if err := yaml.Unmarshal(pipelineContent, &pipelineMap); err != nil { return fmt.Errorf("failed to unmarshal pipeline YAML content: %v", err) } - // load pipeline schema - pipelineSchema, schemaRef, err := getSchemaForPipeline(pipelineMap) + schemaRef, _ := pipelineMap["$schema"].(string) + pipelineSchema, schemaRef, err := getSchemaForRefWithOptions(schemaRef, resolved) if err != nil { return fmt.Errorf("failed to load pipeline schema: %v", err) } - // validate pipeline schema - err = pipelineSchema.Validate(pipelineMap) - if err != nil { + if err := pipelineSchema.Validate(pipelineMap); err != nil { return fmt.Errorf("pipeline is not compliant with schema %s: %v", schemaRef, err) } return nil } -func compileSchema(schemaRef string, schemaBytes []byte) (*jsonschema.Schema, error) { - // parse schema content +// decodeSchemaMap unmarshals a JSON schema asset into a generic map so that +// callers can inspect or rewrite it before compilation. +func decodeSchemaMap(schemaBytes []byte) (map[string]interface{}, error) { schemaMap := make(map[string]interface{}) - err := json.Unmarshal(schemaBytes, &schemaMap) - if err != nil { + if err := json.Unmarshal(schemaBytes, &schemaMap); err != nil { return nil, fmt.Errorf("failed to unmarshal schema content: %v", err) } + return schemaMap, nil +} - // compile schema +// compileSchemaFromMap compiles a schema previously decoded with +// decodeSchemaMap (and possibly rewritten by the caller) into a jsonschema.Schema. +func compileSchemaFromMap(schemaRef string, schemaMap map[string]interface{}) (*jsonschema.Schema, error) { c := jsonschema.NewCompiler() - err = c.AddResource(schemaRef, schemaMap) - if err != nil { + if err := c.AddResource(schemaRef, schemaMap); err != nil { return nil, fmt.Errorf("failed to add schema resource %s: %v", schemaRef, err) } pipelineSchema, err := c.Compile(schemaRef) if err != nil { return nil, fmt.Errorf("failed to compile schema %s: %v", schemaRef, err) } - return pipelineSchema, nil } diff --git a/pipelines/types/util_test.go b/pipelines/types/util_test.go new file mode 100644 index 0000000..536982e --- /dev/null +++ b/pipelines/types/util_test.go @@ -0,0 +1,181 @@ +// Copyright 2025 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package types + +import ( + "strings" + "testing" +) + +// pipelineWithStepFields returns a minimal valid pipeline.yaml whose single +// Shell step has the given extra top-level keys injected (verbatim, indented +// to match the existing four-space step body indentation). +func pipelineWithStepFields(extraStepLines string) string { + indented := "" + if extraStepLines != "" { + for _, line := range strings.Split(strings.TrimRight(extraStepLines, "\n"), "\n") { + indented += " " + line + "\n" + } + } + return `$schema: pipeline.schema.v1 +serviceGroup: Microsoft.Azure.ARO.Test +rolloutName: Test Rollout +resourceGroups: +- name: test + resourceGroup: test-rg + subscription: test-sub + steps: + - name: deploy + action: Shell + command: echo hello +` + indented +} + +// pipelineWithResourceGroupFields embeds extra top-level keys on the only +// resourceGroup entry (e.g. executionConstraints arrays). +func pipelineWithResourceGroupFields(extraRGLines string) string { + indented := "" + for _, line := range strings.Split(strings.TrimRight(extraRGLines, "\n"), "\n") { + indented += " " + line + "\n" + } + return `$schema: pipeline.schema.v1 +serviceGroup: Microsoft.Azure.ARO.Test +rolloutName: Test Rollout +resourceGroups: +- name: test + resourceGroup: test-rg + subscription: test-sub +` + indented + ` steps: + - name: deploy + action: Shell + command: echo hello +` +} + +func TestValidatePipelineSchemaPlaceholderMode(t *testing.T) { + tests := []struct { + name string + yaml string + opts []ValidateOption + expectError bool + description string + }{ + { + name: "strict_rejects_arbitrary_string_on_boolean", + yaml: pipelineWithStepFields("omitFromServiceGroupCompletion: \"definitely-not-a-bool\"\n"), + opts: nil, + expectError: true, + description: "strict mode must reject a non-bool value on a boolean field", + }, + { + name: "strict_rejects_placeholder_on_boolean", + yaml: pipelineWithStepFields("omitFromServiceGroupCompletion: __rg.omit__\n"), + opts: nil, + expectError: true, + description: "strict mode must reject placeholder string on a boolean field (regression guard)", + }, + { + name: "placeholder_accepts_placeholder_on_boolean", + yaml: pipelineWithStepFields("omitFromServiceGroupCompletion: __rg.omit__\n"), + opts: []ValidateOption{WithAllowPlaceholders("")}, + expectError: false, + description: "placeholder mode must accept a dunder string on a boolean field", + }, + { + name: "placeholder_rejects_non_matching_string_on_boolean", + yaml: pipelineWithStepFields("omitFromServiceGroupCompletion: not_a_placeholder\n"), + opts: []ValidateOption{WithAllowPlaceholders("")}, + expectError: true, + description: "placeholder mode must still reject arbitrary strings that do not match the pattern", + }, + { + name: "placeholder_accepts_real_bool", + yaml: pipelineWithStepFields("omitFromServiceGroupCompletion: true\n"), + opts: []ValidateOption{WithAllowPlaceholders("")}, + expectError: false, + description: "placeholder mode must still accept genuine boolean values", + }, + { + name: "placeholder_accepts_placeholder_on_nested_integer", + yaml: pipelineWithStepFields("automatedRetry:\n maximumRetryCount: __retry.count__\n"), + opts: []ValidateOption{WithAllowPlaceholders("")}, + expectError: false, + description: "placeholder mode must accept dunder strings on integer fields nested under stepMeta", + }, + { + name: "placeholder_accepts_real_integer", + yaml: pipelineWithStepFields("automatedRetry:\n maximumRetryCount: 5\n"), + opts: []ValidateOption{WithAllowPlaceholders("")}, + expectError: false, + description: "placeholder mode must still accept genuine integer values", + }, + { + name: "placeholder_accepts_singleton_via_ref", + yaml: pipelineWithResourceGroupFields("executionConstraints:\n - singleton: __ec.singleton__\n"), + opts: []ValidateOption{WithAllowPlaceholders("")}, + expectError: false, + description: "placeholder mode must reach scalars defined under #/definitions referenced via $ref", + }, + { + name: "string_field_unchanged_strict", + yaml: pipelineWithStepFields(""), + opts: nil, + expectError: false, + description: "strict mode accepts a minimal pipeline whose string fields are real strings", + }, + { + name: "string_field_unchanged_placeholder", + yaml: pipelineWithStepFields(""), + opts: []ValidateOption{WithAllowPlaceholders("")}, + expectError: false, + description: "placeholder mode leaves string-typed fields strict (real strings accepted)", + }, + { + name: "custom_pattern_is_honored", + yaml: pipelineWithStepFields("omitFromServiceGroupCompletion: <>\n"), + opts: []ValidateOption{WithAllowPlaceholders("^<<.+>>$")}, + expectError: false, + description: "a custom pattern supplied via WithAllowPlaceholders is the one used to match placeholders", + }, + { + name: "custom_pattern_rejects_default_dunder", + yaml: pipelineWithStepFields("omitFromServiceGroupCompletion: __rg.omit__\n"), + opts: []ValidateOption{WithAllowPlaceholders("^<<.+>>$")}, + expectError: true, + description: "a custom pattern rules out the default dunder convention", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := ValidatePipelineSchemaWithOptions([]byte(tt.yaml), tt.opts...) + if tt.expectError && err == nil { + t.Fatalf("%s: expected an error, got nil", tt.description) + } + if !tt.expectError && err != nil { + t.Fatalf("%s: expected no error, got: %v", tt.description, err) + } + }) + } +} + +// TestValidatePipelineSchemaBackwardsCompatible asserts the legacy +// ValidatePipelineSchema entry point retains strict behavior even after the +// option plumbing was added. +func TestValidatePipelineSchemaBackwardsCompatible(t *testing.T) { + yaml := pipelineWithStepFields("omitFromServiceGroupCompletion: __rg.omit__\n") + if err := ValidatePipelineSchema([]byte(yaml)); err == nil { + t.Fatalf("ValidatePipelineSchema (strict) must reject placeholder strings on boolean fields, got nil error") + } +} diff --git a/pipelines/types/widen.go b/pipelines/types/widen.go new file mode 100644 index 0000000..640d1ca --- /dev/null +++ b/pipelines/types/widen.go @@ -0,0 +1,257 @@ +// Copyright 2025 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package types + +import ( + "reflect" + "strconv" + "strings" +) + +// widenScalarsForPlaceholders rewrites a decoded JSON schema map in-place so +// that every non-string scalar declaration (type boolean / integer / number, +// pure non-string enums, and non-string consts) also accepts a string matching +// pattern. +// +// The rewrite preserves the original constraints by wrapping each scalar node +// in a oneOf[, {type:"string", pattern:}]. The +// walker recurses through all JSON Schema combinator and subtree keywords, +// and resolves local "$ref" references (e.g. "#/definitions/foo") against the +// root schema so widening still applies to scalars defined under +// $defs/definitions. A set of already-processed map identities prevents both +// $ref cycles and double-widening (where the same scalar is reachable through +// both a $ref and a direct definitions descent). +// +// schemaMap is the root schema and is mutated in place. Pattern is the regex +// the placeholder string must match (typically DefaultPlaceholderPattern). +func widenScalarsForPlaceholders(schemaMap map[string]interface{}, pattern string) { + if schemaMap == nil || pattern == "" { + return + } + processed := map[uintptr]bool{} + widenNode(schemaMap, schemaMap, pattern, processed) +} + +// scalarTypesToWiden is the set of non-string scalar type names whose +// declarations get widened by widenNode. "null" is intentionally excluded +// (placeholders are never null) and "string" is excluded (already a string). +var scalarTypesToWiden = map[string]bool{ + "boolean": true, + "integer": true, + "number": true, +} + +// widenNode mutates a single schema node in place. If the node declares a +// widenable scalar type (a pure non-string enum, or a non-string const), it is +// replaced with a oneOf wrapper. Otherwise the walker recurses into combinator and subtree +// keywords. Local $ref targets are resolved against root. Each map node is +// processed at most once; revisits via $ref or duplicated tree paths are +// no-ops, which prevents the produced oneOf wrapper from being wrapped +// again by a later traversal. +func widenNode(node interface{}, root map[string]interface{}, pattern string, processed map[uintptr]bool) { + switch n := node.(type) { + case map[string]interface{}: + ptr := reflect.ValueOf(n).Pointer() + if processed[ptr] { + return + } + processed[ptr] = true + + if ref, ok := n["$ref"].(string); ok { + if target := resolveLocalRef(root, ref); target != nil { + widenNode(target, root, pattern, processed) + } + } + + if shouldWidenNode(n) { + wrapWithPlaceholderOneOf(n, pattern, processed) + return + } + + recurseSubtrees(n, root, pattern, processed) + case []interface{}: + for _, item := range n { + widenNode(item, root, pattern, processed) + } + } +} + +// shouldWidenNode reports whether obj declares a non-string scalar that should +// be widened into a oneOf wrapper. Scalar declarations include direct type +// declarations (boolean, integer, number — and type-union variants without +// "string"), pure non-string enums, and non-string consts. +func shouldWidenNode(obj map[string]interface{}) bool { + switch t := obj["type"].(type) { + case string: + if scalarTypesToWiden[t] { + return true + } + case []interface{}: + hasString := false + hasWidenable := false + for _, v := range t { + if s, ok := v.(string); ok { + if s == "string" { + hasString = true + } + if scalarTypesToWiden[s] { + hasWidenable = true + } + } + } + if hasWidenable && !hasString { + return true + } + } + if c, hasConst := obj["const"]; hasConst { + if _, isString := c.(string); !isString { + return true + } + return false + } + if _, hasType := obj["type"]; hasType { + return false + } + if enum, ok := obj["enum"].([]interface{}); ok && len(enum) > 0 { + for _, v := range enum { + if _, isString := v.(string); isString { + return false + } + } + return true + } + return false +} + +// wrapWithPlaceholderOneOf converts obj in-place into a oneOf wrapper. The +// original constraints are copied unchanged into the first oneOf alternative +// and a {type:"string",pattern} branch is added as the second alternative. +// The freshly-created original-alternative map is registered in processed so +// that any later traversal reaching it (for example via repeated tree +// descent) will skip it, preventing recursive widening of the same scalar. +func wrapWithPlaceholderOneOf(obj map[string]interface{}, pattern string, processed map[uintptr]bool) { + original := make(map[string]interface{}, len(obj)) + for k, v := range obj { + original[k] = v + delete(obj, k) + } + processed[reflect.ValueOf(original).Pointer()] = true + + placeholder := map[string]interface{}{ + "type": "string", + "pattern": pattern, + } + processed[reflect.ValueOf(placeholder).Pointer()] = true + + obj["oneOf"] = []interface{}{original, placeholder} +} + +// schemaSubtreeKeys lists JSON Schema keywords whose values contain further +// subschemas (objects or arrays of objects). The widener recurses into each +// to find scalar declarations to widen. +var schemaSubtreeKeys = []string{ + "properties", + "patternProperties", + "$defs", + "definitions", + "items", + "prefixItems", + "additionalProperties", + "unevaluatedProperties", + "unevaluatedItems", + "contains", + "not", + "if", + "then", + "else", + "propertyNames", + "oneOf", + "anyOf", + "allOf", + "dependentSchemas", + "dependencies", +} + +// recurseSubtrees walks the standard JSON Schema subtree containers in obj +// and invokes widenNode on each inner schema. The set of keys whose values +// are maps-of-schemas (rather than a single schema) is handled specially. +func recurseSubtrees(obj map[string]interface{}, root map[string]interface{}, pattern string, processed map[uintptr]bool) { + for _, key := range schemaSubtreeKeys { + child, ok := obj[key] + if !ok { + continue + } + switch v := child.(type) { + case map[string]interface{}: + switch key { + case "properties", "patternProperties", "$defs", "definitions", "dependentSchemas": + for _, sub := range v { + widenNode(sub, root, pattern, processed) + } + default: + widenNode(v, root, pattern, processed) + } + case []interface{}: + for _, item := range v { + widenNode(item, root, pattern, processed) + } + case bool: + // "additionalProperties: true|false" etc — nothing to widen. + } + } +} + +// resolveLocalRef resolves a JSON pointer of the form "#/definitions/foo" +// against the root schema. Returns nil for non-local refs (external URLs) or +// any pointer that does not resolve. Numeric path components select array +// indices when applicable. +func resolveLocalRef(root map[string]interface{}, ref string) interface{} { + if !strings.HasPrefix(ref, "#/") { + return nil + } + parts := strings.Split(strings.TrimPrefix(ref, "#/"), "/") + var current interface{} = root + for _, p := range parts { + p = unescapeJSONPointer(p) + switch node := current.(type) { + case map[string]interface{}: + next, ok := node[p] + if !ok { + return nil + } + current = next + case []interface{}: + n, err := strconv.Atoi(p) + if err != nil { + return nil + } + if n < 0 || n >= len(node) { + return nil + } + current = node[n] + default: + return nil + } + } + return current +} + +// unescapeJSONPointer decodes the two RFC 6901 escape sequences (~1 -> /, +// ~0 -> ~) used in JSON pointer fragments. +func unescapeJSONPointer(p string) string { + p = strings.ReplaceAll(p, "~1", "/") + p = strings.ReplaceAll(p, "~0", "~") + return p +} diff --git a/pipelines/types/widen_test.go b/pipelines/types/widen_test.go new file mode 100644 index 0000000..ce57a21 --- /dev/null +++ b/pipelines/types/widen_test.go @@ -0,0 +1,142 @@ +// Copyright 2025 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package types + +import ( + "testing" +) + +// TestWidenScalarsForPlaceholders_NodeShapes exercises widenScalarsForPlaceholders +// against synthetic schema fragments to verify each non-string scalar shape is +// either wrapped in a oneOf or left alone. The end-to-end behavior is covered +// by TestValidatePipelineSchemaPlaceholderMode; this test pins the structural +// rewrite invariants directly so a future refactor cannot accidentally drop a +// branch (in particular, the non-string const branch added in response to a +// review request). +func TestWidenScalarsForPlaceholders_NodeShapes(t *testing.T) { + pattern := DefaultPlaceholderPattern + + cases := []struct { + name string + input map[string]interface{} + wrapped bool + notation string + }{ + { + name: "boolean_type_is_widened", + input: map[string]interface{}{"type": "boolean"}, + wrapped: true, + notation: "{type: boolean}", + }, + { + name: "string_type_is_unchanged", + input: map[string]interface{}{"type": "string"}, + wrapped: false, + notation: "{type: string}", + }, + { + name: "non_string_enum_is_widened", + input: map[string]interface{}{ + "enum": []interface{}{float64(1), float64(2), float64(3)}, + }, + wrapped: true, + notation: "{enum: [1,2,3]}", + }, + { + name: "string_enum_is_unchanged", + input: map[string]interface{}{ + "enum": []interface{}{"a", "b"}, + }, + wrapped: false, + notation: `{enum: ["a","b"]}`, + }, + { + name: "boolean_const_is_widened", + input: map[string]interface{}{"const": true}, + wrapped: true, + notation: "{const: true}", + }, + { + name: "integer_const_is_widened", + input: map[string]interface{}{"const": float64(42)}, + wrapped: true, + notation: "{const: 42}", + }, + { + name: "string_const_is_unchanged", + input: map[string]interface{}{"const": "ImageMirror"}, + wrapped: false, + notation: `{const: "ImageMirror"}`, + }, + { + name: "typed_boolean_const_is_widened", + input: map[string]interface{}{ + "type": "boolean", + "const": true, + }, + wrapped: true, + notation: "{type: boolean, const: true}", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + // Build a tiny root schema with the input under a property so the + // walker actually descends into it. + root := map[string]interface{}{ + "properties": map[string]interface{}{ + "x": tc.input, + }, + } + widenScalarsForPlaceholders(root, pattern) + + props := root["properties"].(map[string]interface{}) + x := props["x"].(map[string]interface{}) + _, isOneOf := x["oneOf"] + if tc.wrapped && !isOneOf { + t.Fatalf("expected %s to be wrapped in oneOf, got: %#v", tc.notation, x) + } + if !tc.wrapped && isOneOf { + t.Fatalf("expected %s to be left unchanged, got: %#v", tc.notation, x) + } + }) + } +} + +// TestWidenScalarsForPlaceholders_ConstRefBoundary asserts the widener +// resolves a $ref to a definition whose schema body is a non-string const +// (the imageMirrorStep schema uses this shape via anyOf alternatives that +// constrain publicSource to "const: true"). The synthetic fixture mirrors the +// reachability shape that a real refactor of pipeline.schema.v1.json could +// introduce so the invariant is pinned regardless of upstream changes. +func TestWidenScalarsForPlaceholders_ConstRefBoundary(t *testing.T) { + root := map[string]interface{}{ + "definitions": map[string]interface{}{ + "trueConst": map[string]interface{}{"const": true}, + }, + "properties": map[string]interface{}{ + "flag": map[string]interface{}{ + "$ref": "#/definitions/trueConst", + }, + }, + } + widenScalarsForPlaceholders(root, DefaultPlaceholderPattern) + + defs := root["definitions"].(map[string]interface{}) + target := defs["trueConst"].(map[string]interface{}) + if _, isOneOf := target["oneOf"]; !isOneOf { + t.Fatalf("expected {const: true} target of $ref to be wrapped in oneOf, got: %#v", target) + } +}