diff --git a/cli/internal/config/karpenter/karpenter.go b/cli/internal/config/karpenter/karpenter.go index 16fe804..75ebe0a 100644 --- a/cli/internal/config/karpenter/karpenter.go +++ b/cli/internal/config/karpenter/karpenter.go @@ -44,9 +44,11 @@ podLabels: controller: nebiusCredentials: - enabled: true {{- if .NebiusCredentialsJSON }} + enabled: true credentialsJSON: {{ .NebiusCredentialsJSON }} +{{- else }} + enabled: false {{- end }} image: digest: "" diff --git a/docs/usages/karpenter.md b/docs/usages/karpenter.md index 2e79c01..5585d1a 100644 --- a/docs/usages/karpenter.md +++ b/docs/usages/karpenter.md @@ -46,7 +46,9 @@ $ kubectl create namespace karpenter ### 2. Locate your Nebius credentials file -The karpenter controller needs Nebius API credentials to provision VMs. The credentials file is a JSON file generated by the Nebius console (see the [Nebius authorized keys documentation](https://docs.nebius.com/iam/service-accounts/authorized-keys)). +This step is only needed if you plan to provision Nebius nodes. Azure and Azure Flex H200 nodes do not require Nebius credentials. + +For Nebius, the karpenter controller needs Nebius API credentials to provision VMs. The credentials file is a JSON file generated by the Nebius console (see the [Nebius authorized keys documentation](https://docs.nebius.com/iam/service-accounts/authorized-keys)). Note the local path to this file — you will pass it to the CLI in step 4 via `--nebius-credentials-file`. The chart will create the `nebius-credentials` Secret in the `karpenter` namespace automatically during `helm upgrade --install`; no separate `kubectl create secret` step is needed. @@ -65,15 +67,16 @@ The template also creates a **federated identity credential** that pairs the man ### 4. Generate the Helm values file and install -Use the CLI to generate a `karpenter_values.yaml` file with all required values pre-populated. 
Pass `--nebius-credentials-file` to have the chart create the `nebius-credentials` Secret automatically, and `--ssh-public-key-file` to embed the SSH public key used when bootstrapping provisioned nodes: +Use the CLI to generate a `karpenter_values.yaml` file with all required values pre-populated. Pass `--ssh-public-key-file` to embed the SSH public key used when bootstrapping provisioned nodes: ```bash $ aks-flex-cli config karpenter helm \ - --nebius-credentials-file ~/.nebius/credentials.json \ --ssh-public-key-file ~/.ssh/id_ed25519.pub ``` -The command reads both files, embeds their contents into `karpenter_values.yaml`, and prints the install command to stdout: +If you also use Nebius, add `--nebius-credentials-file ~/.nebius/credentials.json` so the chart creates and mounts the Nebius credentials Secret. For Azure-only H200 clusters, omit it; the generated values will keep `controller.nebiusCredentials.enabled: false`. + +The command reads the files, embeds their contents into `karpenter_values.yaml`, and prints the install command to stdout: ``` helm upgrade --install karpenter charts/karpenter \ @@ -101,7 +104,7 @@ podLabels: controller: nebiusCredentials: - enabled: true + enabled: false image: digest: "" env: @@ -246,6 +249,20 @@ azure-cpu-nodepool-6rhlk aks-azure-cpu-nodepo > aks-flex-cli aks deploy --nvidia-dra-driver --skip-arm > ``` +### Creating an Azure Flex H200 NodePool + +For cross-region Azure H200 nodes, apply one Azure Flex NodeClass and NodePool per Azure region, then deploy a GPU workload with a matching toleration and node affinity: + +```bash +$ kubectl apply -f examples/azure/azureflexnodeclass-h200-eastus2.yaml +$ kubectl apply -f examples/azure/nodepool-h200.yaml +$ kubectl apply -f examples/azure/azureflexnodeclass-h200-eastus2euap.yaml +$ kubectl apply -f examples/azure/nodepool-h200-eastus2euap.yaml +$ kubectl apply -f examples/azure/h200_deployment.yaml +``` + +Each H200 NodePool references exactly one `AzureFlexNodeClass`, so use 
separate NodePools when trying the same `Standard_ND96isr_H200_v5` SKU in both `eastus2` and `eastus2euap`. The H200 NodePools must have a non-zero `limits.nvidia.com/gpu` value. Set each one to `8` for one node in that region, or a higher multiple of 8 for more nodes. + ## Creating Nodes on Nebius via Karpenter With the karpenter controller running, you can define a `NebiusNodeClass` and `NodePool` to tell Karpenter how and when to provision Nebius nodes. diff --git a/karpenter/charts/karpenter/crds/flex.aks.azure.com_azureflexnodeclasses.yaml b/karpenter/charts/karpenter/crds/flex.aks.azure.com_azureflexnodeclasses.yaml new file mode 100644 index 0000000..4742726 --- /dev/null +++ b/karpenter/charts/karpenter/crds/flex.aks.azure.com_azureflexnodeclasses.yaml @@ -0,0 +1,214 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.20.1 + name: azureflexnodeclasses.flex.aks.azure.com +spec: + group: flex.aks.azure.com + names: + categories: + - karpenter + - nap + kind: AzureFlexNodeClass + listKind: AzureFlexNodeClassList + plural: azureflexnodeclasses + shortNames: + - afnc + - afncs + singular: azureflexnodeclass + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .status.conditions[?(@.type=='Ready')].status + name: Ready + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + AzureFlexNodeClass is the Schema for the AzureFlexNodeClass API. + + It enables a NodePool in an AKS cluster to auto-provision external Azure VMs in a + (potentially different) Azure region than the AKS cluster's own region. Each node + is a single VM (not VMSS) so that cross-region placement is straightforward. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. 
+ Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + AzureFlexNodeClassSpec is the spec for AzureFlexNodeClass. + + Phase 1 scope (issue #63): single region per NodeClass, no spot, no zones, + no identity/UAMI per-NodeClass (the controller MI is assumed to have + Contributor on the target subscription/RG/subnet), no quota preflight, + no PPG/capacity reservation, no spot, no WireGuard. + properties: + allocateNodePublicIP: + default: false + description: AllocateNodePublicIP controls whether each node receives + a public IP. + type: boolean + imageID: + description: ImageID is a SIG / community gallery image resource ID. + Mutually exclusive with ImageReference. + type: string + imageReference: + description: |- + ImageReference selects an Azure Marketplace image. Mutually exclusive with ImageID. + If neither is set, defaults to microsoft-dsvm/ubuntu-hpc/2204/latest. + properties: + offer: + type: string + publisher: + type: string + sku: + type: string + version: + default: latest + type: string + required: + - offer + - publisher + - sku + type: object + location: + description: Location is the Azure region (e.g. "eastus2"). May differ + from the AKS cluster region. + type: string + maxPodsPerNode: + default: 110 + description: MaxPodsPerNode is advertised in the node's capacity and + affects Karpenter scheduling. 
+ format: int32 + type: integer + osDiskSizeGB: + default: 128 + description: OSDiskSizeGB is the size of the OS disk in GB. + format: int32 + type: integer + resourceGroup: + description: |- + ResourceGroup is the resource group where VMs, NICs, and OS disks land. + Must already exist. + type: string + securityType: + default: Standard + description: |- + SecurityType selects the VM security profile. Currently only "Standard" is supported. + TrustedLaunch is deferred — it has been observed to break the DSVM image. + enum: + - Standard + type: string + sshPublicKeys: + description: SSHPublicKeys is the list of SSH public keys to install + on each node. + items: + type: string + type: array + subnetID: + description: |- + SubnetID is the full ARM resource ID of the subnet (must already exist + and be reachable from the AKS cluster). + type: string + subscriptionID: + description: SubscriptionID is the Azure subscription where VMs will + be created. + type: string + tags: + additionalProperties: + type: string + description: Tags are applied to every Azure resource (VM, NIC, OS + disk) created from this NodeClass. + type: object + required: + - location + - resourceGroup + - subnetID + - subscriptionID + type: object + status: + description: status contains the resolved state of the AzureFlexNodeClass. + properties: + conditions: + description: conditions contains signals for health and readiness + items: + description: Condition aliases the upstream type and adds additional + helper methods + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. 
+ maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/karpenter/charts/karpenter/templates/clusterrole-core.yaml b/karpenter/charts/karpenter/templates/clusterrole-core.yaml index 0ae9f1f..fe8673e 100644 --- a/karpenter/charts/karpenter/templates/clusterrole-core.yaml +++ b/karpenter/charts/karpenter/templates/clusterrole-core.yaml @@ -71,7 +71,7 @@ rules: {{- if .Values.webhook.enabled }} - apiGroups: ["apiextensions.k8s.io"] resources: ["customresourcedefinitions/status"] - resourceNames: ["aksnodeclasses.karpenter.azure.com", "nodepools.karpenter.sh", "nodeclaims.karpenter.sh", "nebiusnodeclasses.flex.aks.azure.com"] + resourceNames: ["aksnodeclasses.karpenter.azure.com", "nodepools.karpenter.sh", "nodeclaims.karpenter.sh", "nebiusnodeclasses.flex.aks.azure.com", "azureflexnodeclasses.flex.aks.azure.com", "kaitonodeclasses.kaito.sh"] verbs: ["patch"] - apiGroups: ["apiextensions.k8s.io"] resources: ["customresourcedefinitions"] diff --git a/karpenter/charts/karpenter/templates/clusterrole.yaml b/karpenter/charts/karpenter/templates/clusterrole.yaml index 3e6a978..12aebfb 100644 --- a/karpenter/charts/karpenter/templates/clusterrole.yaml +++ b/karpenter/charts/karpenter/templates/clusterrole.yaml @@ -33,7 +33,7 @@ rules: resources: ["aksnodeclasses"] verbs: ["get", "list", "watch"] - apiGroups: ["flex.aks.azure.com"] - resources: ["nebiusnodeclasses"] + resources: ["nebiusnodeclasses", "azureflexnodeclasses"] verbs: ["get", "list", "watch"] - apiGroups: ["kaito.sh"] resources: ["kaitonodeclasses"] @@ -43,7 +43,7 @@ rules: resources: ["aksnodeclasses", "aksnodeclasses/status"] verbs: ["patch", "update"] - apiGroups: ["flex.aks.azure.com"] - resources: 
["nebiusnodeclasses", "nebiusnodeclasses/status"] + resources: ["nebiusnodeclasses", "nebiusnodeclasses/status", "azureflexnodeclasses", "azureflexnodeclasses/status"] verbs: ["patch", "update"] - apiGroups: ["kaito.sh"] resources: ["kaitonodeclasses", "kaitonodeclasses/status"] diff --git a/karpenter/cmd/controller/main.go b/karpenter/cmd/controller/main.go index dca0fbf..6108b48 100644 --- a/karpenter/cmd/controller/main.go +++ b/karpenter/cmd/controller/main.go @@ -2,6 +2,7 @@ package main import ( "context" + "fmt" "time" "github.com/Azure/karpenter-provider-azure/pkg/apis" @@ -9,11 +10,17 @@ import ( "github.com/Azure/karpenter-provider-azure/pkg/controllers" "github.com/Azure/karpenter-provider-azure/pkg/operator" "github.com/Azure/karpenter-provider-azure/pkg/operator/options" + "github.com/go-logr/logr" "github.com/go-logr/zapr" "github.com/samber/lo" + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client/apiutil" "sigs.k8s.io/karpenter/pkg/cloudprovider/metrics" "sigs.k8s.io/karpenter/pkg/cloudprovider/overlay" corecontrollers "sigs.k8s.io/karpenter/pkg/controllers" @@ -26,6 +33,7 @@ import ( kaitov1alpha1 "github.com/Azure/aks-flex/karpenter/pkg/apis/kaito/v1alpha1" "github.com/Azure/aks-flex/karpenter/pkg/apis/v1alpha1" flexcloudproviders "github.com/Azure/aks-flex/karpenter/pkg/cloudproviders" + azureflex "github.com/Azure/aks-flex/karpenter/pkg/cloudproviders/azure" "github.com/Azure/aks-flex/karpenter/pkg/cloudproviders/kaito" "github.com/Azure/aks-flex/karpenter/pkg/cloudproviders/nebius" flexcontrollers "github.com/Azure/aks-flex/karpenter/pkg/controllers" @@ -44,12 +52,17 @@ func main() { ctx := injection.WithOptionsOrDie(context.Background(), coreoptions.Injectables...) 
logger := zapr.NewLogger(logging.NewLogger(ctx, "controller")) lo.Must0( - operator.WaitForCRDs( + operator.WaitForCRDs(ctx, 2*time.Minute, ctrl.GetConfigOrDie(), logger), + "failed waiting for CRDs", + ) + lo.Must0( + waitForCRDs( ctx, 2*time.Minute, ctrl.GetConfigOrDie(), logger, &v1alpha1.NebiusNodeClass{}, + &v1alpha1.AzureFlexNodeClass{}, &kaitov1alpha1.KaitoNodeClass{}, ), - "failed waiting for CRDs", + "failed waiting for flex CRDs", ) ctx, op := operator.NewOperator(coreoperator.NewOperator()) @@ -119,6 +132,17 @@ func main() { lo.Must0(err, "registering kaito cloud provider") } + // azure-flex (cross-region single-VM Azure cloud provider) + { + err := azureflex.Register( + ctx, + hubCloudProvider, + op.GetClient(), + clusterCA, + ) + lo.Must0(err, "registering azure-flex cloud provider") + } + overlayUndecoratedCloudProvider := metrics.Decorate(hubCloudProvider) cloudProvider := overlay.Decorate(overlayUndecoratedCloudProvider, op.GetClient(), op.InstanceTypeStore) clusterState := state.NewCluster(op.Clock, op.GetClient(), cloudProvider) @@ -146,8 +170,11 @@ func main() { // TODO: still need to refactor ImageProvider side of things. op.KubernetesVersionProvider, op.ImageProvider, + op.InstanceTypesProvider, op.InClusterKubernetesInterface, op.AZClient.SubnetsClient(), + op.AZClient.DiskEncryptionSetsClient(), + options.FromContext(ctx).ParsedDiskEncryptionSetID, )...). WithControllers(ctx, flexcontrollers.NewControllers( ctx, @@ -156,3 +183,50 @@ func main() { )...). 
Start(ctx)
 }
+
+// waitForCRDs blocks until the API server can resolve a REST mapping for the
+// GroupVersionKind of every object in objs, or until timeout elapses.
+//
+// Each object's GVK is looked up in the client-go scheme.Scheme; if that lookup
+// fails the function returns an error before any polling starts. A single
+// deadline derived from timeout is shared by all GVKs, which are checked one
+// after another: first immediately, then every 10 seconds. A "no match" answer
+// from the REST mapper is treated as "CRD not installed yet" and retried; any
+// other mapper error stops the wait and is returned wrapped.
+func waitForCRDs(ctx context.Context, timeout time.Duration, config *rest.Config, logger logr.Logger, objs ...runtime.Object) error {
+	client, err := rest.HTTPClientFor(config)
+	if err != nil {
+		return fmt.Errorf("creating kubernetes client: %w", err)
+	}
+	restMapper, err := apiutil.NewDynamicRESTMapper(config, client)
+	if err != nil {
+		return fmt.Errorf("creating dynamic rest mapper: %w", err)
+	}
+
+	// Resolve every GVK up front so a scheme lookup failure is reported
+	// before any time is spent polling.
+	requiredGVKs := make([]schema.GroupVersionKind, 0, len(objs))
+	for _, obj := range objs {
+		gvk, err := apiutil.GVKForObject(obj, scheme.Scheme)
+		if err != nil {
+			return fmt.Errorf("getting GVK for %T: %w", obj, err)
+		}
+		requiredGVKs = append(requiredGVKs, gvk)
+	}
+
+	logger.Info("waiting for flex CRDs to be available", "gvks", requiredGVKs, "timeout", timeout)
+	// One deadline for the whole set: time spent on an earlier GVK is not
+	// given back to the later ones.
+	ctx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+
+	for _, gvk := range requiredGVKs {
+		err := wait.PollUntilContextCancel(ctx, 10*time.Second, true, func(ctx context.Context) (bool, error) {
+			if _, err := restMapper.RESTMapping(gvk.GroupKind(), gvk.Version); err != nil {
+				if meta.IsNoMatchError(err) {
+					logger.V(1).Info("waiting for flex CRD to be available", "gvk", gvk)
+					return false, nil
+				}
+				return false, err
+			}
+			logger.V(1).Info("flex CRD is available", "gvk", gvk)
+			return true, nil
+		})
+		if err != nil {
+			// Wrap err in both branches. The timeout check looks at the
+			// context, not at err, so err may be a real mapper failure that
+			// happened to land as the deadline passed; keeping it in the
+			// chain preserves that cause and lets callers use errors.Is.
+			if ctx.Err() == context.DeadlineExceeded {
+				return fmt.Errorf("timed out waiting for CRD %s to be available: %w", gvk, err)
+			}
+			return fmt.Errorf("failed to wait for CRD %s: %w", gvk, err)
+		}
+	}
+
+	logger.Info("all flex CRDs are available")
+	return nil
+}
diff --git a/karpenter/examples/azure/azureflexnodeclass-h200-eastus2.yaml b/karpenter/examples/azure/azureflexnodeclass-h200-eastus2.yaml
new file mode 100644
index 0000000..2f335f1
--- /dev/null
+++ b/karpenter/examples/azure/azureflexnodeclass-h200-eastus2.yaml
@@ -0,0 +1,21 @@
+apiVersion: flex.aks.azure.com/v1alpha1
+kind: AzureFlexNodeClass
+metadata:
+  name: h200-eastus2
+spec:
+
subscriptionID: 00000000-0000-0000-0000-000000000000 + location: eastus2 + resourceGroup: my-flex-rg + subnetID: /subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/my-flex-rg/providers/Microsoft.Network/virtualNetworks/flex-vnet/subnets/nodes + imageReference: + publisher: microsoft-dsvm + offer: ubuntu-hpc + sku: "2204" + version: latest + securityType: Standard + osDiskSizeGB: 256 + allocateNodePublicIP: false + maxPodsPerNode: 110 + tags: + purpose: karpenter-flex-h200 + managed-by: aks-flex-karpenter diff --git a/karpenter/examples/azure/azureflexnodeclass-h200-eastus2euap.yaml b/karpenter/examples/azure/azureflexnodeclass-h200-eastus2euap.yaml new file mode 100644 index 0000000..1c353d9 --- /dev/null +++ b/karpenter/examples/azure/azureflexnodeclass-h200-eastus2euap.yaml @@ -0,0 +1,21 @@ +apiVersion: flex.aks.azure.com/v1alpha1 +kind: AzureFlexNodeClass +metadata: + name: h200-eastus2euap +spec: + subscriptionID: 00000000-0000-0000-0000-000000000000 + location: eastus2euap + resourceGroup: my-flex-rg + subnetID: /subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/my-flex-rg/providers/Microsoft.Network/virtualNetworks/flex-vnet-eastus2euap/subnets/nodes + imageReference: + publisher: microsoft-dsvm + offer: ubuntu-hpc + sku: "2204" + version: latest + securityType: Standard + osDiskSizeGB: 256 + allocateNodePublicIP: false + maxPodsPerNode: 110 + tags: + purpose: karpenter-flex-h200 + managed-by: aks-flex-karpenter diff --git a/karpenter/examples/azure/h200_deployment.yaml b/karpenter/examples/azure/h200_deployment.yaml new file mode 100644 index 0000000..a51270f --- /dev/null +++ b/karpenter/examples/azure/h200_deployment.yaml @@ -0,0 +1,44 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: azure-sample-h200-app +spec: + replicas: 1 + selector: + matchLabels: + app: azure-sample-h200-app + template: + metadata: + labels: + app: azure-sample-h200-app + spec: + tolerations: + - key: nvidia.com/gpu + operator: Exists + 
effect: NoSchedule + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: gpu + operator: In + values: + - h200 + - key: node.kubernetes.io/instance-type + operator: In + values: + - Standard_ND96isr_H200_v5 + containers: + - name: gpu-container + image: nvidia/cuda:12.4.0-base-ubuntu22.04 + command: ["nvidia-smi", "-l", "60"] + resources: + requests: + memory: "512Mi" + cpu: "250m" + nvidia.com/gpu: "1" + limits: + memory: "1Gi" + cpu: "500m" + nvidia.com/gpu: "1" diff --git a/karpenter/examples/azure/nodepool-h200-eastus2euap.yaml b/karpenter/examples/azure/nodepool-h200-eastus2euap.yaml new file mode 100644 index 0000000..fec0651 --- /dev/null +++ b/karpenter/examples/azure/nodepool-h200-eastus2euap.yaml @@ -0,0 +1,33 @@ +apiVersion: karpenter.sh/v1 +kind: NodePool +metadata: + name: h200-eastus2euap +spec: + template: + metadata: + labels: + gpu: h200 + nvidia.com/gpu.present: "true" + spec: + nodeClassRef: + group: flex.aks.azure.com + kind: AzureFlexNodeClass + name: h200-eastus2euap + requirements: + - key: node.kubernetes.io/instance-type + operator: In + values: + - Standard_ND96isr_H200_v5 + - key: karpenter.sh/capacity-type + operator: In + values: + - on-demand + taints: + - key: nvidia.com/gpu + value: "true" + effect: NoSchedule + limits: + nvidia.com/gpu: 64 + disruption: + consolidationPolicy: WhenEmpty + consolidateAfter: 30s diff --git a/karpenter/examples/azure/nodepool-h200.yaml b/karpenter/examples/azure/nodepool-h200.yaml new file mode 100644 index 0000000..fa3e06b --- /dev/null +++ b/karpenter/examples/azure/nodepool-h200.yaml @@ -0,0 +1,33 @@ +apiVersion: karpenter.sh/v1 +kind: NodePool +metadata: + name: h200 +spec: + template: + metadata: + labels: + gpu: h200 + nvidia.com/gpu.present: "true" + spec: + nodeClassRef: + group: flex.aks.azure.com + kind: AzureFlexNodeClass + name: h200-eastus2 + requirements: + - key: node.kubernetes.io/instance-type + operator: In + 
values: + - Standard_ND96isr_H200_v5 + - key: karpenter.sh/capacity-type + operator: In + values: + - on-demand + taints: + - key: nvidia.com/gpu + value: "true" + effect: NoSchedule + limits: + nvidia.com/gpu: 64 + disruption: + consolidationPolicy: WhenEmpty + consolidateAfter: 30s diff --git a/karpenter/go.mod b/karpenter/go.mod index 16481eb..24afd07 100644 --- a/karpenter/go.mod +++ b/karpenter/go.mod @@ -1,23 +1,25 @@ module github.com/Azure/aks-flex/karpenter -go 1.26.0 +go 1.26.1 require ( github.com/Azure/aks-flex/plugin v0.0.0-00010101000000-000000000000 - github.com/Azure/karpenter-provider-azure v1.7.1 - github.com/awslabs/operatorpkg v0.0.0-20250909182303-e8e550b6f339 + github.com/Azure/azure-sdk-for-go/sdk/azcore v1.21.0 + github.com/Azure/karpenter-provider-azure v1.10.2 + github.com/awslabs/operatorpkg v0.0.0-20251222193911-34e9a1898737 github.com/go-logr/logr v1.4.3 github.com/go-logr/zapr v1.3.0 github.com/nebius/gosdk v0.0.0-20260218100913-7fb27c45819a github.com/samber/lo v1.52.0 golang.org/x/sync v0.19.0 - google.golang.org/grpc v1.79.1 + google.golang.org/grpc v1.79.3 + google.golang.org/protobuf v1.36.11 k8s.io/api v0.35.1 k8s.io/apimachinery v0.35.1 k8s.io/client-go v0.35.1 k8s.io/utils v0.0.0-20260108192941-914a6e750570 sigs.k8s.io/controller-runtime v0.23.1 - sigs.k8s.io/karpenter v1.7.1 + sigs.k8s.io/karpenter v1.10.0 ) require ( @@ -26,8 +28,7 @@ require ( github.com/Azure/aks-middleware v0.0.42 // indirect github.com/Azure/azure-kusto-go v0.16.1 // indirect github.com/Azure/azure-sdk-for-go v68.0.0+incompatible // indirect - github.com/Azure/azure-sdk-for-go-extensions v0.5.1 // indirect - github.com/Azure/azure-sdk-for-go/sdk/azcore v1.21.0 // indirect + github.com/Azure/azure-sdk-for-go-extensions v0.6.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 // indirect 
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/authorization/armauthorization/v2 v2.2.0 // indirect @@ -35,6 +36,7 @@ require ( github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerregistry/armcontainerregistry v1.2.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v6 v6.6.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v8 v8.3.0-beta.1 // indirect + github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v9 v9.1.0-beta.1 // indirect github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/keyvault/armkeyvault v1.5.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/msi/armmsi v1.3.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork v1.1.0 // indirect @@ -170,11 +172,10 @@ require ( golang.org/x/oauth2 v0.34.0 // indirect golang.org/x/sys v0.40.0 // indirect golang.org/x/term v0.39.0 // indirect - golang.org/x/text v0.33.0 // indirect + golang.org/x/text v0.34.0 // indirect golang.org/x/time v0.14.0 // indirect gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 // indirect - google.golang.org/protobuf v1.36.11 // indirect gopkg.in/dnaeon/go-vcr.v3 v3.2.0 // indirect gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect diff --git a/karpenter/go.sum b/karpenter/go.sum index 90086c9..45361f3 100644 --- a/karpenter/go.sum +++ b/karpenter/go.sum @@ -10,8 +10,8 @@ github.com/Azure/azure-kusto-go v0.16.1 h1:vCBWcQghmC1qIErUUgVNWHxGhZVStu1U/hki6 github.com/Azure/azure-kusto-go v0.16.1/go.mod h1:9F2zvXH8B6eWzgI1S4k1ZXAIufnBZ1bv1cW1kB1n3D0= github.com/Azure/azure-sdk-for-go v68.0.0+incompatible h1:fcYLmCpyNYRnvJbPerq7U0hS+6+I79yEDJBqVNcqUzU= github.com/Azure/azure-sdk-for-go v68.0.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= 
-github.com/Azure/azure-sdk-for-go-extensions v0.5.1 h1:kV3u4tAWoFd+0wipN7QKSWckDkAHR06mZ3LglDuYSVM= -github.com/Azure/azure-sdk-for-go-extensions v0.5.1/go.mod h1:adhNwBpL1vnUS6yvTCbu0tVB/b6SdmmQhU9SpwYtjjY= +github.com/Azure/azure-sdk-for-go-extensions v0.6.0 h1:LzJ4iAk3ZBZ0Y27uUm66XBQntbgMr3QXn2KIDb4Mx04= +github.com/Azure/azure-sdk-for-go-extensions v0.6.0/go.mod h1:f/wRrqvvh197V5r4jGADV7528UdO/zfL+/Ud92BMSag= github.com/Azure/azure-sdk-for-go/sdk/azcore v1.21.0 h1:fou+2+WFTib47nS+nz/ozhEBnvU96bKHy6LjRsY4E28= github.com/Azure/azure-sdk-for-go/sdk/azcore v1.21.0/go.mod h1:t76Ruy8AHvUAC8GfMWJMa0ElSbuIcO03NLpynfbgsPA= github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1 h1:Hk5QBxZQC1jb2Fwj6mpzme37xbCDdNTxU7O9eb5+LB4= @@ -32,6 +32,8 @@ github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontai github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v6 v6.6.0/go.mod h1:OWKfCmX4X3Vp2w7GSx1LZn8566tOHJBA6K0IAUVNYx0= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v8 v8.3.0-beta.1 h1:qUFuc6UySPwwCCWYhuiay5/ef50JEDl8jZ7UWNUx4nA= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v8 v8.3.0-beta.1/go.mod h1:DTMSChgVxhpEYIPzaE0nfUsdAHcGO1wJtdQ4MDX4VbM= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v9 v9.1.0-beta.1 h1:jSeRQBf6dETTFquS18l7PaecEWLrxuU4f7N2dYSkREw= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v9 v9.1.0-beta.1/go.mod h1:1NzwJtdlA4Qwki8NuLFN2wVS24bOUiWRcNWpmcmHNiU= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal v1.0.0 h1:lMW1lD/17LUA5z1XTURo7LcVG2ICBPlyMHjIUrcFZNQ= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal v1.0.0/go.mod h1:ceIuwmxDWptoW3eCqSXlnPsZFKh4X+R38dWPv7GS9Vs= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v2 v2.0.0 h1:PTFGRSlMKCQelWwxUyYVEUqseBJVemLyqWJjvMyt0do= @@ -88,8 
+90,8 @@ github.com/Azure/go-autorest/logger v0.2.1 h1:IG7i4p/mDa2Ce4TRyAO8IHnVhAVF3RFU+Z github.com/Azure/go-autorest/logger v0.2.1/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8= github.com/Azure/go-autorest/tracing v0.6.0 h1:TYi4+3m5t6K48TGI9AUdb+IzbnSxvnvUMfuitfgcfuo= github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= -github.com/Azure/karpenter-provider-azure v1.7.1 h1:dhjDn8T1YKlfNpQsfWQdk6V8ed4lYTxjJgNvoa1DqdE= -github.com/Azure/karpenter-provider-azure v1.7.1/go.mod h1:VjyqZVV/fDwirfYl+DkhrzqQmOJTJoeI/ZUVFcSM0vI= +github.com/Azure/karpenter-provider-azure v1.10.2 h1:zI5gTIHm+x5qHwmpCq1n8kOFyr47bqy3BO5N1CPAZEE= +github.com/Azure/karpenter-provider-azure v1.10.2/go.mod h1:BEdfQh3u1k1IAvEfcPzbZo48I6fFeCHi7LRitxGJF80= github.com/Azure/msi-dataplane v0.4.3 h1:dWPWzY4b54tLIR9T1Q014Xxd/1DxOsMIp6EjRFAJlQY= github.com/Azure/msi-dataplane v0.4.3/go.mod h1:yAfxdJyvcnvSDfSyOFV9qm4fReEQDl+nZLGeH2ZWSmw= github.com/Azure/skewer v0.0.21 h1:6Yew9XAlJ1ltjJxh1m68X6weXc1ihm9oY3++qY5JWnM= @@ -140,8 +142,8 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.41.6 h1:5fFjR/ToSOzB2OQ/XqWpZBmNvmP/ github.com/aws/aws-sdk-go-v2/service/sts v1.41.6/go.mod h1:qgFDZQSD/Kys7nJnVqYlWKnh0SSdMjAi0uSwON4wgYQ= github.com/aws/smithy-go v1.24.1 h1:VbyeNfmYkWoxMVpGUAbQumkODcYmfMRfZ8yQiH30SK0= github.com/aws/smithy-go v1.24.1/go.mod h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0= -github.com/awslabs/operatorpkg v0.0.0-20250909182303-e8e550b6f339 h1:p4oSlQ9IaT7/DHfgcrs9zdNhdIp37VIMujZLuxSgECk= -github.com/awslabs/operatorpkg v0.0.0-20250909182303-e8e550b6f339/go.mod h1:tNmCf0qIjaGbODGbm3DM8GIKBUvvxM7iW3KHbpSnVgw= +github.com/awslabs/operatorpkg v0.0.0-20251222193911-34e9a1898737 h1:hF8FFDPnboX/ABn1r8oS77t8tG4TVS8i99iPXMaL8Jk= +github.com/awslabs/operatorpkg v0.0.0-20251222193911-34e9a1898737/go.mod h1:reUhRkYche5Vkz+ACdxho8smFwdAspzr8rpA2dNqsVQ= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks 
v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= @@ -450,8 +452,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= -golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= +golang.org/x/text v0.34.0 h1:oL/Qq0Kdaqxa1KbNeMKwQq0reLCCaFtqu2eNuSeNHbk= +golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA= golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -467,8 +469,8 @@ gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 h1:gRkg/vSppuSQoDjxyiGfN4Upv/h/DQmIR10ZU8dh4Ww= google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk= -google.golang.org/grpc v1.79.1 h1:zGhSi45ODB9/p3VAawt9a+O/MULLl9dpizzNNpq7flY= -google.golang.org/grpc v1.79.1/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ= +google.golang.org/grpc v1.79.3 h1:sybAEdRIEtvcD68Gx7dmnwjZKlyfuc61Dyo9pGXXkKE= +google.golang.org/grpc v1.79.3/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= google.golang.org/protobuf v1.36.11/go.mod 
h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -515,8 +517,8 @@ sigs.k8s.io/controller-runtime v0.23.1 h1:TjJSM80Nf43Mg21+RCy3J70aj/W6KyvDtOlpKf sigs.k8s.io/controller-runtime v0.23.1/go.mod h1:B6COOxKptp+YaUT5q4l6LqUJTRpizbgf9KSRNdQGns0= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= -sigs.k8s.io/karpenter v1.7.1 h1:KoAWzWG4dEjaW33KtXKnGySAPDnIMwRrLZUndaeAjoY= -sigs.k8s.io/karpenter v1.7.1/go.mod h1:fqk7MeJYRNfMPcOZGv/BtsPR/Hq170J4D2GoU3IVHYA= +sigs.k8s.io/karpenter v1.10.0 h1:F8cupDXyn5c7TQDgTSj86nPmUJxFaV0wxu5HIdp+TJc= +sigs.k8s.io/karpenter v1.10.0/go.mod h1:XQtYAxoCysLHjytci7Fx5zw2txgcW2Vxc+qq6DDiFX8= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= sigs.k8s.io/structured-merge-diff/v6 v6.3.2-0.20260122202528-d9cc6641c482 h1:2WOzJpHUBVrrkDjU4KBT8n5LDcj824eX0I5UKcgeRUs= diff --git a/karpenter/patches/001-karpenter-provider-azure-aks-node-class-filter.diff b/karpenter/patches/001-karpenter-provider-azure-aks-node-class-filter.diff deleted file mode 100644 index 9fe079f..0000000 --- a/karpenter/patches/001-karpenter-provider-azure-aks-node-class-filter.diff +++ /dev/null @@ -1,50 +0,0 @@ -diff --git a/karpenter/vendor/github.com/Azure/karpenter-provider-azure/pkg/controllers/nodeclaim/inplaceupdate/controller.go b/karpenter/vendor/github.com/Azure/karpenter-provider-azure/pkg/controllers/nodeclaim/inplaceupdate/controller.go -index fb46b1a..08cd5e4 100644 ---- a/karpenter/vendor/github.com/Azure/karpenter-provider-azure/pkg/controllers/nodeclaim/inplaceupdate/controller.go -+++ 
b/karpenter/vendor/github.com/Azure/karpenter-provider-azure/pkg/controllers/nodeclaim/inplaceupdate/controller.go -@@ -252,6 +252,7 @@ func (c *Controller) Register(_ context.Context, m manager.Manager) error { - For( - &karpv1.NodeClaim{}, - builder.WithPredicates( -+ nodeclaimutils.UsingAKSNodeClassPredicate(), - predicate.Or( - predicate.GenerationChangedPredicate{}, // Note that this will trigger on pod restart for all Machines. - ), -diff --git a/karpenter/vendor/github.com/Azure/karpenter-provider-azure/pkg/utils/nodeclaim/nodeclaim.go b/karpenter/vendor/github.com/Azure/karpenter-provider-azure/pkg/utils/nodeclaim/nodeclaim.go -index 6b3719b..60b4631 100644 ---- a/karpenter/vendor/github.com/Azure/karpenter-provider-azure/pkg/utils/nodeclaim/nodeclaim.go -+++ b/karpenter/vendor/github.com/Azure/karpenter-provider-azure/pkg/utils/nodeclaim/nodeclaim.go -@@ -24,6 +24,7 @@ import ( - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/client" -+ "sigs.k8s.io/controller-runtime/pkg/predicate" - karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" - - armcompute "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v7" -@@ -33,6 +34,25 @@ import ( - "github.com/Azure/karpenter-provider-azure/pkg/utils" - ) - -+// UsingAKSNodeClass tells if the giving node claim referencing AKS node class. -+func UsingAKSNodeClass(nodeClaim *karpv1.NodeClaim) bool { -+ if nodeClaim.Spec.NodeClassRef == nil { -+ return false -+ } -+ return nodeClaim.Spec.NodeClassRef.Kind == "AKSNodeClass" -+} -+ -+// UsingAKSNodeClassPredicate creates a predicate to filter node claim using AKS node class. 
-+func UsingAKSNodeClassPredicate() predicate.Funcs { -+ return predicate.NewPredicateFuncs(func(object client.Object) bool { -+ nodeClaim, ok := object.(*karpv1.NodeClaim) -+ if !ok { -+ return false -+ } -+ return UsingAKSNodeClass(nodeClaim) -+ }) -+} -+ - // GetAKSNodeClass resolves the AKSNodeClass from the NodeClaim's NodeClassRef. - // If the NodeClass for the nodeClaim has DeletionTimestamp set, an error is returned. - func GetAKSNodeClass(ctx context.Context, kubeClient client.Client, nodeClaim *karpv1.NodeClaim) (*v1beta1.AKSNodeClass, error) { diff --git a/karpenter/patches/003-karpenter-provider-azure-wait-for-extra-crds.diff b/karpenter/patches/003-karpenter-provider-azure-wait-for-extra-crds.diff deleted file mode 100644 index 51f780e..0000000 --- a/karpenter/patches/003-karpenter-provider-azure-wait-for-extra-crds.diff +++ /dev/null @@ -1,29 +0,0 @@ -diff --git a/karpenter/vendor/github.com/Azure/karpenter-provider-azure/pkg/operator/operator.go b/karpenter/vendor/github.com/Azure/karpenter-provider-azure/pkg/operator/operator.go -index 397760b..143d58b 100644 ---- a/karpenter/vendor/github.com/Azure/karpenter-provider-azure/pkg/operator/operator.go -+++ b/karpenter/vendor/github.com/Azure/karpenter-provider-azure/pkg/operator/operator.go -@@ -314,7 +314,13 @@ func getVnetGUID(ctx context.Context, creds azcore.TokenCredential, cfg *auth.Co - } - - // WaitForCRDs waits for the required CRDs to be available with a timeout --func WaitForCRDs(ctx context.Context, timeout time.Duration, config *rest.Config, log logr.Logger) error { -+func WaitForCRDs( -+ ctx context.Context, -+ timeout time.Duration, -+ config *rest.Config, -+ log logr.Logger, -+ otherObjs ...runtime.Object, -+) error { - gvk := func(obj runtime.Object) schema.GroupVersionKind { - return lo.Must(apiutil.GVKForObject(obj, scheme.Scheme)) - } -@@ -323,6 +329,9 @@ func WaitForCRDs(ctx context.Context, timeout time.Duration, config *rest.Config - gvk(&karpv1.NodeClaim{}), - 
gvk(&v1beta1.AKSNodeClass{}), - } -+ for _, o := range otherObjs { -+ requiredGVKs = append(requiredGVKs, gvk(o)) -+ } - - client, err := rest.HTTPClientFor(config) - if err != nil { diff --git a/karpenter/patches/004-karpenter-taints.diff b/karpenter/patches/004-karpenter-taints.diff deleted file mode 100644 index 528902b..0000000 --- a/karpenter/patches/004-karpenter-taints.diff +++ /dev/null @@ -1,21 +0,0 @@ -diff --git a/karpenter/vendor/sigs.k8s.io/karpenter/pkg/scheduling/taints.go b/karpenter/vendor/sigs.k8s.io/karpenter/pkg/scheduling/taints.go -index d19c17b..ea1d167 100644 ---- a/karpenter/vendor/sigs.k8s.io/karpenter/pkg/scheduling/taints.go -+++ b/karpenter/vendor/sigs.k8s.io/karpenter/pkg/scheduling/taints.go -@@ -25,6 +25,7 @@ import ( - corev1 "k8s.io/api/core/v1" - cloudproviderapi "k8s.io/cloud-provider/api" - -+ "sigs.k8s.io/karpenter/pkg/operator/logging" - "sigs.k8s.io/karpenter/pkg/utils/pretty" - - v1 "sigs.k8s.io/karpenter/pkg/apis/v1" -@@ -55,7 +56,7 @@ func (ts Taints) Tolerates(tolerations []corev1.Toleration) (errs error) { - taint := ts[i] - tolerates := false - for _, t := range tolerations { -- tolerates = tolerates || t.ToleratesTaint(&taint) -+ tolerates = tolerates || t.ToleratesTaint(logging.NopLogger, &taint, true) - } - if !tolerates { - errs = multierr.Append(errs, serrors.Wrap(fmt.Errorf("did not tolerate taint"), "taint", pretty.Taint(taint))) diff --git a/karpenter/pkg/apis/crds/flex.aks.azure.com_azureflexnodeclasses.yaml b/karpenter/pkg/apis/crds/flex.aks.azure.com_azureflexnodeclasses.yaml new file mode 100644 index 0000000..4742726 --- /dev/null +++ b/karpenter/pkg/apis/crds/flex.aks.azure.com_azureflexnodeclasses.yaml @@ -0,0 +1,214 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.20.1 + name: azureflexnodeclasses.flex.aks.azure.com +spec: + group: flex.aks.azure.com + names: + categories: + - karpenter + - nap + kind: 
AzureFlexNodeClass + listKind: AzureFlexNodeClassList + plural: azureflexnodeclasses + shortNames: + - afnc + - afncs + singular: azureflexnodeclass + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .status.conditions[?(@.type=='Ready')].status + name: Ready + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + AzureFlexNodeClass is the Schema for the AzureFlexNodeClass API. + + It enables a NodePool in an AKS cluster to auto-provision external Azure VMs in a + (potentially different) Azure region than the AKS cluster's own region. Each node + is a single VM (not VMSS) so that cross-region placement is straightforward. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + AzureFlexNodeClassSpec is the spec for AzureFlexNodeClass. + + Phase 1 scope (issue #63): single region per NodeClass, no spot, no zones, + no identity/UAMI per-NodeClass (the controller MI is assumed to have + Contributor on the target subscription/RG/subnet), no quota preflight, + no PPG/capacity reservation, no spot, no WireGuard. + properties: + allocateNodePublicIP: + default: false + description: AllocateNodePublicIP controls whether each node receives + a public IP. 
+ type: boolean + imageID: + description: ImageID is a SIG / community gallery image resource ID. + Mutually exclusive with ImageReference. + type: string + imageReference: + description: |- + ImageReference selects an Azure Marketplace image. Mutually exclusive with ImageID. + If neither is set, defaults to microsoft-dsvm/ubuntu-hpc/2204/latest. + properties: + offer: + type: string + publisher: + type: string + sku: + type: string + version: + default: latest + type: string + required: + - offer + - publisher + - sku + type: object + location: + description: Location is the Azure region (e.g. "eastus2"). May differ + from the AKS cluster region. + type: string + maxPodsPerNode: + default: 110 + description: MaxPodsPerNode is advertised in the node's capacity and + affects Karpenter scheduling. + format: int32 + type: integer + osDiskSizeGB: + default: 128 + description: OSDiskSizeGB is the size of the OS disk in GB. + format: int32 + type: integer + resourceGroup: + description: |- + ResourceGroup is the resource group where VMs, NICs, and OS disks land. + Must already exist. + type: string + securityType: + default: Standard + description: |- + SecurityType selects the VM security profile. Currently only "Standard" is supported. + TrustedLaunch is deferred — it has been observed to break the DSVM image. + enum: + - Standard + type: string + sshPublicKeys: + description: SSHPublicKeys is the list of SSH public keys to install + on each node. + items: + type: string + type: array + subnetID: + description: |- + SubnetID is the full ARM resource ID of the subnet (must already exist + and be reachable from the AKS cluster). + type: string + subscriptionID: + description: SubscriptionID is the Azure subscription where VMs will + be created. + type: string + tags: + additionalProperties: + type: string + description: Tags are applied to every Azure resource (VM, NIC, OS + disk) created from this NodeClass. 
+ type: object + required: + - location + - resourceGroup + - subnetID + - subscriptionID + type: object + status: + description: status contains the resolved state of the AzureFlexNodeClass. + properties: + conditions: + description: conditions contains signals for health and readiness + items: + description: Condition aliases the upstream type and adds additional + helper methods + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/karpenter/pkg/apis/crds/flex.aks.azure.com_nebiusnodeclasses.yaml b/karpenter/pkg/apis/crds/flex.aks.azure.com_nebiusnodeclasses.yaml index a216063..daa430b 100644 --- a/karpenter/pkg/apis/crds/flex.aks.azure.com_nebiusnodeclasses.yaml +++ b/karpenter/pkg/apis/crds/flex.aks.azure.com_nebiusnodeclasses.yaml @@ -53,6 +53,13 @@ spec: allocateNodePublicIP: default: false type: boolean + maxPodsPerNode: + default: 110 + description: |- + MaxPodsPerNode is the maximum number of pods that can be scheduled on a single node. + This value is advertised in the node's capacity and affects Karpenter's scheduling decisions. + format: int32 + type: integer osDiskImageFamily: default: ubuntu24.04-driverless type: string @@ -61,13 +68,6 @@ spec: description: OSDiskSizeGB is the size of the OS disk in GB. format: int32 type: integer - maxPodsPerNode: - default: 110 - description: |- - MaxPodsPerNode is the maximum number of pods that can be scheduled on a single node. - This value is advertised in the node's capacity and affects Karpenter's scheduling decisions. - format: int32 - type: integer projectID: description: ProjectID is the nebius project id to launch nodes in. type: string diff --git a/karpenter/pkg/apis/v1alpha1/azureflex.go b/karpenter/pkg/apis/v1alpha1/azureflex.go new file mode 100644 index 0000000..84d8f97 --- /dev/null +++ b/karpenter/pkg/apis/v1alpha1/azureflex.go @@ -0,0 +1,142 @@ +package v1alpha1 + +import ( + "github.com/awslabs/operatorpkg/status" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// AzureFlexNodeClass is the Schema for the AzureFlexNodeClass API. 
+// +// It enables a NodePool in an AKS cluster to auto-provision external Azure VMs in a +// (potentially different) Azure region than the AKS cluster's own region. Each node +// is a single VM (not VMSS) so that cross-region placement is straightforward. +// +// +kubebuilder:object:root=true +// +kubebuilder:resource:path=azureflexnodeclasses,scope=Cluster,shortName={afnc,afncs},categories={karpenter,nap} +// +kubebuilder:printcolumn:name="Ready",type=string,JSONPath=".status.conditions[?(@.type=='Ready')].status" +// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=".metadata.creationTimestamp" +// +kubebuilder:storageversion +// +kubebuilder:subresource:status +type AzureFlexNodeClass struct { + metav1.TypeMeta `json:",inline"` + // metadata is standard object metadata. + // +optional + metav1.ObjectMeta `json:"metadata,omitempty"` + + // +optional + Spec AzureFlexNodeClassSpec `json:"spec,omitempty"` + + // status contains the resolved state of the AzureFlexNodeClass. + // +optional + Status AzureFlexNodeClassStatus `json:"status,omitempty"` +} + +var _ status.Object = (*AzureFlexNodeClass)(nil) + +func (s *AzureFlexNodeClass) GetConditions() []status.Condition { + return s.Status.Conditions +} + +func (s *AzureFlexNodeClass) SetConditions(conditions []status.Condition) { + s.Status.Conditions = conditions +} + +func (s *AzureFlexNodeClass) StatusConditions() status.ConditionSet { + conds := []string{ + ConditionTypeValidationSucceeded, + } + return status.NewReadyConditions(conds...).For(s) +} + +// AzureFlexImageReference selects an Azure Marketplace image. +// Mutually exclusive with AzureFlexNodeClassSpec.ImageID. +type AzureFlexImageReference struct { + // +required + Publisher string `json:"publisher"` + // +required + Offer string `json:"offer"` + // +required + SKU string `json:"sku"` + // +optional + // +default="latest" + Version string `json:"version,omitempty"` +} + +// AzureFlexNodeClassSpec is the spec for AzureFlexNodeClass. 
+// +// Phase 1 scope (issue #63): single region per NodeClass, no spot, no zones, +// no identity/UAMI per-NodeClass (the controller MI is assumed to have +// Contributor on the target subscription/RG/subnet), no quota preflight, +// no PPG/capacity reservation, no spot, no WireGuard. +type AzureFlexNodeClassSpec struct { + // SubscriptionID is the Azure subscription where VMs will be created. + // +required + SubscriptionID string `json:"subscriptionID"` + + // Location is the Azure region (e.g. "eastus2"). May differ from the AKS cluster region. + // +required + Location string `json:"location"` + + // ResourceGroup is the resource group where VMs, NICs, and OS disks land. + // Must already exist. + // +required + ResourceGroup string `json:"resourceGroup"` + + // SubnetID is the full ARM resource ID of the subnet (must already exist + // and be reachable from the AKS cluster). + // +required + SubnetID string `json:"subnetID"` + + // ImageReference selects an Azure Marketplace image. Mutually exclusive with ImageID. + // If neither is set, defaults to microsoft-dsvm/ubuntu-hpc/2204/latest. + // +optional + ImageReference *AzureFlexImageReference `json:"imageReference,omitempty"` + + // ImageID is a SIG / community gallery image resource ID. Mutually exclusive with ImageReference. + // +optional + ImageID *string `json:"imageID,omitempty"` + + // SecurityType selects the VM security profile. Currently only "Standard" is supported. + // TrustedLaunch is deferred — it has been observed to break the DSVM image. + // +optional + // +default="Standard" + // +kubebuilder:validation:Enum=Standard + SecurityType *string `json:"securityType,omitempty"` + + // OSDiskSizeGB is the size of the OS disk in GB. + // +optional + // +default=128 + OSDiskSizeGB *int32 `json:"osDiskSizeGB,omitempty"` + + // SSHPublicKeys is the list of SSH public keys to install on each node. 
+ // +optional + SSHPublicKeys []string `json:"sshPublicKeys,omitempty"` + + // AllocateNodePublicIP controls whether each node receives a public IP. + // +optional + // +default=false + AllocateNodePublicIP *bool `json:"allocateNodePublicIP,omitempty"` + + // MaxPodsPerNode is advertised in the node's capacity and affects Karpenter scheduling. + // +optional + // +default=110 + MaxPodsPerNode *int32 `json:"maxPodsPerNode,omitempty"` + + // Tags are applied to every Azure resource (VM, NIC, OS disk) created from this NodeClass. + // +optional + Tags map[string]string `json:"tags,omitempty"` +} + +type AzureFlexNodeClassStatus struct { + // conditions contains signals for health and readiness + // +optional + //nolint:kubeapilinter // conditions: using status.Condition from operatorpkg instead of metav1.Condition for compatibility + Conditions []status.Condition `json:"conditions,omitempty"` +} + +// +kubebuilder:object:root=true +type AzureFlexNodeClassList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []AzureFlexNodeClass `json:"items"` +} diff --git a/karpenter/pkg/apis/v1alpha1/doc.go b/karpenter/pkg/apis/v1alpha1/doc.go index 881f9c4..c50c6b9 100644 --- a/karpenter/pkg/apis/v1alpha1/doc.go +++ b/karpenter/pkg/apis/v1alpha1/doc.go @@ -17,6 +17,8 @@ var ( scheme.AddKnownTypes(SchemeGroupVersion, &NebiusNodeClass{}, &NebiusNodeClassList{}, + &AzureFlexNodeClass{}, + &AzureFlexNodeClassList{}, ) metav1.AddToGroupVersion(scheme, SchemeGroupVersion) return nil diff --git a/karpenter/pkg/apis/v1alpha1/labels.go b/karpenter/pkg/apis/v1alpha1/labels.go index 3cd96da..f861e73 100644 --- a/karpenter/pkg/apis/v1alpha1/labels.go +++ b/karpenter/pkg/apis/v1alpha1/labels.go @@ -4,4 +4,9 @@ import "github.com/Azure/aks-flex/karpenter/pkg/apis" const ( TerminationFinalizer = apis.Group + "/termination" + + // AzureFlexNodeClassHashAnnotation stores the deterministic hash of the + // AzureFlexNodeClass spec at the time a 
NodeClaim was created. The + // CloudProvider compares it against the current spec hash to compute drift. + AzureFlexNodeClassHashAnnotation = apis.Group + "/azureflex-nodeclass-hash" ) diff --git a/karpenter/pkg/apis/v1alpha1/zz_generated.deepcopy.go b/karpenter/pkg/apis/v1alpha1/zz_generated.deepcopy.go index 3a6a61f..b24a064 100644 --- a/karpenter/pkg/apis/v1alpha1/zz_generated.deepcopy.go +++ b/karpenter/pkg/apis/v1alpha1/zz_generated.deepcopy.go @@ -9,6 +9,159 @@ import ( "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AzureFlexImageReference) DeepCopyInto(out *AzureFlexImageReference) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AzureFlexImageReference. +func (in *AzureFlexImageReference) DeepCopy() *AzureFlexImageReference { + if in == nil { + return nil + } + out := new(AzureFlexImageReference) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AzureFlexNodeClass) DeepCopyInto(out *AzureFlexNodeClass) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AzureFlexNodeClass. +func (in *AzureFlexNodeClass) DeepCopy() *AzureFlexNodeClass { + if in == nil { + return nil + } + out := new(AzureFlexNodeClass) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *AzureFlexNodeClass) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AzureFlexNodeClassList) DeepCopyInto(out *AzureFlexNodeClassList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]AzureFlexNodeClass, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AzureFlexNodeClassList. +func (in *AzureFlexNodeClassList) DeepCopy() *AzureFlexNodeClassList { + if in == nil { + return nil + } + out := new(AzureFlexNodeClassList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *AzureFlexNodeClassList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *AzureFlexNodeClassSpec) DeepCopyInto(out *AzureFlexNodeClassSpec) { + *out = *in + if in.ImageReference != nil { + in, out := &in.ImageReference, &out.ImageReference + *out = new(AzureFlexImageReference) + **out = **in + } + if in.ImageID != nil { + in, out := &in.ImageID, &out.ImageID + *out = new(string) + **out = **in + } + if in.SecurityType != nil { + in, out := &in.SecurityType, &out.SecurityType + *out = new(string) + **out = **in + } + if in.OSDiskSizeGB != nil { + in, out := &in.OSDiskSizeGB, &out.OSDiskSizeGB + *out = new(int32) + **out = **in + } + if in.SSHPublicKeys != nil { + in, out := &in.SSHPublicKeys, &out.SSHPublicKeys + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.AllocateNodePublicIP != nil { + in, out := &in.AllocateNodePublicIP, &out.AllocateNodePublicIP + *out = new(bool) + **out = **in + } + if in.MaxPodsPerNode != nil { + in, out := &in.MaxPodsPerNode, &out.MaxPodsPerNode + *out = new(int32) + **out = **in + } + if in.Tags != nil { + in, out := &in.Tags, &out.Tags + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AzureFlexNodeClassSpec. +func (in *AzureFlexNodeClassSpec) DeepCopy() *AzureFlexNodeClassSpec { + if in == nil { + return nil + } + out := new(AzureFlexNodeClassSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AzureFlexNodeClassStatus) DeepCopyInto(out *AzureFlexNodeClassStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]status.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AzureFlexNodeClassStatus. 
+func (in *AzureFlexNodeClassStatus) DeepCopy() *AzureFlexNodeClassStatus { + if in == nil { + return nil + } + out := new(AzureFlexNodeClassStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NebiusNodeClass) DeepCopyInto(out *NebiusNodeClass) { *out = *in diff --git a/karpenter/pkg/cloudproviders/azure/api.go b/karpenter/pkg/cloudproviders/azure/api.go new file mode 100644 index 0000000..216c037 --- /dev/null +++ b/karpenter/pkg/cloudproviders/azure/api.go @@ -0,0 +1,67 @@ +package azure + +import ( + "errors" + "strings" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +// IsNotFound returns true if err signals "resource does not exist". The error +// can flow back from either the gRPC plugin (NotFound) or directly from the +// Azure ARM SDK (HTTP 404). +func IsNotFound(err error) bool { + if err == nil { + return false + } + if s, ok := status.FromError(err); ok && s.Code() == codes.NotFound { + return true + } + var rerr *azcore.ResponseError + if errors.As(err, &rerr) && rerr.StatusCode == 404 { + return true + } + return false +} + +// IsTypeMismatch returns true if err indicates the plugin returned an object of +// a different concrete protobuf type than the caller expected. +func IsTypeMismatch(err error) bool { + if err == nil { + return false + } + s, ok := status.FromError(err) + return ok && s.Code() == codes.InvalidArgument && strings.Contains(s.Message(), "type mismatch") +} + +// IsQuotaError returns true if err signals an Azure quota / capacity exhaustion. +// We classify both HTTP 429 and the well-known Azure ARM error codes. 
+func IsQuotaError(err error) bool { + if err == nil { + return false + } + var rerr *azcore.ResponseError + if errors.As(err, &rerr) { + if rerr.StatusCode == 429 { + return true + } + switch rerr.ErrorCode { + case "QuotaExceeded", + "OperationNotAllowed", + "SkuNotAvailable", + "AllocationFailed", + "ZonalAllocationFailed", + "OverconstrainedAllocationRequest": + return true + } + } + // Fallback substring match — covers gRPC-wrapped error strings and any + // codes the SDK didn't surface structurally. + msg := err.Error() + return strings.Contains(msg, "QuotaExceeded") || + strings.Contains(msg, "OperationNotAllowed") || + strings.Contains(msg, "SkuNotAvailable") || + strings.Contains(msg, "AllocationFailed") +} diff --git a/karpenter/pkg/cloudproviders/azure/api_test.go b/karpenter/pkg/cloudproviders/azure/api_test.go new file mode 100644 index 0000000..c684818 --- /dev/null +++ b/karpenter/pkg/cloudproviders/azure/api_test.go @@ -0,0 +1,55 @@ +package azure + +import ( + "fmt" + "testing" + + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +func TestIsTypeMismatch(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + err error + want bool + }{ + { + name: "exact type mismatch status", + err: status.Error(codes.InvalidArgument, "type mismatch"), + want: true, + }, + { + name: "wrapped type mismatch status", + err: fmt.Errorf("wrap: %w", status.Error(codes.InvalidArgument, "type mismatch")), + want: true, + }, + { + name: "other invalid argument", + err: status.Error(codes.InvalidArgument, "bad request"), + want: false, + }, + { + name: "not found", + err: status.Error(codes.NotFound, "not found"), + want: false, + }, + { + name: "nil", + err: nil, + want: false, + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + if got := IsTypeMismatch(tc.err); got != tc.want { + t.Fatalf("IsTypeMismatch(%v) = %v, want %v", tc.err, got, tc.want) + } + }) + } +} diff --git 
a/karpenter/pkg/cloudproviders/azure/cloudprovider.go b/karpenter/pkg/cloudproviders/azure/cloudprovider.go new file mode 100644 index 0000000..99fc013 --- /dev/null +++ b/karpenter/pkg/cloudproviders/azure/cloudprovider.go @@ -0,0 +1,356 @@ +// Package azure implements the AzureFlex cross-region cloud provider for +// Karpenter. It is distinct from the upstream Azure/karpenter-provider-azure +// (which only supports VMs in the same Azure region as the AKS cluster) and +// from the in-tree AKS provider wired up alongside it in cmd/controller/main.go. +// +// The provider talks to a colocated plugin gRPC service (flexvm) that performs +// the actual Azure API calls. This isolates the Karpenter controller from +// Azure SDK details and lets the plugin run with its own (Contributor-scoped) +// managed identity. +package azure + +import ( + "context" + "errors" + "fmt" + "sync" + "time" + + karpoptions "github.com/Azure/karpenter-provider-azure/pkg/operator/options" + "github.com/Azure/karpenter-provider-azure/pkg/utils" + "github.com/awslabs/operatorpkg/status" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + grpcstatus "google.golang.org/grpc/status" + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + v1 "sigs.k8s.io/karpenter/pkg/apis/v1" + corecloudprovider "sigs.k8s.io/karpenter/pkg/cloudprovider" + + pluginapi "github.com/Azure/aks-flex/plugin/api" + stretchhelper "github.com/Azure/aks-flex/plugin/pkg/helper" + stretchservices "github.com/Azure/aks-flex/plugin/pkg/services" + agentpoolsapi "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/api" + flexvm "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/azure/flexvm" + + "github.com/Azure/aks-flex/karpenter/pkg/apis" + "github.com/Azure/aks-flex/karpenter/pkg/apis/v1alpha1" + "github.com/Azure/aks-flex/karpenter/pkg/cloudproviders" + 
"github.com/Azure/aks-flex/karpenter/pkg/cloudproviders/azure/instancetype" +) + +const incompleteAgentPoolCleanupDelay = 30 * time.Minute + +type CloudProvider struct { + stretchPluginConn *grpc.ClientConn + stretchAgentPoolsClient agentpoolsapi.AgentPoolsClient + + kubeClient client.Client + + // clusterCA is captured once at startup. Per the rubber-duck note, the + // AKS bootstrap secret lookup (kubeadm.FromAKS) is "exactly one secret per + // call", so anything cluster-wide we need is captured at struct init — + // we do not re-fetch on every Create. + clusterCA []byte + + instanceTypeProvider *instancetype.Provider + + cleanupInFlight sync.Map +} + +var flexAgentPoolTypeURL = "type.googleapis.com/" + string((&flexvm.AgentPool{}).ProtoReflect().Descriptor().FullName()) + +func newCloudProvider( + stretchPluginConn *grpc.ClientConn, + kubeClient client.Client, + clusterCA []byte, +) *CloudProvider { + return &CloudProvider{ + stretchPluginConn: stretchPluginConn, + stretchAgentPoolsClient: agentpoolsapi.NewAgentPoolsClient(stretchPluginConn), + kubeClient: kubeClient, + clusterCA: clusterCA, + instanceTypeProvider: instancetype.NewProvider(), + } +} + +// Register installs the AzureFlex provider into the multiplexing hub. 
+func Register( + ctx context.Context, + hub *cloudproviders.CloudProvidersHub, + kubeClient client.Client, + clusterCA []byte, +) error { + stretchPluginConn, err := stretchservices.NewConnection() + if err != nil { + return fmt.Errorf("creating stretch plugin connection: %w", err) + } + cp := newCloudProvider(stretchPluginConn, kubeClient, clusterCA) + hub.Register(cp, GroupKind, ProviderIDScheme) + return nil +} + +var _ corecloudprovider.CloudProvider = (*CloudProvider)(nil) + +func (c *CloudProvider) getNodeClass( + ctx context.Context, + ref *v1.NodeClassReference, +) (*v1alpha1.AzureFlexNodeClass, error) { + if ref == nil { + return nil, errors.New("nodeClaim must reference a node class") + } + if ref.Group != apis.Group { + return nil, fmt.Errorf("nodeClassRef %s in group %q, expected %q", ref.Name, ref.Group, apis.Group) + } + + rv := &v1alpha1.AzureFlexNodeClass{} + if err := c.kubeClient.Get(ctx, client.ObjectKey{Name: ref.Name}, rv); err != nil { + return nil, fmt.Errorf("getting AzureFlexNodeClass %s: %w", ref.Name, err) + } + if !rv.DeletionTimestamp.IsZero() { + return nil, utils.NewTerminatingResourceError( + schema.GroupResource{Group: apis.Group, Resource: "azureflexnodeclasses"}, + rv.Name, + ) + } + return rv, nil +} + +func (c *CloudProvider) instanceTypeKey(nc *v1alpha1.AzureFlexNodeClass) instancetype.NodeClassKey { + osDisk := int64(128) + if nc.Spec.OSDiskSizeGB != nil { + osDisk = int64(*nc.Spec.OSDiskSizeGB) + } + pods := instancetype.DefaultPerNodePodsCount + if nc.Spec.MaxPodsPerNode != nil { + pods = *nc.Spec.MaxPodsPerNode + } + return instancetype.NodeClassKey{ + Region: nc.Spec.Location, + OSDiskSizeGiB: osDisk, + PerNodePodsCount: pods, + } +} + +func (c *CloudProvider) Create(ctx context.Context, nodeClaim *v1.NodeClaim) (*v1.NodeClaim, error) { + logger := log.FromContext(ctx).WithValues("nodeClaim", nodeClaim.Name) + logger.Info("creating azure-flex VM for nodeClaim") + + nodeClass, err := c.getNodeClass(ctx, 
nodeClaim.Spec.NodeClassRef) + if err != nil { + return nil, err + } + + key := c.instanceTypeKey(nodeClass) + it, err := c.instanceTypeProvider.ResolveFromNodeClaim(key, nodeClaim.Spec.Requirements) + if err != nil { + // Schedule-time error: the requested SKU isn't in the Phase 1 catalog. + return nil, corecloudprovider.NewInsufficientCapacityError(err) + } + logger.Info("resolved instance type", "instanceType", it.Name) + + agentPool := nodeClaimToAgentPool( + karpoptions.FromContext(ctx), + c.clusterCA, + nodeClass, + nodeClaim, + it, + ) + created, err := stretchhelper.CreateOrUpdate( + c.stretchAgentPoolsClient.CreateOrUpdate, + ctx, agentPool, + ) + if err != nil { + if IsQuotaError(err) { + c.cleanupAgentPoolInBackground(ctx, nodeClaim.Name, "quota/capacity create failure") + return nil, corecloudprovider.NewInsufficientCapacityError(err) + } + return nil, fmt.Errorf("creating azure-flex agent pool: %w", err) + } + + // Stamp the NodeClass drift hash onto the returned NodeClaim so that + // IsDrifted can detect spec changes later. Without this annotation the + // drift check silently no-ops. + out := agentPoolToNodeClaim(created, it) + if out.Annotations == nil { + out.Annotations = map[string]string{} + } + out.Annotations[v1alpha1.AzureFlexNodeClassHashAnnotation] = driftHash(nodeClass.Spec) + return out, nil +} + +func (c *CloudProvider) Delete(ctx context.Context, nodeClaim *v1.NodeClaim) error { + return c.deleteAgentPool(ctx, nodeClaim.Name) +} + +func (c *CloudProvider) deleteAgentPool(ctx context.Context, name string) error { + logger := log.FromContext(ctx).WithValues("agentPool", name) + // Per CloudProvider.Delete contract: signal NodeClaimNotFoundError if the + // remote resource is already gone (so karpenter knows it's safe to drop). 
+ if _, err := c.getFlexAgentPool(ctx, name); err != nil { + if IsNotFound(err) || IsTypeMismatch(err) { + return corecloudprovider.NewNodeClaimNotFoundError(err) + } + // Non-NotFound get failure: log and proceed with delete in best effort. + logger.V(5).Error(err, "getting agent pool for nodeClaim, proceeding to delete") + } + + if err := stretchhelper.Delete( + c.stretchAgentPoolsClient.Delete, + ctx, name, + ); err != nil { + if IsNotFound(err) || IsTypeMismatch(err) { + return corecloudprovider.NewNodeClaimNotFoundError(err) + } + return fmt.Errorf("deleting azure-flex agent pool: %w", err) + } + logger.Info("deleted azure-flex agent pool") + return nil +} + +func (c *CloudProvider) Get(ctx context.Context, providerID string) (*v1.NodeClaim, error) { + name, err := providerIDToVMName(providerID) + if err != nil { + return nil, err + } + ap, err := c.getFlexAgentPool(ctx, name) + if err != nil { + if IsNotFound(err) || IsTypeMismatch(err) { + return nil, corecloudprovider.NewNodeClaimNotFoundError(err) + } + return nil, err + } + // We don't have the NodeClass here (Get is called by reconcilers that may + // not have a class on hand) — pass nil instanceType and accept missing + // well-known labels. They'll be repopulated by the next Create-flow Get. 
+ return agentPoolToNodeClaim(ap, nil), nil +} + +func (c *CloudProvider) getFlexAgentPool(ctx context.Context, id string) (*flexvm.AgentPool, error) { + req := &pluginapi.GetRequest{} + req.SetId(id) + resp, err := c.stretchAgentPoolsClient.Get(ctx, req) + if err != nil { + return nil, err + } + return flexAgentPoolFromGetResponse(resp) +} + +func flexAgentPoolFromGetResponse(resp *pluginapi.GetResponse) (*flexvm.AgentPool, error) { + if resp == nil || resp.GetItem() == nil { + return nil, grpcstatus.Error(codes.NotFound, "") + } + if resp.GetItem().GetTypeUrl() != flexAgentPoolTypeURL { + return nil, grpcstatus.Error(codes.NotFound, "") + } + return stretchhelper.AnyTo[*flexvm.AgentPool](resp.GetItem()) +} + +func (c *CloudProvider) List(ctx context.Context) ([]*v1.NodeClaim, error) { + aps, err := stretchhelper.ListByType[*flexvm.AgentPool]( + c.stretchAgentPoolsClient.List, + ctx, "", + ) + if err != nil { + return nil, err + } + out := make([]*v1.NodeClaim, 0, len(aps)) + now := time.Now() + for _, ap := range aps { + if ap.GetStatus().GetVmResourceId() == "" { + if shouldCleanupIncompleteAgentPool(ap, now) { + c.cleanupAgentPoolInBackground(ctx, ap.GetMetadata().GetId(), "stale incomplete agent pool") + } + continue + } + out = append(out, agentPoolToNodeClaim(ap, nil)) + } + return out, nil +} + +func shouldCleanupIncompleteAgentPool(ap *flexvm.AgentPool, now time.Time) bool { + if ap.GetStatus().GetVmResourceId() != "" { + return false + } + createdAt := ap.GetStatus().GetCreatedAt() + if createdAt == nil { + return true + } + return !createdAt.AsTime().Add(incompleteAgentPoolCleanupDelay).After(now) +} + +func (c *CloudProvider) cleanupAgentPoolInBackground(ctx context.Context, name, reason string) { + if name == "" { + return + } + if _, loaded := c.cleanupInFlight.LoadOrStore(name, struct{}{}); loaded { + return + } + + logger := log.FromContext(ctx).WithValues("agentPool", name, "reason", reason) + logger.Info("starting azure-flex agent pool cleanup") + 
go func() { + defer c.cleanupInFlight.Delete(name) + + cleanupCtx, cancel := context.WithTimeout(context.Background(), 8*time.Minute) + defer cancel() + + if err := c.deleteAgentPool(cleanupCtx, name); err != nil && !corecloudprovider.IsNodeClaimNotFoundError(err) { + logger.Error(err, "cleaning up azure-flex agent pool") + return + } + logger.Info("cleaned up azure-flex agent pool") + }() +} + +func (c *CloudProvider) GetInstanceTypes(ctx context.Context, nodePool *v1.NodePool) ([]*corecloudprovider.InstanceType, error) { + logger := loggerFromContext(ctx).WithValues("nodePool", nodePool.Name) + + nodeClass, err := c.getNodeClass(ctx, nodePool.Spec.Template.Spec.NodeClassRef) + if err != nil { + return nil, fmt.Errorf("getting node class for node pool: %w", err) + } + + its := c.instanceTypeProvider.GetInstanceTypes(c.instanceTypeKey(nodeClass)) + logger.V(5).Info("listed instance types", "count", len(its)) + return its, nil +} + +func (c *CloudProvider) GetSupportedNodeClasses() []status.Object { + return []status.Object{ + &v1alpha1.AzureFlexNodeClass{}, + } +} + +func (c *CloudProvider) IsDrifted(ctx context.Context, nodeClaim *v1.NodeClaim) (corecloudprovider.DriftReason, error) { + if nodeClaim.Spec.NodeClassRef == nil { + return "", nil + } + nc := &v1alpha1.AzureFlexNodeClass{} + if err := c.kubeClient.Get(ctx, client.ObjectKey{Name: nodeClaim.Spec.NodeClassRef.Name}, nc); err != nil { + return "", client.IgnoreNotFound(err) + } + + current := driftHash(nc.Spec) + prior := nodeClaim.Annotations[v1alpha1.AzureFlexNodeClassHashAnnotation] + if prior != "" && prior != current { + return corecloudprovider.DriftReason("AzureFlexNodeClassChanged"), nil + } + return "", nil +} + +func (c *CloudProvider) Name() string { + return ProviderIDScheme +} + +func (c *CloudProvider) RepairPolicies() []corecloudprovider.RepairPolicy { + return []corecloudprovider.RepairPolicy{} +} + +func (c *CloudProvider) Close(context.Context) error { + if c.stretchPluginConn != nil { + 
return c.stretchPluginConn.Close() + } + return nil +} diff --git a/karpenter/pkg/cloudproviders/azure/cloudprovider_test.go b/karpenter/pkg/cloudproviders/azure/cloudprovider_test.go new file mode 100644 index 0000000..1d08fd7 --- /dev/null +++ b/karpenter/pkg/cloudproviders/azure/cloudprovider_test.go @@ -0,0 +1,89 @@ +package azure + +import ( + "testing" + + pluginapi "github.com/Azure/aks-flex/plugin/api" + "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/azure/flexvm" + "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/azure/ubuntu2404vmss" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/anypb" +) + +func TestFlexAgentPoolFromGetResponse(t *testing.T) { + t.Parallel() + + mkMeta := func(id string) *pluginapi.Metadata { + return pluginapi.Metadata_builder{Id: proto.String(id)}.Build() + } + mkFlexResp := func(id string) *pluginapi.GetResponse { + item, err := anypb.New(flexvm.AgentPool_builder{ + Metadata: mkMeta(id), + }.Build()) + if err != nil { + t.Fatalf("building flex anypb: %v", err) + } + return pluginapi.GetResponse_builder{Item: item}.Build() + } + mkVMSSResp := func(id string) *pluginapi.GetResponse { + item, err := anypb.New(ubuntu2404vmss.AgentPool_builder{ + Metadata: mkMeta(id), + }.Build()) + if err != nil { + t.Fatalf("building vmss anypb: %v", err) + } + return pluginapi.GetResponse_builder{Item: item}.Build() + } + + tests := []struct { + name string + resp *pluginapi.GetResponse + wantID string + wantErr bool + }{ + { + name: "nil response is not found", + resp: nil, + wantErr: true, + }, + { + name: "nil item is not found", + resp: pluginapi.GetResponse_builder{}.Build(), + wantErr: true, + }, + { + name: "wrong item type is not found", + resp: mkVMSSResp("node-1"), + wantErr: true, + }, + { + name: "flex agentpool item returns parsed object", + resp: mkFlexResp("node-2"), + wantID: "node-2", + }, + } + + for _, tc := range tests { + tc := tc + t.Run(tc.name, func(t *testing.T) { + 
t.Parallel() + + got, err := flexAgentPoolFromGetResponse(tc.resp) + if tc.wantErr { + if err == nil { + t.Fatalf("expected error, got nil") + } + if !IsNotFound(err) { + t.Fatalf("expected NotFound-style error, got: %v", err) + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got.GetMetadata().GetId() != tc.wantID { + t.Fatalf("got id %q, want %q", got.GetMetadata().GetId(), tc.wantID) + } + }) + } +} diff --git a/karpenter/pkg/cloudproviders/azure/consts.go b/karpenter/pkg/cloudproviders/azure/consts.go new file mode 100644 index 0000000..91edac1 --- /dev/null +++ b/karpenter/pkg/cloudproviders/azure/consts.go @@ -0,0 +1,21 @@ +package azure + +import ( + "k8s.io/apimachinery/pkg/runtime/schema" + + "github.com/Azure/aks-flex/karpenter/pkg/apis" +) + +const ( + // ProviderIDScheme is the URL scheme used in NodeClaim.Status.ProviderID + // for instances managed by the Azure cross-region (flex) cloud provider. + // + // Distinct from "azure" (which the AKS in-region provider uses) so the + // Karpenter cloud-provider hub can multiplex correctly. + ProviderIDScheme = "azure-flex" +) + +var GroupKind = schema.GroupKind{ + Group: apis.Group, + Kind: "AzureFlexNodeClass", +} diff --git a/karpenter/pkg/cloudproviders/azure/instancetype/catalog.go b/karpenter/pkg/cloudproviders/azure/instancetype/catalog.go new file mode 100644 index 0000000..ea4c5f3 --- /dev/null +++ b/karpenter/pkg/cloudproviders/azure/instancetype/catalog.go @@ -0,0 +1,38 @@ +package instancetype + +// CatalogEntry is a Phase 1 hardcoded SKU description. We deliberately do NOT +// call Azure's SKU/quota APIs from the karpenter controller — letting ARM +// fail and classifying the error is simpler and correct for Phase 1 (issue #63). +// +// Future work: replace this with a dynamic provider backed by armcompute's +// resourceSkus client (and per-region offering refresh). 
+type CatalogEntry struct { + // Name is the Azure VM size, used as both the karpenter instance type + // name and the Azure VMSize when creating the VM. + Name string + + VCPU int64 + MemoryGB int64 + GPU int64 +} + +// Catalog is the hardcoded allowlist of SKUs that AzureFlexNodeClass +// NodePools may schedule onto in Phase 1. +var Catalog = []CatalogEntry{ + {Name: "Standard_ND96isr_H200_v5", VCPU: 96, MemoryGB: 1900, GPU: 8}, + {Name: "Standard_ND96isr_H100_v5", VCPU: 96, MemoryGB: 1900, GPU: 8}, + {Name: "Standard_ND96amsr_A100_v4", VCPU: 96, MemoryGB: 1900, GPU: 8}, + {Name: "Standard_NC40ads_H100_v5", VCPU: 40, MemoryGB: 320, GPU: 1}, + {Name: "Standard_NC24ads_A100_v4", VCPU: 24, MemoryGB: 220, GPU: 1}, + {Name: "Standard_D8s_v5", VCPU: 8, MemoryGB: 32, GPU: 0}, +} + +// Get returns the CatalogEntry for name, or nil if name is not in the catalog. +func Get(name string) *CatalogEntry { + for i := range Catalog { + if Catalog[i].Name == name { + return &Catalog[i] + } + } + return nil +} diff --git a/karpenter/pkg/cloudproviders/azure/instancetype/catalog_test.go b/karpenter/pkg/cloudproviders/azure/instancetype/catalog_test.go new file mode 100644 index 0000000..afcbf17 --- /dev/null +++ b/karpenter/pkg/cloudproviders/azure/instancetype/catalog_test.go @@ -0,0 +1,43 @@ +package instancetype + +import "testing" + +func TestCatalogContainsRequiredSKUs(t *testing.T) { + required := []string{ + "Standard_ND96isr_H200_v5", + "Standard_ND96amsr_A100_v4", + "Standard_NC40ads_H100_v5", + "Standard_NC24ads_A100_v4", + "Standard_D8s_v5", + } + for _, name := range required { + if Get(name) == nil { + t.Errorf("catalog missing required SKU %q", name) + } + } +} + +func TestCatalogGetUnknown(t *testing.T) { + if Get("Standard_DoesNotExist_v1") != nil { + t.Fatalf("Get must return nil for unknown SKU") + } +} + +func TestProviderResolveUnknownSKU(t *testing.T) { + // Bypass NodeClaim machinery: an instance type name not in the catalog + // must come back as a clean 
error from the provider. We exercise this + // path indirectly via GetByName since ResolveFromNodeClaim requires + // scheduling fixtures. + p := NewProvider() + if p.GetByName(NodeClassKey{Region: "eastus2"}, "Standard_DoesNotExist_v1") != nil { + t.Fatalf("GetByName must return nil for unknown SKU") + } +} + +func TestProviderListCount(t *testing.T) { + p := NewProvider() + its := p.GetInstanceTypes(NodeClassKey{Region: "eastus2", OSDiskSizeGiB: 128, PerNodePodsCount: 110}) + if len(its) != len(Catalog) { + t.Fatalf("GetInstanceTypes returned %d entries, want %d", len(its), len(Catalog)) + } +} diff --git a/karpenter/pkg/cloudproviders/azure/instancetype/instancetype.go b/karpenter/pkg/cloudproviders/azure/instancetype/instancetype.go new file mode 100644 index 0000000..b1f4d84 --- /dev/null +++ b/karpenter/pkg/cloudproviders/azure/instancetype/instancetype.go @@ -0,0 +1,102 @@ +package instancetype + +import ( + "fmt" + + azurev1beta1 "github.com/Azure/karpenter-provider-azure/pkg/apis/v1beta1" + azinstancetype "github.com/Azure/karpenter-provider-azure/pkg/providers/instancetype" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" + karpcloudprovider "sigs.k8s.io/karpenter/pkg/cloudprovider" + "sigs.k8s.io/karpenter/pkg/scheduling" +) + +const ( + // Architecture is amd64 only for Phase 1 (all five catalog SKUs are amd64). + Architecture = "amd64" + + // DefaultPerNodePodsCount is used when AzureFlexNodeClass.Spec.MaxPodsPerNode is unset. + DefaultPerNodePodsCount int32 = 110 +) + +// NodeClassKey holds the AzureFlexNodeClass fields that affect instance-type +// shape. We use it as a value-type so it's hashable for any future caching. +type NodeClassKey struct { + Region string + OSDiskSizeGiB int64 + PerNodePodsCount int32 +} + +// New builds a Karpenter InstanceType from a CatalogEntry, NodeClass key, and +// pre-built offerings. We follow the nebius shape so labels and overhead match. 
+func New( + key NodeClassKey, + entry *CatalogEntry, + offerings karpcloudprovider.Offerings, +) *karpcloudprovider.InstanceType { + return &karpcloudprovider.InstanceType{ + Name: entry.Name, + Requirements: requirements(key.Region, entry, offerings), + Offerings: offerings, + Capacity: capacity(key, entry), + Overhead: overhead(entry), + } +} + +func requirements( + region string, + e *CatalogEntry, + offerings karpcloudprovider.Offerings, +) scheduling.Requirements { + // Single zone (empty) for Phase 1: cross-region zonal placement is deferred. + zones := []string{""} + for _, o := range offerings { + if zoneReq := o.Requirements.Get(corev1.LabelTopologyZone); zoneReq != nil { + zones = zoneReq.Values() + } + } + capacityTypes := []string{karpv1.CapacityTypeOnDemand} + + vCPU := fmt.Sprint(e.VCPU) + memMiB := fmt.Sprint(e.MemoryGB * 1024) + gpu := fmt.Sprint(e.GPU) + + return scheduling.NewRequirements( + scheduling.NewRequirement(corev1.LabelInstanceTypeStable, corev1.NodeSelectorOpIn, e.Name), + scheduling.NewRequirement(corev1.LabelTopologyZone, corev1.NodeSelectorOpIn, zones...), + scheduling.NewRequirement(corev1.LabelTopologyRegion, corev1.NodeSelectorOpIn, region), + scheduling.NewRequirement(corev1.LabelOSStable, corev1.NodeSelectorOpIn, string(corev1.Linux)), + scheduling.NewRequirement(corev1.LabelArchStable, corev1.NodeSelectorOpIn, Architecture), + scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, capacityTypes...), + // Azure-domain labels (mirrors karpenter-provider-azure conventions). 
+ scheduling.NewRequirement(azurev1beta1.LabelSKUCPU, corev1.NodeSelectorOpIn, vCPU), + scheduling.NewRequirement(azurev1beta1.LabelSKUMemory, corev1.NodeSelectorOpIn, memMiB), + scheduling.NewRequirement(azurev1beta1.AKSLabelCPU, corev1.NodeSelectorOpIn, vCPU), + scheduling.NewRequirement(azurev1beta1.AKSLabelMemory, corev1.NodeSelectorOpIn, memMiB), + scheduling.NewRequirement(azurev1beta1.LabelSKUGPUCount, corev1.NodeSelectorOpIn, gpu), + ) +} + +func capacity(key NodeClassKey, e *CatalogEntry) corev1.ResourceList { + osDisk := *resource.NewScaledQuantity(key.OSDiskSizeGiB, resource.Giga) + pods := resource.MustParse(fmt.Sprintf("%d", key.PerNodePodsCount)) + mem := resource.NewScaledQuantity(e.MemoryGB, resource.Giga) + cpu := resource.NewQuantity(e.VCPU, resource.DecimalSI) + gpu := resource.NewQuantity(e.GPU, resource.DecimalSI) + return corev1.ResourceList{ + corev1.ResourceCPU: *cpu, + corev1.ResourceMemory: *mem, + corev1.ResourceEphemeralStorage: osDisk, + corev1.ResourcePods: pods, + corev1.ResourceName("nvidia.com/gpu"): *gpu, + } +} + +func overhead(e *CatalogEntry) *karpcloudprovider.InstanceTypeOverhead { + return &karpcloudprovider.InstanceTypeOverhead{ + KubeReserved: azinstancetype.KubeReservedResources(e.VCPU, float64(e.MemoryGB)), + SystemReserved: azinstancetype.SystemReservedResources(), + EvictionThreshold: azinstancetype.EvictionThreshold(), + } +} diff --git a/karpenter/pkg/cloudproviders/azure/instancetype/offerings.go b/karpenter/pkg/cloudproviders/azure/instancetype/offerings.go new file mode 100644 index 0000000..6b01985 --- /dev/null +++ b/karpenter/pkg/cloudproviders/azure/instancetype/offerings.go @@ -0,0 +1,31 @@ +package instancetype + +import ( + corev1 "k8s.io/api/core/v1" + karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" + karpcloudprovider "sigs.k8s.io/karpenter/pkg/cloudprovider" + "sigs.k8s.io/karpenter/pkg/scheduling" +) + +// defaultPrice is the placeholder price used for all Phase 1 catalog SKUs. 
The +// real price is irrelevant because consolidation is not enabled across cross- +// region NodePools — but Karpenter requires a non-zero price for ordering. +const defaultPrice = 1.0 + +// Offerings returns the cross-region offerings for a single SKU. Phase 1: +// - on-demand only (no spot) +// - empty zone (cross-region zonal placement is deferred) +func Offerings(region string) karpcloudprovider.Offerings { + return karpcloudprovider.Offerings{ + { + Price: defaultPrice, + Available: true, + Requirements: scheduling.NewRequirements( + scheduling.NewRequirement(karpv1.CapacityTypeLabelKey, corev1.NodeSelectorOpIn, karpv1.CapacityTypeOnDemand), + // Empty zone string — Phase 1 is region-only. Karpenter requires + // the zone requirement to exist on every offering. + scheduling.NewRequirement(corev1.LabelTopologyZone, corev1.NodeSelectorOpIn, ""), + ), + }, + } +} diff --git a/karpenter/pkg/cloudproviders/azure/instancetype/provider.go b/karpenter/pkg/cloudproviders/azure/instancetype/provider.go new file mode 100644 index 0000000..20d49f0 --- /dev/null +++ b/karpenter/pkg/cloudproviders/azure/instancetype/provider.go @@ -0,0 +1,73 @@ +package instancetype + +import ( + "fmt" + + "github.com/samber/lo" + corev1 "k8s.io/api/core/v1" + karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" + karpcloudprovider "sigs.k8s.io/karpenter/pkg/cloudprovider" + "sigs.k8s.io/karpenter/pkg/scheduling" +) + +// Provider returns InstanceTypes for a given AzureFlexNodeClass key. There's +// no caching or background refresh because the Phase 1 catalog is hardcoded. +type Provider struct{} + +func NewProvider() *Provider { return &Provider{} } + +// GetInstanceTypes returns one InstanceType per catalog entry, all rooted at +// the NodeClass's region. Order matches Catalog (stable across calls). 
+func (p *Provider) GetInstanceTypes(key NodeClassKey) []*karpcloudprovider.InstanceType { + offerings := Offerings(key.Region) + out := make([]*karpcloudprovider.InstanceType, 0, len(Catalog)) + for i := range Catalog { + out = append(out, New(key, &Catalog[i], offerings)) + } + return out +} + +// GetByName returns a single InstanceType by SKU name, or nil if the SKU is +// not in the Phase 1 catalog. +func (p *Provider) GetByName(key NodeClassKey, name string) *karpcloudprovider.InstanceType { + entry := Get(name) + if entry == nil { + return nil + } + return New(key, entry, Offerings(key.Region)) +} + +// ResolveFromNodeClaim picks the catalog SKU that matches the NodeClaim's +// requirements. Phase 1 chooses the first matching catalog entry by stable +// catalog order (price is uniform). Returns an error if no catalog SKU +// satisfies the requirements. +func (p *Provider) ResolveFromNodeClaim( + key NodeClassKey, + requirements []karpv1.NodeSelectorRequirementWithMinValues, +) (*karpcloudprovider.InstanceType, error) { + reqs := scheduling.NewNodeSelectorRequirementsWithMinValues(requirements...) 
+ + requested := map[string]struct{}{} + if itReq := reqs.Get(corev1.LabelInstanceTypeStable); itReq != nil { + for _, v := range itReq.Values() { + requested[v] = struct{}{} + } + } + + for i := range Catalog { + e := &Catalog[i] + if len(requested) > 0 { + if _, ok := requested[e.Name]; !ok { + continue + } + } + it := New(key, e, Offerings(key.Region)) + if !reqs.IsCompatible(it.Requirements, scheduling.AllowUndefinedWellKnownLabels) { + continue + } + return it, nil + } + + return nil, fmt.Errorf("no AzureFlex catalog SKU matches NodeClaim requirements (requested=%v)", + lo.Keys(requested)) +} diff --git a/karpenter/pkg/cloudproviders/azure/log.go b/karpenter/pkg/cloudproviders/azure/log.go new file mode 100644 index 0000000..b0bcbdd --- /dev/null +++ b/karpenter/pkg/cloudproviders/azure/log.go @@ -0,0 +1,12 @@ +package azure + +import ( + "context" + + "github.com/go-logr/logr" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +func loggerFromContext(ctx context.Context) logr.Logger { + return log.FromContext(ctx).WithName(ProviderIDScheme) +} diff --git a/karpenter/pkg/cloudproviders/azure/nodeclaim.go b/karpenter/pkg/cloudproviders/azure/nodeclaim.go new file mode 100644 index 0000000..b3da4ee --- /dev/null +++ b/karpenter/pkg/cloudproviders/azure/nodeclaim.go @@ -0,0 +1,223 @@ +package azure + +import ( + "crypto/sha256" + "encoding/hex" + "fmt" + "net/url" + "sort" + "strings" + + "github.com/Azure/karpenter-provider-azure/pkg/operator/options" + labelspkg "github.com/Azure/karpenter-provider-azure/pkg/providers/labels" + "github.com/samber/lo" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + v1 "sigs.k8s.io/karpenter/pkg/apis/v1" + "sigs.k8s.io/karpenter/pkg/cloudprovider" + "sigs.k8s.io/karpenter/pkg/utils/resources" + + stretchapi "github.com/Azure/aks-flex/plugin/api" + "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/api/features/kubeadm" + flexvm 
"github.com/Azure/aks-flex/plugin/pkg/services/agentpools/azure/flexvm" + "github.com/Azure/aks-flex/plugin/pkg/topology" + + "github.com/Azure/aks-flex/karpenter/pkg/apis/v1alpha1" + "github.com/Azure/aks-flex/karpenter/pkg/cloudproviders" +) + +// providerID format: +// +// azure-flex:///subscriptions//resourceGroups//providers/Microsoft.Compute/virtualMachines/ +// +// Three slashes after the scheme: empty host, then the canonical ARM resource +// id (which starts with a slash). Round-trip via [providerIDToARMID] / +// [armIDToProviderID] is lossless. + +func armIDToProviderID(armID string) string { + // Empty ARM ID (e.g. status not yet populated by plugin) → empty providerID. + // Karpenter treats empty providerID as "node not yet bound" and will retry, + // rather than producing an invalid `azure-flex:///` URL that breaks + // downstream parsers. + if armID == "" { + return "" + } + if !strings.HasPrefix(armID, "/") { + armID = "/" + armID + } + return ProviderIDScheme + "://" + armID +} + +func providerIDToARMID(providerID string) (string, error) { + u, err := url.Parse(providerID) + if err != nil { + return "", fmt.Errorf("parsing providerID %q: %w", providerID, err) + } + if u.Scheme != ProviderIDScheme { + return "", fmt.Errorf("unexpected providerID scheme %q, expected %q", u.Scheme, ProviderIDScheme) + } + if u.Host != "" { + // Canonical form has empty host. If there's anything in the host + // position the providerID was constructed wrong. + return "", fmt.Errorf("providerID %q has unexpected host %q", providerID, u.Host) + } + if u.Path == "" { + return "", fmt.Errorf("providerID %q has empty ARM path", providerID) + } + return u.Path, nil +} + +// providerIDToVMName extracts the VM name (last path segment) from the providerID. 
+func providerIDToVMName(providerID string) (string, error) { + armID, err := providerIDToARMID(providerID) + if err != nil { + return "", err + } + parts := strings.Split(strings.TrimPrefix(armID, "/"), "/") + if len(parts) == 0 { + return "", fmt.Errorf("providerID %q has no name segment", providerID) + } + return parts[len(parts)-1], nil +} + +// agentPoolToNodeClaim rebuilds a karpenter NodeClaim from a flexvm AgentPool +// returned by the plugin. The instanceType supplies the well-known scheduling +// labels and capacity. +func agentPoolToNodeClaim( + ap *flexvm.AgentPool, + instanceType *cloudprovider.InstanceType, +) *v1.NodeClaim { + rv := &v1.NodeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: ap.GetMetadata().GetId(), + Labels: lo.Assign(map[string]string{}, ap.GetSpec().GetKubeadm().GetNodeLabels()), + Annotations: map[string]string{}, + CreationTimestamp: metav1.NewTime(ap.GetStatus().GetCreatedAt().AsTime()), + }, + Spec: v1.NodeClaimSpec{}, + Status: v1.NodeClaimStatus{ + ProviderID: armIDToProviderID(ap.GetStatus().GetVmResourceId()), + }, + } + + if instanceType != nil { + rv.Labels = lo.Assign(rv.Labels, labelspkg.GetAllSingleValuedRequirementLabels(instanceType.Requirements)) + rv.Status.Capacity = lo.PickBy(instanceType.Capacity, filterNonZero) + rv.Status.Allocatable = lo.PickBy(instanceType.Allocatable(), filterNonZero) + } + + // Phase 1: on-demand only, region-only. + rv.Labels[v1.CapacityTypeLabelKey] = v1.CapacityTypeOnDemand + rv.Labels[corev1.LabelTopologyRegion] = ap.GetSpec().GetLocation() + + return rv +} + +// nodeClaimToAgentPool builds the plugin AgentPool message from the karpenter +// NodeClass + NodeClaim + resolved instance type. Resource names are entirely +// deterministic from the NodeClaim name (for retry idempotency). 
+func nodeClaimToAgentPool( + karpOpts *options.Options, + clusterCA []byte, + nodeClass *v1alpha1.AzureFlexNodeClass, + nodeClaim *v1.NodeClaim, + instanceType *cloudprovider.InstanceType, +) *flexvm.AgentPool { + mdBuilder := stretchapi.Metadata_builder{ + Id: lo.ToPtr(nodeClaim.Name), + } + + osDiskSize := lo.FromPtrOr(nodeClass.Spec.OSDiskSizeGB, 128) + securityType := lo.FromPtrOr(nodeClass.Spec.SecurityType, "Standard") + + kubeadmConfig := kubeadm.Config_builder{ + Server: lo.ToPtr(karpOpts.ClusterEndpoint), + CertificateAuthorityData: clusterCA, + Token: lo.ToPtr(karpOpts.KubeletClientTLSBootstrapToken), + NodeLabels: lo.Assign(map[string]string{}, nodeClaim.Labels, map[string]string{ + cloudproviders.NodeClaimLabelKey: nodeClaim.Name, + topology.NodeLabelKeyCloudProviderManaged: "false", + topology.NodeLabelKeyCloudProviderCluster: karpOpts.NodeResourceGroup, + topology.NodeLabelKeyStretchManaged: "true", + }), + }.Build() + kubeadmConfig.AddNodeLabels(map[string]string{ + corev1.LabelInstanceTypeStable: instanceType.Name, + corev1.LabelTopologyRegion: nodeClass.Spec.Location, + // Empty zone — region-only Phase 1. + corev1.LabelTopologyZone: "", + v1.CapacityTypeLabelKey: v1.CapacityTypeOnDemand, + "kubernetes.azure.com/mode": "user", + }) + kubeadmConfig.AddK8SRegisterTaints(nodeClaim.Spec.Taints...) + kubeadmConfig.AddK8SRegisterTaints(nodeClaim.Spec.StartupTaints...) 
+ kubeadmConfig.AddK8SRegisterTaints(v1.UnregisteredNoExecuteTaint) + + specBuilder := flexvm.AgentPoolSpec_builder{ + SubscriptionId: lo.ToPtr(nodeClass.Spec.SubscriptionID), + ResourceGroup: lo.ToPtr(nodeClass.Spec.ResourceGroup), + Location: lo.ToPtr(nodeClass.Spec.Location), + SubnetId: lo.ToPtr(nodeClass.Spec.SubnetID), + VmSize: lo.ToPtr(instanceType.Name), + SecurityType: lo.ToPtr(securityType), + OsDiskSizeGb: lo.ToPtr(int32(osDiskSize)), + SshPublicKeys: nodeClass.Spec.SSHPublicKeys, + AllocatePublicIp: lo.ToPtr(lo.FromPtrOr(nodeClass.Spec.AllocateNodePublicIP, false)), + Tags: nodeClass.Spec.Tags, + Kubeadm: kubeadmConfig, + } + if ref := nodeClass.Spec.ImageReference; ref != nil { + specBuilder.ImageReference = flexvm.ImageReference_builder{ + Publisher: lo.ToPtr(ref.Publisher), + Offer: lo.ToPtr(ref.Offer), + Sku: lo.ToPtr(ref.SKU), + Version: lo.ToPtr(ref.Version), + }.Build() + } + if id := lo.FromPtrOr(nodeClass.Spec.ImageID, ""); id != "" { + specBuilder.ImageId = lo.ToPtr(id) + } + + return flexvm.AgentPool_builder{ + Metadata: mdBuilder.Build(), + Spec: specBuilder.Build(), + }.Build() +} + +// driftHash returns a deterministic hex digest over the AzureFlexNodeClass +// fields whose change must trigger node drift. Mirrors the nebius "rebuild +// from spec" pattern. +func driftHash(spec v1alpha1.AzureFlexNodeClassSpec) string { + h := sha256.New() + write := func(s string) { _, _ = h.Write([]byte(s)); _, _ = h.Write([]byte{0}) } + + write(spec.SubscriptionID) + write(spec.Location) + write(spec.ResourceGroup) + write(spec.SubnetID) + write(lo.FromPtrOr(spec.SecurityType, "Standard")) + write(fmt.Sprintf("%d", lo.FromPtrOr(spec.OSDiskSizeGB, 128))) + if ref := spec.ImageReference; ref != nil { + write("imgref:" + ref.Publisher + "|" + ref.Offer + "|" + ref.SKU + "|" + ref.Version) + } else { + write("imgref:") + } + write("imgid:" + lo.FromPtrOr(spec.ImageID, "")) + // Tags affect downstream observability/billing but not the VM identity. 
+ // They DO contribute to drift so an operator-driven tag rotation forces + // nodes to reconcile. + keys := make([]string, 0, len(spec.Tags)) + for k := range spec.Tags { + keys = append(keys, k) + } + sort.Strings(keys) + for _, k := range keys { + write("tag:" + k + "=" + spec.Tags[k]) + } + return hex.EncodeToString(h.Sum(nil)) +} + +func filterNonZero(_ corev1.ResourceName, q resource.Quantity) bool { + return !resources.IsZero(q) +} diff --git a/karpenter/pkg/cloudproviders/azure/nodeclaim_test.go b/karpenter/pkg/cloudproviders/azure/nodeclaim_test.go new file mode 100644 index 0000000..6a258f3 --- /dev/null +++ b/karpenter/pkg/cloudproviders/azure/nodeclaim_test.go @@ -0,0 +1,348 @@ +package azure + +import ( + "context" + "strings" + "testing" + "time" + + karpoptions "github.com/Azure/karpenter-provider-azure/pkg/operator/options" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/anypb" + "google.golang.org/protobuf/types/known/timestamppb" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + v1 "sigs.k8s.io/karpenter/pkg/apis/v1" + corecloudprovider "sigs.k8s.io/karpenter/pkg/cloudprovider" + + "github.com/Azure/aks-flex/karpenter/pkg/apis/v1alpha1" + stretchapi "github.com/Azure/aks-flex/plugin/api" + agentpoolsapi "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/api" + flexvm "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/azure/flexvm" + nebiusinstance "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/nebius/instance" +) + +func TestProviderIDRoundTrip(t *testing.T) { + armID := "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/my-rg/providers/Microsoft.Compute/virtualMachines/nodeclaim-abc" + pid := armIDToProviderID(armID) + + if !strings.HasPrefix(pid, "azure-flex:///subscriptions/") { + t.Fatalf("providerID %q must have azure-flex:/// prefix and a slash 
before subscriptions", pid) + } + + got, err := providerIDToARMID(pid) + if err != nil { + t.Fatalf("providerIDToARMID: %v", err) + } + if got != armID { + t.Fatalf("round-trip mismatch:\n in: %s\n out: %s", armID, got) + } + + name, err := providerIDToVMName(pid) + if err != nil { + t.Fatalf("providerIDToVMName: %v", err) + } + if name != "nodeclaim-abc" { + t.Fatalf("expected name nodeclaim-abc, got %s", name) + } +} + +func TestProviderIDInvalidScheme(t *testing.T) { + cases := []string{ + "aks-nebius://abc", + "https://example.com/foo", + "azure:///subscriptions/x/y", + "not-a-url", + } + for _, c := range cases { + t.Run(c, func(t *testing.T) { + if _, err := providerIDToARMID(c); err == nil { + t.Fatalf("expected error parsing providerID %q", c) + } + }) + } +} + +func TestProviderIDRejectsHost(t *testing.T) { + // Three slashes are required: azure-flex:///. Two slashes followed + // by something puts that something in the URL host, which we reject. + bad := "azure-flex://hostname/subscriptions/x/y" + if _, err := providerIDToARMID(bad); err == nil { + t.Fatalf("expected error for providerID with host segment") + } +} + +func TestDriftHashDeterministic(t *testing.T) { + mk := func() v1alpha1.AzureFlexNodeClassSpec { + size := int32(256) + sec := "Standard" + return v1alpha1.AzureFlexNodeClassSpec{ + SubscriptionID: "sub", + Location: "eastus2", + ResourceGroup: "rg", + SubnetID: "/subscriptions/sub/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/s", + SecurityType: &sec, + OSDiskSizeGB: &size, + Tags: map[string]string{"a": "1", "b": "2"}, + } + } + a := driftHash(mk()) + b := driftHash(mk()) + if a != b { + t.Fatalf("hash should be deterministic: %s != %s", a, b) + } + + // Tags in different insertion order should yield the same hash because + // driftHash sorts tag keys. Map iteration order is non-deterministic, so + // build with the same content and just verify equality is preserved. 
+ c := mk() + c.Tags = map[string]string{"b": "2", "a": "1"} + if driftHash(c) != a { + t.Fatalf("hash must not depend on map insertion order") + } + + // Different subnet → different hash. + d := mk() + d.SubnetID = "/subscriptions/sub/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/other" + if driftHash(d) == a { + t.Fatalf("different subnet should produce different hash") + } +} + +func TestDeleteCleansAgentPoolWithoutProviderID(t *testing.T) { + fake := newFakeAgentPoolsClient(testFlexVMAgentPool("nodeclaim-1", "/subscriptions/sub/resourceGroups/rg/providers/Microsoft.Compute/virtualMachines/nodeclaim-1")) + cp := &CloudProvider{stretchAgentPoolsClient: fake} + + err := cp.Delete(context.Background(), &v1.NodeClaim{ + ObjectMeta: metav1.ObjectMeta{Name: "nodeclaim-1"}, + }) + if err != nil { + t.Fatalf("Delete: %v", err) + } + if got := <-fake.deleted; got != "nodeclaim-1" { + t.Fatalf("expected delete for nodeclaim-1, got %q", got) + } +} + +func TestListSkipsAndCleansIncompleteAgentPools(t *testing.T) { + fake := newFakeAgentPoolsClient(testIncompleteFlexVMAgentPool("nodeclaim-1", time.Now().Add(-incompleteAgentPoolCleanupDelay-time.Minute))) + cp := &CloudProvider{stretchAgentPoolsClient: fake} + + nodeClaims, err := cp.List(context.Background()) + if err != nil { + t.Fatalf("List: %v", err) + } + if len(nodeClaims) != 0 { + t.Fatalf("expected incomplete agent pool to be skipped, got %d nodeclaims", len(nodeClaims)) + } + + select { + case got := <-fake.deleted: + if got != "nodeclaim-1" { + t.Fatalf("expected cleanup delete for nodeclaim-1, got %q", got) + } + case <-time.After(time.Second): + t.Fatal("timed out waiting for incomplete agent pool cleanup") + } +} + +func TestListDefersFreshIncompleteAgentPoolCleanup(t *testing.T) { + fake := newFakeAgentPoolsClient(testIncompleteFlexVMAgentPool("nodeclaim-1", time.Now())) + cp := &CloudProvider{stretchAgentPoolsClient: fake} + + nodeClaims, err := cp.List(context.Background()) + if 
err != nil { + t.Fatalf("List: %v", err) + } + if len(nodeClaims) != 0 { + t.Fatalf("expected incomplete agent pool to be skipped, got %d nodeclaims", len(nodeClaims)) + } + + select { + case got := <-fake.deleted: + t.Fatalf("fresh incomplete agent pool should not be cleaned up yet, deleted %q", got) + case <-time.After(100 * time.Millisecond): + } +} + +func TestListIgnoresNonAzureFlexAgentPools(t *testing.T) { + fake := newFakeAgentPoolsClient(testFlexVMAgentPool("nodeclaim-1", "/subscriptions/sub/resourceGroups/rg/providers/Microsoft.Compute/virtualMachines/nodeclaim-1")) + other, err := anypb.New(nebiusinstance.AgentPool_builder{}.Build()) + if err != nil { + t.Fatalf("building nebius agent pool Any: %v", err) + } + fake.rawItems = append(fake.rawItems, other) + cp := &CloudProvider{stretchAgentPoolsClient: fake} + + nodeClaims, err := cp.List(context.Background()) + if err != nil { + t.Fatalf("List: %v", err) + } + if len(nodeClaims) != 1 { + t.Fatalf("expected only the azure-flex agent pool, got %d nodeclaims", len(nodeClaims)) + } + if nodeClaims[0].Name != "nodeclaim-1" { + t.Fatalf("expected nodeclaim-1, got %q", nodeClaims[0].Name) + } +} + +func TestNodeClaimToAgentPoolPropagatesH200LabelsAndTaints(t *testing.T) { + osDiskSize := int32(256) + nodeClass := &v1alpha1.AzureFlexNodeClass{ + ObjectMeta: metav1.ObjectMeta{Name: "h200-eastus2"}, + Spec: v1alpha1.AzureFlexNodeClassSpec{ + SubscriptionID: "sub", + Location: "eastus2", + ResourceGroup: "rg", + SubnetID: "/subscriptions/sub/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/vnet/subnets/nodes", + OSDiskSizeGB: &osDiskSize, + }, + } + nodeClaim := &v1.NodeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "flex-h200-abcde", + Labels: map[string]string{ + "gpu": "h200", + "nvidia.com/gpu.present": "true", + "rune.ai/gpu-family": "h200", + }, + }, + Spec: v1.NodeClaimSpec{ + Taints: []corev1.Taint{ + {Key: "nvidia.com/gpu", Value: "present", Effect: corev1.TaintEffectNoSchedule}, + }, + 
StartupTaints: []corev1.Taint{ + {Key: "nvidia.com/gpu.init", Value: "true", Effect: corev1.TaintEffectNoSchedule}, + }, + }, + } + + ap := nodeClaimToAgentPool( + &karpoptions.Options{ + ClusterEndpoint: "https://cluster.example:443", + KubeletClientTLSBootstrapToken: "token", + NodeResourceGroup: "MC_rg_cluster_eastus2", + }, + []byte("ca"), + nodeClass, + nodeClaim, + &corecloudprovider.InstanceType{Name: "Standard_ND96isr_H200_v5"}, + ) + + labels := ap.GetSpec().GetKubeadm().GetNodeLabels() + if labels["gpu"] != "h200" { + t.Fatalf("expected gpu=h200 label, got %q", labels["gpu"]) + } + if labels["nvidia.com/gpu.present"] != "true" { + t.Fatalf("expected nvidia.com/gpu.present=true label, got %q", labels["nvidia.com/gpu.present"]) + } + if labels["node.kubernetes.io/instance-type"] != "Standard_ND96isr_H200_v5" { + t.Fatalf("expected stable instance-type label, got %q", labels["node.kubernetes.io/instance-type"]) + } + + taints := ap.GetSpec().GetKubeadm().GetK8SRegisterTaints() + assertHasTaint(t, taints, corev1.Taint{Key: "nvidia.com/gpu", Value: "present", Effect: corev1.TaintEffectNoSchedule}) + assertHasTaint(t, taints, corev1.Taint{Key: "nvidia.com/gpu.init", Value: "true", Effect: corev1.TaintEffectNoSchedule}) + assertHasTaint(t, taints, v1.UnregisteredNoExecuteTaint) + + ap.SetStatus(flexvm.AgentPoolStatus_builder{ + VmResourceId: proto.String("/subscriptions/sub/resourceGroups/rg/providers/Microsoft.Compute/virtualMachines/flex-h200-abcde"), + CreatedAt: timestamppb.Now(), + }.Build()) + listed := agentPoolToNodeClaim(ap, nil) + if listed.Labels["gpu"] != "h200" { + t.Fatalf("expected listed nodeclaim to retain gpu=h200 label, got %q", listed.Labels["gpu"]) + } + if listed.Labels["rune.ai/gpu-family"] != "h200" { + t.Fatalf("expected listed nodeclaim to retain rune.ai/gpu-family=h200 label, got %q", listed.Labels["rune.ai/gpu-family"]) + } +} + +func assertHasTaint(t *testing.T, taints []corev1.Taint, want corev1.Taint) { + t.Helper() + for i := 
range taints { + if want.MatchTaint(&taints[i]) { + return + } + } + t.Fatalf("expected taint %+v in %+v", want, taints) +} + +type fakeAgentPoolsClient struct { + items map[string]*flexvm.AgentPool + rawItems []*anypb.Any + deleted chan string +} + +func newFakeAgentPoolsClient(items ...*flexvm.AgentPool) *fakeAgentPoolsClient { + f := &fakeAgentPoolsClient{ + items: map[string]*flexvm.AgentPool{}, + deleted: make(chan string, len(items)+1), + } + for _, item := range items { + f.items[item.GetMetadata().GetId()] = item + } + return f +} + +var _ agentpoolsapi.AgentPoolsClient = (*fakeAgentPoolsClient)(nil) + +func (f *fakeAgentPoolsClient) CreateOrUpdate(context.Context, *stretchapi.CreateOrUpdateRequest, ...grpc.CallOption) (*stretchapi.CreateOrUpdateResponse, error) { + return nil, status.Error(codes.Unimplemented, "not implemented") +} + +func (f *fakeAgentPoolsClient) List(context.Context, *stretchapi.ListRequest, ...grpc.CallOption) (*stretchapi.ListResponse, error) { + items := make([]*anypb.Any, 0, len(f.items)+len(f.rawItems)) + for _, item := range f.items { + anyItem, err := anypb.New(item) + if err != nil { + return nil, err + } + items = append(items, anyItem) + } + items = append(items, f.rawItems...) 
+ return stretchapi.ListResponse_builder{Items: items}.Build(), nil +} + +func (f *fakeAgentPoolsClient) Get(_ context.Context, req *stretchapi.GetRequest, _ ...grpc.CallOption) (*stretchapi.GetResponse, error) { + item, ok := f.items[req.GetId()] + if !ok { + return nil, status.Error(codes.NotFound, "") + } + anyItem, err := anypb.New(item) + if err != nil { + return nil, err + } + return stretchapi.GetResponse_builder{Item: anyItem}.Build(), nil +} + +func (f *fakeAgentPoolsClient) Delete(_ context.Context, req *stretchapi.DeleteRequest, _ ...grpc.CallOption) (*stretchapi.DeleteResponse, error) { + delete(f.items, req.GetId()) + f.deleted <- req.GetId() + return stretchapi.DeleteResponse_builder{}.Build(), nil +} + +func testFlexVMAgentPool(id, vmResourceID string) *flexvm.AgentPool { + return flexvm.AgentPool_builder{ + Metadata: stretchapi.Metadata_builder{ + Id: proto.String(id), + }.Build(), + Status: flexvm.AgentPoolStatus_builder{ + VmResourceId: proto.String(vmResourceID), + }.Build(), + }.Build() +} + +func testIncompleteFlexVMAgentPool(id string, createdAt time.Time) *flexvm.AgentPool { + return flexvm.AgentPool_builder{ + Metadata: stretchapi.Metadata_builder{ + Id: proto.String(id), + }.Build(), + Status: flexvm.AgentPoolStatus_builder{ + CreatedAt: timestamppb.New(createdAt), + }.Build(), + }.Build() +} diff --git a/karpenter/pkg/cloudproviders/kaito/cloudprovider.go b/karpenter/pkg/cloudproviders/kaito/cloudprovider.go index 01d62fc..3a12c4f 100644 --- a/karpenter/pkg/cloudproviders/kaito/cloudprovider.go +++ b/karpenter/pkg/cloudproviders/kaito/cloudprovider.go @@ -205,7 +205,7 @@ func (c *CloudProvider) GetInstanceTypes(ctx context.Context, nodePool *v1.NodeP } func (c *CloudProvider) List(ctx context.Context) ([]*v1.NodeClaim, error) { - agentPools, err := stretchhelper.List[*nebiusinstance.AgentPool]( + agentPools, err := stretchhelper.ListByType[*nebiusinstance.AgentPool]( c.stretchAgentPoolsClient.List, ctx, "", ) diff --git 
a/karpenter/pkg/cloudproviders/nebius/cloudprovider.go b/karpenter/pkg/cloudproviders/nebius/cloudprovider.go index 9a0db20..5dc56d2 100644 --- a/karpenter/pkg/cloudproviders/nebius/cloudprovider.go +++ b/karpenter/pkg/cloudproviders/nebius/cloudprovider.go @@ -296,7 +296,7 @@ func (c *CloudProvider) Get(ctx context.Context, providerID string) (*v1.NodeCla } func (c *CloudProvider) List(ctx context.Context) ([]*v1.NodeClaim, error) { - agentPools, err := stretchhelper.List[*nebiusinstance.AgentPool]( + agentPools, err := stretchhelper.ListByType[*nebiusinstance.AgentPool]( c.stretchAgentPoolsClient.List, ctx, "", ) diff --git a/karpenter/pkg/controllers/azure/nodeclass_status.go b/karpenter/pkg/controllers/azure/nodeclass_status.go new file mode 100644 index 0000000..998e8aa --- /dev/null +++ b/karpenter/pkg/controllers/azure/nodeclass_status.go @@ -0,0 +1,116 @@ +package azure + +import ( + "context" + "fmt" + "strings" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm" + opcontroller "github.com/awslabs/operatorpkg/controller" + "github.com/awslabs/operatorpkg/reasonable" + "k8s.io/apimachinery/pkg/api/equality" + controllerruntime "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/karpenter/pkg/operator/injection" + + "github.com/Azure/aks-flex/karpenter/pkg/apis/v1alpha1" +) + +const controllerNameStatus = "azureflex_nodeclass.status" + +type NodeClassStatusController struct { + kubeClient client.Client +} + +var ( + _ opcontroller.Controller = (*NodeClassStatusController)(nil) + _ reconcile.ObjectReconciler[*v1alpha1.AzureFlexNodeClass] = (*NodeClassStatusController)(nil) +) + +func NewNodeClassStatusController(kubeClient client.Client) *NodeClassStatusController { + return 
&NodeClassStatusController{kubeClient: kubeClient} +} + +func (c *NodeClassStatusController) Register(_ context.Context, mgr manager.Manager) error { + return controllerruntime.NewControllerManagedBy(mgr). + Named(controllerNameStatus). + For(&v1alpha1.AzureFlexNodeClass{}). + WithOptions(controller.Options{ + RateLimiter: reasonable.RateLimiter(), + MaxConcurrentReconciles: 10, + }). + Complete(reconcile.AsReconciler(mgr.GetClient(), c)) +} + +func (c *NodeClassStatusController) Reconcile( + ctx context.Context, + nodeClass *v1alpha1.AzureFlexNodeClass, +) (reconcile.Result, error) { + ctx = injection.WithControllerName(ctx, controllerNameStatus) + + existing := nodeClass + future := nodeClass.DeepCopy() + + if err := c.ensureFinalizer(ctx, future); err != nil { + return reconcile.Result{}, err + } + + if err := validateSpec(future.Spec); err != nil { + future.StatusConditions().SetFalse( + v1alpha1.ConditionTypeValidationSucceeded, "InvalidSpec", err.Error(), + ) + } else { + future.StatusConditions().SetTrue(v1alpha1.ConditionTypeValidationSucceeded) + } + + if !equality.Semantic.DeepEqual(existing, future) { + if err := c.kubeClient.Status().Patch(ctx, future, client.MergeFrom(existing)); err != nil { + return reconcile.Result{}, err + } + } + + return reconcile.Result{}, nil +} + +func (c *NodeClassStatusController) ensureFinalizer( + ctx context.Context, + nodeClass *v1alpha1.AzureFlexNodeClass, +) error { + if controllerutil.ContainsFinalizer(nodeClass, v1alpha1.TerminationFinalizer) { + return nil + } + base := nodeClass.DeepCopy() + controllerutil.AddFinalizer(nodeClass, v1alpha1.TerminationFinalizer) + if err := c.kubeClient.Patch(ctx, nodeClass, client.MergeFrom(base)); err != nil { + return fmt.Errorf("patch finalizer: %w", err) + } + return nil +} + +// validateSpec performs cheap shape checks. Anything Azure-side (subnet +// existence, RG existence, identity perms) is detected on Create. 
+func validateSpec(spec v1alpha1.AzureFlexNodeClassSpec) error { + if strings.TrimSpace(spec.SubscriptionID) == "" { + return fmt.Errorf("subscriptionID is required") + } + if strings.TrimSpace(spec.Location) == "" { + return fmt.Errorf("location is required") + } + if strings.TrimSpace(spec.ResourceGroup) == "" { + return fmt.Errorf("resourceGroup is required") + } + if !strings.HasPrefix(spec.SubnetID, "/subscriptions/") { + return fmt.Errorf("subnetID %q must be a full ARM resource ID", spec.SubnetID) + } + if _, err := arm.ParseResourceID(spec.SubnetID); err != nil { + return fmt.Errorf("subnetID %q is not a valid ARM resource ID: %w", spec.SubnetID, err) + } + if spec.ImageReference != nil && spec.ImageID != nil && *spec.ImageID != "" { + return fmt.Errorf("imageReference and imageID are mutually exclusive") + } + return nil +} diff --git a/karpenter/pkg/controllers/azure/nodeclass_status_test.go b/karpenter/pkg/controllers/azure/nodeclass_status_test.go new file mode 100644 index 0000000..760d928 --- /dev/null +++ b/karpenter/pkg/controllers/azure/nodeclass_status_test.go @@ -0,0 +1,40 @@ +package azure + +import ( + "testing" + + "github.com/Azure/aks-flex/karpenter/pkg/apis/v1alpha1" +) + +func TestValidateSpec(t *testing.T) { + good := v1alpha1.AzureFlexNodeClassSpec{ + SubscriptionID: "sub", + Location: "eastus2", + ResourceGroup: "rg", + SubnetID: "/subscriptions/sub/resourceGroups/rg/providers/Microsoft.Network/virtualNetworks/v/subnets/s", + } + if err := validateSpec(good); err != nil { + t.Fatalf("expected good spec to pass: %v", err) + } + + bad := []v1alpha1.AzureFlexNodeClassSpec{ + {Location: "eastus2", ResourceGroup: "rg", SubnetID: "/subscriptions/x"}, // missing sub + {SubscriptionID: "sub", ResourceGroup: "rg", SubnetID: "/subscriptions/x"}, // missing loc + {SubscriptionID: "sub", Location: "eastus2", SubnetID: "/subscriptions/x"}, // missing rg + {SubscriptionID: "sub", Location: "eastus2", ResourceGroup: "rg", SubnetID: "not-an-arm-id"}, + 
} + for i, s := range bad { + if err := validateSpec(s); err == nil { + t.Errorf("case %d: expected validateSpec to fail", i) + } + } + + // Mutually exclusive fields. + id := "/subscriptions/sub/.../images/x" + mut := good + mut.ImageReference = &v1alpha1.AzureFlexImageReference{Publisher: "p", Offer: "o", SKU: "s"} + mut.ImageID = &id + if err := validateSpec(mut); err == nil { + t.Fatalf("imageReference + imageID must be mutually exclusive") + } +} diff --git a/karpenter/pkg/controllers/azure/nodeclass_termination.go b/karpenter/pkg/controllers/azure/nodeclass_termination.go new file mode 100644 index 0000000..4a04b01 --- /dev/null +++ b/karpenter/pkg/controllers/azure/nodeclass_termination.go @@ -0,0 +1,146 @@ +package azure + +import ( + "context" + "fmt" + "time" + + "github.com/Azure/karpenter-provider-azure/pkg/utils" + opcontroller "github.com/awslabs/operatorpkg/controller" + "github.com/awslabs/operatorpkg/reasonable" + "github.com/samber/lo" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/equality" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + controllerruntime "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1" + "sigs.k8s.io/karpenter/pkg/events" + "sigs.k8s.io/karpenter/pkg/operator/injection" + + "github.com/Azure/aks-flex/karpenter/pkg/apis" + "github.com/Azure/aks-flex/karpenter/pkg/apis/v1alpha1" +) + +const ( + controllerNameTermination = "azureflex_nodeclass.termination" + nodeClassKind = 
"AzureFlexNodeClass" +) + +type NodeClassTerminationController struct { + kubeClient client.Client + recorder events.Recorder +} + +var ( + _ opcontroller.Controller = (*NodeClassTerminationController)(nil) + _ reconcile.ObjectReconciler[*v1alpha1.AzureFlexNodeClass] = (*NodeClassTerminationController)(nil) +) + +func NewNodeClassTerminationController( + kubeClient client.Client, + recorder events.Recorder, +) *NodeClassTerminationController { + return &NodeClassTerminationController{kubeClient: kubeClient, recorder: recorder} +} + +func (c *NodeClassTerminationController) Register(_ context.Context, mgr manager.Manager) error { + return controllerruntime.NewControllerManagedBy(mgr). + Named(controllerNameTermination). + For(&v1alpha1.AzureFlexNodeClass{}). + Watches( + &karpv1.NodeClaim{}, + handler.EnqueueRequestsFromMapFunc(func(_ context.Context, o client.Object) []reconcile.Request { + nc := o.(*karpv1.NodeClaim) + if nc.Spec.NodeClassRef == nil { + return nil + } + if nc.Spec.NodeClassRef.Group != apis.Group { + return nil + } + if nc.Spec.NodeClassRef.Kind != nodeClassKind { + return nil + } + return []reconcile.Request{{NamespacedName: types.NamespacedName{Name: nc.Spec.NodeClassRef.Name}}} + }), + builder.WithPredicates(predicate.Funcs{ + CreateFunc: func(_ event.CreateEvent) bool { return false }, + UpdateFunc: func(_ event.UpdateEvent) bool { return false }, + DeleteFunc: func(_ event.DeleteEvent) bool { return true }, + }), + ). + WithOptions(controller.Options{ + RateLimiter: reasonable.RateLimiter(), + MaxConcurrentReconciles: 10, + }). 
+ Complete(reconcile.AsReconciler(mgr.GetClient(), c)) +} + +func (c *NodeClassTerminationController) Reconcile( + ctx context.Context, + nodeClass *v1alpha1.AzureFlexNodeClass, +) (reconcile.Result, error) { + ctx = injection.WithControllerName(ctx, controllerNameTermination) + if nodeClass.GetDeletionTimestamp().IsZero() { + return reconcile.Result{}, nil + } + return c.finalize(ctx, nodeClass) +} + +func (c *NodeClassTerminationController) finalize( + ctx context.Context, + nodeClass *v1alpha1.AzureFlexNodeClass, +) (reconcile.Result, error) { + if !controllerutil.ContainsFinalizer(nodeClass, v1alpha1.TerminationFinalizer) { + return reconcile.Result{}, nil + } + + stored := nodeClass.DeepCopy() + + nodeClaimList := &karpv1.NodeClaimList{} + if err := c.kubeClient.List(ctx, nodeClaimList, client.MatchingFields{"spec.nodeClassRef.name": nodeClass.Name}); err != nil { + return reconcile.Result{}, fmt.Errorf("listing nodeclaims using nodeclass: %w", err) + } + if len(nodeClaimList.Items) > 0 { + c.recorder.Publish(WaitingOnNodeClaimTerminationEvent(nodeClass, + lo.Map(nodeClaimList.Items, func(nc karpv1.NodeClaim, _ int) string { return nc.Name }))) + return reconcile.Result{RequeueAfter: 10 * time.Minute}, nil + } + + controllerutil.RemoveFinalizer(nodeClass, v1alpha1.TerminationFinalizer) + if !equality.Semantic.DeepEqual(stored, nodeClass) { + if err := c.kubeClient.Patch(ctx, nodeClass, + client.MergeFromWithOptions(stored, client.MergeFromWithOptimisticLock{})); err != nil { + if errors.IsConflict(err) { + return reconcile.Result{Requeue: true}, nil + } + return reconcile.Result{}, client.IgnoreNotFound(fmt.Errorf("removing termination finalizer: %w", err)) + } + } + return reconcile.Result{}, nil +} + +type RuntimeObjectWithUID interface { + runtime.Object + GetUID() types.UID +} + +func WaitingOnNodeClaimTerminationEvent(nodeClass RuntimeObjectWithUID, names []string) events.Event { + return events.Event{ + InvolvedObject: nodeClass, + Type: 
corev1.EventTypeNormal, + Reason: "WaitingOnNodeClaimTermination", + Message: fmt.Sprintf("Waiting on NodeClaim termination for %s", utils.PrettySlice(names, 5)), + DedupeValues: []string{string(nodeClass.GetUID())}, + } +} diff --git a/karpenter/pkg/controllers/controllers.go b/karpenter/pkg/controllers/controllers.go index 29f7172..a3d04f6 100644 --- a/karpenter/pkg/controllers/controllers.go +++ b/karpenter/pkg/controllers/controllers.go @@ -7,6 +7,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/karpenter/pkg/events" + "github.com/Azure/aks-flex/karpenter/pkg/controllers/azure" "github.com/Azure/aks-flex/karpenter/pkg/controllers/nebius" "github.com/Azure/aks-flex/karpenter/pkg/controllers/nodes" ) @@ -21,6 +22,9 @@ func NewControllers( nebius.NewNodeClassStatusController(kubeClient), nebius.NewNodeClassTerminationController(kubeClient, recorder), + azure.NewNodeClassStatusController(kubeClient), + azure.NewNodeClassTerminationController(kubeClient, recorder), + nodes.NewSetProviderIDController(kubeClient), } } diff --git a/plugin/pkg/helper/helper.go b/plugin/pkg/helper/helper.go index 11bf9a9..62a2b9e 100644 --- a/plugin/pkg/helper/helper.go +++ b/plugin/pkg/helper/helper.go @@ -2,6 +2,8 @@ package helper import ( "context" + "fmt" + "reflect" "google.golang.org/grpc" "google.golang.org/grpc/codes" @@ -98,6 +100,63 @@ func List[M proto.Message, REQT any, REQ id[REQT], RESP items](list func(context return ms, nil } +// ListByType filters parent list responses to the concrete protobuf type M. +// It still fails if a matching Any payload cannot be decoded. +func ListByType[M proto.Message, REQT any, REQ id[REQT], RESP items](list func(context.Context, REQ, ...grpc.CallOption) (RESP, error), ctx context.Context, id string, opts ...grpc.CallOption) ([]M, error) { + req := REQ(new(REQT)) + req.SetId(id) + + resp, err := list(ctx, req, opts...) 
+ if err != nil { + return nil, err + } + + typeURL, err := typeURLFor[M]() + if err != nil { + return nil, err + } + + var ms []M + for _, item := range resp.GetItems() { + if item.GetTypeUrl() != typeURL { + continue + } + + m, err := AnyTo[M](item) + if err != nil { + return nil, err + } + + ms = append(ms, m) + } + + return ms, nil +} + +func typeURLFor[M proto.Message]() (string, error) { + msg, err := newProtoMessage[M]() + if err != nil { + return "", err + } + return "type.googleapis.com/" + string(msg.ProtoReflect().Descriptor().FullName()), nil +} + +func newProtoMessage[M proto.Message]() (M, error) { + var zero M + t := reflect.TypeOf(zero) + if t == nil { + return zero, fmt.Errorf("proto message type has no concrete type") + } + if t.Kind() != reflect.Pointer { + return zero, fmt.Errorf("proto message type %T must be a pointer", zero) + } + msg, ok := reflect.New(t.Elem()).Interface().(M) + if !ok { + return zero, fmt.Errorf("proto message type %T cannot be constructed", zero) + } + return msg, nil +} + func AnyTo[M proto.Message](o *anypb.Any) (M, error) { m, err := o.UnmarshalNew() if err != nil { diff --git a/plugin/pkg/helper/helper_test.go b/plugin/pkg/helper/helper_test.go new file mode 100644 index 0000000..41582a5 --- /dev/null +++ b/plugin/pkg/helper/helper_test.go @@ -0,0 +1,70 @@ +package helper_test + +import ( + "context" + "testing" + + "google.golang.org/grpc" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/anypb" + + stretchapi "github.com/Azure/aks-flex/plugin/api" + "github.com/Azure/aks-flex/plugin/pkg/helper" + flexvm "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/azure/flexvm" + nebiusinstance "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/nebius/instance" +) + +func TestListByTypeFiltersDifferentAnyTypes(t *testing.T) { + flexAny := mustAny(t, flexvm.AgentPool_builder{ + Metadata: stretchapi.Metadata_builder{Id: proto.String("flex")}.Build(), + }.Build()) + nebiusAny := 
mustAny(t, nebiusinstance.AgentPool_builder{ + Metadata: stretchapi.Metadata_builder{Id: proto.String("nebius")}.Build(), + }.Build()) + + var gotID string + list := func(_ context.Context, req *stretchapi.ListRequest, _ ...grpc.CallOption) (*stretchapi.ListResponse, error) { + gotID = req.GetId() + return stretchapi.ListResponse_builder{Items: []*anypb.Any{flexAny, nebiusAny}}.Build(), nil + } + + got, err := helper.ListByType[*flexvm.AgentPool](list, context.Background(), "nodeclaims") + if err != nil { + t.Fatalf("ListByType: %v", err) + } + if gotID != "nodeclaims" { + t.Fatalf("expected List request id nodeclaims, got %q", gotID) + } + if len(got) != 1 { + t.Fatalf("expected one flexvm agent pool, got %d", len(got)) + } + if got[0].GetMetadata().GetId() != "flex" { + t.Fatalf("expected flex agent pool, got %q", got[0].GetMetadata().GetId()) + } +} + +func TestListByTypeReturnsErrorsForInvalidMatchingPayloads(t *testing.T) { + flexAny := mustAny(t, flexvm.AgentPool_builder{}.Build()) + corruptFlexAny := &anypb.Any{ + TypeUrl: flexAny.GetTypeUrl(), + Value: []byte{0xff}, + } + nebiusAny := mustAny(t, nebiusinstance.AgentPool_builder{}.Build()) + + list := func(_ context.Context, _ *stretchapi.ListRequest, _ ...grpc.CallOption) (*stretchapi.ListResponse, error) { + return stretchapi.ListResponse_builder{Items: []*anypb.Any{nebiusAny, corruptFlexAny}}.Build(), nil + } + + if _, err := helper.ListByType[*flexvm.AgentPool](list, context.Background(), ""); err == nil { + t.Fatal("expected matching corrupt flexvm payload to return an error") + } +} + +func mustAny(t *testing.T, msg proto.Message) *anypb.Any { + t.Helper() + item, err := anypb.New(msg) + if err != nil { + t.Fatalf("anypb.New: %v", err) + } + return item +} diff --git a/plugin/pkg/services/agentpools/agentpools.go b/plugin/pkg/services/agentpools/agentpools.go index 5389c60..6b1acaf 100644 --- a/plugin/pkg/services/agentpools/agentpools.go +++ b/plugin/pkg/services/agentpools/agentpools.go @@ -5,6 +5,7 
@@ import ( "github.com/Azure/aks-flex/plugin/pkg/server" "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/api" "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/aws/ubuntu2404instance" + "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/azure/flexvm" "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/azure/ubuntu2404vmss" nebiusinstance "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/nebius/instance" ) @@ -27,6 +28,10 @@ func NewAgentPoolsServer(db db.DB) api.AgentPoolsServer { return ubuntu2404vmss.NewAgentPoolsServer(srv.DB) }, &ubuntu2404vmss.AgentPool{}) + server.MustRegister(srv.Servers, func() (api.AgentPoolsServer, error) { + return flexvm.NewAgentPoolsServer(srv.DB) + }, &flexvm.AgentPool{}) + server.MustRegister(srv.Servers, func() (api.AgentPoolsServer, error) { return nebiusinstance.NewAgentPoolsServer(srv.DB) }, &nebiusinstance.AgentPool{}) diff --git a/plugin/pkg/services/agentpools/azure/flexvm/agentpools.go b/plugin/pkg/services/agentpools/azure/flexvm/agentpools.go new file mode 100644 index 0000000..86017ae --- /dev/null +++ b/plugin/pkg/services/agentpools/azure/flexvm/agentpools.go @@ -0,0 +1,502 @@ +// Package flexvm implements the cross-region Azure VM agent pool service. +// +// This service creates a single Microsoft.Compute/virtualMachines (and a +// dedicated NIC + OS disk) per AgentPool. It is intentionally separate from +// ubuntu2404vmss (which uses a Virtual Machine Scale Set) because per-VM +// management is required for Karpenter's per-NodeClaim lifecycle and for +// straightforward cross-region placement. +// +// Authentication: the plugin process is expected to authenticate via +// DefaultAzureCredential (e.g. a workload identity / managed identity) and +// hold Contributor on the target subscription / resource group / subnet. 
+// +// Resource naming is fully deterministic from the AgentPool ID (which is the +// NodeClaim name) so retries are idempotent: +// - VM name = +// - NIC name = -nic +// - IP cfg name = ipconfig +// +// The NIC and OS disk are configured with DeleteOption=Delete so a single VM +// delete cascades cleanup; this is critical for idempotency on Karpenter +// retries. +package flexvm + +import ( + "context" + "encoding/base64" + "errors" + "fmt" + "log/slog" + "strings" + "time" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v7" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v8" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/types/known/anypb" + "google.golang.org/protobuf/types/known/timestamppb" + + "github.com/Azure/aks-flex/plugin/api" + "github.com/Azure/aks-flex/plugin/pkg/db" + "github.com/Azure/aks-flex/plugin/pkg/helper" + agentpools "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/api" + "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/userdata/flex" + "github.com/Azure/aks-flex/plugin/pkg/topology" +) + +var _ api.Object = (*AgentPool)(nil) + +// Default DSVM image. SecurityType MUST stay "Standard" — TrustedLaunch is +// known to break the DSVM image (verified during manual H200 bringup). 
+const ( + defaultImagePublisher = "microsoft-dsvm" + defaultImageOffer = "ubuntu-hpc" + defaultImageSKU = "2204" + defaultImageVersion = "latest" + + defaultAdminUsername = "ubuntu" +) + +type agentpoolsServer struct { + agentpools.UnimplementedAgentPoolsServer + storage db.DB + + credentials azcore.TokenCredential +} + +func NewAgentPoolsServer(storage db.DB) (agentpools.AgentPoolsServer, error) { + credentials, err := azidentity.NewDefaultAzureCredential(nil) + if err != nil { + return nil, err + } + + return &agentpoolsServer{ + storage: storage, + credentials: credentials, + }, nil +} + +func (srv *agentpoolsServer) CreateOrUpdate(ctx context.Context, req *api.CreateOrUpdateRequest) (resp *api.CreateOrUpdateResponse, err error) { + ap, err := helper.AnyTo[*AgentPool](req.GetItem()) + if err != nil { + return nil, err + } + if err := validateSpec(ap.GetSpec()); err != nil { + return nil, status.Error(codes.InvalidArgument, err.Error()) + } + + spec := ap.GetSpec() + vmName := ap.GetMetadata().GetId() + nicName := vmName + "-nic" + + // Annotate kubeadm node labels with cross-region topology hints. Note: + // region is the *target* region (the VM's region), which may differ + // from the AKS control-plane region — that's the whole point of flexvm. + kubeadmConfig := spec.GetKubeadm() + kubeadmConfig.AddNodeLabels(map[string]string{ + topology.NodeLabelKeyCloud: "azure", + topology.NodeLabelKeyRegion: strings.ToLower(spec.GetLocation()), + topology.NodeLabelKeyInstanceType: strings.ToLower(spec.GetVmSize()), + }) + + userData, err := flex.UserData( + flex.WithKubeadmConfig(kubeadmConfig), + ) + if err != nil { + return nil, fmt.Errorf("rendering flex user data: %w", err) + } + userDataBytes, err := userData.Gzip() + if err != nil { + return nil, fmt.Errorf("gzipping user data: %w", err) + } + userDataB64 := base64.StdEncoding.EncodeToString(userDataBytes) + + // 1. NIC (idempotent: same name → ARM updates in place). 
+ nicsClient, err := armnetwork.NewInterfacesClient(spec.GetSubscriptionId(), srv.credentials, nil) + if err != nil { + return nil, fmt.Errorf("creating NIC client: %w", err) + } + nicParams := armnetwork.Interface{ + Location: to.Ptr(spec.GetLocation()), + Tags: toARMTags(spec.GetTags()), + Properties: &armnetwork.InterfacePropertiesFormat{ + IPConfigurations: []*armnetwork.InterfaceIPConfiguration{ + { + Name: to.Ptr("ipconfig"), + Properties: &armnetwork.InterfaceIPConfigurationPropertiesFormat{ + Subnet: &armnetwork.Subnet{ID: to.Ptr(spec.GetSubnetId())}, + PrivateIPAllocationMethod: to.Ptr(armnetwork.IPAllocationMethodDynamic), + }, + }, + }, + }, + } + if spec.GetAllocatePublicIp() { + // validateSpec rejects this; the branch is kept as a compile-time + // reminder for when Phase 2 adds PIP support. + return nil, errors.New("allocate_public_ip=true is not supported in Phase 1") + } + nicPoller, err := nicsClient.BeginCreateOrUpdate(ctx, spec.GetResourceGroup(), nicName, nicParams, nil) + if err != nil { + return nil, fmt.Errorf("creating NIC %q: %w", nicName, err) + } + nicResp, err := nicPoller.PollUntilDone(ctx, nil) + if err != nil { + return nil, fmt.Errorf("polling NIC creation %q: %w", nicName, err) + } + nicID := *nicResp.ID + persistIncompleteAgentPool(srv.storage, ap) + + nicCleanedUp := false + defer func() { + if err == nil || nicCleanedUp { + return + } + go cleanupReservedNIC(nicsClient, spec.GetResourceGroup(), nicName, srv.storage, vmName) + }() + + // 2. VM. NIC + OS disk both set DeleteOption=Delete so a single VM + // delete cascades — this is critical for Karpenter retry idempotency. 
+ vmsClient, err := armcompute.NewVirtualMachinesClient(spec.GetSubscriptionId(), srv.credentials, nil) + if err != nil { + return nil, fmt.Errorf("creating VM client: %w", err) + } + imageRef, err := buildImageReference(spec) + if err != nil { + return nil, status.Error(codes.InvalidArgument, err.Error()) + } + osDiskSizeGB := spec.GetOsDiskSizeGb() + if osDiskSizeGB == 0 { + osDiskSizeGB = 128 + } + + vmParams := armcompute.VirtualMachine{ + Location: to.Ptr(spec.GetLocation()), + Tags: toARMTags(spec.GetTags()), + Properties: &armcompute.VirtualMachineProperties{ + HardwareProfile: &armcompute.HardwareProfile{ + VMSize: to.Ptr(armcompute.VirtualMachineSizeTypes(spec.GetVmSize())), + }, + SecurityProfile: &armcompute.SecurityProfile{ + // Standard only — TrustedLaunch is deferred (breaks DSVM). + SecurityType: nil, + }, + NetworkProfile: &armcompute.NetworkProfile{ + NetworkInterfaces: []*armcompute.NetworkInterfaceReference{ + { + ID: to.Ptr(nicID), + Properties: &armcompute.NetworkInterfaceReferenceProperties{ + Primary: to.Ptr(true), + DeleteOption: to.Ptr(armcompute.DeleteOptionsDelete), + }, + }, + }, + }, + OSProfile: &armcompute.OSProfile{ + ComputerName: to.Ptr(vmName), + AdminUsername: to.Ptr(defaultAdminUsername), + LinuxConfiguration: &armcompute.LinuxConfiguration{ + DisablePasswordAuthentication: to.Ptr(true), + SSH: buildSSHConfig(spec.GetSshPublicKeys()), + }, + }, + StorageProfile: &armcompute.StorageProfile{ + ImageReference: imageRef, + OSDisk: &armcompute.OSDisk{ + CreateOption: to.Ptr(armcompute.DiskCreateOptionTypesFromImage), + Caching: to.Ptr(armcompute.CachingTypesReadWrite), + DiskSizeGB: to.Ptr(osDiskSizeGB), + DeleteOption: to.Ptr(armcompute.DiskDeleteOptionTypesDelete), + ManagedDisk: &armcompute.ManagedDiskParameters{ + StorageAccountType: to.Ptr(armcompute.StorageAccountTypesPremiumLRS), + }, + }, + }, + // UserData (NOT customData): the bootstrap renderer expects to read + // from the IMDS userData endpoint. 
Mirrors the ubuntu2404vmss path. + UserData: to.Ptr(userDataB64), + }, + } + + vmPoller, err := vmsClient.BeginCreateOrUpdate(ctx, spec.GetResourceGroup(), vmName, vmParams, nil) + if err != nil { + return nil, fmt.Errorf("creating VM %q: %w", vmName, err) + } + vmResp, err := vmPoller.PollUntilDone(ctx, nil) + if err != nil { + return nil, fmt.Errorf("polling VM creation %q: %w", vmName, err) + } + if vmResp.ID == nil { + return nil, fmt.Errorf("VM %q created but Azure returned nil resource ID", vmName) + } + // VM is up and owns the NIC via DeleteOption=Delete; suppress the deferred + // NIC cleanup so a downstream marshal failure doesn't tear down the node. + nicCleanedUp = true + + ap.SetStatus(AgentPoolStatus_builder{ + VmResourceId: vmResp.ID, + CreatedAt: timestamppb.Now(), + }.Build()) + + item, err := anypb.New(ap) + if err != nil { + return nil, err + } + + return api.CreateOrUpdateResponse_builder{ + Item: item, + }.Build(), nil +} + +func (srv *agentpoolsServer) Delete(ctx context.Context, req *api.DeleteRequest) (*api.DeleteResponse, error) { + obj, ok := srv.storage.Get(req.GetId()) + if !ok { + return api.DeleteResponse_builder{}.Build(), nil + } + + ap, err := helper.To[*AgentPool](obj) + if err != nil { + return nil, err + } + spec := ap.GetSpec() + + vmName := ap.GetMetadata().GetId() + nicName := vmName + "-nic" + + // Delete VM first; NIC + OS disk cascade because we set DeleteOption=Delete on create. 
+ vmsClient, err := armcompute.NewVirtualMachinesClient(spec.GetSubscriptionId(), srv.credentials, nil) + if err != nil { + return nil, fmt.Errorf("creating VM client: %w", err) + } + vmPoller, err := vmsClient.BeginDelete(ctx, spec.GetResourceGroup(), vmName, &armcompute.VirtualMachinesClientBeginDeleteOptions{ + ForceDeletion: to.Ptr(true), + }) + if err != nil && !isNotFound(err) { + return nil, fmt.Errorf("starting VM delete %q: %w", vmName, err) + } + if vmPoller != nil { + if _, err := vmPoller.PollUntilDone(ctx, nil); err != nil && !isNotFound(err) { + return nil, fmt.Errorf("polling VM delete %q: %w", vmName, err) + } + } + + // Best-effort NIC delete in case the VM never made it to a state where + // DeleteOption applied (e.g. failed mid-create). Idempotent. Uses a fresh + // context so cleanup still runs if the caller's ctx was cancelled mid-Delete. + nicCtx, nicCancel := context.WithTimeout(context.Background(), 2*time.Minute) + defer nicCancel() + nicsClient, err := armnetwork.NewInterfacesClient(spec.GetSubscriptionId(), srv.credentials, nil) + if err != nil { + return nil, fmt.Errorf("creating NIC client: %w", err) + } + nicPoller, err := nicsClient.BeginDelete(nicCtx, spec.GetResourceGroup(), nicName, nil) + if err != nil && !isNotFound(err) { + return nil, fmt.Errorf("starting NIC delete %q: %w", nicName, err) + } + if nicPoller != nil { + if _, err := nicPoller.PollUntilDone(nicCtx, nil); err != nil && !isNotFound(err) { + return nil, fmt.Errorf("polling NIC delete %q: %w", nicName, err) + } + } + + return api.DeleteResponse_builder{}.Build(), nil +} + +// validateSpec fails fast with the cheap structural checks that don't need an Azure round-trip. 
+func validateSpec(spec *AgentPoolSpec) error { + if spec.GetSubscriptionId() == "" { + return errors.New("subscription_id is required") + } + if spec.GetResourceGroup() == "" { + return errors.New("resource_group is required") + } + if spec.GetLocation() == "" { + return errors.New("location is required") + } + if spec.GetSubnetId() == "" { + return errors.New("subnet_id is required") + } + if _, err := arm.ParseResourceID(spec.GetSubnetId()); err != nil { + return fmt.Errorf("subnet_id %q is not a valid ARM resource id: %w", spec.GetSubnetId(), err) + } + if spec.GetVmSize() == "" { + return errors.New("vm_size is required") + } + if spec.GetImageId() != "" && spec.GetImageReference() != nil { + return errors.New("image_id and image_reference are mutually exclusive") + } + if st := spec.GetSecurityType(); st != "" && st != "Standard" { + return fmt.Errorf("unsupported security_type %q (only Standard is supported in Phase 1)", st) + } + // Public IP per NIC is not implemented in Phase 1. Reject instead of + // silently creating a private-only NIC when callers expect a public one. + if spec.GetAllocatePublicIp() { + return errors.New("allocate_public_ip=true is not supported in Phase 1") + } + // kubeadm config carries the AKS bootstrap token + CA and is used to + // render userdata; a nil value here would panic in CreateOrUpdate. 
+ if spec.GetKubeadm() == nil { + return errors.New("kubeadm is required") + } + return nil +} + +func buildImageReference(spec *AgentPoolSpec) (*armcompute.ImageReference, error) { + if spec.GetImageId() != "" { + return &armcompute.ImageReference{ + ID: to.Ptr(spec.GetImageId()), + }, nil + } + ref := spec.GetImageReference() + if ref == nil { + return &armcompute.ImageReference{ + Publisher: to.Ptr(defaultImagePublisher), + Offer: to.Ptr(defaultImageOffer), + SKU: to.Ptr(defaultImageSKU), + Version: to.Ptr(defaultImageVersion), + }, nil + } + if ref.GetPublisher() == "" || ref.GetOffer() == "" || ref.GetSku() == "" { + return nil, errors.New("image_reference requires publisher, offer, and sku") + } + version := ref.GetVersion() + if version == "" { + version = defaultImageVersion + } + return &armcompute.ImageReference{ + Publisher: to.Ptr(ref.GetPublisher()), + Offer: to.Ptr(ref.GetOffer()), + SKU: to.Ptr(ref.GetSku()), + Version: to.Ptr(version), + }, nil +} + +func buildSSHConfig(keys []string) *armcompute.SSHConfiguration { + if len(keys) == 0 { + return nil + } + pks := make([]*armcompute.SSHPublicKey, 0, len(keys)) + for _, k := range keys { + pks = append(pks, &armcompute.SSHPublicKey{ + Path: to.Ptr("/home/" + defaultAdminUsername + "/.ssh/authorized_keys"), + KeyData: to.Ptr(k), + }) + } + return &armcompute.SSHConfiguration{PublicKeys: pks} +} + +func toARMTags(tags map[string]string) map[string]*string { + if len(tags) == 0 { + return nil + } + out := make(map[string]*string, len(tags)) + for k, v := range tags { + out[k] = to.Ptr(v) + } + return out +} + +func isNotFound(err error) bool { + var rerr *azcore.ResponseError + if errors.As(err, &rerr) { + return rerr.StatusCode == 404 + } + return false +} + +func persistIncompleteAgentPool(storage db.DB, ap *AgentPool) { + if storage == nil || ap == nil || ap.GetMetadata().GetId() == "" { + return + } + var createdAt *timestamppb.Timestamp + if obj, ok := storage.Get(ap.GetMetadata().GetId()); ok { + 
existing, err := helper.To[*AgentPool](obj) + if err == nil && existing.GetStatus().GetVmResourceId() != "" { + return + } + if err == nil { + createdAt = existing.GetStatus().GetCreatedAt() + } + } + if ap.GetStatus().GetCreatedAt() != nil { + createdAt = ap.GetStatus().GetCreatedAt() + } + if ap.GetStatus().GetVmResourceId() == "" && createdAt == nil { + createdAt = timestamppb.Now() + } + if ap.GetStatus().GetVmResourceId() == "" { + ap.SetStatus(AgentPoolStatus_builder{ + CreatedAt: createdAt, + }.Build()) + } + storage.CreateOrUpdate(ap) +} + +func deleteIncompleteAgentPool(storage db.DB, id string) { + if storage == nil || id == "" { + return + } + obj, ok := storage.Get(id) + if !ok { + return + } + ap, err := helper.To[*AgentPool](obj) + if err != nil || ap.GetStatus().GetVmResourceId() != "" { + return + } + storage.Delete(id) +} + +// cleanupReservedNIC deletes an orphan NIC after the 180s ARM reservation +// window expires. The failed AgentPool is persisted before this starts so a +// plugin restart can still rediscover and delete the orphan through Karpenter's +// normal List/Delete path. +func cleanupReservedNIC(nicsClient *armnetwork.InterfacesClient, resourceGroup, nicName string, storage db.DB, agentPoolID string) { + // Wait out the ARM 180s NIC reservation window, plus slack for clock + // skew and any in-flight VM-create retry that might re-reserve the NIC + // on the same name (Karpenter retries DO use new nodeclaim names, so + // this is belt-and-suspenders). 
+ time.Sleep(3*time.Minute + 30*time.Second) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + + for attempt := 0; attempt < 5; attempt++ { + delPoller, delErr := nicsClient.BeginDelete(ctx, resourceGroup, nicName, nil) + if delErr == nil { + if _, pollErr := delPoller.PollUntilDone(ctx, nil); pollErr == nil { + deleteIncompleteAgentPool(storage, agentPoolID) + slog.Info("flexvm orphan NIC cleanup succeeded", + "nic", nicName, "attempt", attempt) + return + } else { + slog.Warn("flexvm orphan NIC poll failed", + "nic", nicName, "attempt", attempt, "err", pollErr) + } + } else { + // 404 = already gone (raced with someone else). Treat as success. + if isNotFound(delErr) { + deleteIncompleteAgentPool(storage, agentPoolID) + slog.Info("flexvm orphan NIC already gone", + "nic", nicName, "attempt", attempt) + return + } + slog.Warn("flexvm orphan NIC BeginDelete failed", + "nic", nicName, "attempt", attempt, "err", delErr) + } + + select { + case <-time.After(30 * time.Second): + case <-ctx.Done(): + slog.Error("flexvm orphan NIC cleanup timed out", + "nic", nicName, "attempts", attempt+1) + return + } + } + slog.Error("flexvm orphan NIC cleanup exhausted retries", "nic", nicName) +} diff --git a/plugin/pkg/services/agentpools/azure/flexvm/agentpools.pb.go b/plugin/pkg/services/agentpools/azure/flexvm/agentpools.pb.go new file mode 100644 index 0000000..16282f4 --- /dev/null +++ b/plugin/pkg/services/agentpools/azure/flexvm/agentpools.pb.go @@ -0,0 +1,941 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. 
+// versions: +// protoc-gen-go v1.36.10 +// protoc v6.33.0 +// source: plugin/pkg/services/agentpools/azure/flexvm/agentpools.proto + +package flexvm + +import ( + api "github.com/Azure/aks-flex/plugin/api" + kubeadm "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/api/features/kubeadm" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + timestamppb "google.golang.org/protobuf/types/known/timestamppb" + reflect "reflect" + unsafe "unsafe" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// AgentPool models a single cross-region Azure VM. +// +// Unlike the ubuntu2404vmss service which manages a Virtual Machine Scale Set, +// flexvm creates one Microsoft.Compute/virtualMachines per AgentPool. This is +// suitable for cross-region placement and matches Karpenter's per-NodeClaim +// resource lifecycle. +// +// The plugin process is assumed to authenticate via DefaultAzureCredential and +// hold Contributor on the target subscription / resource group / subnet. 
+type AgentPool struct { + state protoimpl.MessageState `protogen:"opaque.v1"` + xxx_hidden_Metadata *api.Metadata `protobuf:"bytes,1,opt,name=metadata"` + xxx_hidden_Spec *AgentPoolSpec `protobuf:"bytes,2,opt,name=spec"` + xxx_hidden_Status *AgentPoolStatus `protobuf:"bytes,3,opt,name=status"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *AgentPool) Reset() { + *x = AgentPool{} + mi := &file_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *AgentPool) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*AgentPool) ProtoMessage() {} + +func (x *AgentPool) ProtoReflect() protoreflect.Message { + mi := &file_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +func (x *AgentPool) GetMetadata() *api.Metadata { + if x != nil { + return x.xxx_hidden_Metadata + } + return nil +} + +func (x *AgentPool) GetSpec() *AgentPoolSpec { + if x != nil { + return x.xxx_hidden_Spec + } + return nil +} + +func (x *AgentPool) GetStatus() *AgentPoolStatus { + if x != nil { + return x.xxx_hidden_Status + } + return nil +} + +func (x *AgentPool) SetMetadata(v *api.Metadata) { + x.xxx_hidden_Metadata = v +} + +func (x *AgentPool) SetSpec(v *AgentPoolSpec) { + x.xxx_hidden_Spec = v +} + +func (x *AgentPool) SetStatus(v *AgentPoolStatus) { + x.xxx_hidden_Status = v +} + +func (x *AgentPool) HasMetadata() bool { + if x == nil { + return false + } + return x.xxx_hidden_Metadata != nil +} + +func (x *AgentPool) HasSpec() bool { + if x == nil { + return false + } + return x.xxx_hidden_Spec != nil +} + +func (x *AgentPool) HasStatus() bool { + if x == nil { + return false + } + return x.xxx_hidden_Status != nil +} + 
+func (x *AgentPool) ClearMetadata() { + x.xxx_hidden_Metadata = nil +} + +func (x *AgentPool) ClearSpec() { + x.xxx_hidden_Spec = nil +} + +func (x *AgentPool) ClearStatus() { + x.xxx_hidden_Status = nil +} + +type AgentPool_builder struct { + _ [0]func() // Prevents comparability and use of unkeyed literals for the builder. + + Metadata *api.Metadata + Spec *AgentPoolSpec + Status *AgentPoolStatus +} + +func (b0 AgentPool_builder) Build() *AgentPool { + m0 := &AgentPool{} + b, x := &b0, m0 + _, _ = b, x + x.xxx_hidden_Metadata = b.Metadata + x.xxx_hidden_Spec = b.Spec + x.xxx_hidden_Status = b.Status + return m0 +} + +type AgentPoolSpec struct { + state protoimpl.MessageState `protogen:"opaque.v1"` + xxx_hidden_SubscriptionId *string `protobuf:"bytes,1,opt,name=subscription_id,json=subscriptionId"` + xxx_hidden_ResourceGroup *string `protobuf:"bytes,2,opt,name=resource_group,json=resourceGroup"` + xxx_hidden_Location *string `protobuf:"bytes,3,opt,name=location"` + xxx_hidden_SubnetId *string `protobuf:"bytes,4,opt,name=subnet_id,json=subnetId"` + xxx_hidden_VmSize *string `protobuf:"bytes,5,opt,name=vm_size,json=vmSize"` + xxx_hidden_ImageReference *ImageReference `protobuf:"bytes,6,opt,name=image_reference,json=imageReference"` + xxx_hidden_ImageId *string `protobuf:"bytes,7,opt,name=image_id,json=imageId"` + xxx_hidden_SecurityType *string `protobuf:"bytes,8,opt,name=security_type,json=securityType"` + xxx_hidden_OsDiskSizeGb int32 `protobuf:"varint,9,opt,name=os_disk_size_gb,json=osDiskSizeGb"` + xxx_hidden_SshPublicKeys []string `protobuf:"bytes,10,rep,name=ssh_public_keys,json=sshPublicKeys"` + xxx_hidden_AllocatePublicIp bool `protobuf:"varint,11,opt,name=allocate_public_ip,json=allocatePublicIp"` + xxx_hidden_Tags map[string]string `protobuf:"bytes,12,rep,name=tags" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` + xxx_hidden_Kubeadm *kubeadm.Config `protobuf:"bytes,13,opt,name=kubeadm"` + XXX_raceDetectHookData 
protoimpl.RaceDetectHookData + XXX_presence [1]uint32 + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *AgentPoolSpec) Reset() { + *x = AgentPoolSpec{} + mi := &file_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *AgentPoolSpec) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*AgentPoolSpec) ProtoMessage() {} + +func (x *AgentPoolSpec) ProtoReflect() protoreflect.Message { + mi := &file_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto_msgTypes[1] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +func (x *AgentPoolSpec) GetSubscriptionId() string { + if x != nil { + if x.xxx_hidden_SubscriptionId != nil { + return *x.xxx_hidden_SubscriptionId + } + return "" + } + return "" +} + +func (x *AgentPoolSpec) GetResourceGroup() string { + if x != nil { + if x.xxx_hidden_ResourceGroup != nil { + return *x.xxx_hidden_ResourceGroup + } + return "" + } + return "" +} + +func (x *AgentPoolSpec) GetLocation() string { + if x != nil { + if x.xxx_hidden_Location != nil { + return *x.xxx_hidden_Location + } + return "" + } + return "" +} + +func (x *AgentPoolSpec) GetSubnetId() string { + if x != nil { + if x.xxx_hidden_SubnetId != nil { + return *x.xxx_hidden_SubnetId + } + return "" + } + return "" +} + +func (x *AgentPoolSpec) GetVmSize() string { + if x != nil { + if x.xxx_hidden_VmSize != nil { + return *x.xxx_hidden_VmSize + } + return "" + } + return "" +} + +func (x *AgentPoolSpec) GetImageReference() *ImageReference { + if x != nil { + return x.xxx_hidden_ImageReference + } + return nil +} + +func (x *AgentPoolSpec) GetImageId() string { + if x != nil { + if x.xxx_hidden_ImageId != nil { + return *x.xxx_hidden_ImageId + } + return "" + } + return "" 
+} + +func (x *AgentPoolSpec) GetSecurityType() string { + if x != nil { + if x.xxx_hidden_SecurityType != nil { + return *x.xxx_hidden_SecurityType + } + return "" + } + return "" +} + +func (x *AgentPoolSpec) GetOsDiskSizeGb() int32 { + if x != nil { + return x.xxx_hidden_OsDiskSizeGb + } + return 0 +} + +func (x *AgentPoolSpec) GetSshPublicKeys() []string { + if x != nil { + return x.xxx_hidden_SshPublicKeys + } + return nil +} + +func (x *AgentPoolSpec) GetAllocatePublicIp() bool { + if x != nil { + return x.xxx_hidden_AllocatePublicIp + } + return false +} + +func (x *AgentPoolSpec) GetTags() map[string]string { + if x != nil { + return x.xxx_hidden_Tags + } + return nil +} + +func (x *AgentPoolSpec) GetKubeadm() *kubeadm.Config { + if x != nil { + return x.xxx_hidden_Kubeadm + } + return nil +} + +func (x *AgentPoolSpec) SetSubscriptionId(v string) { + x.xxx_hidden_SubscriptionId = &v + protoimpl.X.SetPresent(&(x.XXX_presence[0]), 0, 13) +} + +func (x *AgentPoolSpec) SetResourceGroup(v string) { + x.xxx_hidden_ResourceGroup = &v + protoimpl.X.SetPresent(&(x.XXX_presence[0]), 1, 13) +} + +func (x *AgentPoolSpec) SetLocation(v string) { + x.xxx_hidden_Location = &v + protoimpl.X.SetPresent(&(x.XXX_presence[0]), 2, 13) +} + +func (x *AgentPoolSpec) SetSubnetId(v string) { + x.xxx_hidden_SubnetId = &v + protoimpl.X.SetPresent(&(x.XXX_presence[0]), 3, 13) +} + +func (x *AgentPoolSpec) SetVmSize(v string) { + x.xxx_hidden_VmSize = &v + protoimpl.X.SetPresent(&(x.XXX_presence[0]), 4, 13) +} + +func (x *AgentPoolSpec) SetImageReference(v *ImageReference) { + x.xxx_hidden_ImageReference = v +} + +func (x *AgentPoolSpec) SetImageId(v string) { + x.xxx_hidden_ImageId = &v + protoimpl.X.SetPresent(&(x.XXX_presence[0]), 6, 13) +} + +func (x *AgentPoolSpec) SetSecurityType(v string) { + x.xxx_hidden_SecurityType = &v + protoimpl.X.SetPresent(&(x.XXX_presence[0]), 7, 13) +} + +func (x *AgentPoolSpec) SetOsDiskSizeGb(v int32) { + x.xxx_hidden_OsDiskSizeGb = v + 
protoimpl.X.SetPresent(&(x.XXX_presence[0]), 8, 13) +} + +func (x *AgentPoolSpec) SetSshPublicKeys(v []string) { + x.xxx_hidden_SshPublicKeys = v +} + +func (x *AgentPoolSpec) SetAllocatePublicIp(v bool) { + x.xxx_hidden_AllocatePublicIp = v + protoimpl.X.SetPresent(&(x.XXX_presence[0]), 10, 13) +} + +func (x *AgentPoolSpec) SetTags(v map[string]string) { + x.xxx_hidden_Tags = v +} + +func (x *AgentPoolSpec) SetKubeadm(v *kubeadm.Config) { + x.xxx_hidden_Kubeadm = v +} + +func (x *AgentPoolSpec) HasSubscriptionId() bool { + if x == nil { + return false + } + return protoimpl.X.Present(&(x.XXX_presence[0]), 0) +} + +func (x *AgentPoolSpec) HasResourceGroup() bool { + if x == nil { + return false + } + return protoimpl.X.Present(&(x.XXX_presence[0]), 1) +} + +func (x *AgentPoolSpec) HasLocation() bool { + if x == nil { + return false + } + return protoimpl.X.Present(&(x.XXX_presence[0]), 2) +} + +func (x *AgentPoolSpec) HasSubnetId() bool { + if x == nil { + return false + } + return protoimpl.X.Present(&(x.XXX_presence[0]), 3) +} + +func (x *AgentPoolSpec) HasVmSize() bool { + if x == nil { + return false + } + return protoimpl.X.Present(&(x.XXX_presence[0]), 4) +} + +func (x *AgentPoolSpec) HasImageReference() bool { + if x == nil { + return false + } + return x.xxx_hidden_ImageReference != nil +} + +func (x *AgentPoolSpec) HasImageId() bool { + if x == nil { + return false + } + return protoimpl.X.Present(&(x.XXX_presence[0]), 6) +} + +func (x *AgentPoolSpec) HasSecurityType() bool { + if x == nil { + return false + } + return protoimpl.X.Present(&(x.XXX_presence[0]), 7) +} + +func (x *AgentPoolSpec) HasOsDiskSizeGb() bool { + if x == nil { + return false + } + return protoimpl.X.Present(&(x.XXX_presence[0]), 8) +} + +func (x *AgentPoolSpec) HasAllocatePublicIp() bool { + if x == nil { + return false + } + return protoimpl.X.Present(&(x.XXX_presence[0]), 10) +} + +func (x *AgentPoolSpec) HasKubeadm() bool { + if x == nil { + return false + } + return 
x.xxx_hidden_Kubeadm != nil +} + +func (x *AgentPoolSpec) ClearSubscriptionId() { + protoimpl.X.ClearPresent(&(x.XXX_presence[0]), 0) + x.xxx_hidden_SubscriptionId = nil +} + +func (x *AgentPoolSpec) ClearResourceGroup() { + protoimpl.X.ClearPresent(&(x.XXX_presence[0]), 1) + x.xxx_hidden_ResourceGroup = nil +} + +func (x *AgentPoolSpec) ClearLocation() { + protoimpl.X.ClearPresent(&(x.XXX_presence[0]), 2) + x.xxx_hidden_Location = nil +} + +func (x *AgentPoolSpec) ClearSubnetId() { + protoimpl.X.ClearPresent(&(x.XXX_presence[0]), 3) + x.xxx_hidden_SubnetId = nil +} + +func (x *AgentPoolSpec) ClearVmSize() { + protoimpl.X.ClearPresent(&(x.XXX_presence[0]), 4) + x.xxx_hidden_VmSize = nil +} + +func (x *AgentPoolSpec) ClearImageReference() { + x.xxx_hidden_ImageReference = nil +} + +func (x *AgentPoolSpec) ClearImageId() { + protoimpl.X.ClearPresent(&(x.XXX_presence[0]), 6) + x.xxx_hidden_ImageId = nil +} + +func (x *AgentPoolSpec) ClearSecurityType() { + protoimpl.X.ClearPresent(&(x.XXX_presence[0]), 7) + x.xxx_hidden_SecurityType = nil +} + +func (x *AgentPoolSpec) ClearOsDiskSizeGb() { + protoimpl.X.ClearPresent(&(x.XXX_presence[0]), 8) + x.xxx_hidden_OsDiskSizeGb = 0 +} + +func (x *AgentPoolSpec) ClearAllocatePublicIp() { + protoimpl.X.ClearPresent(&(x.XXX_presence[0]), 10) + x.xxx_hidden_AllocatePublicIp = false +} + +func (x *AgentPoolSpec) ClearKubeadm() { + x.xxx_hidden_Kubeadm = nil +} + +type AgentPoolSpec_builder struct { + _ [0]func() // Prevents comparability and use of unkeyed literals for the builder. + + SubscriptionId *string + ResourceGroup *string + Location *string + // Full ARM resource ID of the subnet (must already exist). + SubnetId *string + // VM SKU, e.g. "Standard_ND96isr_H200_v5". + VmSize *string + // ImageReference selects an Azure Marketplace image. + // Mutually exclusive with image_id. + ImageReference *ImageReference + // SIG / community gallery image resource ID. + // Mutually exclusive with image_reference. 
+ ImageId *string + // Currently only "Standard" is supported. TrustedLaunch is deferred — + // it has been observed to break the DSVM image during manual H200 bringup. + SecurityType *string + OsDiskSizeGb *int32 + SshPublicKeys []string + AllocatePublicIp *bool + Tags map[string]string + Kubeadm *kubeadm.Config +} + +func (b0 AgentPoolSpec_builder) Build() *AgentPoolSpec { + m0 := &AgentPoolSpec{} + b, x := &b0, m0 + _, _ = b, x + if b.SubscriptionId != nil { + protoimpl.X.SetPresentNonAtomic(&(x.XXX_presence[0]), 0, 13) + x.xxx_hidden_SubscriptionId = b.SubscriptionId + } + if b.ResourceGroup != nil { + protoimpl.X.SetPresentNonAtomic(&(x.XXX_presence[0]), 1, 13) + x.xxx_hidden_ResourceGroup = b.ResourceGroup + } + if b.Location != nil { + protoimpl.X.SetPresentNonAtomic(&(x.XXX_presence[0]), 2, 13) + x.xxx_hidden_Location = b.Location + } + if b.SubnetId != nil { + protoimpl.X.SetPresentNonAtomic(&(x.XXX_presence[0]), 3, 13) + x.xxx_hidden_SubnetId = b.SubnetId + } + if b.VmSize != nil { + protoimpl.X.SetPresentNonAtomic(&(x.XXX_presence[0]), 4, 13) + x.xxx_hidden_VmSize = b.VmSize + } + x.xxx_hidden_ImageReference = b.ImageReference + if b.ImageId != nil { + protoimpl.X.SetPresentNonAtomic(&(x.XXX_presence[0]), 6, 13) + x.xxx_hidden_ImageId = b.ImageId + } + if b.SecurityType != nil { + protoimpl.X.SetPresentNonAtomic(&(x.XXX_presence[0]), 7, 13) + x.xxx_hidden_SecurityType = b.SecurityType + } + if b.OsDiskSizeGb != nil { + protoimpl.X.SetPresentNonAtomic(&(x.XXX_presence[0]), 8, 13) + x.xxx_hidden_OsDiskSizeGb = *b.OsDiskSizeGb + } + x.xxx_hidden_SshPublicKeys = b.SshPublicKeys + if b.AllocatePublicIp != nil { + protoimpl.X.SetPresentNonAtomic(&(x.XXX_presence[0]), 10, 13) + x.xxx_hidden_AllocatePublicIp = *b.AllocatePublicIp + } + x.xxx_hidden_Tags = b.Tags + x.xxx_hidden_Kubeadm = b.Kubeadm + return m0 +} + +type ImageReference struct { + state protoimpl.MessageState `protogen:"opaque.v1"` + xxx_hidden_Publisher *string 
`protobuf:"bytes,1,opt,name=publisher"` + xxx_hidden_Offer *string `protobuf:"bytes,2,opt,name=offer"` + xxx_hidden_Sku *string `protobuf:"bytes,3,opt,name=sku"` + xxx_hidden_Version *string `protobuf:"bytes,4,opt,name=version"` + XXX_raceDetectHookData protoimpl.RaceDetectHookData + XXX_presence [1]uint32 + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *ImageReference) Reset() { + *x = ImageReference{} + mi := &file_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *ImageReference) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ImageReference) ProtoMessage() {} + +func (x *ImageReference) ProtoReflect() protoreflect.Message { + mi := &file_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto_msgTypes[2] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +func (x *ImageReference) GetPublisher() string { + if x != nil { + if x.xxx_hidden_Publisher != nil { + return *x.xxx_hidden_Publisher + } + return "" + } + return "" +} + +func (x *ImageReference) GetOffer() string { + if x != nil { + if x.xxx_hidden_Offer != nil { + return *x.xxx_hidden_Offer + } + return "" + } + return "" +} + +func (x *ImageReference) GetSku() string { + if x != nil { + if x.xxx_hidden_Sku != nil { + return *x.xxx_hidden_Sku + } + return "" + } + return "" +} + +func (x *ImageReference) GetVersion() string { + if x != nil { + if x.xxx_hidden_Version != nil { + return *x.xxx_hidden_Version + } + return "" + } + return "" +} + +func (x *ImageReference) SetPublisher(v string) { + x.xxx_hidden_Publisher = &v + protoimpl.X.SetPresent(&(x.XXX_presence[0]), 0, 4) +} + +func (x *ImageReference) SetOffer(v string) { + x.xxx_hidden_Offer = &v + 
protoimpl.X.SetPresent(&(x.XXX_presence[0]), 1, 4) +} + +func (x *ImageReference) SetSku(v string) { + x.xxx_hidden_Sku = &v + protoimpl.X.SetPresent(&(x.XXX_presence[0]), 2, 4) +} + +func (x *ImageReference) SetVersion(v string) { + x.xxx_hidden_Version = &v + protoimpl.X.SetPresent(&(x.XXX_presence[0]), 3, 4) +} + +func (x *ImageReference) HasPublisher() bool { + if x == nil { + return false + } + return protoimpl.X.Present(&(x.XXX_presence[0]), 0) +} + +func (x *ImageReference) HasOffer() bool { + if x == nil { + return false + } + return protoimpl.X.Present(&(x.XXX_presence[0]), 1) +} + +func (x *ImageReference) HasSku() bool { + if x == nil { + return false + } + return protoimpl.X.Present(&(x.XXX_presence[0]), 2) +} + +func (x *ImageReference) HasVersion() bool { + if x == nil { + return false + } + return protoimpl.X.Present(&(x.XXX_presence[0]), 3) +} + +func (x *ImageReference) ClearPublisher() { + protoimpl.X.ClearPresent(&(x.XXX_presence[0]), 0) + x.xxx_hidden_Publisher = nil +} + +func (x *ImageReference) ClearOffer() { + protoimpl.X.ClearPresent(&(x.XXX_presence[0]), 1) + x.xxx_hidden_Offer = nil +} + +func (x *ImageReference) ClearSku() { + protoimpl.X.ClearPresent(&(x.XXX_presence[0]), 2) + x.xxx_hidden_Sku = nil +} + +func (x *ImageReference) ClearVersion() { + protoimpl.X.ClearPresent(&(x.XXX_presence[0]), 3) + x.xxx_hidden_Version = nil +} + +type ImageReference_builder struct { + _ [0]func() // Prevents comparability and use of unkeyed literals for the builder. 
+ + Publisher *string + Offer *string + Sku *string + Version *string +} + +func (b0 ImageReference_builder) Build() *ImageReference { + m0 := &ImageReference{} + b, x := &b0, m0 + _, _ = b, x + if b.Publisher != nil { + protoimpl.X.SetPresentNonAtomic(&(x.XXX_presence[0]), 0, 4) + x.xxx_hidden_Publisher = b.Publisher + } + if b.Offer != nil { + protoimpl.X.SetPresentNonAtomic(&(x.XXX_presence[0]), 1, 4) + x.xxx_hidden_Offer = b.Offer + } + if b.Sku != nil { + protoimpl.X.SetPresentNonAtomic(&(x.XXX_presence[0]), 2, 4) + x.xxx_hidden_Sku = b.Sku + } + if b.Version != nil { + protoimpl.X.SetPresentNonAtomic(&(x.XXX_presence[0]), 3, 4) + x.xxx_hidden_Version = b.Version + } + return m0 +} + +type AgentPoolStatus struct { + state protoimpl.MessageState `protogen:"opaque.v1"` + xxx_hidden_VmResourceId *string `protobuf:"bytes,1,opt,name=vm_resource_id,json=vmResourceId"` + xxx_hidden_CreatedAt *timestamppb.Timestamp `protobuf:"bytes,2,opt,name=created_at,json=createdAt"` + XXX_raceDetectHookData protoimpl.RaceDetectHookData + XXX_presence [1]uint32 + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *AgentPoolStatus) Reset() { + *x = AgentPoolStatus{} + mi := &file_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *AgentPoolStatus) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*AgentPoolStatus) ProtoMessage() {} + +func (x *AgentPoolStatus) ProtoReflect() protoreflect.Message { + mi := &file_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto_msgTypes[3] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +func (x *AgentPoolStatus) GetVmResourceId() string { + if x != nil { + if x.xxx_hidden_VmResourceId != nil { + return *x.xxx_hidden_VmResourceId + } + 
return "" + } + return "" +} + +func (x *AgentPoolStatus) GetCreatedAt() *timestamppb.Timestamp { + if x != nil { + return x.xxx_hidden_CreatedAt + } + return nil +} + +func (x *AgentPoolStatus) SetVmResourceId(v string) { + x.xxx_hidden_VmResourceId = &v + protoimpl.X.SetPresent(&(x.XXX_presence[0]), 0, 2) +} + +func (x *AgentPoolStatus) SetCreatedAt(v *timestamppb.Timestamp) { + x.xxx_hidden_CreatedAt = v +} + +func (x *AgentPoolStatus) HasVmResourceId() bool { + if x == nil { + return false + } + return protoimpl.X.Present(&(x.XXX_presence[0]), 0) +} + +func (x *AgentPoolStatus) HasCreatedAt() bool { + if x == nil { + return false + } + return x.xxx_hidden_CreatedAt != nil +} + +func (x *AgentPoolStatus) ClearVmResourceId() { + protoimpl.X.ClearPresent(&(x.XXX_presence[0]), 0) + x.xxx_hidden_VmResourceId = nil +} + +func (x *AgentPoolStatus) ClearCreatedAt() { + x.xxx_hidden_CreatedAt = nil +} + +type AgentPoolStatus_builder struct { + _ [0]func() // Prevents comparability and use of unkeyed literals for the builder. + + // Full ARM resource ID of the created VM. 
+ VmResourceId *string + CreatedAt *timestamppb.Timestamp +} + +func (b0 AgentPoolStatus_builder) Build() *AgentPoolStatus { + m0 := &AgentPoolStatus{} + b, x := &b0, m0 + _, _ = b, x + if b.VmResourceId != nil { + protoimpl.X.SetPresentNonAtomic(&(x.XXX_presence[0]), 0, 2) + x.xxx_hidden_VmResourceId = b.VmResourceId + } + x.xxx_hidden_CreatedAt = b.CreatedAt + return m0 +} + +var File_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto protoreflect.FileDescriptor + +const file_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto_rawDesc = "" + + "\n" + + " api.Metadata + 1, // 1: agentpools.azure.flexvm.AgentPool.spec:type_name -> agentpools.azure.flexvm.AgentPoolSpec + 3, // 2: agentpools.azure.flexvm.AgentPool.status:type_name -> agentpools.azure.flexvm.AgentPoolStatus + 2, // 3: agentpools.azure.flexvm.AgentPoolSpec.image_reference:type_name -> agentpools.azure.flexvm.ImageReference + 4, // 4: agentpools.azure.flexvm.AgentPoolSpec.tags:type_name -> agentpools.azure.flexvm.AgentPoolSpec.TagsEntry + 6, // 5: agentpools.azure.flexvm.AgentPoolSpec.kubeadm:type_name -> kubeadm.Config + 7, // 6: agentpools.azure.flexvm.AgentPoolStatus.created_at:type_name -> google.protobuf.Timestamp + 7, // [7:7] is the sub-list for method output_type + 7, // [7:7] is the sub-list for method input_type + 7, // [7:7] is the sub-list for extension type_name + 7, // [7:7] is the sub-list for extension extendee + 0, // [0:7] is the sub-list for field type_name +} + +func init() { file_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto_init() } +func file_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto_init() { + if File_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto != nil { + return + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: 
unsafe.Slice(unsafe.StringData(file_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto_rawDesc), len(file_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto_rawDesc)), + NumEnums: 0, + NumMessages: 5, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto_goTypes, + DependencyIndexes: file_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto_depIdxs, + MessageInfos: file_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto_msgTypes, + }.Build() + File_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto = out.File + file_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto_goTypes = nil + file_plugin_pkg_services_agentpools_azure_flexvm_agentpools_proto_depIdxs = nil +} diff --git a/plugin/pkg/services/agentpools/azure/flexvm/agentpools.proto b/plugin/pkg/services/agentpools/azure/flexvm/agentpools.proto new file mode 100644 index 0000000..c866b97 --- /dev/null +++ b/plugin/pkg/services/agentpools/azure/flexvm/agentpools.proto @@ -0,0 +1,74 @@ +edition = "2024"; + +package agentpools.azure.flexvm; + +option go_package = "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/azure/flexvm"; + +import "google/protobuf/timestamp.proto"; +import "plugin/api/api.proto"; +import "plugin/pkg/services/agentpools/api/features/kubeadm/kubeadm.proto"; + +// AgentPool models a single cross-region Azure VM. +// +// Unlike the ubuntu2404vmss service which manages a Virtual Machine Scale Set, +// flexvm creates one Microsoft.Compute/virtualMachines per AgentPool. This is +// suitable for cross-region placement and matches Karpenter's per-NodeClaim +// resource lifecycle. +// +// The plugin process is assumed to authenticate via DefaultAzureCredential and +// hold Contributor on the target subscription / resource group / subnet. 
+message AgentPool { + api.Metadata metadata = 1; + + AgentPoolSpec spec = 2; + + AgentPoolStatus status = 3; +} + +message AgentPoolSpec { + string subscription_id = 1; + string resource_group = 2; + string location = 3; + + // Full ARM resource ID of the subnet (must already exist). + string subnet_id = 4; + + // VM SKU, e.g. "Standard_ND96isr_H200_v5". + string vm_size = 5; + + // ImageReference selects an Azure Marketplace image. + // Mutually exclusive with image_id. + ImageReference image_reference = 6; + + // SIG / community gallery image resource ID. + // Mutually exclusive with image_reference. + string image_id = 7; + + // Currently only "Standard" is supported. TrustedLaunch is deferred — + // it has been observed to break the DSVM image during manual H200 bringup. + string security_type = 8; + + int32 os_disk_size_gb = 9; + + repeated string ssh_public_keys = 10; + + bool allocate_public_ip = 11; + + map<string, string> tags = 12; + + kubeadm.Config kubeadm = 13; +} + +message ImageReference { + string publisher = 1; + string offer = 2; + string sku = 3; + string version = 4; +} + +message AgentPoolStatus { + // Full ARM resource ID of the created VM. 
+ string vm_resource_id = 1; + + google.protobuf.Timestamp created_at = 2; +} diff --git a/plugin/pkg/services/agentpools/azure/flexvm/agentpools_test.go b/plugin/pkg/services/agentpools/azure/flexvm/agentpools_test.go new file mode 100644 index 0000000..7bc55a1 --- /dev/null +++ b/plugin/pkg/services/agentpools/azure/flexvm/agentpools_test.go @@ -0,0 +1,92 @@ +package flexvm + +import ( + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/timestamppb" + + "github.com/Azure/aks-flex/plugin/api" + "github.com/Azure/aks-flex/plugin/pkg/db" + "github.com/Azure/aks-flex/plugin/pkg/helper" +) + +func tempDB(t *testing.T) *db.StupidDB { + t.Helper() + store := db.NewStupidDB(filepath.Join(t.TempDir(), "agentpools.db")) + t.Cleanup(store.Close) + return store +} + +func testAgentPool(id string, status *AgentPoolStatus) *AgentPool { + return AgentPool_builder{ + Metadata: api.Metadata_builder{ + Id: proto.String(id), + }.Build(), + Status: status, + }.Build() +} + +func TestPersistIncompleteAgentPoolStoresFailedCreate(t *testing.T) { + store := tempDB(t) + ap := testAgentPool("nodeclaim-1", nil) + + persistIncompleteAgentPool(store, ap) + + got, ok := store.Get("nodeclaim-1") + require.True(t, ok) + require.Equal(t, "nodeclaim-1", got.GetMetadata().GetId()) + gotAP, err := helper.To[*AgentPool](got) + require.NoError(t, err) + require.NotNil(t, gotAP.GetStatus().GetCreatedAt()) +} + +func TestPersistIncompleteAgentPoolDoesNotOverwriteCompletedStatus(t *testing.T) { + store := tempDB(t) + store.CreateOrUpdate(testAgentPool("nodeclaim-1", AgentPoolStatus_builder{ + VmResourceId: proto.String("/subscriptions/sub/resourceGroups/rg/providers/Microsoft.Compute/virtualMachines/nodeclaim-1"), + }.Build())) + + persistIncompleteAgentPool(store, testAgentPool("nodeclaim-1", nil)) + + gotObj, ok := store.Get("nodeclaim-1") + require.True(t, ok) + got, err := 
helper.To[*AgentPool](gotObj) + require.NoError(t, err) + require.NotEmpty(t, got.GetStatus().GetVmResourceId()) +} + +func TestPersistIncompleteAgentPoolPreservesPendingCreatedAt(t *testing.T) { + store := tempDB(t) + createdAt := timestamppb.New(time.Now().Add(-time.Hour)) + store.CreateOrUpdate(testAgentPool("nodeclaim-1", AgentPoolStatus_builder{ + CreatedAt: createdAt, + }.Build())) + + persistIncompleteAgentPool(store, testAgentPool("nodeclaim-1", nil)) + + gotObj, ok := store.Get("nodeclaim-1") + require.True(t, ok) + got, err := helper.To[*AgentPool](gotObj) + require.NoError(t, err) + require.Equal(t, createdAt.AsTime(), got.GetStatus().GetCreatedAt().AsTime()) +} + +func TestDeleteIncompleteAgentPoolOnlyDeletesPendingRecords(t *testing.T) { + store := tempDB(t) + store.CreateOrUpdate(testAgentPool("pending", nil)) + store.CreateOrUpdate(testAgentPool("complete", AgentPoolStatus_builder{ + VmResourceId: proto.String("/subscriptions/sub/resourceGroups/rg/providers/Microsoft.Compute/virtualMachines/complete"), + }.Build())) + + deleteIncompleteAgentPool(store, "pending") + deleteIncompleteAgentPool(store, "complete") + + _, ok := store.Get("pending") + require.False(t, ok) + _, ok = store.Get("complete") + require.True(t, ok) +} diff --git a/plugin/pkg/services/agentpools/azure/flexvm/instances.go b/plugin/pkg/services/agentpools/azure/flexvm/instances.go new file mode 100644 index 0000000..94116fc --- /dev/null +++ b/plugin/pkg/services/agentpools/azure/flexvm/instances.go @@ -0,0 +1,78 @@ +package flexvm + +import ( + "context" + "strings" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/types/known/anypb" + + "github.com/Azure/aks-flex/plugin/api" + "github.com/Azure/aks-flex/plugin/pkg/db" + agentpools 
"github.com/Azure/aks-flex/plugin/pkg/services/agentpools/api" +) + +var _ api.Object = (*Instance)(nil) + +// Each AgentPool maps to exactly one VM, so the Instance API is a thin shim +// over the AgentPool — there is always one instance "/0". +type instancesServer struct { + agentpools.UnimplementedInstancesServer + storage db.RODB + + credentials azcore.TokenCredential +} + +func NewInstancesServer(storage db.RODB) (agentpools.InstancesServer, error) { + credentials, err := azidentity.NewDefaultAzureCredential(nil) + if err != nil { + return nil, err + } + return &instancesServer{ + storage: storage, + credentials: credentials, + }, nil +} + +func (srv *instancesServer) List(ctx context.Context, req *api.ListRequest) (*api.ListResponse, error) { + ap, ok := srv.storage.Get(req.GetId()) + if !ok { + return nil, status.Error(codes.NotFound, "") + } + item, err := anypb.New(Instance_builder{ + Metadata: api.Metadata_builder{ + Id: to.Ptr(ap.GetMetadata().GetId() + "/0"), + }.Build(), + }.Build()) + if err != nil { + return nil, err + } + return api.ListResponse_builder{ + Items: []*anypb.Any{item}, + }.Build(), nil +} + +func (srv *instancesServer) Get(ctx context.Context, req *api.GetRequest) (*api.GetResponse, error) { + ids := strings.Split(req.GetId(), "/") + if len(ids) != 2 || ids[1] != "0" { + return nil, status.Error(codes.NotFound, "") + } + if _, ok := srv.storage.Get(ids[0]); !ok { + return nil, status.Error(codes.NotFound, "") + } + item, err := anypb.New(Instance_builder{ + Metadata: api.Metadata_builder{ + Id: to.Ptr(req.GetId()), + }.Build(), + }.Build()) + if err != nil { + return nil, err + } + return api.GetResponse_builder{ + Item: item, + }.Build(), nil +} diff --git a/plugin/pkg/services/agentpools/azure/flexvm/instances.pb.go b/plugin/pkg/services/agentpools/azure/flexvm/instances.pb.go new file mode 100644 index 0000000..6b213fd --- /dev/null +++ b/plugin/pkg/services/agentpools/azure/flexvm/instances.pb.go @@ -0,0 +1,282 @@ +// Code 
generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.36.10 +// protoc v6.33.0 +// source: plugin/pkg/services/agentpools/azure/flexvm/instances.proto + +package flexvm + +import ( + api "github.com/Azure/aks-flex/plugin/api" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + unsafe "unsafe" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// flexvm models one VM per AgentPool, so an Instance is just the AgentPool +// itself with id "/0". This matches the AWS ubuntu2404instance shape. +type Instance struct { + state protoimpl.MessageState `protogen:"opaque.v1"` + xxx_hidden_Metadata *api.Metadata `protobuf:"bytes,1,opt,name=metadata"` + xxx_hidden_Spec *InstanceSpec `protobuf:"bytes,2,opt,name=spec"` + xxx_hidden_Status *InstanceStatus `protobuf:"bytes,3,opt,name=status"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *Instance) Reset() { + *x = Instance{} + mi := &file_plugin_pkg_services_agentpools_azure_flexvm_instances_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Instance) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Instance) ProtoMessage() {} + +func (x *Instance) ProtoReflect() protoreflect.Message { + mi := &file_plugin_pkg_services_agentpools_azure_flexvm_instances_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +func (x *Instance) GetMetadata() *api.Metadata { + if x != nil { + return x.xxx_hidden_Metadata + } + return nil +} + +func (x 
*Instance) GetSpec() *InstanceSpec { + if x != nil { + return x.xxx_hidden_Spec + } + return nil +} + +func (x *Instance) GetStatus() *InstanceStatus { + if x != nil { + return x.xxx_hidden_Status + } + return nil +} + +func (x *Instance) SetMetadata(v *api.Metadata) { + x.xxx_hidden_Metadata = v +} + +func (x *Instance) SetSpec(v *InstanceSpec) { + x.xxx_hidden_Spec = v +} + +func (x *Instance) SetStatus(v *InstanceStatus) { + x.xxx_hidden_Status = v +} + +func (x *Instance) HasMetadata() bool { + if x == nil { + return false + } + return x.xxx_hidden_Metadata != nil +} + +func (x *Instance) HasSpec() bool { + if x == nil { + return false + } + return x.xxx_hidden_Spec != nil +} + +func (x *Instance) HasStatus() bool { + if x == nil { + return false + } + return x.xxx_hidden_Status != nil +} + +func (x *Instance) ClearMetadata() { + x.xxx_hidden_Metadata = nil +} + +func (x *Instance) ClearSpec() { + x.xxx_hidden_Spec = nil +} + +func (x *Instance) ClearStatus() { + x.xxx_hidden_Status = nil +} + +type Instance_builder struct { + _ [0]func() // Prevents comparability and use of unkeyed literals for the builder. 
+ + Metadata *api.Metadata + Spec *InstanceSpec + Status *InstanceStatus +} + +func (b0 Instance_builder) Build() *Instance { + m0 := &Instance{} + b, x := &b0, m0 + _, _ = b, x + x.xxx_hidden_Metadata = b.Metadata + x.xxx_hidden_Spec = b.Spec + x.xxx_hidden_Status = b.Status + return m0 +} + +type InstanceSpec struct { + state protoimpl.MessageState `protogen:"opaque.v1"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *InstanceSpec) Reset() { + *x = InstanceSpec{} + mi := &file_plugin_pkg_services_agentpools_azure_flexvm_instances_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *InstanceSpec) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*InstanceSpec) ProtoMessage() {} + +func (x *InstanceSpec) ProtoReflect() protoreflect.Message { + mi := &file_plugin_pkg_services_agentpools_azure_flexvm_instances_proto_msgTypes[1] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +type InstanceSpec_builder struct { + _ [0]func() // Prevents comparability and use of unkeyed literals for the builder. 
+ +} + +func (b0 InstanceSpec_builder) Build() *InstanceSpec { + m0 := &InstanceSpec{} + b, x := &b0, m0 + _, _ = b, x + return m0 +} + +type InstanceStatus struct { + state protoimpl.MessageState `protogen:"opaque.v1"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *InstanceStatus) Reset() { + *x = InstanceStatus{} + mi := &file_plugin_pkg_services_agentpools_azure_flexvm_instances_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *InstanceStatus) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*InstanceStatus) ProtoMessage() {} + +func (x *InstanceStatus) ProtoReflect() protoreflect.Message { + mi := &file_plugin_pkg_services_agentpools_azure_flexvm_instances_proto_msgTypes[2] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +type InstanceStatus_builder struct { + _ [0]func() // Prevents comparability and use of unkeyed literals for the builder. 
+ +} + +func (b0 InstanceStatus_builder) Build() *InstanceStatus { + m0 := &InstanceStatus{} + b, x := &b0, m0 + _, _ = b, x + return m0 +} + +var File_plugin_pkg_services_agentpools_azure_flexvm_instances_proto protoreflect.FileDescriptor + +const file_plugin_pkg_services_agentpools_azure_flexvm_instances_proto_rawDesc = "" + + "\n" + + ";plugin/pkg/services/agentpools/azure/flexvm/instances.proto\x12\x17agentpools.azure.flexvm\x1a\x14plugin/api/api.proto\"\xb1\x01\n" + + "\bInstance\x12)\n" + + "\bmetadata\x18\x01 \x01(\v2\r.api.MetadataR\bmetadata\x129\n" + + "\x04spec\x18\x02 \x01(\v2%.agentpools.azure.flexvm.InstanceSpecR\x04spec\x12?\n" + + "\x06status\x18\x03 \x01(\v2'.agentpools.azure.flexvm.InstanceStatusR\x06status\"\x0e\n" + + "\fInstanceSpec\"\x10\n" + + "\x0eInstanceStatusBGZEgithub.com/Azure/aks-flex/plugin/pkg/services/agentpools/azure/flexvmb\beditionsp\xe9\a" + +var file_plugin_pkg_services_agentpools_azure_flexvm_instances_proto_msgTypes = make([]protoimpl.MessageInfo, 3) +var file_plugin_pkg_services_agentpools_azure_flexvm_instances_proto_goTypes = []any{ + (*Instance)(nil), // 0: agentpools.azure.flexvm.Instance + (*InstanceSpec)(nil), // 1: agentpools.azure.flexvm.InstanceSpec + (*InstanceStatus)(nil), // 2: agentpools.azure.flexvm.InstanceStatus + (*api.Metadata)(nil), // 3: api.Metadata +} +var file_plugin_pkg_services_agentpools_azure_flexvm_instances_proto_depIdxs = []int32{ + 3, // 0: agentpools.azure.flexvm.Instance.metadata:type_name -> api.Metadata + 1, // 1: agentpools.azure.flexvm.Instance.spec:type_name -> agentpools.azure.flexvm.InstanceSpec + 2, // 2: agentpools.azure.flexvm.Instance.status:type_name -> agentpools.azure.flexvm.InstanceStatus + 3, // [3:3] is the sub-list for method output_type + 3, // [3:3] is the sub-list for method input_type + 3, // [3:3] is the sub-list for extension type_name + 3, // [3:3] is the sub-list for extension extendee + 0, // [0:3] is the sub-list for field type_name +} + +func init() { 
file_plugin_pkg_services_agentpools_azure_flexvm_instances_proto_init() } +func file_plugin_pkg_services_agentpools_azure_flexvm_instances_proto_init() { + if File_plugin_pkg_services_agentpools_azure_flexvm_instances_proto != nil { + return + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_plugin_pkg_services_agentpools_azure_flexvm_instances_proto_rawDesc), len(file_plugin_pkg_services_agentpools_azure_flexvm_instances_proto_rawDesc)), + NumEnums: 0, + NumMessages: 3, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_plugin_pkg_services_agentpools_azure_flexvm_instances_proto_goTypes, + DependencyIndexes: file_plugin_pkg_services_agentpools_azure_flexvm_instances_proto_depIdxs, + MessageInfos: file_plugin_pkg_services_agentpools_azure_flexvm_instances_proto_msgTypes, + }.Build() + File_plugin_pkg_services_agentpools_azure_flexvm_instances_proto = out.File + file_plugin_pkg_services_agentpools_azure_flexvm_instances_proto_goTypes = nil + file_plugin_pkg_services_agentpools_azure_flexvm_instances_proto_depIdxs = nil +} diff --git a/plugin/pkg/services/agentpools/azure/flexvm/instances.proto b/plugin/pkg/services/agentpools/azure/flexvm/instances.proto new file mode 100644 index 0000000..eaabe07 --- /dev/null +++ b/plugin/pkg/services/agentpools/azure/flexvm/instances.proto @@ -0,0 +1,23 @@ +edition = "2024"; + +package agentpools.azure.flexvm; + +option go_package = "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/azure/flexvm"; + +import "plugin/api/api.proto"; + +// flexvm models one VM per AgentPool, so an Instance is just the AgentPool +// itself with id "<agentpool-id>/0". This matches the AWS ubuntu2404instance shape. 
+message Instance { + api.Metadata metadata = 1; + + InstanceSpec spec = 2; + + InstanceStatus status = 3; +} + +message InstanceSpec { +} + +message InstanceStatus { +} diff --git a/plugin/pkg/services/agentpools/azure/flexvm/redact.go b/plugin/pkg/services/agentpools/azure/flexvm/redact.go new file mode 100644 index 0000000..fcd1f0a --- /dev/null +++ b/plugin/pkg/services/agentpools/azure/flexvm/redact.go @@ -0,0 +1,10 @@ +// Package flexvm implements the cross-region Azure VM agent pool service. +// See agentpools.go for the package overview and authentication contract. +package flexvm + +func (ap *AgentPool) Redact() { + ap.GetSpec().GetKubeadm().Redact() +} + +func (i *Instance) Redact() { +} diff --git a/plugin/pkg/services/agentpools/instances.go b/plugin/pkg/services/agentpools/instances.go index 37b39f7..3dd7651 100644 --- a/plugin/pkg/services/agentpools/instances.go +++ b/plugin/pkg/services/agentpools/instances.go @@ -11,6 +11,7 @@ import ( "github.com/Azure/aks-flex/plugin/pkg/server" "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/api" "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/aws/ubuntu2404instance" + "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/azure/flexvm" "github.com/Azure/aks-flex/plugin/pkg/services/agentpools/azure/ubuntu2404vmss" ) @@ -32,6 +33,10 @@ func NewInstancesServer(db db.DB) api.InstancesServer { return ubuntu2404vmss.NewInstancesServer(srv.DB) }, &ubuntu2404vmss.AgentPool{}) + server.MustRegister(srv.Servers, func() (api.InstancesServer, error) { + return flexvm.NewInstancesServer(srv.DB) + }, &flexvm.AgentPool{}) + return srv } diff --git a/plugin/pkg/services/agentpools/userdata/flex/assets/bootstrap.sh.tmpl b/plugin/pkg/services/agentpools/userdata/flex/assets/bootstrap.sh.tmpl index 98dfc80..617a3a2 100644 --- a/plugin/pkg/services/agentpools/userdata/flex/assets/bootstrap.sh.tmpl +++ b/plugin/pkg/services/agentpools/userdata/flex/assets/bootstrap.sh.tmpl @@ -4,4 +4,58 @@ tar -xzf 
/tmp/flex/aks-flex-node-linux-{{ .Arch }}.tar.gz -C /tmp/flex mv /tmp/flex/aks-flex-node-linux-{{ .Arch }} /tmp/flex/aks-flex-node chmod +x /tmp/flex/aks-flex-node /tmp/flex/aks-flex-node apply -f /tmp/flex-config.json +# Workaround for AKSFlexNode v0.0.x gap: aks-flex-node apply writes a +# v1-schema containerd config (e.g. [plugins."io.containerd.grpc.v1.cri".cni]) +# but ships a containerd v2 binary which only honors v3-schema sections +# (e.g. [plugins."io.containerd.cri.v1.runtime".cni]). Result: CRI starts but +# CNI bin_dir/conf_dir are empty, every Pod fails with "failed to find plugin +# cilium-cni in path []". Overwrite with a v3-schema config and restart so +# Pod sandbox creation works. Mirror of the manual repair on existing nodes. +cat >/etc/containerd/config.toml <<'CONTAINERD_V3_EOF' +imports = ["/etc/containerd/conf.d/*.toml"] +oom_score = 0 +version = 3 + +[metrics] + address = "0.0.0.0:10257" + +[plugins] + + [plugins."io.containerd.cri.v1.images"] + sandbox_image = "mcr.microsoft.com/oss/kubernetes/pause:3.9" + + [plugins."io.containerd.cri.v1.images".registry] + config_path = "/etc/containerd/certs.d" + + [plugins."io.containerd.cri.v1.images".registry.headers] + X-Meta-Source-Client = ["azure/aks"] + + [plugins."io.containerd.cri.v1.runtime"] + + [plugins."io.containerd.cri.v1.runtime".cni] + bin_dir = "/opt/cni/bin" + conf_dir = "/etc/cni/net.d" + + [plugins."io.containerd.cri.v1.runtime".containerd] + + [plugins."io.containerd.cri.v1.runtime".containerd.runtimes] + + [plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc] + runtime_type = "io.containerd.runc.v2" + + [plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc.options] + BinaryName = "/usr/local/bin/runc" + SystemdCgroup = true +CONTAINERD_V3_EOF +# Workaround for AKSFlexNode v0.0.x gap (continued): the gpu-operator/aks-flex +# bake drops /etc/containerd/conf.d/99-nvidia.toml that sets +# bin_dir = "" +# bin_dirs = ["/opt/cni/bin"] +# in 
[plugins."io.containerd.cri.v1.runtime".cni]. containerd 2.0.4 only honors +# bin_dir (singular) — the empty string blanks out our main config. Patch the +# import to set bin_dir to /opt/cni/bin so CNI plugin discovery works. +if [ -f /etc/containerd/conf.d/99-nvidia.toml ]; then + sed -i 's|bin_dir = ""|bin_dir = "/opt/cni/bin"|' /etc/containerd/conf.d/99-nvidia.toml +fi +systemctl restart containerd rm -rf /tmp/flex \ No newline at end of file diff --git a/plugin/pkg/services/agentpools/userdata/flex/flex.go b/plugin/pkg/services/agentpools/userdata/flex/flex.go index a1bd145..288f110 100644 --- a/plugin/pkg/services/agentpools/userdata/flex/flex.go +++ b/plugin/pkg/services/agentpools/userdata/flex/flex.go @@ -27,7 +27,7 @@ var bootstrapTmpl string var bootstrapTemplate = template.Must(template.New("bootstrap.sh").Parse(bootstrapTmpl)) const ( - flexNodeVersion = "v0.0.17" + flexNodeVersion = "v0.0.18" defaultArch = "amd64" DefaultKubeVer = "1.34.2" )