From dc6bd2616ca49541ba805f8d79a1983867e1f47f Mon Sep 17 00:00:00 2001 From: Malte Viering Date: Thu, 12 Feb 2026 13:50:08 +0100 Subject: [PATCH 1/2] fix: Added IsEvacuation helper for nova spec --- api/external/nova/messages.go | 11 +++++ api/external/nova/messages_test.go | 67 ++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/api/external/nova/messages.go b/api/external/nova/messages.go index d79379aa..fbc2b731 100644 --- a/api/external/nova/messages.go +++ b/api/external/nova/messages.go @@ -217,6 +217,17 @@ func (s NovaSpec) GetSchedulerHintStr(key string) (string, error) { return "", errors.New("unknown scheduler hint type") } +// IsEvacuation checks if this Nova spec represents an evacuation request. +// Nova sets the scheduler hint "_nova_check_type" to "evacuate" for evacuation requests. +// ref https://github.com/sapcc/nova/pull/594/changes +func (s NovaSpec) IsEvacuation() bool { + checkType, err := s.GetSchedulerHintStr("_nova_check_type") + if err != nil { + return false + } + return checkType == "evacuate" +} + type NovaInstanceGroup struct { UserID string `json:"user_id"` ProjectID string `json:"project_id"` diff --git a/api/external/nova/messages_test.go b/api/external/nova/messages_test.go index b4b67be6..bb1b2ad2 100644 --- a/api/external/nova/messages_test.go +++ b/api/external/nova/messages_test.go @@ -8,6 +8,73 @@ import ( "testing" ) +func TestNovaSpec_IsEvacuation(t *testing.T) { + tests := []struct { + name string + schedulerHints map[string]any + expected bool + }{ + { + name: "evacuation request with list hint", + schedulerHints: map[string]any{ + "_nova_check_type": []any{"evacuate"}, + }, + expected: true, + }, + { + name: "evacuation request with string hint", + schedulerHints: map[string]any{ + "_nova_check_type": "evacuate", + }, + expected: true, + }, + { + name: "non-evacuation request with different check type", + schedulerHints: map[string]any{ + "_nova_check_type": []any{"rebuild"}, + }, + expected: false, + }, + { + name: "non-evacuation request without check type hint", + schedulerHints: map[string]any{ + "domain_name": []any{"monsoon3"}, + }, + expected: false, + }, + { + name: "nil scheduler hints", + schedulerHints: nil, + expected: false, + }, + { + name: "empty scheduler hints", + schedulerHints: map[string]any{}, + expected: false, + }, + { + name: "evacuation with additional hints", + schedulerHints: map[string]any{ + "domain_name": []any{"monsoon3"}, + "_nova_check_type": []any{"evacuate"}, + }, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + spec := NovaSpec{ + SchedulerHints: tt.schedulerHints, + } + result := spec.IsEvacuation() + if result != tt.expected { + t.Errorf("IsEvacuation() = %v, expected %v", result, tt.expected) + } + }) + } +} + func TestGetIntent(t *testing.T) { tests := []struct { name string From b8b1fbbf38f2418ad4cb84da962f3d60e185e41d Mon Sep 17 00:00:00 2001 From: Malte Viering Date: Thu, 12 Feb 2026 14:28:38 +0100 Subject: [PATCH 2/2] feat!: refactor reservation crd --- Makefile | 3 + api/v1alpha1/reservation_types.go | 126 +++++-- api/v1alpha1/zz_generated.deepcopy.go | 66 ++-- .../files/crds/cortex.cloud_reservations.yaml | 121 +++++-- .../filters/filter_has_enough_capacity.go | 73 +++- .../filter_has_enough_capacity_test.go | 331 +++++++++++++++++- .../reservations/commitments/syncer.go | 16 +- .../reservations/commitments/syncer_test.go | 34 +- .../reservations/controller/controller.go | 56 ++- .../controller/controller_test.go | 34 +- 
.../reservations/controller/monitor.go | 2 +- .../reservations/controller/monitor_test.go | 34 +- 12 files changed, 691 insertions(+), 205 deletions(-) diff --git a/Makefile b/Makefile index 0102e4cc..e145863d 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,9 @@ lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes test: ## Run all tests. go test ./... +.PHONY: generate +generate: deepcopy crds ## Regenerate CRDs and DeepCopy after API type changes. + .PHONY: crds crds: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects. $(CONTROLLER_GEN) rbac:roleName=manager-role crd:allowDangerousTypes=true webhook paths="./..." output:crd:artifacts:config=helm/library/cortex/files/crds diff --git a/api/v1alpha1/reservation_types.go b/api/v1alpha1/reservation_types.go index 847a1017..20ef2e3d 100644 --- a/api/v1alpha1/reservation_types.go +++ b/api/v1alpha1/reservation_types.go @@ -8,23 +8,21 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -// Additional specifications needed to place the reservation. -type ReservationSchedulerSpec struct { - // If the type of scheduler is cortex-nova, this field will contain additional - // information used by cortex-nova to place the instance. - CortexNova *ReservationSchedulerSpecCortexNova `json:"cortexNova,omitempty"` -} +// ReservationType defines the type of reservation. +type ReservationType string -// Additional specifications needed by cortex-nova to place the instance. -type ReservationSchedulerSpecCortexNova struct { - // The project ID to reserve for. - ProjectID string `json:"projectID,omitempty"` - // The domain ID to reserve for. - DomainID string `json:"domainID,omitempty"` - // The flavor name of the instance to reserve. - FlavorName string `json:"flavorName,omitempty"` - // Extra specifications relevant for initial placement of the instance. - FlavorExtraSpecs map[string]string `json:"flavorExtraSpecs,omitempty"` +const ( + // ReservationTypeCommittedResource is a reservation for committed/reserved capacity. + ReservationTypeCommittedResource ReservationType = "CommittedResourceReservation" + // ReservationTypeFailover is a reservation for failover capacity. + ReservationTypeFailover ReservationType = "FailoverReservation" +) + +// ReservationSelector defines the selector criteria for a reservation. +type ReservationSelector struct { + // IsNUMAAlignedHost specifies whether the host should be NUMA-aligned. + // +kubebuilder:validation:Optional + IsNUMAAlignedHost bool `json:"isNUMAAlignedHost,omitempty"` } // ReservationSpec defines the desired state of Reservation. @@ -33,10 +31,70 @@ type ReservationSpec struct { // This can be used to clean up reservations synced from external systems // without touching reservations created manually or by other systems. Creator string `json:"creator,omitempty"` - // Specification of the scheduler that will handle the reservation. - Scheduler ReservationSchedulerSpec `json:"scheduler,omitempty"` - // Resources requested to reserve for this instance. - Requests map[string]resource.Quantity `json:"requests,omitempty"` + + // Resources to reserve for this instance. + Resources map[string]resource.Quantity `json:"resources,omitempty"` + + // Selector specifies criteria for selecting appropriate hosts. + // +kubebuilder:validation:Optional + Selector ReservationSelector `json:"selector,omitempty"` + + // SchedulingDomain specifies the scheduling domain for this reservation (e.g., "nova", "ironcore"). 
+	// +kubebuilder:validation:Optional
+	SchedulingDomain string `json:"schedulingDomain,omitempty"`
+
+	// Type of reservation. Defaults to CommittedResourceReservation if not specified.
+	// +kubebuilder:validation:Enum=CommittedResourceReservation;FailoverReservation
+	// +kubebuilder:default=CommittedResourceReservation
+	Type ReservationType `json:"type,omitempty"`
+
+	// --- Time-related fields ---
+
+	// StartTime is the time when the reservation becomes active.
+	// +kubebuilder:validation:Optional
+	StartTime *metav1.Time `json:"startTime,omitempty"`
+
+	// EndTime is the time when the reservation expires.
+	// +kubebuilder:validation:Optional
+	EndTime *metav1.Time `json:"endTime,omitempty"`
+
+	// NOTE: ActiveTime was removed. Use StartTime and EndTime to define the active period.
+	// If you need a duration-based activation, calculate EndTime = StartTime + Duration externally.
+
+	// --- Scheduling-related fields (e.g., for Nova) ---
+
+	// ProjectID is the UUID of the project this reservation belongs to.
+	// +kubebuilder:validation:Optional
+	ProjectID string `json:"projectID,omitempty"`
+
+	// DomainID is the domain ID to reserve for.
+	// +kubebuilder:validation:Optional
+	DomainID string `json:"domainID,omitempty"`
+
+	// ResourceName is the name of the resource to reserve (e.g., FlavorName for Nova).
+	// +kubebuilder:validation:Optional
+	ResourceName string `json:"resourceName,omitempty"`
+
+	// ResourceExtraSpecs contains extra specifications relevant for initial placement
+	// of the instance (e.g., FlavorExtraSpecs for Nova).
+	// +kubebuilder:validation:Optional
+	ResourceExtraSpecs map[string]string `json:"resourceExtraSpecs,omitempty"`
+
+	// --- Placement fields (desired state) ---
+
+	// Host is the desired compute host where the reservation should be placed.
+	// This is a generic name that represents different concepts depending on the scheduling domain:
+	// - For Nova: the hypervisor hostname
+	// - For Pods: the node name
+	// The scheduler will attempt to place the reservation on this host.
+	// +kubebuilder:validation:Optional
+	Host string `json:"host,omitempty"`
+
+	// ConnectTo lists the instances that loosely use this reservation.
+	// The key is the instance (e.g., a VM UUID); the value is use-case specific and can be empty.
+	// E.g., for failover reservations, this maps VM/instance UUIDs to the host the VM is currently running on.
+	// +kubebuilder:validation:Optional
+	ConnectTo map[string]string `json:"connectTo,omitempty"`
 }
 
 // The phase in which the reservation is.
@@ -58,17 +116,39 @@ const (
 type ReservationStatus struct {
 	// The current phase of the reservation.
 	Phase ReservationStatusPhase `json:"phase,omitempty"`
+	// The current status conditions of the reservation.
+	// Conditions include:
+	// - type: Ready
+	//   status: True|False|Unknown
+	//   reason: ReservationReady
+	//   message: Reservation is successfully scheduled
+	//   lastTransitionTime:
 	// +kubebuilder:validation:Optional
 	Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"`
-	// The name of the compute host that was allocated.
-	Host string `json:"host"`
+
+	// ObservedHost is the actual host the reservation is placed on.
+	// This is set by the scheduler after successful placement and reflects the current state.
+	// It should match Spec.Host when the reservation is successfully placed.
+ // This is a generic name that represents different concepts depending on the scheduling domain: + // - For Nova: the hypervisor hostname + // - For Pods: the node name + // +kubebuilder:validation:Optional + ObservedHost string `json:"observedHost,omitempty"` + + // ObservedConnectTo maps VM/instance UUIDs to the host they are currently allocated on. + // This tracks which VMs are actually using this failover reservation and their current placement. + // Key: VM/instance UUID, Value: Host name where the VM is currently running. + // Only used when Type is FailoverReservation. + // This should reflect the actual state and may differ from Spec.ConnectTo during transitions. + // +kubebuilder:validation:Optional + ObservedConnectTo map[string]string `json:"observedConnectTo,omitempty"` } // +kubebuilder:object:root=true // +kubebuilder:subresource:status // +kubebuilder:resource:scope=Cluster -// +kubebuilder:printcolumn:name="Host",type="string",JSONPath=".status.host" +// +kubebuilder:printcolumn:name="Host",type="string",JSONPath=".status.observedHost" // +kubebuilder:printcolumn:name="Phase",type="string",JSONPath=".status.phase" // Reservation is the Schema for the reservations API diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 2551ef3e..1c4d7a1d 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -1086,56 +1086,51 @@ func (in *ReservationList) DeepCopyObject() runtime.Object { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ReservationSchedulerSpec) DeepCopyInto(out *ReservationSchedulerSpec) { +func (in *ReservationSelector) DeepCopyInto(out *ReservationSelector) { *out = *in - if in.CortexNova != nil { - in, out := &in.CortexNova, &out.CortexNova - *out = new(ReservationSchedulerSpecCortexNova) - (*in).DeepCopyInto(*out) - } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReservationSchedulerSpec. -func (in *ReservationSchedulerSpec) DeepCopy() *ReservationSchedulerSpec { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReservationSelector. +func (in *ReservationSelector) DeepCopy() *ReservationSelector { if in == nil { return nil } - out := new(ReservationSchedulerSpec) + out := new(ReservationSelector) in.DeepCopyInto(out) return out } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *ReservationSchedulerSpecCortexNova) DeepCopyInto(out *ReservationSchedulerSpecCortexNova) {
+func (in *ReservationSpec) DeepCopyInto(out *ReservationSpec) {
 	*out = *in
-	if in.FlavorExtraSpecs != nil {
-		in, out := &in.FlavorExtraSpecs, &out.FlavorExtraSpecs
+	if in.Resources != nil {
+		in, out := &in.Resources, &out.Resources
+		*out = make(map[string]resource.Quantity, len(*in))
+		for key, val := range *in {
+			(*out)[key] = val.DeepCopy()
+		}
+	}
+	out.Selector = in.Selector
+	if in.StartTime != nil {
+		in, out := &in.StartTime, &out.StartTime
+		*out = (*in).DeepCopy()
+	}
+	if in.EndTime != nil {
+		in, out := &in.EndTime, &out.EndTime
+		*out = (*in).DeepCopy()
+	}
+	if in.ResourceExtraSpecs != nil {
+		in, out := &in.ResourceExtraSpecs, &out.ResourceExtraSpecs
 		*out = make(map[string]string, len(*in))
 		for key, val := range *in {
 			(*out)[key] = val
 		}
 	}
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReservationSchedulerSpecCortexNova.
-func (in *ReservationSchedulerSpecCortexNova) DeepCopy() *ReservationSchedulerSpecCortexNova {
-	if in == nil {
-		return nil
-	}
-	out := new(ReservationSchedulerSpecCortexNova)
-	in.DeepCopyInto(out)
-	return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *ReservationSpec) DeepCopyInto(out *ReservationSpec) {
-	*out = *in
-	in.Scheduler.DeepCopyInto(&out.Scheduler)
-	if in.Requests != nil {
-		in, out := &in.Requests, &out.Requests
-		*out = make(map[string]resource.Quantity, len(*in))
+	if in.ConnectTo != nil {
+		in, out := &in.ConnectTo, &out.ConnectTo
+		*out = make(map[string]string, len(*in))
 		for key, val := range *in {
-			(*out)[key] = val.DeepCopy()
+			(*out)[key] = val
 		}
 	}
 }
@@ -1160,6 +1155,13 @@ func (in *ReservationStatus) DeepCopyInto(out *ReservationStatus) {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
 	}
+	if in.ObservedConnectTo != nil {
+		in, out := &in.ObservedConnectTo, &out.ObservedConnectTo
+		*out = make(map[string]string, len(*in))
+		for key, val := range *in {
+			(*out)[key] = val
+		}
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReservationStatus.
diff --git a/helm/library/cortex/files/crds/cortex.cloud_reservations.yaml b/helm/library/cortex/files/crds/cortex.cloud_reservations.yaml
index 0a0428c4..6459f22c 100644
--- a/helm/library/cortex/files/crds/cortex.cloud_reservations.yaml
+++ b/helm/library/cortex/files/crds/cortex.cloud_reservations.yaml
@@ -15,7 +15,7 @@ spec:
   scope: Cluster
   versions:
   - additionalPrinterColumns:
-    - jsonPath: .status.host
+    - jsonPath: .status.observedHost
       name: Host
       type: string
     - jsonPath: .status.phase
@@ -46,52 +46,97 @@ spec:
             spec:
               description: spec defines the desired state of Reservation
               properties:
+                connectTo:
+                  additionalProperties:
+                    type: string
+                  description: |-
+                    ConnectTo lists the instances that loosely use this reservation.
+                    The key is the instance (e.g., a VM UUID); the value is use-case specific and can be empty.
+                    E.g., for failover reservations, this maps VM/instance UUIDs to the host the VM is currently running on.
+                  type: object
                 creator:
                   description: |-
                     A remark that can be used to identify the creator of the reservation.
                     This can be used to clean up reservations synced from external systems
                     without touching reservations created manually or by other systems.
                   type: string
-                requests:
+                domainID:
+                  description: DomainID is the domain ID to reserve for.
+ type: string + endTime: + description: EndTime is the time when the reservation expires. + format: date-time + type: string + host: + description: |- + Host is the desired compute host where the reservation should be placed. + This is a generic name that represents different concepts depending on the scheduling domain: + - For Nova: the hypervisor hostname + - For Pods: the node name + The scheduler will attempt to place the reservation on this host. + type: string + projectID: + description: ProjectID is the UUID of the project this reservation + belongs to. + type: string + resourceExtraSpecs: + additionalProperties: + type: string + description: |- + ResourceExtraSpecs contains extra specifications relevant for initial placement + of the instance (e.g., FlavorExtraSpecs for Nova). + type: object + resourceName: + description: ResourceName is the name of the resource to reserve (e.g., + FlavorName for Nova). + type: string + resources: additionalProperties: anyOf: - type: integer - type: string pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true - description: Resources requested to reserve for this instance. + description: Resources to reserve for this instance. type: object - scheduler: - description: Specification of the scheduler that will handle the reservation. + schedulingDomain: + description: SchedulingDomain specifies the scheduling domain for + this reservation (e.g., "nova", "ironcore"). + type: string + selector: + description: Selector specifies criteria for selecting appropriate + hosts. properties: - cortexNova: - description: |- - If the type of scheduler is cortex-nova, this field will contain additional - information used by cortex-nova to place the instance. - properties: - domainID: - description: The domain ID to reserve for. - type: string - flavorExtraSpecs: - additionalProperties: - type: string - description: Extra specifications relevant for initial placement - of the instance. - type: object - flavorName: - description: The flavor name of the instance to reserve. - type: string - projectID: - description: The project ID to reserve for. - type: string - type: object + isNUMAAlignedHost: + description: IsNUMAAlignedHost specifies whether the host should + be NUMA-aligned. + type: boolean type: object + startTime: + description: StartTime is the time when the reservation becomes active. + format: date-time + type: string + type: + default: CommittedResourceReservation + description: Type of reservation. Defaults to CommittedResourceReservation + if not specified. + enum: + - CommittedResourceReservation + - FailoverReservation + type: string type: object status: description: status defines the observed state of Reservation properties: conditions: - description: The current status conditions of the reservation. + description: |- + The current status conditions of the reservation. + Conditions include: + - type: Ready + status: True|False|Unknown + reason: ReservationReady + message: Reservation is successfully scheduled + lastTransitionTime: items: description: Condition contains details for one aspect of the current state of this API Resource. @@ -147,14 +192,28 @@ spec: - type type: object type: array - host: - description: The name of the compute host that was allocated. + observedConnectTo: + additionalProperties: + type: string + description: |- + ObservedConnectTo maps VM/instance UUIDs to the host they are currently allocated on. 
+                    This tracks which VMs are actually using this failover reservation and their current placement.
+                    Key: VM/instance UUID, Value: Host name where the VM is currently running.
+                    Only used when Type is FailoverReservation.
+                    This should reflect the actual state and may differ from Spec.ConnectTo during transitions.
+                  type: object
+                observedHost:
+                  description: |-
+                    ObservedHost is the actual host the reservation is placed on.
+                    This is set by the scheduler after successful placement and reflects the current state.
+                    It should match Spec.Host when the reservation is successfully placed.
+                    This is a generic name that represents different concepts depending on the scheduling domain:
+                    - For Nova: the hypervisor hostname
+                    - For Pods: the node name
+                  type: string
                 phase:
                   description: The current phase of the reservation.
                   type: string
-              required:
-              - host
               type: object
           required:
           - spec
diff --git a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go
index 046a0c3f..0079e115 100644
--- a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go
+++ b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go
@@ -82,26 +82,65 @@ func (s *FilterHasEnoughCapacity) Run(traceLog *slog.Logger, request api.Externa
 		if reservation.Status.Phase != v1alpha1.ReservationStatusPhaseActive {
 			continue // Only consider active reservations.
 		}
-		if reservation.Spec.Scheduler.CortexNova == nil {
-			continue // Not handled by us.
+		if reservation.Spec.ResourceName == "" {
+			continue // Not handled by us (no resource name set).
 		}
-		// If the requested vm matches this reservation, free the resources.
-		if !s.Options.LockReserved &&
-			reservation.Spec.Scheduler.CortexNova.ProjectID == request.Spec.Data.ProjectID &&
-			reservation.Spec.Scheduler.CortexNova.FlavorName == request.Spec.Data.Flavor.Data.Name {
-			traceLog.Info("unlocking resources reserved by matching reservation", "reservation", reservation.Name)
-			continue
+
+		// Handle reservation based on its type.
+		switch reservation.Spec.Type {
+		case v1alpha1.ReservationTypeCommittedResource, "": // Empty string for backward compatibility (defaults to CommittedResource)
+			// For committed resource reservations: if the requested VM matches this reservation, free the resources (slotting).
+			if !s.Options.LockReserved &&
+				reservation.Spec.ProjectID == request.Spec.Data.ProjectID &&
+				reservation.Spec.ResourceName == request.Spec.Data.Flavor.Data.Name {
+				traceLog.Info("unlocking resources reserved by matching committed resource reservation", "reservation", reservation.Name)
+				continue
+			}
+		case v1alpha1.ReservationTypeFailover:
+			// For failover reservations: if the requested VM is contained in the reservation's ObservedConnectTo map
+			// AND this is an evacuation request, unlock the resources.
+			// We only unlock during evacuations because:
+			// 1. Failover reservations are specifically for HA/evacuation scenarios.
+			// 2. During live migrations or other operations, we don't want to use failover capacity.
+			// Note: we cannot use failover reservations from other VMs, as that can invalidate our HA guarantees.
+ if request.Spec.Data.IsEvacuation() { + if _, contained := reservation.Status.ObservedConnectTo[request.Spec.Data.InstanceUUID]; contained { + traceLog.Info("unlocking resources reserved by failover reservation for VM in ConnectTo (evacuation)", + "reservation", reservation.Name, + "instanceUUID", request.Spec.Data.InstanceUUID) + continue + } + } + traceLog.Debug("processing failover reservation", "reservation", reservation.Name) + } + + // Block resources on BOTH Spec.Host (desired) AND Status.ObservedHost (actual). + // This ensures capacity is blocked during the transition period when a reservation + // is being placed (Spec.Host set) and after it's placed (ObservedHost set). + // If both are the same, we only subtract once. + hostsToBlock := make(map[string]struct{}) + if reservation.Spec.Host != "" { + hostsToBlock[reservation.Spec.Host] = struct{}{} + } + if reservation.Status.ObservedHost != "" { + hostsToBlock[reservation.Status.ObservedHost] = struct{}{} } - host := reservation.Status.Host - if cpu, ok := reservation.Spec.Requests["cpu"]; ok { - freeCPU := freeResourcesByHost[host]["cpu"] - freeCPU.Sub(cpu) - freeResourcesByHost[host]["cpu"] = freeCPU + if len(hostsToBlock) == 0 { + traceLog.Debug("skipping reservation with no host", "reservation", reservation.Name) + continue } - if memory, ok := reservation.Spec.Requests["memory"]; ok { - freeMemory := freeResourcesByHost[host]["memory"] - freeMemory.Sub(memory) - freeResourcesByHost[host]["memory"] = freeMemory + + for host := range hostsToBlock { + if cpu, ok := reservation.Spec.Resources["cpu"]; ok { + freeCPU := freeResourcesByHost[host]["cpu"] + freeCPU.Sub(cpu) + freeResourcesByHost[host]["cpu"] = freeCPU + } + if memory, ok := reservation.Spec.Resources["memory"]; ok { + freeMemory := freeResourcesByHost[host]["memory"] + freeMemory.Sub(memory) + freeResourcesByHost[host]["memory"] = freeMemory + } } } diff --git a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go index a6e6561e..65328708 100644 --- a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go +++ b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go @@ -4,7 +4,17 @@ package filters import ( + "log/slog" "testing" + + api "github.com/cobaltcore-dev/cortex/api/external/nova" + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" ) func TestFilterHasEnoughCapacityOpts_Validate(t *testing.T) { @@ -12,36 +22,325 @@ func TestFilterHasEnoughCapacityOpts_Validate(t *testing.T) { name string opts FilterHasEnoughCapacityOpts expectError bool + }{ + {"valid options with lock reserved true", FilterHasEnoughCapacityOpts{LockReserved: true}, false}, + {"valid options with lock reserved false", FilterHasEnoughCapacityOpts{LockReserved: false}, false}, + {"valid options with default values", FilterHasEnoughCapacityOpts{}, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.opts.Validate() + if tt.expectError && err == nil { + t.Error("expected error, got nil") + } + if !tt.expectError && err != nil { + t.Errorf("expected no error, got %v", err) + } + }) + } +} + +// Test helpers to reduce boilerplate + 
+func buildTestScheme(t *testing.T) *runtime.Scheme { + scheme := runtime.NewScheme() + if err := hv1.SchemeBuilder.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add hv1 scheme: %v", err) + } + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add v1alpha1 scheme: %v", err) + } + return scheme +} + +func newHypervisor(name, cpuCap, cpuAlloc, memCap, memAlloc string) *hv1.Hypervisor { + return &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + Status: hv1.HypervisorStatus{ + Capacity: map[string]resource.Quantity{"cpu": resource.MustParse(cpuCap), "memory": resource.MustParse(memCap)}, + Allocation: map[string]resource.Quantity{"cpu": resource.MustParse(cpuAlloc), "memory": resource.MustParse(memAlloc)}, + }, + } +} + +// newReservation creates a reservation with configurable Spec.Host and Status.ObservedHost. +// Use specHost="" or observedHost="" to leave them unset. +func newReservation(name, specHost, observedHost, projectID, flavorName, cpu, mem string, resType v1alpha1.ReservationType, connectTo map[string]string) *v1alpha1.Reservation { + return &v1alpha1.Reservation{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + Spec: v1alpha1.ReservationSpec{ + Type: resType, + Host: specHost, + Resources: map[string]resource.Quantity{"cpu": resource.MustParse(cpu), "memory": resource.MustParse(mem)}, + ProjectID: projectID, + ResourceName: flavorName, + ConnectTo: connectTo, + }, + Status: v1alpha1.ReservationStatus{ + Phase: v1alpha1.ReservationStatusPhaseActive, + ObservedHost: observedHost, + ObservedConnectTo: connectTo, + }, + } +} + +// Convenience wrappers for common reservation types +func newCommittedReservation(name, host, projectID, flavorName, cpu, mem string) *v1alpha1.Reservation { + return newReservation(name, host, host, projectID, flavorName, cpu, mem, v1alpha1.ReservationTypeCommittedResource, nil) +} + +//nolint:unparam // projectID kept as parameter for test readability and consistency with newCommittedReservation +func newFailoverReservation(name, host, projectID, flavorName, cpu, mem string, usedBy map[string]string) *v1alpha1.Reservation { + return newReservation(name, host, host, projectID, flavorName, cpu, mem, v1alpha1.ReservationTypeFailover, usedBy) +} + +//nolint:unparam // vcpus kept as parameter for test readability +func newRequest(projectID, instanceUUID, flavorName string, vcpus, memMB int, evacuation bool, hosts ...string) api.ExternalSchedulerRequest { + hostList := make([]api.ExternalSchedulerHost, len(hosts)) + for i, h := range hosts { + hostList[i] = api.ExternalSchedulerHost{ComputeHost: h} + } + spec := api.NovaSpec{ + ProjectID: projectID, InstanceUUID: instanceUUID, NumInstances: 1, + Flavor: api.NovaObject[api.NovaFlavor]{Data: api.NovaFlavor{Name: flavorName, VCPUs: uint64(vcpus), MemoryMB: uint64(memMB)}}, //nolint:gosec // test code + } + if evacuation { + spec.SchedulerHints = map[string]any{"_nova_check_type": []any{"evacuate"}} + } + return api.ExternalSchedulerRequest{ + Spec: api.NovaObject[api.NovaSpec]{Data: spec}, + Hosts: hostList, + } +} + +func TestFilterHasEnoughCapacity_ReservationTypes(t *testing.T) { + scheme := buildTestScheme(t) + + // 4 hypervisors: 3 with capacity, 1 without + // host1: 8 CPU free, 16Gi free | host2: 4 CPU free, 8Gi free | host3: 16 CPU free, 32Gi free | host4: 0 free + hvs := []client.Object{ + newHypervisor("host1", "16", "8", "32Gi", "16Gi"), + newHypervisor("host2", "8", "4", "16Gi", "8Gi"), + newHypervisor("host3", "32", "16", "64Gi", "32Gi"), + 
newHypervisor("host4", "8", "8", "16Gi", "16Gi"), // no capacity + } + + tests := []struct { + name string + reservations []client.Object + request api.ExternalSchedulerRequest + opts FilterHasEnoughCapacityOpts + expectedHosts []string + filteredHosts []string }{ { - name: "valid options with lock reserved true", - opts: FilterHasEnoughCapacityOpts{ - LockReserved: true, + name: "CommittedResourceReservation blocks some hosts when project/flavor don't match", + reservations: []client.Object{ + newCommittedReservation("res-1", "host1", "project-A", "m1.large", "8", "16Gi"), + newCommittedReservation("res-2", "host2", "project-A", "m1.large", "4", "8Gi"), }, - expectError: false, + request: newRequest("project-B", "instance-123", "m1.small", 4, 8000, false, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host3"}, + filteredHosts: []string{"host1", "host2", "host4"}, }, { - name: "valid options with lock reserved false", - opts: FilterHasEnoughCapacityOpts{ - LockReserved: false, + name: "CommittedResourceReservation unlocks all reserved hosts when project and flavor match", + reservations: []client.Object{ + newCommittedReservation("res-1", "host1", "project-A", "m1.large", "4", "8Gi"), + newCommittedReservation("res-2", "host2", "project-A", "m1.large", "4", "8Gi"), }, - expectError: false, + request: newRequest("project-A", "instance-123", "m1.large", 4, 8000, false, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host1", "host2", "host3"}, + filteredHosts: []string{"host4"}, }, { - name: "valid options with default values", - opts: FilterHasEnoughCapacityOpts{}, - expectError: false, + name: "CommittedResourceReservation stays locked when LockReserved is true", + reservations: []client.Object{ + newCommittedReservation("res-1", "host1", "project-A", "m1.large", "8", "16Gi"), + newCommittedReservation("res-2", "host3", "project-A", "m1.large", "16", "32Gi"), + }, + request: newRequest("project-A", "instance-123", "m1.large", 4, 8000, false, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: true}, + expectedHosts: []string{"host2"}, + filteredHosts: []string{"host1", "host3", "host4"}, + }, + { + name: "Empty reservation type defaults to CommittedResourceReservation behavior", + reservations: []client.Object{ + &v1alpha1.Reservation{ + ObjectMeta: metav1.ObjectMeta{Name: "legacy-res"}, + Spec: v1alpha1.ReservationSpec{ + Host: "host1", + Resources: map[string]resource.Quantity{"cpu": resource.MustParse("04"), "memory": resource.MustParse("08Gi")}, + ProjectID: "project-A", + ResourceName: "m1.large", + }, + Status: v1alpha1.ReservationStatus{ + Phase: v1alpha1.ReservationStatusPhaseActive, + ObservedHost: "host1", + }, + }, + }, + request: newRequest("project-A", "instance-123", "m1.large", 4, 8000, false, "host1", "host2"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host1", "host2"}, + filteredHosts: []string{}, + }, + { + name: "FailoverReservation blocks hosts for non-evacuation request even when instance is in UsedBy", + reservations: []client.Object{ + newFailoverReservation("failover-1", "host1", "project-A", "m1.large", "8", "16Gi", map[string]string{"instance-123": "host5"}), + newFailoverReservation("failover-2", "host2", "project-A", "m1.large", "4", "8Gi", map[string]string{"instance-123": "host6"}), + }, + request: newRequest("project-A", "instance-123", 
"m1.large", 4, 8000, false, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host3"}, // Failover reservations stay locked for non-evacuation + filteredHosts: []string{"host1", "host2", "host4"}, + }, + { + name: "FailoverReservation unlocks hosts during evacuation when instance is in UsedBy", + reservations: []client.Object{ + newFailoverReservation("failover-1", "host1", "project-A", "m1.large", "4", "8Gi", map[string]string{"instance-123": "host5"}), + newFailoverReservation("failover-2", "host2", "project-A", "m1.large", "4", "8Gi", map[string]string{"instance-123": "host6"}), + }, + request: newRequest("project-A", "instance-123", "m1.large", 4, 8000, true, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host1", "host2", "host3"}, // Unlocked during evacuation + filteredHosts: []string{"host4"}, + }, + { + name: "FailoverReservation blocks hosts during evacuation when instance not in UsedBy", + reservations: []client.Object{ + newFailoverReservation("failover-1", "host1", "project-A", "m1.large", "8", "16Gi", map[string]string{"other-instance": "host5"}), + newFailoverReservation("failover-2", "host2", "project-A", "m1.large", "4", "8Gi", map[string]string{"another-instance": "host6"}), + }, + request: newRequest("project-A", "instance-123", "m1.large", 4, 8000, true, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host3"}, + filteredHosts: []string{"host1", "host2", "host4"}, + }, + { + name: "FailoverReservation with empty UsedBy blocks reserved host", + reservations: []client.Object{ + newFailoverReservation("failover-1", "host1", "project-A", "m1.large", "8", "16Gi", map[string]string{}), + }, + request: newRequest("project-A", "instance-123", "m1.large", 4, 8000, true, "host1", "host2", "host3"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host2", "host3"}, + filteredHosts: []string{"host1"}, + }, + { + name: "FailoverReservation with multiple instances in UsedBy unlocks for matching instance during evacuation", + reservations: []client.Object{ + newFailoverReservation("failover-1", "host1", "project-A", "m1.large", "4", "8Gi", map[string]string{"instance-111": "host5", "instance-222": "host6", "instance-333": "host7"}), + }, + request: newRequest("project-A", "instance-222", "m1.large", 4, 8000, true, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host1", "host2", "host3"}, + filteredHosts: []string{"host4"}, + }, + { + name: "No reservations - all hosts with capacity pass", + reservations: []client.Object{}, + request: newRequest("project-A", "instance-123", "m1.small", 4, 8000, false, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host1", "host2", "host3"}, + filteredHosts: []string{"host4"}, + }, + { + name: "All hosts blocked by reservations - none pass", + reservations: []client.Object{ + newCommittedReservation("res-1", "host1", "project-X", "m1.xlarge", "8", "16Gi"), + newCommittedReservation("res-2", "host2", "project-X", "m1.xlarge", "4", "8Gi"), + newCommittedReservation("res-3", "host3", "project-X", "m1.xlarge", "16", "32Gi"), + }, + request: newRequest("project-A", "instance-123", "m1.small", 4, 8000, false, "host1", "host2", "host3", "host4"), + opts: 
FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{}, + filteredHosts: []string{"host1", "host2", "host3", "host4"}, + }, + // ============================================================================ + // Tests for Spec.Host vs Status.ObservedHost behavior + // ============================================================================ + { + name: "Pending reservation (only Spec.Host set) blocks capacity on desired host", + reservations: []client.Object{ + // Pending reservation: Spec.Host is set, but ObservedHost is empty + newReservation("pending-res", "host1", "", "project-X", "m1.large", "8", "16Gi", v1alpha1.ReservationTypeCommittedResource, nil), + }, + request: newRequest("project-A", "instance-123", "m1.small", 4, 8000, false, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host2", "host3"}, // host1 blocked by pending reservation + filteredHosts: []string{"host1", "host4"}, + }, + { + name: "Reservation with different Spec.Host and ObservedHost blocks BOTH hosts", + reservations: []client.Object{ + // Reservation was requested for host1 but placed on host2 - blocks BOTH + // host1: 8 CPU free - 4 CPU reserved = 4 CPU free (still has capacity for 4 CPU request) + // host2: 4 CPU free - 4 CPU reserved = 0 CPU free (blocked) + newReservation("moved-res", "host1", "host2", "project-X", "m1.large", "4", "8Gi", v1alpha1.ReservationTypeCommittedResource, nil), + }, + request: newRequest("project-A", "instance-123", "m1.small", 4, 8000, false, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host1", "host3"}, // host1 still has capacity (4 CPU), host2 blocked (0 CPU) + filteredHosts: []string{"host2", "host4"}, + }, + { + name: "Multiple reservations: pending and placed block different hosts", + reservations: []client.Object{ + // Pending reservation blocks host1 (via Spec.Host only) + // host1: 8 CPU free - 8 CPU reserved = 0 CPU free (blocked) + newReservation("pending-res", "host1", "", "project-X", "m1.large", "8", "16Gi", v1alpha1.ReservationTypeCommittedResource, nil), + // Placed reservation blocks host2 AND host3 (via both Spec.Host and ObservedHost) + // host2: 4 CPU free - 4 CPU reserved = 0 CPU free (blocked) + // host3: 16 CPU free - 4 CPU reserved = 12 CPU free (still has capacity) + newReservation("placed-res", "host2", "host3", "project-X", "m1.large", "4", "8Gi", v1alpha1.ReservationTypeCommittedResource, nil), + }, + request: newRequest("project-A", "instance-123", "m1.small", 4, 8000, false, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host3"}, // host1 blocked by pending, host2 blocked by placed, host3 still has capacity + filteredHosts: []string{"host1", "host2", "host4"}, + }, + { + name: "Reservation with no host (neither Spec.Host nor ObservedHost) is skipped", + reservations: []client.Object{ + newReservation("no-host-res", "", "", "project-X", "m1.large", "8", "16Gi", v1alpha1.ReservationTypeCommittedResource, nil), + }, + request: newRequest("project-A", "instance-123", "m1.small", 4, 8000, false, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host1", "host2", "host3"}, // No hosts blocked - reservation has no host + filteredHosts: []string{"host4"}, // Only filtered due to no capacity }, } for _, tt := range tests { t.Run(tt.name, func(t 
*testing.T) { - err := tt.opts.Validate() - if tt.expectError && err == nil { - t.Error("expected error, got nil") + objects := make([]client.Object, 0, len(hvs)+len(tt.reservations)) + objects = append(objects, hvs...) + objects = append(objects, tt.reservations...) + step := &FilterHasEnoughCapacity{} + step.Client = fake.NewClientBuilder().WithScheme(scheme).WithObjects(objects...).Build() + step.Options = tt.opts + + result, err := step.Run(slog.Default(), tt.request) + if err != nil { + t.Fatalf("expected no error, got %v", err) } - if !tt.expectError && err != nil { - t.Errorf("expected no error, got %v", err) + + for _, host := range tt.expectedHosts { + if _, ok := result.Activations[host]; !ok { + t.Errorf("expected host %s to be present in activations", host) + } + } + + for _, host := range tt.filteredHosts { + if _, ok := result.Activations[host]; ok { + t.Errorf("expected host %s to be filtered out", host) + } } }) } diff --git a/internal/scheduling/reservations/commitments/syncer.go b/internal/scheduling/reservations/commitments/syncer.go index 969812e9..69229e2a 100644 --- a/internal/scheduling/reservations/commitments/syncer.go +++ b/internal/scheduling/reservations/commitments/syncer.go @@ -184,16 +184,12 @@ func (s *Syncer) SyncReservations(ctx context.Context) error { } commitmentUUIDShort := commitment.UUID[:5] spec := v1alpha1.ReservationSpec{ - Creator: Creator, - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{ - ProjectID: commitment.ProjectID, - DomainID: commitment.DomainID, - FlavorName: commitment.Flavor.Name, - FlavorExtraSpecs: commitment.Flavor.ExtraSpecs, - }, - }, - Requests: map[string]resource.Quantity{ + Creator: Creator, + ProjectID: commitment.ProjectID, + DomainID: commitment.DomainID, + ResourceName: commitment.Flavor.Name, + ResourceExtraSpecs: commitment.Flavor.ExtraSpecs, + Resources: map[string]resource.Quantity{ "memory": *resource.NewQuantity(int64(commitment.Flavor.RAM)*1024*1024, resource.BinarySI), "cpu": *resource.NewQuantity(int64(commitment.Flavor.VCPUs), resource.DecimalSI), // Disk is currently not considered. 
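[Note: for contrast with the commitment syncer above, which fills the flattened spec for committed resources, a failover reservation under the new API could be constructed as sketched below. This is a minimal, hedged example, not part of the patch: the reservation name, host names, and instance UUID are hypothetical; ConnectTo maps covered instance UUIDs to the hosts they currently run on.

package main

import (
	"fmt"

	"github.com/cobaltcore-dev/cortex/api/v1alpha1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	// Hypothetical failover reservation using the refactored, flattened spec.
	res := v1alpha1.Reservation{
		ObjectMeta: metav1.ObjectMeta{Name: "failover-example"},
		Spec: v1alpha1.ReservationSpec{
			Type: v1alpha1.ReservationTypeFailover,
			// Desired placement; Status.ObservedHost reflects the actual placement.
			Host: "host2",
			Resources: map[string]resource.Quantity{
				"cpu":    resource.MustParse("8"),
				"memory": resource.MustParse("16Gi"),
			},
			// Instance UUIDs covered by this failover capacity, mapped to the
			// hosts they are currently running on (hypothetical values).
			ConnectTo: map[string]string{
				"11111111-2222-3333-4444-555555555555": "host1",
			},
		},
	}
	fmt.Println(res.Spec.Type, res.Spec.Host)
}

As the capacity filter above shows, this capacity is only unlocked for an instance listed in ObservedConnectTo, and only during evacuations, so regular scheduling cannot consume the failover headroom.]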
diff --git a/internal/scheduling/reservations/commitments/syncer_test.go b/internal/scheduling/reservations/commitments/syncer_test.go index 450a68f2..ca1f5a29 100644 --- a/internal/scheduling/reservations/commitments/syncer_test.go +++ b/internal/scheduling/reservations/commitments/syncer_test.go @@ -209,23 +209,23 @@ func TestSyncer_SyncReservations_InstanceCommitments(t *testing.T) { // Verify the first reservation res := reservations.Items[0] - if res.Spec.Scheduler.CortexNova.ProjectID != "test-project-1" { - t.Errorf("Expected project ID test-project-1, got %v", res.Spec.Scheduler.CortexNova.ProjectID) + if res.Spec.ProjectID != "test-project-1" { + t.Errorf("Expected project ID test-project-1, got %v", res.Spec.ProjectID) } - if res.Spec.Scheduler.CortexNova.FlavorName != "test-flavor" { - t.Errorf("Expected flavor test-flavor, got %v", res.Spec.Scheduler.CortexNova.FlavorName) + if res.Spec.ResourceName != "test-flavor" { + t.Errorf("Expected flavor test-flavor, got %v", res.Spec.ResourceName) } // Check resource values expectedMemory := resource.MustParse("1073741824") // 1024MB in bytes - if !res.Spec.Requests["memory"].Equal(expectedMemory) { - t.Errorf("Expected memory %v, got %v", expectedMemory, res.Spec.Requests["memory"]) + if !res.Spec.Resources["memory"].Equal(expectedMemory) { + t.Errorf("Expected memory %v, got %v", expectedMemory, res.Spec.Resources["memory"]) } expectedVCPUs := resource.MustParse("2") - if !res.Spec.Requests["cpu"].Equal(expectedVCPUs) { - t.Errorf("Expected vCPUs %v, got %v", expectedVCPUs, res.Spec.Requests["cpu"]) + if !res.Spec.Resources["cpu"].Equal(expectedVCPUs) { + t.Errorf("Expected vCPUs %v, got %v", expectedVCPUs, res.Spec.Resources["cpu"]) } } @@ -241,13 +241,9 @@ func TestSyncer_SyncReservations_UpdateExisting(t *testing.T) { Name: "commitment-12345-0", // Instance commitments have -0 suffix }, Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{ - ProjectID: "old-project", - FlavorName: "old-flavor", - }, - }, - Requests: map[string]resource.Quantity{ + ProjectID: "old-project", + ResourceName: "old-flavor", + Resources: map[string]resource.Quantity{ "memory": resource.MustParse("512Mi"), "cpu": resource.MustParse("1"), }, @@ -333,12 +329,12 @@ func TestSyncer_SyncReservations_UpdateExisting(t *testing.T) { } // Verify the reservation was updated with new values - if updatedReservation.Spec.Scheduler.CortexNova.ProjectID != "new-project" { - t.Errorf("Expected project ID new-project, got %v", updatedReservation.Spec.Scheduler.CortexNova.ProjectID) + if updatedReservation.Spec.ProjectID != "new-project" { + t.Errorf("Expected project ID new-project, got %v", updatedReservation.Spec.ProjectID) } - if updatedReservation.Spec.Scheduler.CortexNova.FlavorName != "new-flavor" { - t.Errorf("Expected flavor new-flavor, got %v", updatedReservation.Spec.Scheduler.CortexNova.FlavorName) + if updatedReservation.Spec.ResourceName != "new-flavor" { + t.Errorf("Expected flavor new-flavor, got %v", updatedReservation.Spec.ResourceName) } } diff --git a/internal/scheduling/reservations/controller/controller.go b/internal/scheduling/reservations/controller/controller.go index 2f9cf03c..309057d9 100644 --- a/internal/scheduling/reservations/controller/controller.go +++ b/internal/scheduling/reservations/controller/controller.go @@ -72,15 +72,43 @@ func (r *ReservationReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, nil // Don't need to 
requeue. } - // Currently we can only reconcile cortex-nova reservations. - if res.Spec.Scheduler.CortexNova == nil { - log.Info("reservation is not a cortex-nova reservation, skipping", "reservation", req.Name) + // Sync Spec values to Status.Observed* fields + // This ensures the observed state reflects the desired state from Spec + needsStatusUpdate := false + if res.Spec.Host != "" && res.Status.ObservedHost != res.Spec.Host { + res.Status.ObservedHost = res.Spec.Host + needsStatusUpdate = true + } + if len(res.Spec.ConnectTo) > 0 { + if res.Status.ObservedConnectTo == nil { + res.Status.ObservedConnectTo = make(map[string]string) + } + for k, v := range res.Spec.ConnectTo { + if res.Status.ObservedConnectTo[k] != v { + res.Status.ObservedConnectTo[k] = v + needsStatusUpdate = true + } + } + } + if needsStatusUpdate { + old := res.DeepCopy() + patch := client.MergeFrom(old) + if err := r.Status().Patch(ctx, &res, patch); err != nil { + log.Error(err, "failed to sync spec to status") + return ctrl.Result{}, err + } + log.Info("synced spec to status", "reservation", req.Name, "host", res.Status.ObservedHost, "connectTo", res.Status.ObservedConnectTo) + } + + // Currently we can only reconcile nova reservations (those with ResourceName set). + if res.Spec.ResourceName == "" { + log.Info("reservation has no resource name, skipping", "reservation", req.Name) old := res.DeepCopy() meta.SetStatusCondition(&res.Status.Conditions, metav1.Condition{ Type: v1alpha1.ReservationConditionError, Status: metav1.ConditionTrue, - Reason: "UnsupportedScheduler", - Message: "reservation is not a cortex-nova reservation", + Reason: "MissingResourceName", + Message: "reservation has no resource name", }) res.Status.Phase = v1alpha1.ReservationStatusPhaseFailed patch := client.MergeFrom(old) @@ -93,7 +121,7 @@ func (r *ReservationReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Convert resource.Quantity to integers for the API var memoryMB uint64 - if memory, ok := res.Spec.Requests["memory"]; ok { + if memory, ok := res.Spec.Resources["memory"]; ok { memoryValue := memory.ScaledValue(resource.Mega) if memoryValue < 0 { return ctrl.Result{}, fmt.Errorf("invalid memory value: %d", memoryValue) @@ -102,7 +130,7 @@ func (r *ReservationReconciler) Reconcile(ctx context.Context, req ctrl.Request) } var cpu uint64 - if cpuQuantity, ok := res.Spec.Requests["cpu"]; ok { + if cpuQuantity, ok := res.Spec.Resources["cpu"]; ok { cpuValue := cpuQuantity.ScaledValue(resource.Milli) if cpuValue < 0 { return ctrl.Result{}, fmt.Errorf("invalid cpu value: %d", cpuValue) @@ -140,11 +168,11 @@ func (r *ReservationReconciler) Reconcile(ctx context.Context, req ctrl.Request) Data: schedulerdelegationapi.NovaSpec{ InstanceUUID: res.Name, NumInstances: 1, // One for each reservation. - ProjectID: res.Spec.Scheduler.CortexNova.ProjectID, + ProjectID: res.Spec.ProjectID, Flavor: schedulerdelegationapi.NovaObject[schedulerdelegationapi.NovaFlavor]{ Data: schedulerdelegationapi.NovaFlavor{ - Name: res.Spec.Scheduler.CortexNova.FlavorName, - ExtraSpecs: res.Spec.Scheduler.CortexNova.FlavorExtraSpecs, + Name: res.Spec.ResourceName, + ExtraSpecs: res.Spec.ResourceExtraSpecs, MemoryMB: memoryMB, VCPUs: cpu, // Disk is currently not considered. @@ -188,13 +216,14 @@ func (r *ReservationReconciler) Reconcile(ctx context.Context, req ctrl.Request) } return ctrl.Result{}, nil // No need to requeue, we didn't find a host. 
} + // Update the reservation with the found host (idx 0) host := externalSchedulerResponse.Hosts[0] log.Info("found host for reservation", "reservation", req.Name, "host", host) old := res.DeepCopy() meta.RemoveStatusCondition(&res.Status.Conditions, v1alpha1.ReservationConditionError) res.Status.Phase = v1alpha1.ReservationStatusPhaseActive - res.Status.Host = host + res.Status.ObservedHost = host patch := client.MergeFrom(old) if err := r.Status().Patch(ctx, &res, patch); err != nil { log.Error(err, "failed to patch reservation status") @@ -206,7 +235,10 @@ func (r *ReservationReconciler) Reconcile(ctx context.Context, req ctrl.Request) // SetupWithManager sets up the controller with the Manager. func (r *ReservationReconciler) SetupWithManager(mgr ctrl.Manager, mcl *multicluster.Client) error { if err := mgr.Add(manager.RunnableFunc(func(ctx context.Context) error { - return r.Init(ctx, mgr.GetClient(), r.Conf) + if err := r.Init(ctx, mgr.GetClient(), r.Conf); err != nil { + return err + } + return nil })); err != nil { return err } diff --git a/internal/scheduling/reservations/controller/controller_test.go b/internal/scheduling/reservations/controller/controller_test.go index 070eabf5..48ca7deb 100644 --- a/internal/scheduling/reservations/controller/controller_test.go +++ b/internal/scheduling/reservations/controller/controller_test.go @@ -41,12 +41,8 @@ func TestReservationReconciler_Reconcile(t *testing.T) { Name: "test-reservation", }, Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{ - ProjectID: "test-project", - FlavorName: "test-flavor", - }, - }, + ProjectID: "test-project", + ResourceName: "test-flavor", }, Status: v1alpha1.ReservationStatus{ Phase: v1alpha1.ReservationStatusPhaseActive, @@ -56,17 +52,15 @@ func TestReservationReconciler_Reconcile(t *testing.T) { shouldRequeue: false, }, { - name: "skip unsupported reservation scheduler", + name: "skip reservation without resource name", reservation: &v1alpha1.Reservation{ ObjectMeta: ctrl.ObjectMeta{ Name: "test-reservation", }, - Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{}, - }, + Spec: v1alpha1.ReservationSpec{}, }, expectedPhase: v1alpha1.ReservationStatusPhaseFailed, - expectedError: "reservation is not a cortex-nova reservation", + expectedError: "reservation has no resource name", shouldRequeue: false, }, } @@ -137,16 +131,12 @@ func TestReservationReconciler_reconcileInstanceReservation_Success(t *testing.T Name: "test-reservation", }, Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{ - ProjectID: "test-project", - FlavorName: "test-flavor", - FlavorExtraSpecs: map[string]string{ - "capabilities:hypervisor_type": "qemu", - }, - }, + ProjectID: "test-project", + ResourceName: "test-flavor", + ResourceExtraSpecs: map[string]string{ + "capabilities:hypervisor_type": "qemu", }, - Requests: map[string]resource.Quantity{ + Resources: map[string]resource.Quantity{ "memory": resource.MustParse("1Gi"), "cpu": resource.MustParse("2"), }, @@ -247,8 +237,8 @@ func TestReservationReconciler_reconcileInstanceReservation_Success(t *testing.T t.Errorf("Expected phase %v, got %v", v1alpha1.ReservationStatusPhaseActive, updated.Status.Phase) } - if updated.Status.Host != "test-host-1" { - t.Errorf("Expected host %v, got %v", "test-host-1", updated.Status.Host) + if updated.Status.ObservedHost != "test-host-1" { + 
t.Errorf("Expected host %v, got %v", "test-host-1", updated.Status.ObservedHost) } if meta.IsStatusConditionTrue(updated.Status.Conditions, v1alpha1.ReservationConditionError) { diff --git a/internal/scheduling/reservations/controller/monitor.go b/internal/scheduling/reservations/controller/monitor.go index 7e886fbc..44beaf5f 100644 --- a/internal/scheduling/reservations/controller/monitor.go +++ b/internal/scheduling/reservations/controller/monitor.go @@ -92,7 +92,7 @@ func (m *Monitor) Collect(ch chan<- prometheus.Metric) { if _, ok := resourcesByLabels[key]; !ok { resourcesByLabels[key] = map[string]uint64{} } - for resourceName, resourceQuantity := range reservation.Spec.Requests { + for resourceName, resourceQuantity := range reservation.Spec.Resources { resourcesByLabels[key][resourceName] += resourceQuantity.AsDec().UnscaledBig().Uint64() } } diff --git a/internal/scheduling/reservations/controller/monitor_test.go b/internal/scheduling/reservations/controller/monitor_test.go index fe428550..e87e6ffc 100644 --- a/internal/scheduling/reservations/controller/monitor_test.go +++ b/internal/scheduling/reservations/controller/monitor_test.go @@ -94,17 +94,15 @@ func TestMonitor_Collect_WithReservations(t *testing.T) { Name: "test-reservation-1", }, Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{}, - }, - Requests: map[string]resource.Quantity{ + ResourceName: "test-flavor", + Resources: map[string]resource.Quantity{ "memory": resource.MustParse("1Gi"), "cpu": resource.MustParse("2"), }, }, Status: v1alpha1.ReservationStatus{ - Phase: v1alpha1.ReservationStatusPhaseActive, - Host: "test-host-1", + Phase: v1alpha1.ReservationStatusPhaseActive, + ObservedHost: "test-host-1", }, }, { @@ -112,10 +110,8 @@ func TestMonitor_Collect_WithReservations(t *testing.T) { Name: "test-reservation-2", }, Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{}, - }, - Requests: map[string]resource.Quantity{ + ResourceName: "test-flavor", + Resources: map[string]resource.Quantity{ "memory": resource.MustParse("2Gi"), "cpu": resource.MustParse("4"), }, @@ -137,10 +133,8 @@ func TestMonitor_Collect_WithReservations(t *testing.T) { Name: "test-reservation-3", }, Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{}, - }, - Requests: map[string]resource.Quantity{ + ResourceName: "test-flavor", + Resources: map[string]resource.Quantity{ "memory": resource.MustParse("4Gi"), "cpu": resource.MustParse("4"), }, @@ -226,10 +220,8 @@ func TestMonitor_Collect_ResourceMetrics(t *testing.T) { Name: "test-reservation", }, Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{}, - }, - Requests: map[string]resource.Quantity{ + ResourceName: "test-flavor", + Resources: map[string]resource.Quantity{ "memory": resource.MustParse("1000Mi"), "cpu": resource.MustParse("2"), }, @@ -342,10 +334,8 @@ func TestMonitor_Collect_LabelSanitization(t *testing.T) { Name: "test-reservation", }, Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{}, - }, - Requests: map[string]resource.Quantity{ + ResourceName: "test-flavor", + Resources: map[string]resource.Quantity{ "memory": resource.MustParse("1Gi"), "cpu": 
resource.MustParse("2"), },