From 0fdb6340fcc0c182d6e65b8ce255862651b93d50 Mon Sep 17 00:00:00 2001 From: Malte Viering Date: Thu, 12 Feb 2026 13:50:08 +0100 Subject: [PATCH 1/3] feat!: refactor reservation crd --- Makefile | 3 + api/v1alpha1/reservation_types.go | 160 ++++++-- api/v1alpha1/zz_generated.deepcopy.go | 147 ++++--- .../files/crds/cortex.cloud_reservations.yaml | 141 +++++-- .../filters/filter_has_enough_capacity.go | 82 +++- .../filter_has_enough_capacity_test.go | 359 +++++++++++++++++- .../reservations/commitments/syncer.go | 27 +- .../reservations/commitments/syncer_test.go | 44 ++- .../reservations/controller/controller.go | 80 ++-- .../controller/controller_test.go | 86 ++--- .../reservations/controller/monitor.go | 44 ++- .../reservations/controller/monitor_test.go | 93 +++-- 12 files changed, 956 insertions(+), 310 deletions(-) diff --git a/Makefile b/Makefile index 0102e4cc0..e145863d6 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,9 @@ lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes test: ## Run all tests. go test ./... +.PHONY: generate +generate: deepcopy crds ## Regenerate CRDs and DeepCopy after API type changes. + .PHONY: crds crds: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects. $(CONTROLLER_GEN) rbac:roleName=manager-role crd:allowDangerousTypes=true webhook paths="./..." output:crd:artifacts:config=helm/library/cortex/files/crds diff --git a/api/v1alpha1/reservation_types.go b/api/v1alpha1/reservation_types.go index 847a1017c..1c2833584 100644 --- a/api/v1alpha1/reservation_types.go +++ b/api/v1alpha1/reservation_types.go @@ -8,68 +8,150 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -// Additional specifications needed to place the reservation. -type ReservationSchedulerSpec struct { - // If the type of scheduler is cortex-nova, this field will contain additional - // information used by cortex-nova to place the instance. - CortexNova *ReservationSchedulerSpecCortexNova `json:"cortexNova,omitempty"` -} +// ReservationType defines the type of reservation. +// In Kubernetes, "Type" is the conventional field name for sub-type discrimination +// within a resource (similar to Service.spec.type), while "Kind" refers to the +// resource type itself (Pod, Deployment, etc.). +type ReservationType string + +const ( + // ReservationTypeCommittedResource is a reservation for committed/reserved capacity. + ReservationTypeCommittedResource ReservationType = "CommittedResourceReservation" + // ReservationTypeFailover is a reservation for failover capacity. + ReservationTypeFailover ReservationType = "FailoverReservation" +) + +// CommittedResourceReservationSpec defines the spec fields specific to committed resource reservations. +type CommittedResourceReservationSpec struct { + // ResourceName is the name of the resource to reserve (e.g., FlavorName for Nova). + // +kubebuilder:validation:Optional + ResourceName string `json:"resourceName,omitempty"` -// Additional specifications needed by cortex-nova to place the instance. -type ReservationSchedulerSpecCortexNova struct { - // The project ID to reserve for. + // ResourceGroup is the group/category of the resource (e.g., "hana_medium_v2"). + // +kubebuilder:validation:Optional + ResourceGroup string `json:"resourceGroup,omitempty"` + + // ProjectID is the UUID of the project this reservation belongs to. + // +kubebuilder:validation:Optional ProjectID string `json:"projectID,omitempty"` - // The domain ID to reserve for. 
+ + // DomainID is the domain ID to reserve for. + // +kubebuilder:validation:Optional DomainID string `json:"domainID,omitempty"` - // The flavor name of the instance to reserve. - FlavorName string `json:"flavorName,omitempty"` - // Extra specifications relevant for initial placement of the instance. - FlavorExtraSpecs map[string]string `json:"flavorExtraSpecs,omitempty"` + + // Creator identifies the system or component that created this reservation. + // Used to track ownership and for cleanup purposes (e.g., "commitments-syncer"). + // +kubebuilder:validation:Optional + Creator string `json:"creator,omitempty"` +} + +// FailoverReservationSpec defines the spec fields specific to failover reservations. +type FailoverReservationSpec struct { + // ResourceGroup is the group/category of the resource (e.g., "hana_medium_v2"). + // +kubebuilder:validation:Optional + ResourceGroup string `json:"resourceGroup,omitempty"` } // ReservationSpec defines the desired state of Reservation. type ReservationSpec struct { - // A remark that can be used to identify the creator of the reservation. - // This can be used to clean up reservations synced from external systems - // without touching reservations created manually or by other systems. - Creator string `json:"creator,omitempty"` - // Specification of the scheduler that will handle the reservation. - Scheduler ReservationSchedulerSpec `json:"scheduler,omitempty"` - // Resources requested to reserve for this instance. - Requests map[string]resource.Quantity `json:"requests,omitempty"` -} + // Type of reservation. + // +kubebuilder:validation:Enum=CommittedResourceReservation;FailoverReservation + // +kubebuilder:validation:Required + Type ReservationType `json:"type"` -// The phase in which the reservation is. -type ReservationStatusPhase string + // SchedulingDomain specifies the scheduling domain for this reservation (e.g., "nova", "ironcore"). + // +kubebuilder:validation:Optional + SchedulingDomain string `json:"schedulingDomain,omitempty"` -const ( - // The reservation has been placed and is considered during scheduling. - ReservationStatusPhaseActive ReservationStatusPhase = "active" - // The reservation could not be fulfilled. - ReservationStatusPhaseFailed ReservationStatusPhase = "failed" -) + // Resources to reserve for this instance. + // +kubebuilder:validation:Optional + Resources map[string]resource.Quantity `json:"resources,omitempty"` + + // StartTime is the time when the reservation becomes active. + // +kubebuilder:validation:Optional + StartTime *metav1.Time `json:"startTime,omitempty"` + + // EndTime is the time when the reservation expires. + // +kubebuilder:validation:Optional + EndTime *metav1.Time `json:"endTime,omitempty"` + + // TargetHost is the desired compute host where the reservation should be placed. + // This is a generic name that represents different concepts depending on the scheduling domain: + // - For Nova: the hypervisor hostname + // - For Pods: the node name + // The scheduler will attempt to place the reservation on this host. + // +kubebuilder:validation:Optional + TargetHost string `json:"targetHost,omitempty"` + + // CommittedResourceReservation contains fields specific to committed resource reservations. + // Only used when Type is CommittedResourceReservation. + // +kubebuilder:validation:Optional + CommittedResourceReservation *CommittedResourceReservationSpec `json:"committedResourceReservation,omitempty"` + + // FailoverReservation contains fields specific to failover reservations. 
+ // Only used when Type is FailoverReservation. + // +kubebuilder:validation:Optional + FailoverReservation *FailoverReservationSpec `json:"failoverReservation,omitempty"` +} const ( - // Something went wrong during the handling of the reservation. - ReservationConditionError = "Error" + // ReservationConditionReady indicates whether the reservation is active and ready. + ReservationConditionReady = "Ready" ) +// CommittedResourceReservationStatus defines the status fields specific to committed resource reservations. +type CommittedResourceReservationStatus struct { + // Allocations lists the VM/instance UUIDs that are currently allocated against this reservation. + // +kubebuilder:validation:Optional + Allocations []string `json:"allocations,omitempty"` +} + +// FailoverReservationStatus defines the status fields specific to failover reservations. +type FailoverReservationStatus struct { + // Allocations maps VM/instance UUIDs to the host they are currently allocated on. + // Key: VM/instance UUID, Value: Host name where the VM is currently running. + // +kubebuilder:validation:Optional + Allocations map[string]string `json:"allocations,omitempty"` +} + // ReservationStatus defines the observed state of Reservation. type ReservationStatus struct { - // The current phase of the reservation. - Phase ReservationStatusPhase `json:"phase,omitempty"` // The current status conditions of the reservation. + // Conditions include: + // - type: Ready + // status: True|False|Unknown + // reason: ReservationReady + // message: Reservation is successfully scheduled + // lastTransitionTime: // +kubebuilder:validation:Optional Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` - // The name of the compute host that was allocated. - Host string `json:"host"` + + // ObservedHost is the actual host where the reservation is on. + // This is set by the scheduler after successful placement and reflects the current state. + // It should match Spec.TargetHost when the reservation is successfully placed. + // This is a generic name that represents different concepts depending on the scheduling domain: + // - For Nova: the hypervisor hostname + // - For Pods: the node name + // +kubebuilder:validation:Optional + ObservedHost string `json:"observedHost,omitempty"` + + // CommittedResourceReservation contains status fields specific to committed resource reservations. + // Only used when Type is CommittedResourceReservation. + // +kubebuilder:validation:Optional + CommittedResourceReservation *CommittedResourceReservationStatus `json:"committedResourceReservation,omitempty"` + + // FailoverReservation contains status fields specific to failover reservations. + // Only used when Type is FailoverReservation. 
+ // +kubebuilder:validation:Optional + FailoverReservation *FailoverReservationStatus `json:"failoverReservation,omitempty"` } // +kubebuilder:object:root=true // +kubebuilder:subresource:status // +kubebuilder:resource:scope=Cluster -// +kubebuilder:printcolumn:name="Host",type="string",JSONPath=".status.host" -// +kubebuilder:printcolumn:name="Phase",type="string",JSONPath=".status.phase" +// +kubebuilder:printcolumn:name="Type",type="string",JSONPath=".spec.type" +// +kubebuilder:printcolumn:name="Host",type="string",JSONPath=".status.observedHost" +// +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status" // Reservation is the Schema for the reservations API type Reservation struct { diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 2551ef3ea..78f9d007b 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -29,6 +29,41 @@ func (in *CinderDatasource) DeepCopy() *CinderDatasource { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CommittedResourceReservationSpec) DeepCopyInto(out *CommittedResourceReservationSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CommittedResourceReservationSpec. +func (in *CommittedResourceReservationSpec) DeepCopy() *CommittedResourceReservationSpec { + if in == nil { + return nil + } + out := new(CommittedResourceReservationSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CommittedResourceReservationStatus) DeepCopyInto(out *CommittedResourceReservationStatus) { + *out = *in + if in.Allocations != nil { + in, out := &in.Allocations, &out.Allocations + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CommittedResourceReservationStatus. +func (in *CommittedResourceReservationStatus) DeepCopy() *CommittedResourceReservationStatus { + if in == nil { + return nil + } + out := new(CommittedResourceReservationStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Datasource) DeepCopyInto(out *Datasource) { *out = *in @@ -463,6 +498,43 @@ func (in *DetectorStatus) DeepCopy() *DetectorStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *FailoverReservationSpec) DeepCopyInto(out *FailoverReservationSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailoverReservationSpec. +func (in *FailoverReservationSpec) DeepCopy() *FailoverReservationSpec { + if in == nil { + return nil + } + out := new(FailoverReservationSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *FailoverReservationStatus) DeepCopyInto(out *FailoverReservationStatus) { + *out = *in + if in.Allocations != nil { + in, out := &in.Allocations, &out.Allocations + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailoverReservationStatus. +func (in *FailoverReservationStatus) DeepCopy() *FailoverReservationStatus { + if in == nil { + return nil + } + out := new(FailoverReservationStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *FilterSpec) DeepCopyInto(out *FilterSpec) { *out = *in @@ -1085,59 +1157,34 @@ func (in *ReservationList) DeepCopyObject() runtime.Object { return nil } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ReservationSchedulerSpec) DeepCopyInto(out *ReservationSchedulerSpec) { - *out = *in - if in.CortexNova != nil { - in, out := &in.CortexNova, &out.CortexNova - *out = new(ReservationSchedulerSpecCortexNova) - (*in).DeepCopyInto(*out) - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReservationSchedulerSpec. -func (in *ReservationSchedulerSpec) DeepCopy() *ReservationSchedulerSpec { - if in == nil { - return nil - } - out := new(ReservationSchedulerSpec) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ReservationSchedulerSpecCortexNova) DeepCopyInto(out *ReservationSchedulerSpecCortexNova) { - *out = *in - if in.FlavorExtraSpecs != nil { - in, out := &in.FlavorExtraSpecs, &out.FlavorExtraSpecs - *out = make(map[string]string, len(*in)) - for key, val := range *in { - (*out)[key] = val - } - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReservationSchedulerSpecCortexNova. -func (in *ReservationSchedulerSpecCortexNova) DeepCopy() *ReservationSchedulerSpecCortexNova { - if in == nil { - return nil - } - out := new(ReservationSchedulerSpecCortexNova) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ReservationSpec) DeepCopyInto(out *ReservationSpec) { *out = *in - in.Scheduler.DeepCopyInto(&out.Scheduler) - if in.Requests != nil { - in, out := &in.Requests, &out.Requests + if in.Resources != nil { + in, out := &in.Resources, &out.Resources *out = make(map[string]resource.Quantity, len(*in)) for key, val := range *in { (*out)[key] = val.DeepCopy() } } + if in.StartTime != nil { + in, out := &in.StartTime, &out.StartTime + *out = (*in).DeepCopy() + } + if in.EndTime != nil { + in, out := &in.EndTime, &out.EndTime + *out = (*in).DeepCopy() + } + if in.CommittedResourceReservation != nil { + in, out := &in.CommittedResourceReservation, &out.CommittedResourceReservation + *out = new(CommittedResourceReservationSpec) + **out = **in + } + if in.FailoverReservation != nil { + in, out := &in.FailoverReservation, &out.FailoverReservation + *out = new(FailoverReservationSpec) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReservationSpec. 
@@ -1160,6 +1207,16 @@ func (in *ReservationStatus) DeepCopyInto(out *ReservationStatus) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.CommittedResourceReservation != nil { + in, out := &in.CommittedResourceReservation, &out.CommittedResourceReservation + *out = new(CommittedResourceReservationStatus) + (*in).DeepCopyInto(*out) + } + if in.FailoverReservation != nil { + in, out := &in.FailoverReservation, &out.FailoverReservation + *out = new(FailoverReservationStatus) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReservationStatus. diff --git a/helm/library/cortex/files/crds/cortex.cloud_reservations.yaml b/helm/library/cortex/files/crds/cortex.cloud_reservations.yaml index 0a0428c44..931baf018 100644 --- a/helm/library/cortex/files/crds/cortex.cloud_reservations.yaml +++ b/helm/library/cortex/files/crds/cortex.cloud_reservations.yaml @@ -15,7 +15,10 @@ spec: scope: Cluster versions: - additionalPrinterColumns: - - jsonPath: .status.host + - jsonPath: .spec.type + name: Type + type: string + - jsonPath: .status.observedHost name: Host type: string - jsonPath: .status.phase @@ -46,52 +49,99 @@ spec: spec: description: spec defines the desired state of Reservation properties: - creator: + committedResourceReservation: description: |- - A remark that can be used to identify the creator of the reservation. - This can be used to clean up reservations synced from external systems - without touching reservations created manually or by other systems. + CommittedResourceReservation contains fields specific to committed resource reservations. + Only used when Type is CommittedResourceReservation. + properties: + domainID: + description: DomainID is the domain ID to reserve for. + type: string + projectID: + description: ProjectID is the UUID of the project this reservation + belongs to. + type: string + resourceGroup: + description: ResourceGroup is the group/category of the resource + (e.g., "hana_medium_v2"). + type: string + resourceName: + description: ResourceName is the name of the resource to reserve + (e.g., FlavorName for Nova). + type: string + type: object + endTime: + description: EndTime is the time when the reservation expires. + format: date-time type: string - requests: + failoverReservation: + description: |- + FailoverReservation contains fields specific to failover reservations. + Only used when Type is FailoverReservation. + properties: + resourceGroup: + description: ResourceGroup is the group/category of the resource + (e.g., "hana_medium_v2"). + type: string + type: object + resources: additionalProperties: anyOf: - type: integer - type: string pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true - description: Resources requested to reserve for this instance. - type: object - scheduler: - description: Specification of the scheduler that will handle the reservation. - properties: - cortexNova: - description: |- - If the type of scheduler is cortex-nova, this field will contain additional - information used by cortex-nova to place the instance. - properties: - domainID: - description: The domain ID to reserve for. - type: string - flavorExtraSpecs: - additionalProperties: - type: string - description: Extra specifications relevant for initial placement - of the instance. - type: object - flavorName: - description: The flavor name of the instance to reserve. 
- type: string - projectID: - description: The project ID to reserve for. - type: string - type: object + description: Resources to reserve for this instance. type: object + schedulingDomain: + description: SchedulingDomain specifies the scheduling domain for + this reservation (e.g., "nova", "ironcore"). + type: string + startTime: + description: StartTime is the time when the reservation becomes active. + format: date-time + type: string + targetHost: + description: |- + TargetHost is the desired compute host where the reservation should be placed. + This is a generic name that represents different concepts depending on the scheduling domain: + - For Nova: the hypervisor hostname + - For Pods: the node name + The scheduler will attempt to place the reservation on this host. + type: string + type: + description: Type of reservation. + enum: + - CommittedResourceReservation + - FailoverReservation + type: string + required: + - type type: object status: description: status defines the observed state of Reservation properties: + committedResourceReservation: + description: |- + CommittedResourceReservation contains status fields specific to committed resource reservations. + Only used when Type is CommittedResourceReservation. + properties: + allocations: + description: Allocations lists the VM/instance UUIDs that are + currently allocated against this reservation. + items: + type: string + type: array + type: object conditions: - description: The current status conditions of the reservation. + description: |- + The current status conditions of the reservation. + Conditions include: + - type: Ready + status: True|False|Unknown + reason: ReservationReady + message: Reservation is successfully scheduled + lastTransitionTime: items: description: Condition contains details for one aspect of the current state of this API Resource. @@ -147,14 +197,31 @@ spec: - type type: object type: array - host: - description: The name of the compute host that was allocated. + failoverReservation: + description: |- + FailoverReservation contains status fields specific to failover reservations. + Only used when Type is FailoverReservation. + properties: + allocations: + additionalProperties: + type: string + description: |- + Allocations maps VM/instance UUIDs to the host they are currently allocated on. + Key: VM/instance UUID, Value: Host name where the VM is currently running. + type: object + type: object + observedHost: + description: |- + ObservedHost is the actual host where the reservation is on. + This is set by the scheduler after successful placement and reflects the current state. + It should match Spec.TargetHost when the reservation is successfully placed. + This is a generic name that represents different concepts depending on the scheduling domain: + - For Nova: the hypervisor hostname + - For Pods: the node name type: string phase: description: The current phase of the reservation. 
type: string - required: - - host type: object required: - spec diff --git a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go index 046a0c3f0..3998f1452 100644 --- a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go +++ b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go @@ -12,6 +12,7 @@ import ( "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/api/resource" ) @@ -79,29 +80,72 @@ func (s *FilterHasEnoughCapacity) Run(traceLog *slog.Logger, request api.Externa return nil, err } for _, reservation := range reservations.Items { - if reservation.Status.Phase != v1alpha1.ReservationStatusPhaseActive { - continue // Only consider active reservations. + if !meta.IsStatusConditionTrue(reservation.Status.Conditions, v1alpha1.ReservationConditionReady) { + continue // Only consider active reservations (Ready=True). } - if reservation.Spec.Scheduler.CortexNova == nil { - continue // Not handled by us. + + // Handle reservation based on its type. + switch reservation.Spec.Type { + case v1alpha1.ReservationTypeCommittedResource, "": // Empty string for backward compatibility + // Skip if no CommittedResourceReservation spec or no resource name set. + if reservation.Spec.CommittedResourceReservation == nil || reservation.Spec.CommittedResourceReservation.ResourceName == "" { + continue // Not handled by us (no resource name set). + } + // For committed resource reservations: if the requested VM matches this reservation, free the resources (slotting). + if !s.Options.LockReserved && + reservation.Spec.CommittedResourceReservation.ProjectID == request.Spec.Data.ProjectID && + reservation.Spec.CommittedResourceReservation.ResourceName == request.Spec.Data.Flavor.Data.Name { + traceLog.Info("unlocking resources reserved by matching committed resource reservation", "reservation", reservation.Name) + continue + } + case v1alpha1.ReservationTypeFailover: + // For failover reservations: if the requested VM is contained in the allocations map + // AND this is an evacuation request, unlock the resources. + // We only unlock during evacuations because: + // 1. Failover reservations are specifically for HA/evacuation scenarios. + // 2. During live migrations or other operations, we don't want to use failover capacity. + // Note: we cannot use failover reservations from other VMs, as that can invalidate our HA guarantees. + intent, err := request.GetIntent() + if err == nil && intent == api.EvacuateIntent { + if reservation.Status.FailoverReservation != nil { + if _, contained := reservation.Status.FailoverReservation.Allocations[request.Spec.Data.InstanceUUID]; contained { + traceLog.Info("unlocking resources reserved by failover reservation for VM in allocations (evacuation)", + "reservation", reservation.Name, + "instanceUUID", request.Spec.Data.InstanceUUID) + continue + } + } + } + traceLog.Debug("processing failover reservation", "reservation", reservation.Name) } - // If the requested vm matches this reservation, free the resources. 
- if !s.Options.LockReserved && - reservation.Spec.Scheduler.CortexNova.ProjectID == request.Spec.Data.ProjectID && - reservation.Spec.Scheduler.CortexNova.FlavorName == request.Spec.Data.Flavor.Data.Name { - traceLog.Info("unlocking resources reserved by matching reservation", "reservation", reservation.Name) - continue + + // Block resources on BOTH Spec.TargetHost (desired) AND Status.ObservedHost (actual). + // This ensures capacity is blocked during the transition period when a reservation + // is being placed (TargetHost set) and after it's placed (ObservedHost set). + // If both are the same, we only subtract once. + hostsToBlock := make(map[string]struct{}) + if reservation.Spec.TargetHost != "" { + hostsToBlock[reservation.Spec.TargetHost] = struct{}{} + } + if reservation.Status.ObservedHost != "" { + hostsToBlock[reservation.Status.ObservedHost] = struct{}{} } - host := reservation.Status.Host - if cpu, ok := reservation.Spec.Requests["cpu"]; ok { - freeCPU := freeResourcesByHost[host]["cpu"] - freeCPU.Sub(cpu) - freeResourcesByHost[host]["cpu"] = freeCPU + if len(hostsToBlock) == 0 { + traceLog.Debug("skipping reservation with no host", "reservation", reservation.Name) + continue } - if memory, ok := reservation.Spec.Requests["memory"]; ok { - freeMemory := freeResourcesByHost[host]["memory"] - freeMemory.Sub(memory) - freeResourcesByHost[host]["memory"] = freeMemory + + for host := range hostsToBlock { + if cpu, ok := reservation.Spec.Resources["cpu"]; ok { + freeCPU := freeResourcesByHost[host]["cpu"] + freeCPU.Sub(cpu) + freeResourcesByHost[host]["cpu"] = freeCPU + } + if memory, ok := reservation.Spec.Resources["memory"]; ok { + freeMemory := freeResourcesByHost[host]["memory"] + freeMemory.Sub(memory) + freeResourcesByHost[host]["memory"] = freeMemory + } } } diff --git a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go index a6e6561e9..d90728b74 100644 --- a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go +++ b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go @@ -4,7 +4,17 @@ package filters import ( + "log/slog" "testing" + + api "github.com/cobaltcore-dev/cortex/api/external/nova" + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" ) func TestFilterHasEnoughCapacityOpts_Validate(t *testing.T) { @@ -13,35 +23,352 @@ func TestFilterHasEnoughCapacityOpts_Validate(t *testing.T) { opts FilterHasEnoughCapacityOpts expectError bool }{ + {"valid options with lock reserved true", FilterHasEnoughCapacityOpts{LockReserved: true}, false}, + {"valid options with lock reserved false", FilterHasEnoughCapacityOpts{LockReserved: false}, false}, + {"valid options with default values", FilterHasEnoughCapacityOpts{}, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.opts.Validate() + if tt.expectError && err == nil { + t.Error("expected error, got nil") + } + if !tt.expectError && err != nil { + t.Errorf("expected no error, got %v", err) + } + }) + } +} + +// Test helpers to reduce boilerplate + +func buildTestScheme(t *testing.T) *runtime.Scheme { + scheme := runtime.NewScheme() + if err := 
hv1.SchemeBuilder.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add hv1 scheme: %v", err) + } + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatalf("failed to add v1alpha1 scheme: %v", err) + } + return scheme +} + +func newHypervisor(name, cpuCap, cpuAlloc, memCap, memAlloc string) *hv1.Hypervisor { + return &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + Status: hv1.HypervisorStatus{ + Capacity: map[string]resource.Quantity{"cpu": resource.MustParse(cpuCap), "memory": resource.MustParse(memCap)}, + Allocation: map[string]resource.Quantity{"cpu": resource.MustParse(cpuAlloc), "memory": resource.MustParse(memAlloc)}, + }, + } +} + +// newReservation creates a reservation with configurable Spec.TargetHost and Status.ObservedHost. +// Use specHost="" or observedHost="" to leave them unset. +func newReservation(name, specHost, observedHost, projectID, flavorName, cpu, mem string, resType v1alpha1.ReservationType, connectTo map[string]string) *v1alpha1.Reservation { + res := &v1alpha1.Reservation{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + Spec: v1alpha1.ReservationSpec{ + Type: resType, + TargetHost: specHost, + Resources: map[string]resource.Quantity{"cpu": resource.MustParse(cpu), "memory": resource.MustParse(mem)}, + }, + Status: v1alpha1.ReservationStatus{ + Conditions: []metav1.Condition{ + { + Type: v1alpha1.ReservationConditionReady, + Status: metav1.ConditionTrue, + Reason: "ReservationActive", + }, + }, + ObservedHost: observedHost, + }, + } + + // Set type-specific fields + switch resType { + case v1alpha1.ReservationTypeCommittedResource, "": + res.Spec.CommittedResourceReservation = &v1alpha1.CommittedResourceReservationSpec{ + ProjectID: projectID, + ResourceName: flavorName, + } + case v1alpha1.ReservationTypeFailover: + res.Spec.FailoverReservation = &v1alpha1.FailoverReservationSpec{} + if connectTo != nil { + res.Status.FailoverReservation = &v1alpha1.FailoverReservationStatus{ + Allocations: connectTo, + } + } + } + + return res +} + +// Convenience wrappers for common reservation types +func newCommittedReservation(name, host, projectID, flavorName, cpu, mem string) *v1alpha1.Reservation { + return newReservation(name, host, host, projectID, flavorName, cpu, mem, v1alpha1.ReservationTypeCommittedResource, nil) +} + +//nolint:unparam // projectID kept as parameter for test readability and consistency with newCommittedReservation +func newFailoverReservation(name, host, projectID, flavorName, cpu, mem string, usedBy map[string]string) *v1alpha1.Reservation { + return newReservation(name, host, host, projectID, flavorName, cpu, mem, v1alpha1.ReservationTypeFailover, usedBy) +} + +//nolint:unparam // vcpus kept as parameter for test readability +func newRequest(projectID, instanceUUID, flavorName string, vcpus, memMB int, evacuation bool, hosts ...string) api.ExternalSchedulerRequest { + hostList := make([]api.ExternalSchedulerHost, len(hosts)) + for i, h := range hosts { + hostList[i] = api.ExternalSchedulerHost{ComputeHost: h} + } + spec := api.NovaSpec{ + ProjectID: projectID, InstanceUUID: instanceUUID, NumInstances: 1, + Flavor: api.NovaObject[api.NovaFlavor]{Data: api.NovaFlavor{Name: flavorName, VCPUs: uint64(vcpus), MemoryMB: uint64(memMB)}}, //nolint:gosec // test code + } + if evacuation { + spec.SchedulerHints = map[string]any{"_nova_check_type": []any{"evacuate"}} + } + return api.ExternalSchedulerRequest{ + Spec: api.NovaObject[api.NovaSpec]{Data: spec}, + Hosts: hostList, + } +} + +func 
TestFilterHasEnoughCapacity_ReservationTypes(t *testing.T) { + scheme := buildTestScheme(t) + + // 4 hypervisors: 3 with capacity, 1 without + // host1: 8 CPU free, 16Gi free | host2: 4 CPU free, 8Gi free | host3: 16 CPU free, 32Gi free | host4: 0 free + hvs := []client.Object{ + newHypervisor("host1", "16", "8", "32Gi", "16Gi"), + newHypervisor("host2", "8", "4", "16Gi", "8Gi"), + newHypervisor("host3", "32", "16", "64Gi", "32Gi"), + newHypervisor("host4", "8", "8", "16Gi", "16Gi"), // no capacity + } + + tests := []struct { + name string + reservations []client.Object + request api.ExternalSchedulerRequest + opts FilterHasEnoughCapacityOpts + expectedHosts []string + filteredHosts []string + }{ + { + name: "CommittedResourceReservation blocks some hosts when project/flavor don't match", + reservations: []client.Object{ + newCommittedReservation("res-1", "host1", "project-A", "m1.large", "8", "16Gi"), + newCommittedReservation("res-2", "host2", "project-A", "m1.large", "4", "8Gi"), + }, + request: newRequest("project-B", "instance-123", "m1.small", 4, 8000, false, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host3"}, + filteredHosts: []string{"host1", "host2", "host4"}, + }, + { + name: "CommittedResourceReservation unlocks all reserved hosts when project and flavor match", + reservations: []client.Object{ + newCommittedReservation("res-1", "host1", "project-A", "m1.large", "4", "8Gi"), + newCommittedReservation("res-2", "host2", "project-A", "m1.large", "4", "8Gi"), + }, + request: newRequest("project-A", "instance-123", "m1.large", 4, 8000, false, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host1", "host2", "host3"}, + filteredHosts: []string{"host4"}, + }, + { + name: "CommittedResourceReservation stays locked when LockReserved is true", + reservations: []client.Object{ + newCommittedReservation("res-1", "host1", "project-A", "m1.large", "8", "16Gi"), + newCommittedReservation("res-2", "host3", "project-A", "m1.large", "16", "32Gi"), + }, + request: newRequest("project-A", "instance-123", "m1.large", 4, 8000, false, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: true}, + expectedHosts: []string{"host2"}, + filteredHosts: []string{"host1", "host3", "host4"}, + }, + { + name: "Empty reservation type defaults to CommittedResourceReservation behavior", + reservations: []client.Object{ + &v1alpha1.Reservation{ + ObjectMeta: metav1.ObjectMeta{Name: "legacy-res"}, + Spec: v1alpha1.ReservationSpec{ + TargetHost: "host1", + Resources: map[string]resource.Quantity{"cpu": resource.MustParse("04"), "memory": resource.MustParse("08Gi")}, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + ProjectID: "project-A", + ResourceName: "m1.large", + }, + }, + Status: v1alpha1.ReservationStatus{ + Conditions: []metav1.Condition{ + { + Type: v1alpha1.ReservationConditionReady, + Status: metav1.ConditionTrue, + Reason: "ReservationActive", + }, + }, + ObservedHost: "host1", + }, + }, + }, + request: newRequest("project-A", "instance-123", "m1.large", 4, 8000, false, "host1", "host2"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host1", "host2"}, + filteredHosts: []string{}, + }, + { + name: "FailoverReservation blocks hosts for non-evacuation request even when instance is in UsedBy", + reservations: []client.Object{ + 
newFailoverReservation("failover-1", "host1", "project-A", "m1.large", "8", "16Gi", map[string]string{"instance-123": "host5"}), + newFailoverReservation("failover-2", "host2", "project-A", "m1.large", "4", "8Gi", map[string]string{"instance-123": "host6"}), + }, + request: newRequest("project-A", "instance-123", "m1.large", 4, 8000, false, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host3"}, // Failover reservations stay locked for non-evacuation + filteredHosts: []string{"host1", "host2", "host4"}, + }, + { + name: "FailoverReservation unlocks hosts during evacuation when instance is in UsedBy", + reservations: []client.Object{ + newFailoverReservation("failover-1", "host1", "project-A", "m1.large", "4", "8Gi", map[string]string{"instance-123": "host5"}), + newFailoverReservation("failover-2", "host2", "project-A", "m1.large", "4", "8Gi", map[string]string{"instance-123": "host6"}), + }, + request: newRequest("project-A", "instance-123", "m1.large", 4, 8000, true, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host1", "host2", "host3"}, // Unlocked during evacuation + filteredHosts: []string{"host4"}, + }, + { + name: "FailoverReservation blocks hosts during evacuation when instance not in UsedBy", + reservations: []client.Object{ + newFailoverReservation("failover-1", "host1", "project-A", "m1.large", "8", "16Gi", map[string]string{"other-instance": "host5"}), + newFailoverReservation("failover-2", "host2", "project-A", "m1.large", "4", "8Gi", map[string]string{"another-instance": "host6"}), + }, + request: newRequest("project-A", "instance-123", "m1.large", 4, 8000, true, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host3"}, + filteredHosts: []string{"host1", "host2", "host4"}, + }, { - name: "valid options with lock reserved true", - opts: FilterHasEnoughCapacityOpts{ - LockReserved: true, + name: "FailoverReservation with empty UsedBy blocks reserved host", + reservations: []client.Object{ + newFailoverReservation("failover-1", "host1", "project-A", "m1.large", "8", "16Gi", map[string]string{}), }, - expectError: false, + request: newRequest("project-A", "instance-123", "m1.large", 4, 8000, true, "host1", "host2", "host3"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host2", "host3"}, + filteredHosts: []string{"host1"}, }, { - name: "valid options with lock reserved false", - opts: FilterHasEnoughCapacityOpts{ - LockReserved: false, + name: "FailoverReservation with multiple instances in UsedBy unlocks for matching instance during evacuation", + reservations: []client.Object{ + newFailoverReservation("failover-1", "host1", "project-A", "m1.large", "4", "8Gi", map[string]string{"instance-111": "host5", "instance-222": "host6", "instance-333": "host7"}), }, - expectError: false, + request: newRequest("project-A", "instance-222", "m1.large", 4, 8000, true, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host1", "host2", "host3"}, + filteredHosts: []string{"host4"}, }, { - name: "valid options with default values", - opts: FilterHasEnoughCapacityOpts{}, - expectError: false, + name: "No reservations - all hosts with capacity pass", + reservations: []client.Object{}, + request: newRequest("project-A", "instance-123", "m1.small", 4, 8000, false, 
"host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host1", "host2", "host3"}, + filteredHosts: []string{"host4"}, + }, + { + name: "All hosts blocked by reservations - none pass", + reservations: []client.Object{ + newCommittedReservation("res-1", "host1", "project-X", "m1.xlarge", "8", "16Gi"), + newCommittedReservation("res-2", "host2", "project-X", "m1.xlarge", "4", "8Gi"), + newCommittedReservation("res-3", "host3", "project-X", "m1.xlarge", "16", "32Gi"), + }, + request: newRequest("project-A", "instance-123", "m1.small", 4, 8000, false, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{}, + filteredHosts: []string{"host1", "host2", "host3", "host4"}, + }, + // ============================================================================ + // Tests for Spec.Host vs Status.ObservedHost behavior + // ============================================================================ + { + name: "Pending reservation (only Spec.Host set) blocks capacity on desired host", + reservations: []client.Object{ + // Pending reservation: Spec.Host is set, but ObservedHost is empty + newReservation("pending-res", "host1", "", "project-X", "m1.large", "8", "16Gi", v1alpha1.ReservationTypeCommittedResource, nil), + }, + request: newRequest("project-A", "instance-123", "m1.small", 4, 8000, false, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host2", "host3"}, // host1 blocked by pending reservation + filteredHosts: []string{"host1", "host4"}, + }, + { + name: "Reservation with different Spec.Host and ObservedHost blocks BOTH hosts", + reservations: []client.Object{ + // Reservation was requested for host1 but placed on host2 - blocks BOTH + // host1: 8 CPU free - 4 CPU reserved = 4 CPU free (still has capacity for 4 CPU request) + // host2: 4 CPU free - 4 CPU reserved = 0 CPU free (blocked) + newReservation("moved-res", "host1", "host2", "project-X", "m1.large", "4", "8Gi", v1alpha1.ReservationTypeCommittedResource, nil), + }, + request: newRequest("project-A", "instance-123", "m1.small", 4, 8000, false, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host1", "host3"}, // host1 still has capacity (4 CPU), host2 blocked (0 CPU) + filteredHosts: []string{"host2", "host4"}, + }, + { + name: "Multiple reservations: pending and placed block different hosts", + reservations: []client.Object{ + // Pending reservation blocks host1 (via Spec.Host only) + // host1: 8 CPU free - 8 CPU reserved = 0 CPU free (blocked) + newReservation("pending-res", "host1", "", "project-X", "m1.large", "8", "16Gi", v1alpha1.ReservationTypeCommittedResource, nil), + // Placed reservation blocks host2 AND host3 (via both Spec.Host and ObservedHost) + // host2: 4 CPU free - 4 CPU reserved = 0 CPU free (blocked) + // host3: 16 CPU free - 4 CPU reserved = 12 CPU free (still has capacity) + newReservation("placed-res", "host2", "host3", "project-X", "m1.large", "4", "8Gi", v1alpha1.ReservationTypeCommittedResource, nil), + }, + request: newRequest("project-A", "instance-123", "m1.small", 4, 8000, false, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host3"}, // host1 blocked by pending, host2 blocked by placed, host3 still has capacity + filteredHosts: []string{"host1", "host2", 
"host4"}, + }, + { + name: "Reservation with no host (neither Spec.Host nor ObservedHost) is skipped", + reservations: []client.Object{ + newReservation("no-host-res", "", "", "project-X", "m1.large", "8", "16Gi", v1alpha1.ReservationTypeCommittedResource, nil), + }, + request: newRequest("project-A", "instance-123", "m1.small", 4, 8000, false, "host1", "host2", "host3", "host4"), + opts: FilterHasEnoughCapacityOpts{LockReserved: false}, + expectedHosts: []string{"host1", "host2", "host3"}, // No hosts blocked - reservation has no host + filteredHosts: []string{"host4"}, // Only filtered due to no capacity }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - err := tt.opts.Validate() - if tt.expectError && err == nil { - t.Error("expected error, got nil") + objects := make([]client.Object, 0, len(hvs)+len(tt.reservations)) + objects = append(objects, hvs...) + objects = append(objects, tt.reservations...) + step := &FilterHasEnoughCapacity{} + step.Client = fake.NewClientBuilder().WithScheme(scheme).WithObjects(objects...).Build() + step.Options = tt.opts + + result, err := step.Run(slog.Default(), tt.request) + if err != nil { + t.Fatalf("expected no error, got %v", err) } - if !tt.expectError && err != nil { - t.Errorf("expected no error, got %v", err) + + for _, host := range tt.expectedHosts { + if _, ok := result.Activations[host]; !ok { + t.Errorf("expected host %s to be present in activations", host) + } + } + + for _, host := range tt.filteredHosts { + if _, ok := result.Activations[host]; ok { + t.Errorf("expected host %s to be filtered out", host) + } } }) } diff --git a/internal/scheduling/reservations/commitments/syncer.go b/internal/scheduling/reservations/commitments/syncer.go index dd708c132..1b0d455b0 100644 --- a/internal/scheduling/reservations/commitments/syncer.go +++ b/internal/scheduling/reservations/commitments/syncer.go @@ -23,8 +23,8 @@ import ( var ( syncLog = ctrl.Log.WithName("sync") - // Identifier for the creator of reservations. - Creator = "commitments syncer" + // CreatorValue identifies reservations created by this syncer. + CreatorValue = "commitments-syncer" ) type SyncerConfig struct { @@ -191,20 +191,18 @@ func (s *Syncer) SyncReservations(ctx context.Context) error { } commitmentUUIDShort := commitment.UUID[:5] spec := v1alpha1.ReservationSpec{ - Creator: Creator, - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{ - ProjectID: commitment.ProjectID, - DomainID: commitment.DomainID, - FlavorName: commitment.Flavor.Name, - FlavorExtraSpecs: commitment.Flavor.ExtraSpecs, - }, - }, - Requests: map[string]resource.Quantity{ + Type: v1alpha1.ReservationTypeCommittedResource, + Resources: map[string]resource.Quantity{ "memory": *resource.NewQuantity(int64(commitment.Flavor.RAM)*1024*1024, resource.BinarySI), "cpu": *resource.NewQuantity(int64(commitment.Flavor.VCPUs), resource.DecimalSI), // Disk is currently not considered. }, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + ProjectID: commitment.ProjectID, + DomainID: commitment.DomainID, + ResourceName: commitment.Flavor.Name, + Creator: CreatorValue, + }, } for n := range commitment.Amount { // N instances meta := ctrl.ObjectMeta{ @@ -260,8 +258,9 @@ func (s *Syncer) SyncReservations(ctx context.Context) error { return err } for _, existing := range existingReservations.Items { - // Only manage reservations created by this syncer. 
- if existing.Spec.Creator != Creator { + // Only manage reservations created by this syncer (identified by Creator field). + if existing.Spec.CommittedResourceReservation == nil || + existing.Spec.CommittedResourceReservation.Creator != CreatorValue { continue } if _, found := reservationsByName[existing.Name]; !found { diff --git a/internal/scheduling/reservations/commitments/syncer_test.go b/internal/scheduling/reservations/commitments/syncer_test.go index 1d4156ee6..4db74801b 100644 --- a/internal/scheduling/reservations/commitments/syncer_test.go +++ b/internal/scheduling/reservations/commitments/syncer_test.go @@ -208,23 +208,27 @@ func TestSyncer_SyncReservations_InstanceCommitments(t *testing.T) { // Verify the first reservation res := reservations.Items[0] - if res.Spec.Scheduler.CortexNova.ProjectID != "test-project-1" { - t.Errorf("Expected project ID test-project-1, got %v", res.Spec.Scheduler.CortexNova.ProjectID) + if res.Spec.CommittedResourceReservation == nil { + t.Errorf("Expected CommittedResourceReservation to be set") + return + } + if res.Spec.CommittedResourceReservation.ProjectID != "test-project-1" { + t.Errorf("Expected project ID test-project-1, got %v", res.Spec.CommittedResourceReservation.ProjectID) } - if res.Spec.Scheduler.CortexNova.FlavorName != "test-flavor" { - t.Errorf("Expected flavor test-flavor, got %v", res.Spec.Scheduler.CortexNova.FlavorName) + if res.Spec.CommittedResourceReservation.ResourceName != "test-flavor" { + t.Errorf("Expected flavor test-flavor, got %v", res.Spec.CommittedResourceReservation.ResourceName) } // Check resource values expectedMemory := resource.MustParse("1073741824") // 1024MB in bytes - if !res.Spec.Requests["memory"].Equal(expectedMemory) { - t.Errorf("Expected memory %v, got %v", expectedMemory, res.Spec.Requests["memory"]) + if !res.Spec.Resources["memory"].Equal(expectedMemory) { + t.Errorf("Expected memory %v, got %v", expectedMemory, res.Spec.Resources["memory"]) } expectedVCPUs := resource.MustParse("2") - if !res.Spec.Requests["cpu"].Equal(expectedVCPUs) { - t.Errorf("Expected vCPUs %v, got %v", expectedVCPUs, res.Spec.Requests["cpu"]) + if !res.Spec.Resources["cpu"].Equal(expectedVCPUs) { + t.Errorf("Expected vCPUs %v, got %v", expectedVCPUs, res.Spec.Resources["cpu"]) } } @@ -240,13 +244,13 @@ func TestSyncer_SyncReservations_UpdateExisting(t *testing.T) { Name: "commitment-12345-0", // Instance commitments have -0 suffix }, Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{ - ProjectID: "old-project", - FlavorName: "old-flavor", - }, + Type: v1alpha1.ReservationTypeCommittedResource, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + ProjectID: "old-project", + ResourceName: "old-flavor", + Creator: CreatorValue, }, - Requests: map[string]resource.Quantity{ + Resources: map[string]resource.Quantity{ "memory": resource.MustParse("512Mi"), "cpu": resource.MustParse("1"), }, @@ -332,12 +336,16 @@ func TestSyncer_SyncReservations_UpdateExisting(t *testing.T) { } // Verify the reservation was updated with new values - if updatedReservation.Spec.Scheduler.CortexNova.ProjectID != "new-project" { - t.Errorf("Expected project ID new-project, got %v", updatedReservation.Spec.Scheduler.CortexNova.ProjectID) + if updatedReservation.Spec.CommittedResourceReservation == nil { + t.Errorf("Expected CommittedResourceReservation to be set") + return + } + if 
updatedReservation.Spec.CommittedResourceReservation.ProjectID != "new-project" { + t.Errorf("Expected project ID new-project, got %v", updatedReservation.Spec.CommittedResourceReservation.ProjectID) } - if updatedReservation.Spec.Scheduler.CortexNova.FlavorName != "new-flavor" { - t.Errorf("Expected flavor new-flavor, got %v", updatedReservation.Spec.Scheduler.CortexNova.FlavorName) + if updatedReservation.Spec.CommittedResourceReservation.ResourceName != "new-flavor" { + t.Errorf("Expected flavor new-flavor, got %v", updatedReservation.Spec.CommittedResourceReservation.ResourceName) } } diff --git a/internal/scheduling/reservations/controller/controller.go b/internal/scheduling/reservations/controller/controller.go index 2f9cf03c3..62055ed18 100644 --- a/internal/scheduling/reservations/controller/controller.go +++ b/internal/scheduling/reservations/controller/controller.go @@ -66,23 +66,43 @@ func (r *ReservationReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Can happen when the resource was just deleted. return ctrl.Result{}, err } - // If the reservation is already active, skip it. - if res.Status.Phase == v1alpha1.ReservationStatusPhaseActive { + // If the reservation is already active (Ready=True), skip it. + if meta.IsStatusConditionTrue(res.Status.Conditions, v1alpha1.ReservationConditionReady) { log.Info("reservation is already active, skipping", "reservation", req.Name) return ctrl.Result{}, nil // Don't need to requeue. } - // Currently we can only reconcile cortex-nova reservations. - if res.Spec.Scheduler.CortexNova == nil { - log.Info("reservation is not a cortex-nova reservation, skipping", "reservation", req.Name) + // Sync Spec values to Status.Observed* fields + // This ensures the observed state reflects the desired state from Spec + needsStatusUpdate := false + if res.Spec.TargetHost != "" && res.Status.ObservedHost != res.Spec.TargetHost { + res.Status.ObservedHost = res.Spec.TargetHost + needsStatusUpdate = true + } + if needsStatusUpdate { + old := res.DeepCopy() + patch := client.MergeFrom(old) + if err := r.Status().Patch(ctx, &res, patch); err != nil { + log.Error(err, "failed to sync spec to status") + return ctrl.Result{}, err + } + log.Info("synced spec to status", "reservation", req.Name, "host", res.Status.ObservedHost) + } + + // Currently we can only reconcile nova CommittedResourceReservations (those with ResourceName set). 
+ resourceName := "" + if res.Spec.CommittedResourceReservation != nil { + resourceName = res.Spec.CommittedResourceReservation.ResourceName + } + if resourceName == "" { + log.Info("reservation has no resource name, skipping", "reservation", req.Name) old := res.DeepCopy() meta.SetStatusCondition(&res.Status.Conditions, metav1.Condition{ - Type: v1alpha1.ReservationConditionError, - Status: metav1.ConditionTrue, - Reason: "UnsupportedScheduler", - Message: "reservation is not a cortex-nova reservation", + Type: v1alpha1.ReservationConditionReady, + Status: metav1.ConditionFalse, + Reason: "MissingResourceName", + Message: "reservation has no resource name", }) - res.Status.Phase = v1alpha1.ReservationStatusPhaseFailed patch := client.MergeFrom(old) if err := r.Status().Patch(ctx, &res, patch); err != nil { log.Error(err, "failed to patch reservation status") @@ -93,7 +113,7 @@ func (r *ReservationReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Convert resource.Quantity to integers for the API var memoryMB uint64 - if memory, ok := res.Spec.Requests["memory"]; ok { + if memory, ok := res.Spec.Resources["memory"]; ok { memoryValue := memory.ScaledValue(resource.Mega) if memoryValue < 0 { return ctrl.Result{}, fmt.Errorf("invalid memory value: %d", memoryValue) @@ -102,7 +122,7 @@ func (r *ReservationReconciler) Reconcile(ctx context.Context, req ctrl.Request) } var cpu uint64 - if cpuQuantity, ok := res.Spec.Requests["cpu"]; ok { + if cpuQuantity, ok := res.Spec.Resources["cpu"]; ok { cpuValue := cpuQuantity.ScaledValue(resource.Milli) if cpuValue < 0 { return ctrl.Result{}, fmt.Errorf("invalid cpu value: %d", cpuValue) @@ -131,6 +151,12 @@ func (r *ReservationReconciler) Reconcile(ctx context.Context, req ctrl.Request) weights[host.ComputeHost] = 0.0 } + // Get project ID from CommittedResourceReservation spec if available. + projectID := "" + if res.Spec.CommittedResourceReservation != nil { + projectID = res.Spec.CommittedResourceReservation.ProjectID + } + // Call the external scheduler delegation API to get a host for the reservation. externalSchedulerRequest := schedulerdelegationapi.ExternalSchedulerRequest{ Reservation: true, @@ -140,13 +166,12 @@ func (r *ReservationReconciler) Reconcile(ctx context.Context, req ctrl.Request) Data: schedulerdelegationapi.NovaSpec{ InstanceUUID: res.Name, NumInstances: 1, // One for each reservation. - ProjectID: res.Spec.Scheduler.CortexNova.ProjectID, + ProjectID: projectID, Flavor: schedulerdelegationapi.NovaObject[schedulerdelegationapi.NovaFlavor]{ Data: schedulerdelegationapi.NovaFlavor{ - Name: res.Spec.Scheduler.CortexNova.FlavorName, - ExtraSpecs: res.Spec.Scheduler.CortexNova.FlavorExtraSpecs, - MemoryMB: memoryMB, - VCPUs: cpu, + Name: resourceName, + MemoryMB: memoryMB, + VCPUs: cpu, // Disk is currently not considered. 
}, }, @@ -175,12 +200,11 @@ func (r *ReservationReconciler) Reconcile(ctx context.Context, req ctrl.Request) log.Info("no hosts found for reservation", "reservation", req.Name) old := res.DeepCopy() meta.SetStatusCondition(&res.Status.Conditions, metav1.Condition{ - Type: v1alpha1.ReservationConditionError, - Status: metav1.ConditionTrue, + Type: v1alpha1.ReservationConditionReady, + Status: metav1.ConditionFalse, Reason: "NoHostsFound", Message: "no hosts found for reservation", }) - res.Status.Phase = v1alpha1.ReservationStatusPhaseFailed patch := client.MergeFrom(old) if err := r.Status().Patch(ctx, &res, patch); err != nil { log.Error(err, "failed to patch reservation status") @@ -188,13 +212,18 @@ func (r *ReservationReconciler) Reconcile(ctx context.Context, req ctrl.Request) } return ctrl.Result{}, nil // No need to requeue, we didn't find a host. } + // Update the reservation with the found host (idx 0) host := externalSchedulerResponse.Hosts[0] log.Info("found host for reservation", "reservation", req.Name, "host", host) old := res.DeepCopy() - meta.RemoveStatusCondition(&res.Status.Conditions, v1alpha1.ReservationConditionError) - res.Status.Phase = v1alpha1.ReservationStatusPhaseActive - res.Status.Host = host + meta.SetStatusCondition(&res.Status.Conditions, metav1.Condition{ + Type: v1alpha1.ReservationConditionReady, + Status: metav1.ConditionTrue, + Reason: "ReservationActive", + Message: "reservation is successfully scheduled", + }) + res.Status.ObservedHost = host patch := client.MergeFrom(old) if err := r.Status().Patch(ctx, &res, patch); err != nil { log.Error(err, "failed to patch reservation status") @@ -206,7 +235,10 @@ func (r *ReservationReconciler) Reconcile(ctx context.Context, req ctrl.Request) // SetupWithManager sets up the controller with the Manager. 
func (r *ReservationReconciler) SetupWithManager(mgr ctrl.Manager, mcl *multicluster.Client) error { if err := mgr.Add(manager.RunnableFunc(func(ctx context.Context) error { - return r.Init(ctx, mgr.GetClient(), r.Conf) + if err := r.Init(ctx, mgr.GetClient(), r.Conf); err != nil { + return err + } + return nil })); err != nil { return err } diff --git a/internal/scheduling/reservations/controller/controller_test.go b/internal/scheduling/reservations/controller/controller_test.go index 070eabf55..d8588e887 100644 --- a/internal/scheduling/reservations/controller/controller_test.go +++ b/internal/scheduling/reservations/controller/controller_test.go @@ -12,6 +12,7 @@ import ( "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" @@ -30,7 +31,7 @@ func TestReservationReconciler_Reconcile(t *testing.T) { tests := []struct { name string reservation *v1alpha1.Reservation - expectedPhase v1alpha1.ReservationStatusPhase + expectedReady bool expectedError string shouldRequeue bool }{ @@ -41,32 +42,35 @@ func TestReservationReconciler_Reconcile(t *testing.T) { Name: "test-reservation", }, Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{ - ProjectID: "test-project", - FlavorName: "test-flavor", - }, + Type: v1alpha1.ReservationTypeCommittedResource, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + ProjectID: "test-project", + ResourceName: "test-flavor", }, }, Status: v1alpha1.ReservationStatus{ - Phase: v1alpha1.ReservationStatusPhaseActive, + Conditions: []metav1.Condition{ + { + Type: v1alpha1.ReservationConditionReady, + Status: metav1.ConditionTrue, + Reason: "ReservationActive", + }, + }, }, }, - expectedPhase: v1alpha1.ReservationStatusPhaseActive, + expectedReady: true, shouldRequeue: false, }, { - name: "skip unsupported reservation scheduler", + name: "skip reservation without resource name", reservation: &v1alpha1.Reservation{ ObjectMeta: ctrl.ObjectMeta{ Name: "test-reservation", }, - Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{}, - }, + Spec: v1alpha1.ReservationSpec{}, }, - expectedPhase: v1alpha1.ReservationStatusPhaseFailed, - expectedError: "reservation is not a cortex-nova reservation", + expectedReady: false, + expectedError: "reservation has no resource name", shouldRequeue: false, }, } @@ -105,21 +109,23 @@ func TestReservationReconciler_Reconcile(t *testing.T) { t.Errorf("Expected no requeue but got %v", result.RequeueAfter) } - // Verify the reservation status if expected - if tt.expectedPhase != "" { - var updated v1alpha1.Reservation - err := client.Get(context.Background(), req.NamespacedName, &updated) - if err != nil { - t.Errorf("Failed to get updated reservation: %v", err) - return - } + // Verify the reservation status + var updated v1alpha1.Reservation + err = client.Get(context.Background(), req.NamespacedName, &updated) + if err != nil { + t.Errorf("Failed to get updated reservation: %v", err) + return + } - if updated.Status.Phase != tt.expectedPhase { - t.Errorf("Expected phase %v, got %v", tt.expectedPhase, updated.Status.Phase) - } + isReady := meta.IsStatusConditionTrue(updated.Status.Conditions, v1alpha1.ReservationConditionReady) + if isReady != tt.expectedReady { + t.Errorf("Expected ready=%v, got ready=%v", tt.expectedReady, isReady) + 
} - if tt.expectedError != "" && meta.IsStatusConditionFalse(updated.Status.Conditions, v1alpha1.ReservationConditionError) { - t.Errorf("Expected error %v, got %v", tt.expectedError, updated.Status.Conditions) + if tt.expectedError != "" { + cond := meta.FindStatusCondition(updated.Status.Conditions, v1alpha1.ReservationConditionReady) + if cond == nil || cond.Status != metav1.ConditionFalse { + t.Errorf("Expected Ready=False with error, got %v", updated.Status.Conditions) } } }) @@ -137,16 +143,12 @@ func TestReservationReconciler_reconcileInstanceReservation_Success(t *testing.T Name: "test-reservation", }, Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{ - ProjectID: "test-project", - FlavorName: "test-flavor", - FlavorExtraSpecs: map[string]string{ - "capabilities:hypervisor_type": "qemu", - }, - }, + Type: v1alpha1.ReservationTypeCommittedResource, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + ProjectID: "test-project", + ResourceName: "test-flavor", }, - Requests: map[string]resource.Quantity{ + Resources: map[string]resource.Quantity{ "memory": resource.MustParse("1Gi"), "cpu": resource.MustParse("2"), }, @@ -243,15 +245,11 @@ func TestReservationReconciler_reconcileInstanceReservation_Success(t *testing.T return } - if updated.Status.Phase != v1alpha1.ReservationStatusPhaseActive { - t.Errorf("Expected phase %v, got %v", v1alpha1.ReservationStatusPhaseActive, updated.Status.Phase) - } - - if updated.Status.Host != "test-host-1" { - t.Errorf("Expected host %v, got %v", "test-host-1", updated.Status.Host) + if !meta.IsStatusConditionTrue(updated.Status.Conditions, v1alpha1.ReservationConditionReady) { + t.Errorf("Expected Ready=True, got %v", updated.Status.Conditions) } - if meta.IsStatusConditionTrue(updated.Status.Conditions, v1alpha1.ReservationConditionError) { - t.Errorf("Expected no error, got %v", updated.Status.Conditions) + if updated.Status.ObservedHost != "test-host-1" { + t.Errorf("Expected host %v, got %v", "test-host-1", updated.Status.ObservedHost) } } diff --git a/internal/scheduling/reservations/controller/monitor.go b/internal/scheduling/reservations/controller/monitor.go index 7e886fbc3..1b4a0194a 100644 --- a/internal/scheduling/reservations/controller/monitor.go +++ b/internal/scheduling/reservations/controller/monitor.go @@ -32,14 +32,14 @@ type Monitor struct { func NewControllerMonitor(k8sClient client.Client) Monitor { return Monitor{ Client: k8sClient, - numberOfReservations: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "cortex_reservations_number", - Help: "Number of reservations.", - }, []string{"status_phase", "status_error"}), - reservedResources: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "cortex_reservations_resources", - Help: "Resources reserved by reservations.", - }, []string{"status_phase", "status_error", "host", "resource"}), + numberOfReservations: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_reservations_number", + Help: "Number of reservations.", + }, []string{"status_ready", "status_error"}), + reservedResources: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_reservations_resources", + Help: "Resources reserved by reservations.", + }, []string{"status_ready", "status_error", "host", "resource"}), } } @@ -63,12 +63,16 @@ func (m *Monitor) Collect(ch chan<- prometheus.Metric) { countByLabels := map[string]uint64{} for _, reservation := range reservations.Items { - 
errorCondition := meta.FindStatusCondition(reservation.Status.Conditions, v1alpha1.ReservationConditionError) + readyCondition := meta.FindStatusCondition(reservation.Status.Conditions, v1alpha1.ReservationConditionReady) + readyStatus := "Unknown" errorMsg := "" - if errorCondition != nil && errorCondition.Status == metav1.ConditionTrue { - errorMsg = errorCondition.Message + if readyCondition != nil { + readyStatus = string(readyCondition.Status) + if readyCondition.Status == metav1.ConditionFalse { + errorMsg = readyCondition.Message + } } - key := string(reservation.Status.Phase) + + key := readyStatus + "," + strings.ReplaceAll(errorMsg, ",", ";") countByLabels[key]++ } @@ -80,19 +84,23 @@ func (m *Monitor) Collect(ch chan<- prometheus.Metric) { resourcesByLabels := map[string]map[string]uint64{} for _, reservation := range reservations.Items { - host := "" - errorCondition := meta.FindStatusCondition(reservation.Status.Conditions, v1alpha1.ReservationConditionError) + host := reservation.Status.ObservedHost + readyCondition := meta.FindStatusCondition(reservation.Status.Conditions, v1alpha1.ReservationConditionReady) + readyStatus := "Unknown" errorMsg := "" - if errorCondition != nil && errorCondition.Status == metav1.ConditionTrue { - errorMsg = errorCondition.Message + if readyCondition != nil { + readyStatus = string(readyCondition.Status) + if readyCondition.Status == metav1.ConditionFalse { + errorMsg = readyCondition.Message + } } - key := string(reservation.Status.Phase) + + key := readyStatus + "," + strings.ReplaceAll(errorMsg, ",", ";") + "," + host if _, ok := resourcesByLabels[key]; !ok { resourcesByLabels[key] = map[string]uint64{} } - for resourceName, resourceQuantity := range reservation.Spec.Requests { + for resourceName, resourceQuantity := range reservation.Spec.Resources { resourcesByLabels[key][resourceName] += resourceQuantity.AsDec().UnscaledBig().Uint64() } } diff --git a/internal/scheduling/reservations/controller/monitor_test.go b/internal/scheduling/reservations/controller/monitor_test.go index fe4285500..e58fc22f9 100644 --- a/internal/scheduling/reservations/controller/monitor_test.go +++ b/internal/scheduling/reservations/controller/monitor_test.go @@ -94,17 +94,24 @@ func TestMonitor_Collect_WithReservations(t *testing.T) { Name: "test-reservation-1", }, Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{}, + Type: v1alpha1.ReservationTypeCommittedResource, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + ResourceName: "test-flavor", }, - Requests: map[string]resource.Quantity{ + Resources: map[string]resource.Quantity{ "memory": resource.MustParse("1Gi"), "cpu": resource.MustParse("2"), }, }, Status: v1alpha1.ReservationStatus{ - Phase: v1alpha1.ReservationStatusPhaseActive, - Host: "test-host-1", + Conditions: []metav1.Condition{ + { + Type: v1alpha1.ReservationConditionReady, + Status: metav1.ConditionTrue, + Reason: "ReservationActive", + }, + }, + ObservedHost: "test-host-1", }, }, { @@ -112,20 +119,20 @@ func TestMonitor_Collect_WithReservations(t *testing.T) { Name: "test-reservation-2", }, Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{}, + Type: v1alpha1.ReservationTypeCommittedResource, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + ResourceName: "test-flavor", }, - Requests: map[string]resource.Quantity{ + 
Resources: map[string]resource.Quantity{ "memory": resource.MustParse("2Gi"), "cpu": resource.MustParse("4"), }, }, Status: v1alpha1.ReservationStatus{ - Phase: v1alpha1.ReservationStatusPhaseFailed, Conditions: []metav1.Condition{ { - Type: v1alpha1.ReservationConditionError, - Status: metav1.ConditionTrue, + Type: v1alpha1.ReservationConditionReady, + Status: metav1.ConditionFalse, Reason: "SomeError", Message: "Failed due to some error", }, @@ -137,16 +144,23 @@ func TestMonitor_Collect_WithReservations(t *testing.T) { Name: "test-reservation-3", }, Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{}, + Type: v1alpha1.ReservationTypeCommittedResource, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + ResourceName: "test-flavor", }, - Requests: map[string]resource.Quantity{ + Resources: map[string]resource.Quantity{ "memory": resource.MustParse("4Gi"), "cpu": resource.MustParse("4"), }, }, Status: v1alpha1.ReservationStatus{ - Phase: v1alpha1.ReservationStatusPhaseActive, + Conditions: []metav1.Condition{ + { + Type: v1alpha1.ReservationConditionReady, + Status: metav1.ConditionTrue, + Reason: "ReservationActive", + }, + }, }, }, } @@ -180,9 +194,9 @@ func TestMonitor_Collect_WithReservations(t *testing.T) { t.Error("Expected some metrics to be collected") } - // Verify that we have metrics for different phases - foundActiveCortexNova := false - foundFailedCortexNova := false + // Verify that we have metrics for different ready states + foundReadyTrue := false + foundReadyFalse := false for _, metric := range metrics { var m dto.Metric @@ -197,20 +211,20 @@ func TestMonitor_Collect_WithReservations(t *testing.T) { labels[label.GetName()] = label.GetValue() } - if labels["status_phase"] == "active" { - foundActiveCortexNova = true + if labels["status_ready"] == "True" { + foundReadyTrue = true } - if labels["status_phase"] == "failed" { - foundFailedCortexNova = true + if labels["status_ready"] == "False" { + foundReadyFalse = true } } } - if !foundActiveCortexNova { - t.Error("Expected to find active cortex-nova reservation metric") + if !foundReadyTrue { + t.Error("Expected to find Ready=True reservation metric") } - if !foundFailedCortexNova { - t.Error("Expected to find failed cortex-nova reservation metric") + if !foundReadyFalse { + t.Error("Expected to find Ready=False reservation metric") } } @@ -226,16 +240,23 @@ func TestMonitor_Collect_ResourceMetrics(t *testing.T) { Name: "test-reservation", }, Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{}, + Type: v1alpha1.ReservationTypeCommittedResource, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + ResourceName: "test-flavor", }, - Requests: map[string]resource.Quantity{ + Resources: map[string]resource.Quantity{ "memory": resource.MustParse("1000Mi"), "cpu": resource.MustParse("2"), }, }, Status: v1alpha1.ReservationStatus{ - Phase: v1alpha1.ReservationStatusPhaseActive, + Conditions: []metav1.Condition{ + { + Type: v1alpha1.ReservationConditionReady, + Status: metav1.ConditionTrue, + Reason: "ReservationActive", + }, + }, }, } @@ -342,20 +363,20 @@ func TestMonitor_Collect_LabelSanitization(t *testing.T) { Name: "test-reservation", }, Spec: v1alpha1.ReservationSpec{ - Scheduler: v1alpha1.ReservationSchedulerSpec{ - CortexNova: &v1alpha1.ReservationSchedulerSpecCortexNova{}, + Type: 
v1alpha1.ReservationTypeCommittedResource, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + ResourceName: "test-flavor", }, - Requests: map[string]resource.Quantity{ + Resources: map[string]resource.Quantity{ "memory": resource.MustParse("1Gi"), "cpu": resource.MustParse("2"), }, }, Status: v1alpha1.ReservationStatus{ - Phase: v1alpha1.ReservationStatusPhaseFailed, Conditions: []metav1.Condition{ { - Type: v1alpha1.ReservationConditionError, - Status: metav1.ConditionTrue, + Type: v1alpha1.ReservationConditionReady, + Status: metav1.ConditionFalse, Reason: "SomeError", Message: "error with, commas, in it", }, From eb1328bec6a82cd55646118fecb7596e791e40a2 Mon Sep 17 00:00:00 2001 From: Malte Viering Date: Tue, 17 Feb 2026 14:04:49 +0100 Subject: [PATCH 2/3] linting --- .../files/crds/cortex.cloud_reservations.yaml | 12 +++++++----- .../reservations/controller/monitor.go | 16 ++++++++-------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/helm/library/cortex/files/crds/cortex.cloud_reservations.yaml b/helm/library/cortex/files/crds/cortex.cloud_reservations.yaml index 931baf018..7e5393260 100644 --- a/helm/library/cortex/files/crds/cortex.cloud_reservations.yaml +++ b/helm/library/cortex/files/crds/cortex.cloud_reservations.yaml @@ -21,8 +21,8 @@ spec: - jsonPath: .status.observedHost name: Host type: string - - jsonPath: .status.phase - name: Phase + - jsonPath: .status.conditions[?(@.type=='Ready')].status + name: Ready type: string name: v1alpha1 schema: @@ -54,6 +54,11 @@ spec: CommittedResourceReservation contains fields specific to committed resource reservations. Only used when Type is CommittedResourceReservation. properties: + creator: + description: |- + Creator identifies the system or component that created this reservation. + Used to track ownership and for cleanup purposes (e.g., "commitments-syncer"). + type: string domainID: description: DomainID is the domain ID to reserve for. type: string @@ -219,9 +224,6 @@ spec: - For Nova: the hypervisor hostname - For Pods: the node name type: string - phase: - description: The current phase of the reservation. 
- type: string type: object required: - spec diff --git a/internal/scheduling/reservations/controller/monitor.go b/internal/scheduling/reservations/controller/monitor.go index 1b4a0194a..e1409b3ed 100644 --- a/internal/scheduling/reservations/controller/monitor.go +++ b/internal/scheduling/reservations/controller/monitor.go @@ -32,14 +32,14 @@ type Monitor struct { func NewControllerMonitor(k8sClient client.Client) Monitor { return Monitor{ Client: k8sClient, - numberOfReservations: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "cortex_reservations_number", - Help: "Number of reservations.", - }, []string{"status_ready", "status_error"}), - reservedResources: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "cortex_reservations_resources", - Help: "Resources reserved by reservations.", - }, []string{"status_ready", "status_error", "host", "resource"}), + numberOfReservations: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_reservations_number", + Help: "Number of reservations.", + }, []string{"status_ready", "status_error"}), + reservedResources: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_reservations_resources", + Help: "Resources reserved by reservations.", + }, []string{"status_ready", "status_error", "host", "resource"}), } } From 7e0249ce36d209e637069e0fc6bb1d89546ab0dd Mon Sep 17 00:00:00 2001 From: Malte Viering Date: Tue, 17 Feb 2026 14:31:42 +0100 Subject: [PATCH 3/3] observedHost => Host --- api/v1alpha1/reservation_types.go | 6 +++--- .../files/crds/cortex.cloud_reservations.yaml | 6 +++--- .../plugins/filters/filter_has_enough_capacity.go | 8 ++++---- .../filters/filter_has_enough_capacity_test.go | 14 +++++++------- .../reservations/controller/controller.go | 10 +++++----- .../reservations/controller/controller_test.go | 4 ++-- .../scheduling/reservations/controller/monitor.go | 2 +- .../reservations/controller/monitor_test.go | 2 +- 8 files changed, 26 insertions(+), 26 deletions(-) diff --git a/api/v1alpha1/reservation_types.go b/api/v1alpha1/reservation_types.go index 1c2833584..dfb0eb894 100644 --- a/api/v1alpha1/reservation_types.go +++ b/api/v1alpha1/reservation_types.go @@ -126,14 +126,14 @@ type ReservationStatus struct { // +kubebuilder:validation:Optional Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` - // ObservedHost is the actual host where the reservation is on. + // Host is the actual host where the reservation is placed. // This is set by the scheduler after successful placement and reflects the current state. // It should match Spec.TargetHost when the reservation is successfully placed. // This is a generic name that represents different concepts depending on the scheduling domain: // - For Nova: the hypervisor hostname // - For Pods: the node name // +kubebuilder:validation:Optional - ObservedHost string `json:"observedHost,omitempty"` + Host string `json:"host,omitempty"` // CommittedResourceReservation contains status fields specific to committed resource reservations. // Only used when Type is CommittedResourceReservation. 
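Worth noting for consumers that previously keyed off Status.Phase: after this rename, readiness is read from the Ready condition and placement from Status.Host. A minimal sketch of such a consumer follows; the helper name and the v1alpha1 import path are illustrative assumptions, not part of this patch.

	package example

	import (
		"k8s.io/apimachinery/pkg/api/meta"

		// Import path assumed; substitute the module's actual api/v1alpha1 package.
		"github.com/cobaltcore-dev/cortex/api/v1alpha1"
	)

	// placementOf reports the host recorded in status and whether the
	// reservation is currently considered placed, i.e. the Ready condition
	// is True and a host has been observed.
	func placementOf(res *v1alpha1.Reservation) (host string, placed bool) {
		ready := meta.IsStatusConditionTrue(res.Status.Conditions, v1alpha1.ReservationConditionReady)
		return res.Status.Host, ready && res.Status.Host != ""
	}
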
@@ -150,7 +150,7 @@ type ReservationStatus struct { // +kubebuilder:subresource:status // +kubebuilder:resource:scope=Cluster // +kubebuilder:printcolumn:name="Type",type="string",JSONPath=".spec.type" -// +kubebuilder:printcolumn:name="Host",type="string",JSONPath=".status.observedHost" +// +kubebuilder:printcolumn:name="Host",type="string",JSONPath=".status.host" // +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status" // Reservation is the Schema for the reservations API diff --git a/helm/library/cortex/files/crds/cortex.cloud_reservations.yaml b/helm/library/cortex/files/crds/cortex.cloud_reservations.yaml index 7e5393260..7d2e009dc 100644 --- a/helm/library/cortex/files/crds/cortex.cloud_reservations.yaml +++ b/helm/library/cortex/files/crds/cortex.cloud_reservations.yaml @@ -18,7 +18,7 @@ spec: - jsonPath: .spec.type name: Type type: string - - jsonPath: .status.observedHost + - jsonPath: .status.host name: Host type: string - jsonPath: .status.conditions[?(@.type=='Ready')].status @@ -215,9 +215,9 @@ spec: Key: VM/instance UUID, Value: Host name where the VM is currently running. type: object type: object - observedHost: + host: description: |- - ObservedHost is the actual host where the reservation is on. + Host is the actual host where the reservation is placed. This is set by the scheduler after successful placement and reflects the current state. It should match Spec.TargetHost when the reservation is successfully placed. This is a generic name that represents different concepts depending on the scheduling domain: diff --git a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go index 3998f1452..ce87c3a50 100644 --- a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go +++ b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go @@ -119,16 +119,16 @@ func (s *FilterHasEnoughCapacity) Run(traceLog *slog.Logger, request api.Externa traceLog.Debug("processing failover reservation", "reservation", reservation.Name) } - // Block resources on BOTH Spec.TargetHost (desired) AND Status.ObservedHost (actual). + // Block resources on BOTH Spec.TargetHost (desired) AND Status.Host (actual). // This ensures capacity is blocked during the transition period when a reservation - // is being placed (TargetHost set) and after it's placed (ObservedHost set). + // is being placed (TargetHost set) and after it's placed (Host set). // If both are the same, we only subtract once. 
hostsToBlock := make(map[string]struct{}) if reservation.Spec.TargetHost != "" { hostsToBlock[reservation.Spec.TargetHost] = struct{}{} } - if reservation.Status.ObservedHost != "" { - hostsToBlock[reservation.Status.ObservedHost] = struct{}{} + if reservation.Status.Host != "" { + hostsToBlock[reservation.Status.Host] = struct{}{} } if len(hostsToBlock) == 0 { traceLog.Debug("skipping reservation with no host", "reservation", reservation.Name) diff --git a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go index d90728b74..645efc311 100644 --- a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go +++ b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity_test.go @@ -82,7 +82,7 @@ func newReservation(name, specHost, observedHost, projectID, flavorName, cpu, me Reason: "ReservationActive", }, }, - ObservedHost: observedHost, + Host: observedHost, }, } @@ -208,7 +208,7 @@ func TestFilterHasEnoughCapacity_ReservationTypes(t *testing.T) { Reason: "ReservationActive", }, }, - ObservedHost: "host1", + Host: "host1", }, }, }, @@ -291,12 +291,12 @@ func TestFilterHasEnoughCapacity_ReservationTypes(t *testing.T) { filteredHosts: []string{"host1", "host2", "host3", "host4"}, }, // ============================================================================ - // Tests for Spec.Host vs Status.ObservedHost behavior + // Tests for Spec.Host vs Status.Host behavior // ============================================================================ { name: "Pending reservation (only Spec.Host set) blocks capacity on desired host", reservations: []client.Object{ - // Pending reservation: Spec.Host is set, but ObservedHost is empty + // Pending reservation: Spec.TargetHost is set, but Status.Host is empty newReservation("pending-res", "host1", "", "project-X", "m1.large", "8", "16Gi", v1alpha1.ReservationTypeCommittedResource, nil), }, request: newRequest("project-A", "instance-123", "m1.small", 4, 8000, false, "host1", "host2", "host3", "host4"), @@ -305,7 +305,7 @@ func TestFilterHasEnoughCapacity_ReservationTypes(t *testing.T) { filteredHosts: []string{"host1", "host4"}, }, { - name: "Reservation with different Spec.Host and ObservedHost blocks BOTH hosts", + name: "Reservation with different Spec.TargetHost and Status.Host blocks BOTH hosts", reservations: []client.Object{ // Reservation was requested for host1 but placed on host2 - blocks BOTH // host1: 8 CPU free - 4 CPU reserved = 4 CPU free (still has capacity for 4 CPU request) @@ -323,7 +323,7 @@ func TestFilterHasEnoughCapacity_ReservationTypes(t *testing.T) { // Pending reservation blocks host1 (via Spec.Host only) // host1: 8 CPU free - 8 CPU reserved = 0 CPU free (blocked) newReservation("pending-res", "host1", "", "project-X", "m1.large", "8", "16Gi", v1alpha1.ReservationTypeCommittedResource, nil), - // Placed reservation blocks host2 AND host3 (via both Spec.Host and ObservedHost) + // Placed reservation blocks host2 AND host3 (via both Spec.TargetHost and Status.Host) // host2: 4 CPU free - 4 CPU reserved = 0 CPU free (blocked) // host3: 16 CPU free - 4 CPU reserved = 12 CPU free (still has capacity) newReservation("placed-res", "host2", "host3", "project-X", "m1.large", "4", "8Gi", v1alpha1.ReservationTypeCommittedResource, nil), @@ -334,7 +334,7 @@ func TestFilterHasEnoughCapacity_ReservationTypes(t *testing.T) { filteredHosts: []string{"host1", "host2", "host4"}, }, { - name: "Reservation with no host 
(neither Spec.Host nor ObservedHost) is skipped", + name: "Reservation with no host (neither Spec.TargetHost nor Status.Host) is skipped", reservations: []client.Object{ newReservation("no-host-res", "", "", "project-X", "m1.large", "8", "16Gi", v1alpha1.ReservationTypeCommittedResource, nil), }, diff --git a/internal/scheduling/reservations/controller/controller.go b/internal/scheduling/reservations/controller/controller.go index 62055ed18..4eae4cfc2 100644 --- a/internal/scheduling/reservations/controller/controller.go +++ b/internal/scheduling/reservations/controller/controller.go @@ -72,11 +72,11 @@ func (r *ReservationReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, nil // Don't need to requeue. } - // Sync Spec values to Status.Observed* fields + // Sync Spec values to Status fields // This ensures the observed state reflects the desired state from Spec needsStatusUpdate := false - if res.Spec.TargetHost != "" && res.Status.ObservedHost != res.Spec.TargetHost { - res.Status.ObservedHost = res.Spec.TargetHost + if res.Spec.TargetHost != "" && res.Status.Host != res.Spec.TargetHost { + res.Status.Host = res.Spec.TargetHost needsStatusUpdate = true } if needsStatusUpdate { @@ -86,7 +86,7 @@ func (r *ReservationReconciler) Reconcile(ctx context.Context, req ctrl.Request) log.Error(err, "failed to sync spec to status") return ctrl.Result{}, err } - log.Info("synced spec to status", "reservation", req.Name, "host", res.Status.ObservedHost) + log.Info("synced spec to status", "reservation", req.Name, "host", res.Status.Host) } // Currently we can only reconcile nova CommittedResourceReservations (those with ResourceName set). @@ -223,7 +223,7 @@ func (r *ReservationReconciler) Reconcile(ctx context.Context, req ctrl.Request) Reason: "ReservationActive", Message: "reservation is successfully scheduled", }) - res.Status.ObservedHost = host + res.Status.Host = host patch := client.MergeFrom(old) if err := r.Status().Patch(ctx, &res, patch); err != nil { log.Error(err, "failed to patch reservation status") diff --git a/internal/scheduling/reservations/controller/controller_test.go b/internal/scheduling/reservations/controller/controller_test.go index d8588e887..d716c0b63 100644 --- a/internal/scheduling/reservations/controller/controller_test.go +++ b/internal/scheduling/reservations/controller/controller_test.go @@ -249,7 +249,7 @@ func TestReservationReconciler_reconcileInstanceReservation_Success(t *testing.T t.Errorf("Expected Ready=True, got %v", updated.Status.Conditions) } - if updated.Status.ObservedHost != "test-host-1" { - t.Errorf("Expected host %v, got %v", "test-host-1", updated.Status.ObservedHost) + if updated.Status.Host != "test-host-1" { + t.Errorf("Expected host %v, got %v", "test-host-1", updated.Status.Host) } } diff --git a/internal/scheduling/reservations/controller/monitor.go b/internal/scheduling/reservations/controller/monitor.go index e1409b3ed..3e6c6dae6 100644 --- a/internal/scheduling/reservations/controller/monitor.go +++ b/internal/scheduling/reservations/controller/monitor.go @@ -84,7 +84,7 @@ func (m *Monitor) Collect(ch chan<- prometheus.Metric) { resourcesByLabels := map[string]map[string]uint64{} for _, reservation := range reservations.Items { - host := reservation.Status.ObservedHost + host := reservation.Status.Host readyCondition := meta.FindStatusCondition(reservation.Status.Conditions, v1alpha1.ReservationConditionReady) readyStatus := "Unknown" errorMsg := "" diff --git 
a/internal/scheduling/reservations/controller/monitor_test.go b/internal/scheduling/reservations/controller/monitor_test.go index e58fc22f9..fef88e35e 100644 --- a/internal/scheduling/reservations/controller/monitor_test.go +++ b/internal/scheduling/reservations/controller/monitor_test.go @@ -111,7 +111,7 @@ func TestMonitor_Collect_WithReservations(t *testing.T) { Reason: "ReservationActive", }, }, - ObservedHost: "test-host-1", + Host: "test-host-1", }, }, {
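As a closing illustration of the new monitor labels: the gauges are now keyed by status_ready and status_error (plus host and resource for cortex_reservations_resources), and the aggregation key joins those values with commas, which is why commas inside condition messages are replaced before the key is built. A standalone sketch of that encoding, with an illustrative function name; only the string handling mirrors the patch:

	package main

	import (
		"fmt"
		"strings"
	)

	// reservationKey mirrors how Collect aggregates reservations before
	// emitting gauges: the Ready status, the sanitized error message and the
	// host are joined with commas, so commas inside the message are replaced
	// with semicolons to keep the key unambiguous when it is split back into
	// label values.
	func reservationKey(readyStatus, errorMsg, host string) string {
		return readyStatus + "," + strings.ReplaceAll(errorMsg, ",", ";") + "," + host
	}

	func main() {
		// Message taken from the LabelSanitization fixture above; the host is arbitrary.
		fmt.Println(reservationKey("False", "error with, commas, in it", "host-1"))
		// Output: False,error with; commas; in it,host-1
	}
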