From 0d4c47cbf8e3acc02c57e0b240cac0384fc7ae6c Mon Sep 17 00:00:00 2001 From: Antti Kervinen Date: Fri, 15 May 2026 10:05:05 +0300 Subject: [PATCH 1/6] balloons: add cpuClasses to config Signed-off-by: Antti Kervinen --- .../bases/config.nri_balloonspolicies.yaml | 70 ++++++ .../crds/config.nri_balloonspolicies.yaml | 70 ++++++ pkg/apis/config/v1alpha1/balloons-policy.go | 39 +++- .../v1alpha1/resmgr/policy/balloons/config.go | 7 + .../policy/balloons/zz_generated.deepcopy.go | 11 + .../config/v1alpha1/resmgr/policy/cpuclass.go | 59 +++++ .../v1alpha1/resmgr/policy/frequency.go | 205 ++++++++++++++++++ .../resmgr/policy/zz_generated.deepcopy.go | 20 ++ 8 files changed, 480 insertions(+), 1 deletion(-) create mode 100644 pkg/apis/config/v1alpha1/resmgr/policy/cpuclass.go create mode 100644 pkg/apis/config/v1alpha1/resmgr/policy/frequency.go diff --git a/config/crd/bases/config.nri_balloonspolicies.yaml b/config/crd/bases/config.nri_balloonspolicies.yaml index 321f3f417..a93bb7d0d 100644 --- a/config/crd/bases/config.nri_balloonspolicies.yaml +++ b/config/crd/bases/config.nri_balloonspolicies.yaml @@ -733,6 +733,76 @@ spec: type: boolean type: object type: object + cpuClasses: + description: |- + CPUClasses define CPU frequency, C-state, and turbo + attributes for CPU classes referenced by balloon types. + Exclusive turbo frequency access is controlled via + turboPriority. + items: + description: |- + CPUClass specifies CPU frequency, C-state, and turbo attributes + for a CPU class. + properties: + disabledCstates: + description: |- + DisabledCstates lists C-states disabled for CPUs in this class. + Example: ["C4", "C6", "C8", "C10"] + items: + type: string + type: array + energyPerformancePreference: + description: EnergyPerformancePreference for CPUs in this class. + minimum: 0 + type: integer + freqGovernor: + description: |- + FreqGovernor is the CPUFreq governor for this class + (e.g., "performance", "powersave", "schedutil"). 
+ type: string + maxFreq: + description: |- + MaxFreq is the maximum CPU frequency for this class. + Same format and symbolic names as MinFreq. + type: string + minFreq: + description: |- + MinFreq is the minimum CPU frequency for this class. + Accepts values with units: "3.2GHz", "2900MHz", "2900000kHz", + or a plain number in kHz. Also accepts symbolic names: "min" + (platform minimum), "base" (CPU base frequency), "turbo" + (maximum turbo frequency), resolved at runtime from sysfs. + When turboPriority is set, "turbo" resolves to actual turbo + only for the highest-priority active class; others get base. + type: string + name: + description: Name of the CPU class. + type: string + turboPriority: + description: |- + TurboPriority controls exclusive turbo frequency access. + Among CPU classes with active balloons, only the class with + the highest turboPriority gets the symbolic frequency "turbo" + resolved to the actual turbo frequency. All other classes get + "turbo" resolved to the base frequency instead. + If all classes have turboPriority 0 (default), every class + gets actual turbo frequencies -- no competition occurs. + minimum: 0 + type: integer + uncoreMaxFreq: + description: |- + UncoreMaxFreq is the maximum uncore frequency for this class. + Accepts values with units like MinFreq. + type: string + uncoreMinFreq: + description: |- + UncoreMinFreq is the minimum uncore frequency for this class. + Accepts values with units like MinFreq. 
+ type: string + required: + - name + type: object + type: array idleCPUClass: description: |- IdleCpuClass controls how unusded CPUs outside any a diff --git a/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml b/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml index 321f3f417..a93bb7d0d 100644 --- a/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml +++ b/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml @@ -733,6 +733,76 @@ spec: type: boolean type: object type: object + cpuClasses: + description: |- + CPUClasses define CPU frequency, C-state, and turbo + attributes for CPU classes referenced by balloon types. + Exclusive turbo frequency access is controlled via + turboPriority. + items: + description: |- + CPUClass specifies CPU frequency, C-state, and turbo attributes + for a CPU class. + properties: + disabledCstates: + description: |- + DisabledCstates lists C-states disabled for CPUs in this class. + Example: ["C4", "C6", "C8", "C10"] + items: + type: string + type: array + energyPerformancePreference: + description: EnergyPerformancePreference for CPUs in this class. + minimum: 0 + type: integer + freqGovernor: + description: |- + FreqGovernor is the CPUFreq governor for this class + (e.g., "performance", "powersave", "schedutil"). + type: string + maxFreq: + description: |- + MaxFreq is the maximum CPU frequency for this class. + Same format and symbolic names as MinFreq. + type: string + minFreq: + description: |- + MinFreq is the minimum CPU frequency for this class. + Accepts values with units: "3.2GHz", "2900MHz", "2900000kHz", + or a plain number in kHz. Also accepts symbolic names: "min" + (platform minimum), "base" (CPU base frequency), "turbo" + (maximum turbo frequency), resolved at runtime from sysfs. + When turboPriority is set, "turbo" resolves to actual turbo + only for the highest-priority active class; others get base. + type: string + name: + description: Name of the CPU class. 
+ type: string + turboPriority: + description: |- + TurboPriority controls exclusive turbo frequency access. + Among CPU classes with active balloons, only the class with + the highest turboPriority gets the symbolic frequency "turbo" + resolved to the actual turbo frequency. All other classes get + "turbo" resolved to the base frequency instead. + If all classes have turboPriority 0 (default), every class + gets actual turbo frequencies -- no competition occurs. + minimum: 0 + type: integer + uncoreMaxFreq: + description: |- + UncoreMaxFreq is the maximum uncore frequency for this class. + Accepts values with units like MinFreq. + type: string + uncoreMinFreq: + description: |- + UncoreMinFreq is the minimum uncore frequency for this class. + Accepts values with units like MinFreq. + type: string + required: + - name + type: object + type: array idleCPUClass: description: |- IdleCpuClass controls how unusded CPUs outside any a diff --git a/pkg/apis/config/v1alpha1/balloons-policy.go b/pkg/apis/config/v1alpha1/balloons-policy.go index 259e1afaa..20eea30d5 100644 --- a/pkg/apis/config/v1alpha1/balloons-policy.go +++ b/pkg/apis/config/v1alpha1/balloons-policy.go @@ -14,6 +14,11 @@ package v1alpha1 +import ( + cpucfg "github.com/containers/nri-plugins/pkg/apis/config/v1alpha1/resmgr/control/cpu" + policyapi "github.com/containers/nri-plugins/pkg/apis/config/v1alpha1/resmgr/policy" +) + var ( _ ResmgrConfig = &BalloonsPolicy{} ) @@ -32,13 +37,45 @@ func (c *BalloonsPolicy) CommonConfig() *CommonConfig { if c == nil { return nil } + ctrl := c.Spec.Control + // Inject user-friendly cpuClasses into control.cpu.classes so + // the CPU controller sees them at startup. CPUClasses entries + // take precedence over identically-named control.cpu.classes. + // Symbolic frequencies (min, base, turbo) are passed as 0 here; + // the balloons policy resolves them at runtime using sysfs data. 
+ if len(c.Spec.CPUClasses) > 0 { + if ctrl.CPU.Classes == nil { + ctrl.CPU.Classes = make(map[string]cpucfg.Class) + } + for _, cc := range c.Spec.CPUClasses { + ctrl.CPU.Classes[cc.Name] = cpucfg.Class{ + MinFreq: freqKHzOrZero(cc.MinFreq), + MaxFreq: freqKHzOrZero(cc.MaxFreq), + EnergyPerformancePreference: cc.EnergyPerformancePreference, + UncoreMinFreq: freqKHzOrZero(cc.UncoreMinFreq), + UncoreMaxFreq: freqKHzOrZero(cc.UncoreMaxFreq), + FreqGovernor: cc.FreqGovernor, + DisabledCstates: cc.DisabledCstates, + } + } + } return &CommonConfig{ - Control: c.Spec.Control, + Control: ctrl, Log: c.Spec.Log, Instrumentation: c.Spec.Instrumentation, } } +// freqKHzOrZero returns the kHz value of a frequency, or 0 if it is +// symbolic (min/base/turbo). Symbolic frequencies are resolved later +// by the policy using actual platform sysfs data. +func freqKHzOrZero(f policyapi.Frequency) uint { + if f.IsSymbolic() { + return 0 + } + return f.KHz() +} + func (c *BalloonsPolicy) PolicyConfig() interface{} { if c == nil { return nil diff --git a/pkg/apis/config/v1alpha1/resmgr/policy/balloons/config.go b/pkg/apis/config/v1alpha1/resmgr/policy/balloons/config.go index 496f851a6..5105b8792 100644 --- a/pkg/apis/config/v1alpha1/resmgr/policy/balloons/config.go +++ b/pkg/apis/config/v1alpha1/resmgr/policy/balloons/config.go @@ -32,6 +32,8 @@ type ( CPUTopologyLevel = policy.CPUTopologyLevel ComponentCreationStrategy = policy.ComponentCreationStrategy SchedulingClass = policy.SchedulingClass + CPUClass = policy.CPUClass + Frequency = policy.Frequency ) const ( @@ -135,6 +137,11 @@ type Config struct { // SchedulingClasses specify scheduling classes available in // balloon types. SchedulingClasses []*SchedulingClass `json:"schedulingClasses,omitempty"` + // CPUClasses define CPU frequency, C-state, and turbo + // attributes for CPU classes referenced by balloon types. + // Exclusive turbo frequency access is controlled via + // turboPriority. 
+ CPUClasses []*CPUClass `json:"cpuClasses,omitempty"` } // BalloonDef contains a balloon definition. diff --git a/pkg/apis/config/v1alpha1/resmgr/policy/balloons/zz_generated.deepcopy.go b/pkg/apis/config/v1alpha1/resmgr/policy/balloons/zz_generated.deepcopy.go index 74276ce1d..e4b9ceae1 100644 --- a/pkg/apis/config/v1alpha1/resmgr/policy/balloons/zz_generated.deepcopy.go +++ b/pkg/apis/config/v1alpha1/resmgr/policy/balloons/zz_generated.deepcopy.go @@ -185,6 +185,17 @@ func (in *Config) DeepCopyInto(out *Config) { } } } + if in.CPUClasses != nil { + in, out := &in.CPUClasses, &out.CPUClasses + *out = make([]*CPUClass, len(*in)) + for i := range *in { + if (*in)[i] != nil { + in, out := &(*in)[i], &(*out)[i] + *out = new(CPUClass) + (*in).DeepCopyInto(*out) + } + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Config. diff --git a/pkg/apis/config/v1alpha1/resmgr/policy/cpuclass.go b/pkg/apis/config/v1alpha1/resmgr/policy/cpuclass.go new file mode 100644 index 000000000..0dd6f00e5 --- /dev/null +++ b/pkg/apis/config/v1alpha1/resmgr/policy/cpuclass.go @@ -0,0 +1,59 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package policy + +// CPUClass specifies CPU frequency, C-state, and turbo attributes +// for a CPU class. +// +k8s:deepcopy-gen=true +type CPUClass struct { + // Name of the CPU class. 
+ // +kubebuilder:validation:Required + Name string `json:"name"` + // MinFreq is the minimum CPU frequency for this class. + // Accepts values with units: "3.2GHz", "2900MHz", "2900000kHz", + // or a plain number in kHz. Also accepts symbolic names: "min" + // (platform minimum), "base" (CPU base frequency), "turbo" + // (maximum turbo frequency), resolved at runtime from sysfs. + // When turboPriority is set, "turbo" resolves to actual turbo + // only for the highest-priority active class; others get base. + MinFreq Frequency `json:"minFreq,omitempty"` + // MaxFreq is the maximum CPU frequency for this class. + // Same format and symbolic names as MinFreq. + MaxFreq Frequency `json:"maxFreq,omitempty"` + // EnergyPerformancePreference for CPUs in this class. + // +kubebuilder:validation:Minimum=0 + EnergyPerformancePreference uint `json:"energyPerformancePreference,omitempty"` + // UncoreMinFreq is the minimum uncore frequency for this class. + // Accepts values with units like MinFreq. + UncoreMinFreq Frequency `json:"uncoreMinFreq,omitempty"` + // UncoreMaxFreq is the maximum uncore frequency for this class. + // Accepts values with units like MinFreq. + UncoreMaxFreq Frequency `json:"uncoreMaxFreq,omitempty"` + // FreqGovernor is the CPUFreq governor for this class + // (e.g., "performance", "powersave", "schedutil"). + FreqGovernor string `json:"freqGovernor,omitempty"` + // DisabledCstates lists C-states disabled for CPUs in this class. + // Example: ["C4", "C6", "C8", "C10"] + DisabledCstates []string `json:"disabledCstates,omitempty"` + // TurboPriority controls exclusive turbo frequency access. + // Among CPU classes with active balloons, only the class with + // the highest turboPriority gets the symbolic frequency "turbo" + // resolved to the actual turbo frequency. All other classes get + // "turbo" resolved to the base frequency instead. 
+ // If all classes have turboPriority 0 (default), every class + // gets actual turbo frequencies -- no competition occurs. + // +kubebuilder:validation:Minimum=0 + TurboPriority int `json:"turboPriority,omitempty"` +} diff --git a/pkg/apis/config/v1alpha1/resmgr/policy/frequency.go b/pkg/apis/config/v1alpha1/resmgr/policy/frequency.go new file mode 100644 index 000000000..0095117ec --- /dev/null +++ b/pkg/apis/config/v1alpha1/resmgr/policy/frequency.go @@ -0,0 +1,205 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package policy + +import ( + "encoding/json" + "fmt" + "math" + "regexp" + "strconv" + "strings" +) + +// Frequency represents a CPU frequency value that can be specified +// with human-readable units in YAML/JSON configuration. Supported +// formats: +// - "3.2G" or "3.2GHz" = 3200000 (kHz) +// - "2900M" or "2900MHz" = 2900000 (kHz) +// - "2900000k" or "2900000kHz" = 2900000 (kHz) +// - "2900000" (bare number) = 2900000 (kHz, backwards compatible) +// - 2900000 (JSON number) = 2900000 (kHz, backwards compatible) +// - "min" = platform minimum frequency (resolved at runtime) +// - "base" = CPU base frequency (resolved at runtime) +// - "turbo" = maximum turbo frequency (resolved at runtime) +// +// The internal representation is always in kHz (the unit used by Linux +// kernel sysfs cpufreq interface). 
Symbolic values ("min", "base", +// "turbo") are stored as sentinel constants and must be resolved with +// Resolve() before being passed to the CPU controller. +// +kubebuilder:validation:Type=string +type Frequency uint + +const ( + // FrequencyMin is a sentinel indicating the platform minimum frequency. + FrequencyMin Frequency = math.MaxUint - 2 + // FrequencyBase is a sentinel indicating the CPU base frequency. + FrequencyBase Frequency = math.MaxUint - 1 + // FrequencyTurbo is a sentinel indicating the maximum turbo frequency. + FrequencyTurbo Frequency = math.MaxUint +) + +var frequencyRegexp = regexp.MustCompile(`(?i)^\s*([0-9]*\.?[0-9]+)\s*(GHz|G|MHz|M|kHz|k)?\s*$`) + +// parseFrequency parses a frequency string into kHz. +func parseFrequency(s string) (Frequency, error) { + s = strings.TrimSpace(s) + if s == "" { + return 0, nil + } + + // Check for symbolic frequency names. + switch strings.ToLower(s) { + case "min": + return FrequencyMin, nil + case "base": + return FrequencyBase, nil + case "turbo": + return FrequencyTurbo, nil + } + + matches := frequencyRegexp.FindStringSubmatch(s) + if matches == nil { + return 0, fmt.Errorf("invalid frequency %q: expected number with optional unit (GHz, MHz, kHz) or symbolic name (min, base, turbo)", s) + } + + numStr := matches[1] + unit := strings.ToLower(matches[2]) + + val, err := strconv.ParseFloat(numStr, 64) + if err != nil { + return 0, fmt.Errorf("invalid frequency %q: %w", s, err) + } + if val < 0 { + return 0, fmt.Errorf("invalid frequency %q: negative value", s) + } + + var kHz float64 + switch unit { + case "ghz", "g": + kHz = val * 1_000_000 + case "mhz", "m": + kHz = val * 1_000 + case "khz", "k": + kHz = val + case "": + // Bare number: interpret as kHz for backwards compatibility + // with the existing uint config fields. 
+ kHz = val + } + + result := uint(math.Round(kHz)) + if result == 0 && val > 0 { + return 0, fmt.Errorf("invalid frequency %q: value too small to represent in kHz", s) + } + + return Frequency(result), nil +} + +// UnmarshalJSON implements json.Unmarshaler. Accepts both JSON strings +// with units (e.g., "3.2GHz") and plain JSON numbers (interpreted as kHz). +func (f *Frequency) UnmarshalJSON(data []byte) error { + // Try string first (quoted value with optional unit). + var s string + if err := json.Unmarshal(data, &s); err == nil { + parsed, err := parseFrequency(s) + if err != nil { + return err + } + *f = parsed + return nil + } + + // Try plain number (backwards compatible with uint kHz). + var n float64 + if err := json.Unmarshal(data, &n); err == nil { + if n < 0 { + return fmt.Errorf("invalid frequency: negative value %v", n) + } + *f = Frequency(uint(math.Round(n))) + return nil + } + + return fmt.Errorf("invalid frequency: expected string or number, got %s", string(data)) +} + +// MarshalJSON implements json.Marshaler. Symbolic frequencies are +// marshaled as their string name; numeric values as plain numbers (kHz) +// for backwards compatibility. +func (f Frequency) MarshalJSON() ([]byte, error) { + switch f { + case FrequencyMin: + return json.Marshal("min") + case FrequencyBase: + return json.Marshal("base") + case FrequencyTurbo: + return json.Marshal("turbo") + } + return json.Marshal(uint(f)) +} + +// KHz returns the frequency value in kHz. For symbolic frequencies +// (min, base, turbo) this returns the sentinel value; use Resolve() +// first to obtain the actual platform frequency. +func (f Frequency) KHz() uint { + return uint(f) +} + +// IsSymbolic returns true if this frequency is a symbolic name +// (min, base, or turbo) that requires runtime resolution. 
+func (f Frequency) IsSymbolic() bool { + return f == FrequencyMin || f == FrequencyBase || f == FrequencyTurbo +} + +// Resolve converts a symbolic frequency to its concrete kHz value +// using platform frequency information. For non-symbolic frequencies, +// the value is returned unchanged. The parameters are: +// - minKHz: platform minimum frequency (cpufreq/cpuinfo_min_freq) +// - baseKHz: CPU base frequency (cpufreq/base_frequency) +// - turboKHz: maximum turbo frequency (cpufreq/cpuinfo_max_freq) +func (f Frequency) Resolve(minKHz, baseKHz, turboKHz uint) uint { + switch f { + case FrequencyMin: + return minKHz + case FrequencyBase: + return baseKHz + case FrequencyTurbo: + return turboKHz + } + return uint(f) +} + +// String returns a human-readable representation. +func (f Frequency) String() string { + switch f { + case FrequencyMin: + return "min" + case FrequencyBase: + return "base" + case FrequencyTurbo: + return "turbo" + } + kHz := uint(f) + if kHz == 0 { + return "0" + } + if kHz >= 1_000_000 && kHz%1_000_000 == 0 { + return fmt.Sprintf("%dGHz", kHz/1_000_000) + } + if kHz >= 1_000 && kHz%1_000 == 0 { + return fmt.Sprintf("%dMHz", kHz/1_000) + } + return fmt.Sprintf("%dkHz", kHz) +} diff --git a/pkg/apis/config/v1alpha1/resmgr/policy/zz_generated.deepcopy.go b/pkg/apis/config/v1alpha1/resmgr/policy/zz_generated.deepcopy.go index 3bef85a34..9720ac1a7 100644 --- a/pkg/apis/config/v1alpha1/resmgr/policy/zz_generated.deepcopy.go +++ b/pkg/apis/config/v1alpha1/resmgr/policy/zz_generated.deepcopy.go @@ -20,6 +20,26 @@ package policy import () +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *CPUClass) DeepCopyInto(out *CPUClass) { + *out = *in + if in.DisabledCstates != nil { + in, out := &in.DisabledCstates, &out.DisabledCstates + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CPUClass. +func (in *CPUClass) DeepCopy() *CPUClass { + if in == nil { + return nil + } + out := new(CPUClass) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *SchedulingClass) DeepCopyInto(out *SchedulingClass) { *out = *in From 0e1b941088798df6f60c66c0c4ef55ddedb09353 Mon Sep 17 00:00:00 2001 From: Antti Kervinen Date: Fri, 15 May 2026 10:05:05 +0300 Subject: [PATCH 2/6] control/cpu: cache last-written cpufreq values and allow runtime CPU class definitions Signed-off-by: Antti Kervinen --- pkg/resmgr/control/control.go | 27 +++ pkg/resmgr/control/cpu/api.go | 40 ++-- pkg/resmgr/control/cpu/cache.go | 5 +- pkg/resmgr/control/cpu/cpu.go | 245 +++++++++++++++++++++--- pkg/resmgr/control/e2e-test/e2e-test.go | 5 + pkg/resmgr/nri.go | 9 + pkg/resmgr/resource-manager.go | 4 + 7 files changed, 290 insertions(+), 45 deletions(-) diff --git a/pkg/resmgr/control/control.go b/pkg/resmgr/control/control.go index 96d958c4c..be66edc1c 100644 --- a/pkg/resmgr/control/control.go +++ b/pkg/resmgr/control/control.go @@ -40,6 +40,11 @@ type Control interface { RunPostUpdateHooks(cache.Container) error // RunPostStopHooks runs the post-stop hooks of all registered controllers. RunPostStopHooks(cache.Container) error + // RunCommit invokes Commit on every running controller. It is meant + // to be called once per NRI request, after all per-container hooks + // have run, so that controllers can flush any deferred state + // changes (e.g. sysfs writes) in a single batch. + RunCommit() error } // Controller is the interface all resource controllers must implement. 
@@ -58,6 +63,11 @@ type Controller interface { PostUpdateHook(cache.Container) error // PostStopHook is the controller's post-stop hook. PostStopHook(cache.Container) error + // Commit applies any deferred state changes the controller has + // accumulated since the previous Commit. It is called once per + // NRI request, after all per-container hooks have run. Controllers + // with no deferred state should return nil. + Commit() error } // control encapsulates our controller-agnostic runtime state. @@ -191,6 +201,23 @@ func (c *control) RunPostStopHooks(container cache.Container) error { return nil } +// RunCommit invokes Commit() on every running controller, collecting +// all errors. Controllers are expected to defer sysfs (or other +// expensive) state changes accumulated during a single NRI request +// and apply them here in one batch. +func (c *control) RunCommit() error { + var errs []error + for _, controller := range c.controllers { + if !controller.running { + continue + } + if err := controller.c.Commit(); err != nil { + errs = append(errs, controlError("%s commit failed: %v", controller.name, err)) + } + } + return errors.Join(errs...) +} + // runhook executes the given container hook according to the controller settings func (c *control) runhook(controller *controller, hook string, container cache.Container) error { if !controller.running { diff --git a/pkg/resmgr/control/cpu/api.go b/pkg/resmgr/control/cpu/api.go index 33bd02025..fb8be183b 100644 --- a/pkg/resmgr/control/cpu/api.go +++ b/pkg/resmgr/control/cpu/api.go @@ -24,10 +24,31 @@ func GetClasses() map[string]Class { return getCPUController().getClasses() } +// SetClass adds or updates a CPU class definition. This allows +// policies to dynamically define CPU classes (e.g., from +// user-friendly CPUClasses configuration) without requiring them +// to be present in the static control.cpu.classes config. 
+// +// The change is purely in-memory: any CPUs currently assigned to the +// updated class are marked dirty so the next Commit() will re-enforce +// the new definition on them in a single batch. +func SetClass(name string, class Class) { + ctl := getCPUController() + if ctl.classes == nil { + ctl.classes = make(map[string]Class) + } + ctl.classes[name] = class + ctl.markClassDirty(name) +} + // Assign assigns a set of cpus to a class. // -// TODO: Drop this function. Don't store cpu class in policy data but implement -// controller-specific data store in cache. +// The assignment is recorded in the cache (so it survives across +// restarts) and the affected CPUs are marked dirty. No sysfs writes +// happen here; the CPU controller's Commit() (invoked once per NRI +// request after all per-container hooks have run) coalesces all +// pending changes into the minimal set of writes needed to reach the +// final desired state. func Assign(c cache.Cache, class string, cpus ...int) error { // NOTE: no locking implemented anywhere around -> we don't expect multiple parallel callers @@ -54,20 +75,7 @@ func Assign(c cache.Cache, class string, cpus ...int) error { setClassAssignments(c, &assignments) - if getCPUController().started { - // We don't want to try to enforce until the controller has been fully - // started. Enforcement of all assignments happens on StarT(), anyway. - ctl := getCPUController() - if err := ctl.enforceCpufreq(class, cpus...); err != nil { - log.Errorf("cpufreq enforcement failed: %v", err) - } - if err := ctl.enforceCstates(class, cpus...); err != nil { - log.Errorf("cstate enforcement failed: %v", err) - } - if err := ctl.enforceUncore(assignments, cpus...); err != nil { - log.Errorf("uncore frequency enforcement failed: %v", err) - } - } + getCPUController().markCPUsDirty(cpus...) 
return nil } diff --git a/pkg/resmgr/control/cpu/cache.go b/pkg/resmgr/control/cpu/cache.go index 0279dbd2d..3affed367 100644 --- a/pkg/resmgr/control/cpu/cache.go +++ b/pkg/resmgr/control/cpu/cache.go @@ -32,7 +32,10 @@ func getClassAssignments(c cache.Cache) *cpuClassAssignments { a := &cpuClassAssignments{} if !c.GetPolicyEntry(cacheKeyCPUAssignments, a) { - log.Errorf("no cached state of CPU class assignments found") + // Expected on a fresh policy startup: the cache has no + // prior CPU class assignments. Callers create entries + // via Assign() and persist them via setClassAssignments. + log.Debugf("no cached state of CPU class assignments found") } return a diff --git a/pkg/resmgr/control/cpu/cpu.go b/pkg/resmgr/control/cpu/cpu.go index 90159a5fa..05ab0e678 100644 --- a/pkg/resmgr/control/cpu/cpu.go +++ b/pkg/resmgr/control/cpu/cpu.go @@ -45,6 +45,25 @@ type cpuctl struct { cstates *cstates.Cstates // C-states handler uncoreEnabled bool // whether we need to care about uncore started bool + lastFreq map[int]cpufreqState // cpu id -> last successfully written cpufreq values + // dirtyCPUs accumulates CPUs whose desired class definition or + // class assignment has changed since the last Commit(). Writes to + // sysfs are deferred until Commit() so that intermediate bursts + // of Assign()/SetClass() calls within a single policy decision + // do not produce sequences of redundant or temporarily-wrong + // sysfs writes. + dirtyCPUs map[int]bool +} + +// cpufreqState tracks the last successfully written cpufreq values +// for a single CPU. Used to skip redundant sysfs writes. 
+type cpufreqState struct { + min uint + max uint + governor string + hasMin bool + hasMax bool + hasGov bool } type Class = cfgcpu.Class @@ -90,8 +109,6 @@ func (ctl *cpuctl) Start(cache cache.Cache, cfg *cfgapi.Config) (bool, error) { log.Errorf("failed apply /cpuinitial configuration: %v", err) } - ctl.started = true - return true, nil } @@ -124,35 +141,176 @@ func (ctl *cpuctl) PostStopHook(c cache.Container) error { return nil } -// enforceCpufreq enforces a class-specific cpufreq configuration to a cpuset +// markCPUsDirty records the given CPUs as needing a sysfs re-check at +// the next Commit(). +func (ctl *cpuctl) markCPUsDirty(cpus ...int) { + if ctl.dirtyCPUs == nil { + ctl.dirtyCPUs = make(map[int]bool) + } + for _, c := range cpus { + ctl.dirtyCPUs[c] = true + } +} + +// markClassDirty records every CPU currently assigned to the given +// class as dirty. Used when SetClass changes a class definition that +// already has CPUs assigned to it. +func (ctl *cpuctl) markClassDirty(class string) { + if ctl.cache == nil { + return + } + assignments := *getClassAssignments(ctl.cache) + cpus, ok := assignments[class] + if !ok { + return + } + if ctl.dirtyCPUs == nil { + ctl.dirtyCPUs = make(map[int]bool, len(cpus)) + } + for id := range cpus { + ctl.dirtyCPUs[int(id)] = true + } +} + +// Commit flushes deferred per-CPU sysfs updates accumulated since the +// previous Commit. It is the choke point that converts the desired +// state (class definitions in ctl.classes + cached class assignments) +// into the minimal set of sysfs writes needed to reach it. Per-CPU +// writes are still deduplicated against ctl.lastFreq, so even if a +// CPU is marked dirty by multiple intermediate Assign/SetClass calls +// the final value is written at most once. +func (ctl *cpuctl) Commit() error { + if !ctl.started || len(ctl.dirtyCPUs) == 0 { + return nil + } + + assignments := *getClassAssignments(ctl.cache) + + // Group dirty CPUs by their currently assigned class. 
CPUs that + // no longer appear in any class assignment are skipped: there is + // no class definition to enforce on them. + cpuClass := make(map[int]string, len(ctl.dirtyCPUs)) + for class, cpus := range assignments { + for id := range cpus { + if ctl.dirtyCPUs[int(id)] { + cpuClass[int(id)] = class + } + } + } + + byClass := make(map[string][]int, len(ctl.classes)) + for cpu, class := range cpuClass { + byClass[class] = append(byClass[class], cpu) + } + + var firstErr error + for class, cpus := range byClass { + if _, ok := ctl.classes[class]; !ok { + log.Warnf("commit: class %q (cpus %v) missing from configuration", class, cpus) + continue + } + if err := ctl.enforceCpufreq(class, cpus...); err != nil { + log.Errorf("commit: cpufreq enforcement failed for class %q: %v", class, err) + if firstErr == nil { + firstErr = err + } + } + if err := ctl.enforceCstates(class, cpus...); err != nil { + log.Errorf("commit: cstate enforcement failed for class %q: %v", class, err) + if firstErr == nil { + firstErr = err + } + } + } + + // Uncore is per-die; recompute over all dirty CPUs in one pass. + affectedCPUs := make([]int, 0, len(ctl.dirtyCPUs)) + for cpu := range ctl.dirtyCPUs { + affectedCPUs = append(affectedCPUs, cpu) + } + if err := ctl.enforceUncore(assignments, affectedCPUs...); err != nil { + log.Errorf("commit: uncore enforcement failed: %v", err) + if firstErr == nil { + firstErr = err + } + } + + // Clear the dirty set unconditionally. enforceCpufreq has its own + // per-property lastFreq update logic that avoids re-trying writes + // that keep failing for unchanged desired values. + ctl.dirtyCPUs = nil + + return firstErr +} + +// enforceCpufreq enforces a class-specific cpufreq configuration to a cpuset. +// Per-CPU sysfs writes are skipped when the desired value matches the +// last successfully written value (tracked in ctl.lastFreq). A write +// failure on one CPU/property is logged but does not stop processing +// of remaining CPUs/properties. 
The first error encountered is +// returned to the caller. func (ctl *cpuctl) enforceCpufreq(class string, cpus ...int) error { c, ok := ctl.classes[class] if !ok { return fmt.Errorf("non-existent cpu class %q", class) } + if ctl.lastFreq == nil { + ctl.lastFreq = make(map[int]cpufreqState) + } - if min := int(c.MinFreq); min > 0 { - log.Debugf("enforcing cpu frequency min %d from class %q on %v", min, class, cpus) - if err := utils.SetCPUsScalingMinFreq(cpus, min); err != nil { - return fmt.Errorf("cannot set min freq %d: %w", min, err) + min := uint(c.MinFreq) + max := uint(c.MaxFreq) + governor := c.FreqGovernor + + var firstErr error + for _, cpu := range cpus { + state := ctl.lastFreq[cpu] + + if min > 0 && (!state.hasMin || state.min != min) { + log.Debugf("enforcing cpu frequency min %d from class %q on cpu %d", min, class, cpu) + if err := utils.SetCPUScalingMinFreq(utils.ID(cpu), int(min)); err != nil { + log.Errorf("cannot set min freq %d on cpu %d: %v", min, cpu, err) + if firstErr == nil { + firstErr = err + } + } + // Update the cache even on failure: the desired value + // is unchanged so retrying on every Assign would just + // spam logs without ever succeeding. A subsequent + // configure() resets lastFreq so a real configuration + // change still triggers a fresh attempt. 
+ state.min = min + state.hasMin = true } - } - if max := int(c.MaxFreq); max > 0 { - log.Debugf("enforcing cpu frequency max %d from class %q on %v", max, class, cpus) - if err := utils.SetCPUsScalingMaxFreq(cpus, max); err != nil { - return fmt.Errorf("cannot set max freq %d: %w", max, err) + if max > 0 && (!state.hasMax || state.max != max) { + log.Debugf("enforcing cpu frequency max %d from class %q on cpu %d", max, class, cpu) + if err := utils.SetCPUScalingMaxFreq(utils.ID(cpu), int(max)); err != nil { + log.Errorf("cannot set max freq %d on cpu %d: %v", max, cpu, err) + if firstErr == nil { + firstErr = err + } + } + state.max = max + state.hasMax = true } - } - if governor := c.FreqGovernor; governor != "" { - log.Debugf("enforcing cpu frequency governor %q from class %q on %v", governor, class, cpus) - if err := utils.SetScalingGovernorForCPUs(cpus, governor); err != nil { - return fmt.Errorf("cannot set cpufreq governor %q: %w", governor, err) + if governor != "" && (!state.hasGov || state.governor != governor) { + log.Debugf("enforcing cpu frequency governor %q from class %q on cpu %d", governor, class, cpu) + if err := utils.SetCPUScalingGovernor(utils.ID(cpu), governor); err != nil { + log.Errorf("cannot set cpufreq governor %q on cpu %d: %v", governor, cpu, err) + if firstErr == nil { + firstErr = err + } + } + state.governor = governor + state.hasGov = true } + + ctl.lastFreq[cpu] = state } - return nil + return firstErr } // enforceCstates enforces a class-specific C-state configuration to a cpuset @@ -276,11 +434,37 @@ func idSetIntersects(a, b utils.IDSet) bool { } func (ctl *cpuctl) configure(cfg *cfgapi.Config) error { + // Preserve any class definitions that were pushed via SetClass + // before the controller started. The balloons policy uses + // SetClass to publish CPU class definitions with proper kHz + // values resolved from symbolic frequencies (min/base/turbo). 
+ // CommonConfig() also injects placeholder entries (kHz=0) into + // cfg.CPU.Classes so that controller startup sanity checks see + // the class names. Merge them: cfg-provided classes seed the + // map, then any SetClass-pushed values take precedence. + preserved := ctl.classes ctl.classes = nil ctl.uncoreEnabled = false + // Reset per-CPU last-written cache: a config change may + // alter min/max for the same class, so the next enforce + // pass must actually write to sysfs. + ctl.lastFreq = nil + // Reset the dirty set; we'll re-populate it below with every + // CPU currently assigned to a known class so that the Commit() + // at the end of configure() re-enforces the full desired state. + ctl.dirtyCPUs = nil if cfg != nil && len(cfg.CPU.Classes) != 0 { - ctl.classes = cfg.CPU.Classes + ctl.classes = make(map[string]Class, len(cfg.CPU.Classes)) + for name, c := range cfg.CPU.Classes { + ctl.classes[name] = c + } + } + for name, c := range preserved { + if ctl.classes == nil { + ctl.classes = make(map[string]Class) + } + ctl.classes[name] = c } // Re-configure CPUs that are assigned to some known class @@ -321,15 +505,15 @@ func (ctl *cpuctl) configure(cfg *cfgapi.Config) error { } } - // Configure the system + // Mark every CPU assigned to a known class as dirty so the + // Commit() below re-enforces all per-CPU values in one batch. + // Classes that have disappeared from the configuration are + // preserved in the cache, but their CPUs are not re-enforced + // (see the warning below). 
for class, cpus := range assignments { if _, ok := ctl.classes[class]; ok { - // Re-configure cpus (sysfs) according to new class parameters - if err := ctl.enforceCpufreq(class, cpus.SortedMembers()...); err != nil { - log.Errorf("cpufreq enforcement on re-configure failed: %v", err) - } - if err := ctl.enforceCstates(class, cpus.SortedMembers()...); err != nil { - log.Errorf("cpufreq enforcement on re-configure failed: %v", err) + for id := range cpus { + ctl.markCPUsDirty(int(id)) } } else { // TODO: what should we really do with classes that do not exist in @@ -339,8 +523,13 @@ func (ctl *cpuctl) configure(cfg *cfgapi.Config) error { log.Warnf("class %q with cpus %v missing from the configuration", class, cpus) } } - if err := ctl.enforceUncore(assignments); err != nil { - log.Errorf("uncore frequency enforcement on re-configure failed: %v", err) + + // Set started=true before the in-line Commit() call below: Commit + // gates on this flag (so a stray pre-Start call is a no-op), and + // configure() is invoked from Start() once classes are ready. + ctl.started = true + if err := ctl.Commit(); err != nil { + log.Errorf("initial commit failed: %v", err) } log.Debugf("cpu controller configured") diff --git a/pkg/resmgr/control/e2e-test/e2e-test.go b/pkg/resmgr/control/e2e-test/e2e-test.go index de9076c14..efd209c04 100644 --- a/pkg/resmgr/control/e2e-test/e2e-test.go +++ b/pkg/resmgr/control/e2e-test/e2e-test.go @@ -122,6 +122,11 @@ func (ctl *testctl) PostStopHook(c cache.Container) error { return nil } +// Commit is a no-op for the e2e test controller. +func (ctl *testctl) Commit() error { + return nil +} + // dumpE2ETestControllerState prints internal info used by e2e testing script. 
func (ctl *testctl) dumpE2ETestControllerState(w http.ResponseWriter, req *http.Request) { log.Debugf("output E2E test controller state...") diff --git a/pkg/resmgr/nri.go b/pkg/resmgr/nri.go index 0678fe0bf..3d93cb2bb 100644 --- a/pkg/resmgr/nri.go +++ b/pkg/resmgr/nri.go @@ -1074,6 +1074,9 @@ func (p *nriPlugin) runPostAllocateHooks(method string, created cache.Container) c.PrettyName(), c.GetState()) } } + if err := m.control.RunCommit(); err != nil { + nri.Warnf("%s: controller commit failed: %v", method, err) + } return nil } @@ -1083,6 +1086,9 @@ func (p *nriPlugin) runPostStartHooks(method string, c cache.Container) error { if err := m.control.RunPostStartHooks(c); err != nil { nri.Errorf("%s: post-start hook failed for %s: %v", method, c.PrettyName(), err) } + if err := m.control.RunCommit(); err != nil { + nri.Warnf("%s: controller commit failed: %v", method, err) + } return nil } @@ -1109,5 +1115,8 @@ func (p *nriPlugin) runPostReleaseHooks(method string, released ...cache.Contain method, c.PrettyName(), c.GetState()) } } + if err := m.control.RunCommit(); err != nil { + nri.Warnf("%s: controller commit failed: %v", method, err) + } return nil } diff --git a/pkg/resmgr/resource-manager.go b/pkg/resmgr/resource-manager.go index 28e5b9434..6537e9849 100644 --- a/pkg/resmgr/resource-manager.go +++ b/pkg/resmgr/resource-manager.go @@ -331,6 +331,10 @@ func (m *resmgr) reconfigure(cfg cfgapi.ResmgrConfig) error { log.Warnf("failed to apply configuration to containers: %v", err) } + if err := m.control.RunCommit(); err != nil { + log.Warnf("failed to commit controller state after reconfigure: %v", err) + } + return nil } From 4b461d86a86c6a54779ad7b65b6694761f362060 Mon Sep 17 00:00:00 2001 From: Antti Kervinen Date: Fri, 15 May 2026 10:05:05 +0300 Subject: [PATCH 3/6] sysfs: add OVERRIDE_SYS_CPUFREQ for faking CPU frequency ranges Signed-off-by: Antti Kervinen --- pkg/sysfs/system.go | 60 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 57 
insertions(+), 3 deletions(-) diff --git a/pkg/sysfs/system.go b/pkg/sysfs/system.go index 394b39d55..16db6ea29 100644 --- a/pkg/sysfs/system.go +++ b/pkg/sysfs/system.go @@ -292,8 +292,10 @@ var ( PerformanceCore: "OVERRIDE_SYS_CORE_CPUS", EfficientCore: "OVERRIDE_SYS_ATOM_CPUS", } - cacheEnvOverridesVar = "OVERRIDE_SYS_CACHES" - cacheEnvOverridesJson = os.Getenv(cacheEnvOverridesVar) + cacheEnvOverridesVar = "OVERRIDE_SYS_CACHES" + cacheEnvOverridesJson = os.Getenv(cacheEnvOverridesVar) + cpufreqEnvOverridesVar = "OVERRIDE_SYS_CPUFREQ" + cpufreqEnvOverridesJson = os.Getenv(cpufreqEnvOverridesVar) ) // MemInfo contains data read from a NUMA node meminfo file. @@ -338,6 +340,16 @@ type cacheOverride struct { var cacheEnvOverrides map[int][]*Cache +// cpufreqOverride specifies frequency values to use instead of reading sysfs. +type cpufreqOverride struct { + Cpus string `json:"cpus"` // CPU ids in list format, e.g. "0-15" + Base uint64 `json:"base"` // base frequency (kHz) + Min uint64 `json:"min"` // minimum frequency (kHz) + Max uint64 `json:"max"` // maximum/turbo frequency (kHz) +} + +var cpufreqEnvOverrides map[int]CPUFreq + // SetSysRoot sets the sys root directory. func SetSysRoot(root string) { if root != "" { @@ -1063,6 +1075,10 @@ func (sys *system) discoverCPU(path string) error { if _, err := readSysfsEntry(path, "cpufreq/cpuinfo_max_freq", &cpu.freq.Max); err != nil { cpu.freq.Max = 0 } + // Apply cpufreq overrides from OVERRIDE_SYS_CPUFREQ if set. + if err := sys.applyCpufreqOverrides(cpu); err != nil { + log.Warnf("failed to apply cpufreq overrides for cpu%d: %v", cpu.id, err) + } if _, err := readSysfsEntry(path, "cpufreq/energy_performance_preference", &cpu.epp); err != nil { cpu.epp = EPPUnknown } @@ -2082,7 +2098,45 @@ func (sys *system) discoverCacheFromOverrides(cpu *cpu) (bool, error) { return false, nil } -// Discover cache associated with the given CPU. +// applyCpufreqOverrides overrides CPU frequency values from OVERRIDE_SYS_CPUFREQ. 
+func (sys *system) applyCpufreqOverrides(cpu *cpu) error {
+	// No override requested: keep whatever was read from sysfs.
+	if cpufreqEnvOverridesJson == "" {
+		return nil
+	}
+	// The JSON is parsed once, on the first CPU that gets here, and
+	// the result is shared by all later calls.
+	if cpufreqEnvOverrides == nil {
+		sys.Debugf("parsing cpufreq overrides from %s=%q", cpufreqEnvOverridesVar, cpufreqEnvOverridesJson)
+		overrides, err := parseCpufreqOverrides(cpufreqEnvOverridesJson)
+		if err != nil {
+			// Remember the failure as an empty override set. Otherwise
+			// the same broken JSON would be parsed again, and the same
+			// error returned again, for every discovered CPU.
+			cpufreqEnvOverrides = map[int]CPUFreq{}
+			return fmt.Errorf("failed to parse %s: %w", cpufreqEnvOverridesVar, err)
+		}
+		cpufreqEnvOverrides = overrides
+	}
+	// An override replaces all of base/min/max for the CPU at once.
+	if freq, ok := cpufreqEnvOverrides[cpu.id]; ok {
+		sys.Debugf("cpufreq override for cpu%d: base=%d min=%d max=%d", cpu.id, freq.Base, freq.Min, freq.Max)
+		cpu.freq = freq
+	}
+	return nil
+}
+
+// parseCpufreqOverrides parses JSON cpufreq overrides into a per-CPU map.
+// The input is a JSON array of cpufreqOverride objects. Every CPU id
+// listed in an entry's "cpus" field is mapped to that entry's
+// base/min/max values; when a CPU appears in several entries, the
+// last entry wins. An error is returned for malformed JSON or an
+// invalid CPU list.
+func parseCpufreqOverrides(jsonData string) (map[int]CPUFreq, error) {
+	var overrides []cpufreqOverride
+	if err := json.Unmarshal([]byte(jsonData), &overrides); err != nil {
+		return nil, err
+	}
+	result := make(map[int]CPUFreq)
+	for _, o := range overrides {
+		cpus, err := idset.NewIDSetFromString(o.Cpus)
+		if err != nil {
+			return nil, fmt.Errorf("invalid CPU list %q: %v", o.Cpus, err)
+		}
+		freq := CPUFreq{Base: o.Base, Min: o.Min, Max: o.Max}
+		for cpu := range cpus {
+			result[cpu] = freq
+		}
+	}
+	return result, nil
+}
+
+// Discover cache associated with the given CPU.
 func (sys *system) discoverCache(cpu *cpu, path string) error {
 	var id idset.ID

From edb0d16d31fc0fb9fb08d554a2711fcc4dec4d5c Mon Sep 17 00:00:00 2001
From: Antti Kervinen
Date: Fri, 15 May 2026 10:05:05 +0300
Subject: [PATCH 4/6] balloons: implement cpuClasses with turbo priority

Introduce CPUClassTurboAllocator that owns CPU-class state and all
cpucontrol.Assign / cpucontrol.SetClass calls, keeping CPU-class
concerns out of the rest of the policy.

This change introduces very simple allocator that is unaware of zones
(sockets, dies) or CPU core counts affected by turbo on different
platforms. Smarter allocator is future work.
Signed-off-by: Antti Kervinen --- .../balloons/policy/balloons-policy.go | 151 ++++-- cmd/plugins/balloons/policy/cpuclass.go | 448 ++++++++++++++++++ cmd/plugins/balloons/policy/flags.go | 2 + 3 files changed, 549 insertions(+), 52 deletions(-) create mode 100644 cmd/plugins/balloons/policy/cpuclass.go diff --git a/cmd/plugins/balloons/policy/balloons-policy.go b/cmd/plugins/balloons/policy/balloons-policy.go index a2f31323f..af9dfbd11 100644 --- a/cmd/plugins/balloons/policy/balloons-policy.go +++ b/cmd/plugins/balloons/policy/balloons-policy.go @@ -89,9 +89,10 @@ type balloons struct { meters *Meters // balloon metrics meterLock sync.RWMutex // protects metrics collection against allocation - cpuAllocator cpuallocator.CPUAllocator // CPU allocator used by the policy - memAllocator *libmem.Allocator // memory allocator used by the policy - loadVirtDev map[string]*loadClassVirtDev // map LoadClasses to virtual devices + cpuAllocator cpuallocator.CPUAllocator // CPU allocator used by the policy + memAllocator *libmem.Allocator // memory allocator used by the policy + turboAllocator *CPUClassTurboAllocator // turbo budget allocator based on CPUClasses + loadVirtDev map[string]*loadClassVirtDev // map LoadClasses to virtual devices } // Balloon contains attributes of a balloon instance @@ -791,80 +792,62 @@ func largest(sliceLen int, valueOf func(i int) int) ([]int, int) { // resetCpuClass resets CPU configurations globally. All balloons can // be ignored, their CPU configurations will be applied later. func (p *balloons) resetCpuClass() error { - // Usual inputs: - // - p.allowed (cpuset.CPUset): all CPUs available for this - // policy. - // - p.IdleCpuClass (string): CPU class for allowed CPUs. - // - // Other inputs, if needed: - // - p.reserved (cpuset.CPUset): CPUs of ReservedResources - // (typically for kube-system containers). - // - // Note: p.useCpuClass(balloon) will be called before assigning - // containers on the balloon, including the reserved balloon. 
- // - // TODO: don't depend on cpu controller directly - if err := cpucontrol.Assign(p.cch, p.bpoptions.IdleCpuClass, p.allowed.UnsortedList()...); err != nil { + // p.useCpuClass(balloon) will be called later for every balloon, + // including the reserved balloon, to set the per-balloon CPU + // class. Here we only assign the idle class to all allowed CPUs. + if p.turboAllocator == nil { + return nil + } + idle := p.turboAllocator.ResolveClassName(p.bpoptions.IdleCpuClass) + if err := p.turboAllocator.ResetIdle(p.allowed); err != nil { log.Warnf("failed to reset class of available cpus: %v", err) } else { - log.Debugf("reset class of available cpus: %q (reserved: %q)", p.allowed, p.reserved) + log.Debugf("reset class of available cpus: %q to idle class %q (reserved: %q)", + p.allowed, idle, p.reserved) } return nil } -// useCpuClass configures CPUs of a balloon. +// useCpuClass configures CPUs of a balloon by delegating to the +// turbo-aware CPU class allocator. func (p *balloons) useCpuClass(bln *Balloon) error { - // Usual inputs: - // - CPUs that cpuallocator has reserved for this balloon: - // bln.Cpus (cpuset.CPUSet). - // - User-defined CPU configuration for CPUs of balloon of this type: - // bln.Def.CpuClass (string). - // - Current configuration(?): feel free to add data - // structure for this. For instance policy-global p.cpuConfs, - // or balloon-local bln.cpuConfs. - // - // Other input examples, if needed: - // - Requested CPU resources by all containers in the balloon: - // p.requestedMilliCpus(bln). - // - Free CPU resources in the balloon: p.freeMilliCpus(bln). - // - Number of assigned containers: bln.ContainerCount(). - // - Container details: access p.cch with bln.ContainerIDs(). - // - User-defined CPU AllocatorPriority: bln.Def.AllocatorPriority. - // - All existing balloon instances: p.balloons. 
- // - CPU configurations by user: bln.Def.CpuClass (for bln in p.balloons) if len(bln.components) > 0 { - // If this is a composite balloon, CPU class is - // defined in the component balloons. - log.Debugf("apply CPU class %q on CPUs %s of composite balloon %q", - bln.Def.CpuClass, bln.Cpus, bln.PrettyName()) + // Composite balloon: each component carries its own CpuClass. + log.Debugf("apply CPU classes of components of composite balloon %q on CPUs %s", + bln.PrettyName(), bln.Cpus) for _, compBln := range bln.components { if err := p.useCpuClass(compBln); err != nil { log.Warnf("failed to apply CPU class %q on CPUs %s of %q in composite balloon %q: %v", compBln.Def.CpuClass, compBln.Cpus, compBln.PrettyName(), bln.PrettyName(), err) } - } return nil } - if err := cpucontrol.Assign(p.cch, bln.Def.CpuClass, bln.Cpus.UnsortedList()...); err != nil { - log.Warnf("failed to apply class %q on CPUs %q: %v", bln.Def.CpuClass, bln.Cpus, err) - } else { - log.Debugf("apply CPU class %q on CPUs %q of %q", bln.Def.CpuClass, bln.Cpus, bln.PrettyName()) + if p.turboAllocator == nil { + return nil + } + className := p.turboAllocator.ResolveClassName(bln.Def.CpuClass) + log.Debugf("apply CPU class %q on CPUs %q of %q", className, bln.Cpus, bln.PrettyName()) + if err := p.turboAllocator.UseClass(bln.Def.CpuClass, bln.Cpus); err != nil { + log.Warnf("failed to apply class %q on CPUs %q: %v", className, bln.Cpus, err) } return nil } // forgetCpuClass is called when CPUs of a balloon are released from duty. func (p *balloons) forgetCpuClass(bln *Balloon) { - // Use p.IdleCpuClass for bln.Cpus. 
- // Usual inputs: see useCpuClass - if err := cpucontrol.Assign(p.cch, p.bpoptions.IdleCpuClass, bln.Cpus.UnsortedList()...); err != nil { - log.Warnf("failed to forget class %q of cpus %q: %v", bln.Def.CpuClass, bln.Cpus, err) + if p.turboAllocator == nil { + return + } + idle := p.turboAllocator.ResolveClassName(p.bpoptions.IdleCpuClass) + if err := p.turboAllocator.ForgetClass(bln.Cpus); err != nil { + log.Warnf("failed to forget class of cpus %q (idle class %q): %v", bln.Cpus, idle, err) } else { if len(bln.components) > 0 { - log.Debugf("forget classes of composite balloon %q cpus %q", bln.Def.Name, bln.Cpus) + log.Debugf("forget classes of composite balloon %q cpus %q (idle class %q)", + bln.Def.Name, bln.Cpus, idle) } else { - log.Debugf("forget class %q of cpus %q", bln.Def.CpuClass, bln.Cpus) + log.Debugf("forget class of cpus %q (idle class %q)", bln.Cpus, idle) } } } @@ -1432,6 +1415,13 @@ func changesCpuClasses(opts0, opts1 *BalloonsOptions) bool { return true } } + // Detect changes in CPUClasses definitions (turbo attributes, frequencies, etc.) + if len(opts0.CPUClasses) != len(opts1.CPUClasses) { + return true + } + if utils.DumpJSON(opts0.CPUClasses) != utils.DumpJSON(opts1.CPUClasses) { + return true + } return false } @@ -1454,6 +1444,14 @@ func (p *balloons) Reconfigure(newCfg interface{}) error { log.Infof("no configuration changes") } else { log.Infof("configuration changes only on CPU classes") + // Update CPUClasses definitions. + p.bpoptions.CPUClasses = newBalloonsOptions.CPUClasses + p.bpoptions.IdleCpuClass = newBalloonsOptions.IdleCpuClass + if p.turboAllocator != nil { + if err := p.turboAllocator.Reconfigure(p.bpoptions.CPUClasses, p.bpoptions.IdleCpuClass); err != nil { + log.Warnf("failed to reconfigure CPU class allocator: %v", err) + } + } // Update new CPU classes to existing balloon // definitions. 
The same BalloonDef instances // must be kept in use, because each Balloon @@ -1600,6 +1598,31 @@ func (p *balloons) validateConfig(bpoptions *BalloonsOptions) error { if len(undefinedSchedulingClasses) > 0 { return balloonsError("schedulingClass(es) defined in balloonTypes but missing from schedulingClasses: %v", undefinedSchedulingClasses) } + // Validate CPUClasses. + cpuClassNames := map[string]struct{}{} + for _, cc := range bpoptions.CPUClasses { + if cc.Name == "" { + return balloonsError("missing or empty name in a cpuClasses entry") + } + if _, dup := cpuClassNames[cc.Name]; dup { + return balloonsError("duplicate cpuClasses name: %q", cc.Name) + } + cpuClassNames[cc.Name] = struct{}{} + } + // Verify that cpuClass references in balloon types are defined + // in either cpuClasses or existing control.cpu.classes. + existingControlClasses := cpucontrol.GetClasses() + for _, blnDef := range bpoptions.BalloonDefs { + if blnDef.CpuClass == "" { + continue + } + _, inCPUClasses := cpuClassNames[blnDef.CpuClass] + _, inControlClasses := existingControlClasses[blnDef.CpuClass] + if !inCPUClasses && !inControlClasses { + log.Warnf("cpuClass %q referenced by balloon type %q is not defined in cpuClasses or control.cpu.classes", + blnDef.CpuClass, blnDef.Name) + } + } var circularCheck func(name string, seen map[string]int) error circularCheck = func(name string, seen map[string]int) error { if seen[name] > 0 { @@ -1671,6 +1694,30 @@ func (p *balloons) setConfig(bpoptions *BalloonsOptions) error { setOmittedDefaults(bpoptions) + // Set bpoptions early so the turbo allocator construction below + // has access to CPUClasses. + p.bpoptions = bpoptions + + // Construct or reconfigure the turbo-aware CPU class allocator. + // All cpucontrol.SetClass / cpucontrol.Assign calls flow through + // it. 
+ if p.turboAllocator == nil { + ta, err := NewCPUClassTurboAllocator( + WithSystem(p.options.System), + WithCache(p.cch), + WithCPUClasses(bpoptions.CPUClasses), + WithIdleClass(bpoptions.IdleCpuClass), + ) + if err != nil { + return balloonsError("failed to create CPU class turbo allocator: %w", err) + } + p.turboAllocator = ta + } else { + if err := p.turboAllocator.Reconfigure(bpoptions.CPUClasses, bpoptions.IdleCpuClass); err != nil { + return balloonsError("failed to reconfigure CPU class turbo allocator: %w", err) + } + } + reservedBalloonDef, defaultBalloonDef, err := p.fillBuiltinBalloonDefs(bpoptions) if err != nil { return err diff --git a/cmd/plugins/balloons/policy/cpuclass.go b/cmd/plugins/balloons/policy/cpuclass.go new file mode 100644 index 000000000..60cd3cc04 --- /dev/null +++ b/cmd/plugins/balloons/policy/cpuclass.go @@ -0,0 +1,448 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package balloons
+
+import (
+	"fmt"
+
+	cpucfg "github.com/containers/nri-plugins/pkg/apis/config/v1alpha1/resmgr/control/cpu"
+	"github.com/containers/nri-plugins/pkg/resmgr/cache"
+	cpucontrol "github.com/containers/nri-plugins/pkg/resmgr/control/cpu"
+	"github.com/containers/nri-plugins/pkg/sysfs"
+	"github.com/containers/nri-plugins/pkg/utils/cpuset"
+)
+
+// CPUClassTurboAllocator owns all CPU-class lifecycle concerns for the
+// balloons policy: resolution of symbolic frequencies (min/base/turbo),
+// turbo-priority winner selection, and the actual cpucontrol.SetClass /
+// cpucontrol.Assign calls that follow from those decisions.
+//
+// The allocator keeps the policy code free of any direct CPU controller
+// access. Balloons code only needs to call UseClass/ForgetClass for the
+// CPU sets it manages; the allocator takes care of pushing class
+// definitions to the CPU controller and re-assigning CPUs of classes
+// whose effective turbo frequency changes when the active winner
+// changes.
+type CPUClassTurboAllocator struct {
+	// sys is the system topology handed to discoverTurboInfo.
+	sys sysfs.System
+	// cch is the cache passed on to every cpucontrol.Assign call.
+	cch cache.Cache
+	// classes holds the CPU class definitions as given by
+	// WithCPUClasses or Reconfigure.
+	classes []*CPUClass
+	// classByName indexes the entries of classes by their Name.
+	classByName map[string]*CPUClass
+	// idleClassName is the configured name of the class that
+	// ForgetClass and ResetIdle apply (after ResolveClassName).
+	idleClassName string
+	// turboInfo stays nil when platform discovery failed.
+	turboInfo *platformTurboInfo
+
+	// activeCpus tracks the set of CPUs currently assigned to each
+	// CPU class (by the latest UseClass/ForgetClass calls). It is the
+	// allocator's local model of "which classes are active". The
+	// recalculation of the turbo winner consults this map without
+	// reaching back into balloons or the CPU controller.
+	activeCpus map[string]cpuset.CPUSet
+
+	// winnerPrio is the highest TurboPriority among CPU classes that
+	// had any active CPUs the last time recalculateTurbo() ran.
+	// Initialized to -1 to force the first recalculation.
+	winnerPrio int
+}
+
+// TurboOption is a functional option for NewCPUClassTurboAllocator.
+// An option that returns a non-nil error aborts the construction.
+type TurboOption func(*CPUClassTurboAllocator) error
+
+// WithSystem provides the sysfs system topology for symbolic frequency
+// resolution.
+func WithSystem(sys sysfs.System) TurboOption {
+	setSystem := func(alloc *CPUClassTurboAllocator) error {
+		alloc.sys = sys
+		return nil
+	}
+	return setSystem
+}
+
+// WithCache hands over the resource manager cache that the allocator
+// passes to cpucontrol.Assign.
+func WithCache(cch cache.Cache) TurboOption {
+	setCache := func(alloc *CPUClassTurboAllocator) error {
+		alloc.cch = cch
+		return nil
+	}
+	return setCache
+}
+
+// WithCPUClasses installs the user-facing CPUClass definitions and
+// builds the by-name index over them.
+func WithCPUClasses(classes []*CPUClass) TurboOption {
+	setClasses := func(alloc *CPUClassTurboAllocator) error {
+		alloc.classes, alloc.classByName = classes, make(map[string]*CPUClass, len(classes))
+		for i := range classes {
+			alloc.classByName[classes[i].Name] = classes[i]
+		}
+		return nil
+	}
+	return setClasses
+}
+
+// WithIdleClass sets the name of the idle CPU class that ForgetClass
+// and ResetIdle apply.
+func WithIdleClass(name string) TurboOption {
+	setIdle := func(alloc *CPUClassTurboAllocator) error {
+		alloc.idleClassName = name
+		return nil
+	}
+	return setIdle
+}
+
+// NewCPUClassTurboAllocator builds a turbo allocator from the given
+// options. WithSystem and WithCache are mandatory; an error is
+// returned when either is missing or when an option fails. On success
+// the platform turbo info has been discovered and the initial CPU
+// class definitions have been pushed to the CPU controller through
+// cpucontrol.SetClass, so that later cpucontrol.Assign calls see the
+// resolved frequencies.
+func NewCPUClassTurboAllocator(opts ...TurboOption) (*CPUClassTurboAllocator, error) {
+	alloc := new(CPUClassTurboAllocator)
+	alloc.activeCpus = make(map[string]cpuset.CPUSet)
+	// -1 never equals a real priority, which forces the first
+	// recalculateTurbo() call to do the work.
+	alloc.winnerPrio = -1
+
+	for i := range opts {
+		err := opts[i](alloc)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	switch {
+	case alloc.sys == nil:
+		return nil, fmt.Errorf("CPUClassTurboAllocator: missing required option WithSystem")
+	case alloc.cch == nil:
+		return nil, fmt.Errorf("CPUClassTurboAllocator: missing required option WithCache")
+	}
+
+	alloc.discoverPlatformInfo()
+	alloc.pushInitialClassDefinitions()
+	return alloc, nil
+}
+
+// Reconfigure replaces the CPU class set and idle class name.
+// Resets the turbo winner so that the next UseClass/ForgetClass call
+// recomputes the effective frequencies, and pushes the class
+// definitions to the CPU controller again. Always returns nil.
+func (a *CPUClassTurboAllocator) Reconfigure(classes []*CPUClass, idleClass string) error {
+	byName := make(map[string]*CPUClass, len(classes))
+	a.classes, a.classByName = classes, byName
+	for i := range classes {
+		byName[classes[i].Name] = classes[i]
+	}
+	a.idleClassName, a.winnerPrio = idleClass, -1
+	a.pushInitialClassDefinitions()
+	return nil
+}
+
+// Classes gives access to the CPUClass set currently in use.
+func (a *CPUClassTurboAllocator) Classes() []*CPUClass {
+	current := a.classes
+	return current
+}
+
+// ClassByName returns the CPUClass with the given name, or nil when
+// there is no such class.
+func (a *CPUClassTurboAllocator) ClassByName(name string) *CPUClass {
+	class := a.classByName[name]
+	return class
+}
+
+// defaultClassName names the fallback CPU class. It is used when a
+// balloon type has no cpuClass or when idleCpuClass is empty.
+const defaultClassName = "default"
+
+// isKnownClass tells whether name is a class of this allocator's
+// cpuClasses configuration or a class held by the CPU controller
+// (cpucontrol.GetClasses). The allocator's own classes are checked
+// first; the CPU controller is asked only when that lookup misses.
+// A class can be known to the CPU controller without being present
+// in classByName, for instance when it comes from the legacy
+// control.cpu.classes section.
+func (a *CPUClassTurboAllocator) isKnownClass(name string) bool {
+	_, inPolicy := a.classByName[name]
+	if inPolicy {
+		return true
+	}
+	_, inController := cpucontrol.GetClasses()[name]
+	return inController
+}
+
+// ResolveClassName resolves a (possibly empty or unknown) configured
+// CPU class name to the class that should actually be applied.
+// If the configured name matches a class known to either cpuClasses
+// or control.cpu.classes it is returned unchanged. Otherwise, if a
+// class named "default" is known to either source, "default" is
+// returned. As a last resort the original name is returned, so the
+// caller's existing log/warning paths still see what was requested.
+//
+// An empty name means that no class was configured. That case is
+// resolved silently; only a non-empty unknown name is logged as an
+// error.
+func (a *CPUClassTurboAllocator) ResolveClassName(name string) string {
+	if a.isKnownClass(name) {
+		return name
+	}
+	if a.isKnownClass(defaultClassName) {
+		if name != "" {
+			log.Errorf("unknown CPU class %q: falling back to using %q", name, defaultClassName)
+		}
+		return defaultClassName
+	}
+	// Same guard as in the fallback branch above: without it every
+	// reset/use/forget of an unconfigured class would log an error.
+	if name != "" {
+		log.Errorf("unknown CPU class %q and fallback class %q missing from cpuClasses", name, defaultClassName)
+	}
+	return name
+}
+
+// UseClass marks the given CPUs as active under className, recalculates
+// the turbo winner, then assigns the CPUs to className via the CPU
+// controller. The recalculation runs first so that the controller's
+// in-memory class definition reflects the correct effective turbo
+// frequency at the time of Assign. An empty or unknown className
+// resolves to the "default" CPU class when one is configured.
+//
+// An empty CPU set is a no-op. The returned error wraps a failure of
+// cpucontrol.Assign; the active-class bookkeeping has already been
+// updated by then.
+func (a *CPUClassTurboAllocator) UseClass(className string, cpus cpuset.CPUSet) error {
+	if cpus.IsEmpty() {
+		return nil
+	}
+	className = a.ResolveClassName(className)
+	// A CPU is active in at most one class: drop it from wherever it
+	// was before recording it under the new class.
+	a.removeCpusFromAllClasses(cpus)
+	if className != "" {
+		a.activeCpus[className] = a.activeCpus[className].Union(cpus)
+	}
+	a.recalculateTurbo()
+	if err := cpucontrol.Assign(a.cch, className, cpus.UnsortedList()...); err != nil {
+		return fmt.Errorf("failed to assign CPUs %s to class %q: %w", cpus, className, err)
+	}
+	return nil
+}
+
+// ForgetClass removes the given CPUs from any active class set,
+// assigns them to the idle class via the CPU controller, then
+// recalculates the turbo winner (the previously dominant class may
+// have lost its last active balloon).
+// An empty or unknown idle class name resolves to the "default" CPU
+// class when one is configured.
+//
+// An empty CPU set is a no-op. The turbo winner is recalculated even
+// when the assignment to the idle class fails; the failure is then
+// returned, wrapped, to the caller.
+func (a *CPUClassTurboAllocator) ForgetClass(cpus cpuset.CPUSet) error {
+	if cpus.IsEmpty() {
+		return nil
+	}
+	idle := a.ResolveClassName(a.idleClassName)
+	a.removeCpusFromAllClasses(cpus)
+	assignErr := cpucontrol.Assign(a.cch, idle, cpus.UnsortedList()...)
+	// The active sets were already updated above, so the turbo winner
+	// has to be recomputed whether or not the assignment succeeded.
+	a.recalculateTurbo()
+	if assignErr != nil {
+		return fmt.Errorf("failed to assign CPUs %s to idle class %q: %w", cpus, idle, assignErr)
+	}
+	return nil
+}
+
+// ResetIdle assigns the given CPU set to the idle class via the CPU
+// controller. Used at policy startup to bring all allowed CPUs to a
+// known baseline before any container-driven UseClass call. Does not
+// affect the active-class tracking. An empty or unknown idle class
+// name resolves to the "default" CPU class when one is configured.
+// An empty CPU set is a no-op.
+func (a *CPUClassTurboAllocator) ResetIdle(cpus cpuset.CPUSet) error {
+	if cpus.IsEmpty() {
+		return nil
+	}
+	idle := a.ResolveClassName(a.idleClassName)
+	if err := cpucontrol.Assign(a.cch, idle, cpus.UnsortedList()...); err != nil {
+		return fmt.Errorf("failed to assign CPUs %s to idle class %q: %w", cpus, idle, err)
+	}
+	return nil
+}
+
+// removeCpusFromAllClasses removes the given CPUs from every active
+// class set. Empty class sets are deleted.
+func (a *CPUClassTurboAllocator) removeCpusFromAllClasses(cpus cpuset.CPUSet) {
+	// Deleting and updating entries of the map being ranged over is
+	// permitted in Go; no new keys are added here.
+	for name, set := range a.activeCpus {
+		newSet := set.Difference(cpus)
+		if newSet.IsEmpty() {
+			delete(a.activeCpus, name)
+		} else {
+			a.activeCpus[name] = newSet
+		}
+	}
+}
+
+// discoverPlatformInfo reads platform turbo capabilities from sysfs.
+// Failure is non-fatal; symbolic frequencies will resolve to 0 in
+// that case (matching the behavior of the pre-allocator code path).
+func (a *CPUClassTurboAllocator) discoverPlatformInfo() {
+	// On failure turboInfo is left as it was.
+	if info, err := discoverTurboInfo(a.sys); err == nil {
+		a.turboInfo = info
+	} else {
+		log.Warnf("CPUClassTurboAllocator: cannot discover platform turbo info: %v", err)
+	}
+}
+
+// pushInitialClassDefinitions converts every configured CPUClass to a
+// cpucfg.Class and hands it to the CPU controller with
+// cpucontrol.SetClass. With platform turbo info available, symbolic
+// frequencies are resolved and "turbo" means the platform maximum
+// turbo frequency for every class, because no turbo winner has been
+// chosen yet. Without platform info, symbolic frequencies stay 0.
+// The priority-based effective turbo is enforced later, by the
+// recalculateTurbo() run that the first UseClass call triggers.
+func (a *CPUClassTurboAllocator) pushInitialClassDefinitions() {
+	for i := range a.classes {
+		class := a.classes[i]
+		var pushed cpucfg.Class
+		switch {
+		case a.turboInfo != nil:
+			pushed = resolvedCpuClassToControlClass(class, a.turboInfo, 0)
+		default:
+			pushed = cpuClassToControlClass(class)
+		}
+		cpucontrol.SetClass(class.Name, pushed)
+		log.Infof("cpuClass %q configured: minFreq=%s(%d) maxFreq=%s(%d) disabledCstates=%v",
+			class.Name, class.MinFreq, pushed.MinFreq, class.MaxFreq, pushed.MaxFreq, class.DisabledCstates)
+	}
+}
+
+// recalculateTurbo resolves exclusive turbo frequency access based on
+// turboPriority across all CPU classes that currently have active CPUs.
+//
+// Algorithm (steady-state no-op):
+//  1. Find the highest turboPriority among classes with non-empty
+//     active CPU sets.
+//  2. If the new highest priority equals the previously computed one,
+//     return immediately. Effective frequencies cannot have changed.
+//  3. Otherwise: update CPU controller class definitions for ALL
+//     CPUClasses via cpucontrol.SetClass. SetClass records the new
+//     definition in memory and marks every CPU currently assigned to
+//     the affected class as dirty.
+//     The CPU controller's Commit()
+//     (called once per NRI request after all per-container hooks)
+//     then issues the minimal set of sysfs writes needed to reach
+//     the new desired state, deduplicated against the per-CPU
+//     lastFreq cache.
+func (a *CPUClassTurboAllocator) recalculateTurbo() {
+	if len(a.classes) == 0 {
+		return
+	}
+
+	// Top turboPriority over the classes that currently hold CPUs.
+	// Classes without a tracked, non-empty CPU set do not compete.
+	topPrio := 0
+	for _, class := range a.classes {
+		active, tracked := a.activeCpus[class.Name]
+		if tracked && !active.IsEmpty() && class.TurboPriority > topPrio {
+			topPrio = class.TurboPriority
+		}
+	}
+
+	// Same winner priority as last time: nothing to update.
+	if topPrio == a.winnerPrio {
+		return
+	}
+	a.winnerPrio = topPrio
+
+	// The new priority is recorded above even when the effective turbo
+	// cannot be computed, so the warning below is not repeated until
+	// the winner priority changes again.
+	if a.turboInfo == nil {
+		log.Warnf("turbo recalculation skipped: no platform turbo info available")
+		return
+	}
+
+	// Re-register every class with its new effective turbo frequency.
+	// The sysfs writes themselves are deferred until the CPU
+	// controller's next Commit() call.
+	for _, class := range a.classes {
+		// With no competition (topPrio == 0) every class wins;
+		// otherwise only classes at or above the top priority do.
+		isWinner := topPrio == 0 || class.TurboPriority >= topPrio
+		turboKHz := a.turboInfo.baseFreqKHz
+		if isWinner {
+			turboKHz = a.turboInfo.maxTurboFreqKHz
+		}
+		ctl := resolvedCpuClassToControlClass(class, a.turboInfo, turboKHz)
+		cpucontrol.SetClass(class.Name, ctl)
+		log.Infof("turbo: class %q (prio=%d, winner=%v): minFreq=%d maxFreq=%d",
+			class.Name, class.TurboPriority,
+			isWinner,
+			ctl.MinFreq, ctl.MaxFreq)
+	}
+}
+
+// cpuClassToControlClass converts a user-friendly CPUClass definition
+// into the internal cpu.Class representation used by the CPU controller.
+// Symbolic frequencies (min, base, turbo) are left as 0; use
+// resolvedCpuClassToControlClass() when platform info is available.
+func cpuClassToControlClass(cc *CPUClass) cpucfg.Class {
+	// Symbolic names need platform info to be resolved; without it
+	// they are reported as 0.
+	resolveFreq := func(f Frequency) uint {
+		if f.IsSymbolic() {
+			return 0
+		}
+		return f.KHz()
+	}
+	return cpucfg.Class{
+		MinFreq:                     resolveFreq(cc.MinFreq),
+		MaxFreq:                     resolveFreq(cc.MaxFreq),
+		EnergyPerformancePreference: cc.EnergyPerformancePreference,
+		UncoreMinFreq:               resolveFreq(cc.UncoreMinFreq),
+		UncoreMaxFreq:               resolveFreq(cc.UncoreMaxFreq),
+		FreqGovernor:                cc.FreqGovernor,
+		DisabledCstates:             cc.DisabledCstates,
+	}
+}
+
+// resolvedCpuClassToControlClass converts a CPUClass to a control
+// class with symbolic frequencies resolved using platform info.
+// effectiveTurboKHz overrides the turbo frequency used when resolving
+// the "turbo" symbolic name (0 means use the platform turbo frequency).
+//
+// A nil info is tolerated: the result is then the same as that of
+// cpuClassToControlClass, i.e. symbolic frequencies are left as 0.
+func resolvedCpuClassToControlClass(cc *CPUClass, info *platformTurboInfo, effectiveTurboKHz uint) cpucfg.Class {
+	// Guard before the first field access on info. Dereferencing a nil
+	// info would panic, which made the nil handling further down
+	// unreachable in the previous version.
+	if info == nil {
+		return cpuClassToControlClass(cc)
+	}
+	turboKHz := info.maxTurboFreqKHz
+	if effectiveTurboKHz > 0 {
+		turboKHz = effectiveTurboKHz
+	}
+	resolve := func(f Frequency) uint {
+		return f.Resolve(info.minFreqKHz, info.baseFreqKHz, turboKHz)
+	}
+	return cpucfg.Class{
+		MinFreq:                     resolve(cc.MinFreq),
+		MaxFreq:                     resolve(cc.MaxFreq),
+		EnergyPerformancePreference: cc.EnergyPerformancePreference,
+		UncoreMinFreq:               resolve(cc.UncoreMinFreq),
+		UncoreMaxFreq:               resolve(cc.UncoreMaxFreq),
+		FreqGovernor:                cc.FreqGovernor,
+		DisabledCstates:             cc.DisabledCstates,
+	}
+}
+
+// platformTurboInfo holds platform-level turbo frequency capabilities
+// discovered from sysfs.
+type platformTurboInfo struct {
+	// baseFreqKHz is the base frequency shared by all CPUs (kHz).
+	baseFreqKHz uint
+	// maxTurboFreqKHz is the maximum single-core turbo frequency (kHz).
+	maxTurboFreqKHz uint
+	// minFreqKHz is the platform minimum frequency (kHz).
+	minFreqKHz uint
+}
+
+// discoverTurboInfo reads platform turbo capabilities from sysfs.
+// It uses the first online CPU's frequency range as representative.
+// CPUs that are missing, offline, or that report a zero base or
+// maximum frequency are skipped. An error is returned when the
+// topology lists no CPUs or when none of them qualifies.
+func discoverTurboInfo(sys sysfs.System) (*platformTurboInfo, error) {
+	ids := sys.CPUIDs()
+	if len(ids) == 0 {
+		return nil, fmt.Errorf("no CPUs found in system topology")
+	}
+	for _, cpuID := range ids {
+		cpu := sys.CPU(cpuID)
+		if cpu == nil || !cpu.Online() {
+			continue
+		}
+		freqRange, base := cpu.FrequencyRange(), cpu.BaseFrequency()
+		if base != 0 && freqRange.Max != 0 {
+			// First usable CPU wins; its values stand for the platform.
+			return &platformTurboInfo{
+				baseFreqKHz:     uint(base),
+				maxTurboFreqKHz: uint(freqRange.Max),
+				minFreqKHz:      uint(freqRange.Min),
+			}, nil
+		}
+	}
+	return nil, fmt.Errorf("no online CPU with valid frequency information found")
+}
diff --git a/cmd/plugins/balloons/policy/flags.go b/cmd/plugins/balloons/policy/flags.go
index 19889ea15..e39809ca3 100644
--- a/cmd/plugins/balloons/policy/flags.go
+++ b/cmd/plugins/balloons/policy/flags.go
@@ -24,6 +24,8 @@ type (
 	BalloonDef = cfgapi.BalloonDef
 	LoadClass = cfgapi.LoadClass
 	SchedulingClass = cfgapi.SchedulingClass
+	CPUClass = cfgapi.CPUClass
+	Frequency = cfgapi.Frequency
 	CPUTopologyLevel = cfgapi.CPUTopologyLevel
 )

From bb61c21c928b6192c71549187ce1825f50cd24a2 Mon Sep 17 00:00:00 2001
From: Antti Kervinen
Date: Fri, 15 May 2026 10:05:05 +0300
Subject: [PATCH 5/6] e2e: test balloons cpuClasses with turbo priority

Signed-off-by: Antti Kervinen
---
 .../balloons/balloons-config.yaml.in | 42 +-
 .../balloons-cstates.cfg | 12 +-
 .../balloons-turbo-defaultclass.cfg | 35 ++
 .../balloons-turbo-oldsyntax.cfg | 36 ++
 .../test18-turbo-priority/balloons-turbo.cfg | 48 ++
 .../n4c16/test18-turbo-priority/code.var.sh | 495 ++++++++++++++++++
 6 files changed, 645 insertions(+), 23 deletions(-)
 create mode 100644 test/e2e/policies.test-suite/balloons/n4c16/test18-turbo-priority/balloons-turbo-defaultclass.cfg
 create mode 100644 test/e2e/policies.test-suite/balloons/n4c16/test18-turbo-priority/balloons-turbo-oldsyntax.cfg
 create mode 100644
test/e2e/policies.test-suite/balloons/n4c16/test18-turbo-priority/balloons-turbo.cfg create mode 100644 test/e2e/policies.test-suite/balloons/n4c16/test18-turbo-priority/code.var.sh diff --git a/test/e2e/policies.test-suite/balloons/balloons-config.yaml.in b/test/e2e/policies.test-suite/balloons/balloons-config.yaml.in index 8ebcfaacb..aa11dcf33 100644 --- a/test/e2e/policies.test-suite/balloons/balloons-config.yaml.in +++ b/test/e2e/policies.test-suite/balloons/balloons-config.yaml.in @@ -61,19 +61,29 @@ spec: debug: - policy - control: - cpu: - classes: - default: - minFreq: ${CPU_DEFAULT_MIN:-800000} - maxFreq: ${CPU_DEFAULT_MAX:-2800000} - classA: - minFreq: ${CPU_CLASSA_MIN:-900000} - maxFreq: ${CPU_CLASSA_MAX:-2900000} - classB: - minFreq: ${CPU_CLASSB_MIN:-1000000} - maxFreq: ${CPU_CLASSB_MAX:-3000000} - classC: - minFreq: ${CPU_CLASSC_MIN:-1100000} - maxFreq: ${CPU_CLASSC_MAX:-3100000} - energyPerformancePreference: ${CPU_CLASSC_EPP:-1} + cpuClasses: + + $([ -n "$CPUCLASS_DEFAULT_SKIP" ] || echo " + - name: default + minFreq: ${CPU_DEFAULT_MIN:-800MHz} + maxFreq: ${CPU_DEFAULT_MAX:-2.8GHz} + ") + + $([ -n "$CPUCLASS_A_SKIP" ] || echo " + - name: classA + minFreq: ${CPU_CLASSA_MIN:-900MHz} + maxFreq: ${CPU_CLASSA_MAX:-2.9GHz} + ") + + $([ -n "$CPUCLASS_B_SKIP" ] || echo " + - name: classB + minFreq: ${CPU_CLASSB_MIN:-1GHz} + maxFreq: ${CPU_CLASSB_MAX:-3GHz} + ") + + $([ -n "$CPUCLASS_C_SKIP" ] || echo " + - name: classC + minFreq: ${CPU_CLASSC_MIN:-1.1GHz} + maxFreq: ${CPU_CLASSC_MAX:-3.1GHz} + energyPerformancePreference: ${CPU_CLASSC_EPP:-1} + ") diff --git a/test/e2e/policies.test-suite/balloons/n4c16/test17-cstates-scheduling/balloons-cstates.cfg b/test/e2e/policies.test-suite/balloons/n4c16/test17-cstates-scheduling/balloons-cstates.cfg index 215432bf1..a89a699d2 100644 --- a/test/e2e/policies.test-suite/balloons/n4c16/test17-cstates-scheduling/balloons-cstates.cfg +++ 
b/test/e2e/policies.test-suite/balloons/n4c16/test17-cstates-scheduling/balloons-cstates.cfg @@ -16,13 +16,11 @@ config: cpuClass: lowlatency-class schedulingClass: realtime - control: - cpu: - classes: - lowlatency-class: - disabledCstates: [C4, C6, C8, C10] - default-class: - disabledCstates: [] + cpuClasses: + - name: lowlatency-class + disabledCstates: [C4, C6, C8, C10] + - name: default-class + disabledCstates: [] schedulingClasses: - name: realtime diff --git a/test/e2e/policies.test-suite/balloons/n4c16/test18-turbo-priority/balloons-turbo-defaultclass.cfg b/test/e2e/policies.test-suite/balloons/n4c16/test18-turbo-priority/balloons-turbo-defaultclass.cfg new file mode 100644 index 000000000..eb2a2b9cc --- /dev/null +++ b/test/e2e/policies.test-suite/balloons/n4c16/test18-turbo-priority/balloons-turbo-defaultclass.cfg @@ -0,0 +1,35 @@ +config: + agent: + nodeResourceTopology: true + allocatorTopologyBalancing: false + availableResources: + cpu: cpuset:2-7,10-13 + reservedResources: + cpu: 750m + + pinCPU: true + + # Intentionally no idleCPUClass and no cpuClass on the reserved + # balloon type: both must fall back to the cpuClass named "default". 
+ balloonTypes: + - name: reserved + - name: fast-bln + cpuClass: fast + minCPUs: 1 + maxCPUs: 1 + + cpuClasses: + - name: default + minFreq: "min" + maxFreq: "base" + - name: fast + minFreq: "turbo" + maxFreq: "turbo" + + log: + debug: + - policy + - nri-plugin + - cpu +extraEnv: + OVERRIDE_SYS_CPUFREQ: '''[{"cpus": "0-15", "base": 2900000, "min": 800000, "max": 3800000}]''' diff --git a/test/e2e/policies.test-suite/balloons/n4c16/test18-turbo-priority/balloons-turbo-oldsyntax.cfg b/test/e2e/policies.test-suite/balloons/n4c16/test18-turbo-priority/balloons-turbo-oldsyntax.cfg new file mode 100644 index 000000000..61f2888fe --- /dev/null +++ b/test/e2e/policies.test-suite/balloons/n4c16/test18-turbo-priority/balloons-turbo-oldsyntax.cfg @@ -0,0 +1,36 @@ +config: + agent: + nodeResourceTopology: true + allocatorTopologyBalancing: false + availableResources: + cpu: cpuset:2-7,10-13 + reservedResources: + cpu: 750m + + pinCPU: true + + idleCPUClass: legacy-idle + + balloonTypes: + - name: legacy-bln + cpuClass: legacy-fast + minCPUs: 1 + maxCPUs: 1 + + control: + cpu: + classes: + legacy-idle: + minFreq: 800000 + maxFreq: 2900000 + legacy-fast: + minFreq: 3800000 + maxFreq: 3800000 + + log: + debug: + - policy + - nri-plugin + - cpu +extraEnv: + OVERRIDE_SYS_CPUFREQ: '''[{"cpus": "0-15", "base": 2900000, "min": 800000, "max": 3800000}]''' diff --git a/test/e2e/policies.test-suite/balloons/n4c16/test18-turbo-priority/balloons-turbo.cfg b/test/e2e/policies.test-suite/balloons/n4c16/test18-turbo-priority/balloons-turbo.cfg new file mode 100644 index 000000000..e73f41a37 --- /dev/null +++ b/test/e2e/policies.test-suite/balloons/n4c16/test18-turbo-priority/balloons-turbo.cfg @@ -0,0 +1,48 @@ +config: + agent: + nodeResourceTopology: true + allocatorTopologyBalancing: false + availableResources: + cpu: cpuset:2-7,10-13 + reservedResources: + cpu: 750m + + pinCPU: true + + idleCPUClass: default-noturbo + + balloonTypes: + - name: reserved + cpuClass: default-turbo + - name: 
turbo-high-bln + cpuClass: turbo-high + minCPUs: 1 + maxCPUs: 2 + - name: turbo-low-bln + cpuClass: turbo-low + minCPUs: 1 + maxCPUs: 2 + + cpuClasses: + - name: turbo-high + minFreq: "turbo" + maxFreq: "turbo" + turboPriority: 10 + - name: turbo-low + minFreq: "turbo" + maxFreq: "turbo" + turboPriority: 1 + - name: default-turbo + minFreq: "min" + maxFreq: "turbo" + - name: default-noturbo + minFreq: "min" + maxFreq: "base" + + log: + debug: + - policy + - nri-plugin + - cpu +extraEnv: + OVERRIDE_SYS_CPUFREQ: '''[{"cpus": "0-15", "base": 2900000, "min": 800000, "max": 3800000}]''' diff --git a/test/e2e/policies.test-suite/balloons/n4c16/test18-turbo-priority/code.var.sh b/test/e2e/policies.test-suite/balloons/n4c16/test18-turbo-priority/code.var.sh new file mode 100644 index 000000000..c840cfba5 --- /dev/null +++ b/test/e2e/policies.test-suite/balloons/n4c16/test18-turbo-priority/code.var.sh @@ -0,0 +1,495 @@ +# Test turbo priority: highest-priority active CPU class gets turbo, +# others get base. When the highest-priority balloon is removed, +# the next highest-priority class regains turbo. +# +# Also verifies CPU frequency write minimality: +# - no duplicate sysfs writes (each (cpu, prop, freq) tuple is logged +# at most once per recorded snapshot window, thanks to the per-CPU +# last-written cache in pkg/resmgr/control/cpu), +# - writes do happen on class transitions (turbo<->base) and when +# idle CPUs need their initial class settings, +# - a no-op event (creating a 2nd container that lands in the +# *same* turbo-low balloon as pod0) does not produce any new +# enforce writes. 
+
+helm-terminate
+helm_config=$TEST_DIR/balloons-turbo.cfg helm-launch balloons
+
+# turbo-log [last_n=20]
+# Queries the policy log on the VM and keeps the last <last_n> lines
+# that match 'turbo:' or 'cpuClass'. The verify-* helpers below read
+# the result from COMMAND_OUTPUT.
+turbo-log() {
+    local last_n=${1:-20}
+    vm-command "kubectl -n kube-system logs ds/nri-resource-policy-balloons | grep -E 'turbo:|cpuClass' | tail -n $last_n"
+}
+
+# verify-turbo-winner <class> <expected_max_freq> [last_n=20]
+# Looks at the last <last_n> turbo log lines and takes the most recent
+# one that mentions the class with winner=true. Reports a command
+# error unless that line contains maxFreq=<expected_max_freq>.
+verify-turbo-winner() {
+    local class=$1
+    local expected_max_freq=$2
+    local last_n=${3:-20}
+    echo "verify turbo winner: class=$class maxFreq=$expected_max_freq"
+    turbo-log $last_n
+    if ! grep "class \"$class\"" <<< "$COMMAND_OUTPUT" \
+            | grep "winner=true" \
+            | tail -n 1 \
+            | grep -q "maxFreq=$expected_max_freq"; then
+        command-error "expected class $class as turbo winner with maxFreq=$expected_max_freq"
+    fi
+}
+
+# verify-turbo-loser <class> <expected_max_freq> [last_n=20]
+# Counterpart of verify-turbo-winner: takes the most recent line that
+# mentions the class with winner=false and reports a command error
+# unless it contains maxFreq=<expected_max_freq> (the base frequency
+# for a class that lost turbo).
+verify-turbo-loser() {
+    local class=$1
+    local expected_max_freq=$2
+    local last_n=${3:-20}
+    echo "verify turbo loser: class=$class maxFreq=$expected_max_freq"
+    turbo-log $last_n
+    if ! grep "class \"$class\"" <<< "$COMMAND_OUTPUT" \
+            | grep "winner=false" \
+            | tail -n 1 \
+            | grep -q "maxFreq=$expected_max_freq"; then
+        command-error "expected class $class as turbo loser with maxFreq=$expected_max_freq"
+    fi
+}
+
+# Log message fragment that marks one CPU frequency write.
+ENFORCE_PATTERN='enforcing cpu frequency'
+
+# enforce-count
+# Echoes the total number of "enforcing cpu frequency" log lines so
+# far, with all whitespace stripped. The '|| true' keeps the remote
+# command successful when grep -c counts zero matches.
+enforce-count() {
+    vm-command "kubectl -n kube-system logs ds/nri-resource-policy-balloons | grep -c '$ENFORCE_PATTERN' || true" >/dev/null
+    echo "$COMMAND_OUTPUT" | tr -d '[:space:]'
+}
+
+# wait-enforce-grows <baseline> [timeout=15]
+# Polls until the cumulative number of enforce writes is greater than <baseline>.
+wait-enforce-grows() {
+    local baseline=$1
+    local timeout=${2:-15}
+    # \$(...) is escaped so that the count is re-evaluated on the VM at
+    # every poll instead of once locally.
+    if ! vm-run-until --timeout "$timeout" \
+        "[ \$(kubectl -n kube-system logs ds/nri-resource-policy-balloons 2>/dev/null | grep -c '$ENFORCE_PATTERN') -gt $baseline ]"; then
+        command-error "expected enforce-count to grow above $baseline within ${timeout}s"
+    fi
+}
+
+# wait-pod-gone <pod> [timeout=30]
+# Polls until kubectl no longer lists the named pod; reports a command
+# error if the pod is still there when the timeout expires.
+wait-pod-gone() {
+    local pod=$1
+    local timeout=${2:-30}
+    if ! vm-run-until --timeout "$timeout" "! kubectl get pod $pod -o name 2>/dev/null | grep -q ."; then
+        command-error "pod $pod did not disappear within ${timeout}s"
+    fi
+}
+
+# enforce-lines-since <count>
+# Fetches the enforce log lines added after the first <count> ones.
+# Standard output of vm-command is discarded here; presumably callers
+# read the lines from COMMAND_OUTPUT, as enforce-count does -- confirm.
+enforce-lines-since() {
+    local from=$1
+    vm-command "kubectl -n kube-system logs ds/nri-resource-policy-balloons | grep '$ENFORCE_PATTERN' | tail -n +$((from+1))" >/dev/null
+}
+
+# assert-step-writes