Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/plugins/balloons/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ARG GO_VERSION=1.25

FROM golang:${GO_VERSION}-bookworm AS builder
FROM docker.io/library/golang:${GO_VERSION}-bookworm AS builder

ARG IMAGE_VERSION
ARG BUILD_VERSION
Expand Down
63 changes: 58 additions & 5 deletions cmd/plugins/balloons/policy/balloons-policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,10 @@ type balloons struct {
options *policy.BackendOptions // configuration common to all policies
bpoptions *BalloonsOptions // balloons-specific configuration
cch cache.Cache // nri-resource-policy cache
allowed cpuset.CPUSet // bounding set of CPUs we're allowed to use
reserved cpuset.CPUSet // system-/kube-reserved CPUs
freeCpus cpuset.CPUSet // CPUs to be included in growing or new ballons
allowed cpuset.CPUSet // bounding set of CPUs we're allowed to use
reserved cpuset.CPUSet // system-/kube-reserved CPUs
reservedExact bool // keep built-in reserved balloon on the exact reserved cpuset
freeCpus cpuset.CPUSet // CPUs to be included in growing or new ballons
ifreeCpus cpuset.CPUSet // initially free CPUs before assigning any containers
cpuTree *cpuTreeNode // system CPU topology

Expand Down Expand Up @@ -1102,7 +1103,28 @@ func (p *balloons) newBalloon(blnDef *BalloonDef, confCpus bool) (*Balloon, erro
}
}
}
if err := p.resizeBalloon(bln, blnDef.MinCpus*1000); err != nil {
if p.reservedExact && blnDef.Name == reservedBalloonDefName {
requested := p.reserved.Clone()
if requested.Size() == 0 {
return nil, balloonsError("reservedCPUMode=hard-exact requires non-empty reserved cpuset")
}
if requested.Difference(p.freeCpus).Size() > 0 {
return nil, balloonsError("reserved exact cpuset %q is not fully free, missing %q", requested, requested.Difference(p.freeCpus))
}
// AllocateCpus() may mutate the preferred/requested cpuset argument.
// Keep an immutable copy for post-allocation validation and logging.
exact := requested.Clone()
newCpus, err := p.cpuAllocator.AllocateCpus(&exact, requested.Size(), blnDef.AllocatorPriority.Value().Option())
if err != nil {
return nil, balloonsError("failed to allocate exact reserved cpuset %q: %w", requested, err)
}
if !newCpus.Equals(requested) {
return nil, balloonsError("failed to allocate exact reserved cpuset: requested %q, got %q", requested, newCpus)

}
p.freeCpus = p.freeCpus.Difference(newCpus)
bln.Cpus = newCpus
} else if err := p.resizeBalloon(bln, blnDef.MinCpus*1000); err != nil {
return nil, err
}
bln.Mems = p.closestMems(bln.Cpus)
Expand Down Expand Up @@ -1809,7 +1831,19 @@ func (p *balloons) fillBuiltinBalloonDefs(bpoptions *BalloonsOptions) (*BalloonD
cset, p.allowed, cset.Difference(p.allowed))
}
p.reserved = p.allowed.Intersection(cset)
if reservedBalloonDef.MinCpus == 0 {
if bpoptions.ReservedCPUMode == cfgapi.ReservedCPUModeHardExact {
p.reservedExact = true
if reservedBalloonDef.MinCpus != 0 && reservedBalloonDef.MinCpus != p.reserved.Size() {
return nil, nil, balloonsError("mismatching reserved balloon minCpus: %d and ReservedResources cpuset size: %d",
reservedBalloonDef.MinCpus, p.reserved.Size())
}
if reservedBalloonDef.MaxCpus != 0 && reservedBalloonDef.MaxCpus != p.reserved.Size() {
return nil, nil, balloonsError("mismatching reserved balloon maxCpus: %d and ReservedResources cpuset size: %d",
reservedBalloonDef.MaxCpus, p.reserved.Size())
}
reservedBalloonDef.MinCpus = p.reserved.Size()
reservedBalloonDef.MaxCpus = p.reserved.Size()
} else if reservedBalloonDef.MinCpus == 0 {
if p.reserved.Size() < reservedBalloonDef.MaxCpus {
reservedBalloonDef.MinCpus = p.reserved.Size()
} else {
Expand All @@ -1828,6 +1862,9 @@ func (p *balloons) fillBuiltinBalloonDefs(bpoptions *BalloonsOptions) (*BalloonD
// balloon type.
reservedBalloonDef.PreferCloseToDevices = append([]string{virtDevReservedCpus}, reservedBalloonDef.PreferCloseToDevices...)
case cfgapi.AmountQuantity:
if bpoptions.ReservedCPUMode == cfgapi.ReservedCPUModeHardExact {
return nil, nil, balloonsError("reservedCPUMode=hard-exact requires ReservedResources cpu in cpuset form")
}
// ReservedResources.cpus defines number of
// CPUs. Treat the value as a minimum size for the
// reserved balloon, but the balloon is allowed to
Expand Down Expand Up @@ -1996,13 +2033,29 @@ func (p *balloons) resizeBalloon(bln *Balloon, newMilliCpus int) error {
if bln.Def.MinCpus > 0 && newCpuCount < bln.Def.MinCpus {
newCpuCount = bln.Def.MinCpus
}
if p.reservedExact && bln.Def.Name == reservedBalloonDefName {
exactCount := p.reserved.Size()
if oldCpuCount == exactCount && bln.Cpus.Equals(p.reserved) {
return nil
}
newCpuCount = exactCount
}
log.Debugf("resize %s to fit %d mCPU", bln, newMilliCpus)
log.Debugf("- change size from %d to %d full cpus", oldCpuCount, newCpuCount)
log.Debugf("- free cpus: %q", p.freeCpus)
if oldCpuCount == newCpuCount {
return nil
}
cpuCountDelta := newCpuCount - oldCpuCount
if p.reservedExact && bln.Def.Name == reservedBalloonDefName {
if cpuCountDelta == 0 {
if !bln.Cpus.Equals(p.reserved) {
return balloonsError("reserved balloon exact cpuset drifted: expected %q, got %q", p.reserved, bln.Cpus)
}
return nil
}
return balloonsError("reserved balloon exact cpuset cannot be resized: expected %q, current %q", p.reserved, bln.Cpus)
}
Comment on lines +2036 to +2058
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When the reserved balloon's CPU count matches exactCount but the cpuset has drifted, execution falls
through and hits the generic `if oldCpuCount == newCpuCount { return nil }` check before drift detection is ever reached. As a result, the drift error is currently dead code: it can never be triggered. We could merge both blocks into one that always returns, so the generic early return can't short-circuit it. Example:

diff --git a/cmd/plugins/balloons/policy/balloons-policy.go b/cmd/plugins/balloons/policy/balloons-policy.go
index 3211893f..39a73cfe 100644
--- a/cmd/plugins/balloons/policy/balloons-policy.go
+++ b/cmd/plugins/balloons/policy/balloons-policy.go
@@ -2035,10 +2035,13 @@ func (p *balloons) resizeBalloon(bln *Balloon, newMilliCpus int) error {
        }
        if p.reservedExact && bln.Def.Name == reservedBalloonDefName {
                exactCount := p.reserved.Size()
-               if oldCpuCount == exactCount && bln.Cpus.Equals(p.reserved) {
-                       return nil
+               if oldCpuCount == exactCount {
+                       if bln.Cpus.Equals(p.reserved) {
+                               return nil
+                       }
+                       return balloonsError("reserved balloon exact cpuset drifted: expected %q, got %q", p.reserved, bln.Cpus)
                }
-               newCpuCount = exactCount
+               return balloonsError("reserved balloon exact cpuset cannot be resized: expected %q, current %q", p.reserved, bln.Cpus)
        }
        log.Debugf("resize %s to fit %d mCPU", bln, newMilliCpus)
        log.Debugf("- change size from %d to %d full cpus", oldCpuCount, newCpuCount)
@@ -2047,15 +2050,6 @@ func (p *balloons) resizeBalloon(bln *Balloon, newMilliCpus int) error {
                return nil
        }
        cpuCountDelta := newCpuCount - oldCpuCount
-       if p.reservedExact && bln.Def.Name == reservedBalloonDefName {
-               if cpuCountDelta == 0 {
-                       if !bln.Cpus.Equals(p.reserved) {
-                               return balloonsError("reserved balloon exact cpuset drifted: expected %q, got %q", p.reserved, bln.Cpus)
-                       }
-                       return nil
-               }
-               return balloonsError("reserved balloon exact cpuset cannot be resized: expected %q, current %q", p.reserved, bln.Cpus)
-       }
        p.forgetCpuClass(bln)
        defer func() {
                if err := p.useCpuClass(bln); err != nil {

p.forgetCpuClass(bln)
defer func() {
if err := p.useCpuClass(bln); err != nil {
Expand Down
4 changes: 4 additions & 0 deletions cmd/plugins/balloons/policy/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ type (
LoadClass = cfgapi.LoadClass
SchedulingClass = cfgapi.SchedulingClass
CPUTopologyLevel = cfgapi.CPUTopologyLevel
ReservedCPUMode = cfgapi.ReservedCPUMode
)

var (
Expand All @@ -35,6 +36,9 @@ var (
)

const (
ReservedCPUModePreferred = cfgapi.ReservedCPUModePreferred
ReservedCPUModeHardExact = cfgapi.ReservedCPUModeHardExact
Comment on lines +39 to +40
Copy link
Copy Markdown
Collaborator

@klihub klihub Apr 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@louie-tsai Can't we achieve this without introducing explicit dedicated modes for the existing and the new behavior? Maybe with something like "if we have the reserved CPUs defined as an explicit CPU set, and we have an explicit definition of the reserved balloon in the configuration, and that explicit definition has MaxBalloons == 1 && MinCpus == ReservedCpus.Size() && MaxCpus == MinCpus, then we treat that as an immutable/unresizable reserved balloon"?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean, something that would make this behavior less of an exception, and instead more of just applying existing/expected behavior to the reserved balloon.


CPUTopologyLevelUndefined = cfgapi.CPUTopologyLevelUndefined
CPUTopologyLevelSystem = cfgapi.CPUTopologyLevelSystem
CPUTopologyLevelPackage = cfgapi.CPUTopologyLevelPackage
Expand Down
9 changes: 9 additions & 0 deletions config/crd/bases/config.nri_balloonspolicies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,15 @@ spec:
type: object
type: array
type: object
reservedCPUMode:
description: |-
ReservedCPUMode controls how strictly ReservedResources.cpu is interpreted.
Value "preferred" keeps the current behavior where a reserved cpuset is only a preference.
Value "hard-exact" makes the reserved balloon stay within the exact configured cpuset.
enum:
- preferred
- hard-exact
type: string
reservedPoolNamespaces:
description: |-
ReservedPoolNamespaces is a list of namespace globs that
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,15 @@ spec:
type: object
type: array
type: object
reservedCPUMode:
description: |-
ReservedCPUMode controls how strictly ReservedResources.cpu is interpreted.
Value "preferred" keeps the current behavior where a reserved cpuset is only a preference.
Value "hard-exact" makes the reserved balloon stay within the exact configured cpuset.
enum:
- preferred
- hard-exact
type: string
reservedPoolNamespaces:
description: |-
ReservedPoolNamespaces is a list of namespace globs that
Expand Down
5 changes: 4 additions & 1 deletion deployment/helm/balloons/templates/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ spec:
{{ $name }}: "{{ $value }}"
{{- end }}
spec:
hostPID: true
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on all the other changes, this seems to me an unjustified security escalation; it is basically the same concern @klihub raised below.

{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
Expand Down Expand Up @@ -99,7 +100,9 @@ spec:
image: {{ .Values.image.name }}:{{ .Values.image.tag | default .Chart.AppVersion }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
securityContext:
allowPrivilegeEscalation: false
privileged: true
allowPrivilegeEscalation: true
runAsUser: 0
Comment on lines +103 to +105
Copy link
Copy Markdown
Collaborator

@klihub klihub Apr 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@louie-tsai I don't see anything in this PR which would require such a change.

If we need to alter the capability set of the policy (for reasons beyond this PR), we should

  • do it in a separate PR
  • try to do it with additional capabilities instead of making the container privileged, if that is possible
  • make it an opt-in using template conditionals and a suitably chosen Helm value

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, agreed. That change is for OpenShift, so let me revert it.

capabilities:
drop:
- ALL
Expand Down
9 changes: 9 additions & 0 deletions pkg/apis/config/v1alpha1/resmgr/policy/balloons/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ type (
CPUTopologyLevel = policy.CPUTopologyLevel
ComponentCreationStrategy = policy.ComponentCreationStrategy
SchedulingClass = policy.SchedulingClass
ReservedCPUMode string
)

const (
Expand All @@ -42,6 +43,9 @@ const (
AmountCPUSet = policy.AmountCPUSet
AmountExcludeCPUSet = policy.AmountExcludeCPUSet

ReservedCPUModePreferred ReservedCPUMode = "preferred"
ReservedCPUModeHardExact ReservedCPUMode = "hard-exact"

CPUTopologyLevelUndefined = policy.CPUTopologyLevelUndefined
CPUTopologyLevelSystem = policy.CPUTopologyLevelSystem
CPUTopologyLevelPackage = policy.CPUTopologyLevelPackage
Expand Down Expand Up @@ -115,6 +119,11 @@ type Config struct {
// Reserved (CPU) resources for kube-system namespace.
// +kubebuilder:validation:Required
ReservedResources Constraints `json:"reservedResources"`
// ReservedCPUMode controls how strictly ReservedResources.cpu is interpreted.
// Value "preferred" keeps the current behavior where a reserved cpuset is only a preference.
// Value "hard-exact" makes the reserved balloon stay within the exact configured cpuset.
// +kubebuilder:validation:Enum=preferred;hard-exact
ReservedCPUMode ReservedCPUMode `json:"reservedCPUMode,omitempty"`
// Preserve specifies containers whose resource pinning must not be
// modified by the policy.
Preserve *ContainerMatchConfig `json:"preserve,omitempty"`
Expand Down
Loading