From e44434bca35aeccd892d495d6b2b3949da88e413 Mon Sep 17 00:00:00 2001 From: "Christopher M. Cantalupo" Date: Tue, 7 Apr 2026 15:38:12 -0700 Subject: [PATCH 1/2] resctrl-mon: add NRI plugin for per-pod resctrl monitoring groups Add nri-resctrl-mon, a standalone NRI plugin that creates per-pod resctrl monitoring groups (mon_groups) to support passive monitoring of Application Energy Telemetry (AET). The plugin creates the mon_group in the PostCreateContainer hook and writes the container's init PID to it in the StartContainer hook, before the process execs or forks, eliminating the fork race that plagues userspace daemon approaches. RMID allocation is delegated to the kernel via mkdir/rmdir on the resctrl filesystem. Includes: - Plugin source (main.go, plugin.go, resctrl.go, state.go) - Unit tests (plugin_test.go, resctrl_test.go) - Dockerfile following nri-memory-qos pattern - Helm chart (Chart.yaml, values.yaml, templates/, schema) - Documentation (monitoring category, plugin docs, Helm docs) - Sample configuration Signed-off-by: Christopher M. Cantalupo Signed-off-by: Jedrzej Wasiukiewicz --- Makefile | 3 +- cmd/plugins/resctrl-mon/Dockerfile | 40 ++ cmd/plugins/resctrl-mon/main.go | 90 +++++ cmd/plugins/resctrl-mon/plugin.go | 341 ++++++++++++++++++ cmd/plugins/resctrl-mon/plugin_test.go | 274 ++++++++++++++ cmd/plugins/resctrl-mon/resctrl.go | 216 +++++++++++ cmd/plugins/resctrl-mon/resctrl_test.go | 236 ++++++++++++ cmd/plugins/resctrl-mon/state.go | 109 ++++++ deployment/helm/resctrl-mon/.helmignore | 20 + deployment/helm/resctrl-mon/Chart.yaml | 11 + deployment/helm/resctrl-mon/README.md | 121 +++++++ .../helm/resctrl-mon/templates/_helpers.tpl | 16 + .../helm/resctrl-mon/templates/configmap.yaml | 12 + .../helm/resctrl-mon/templates/daemonset.yaml | 111 ++++++ .../helm/resctrl-mon/values.schema.json | 117 ++++++ deployment/helm/resctrl-mon/values.yaml | 66 ++++ docs/deployment/helm/index.md | 1 + docs/deployment/helm/resctrl-mon.md | 2 + docs/index.md | 1 + docs/monitoring/index.md | 9 + docs/monitoring/resctrl-mon.md | 
160 ++++++++ sample-configs/nri-resctrl-mon.yaml | 3 + 22 files changed, 1958 insertions(+), 1 deletion(-) create mode 100644 cmd/plugins/resctrl-mon/Dockerfile create mode 100644 cmd/plugins/resctrl-mon/main.go create mode 100644 cmd/plugins/resctrl-mon/plugin.go create mode 100644 cmd/plugins/resctrl-mon/plugin_test.go create mode 100644 cmd/plugins/resctrl-mon/resctrl.go create mode 100644 cmd/plugins/resctrl-mon/resctrl_test.go create mode 100644 cmd/plugins/resctrl-mon/state.go create mode 100644 deployment/helm/resctrl-mon/.helmignore create mode 100644 deployment/helm/resctrl-mon/Chart.yaml create mode 100644 deployment/helm/resctrl-mon/README.md create mode 100644 deployment/helm/resctrl-mon/templates/_helpers.tpl create mode 100644 deployment/helm/resctrl-mon/templates/configmap.yaml create mode 100644 deployment/helm/resctrl-mon/templates/daemonset.yaml create mode 100644 deployment/helm/resctrl-mon/values.schema.json create mode 100644 deployment/helm/resctrl-mon/values.yaml create mode 100644 docs/deployment/helm/resctrl-mon.md create mode 100644 docs/monitoring/index.md create mode 100644 docs/monitoring/resctrl-mon.md create mode 100644 sample-configs/nri-resctrl-mon.yaml diff --git a/Makefile b/Makefile index 7191fba3f..3198b332e 100644 --- a/Makefile +++ b/Makefile @@ -83,7 +83,8 @@ PLUGINS ?= \ nri-memory-policy \ nri-memory-qos \ nri-memtierd \ - nri-sgx-epc + nri-sgx-epc \ + nri-resctrl-mon BINARIES ?= \ config-manager \ diff --git a/cmd/plugins/resctrl-mon/Dockerfile b/cmd/plugins/resctrl-mon/Dockerfile new file mode 100644 index 000000000..c833ca203 --- /dev/null +++ b/cmd/plugins/resctrl-mon/Dockerfile @@ -0,0 +1,40 @@ +ARG GO_VERSION=1.26 + +FROM golang:${GO_VERSION}-bookworm AS builder + +ARG IMAGE_VERSION +ARG BUILD_VERSION +ARG BUILD_BUILDID +ARG DEBUG=0 +ARG NORACE=0 +ARG SKIP_LICENSES=0 + +WORKDIR /go/builder + +# Fetch go dependencies in a separate layer for caching +COPY go.mod go.sum . 
+COPY pkg/topology/ pkg/topology/ +RUN --mount=type=cache,target=/go/pkg/mod/ go mod download + +# Build nri-resctrl-mon +COPY . . + +RUN --mount=type=cache,target=/go/pkg/mod/ \ + --mount=type=cache,target="/root/.cache/go-build" \ + make IMAGE_VERSION=${IMAGE_VERSION} \ + BUILD_VERSION=${BUILD_VERSION} \ + BUILD_BUILDID=${BUILD_BUILDID} \ + DEBUG=$DEBUG \ + NORACE=$NORACE \ + OTHER_IMAGE_TARGETS="" \ + BINARIES="" \ + PLUGINS=nri-resctrl-mon \ + clean install-go-licenses build-plugins-static licenses + +FROM gcr.io/distroless/static + +COPY --from=builder /go/builder/build/bin/nri-resctrl-mon /bin/nri-resctrl-mon +COPY --from=builder /go/builder/build/licenses/nri-resctrl-mon/ /licenses/nri-resctrl-mon/ +COPY --from=builder /go/builder/sample-configs/nri-resctrl-mon.yaml /etc/nri/resctrl-mon/config.yaml + +ENTRYPOINT ["/bin/nri-resctrl-mon", "-idx", "90", "-config", "/etc/nri/resctrl-mon/config.yaml"] diff --git a/cmd/plugins/resctrl-mon/main.go b/cmd/plugins/resctrl-mon/main.go new file mode 100644 index 000000000..eedf56599 --- /dev/null +++ b/cmd/plugins/resctrl-mon/main.go @@ -0,0 +1,90 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "context" + "flag" + "os" + + "github.com/containerd/nri/pkg/stub" + "github.com/sirupsen/logrus" +) + +var ( + log *logrus.Logger +) + +func main() { + var ( + pluginName string + pluginIdx string + configFile string + verbose bool + veryVerbose bool + err error + ) + + log = logrus.StandardLogger() + log.SetFormatter(&logrus.TextFormatter{ + PadLevelText: true, + }) + + flag.StringVar(&pluginName, "name", "", "plugin name to register to NRI") + flag.StringVar(&pluginIdx, "idx", "", "plugin index to register to NRI") + flag.StringVar(&configFile, "config", "", "configuration file name") + flag.BoolVar(&verbose, "v", false, "verbose output") + flag.BoolVar(&veryVerbose, "vv", false, "very verbose output") + flag.Parse() + + if verbose { + log.SetLevel(logrus.DebugLevel) + } + if veryVerbose { + log.SetLevel(logrus.TraceLevel) + } + + p := newPlugin() + + if configFile != "" { + log.Debugf("reading configuration from %q", configFile) + data, err := os.ReadFile(configFile) + if err != nil { + log.Fatalf("error reading configuration file %q: %s", configFile, err) + } + if err = p.setConfig(data); err != nil { + log.Fatalf("error applying configuration from file %q: %s", configFile, err) + } + } + + opts := []stub.Option{ + stub.WithOnClose(p.onClose), + } + if pluginName != "" { + opts = append(opts, stub.WithPluginName(pluginName)) + } + if pluginIdx != "" { + opts = append(opts, stub.WithPluginIdx(pluginIdx)) + } + + if p.stub, err = stub.New(p, opts...); err != nil { + log.Fatalf("failed to create plugin stub: %v", err) + } + + if err = p.stub.Run(context.Background()); err != nil { + log.Errorf("plugin exited (%v)", err) + os.Exit(1) + } +} diff --git a/cmd/plugins/resctrl-mon/plugin.go b/cmd/plugins/resctrl-mon/plugin.go new file mode 100644 index 000000000..80c33f19c --- /dev/null +++ b/cmd/plugins/resctrl-mon/plugin.go @@ -0,0 +1,341 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "context" + "fmt" + "os" + "path/filepath" + + "sigs.k8s.io/yaml" + + "github.com/containerd/nri/pkg/api" + "github.com/containerd/nri/pkg/stub" +) + +// plugin implements the NRI plugin interface for resctrl monitoring groups. +type plugin struct { + stub stub.Stub + config *pluginConfig + state *podState + rdt *resctrlOps +} + +// pluginConfig holds the runtime configuration for the plugin. +type pluginConfig struct { + // ResctrlPath is the mount point of the resctrl filesystem. + ResctrlPath string `json:"resctrlPath"` + + // Namespaces filters mon_group creation to pods in these namespaces. + // Empty list means all namespaces. + Namespaces []string `json:"namespaces"` + + // LabelSelector filters mon_group creation to pods matching these labels. + // Empty map means all pods. + LabelSelector map[string]string `json:"labelSelector"` +} + +func newPlugin() *plugin { + cfg := &pluginConfig{ + ResctrlPath: defaultResctrlPath, + } + return &plugin{ + config: cfg, + state: newPodState(), + rdt: newResctrlOps(cfg.ResctrlPath), + } +} + +// Configure handles connecting to container runtime's NRI server. 
+func (p *plugin) Configure(ctx context.Context, config, runtime, version string) (stub.EventMask, error) { + log.Infof("Connected to %s %s...", runtime, version) + if config != "" { + log.Debugf("loading configuration from NRI server") + if err := p.setConfig([]byte(config)); err != nil { + return 0, err + } + } + return 0, nil +} + +// onClose handles losing connection to container runtime. +func (p *plugin) onClose() { + log.Infof("Connection to the runtime lost, exiting...") + os.Exit(0) +} + +// setConfig applies new plugin configuration. +func (p *plugin) setConfig(data []byte) error { + log.Tracef("setConfig: parsing\n---8<---\n%s\n--->8---", data) + cfg := pluginConfig{ + ResctrlPath: defaultResctrlPath, + } + if err := yaml.Unmarshal(data, &cfg); err != nil { + return fmt.Errorf("setConfig: cannot parse configuration: %w", err) + } + resctrlPath := filepath.Clean(cfg.ResctrlPath) + if resctrlPath == "" || !filepath.IsAbs(resctrlPath) { + return fmt.Errorf("setConfig: resctrlPath must be an absolute path, got %q", cfg.ResctrlPath) + } + cfg.ResctrlPath = resctrlPath + p.config = &cfg + p.rdt = newResctrlOps(cfg.ResctrlPath) + log.Debugf("configuration: resctrlPath=%s namespaces=%v labelSelector=%v", + cfg.ResctrlPath, cfg.Namespaces, cfg.LabelSelector) + return nil +} + +// Synchronize is called at plugin startup with the current set of pods and containers. +// It reconciles in-memory state with what exists on the resctrl filesystem. +func (p *plugin) Synchronize(ctx context.Context, pods []*api.PodSandbox, containers []*api.Container) ([]*api.ContainerUpdate, error) { + log.Infof("synchronizing state: %d pods, %d containers", len(pods), len(containers)) + + // Build a lookup from sandbox ID to pod (containers reference + // pods by sandbox ID, not by Kubernetes UID). 
+ podBySandboxID := make(map[string]*api.PodSandbox, len(pods)) + for _, pod := range pods { + podBySandboxID[pod.GetId()] = pod + } + + // Create mon_groups for running containers that don't have one, + // and write their PIDs to ensure monitoring is active after restart. + for _, ctr := range containers { + pod, ok := podBySandboxID[ctr.GetPodSandboxId()] + if !ok { + log.Debugf("Synchronize: container %s has no matching pod, skipping", ctr.GetName()) + continue + } + if !p.shouldMonitorPod(pod) { + continue + } + podUID := pod.GetUid() + rdtClass := getRDTClass(ctr) + if err := p.ensureMonGroup(podUID, ctr.GetId(), rdtClass); err != nil { + log.Warnf("Synchronize: failed to create mon_group for pod %s: %v", podUID, err) + continue + } + pid := int(ctr.GetPid()) + if pid > 0 { + monGroupDir := p.state.getMonGroupDir(podUID) + if err := p.rdt.writeTaskPID(monGroupDir, pid); err != nil { + log.Warnf("Synchronize: failed to write PID %d for pod %s: %v", pid, podUID, err) + } else { + log.Debugf("Synchronize: assigned pid %d for pod %s", pid, podUID) + } + } + } + + // Remove orphaned mon_groups from a previous plugin instance. + p.rdt.cleanOrphanedMonGroups(p.state) + + log.Infof("synchronization complete: tracking %d pods", p.state.podCount()) + return nil, nil +} + +// PostCreateContainer is called after the container is created but before +// it starts executing. The container PID is NOT yet available (pid=0) because +// the init process has not been started. We create the mon_group here so it +// is ready for PID assignment in StartContainer. 
+func (p *plugin) PostCreateContainer(ctx context.Context, pod *api.PodSandbox, ctr *api.Container) error { + podUID := pod.GetUid() + ctrName := pprintCtr(pod, ctr) + + log.Debugf("PostCreateContainer %s: pid=%d (expected 0)", ctrName, ctr.GetPid()) + + if !p.shouldMonitorPod(pod) { + log.Debugf("PostCreateContainer %s: pod filtered out, skipping", ctrName) + return nil + } + + rdtClass := getRDTClass(ctr) + if err := p.ensureMonGroup(podUID, ctr.GetId(), rdtClass); err != nil { + log.Warnf("PostCreateContainer %s: failed to create mon_group: %v", ctrName, err) + return nil // non-fatal: don't block container creation + } + + log.Infof("PostCreateContainer %s: mon_group ready, PID will be assigned in StartContainer", ctrName) + return nil +} + +// StartContainer is called just before the container process starts executing. +// At this point the init process has been created (via runc create) and the PID +// is available, but the process is paused and has NOT forked any threads yet. +// This is the ideal moment to write the PID to the resctrl mon_group tasks +// file: the kernel assigns the RMID to this PID, and when the process starts +// and forks threads they all inherit the RMID automatically. +// +// If the PID is not available (should not happen at this stage), we fall back +// to PostStartContainer which will write PIDs after the process starts. 
+func (p *plugin) StartContainer(ctx context.Context, pod *api.PodSandbox, ctr *api.Container) error { + podUID := pod.GetUid() + ctrName := pprintCtr(pod, ctr) + pid := int(ctr.GetPid()) + + log.Debugf("StartContainer %s: pid=%d", ctrName, pid) + + if !p.shouldMonitorPod(pod) { + return nil + } + + monGroupDir := p.state.getMonGroupDir(podUID) + if monGroupDir == "" { + log.Debugf("StartContainer %s: no mon_group (pod not tracked), skipping", ctrName) + return nil + } + + if pid > 0 { + if err := p.rdt.writeTaskPID(monGroupDir, pid); err != nil { + log.Warnf("StartContainer %s: failed to write PID %d to tasks: %v", ctrName, pid, err) + } else { + log.Infof("StartContainer %s: assigned pid %d to mon_group %s (pre-start, no threads yet)", ctrName, pid, monGroupDir) + } + } else { + log.Warnf("StartContainer %s: PID not available at pre-start, will retry in PostStartContainer", ctrName) + } + + return nil +} + +// PostStartContainer is called after the container process has been started. +// This is a fallback: if StartContainer did not have the PID (which should +// not happen on containerd ≥ 2.x), we write the init PID here. The init +// PID is sufficient because all child threads inherit the RMID. +func (p *plugin) PostStartContainer(ctx context.Context, pod *api.PodSandbox, ctr *api.Container) error { + podUID := pod.GetUid() + ctrName := pprintCtr(pod, ctr) + pid := int(ctr.GetPid()) + + log.Debugf("PostStartContainer %s: pid=%d", ctrName, pid) + + if !p.shouldMonitorPod(pod) { + return nil + } + + monGroupDir := p.state.getMonGroupDir(podUID) + if monGroupDir == "" { + return nil + } + + // Fallback: write the init PID if StartContainer didn't. 
+ if pid > 0 { + if err := p.rdt.writeTaskPID(monGroupDir, pid); err != nil { + log.Warnf("PostStartContainer %s: failed to write PID %d to tasks: %v", ctrName, pid, err) + } else { + log.Infof("PostStartContainer %s: fallback assigned pid %d to mon_group %s", ctrName, pid, monGroupDir) + } + } else { + log.Warnf("PostStartContainer %s: PID still 0 after start, unexpected", ctrName) + } + + return nil +} + +// StopContainer is called when a container is being stopped. +func (p *plugin) StopContainer(ctx context.Context, pod *api.PodSandbox, ctr *api.Container) ([]*api.ContainerUpdate, error) { + podUID := pod.GetUid() + ctrName := pprintCtr(pod, ctr) + + log.Debugf("StopContainer %s", ctrName) + + monGroupDir := p.state.getMonGroupDir(podUID) + if monGroupDir == "" { + return nil, nil + } + + p.state.removeContainer(podUID, ctr.GetId()) + + if p.state.podHasNoContainers(podUID) { + log.Infof("StopContainer %s: last container, removing mon_group %s", ctrName, monGroupDir) + if err := p.rdt.removeMonGroup(monGroupDir); err != nil { + log.Warnf("StopContainer %s: failed to remove mon_group, will retry on next sync: %v", ctrName, err) + return nil, nil + } + p.state.removePod(podUID) + } + + return nil, nil +} + +// ensureMonGroup creates the mon_group directory if it doesn't exist and registers +// the container in the in-memory state. +// +// Limitation: all containers in a pod share a single mon_group under the first +// container's RDT class. If an allocation plugin assigns different classes to +// containers in the same pod, subsequent containers use the first class. +func (p *plugin) ensureMonGroup(podUID, containerID, rdtClass string) error { + if !looksLikePodUID(podUID) { + return fmt.Errorf("invalid pod UID %q", podUID) + } + + if p.state.getMonGroupDir(podUID) != "" { + // Mon_group already exists for this pod. Just add the container. 
+ p.state.addContainer(podUID, containerID) + return nil + } + + monGroupDir, err := p.rdt.createMonGroup(rdtClass, podUID) + if err != nil { + return err + } + + p.state.addPod(podUID, monGroupDir) + p.state.addContainer(podUID, containerID) + log.Infof("created mon_group %s for pod %s", monGroupDir, podUID) + return nil +} + +// shouldMonitorPod checks namespace and label filters. +func (p *plugin) shouldMonitorPod(pod *api.PodSandbox) bool { + if len(p.config.Namespaces) > 0 { + ns := pod.GetNamespace() + found := false + for _, allowed := range p.config.Namespaces { + if ns == allowed { + found = true + break + } + } + if !found { + return false + } + } + if len(p.config.LabelSelector) > 0 { + labels := pod.GetLabels() + for k, v := range p.config.LabelSelector { + if labels[k] != v { + return false + } + } + } + return true +} + +// getRDTClass extracts the RDT class from a container's Linux resources. +func getRDTClass(ctr *api.Container) string { + if linux := ctr.GetLinux(); linux != nil { + if res := linux.GetResources(); res != nil { + if rdt := res.GetRdtClass(); rdt != nil { + return rdt.GetValue() + } + } + } + return "" +} + +// pprintCtr returns a human-readable container identifier. +func pprintCtr(pod *api.PodSandbox, ctr *api.Container) string { + return fmt.Sprintf("%s/%s:%s", pod.GetNamespace(), pod.GetName(), ctr.GetName()) +} diff --git a/cmd/plugins/resctrl-mon/plugin_test.go b/cmd/plugins/resctrl-mon/plugin_test.go new file mode 100644 index 000000000..e29ba24a5 --- /dev/null +++ b/cmd/plugins/resctrl-mon/plugin_test.go @@ -0,0 +1,274 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "context" + "os" + "path/filepath" + "testing" + + "github.com/containerd/nri/pkg/api" + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func init() { + log = logrus.StandardLogger() + log.SetLevel(logrus.TraceLevel) +} + +func newTestPlugin(resctrlPath string) *plugin { + cfg := &pluginConfig{ + ResctrlPath: resctrlPath, + } + return &plugin{ + config: cfg, + state: newPodState(), + rdt: newResctrlOps(resctrlPath), + } +} + +func makePod(uid, namespace, name string) *api.PodSandbox { + return &api.PodSandbox{ + Id: "sandbox-" + uid, // CRI sandbox ID != K8s pod UID + Uid: uid, + Namespace: namespace, + Name: name, + Labels: map[string]string{}, + } +} + +func makeContainer(id, name, podSandboxID string, pid uint32, rdtClass string) *api.Container { + ctr := &api.Container{ + Id: id, + PodSandboxId: podSandboxID, + Name: name, + Pid: pid, + Linux: &api.LinuxContainer{ + Resources: &api.LinuxResources{}, + }, + } + if rdtClass != "" { + ctr.Linux.Resources.RdtClass = &api.OptionalString{Value: rdtClass} + } + return ctr +} + +func TestShouldMonitorPod_NoFilters(t *testing.T) { + p := newTestPlugin("/tmp/resctrl-test") + pod := makePod("uid-1", "default", "test-pod") + assert.True(t, p.shouldMonitorPod(pod)) +} + +func TestShouldMonitorPod_NamespaceFilter(t *testing.T) { + p := newTestPlugin("/tmp/resctrl-test") + p.config.Namespaces = []string{"production", "staging"} + + pod1 := makePod("uid-1", "production", "pod1") + assert.True(t, 
p.shouldMonitorPod(pod1)) + + pod2 := makePod("uid-2", "kube-system", "pod2") + assert.False(t, p.shouldMonitorPod(pod2)) +} + +func TestShouldMonitorPod_LabelFilter(t *testing.T) { + p := newTestPlugin("/tmp/resctrl-test") + p.config.LabelSelector = map[string]string{"monitor": "true"} + + pod1 := makePod("uid-1", "default", "pod1") + pod1.Labels = map[string]string{"monitor": "true", "app": "web"} + assert.True(t, p.shouldMonitorPod(pod1)) + + pod2 := makePod("uid-2", "default", "pod2") + pod2.Labels = map[string]string{"app": "web"} + assert.False(t, p.shouldMonitorPod(pod2)) +} + +func TestGetRDTClass(t *testing.T) { + ctr1 := makeContainer("c1", "container1", "uid-1", 1234, "BestEffort") + assert.Equal(t, "BestEffort", getRDTClass(ctr1)) + + ctr2 := makeContainer("c2", "container2", "uid-1", 1235, "") + assert.Equal(t, "", getRDTClass(ctr2)) + + ctr3 := &api.Container{ + Id: "c3", + Name: "container3", + } + assert.Equal(t, "", getRDTClass(ctr3)) +} + +func TestPprintCtr(t *testing.T) { + pod := makePod("uid-1", "default", "my-pod") + ctr := makeContainer("c1", "my-container", "uid-1", 1234, "") + assert.Equal(t, "default/my-pod:my-container", pprintCtr(pod, ctr)) +} + +func TestPostCreateContainer_FilteredPod(t *testing.T) { + p := newTestPlugin(t.TempDir()) + p.config.Namespaces = []string{"production"} + + pod := makePod("uid-1", "default", "test-pod") + ctr := makeContainer("c1", "container1", "uid-1", 1234, "") + + err := p.PostCreateContainer(context.Background(), pod, ctr) + require.NoError(t, err) + + // Pod should not be tracked since it's not in the production namespace. 
+ assert.Equal(t, 0, p.state.podCount()) +} + +func TestPostCreateContainer_CreatesMonGroup(t *testing.T) { + tmpDir := t.TempDir() + p := newTestPlugin(tmpDir) + + pod := makePod("a1b2c3d4-e5f6-7890-abcd-ef1234567890", "default", "test-pod") + ctr := makeContainer("c1", "container1", "a1b2c3d4-e5f6-7890-abcd-ef1234567890", 0, "") + + err := p.PostCreateContainer(context.Background(), pod, ctr) + require.NoError(t, err) + + // Pod should be tracked. + assert.Equal(t, 1, p.state.podCount()) + monDir := p.state.getMonGroupDir("a1b2c3d4-e5f6-7890-abcd-ef1234567890") + assert.Contains(t, monDir, "mon_groups/a1b2c3d4-e5f6-7890-abcd-ef1234567890") +} + +func TestPostCreateContainer_WithRDTClass(t *testing.T) { + tmpDir := t.TempDir() + p := newTestPlugin(tmpDir) + require.NoError(t, os.Mkdir(filepath.Join(tmpDir, "BestEffort"), 0755)) + + pod := makePod("a1b2c3d4-e5f6-7890-abcd-ef1234567890", "default", "test-pod") + ctr := makeContainer("c1", "container1", "a1b2c3d4-e5f6-7890-abcd-ef1234567890", 0, "BestEffort") + + err := p.PostCreateContainer(context.Background(), pod, ctr) + require.NoError(t, err) + + monDir := p.state.getMonGroupDir("a1b2c3d4-e5f6-7890-abcd-ef1234567890") + assert.Contains(t, monDir, "BestEffort/mon_groups/a1b2c3d4-e5f6-7890-abcd-ef1234567890") +} + +func TestMultiContainerPod(t *testing.T) { + tmpDir := t.TempDir() + p := newTestPlugin(tmpDir) + podUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + + pod := makePod(podUID, "default", "multi-pod") + ctr1 := makeContainer("c1", "container1", podUID, 0, "") + ctr2 := makeContainer("c2", "container2", podUID, 0, "") + + // First container creates the mon_group. + err := p.PostCreateContainer(context.Background(), pod, ctr1) + require.NoError(t, err) + assert.Equal(t, 1, p.state.podCount()) + + // Second container reuses the same mon_group. 
+ err = p.PostCreateContainer(context.Background(), pod, ctr2) + require.NoError(t, err) + assert.Equal(t, 1, p.state.podCount()) // still one pod + + // Stopping first container should not remove the mon_group. + _, err = p.StopContainer(context.Background(), pod, ctr1) + require.NoError(t, err) + assert.Equal(t, 1, p.state.podCount()) + assert.False(t, p.state.podHasNoContainers(podUID)) + + // Stopping second container should remove the mon_group. + _, err = p.StopContainer(context.Background(), pod, ctr2) + require.NoError(t, err) + assert.Equal(t, 0, p.state.podCount()) +} + +func TestStopContainer_UnknownPod(t *testing.T) { + p := newTestPlugin(t.TempDir()) + + pod := makePod("unknown-uid", "default", "unknown-pod") + ctr := makeContainer("c1", "container1", "unknown-uid", 1234, "") + + updates, err := p.StopContainer(context.Background(), pod, ctr) + require.NoError(t, err) + assert.Nil(t, updates) +} + +func TestSetConfig(t *testing.T) { + p := newTestPlugin("/tmp/resctrl-test") + + configYAML := []byte(` +resctrlPath: /tmp/test-resctrl +namespaces: + - production + - staging +labelSelector: + monitor: "true" +`) + + err := p.setConfig(configYAML) + require.NoError(t, err) + assert.Equal(t, "/tmp/test-resctrl", p.config.ResctrlPath) + assert.Equal(t, []string{"production", "staging"}, p.config.Namespaces) + assert.Equal(t, map[string]string{"monitor": "true"}, p.config.LabelSelector) +} + +func TestSetConfig_InvalidYAML(t *testing.T) { + p := newTestPlugin("/tmp/resctrl-test") + + err := p.setConfig([]byte(":::invalid yaml")) + assert.Error(t, err) +} + +func TestSetConfig_RelativePath(t *testing.T) { + p := newTestPlugin("/tmp/resctrl-test") + + err := p.setConfig([]byte("resctrlPath: relative/path")) + assert.Error(t, err) + assert.Contains(t, err.Error(), "absolute path") +} + +func TestSynchronize_UsesUIDNotSandboxID(t *testing.T) { + tmpDir := t.TempDir() + p := newTestPlugin(tmpDir) + podUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + + pod := 
makePod(podUID, "default", "sync-pod") + // Container references the pod by sandbox ID, not by UID. + ctr := makeContainer("c1", "container1", pod.GetId(), 0, "") + + _, err := p.Synchronize(context.Background(), []*api.PodSandbox{pod}, []*api.Container{ctr}) + require.NoError(t, err) + + // The mon_group should be keyed by the K8s pod UID, not the sandbox ID. + assert.Equal(t, 1, p.state.podCount()) + assert.True(t, p.state.hasPod(podUID)) + assert.False(t, p.state.hasPod(pod.GetId())) + + monDir := p.state.getMonGroupDir(podUID) + assert.Contains(t, monDir, podUID) +} + +func TestEnsureMonGroup_InvalidUID(t *testing.T) { + p := newTestPlugin(t.TempDir()) + + err := p.ensureMonGroup("", "c1", "") + assert.Error(t, err) + assert.Contains(t, err.Error(), "invalid pod UID") + + err = p.ensureMonGroup("not-a-uuid", "c1", "") + assert.Error(t, err) + + assert.Equal(t, 0, p.state.podCount()) +} diff --git a/cmd/plugins/resctrl-mon/resctrl.go b/cmd/plugins/resctrl-mon/resctrl.go new file mode 100644 index 000000000..cea0a0c16 --- /dev/null +++ b/cmd/plugins/resctrl-mon/resctrl.go @@ -0,0 +1,216 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + "syscall" +) + +const ( + defaultResctrlPath = "/sys/fs/resctrl" + monGroupsDir = "mon_groups" +) + +// resctrlOps handles filesystem operations on the resctrl mount. 
+type resctrlOps struct { + resctrlPath string +} + +func newResctrlOps(resctrlPath string) *resctrlOps { + return &resctrlOps{ + resctrlPath: resctrlPath, + } +} + +// createMonGroup creates a mon_group directory under the appropriate ctrl_group +// and returns the full path. If rdtClass is empty, the mon_group is created +// under the root resctrl directory. +// +// The kernel assigns an RMID to the new mon_group on mkdir. If no RMIDs are +// available, mkdir returns ENOSPC. +func (r *resctrlOps) createMonGroup(rdtClass, podUID string) (string, error) { + parentDir := r.resctrlPath + if rdtClass != "" { + if !isValidRDTClass(rdtClass) { + return "", fmt.Errorf("invalid RDT class name %q", rdtClass) + } + parentDir = filepath.Join(r.resctrlPath, rdtClass) + } + + // When an RDT class is specified, the ctrl_group must already exist + // (created by an allocation plugin). Do not create it implicitly — + // that would make an unintended ctrl_group in the resctrl filesystem. + if rdtClass != "" { + info, err := os.Stat(parentDir) + if err != nil { + return "", fmt.Errorf("ctrl_group %s does not exist: %w", parentDir, err) + } + if !info.IsDir() { + return "", fmt.Errorf("ctrl_group %s is not a directory", parentDir) + } + } + + monGroupsPath := filepath.Join(parentDir, monGroupsDir) + monGroupDir := filepath.Join(monGroupsPath, podUID) + + // Ensure the mon_groups/ directory exists. On a real resctrl mount + // this is always present. For testing, create it if needed. + if err := os.MkdirAll(monGroupsPath, 0755); err != nil { + return "", fmt.Errorf("mon_groups dir not available at %s: %w", monGroupsPath, err) + } + + // Use Mkdir (not MkdirAll) for the final mon_group directory to + // avoid accidentally creating a ctrl_group if rdtClass is wrong. 
+ if err := os.Mkdir(monGroupDir, 0755); err != nil { + if errors.Is(err, os.ErrExist) { + return monGroupDir, nil + } + if errors.Is(err, syscall.ENOSPC) { + return "", fmt.Errorf("no RMIDs available for pod %s: %w", podUID, err) + } + return "", fmt.Errorf("failed to create mon_group %s: %w", monGroupDir, err) + } + + return monGroupDir, nil +} + +// removeMonGroup removes a mon_group directory. The kernel releases the RMID. +func (r *resctrlOps) removeMonGroup(monGroupDir string) error { + err := os.Remove(monGroupDir) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return fmt.Errorf("failed to remove mon_group %s: %w", monGroupDir, err) + } + return nil +} + +// writeTaskPID writes a PID to the mon_group's tasks file. The kernel assigns +// this PID (and all future child processes) to the mon_group's RMID. +func (r *resctrlOps) writeTaskPID(monGroupDir string, pid int) error { + tasksFile := filepath.Join(monGroupDir, "tasks") + f, err := os.OpenFile(tasksFile, os.O_WRONLY, 0) + if err != nil { + return fmt.Errorf("failed to open %s for pid %d: %w", tasksFile, pid, err) + } + defer f.Close() + data := []byte(strconv.Itoa(pid) + "\n") + if _, err := f.Write(data); err != nil { + return fmt.Errorf("failed to write pid %d to %s: %w", pid, tasksFile, err) + } + return nil +} + +// cleanOrphanedMonGroups removes mon_group directories that are not tracked +// in the given state. This handles cleanup after a plugin crash/restart. +func (r *resctrlOps) cleanOrphanedMonGroups(state *podState) { + // Scan root-level mon_groups. + r.cleanOrphanedInDir(filepath.Join(r.resctrlPath, monGroupsDir), state) + + // Scan ctrl_group-level mon_groups. + entries, err := os.ReadDir(r.resctrlPath) + if err != nil { + log.Warnf("cleanOrphanedMonGroups: failed to read %s: %v", r.resctrlPath, err) + return + } + for _, entry := range entries { + if !entry.IsDir() { + continue + } + name := entry.Name() + // Skip non-ctrl_group entries. 
+ if name == monGroupsDir || name == "info" || strings.HasPrefix(name, "mon_") { + continue + } + ctrlGroupMonDir := filepath.Join(r.resctrlPath, name, monGroupsDir) + r.cleanOrphanedInDir(ctrlGroupMonDir, state) + } +} + +// cleanOrphanedInDir removes mon_group directories in a specific mon_groups/ +// directory that look like pod UIDs but are not tracked in state. +func (r *resctrlOps) cleanOrphanedInDir(monGroupsPath string, state *podState) { + entries, err := os.ReadDir(monGroupsPath) + if err != nil { + if !errors.Is(err, os.ErrNotExist) { + log.Warnf("failed to read mon_groups directory %s: %v", monGroupsPath, err) + } + return + } + for _, entry := range entries { + if !entry.IsDir() { + continue + } + name := entry.Name() + // Only clean directories that look like pod UIDs (contain dashes like UUIDs). + if !looksLikePodUID(name) { + continue + } + orphanDir := filepath.Join(monGroupsPath, name) + trackedDir := state.getMonGroupDir(name) + if trackedDir == orphanDir { + // This is the active mon_group for this pod. + continue + } + log.Infof("removing orphaned mon_group %s", orphanDir) + if err := os.Remove(orphanDir); err != nil && !errors.Is(err, os.ErrNotExist) { + log.Warnf("failed to remove orphaned mon_group %s: %v", orphanDir, err) + } + } +} + +// looksLikePodUID returns true if the name looks like a Kubernetes pod UID +// (UUID format with dashes, e.g., a1b2c3d4-e5f6-7890-abcd-ef1234567890). +func looksLikePodUID(name string) bool { + if len(name) != 36 { + return false + } + // Check for UUID-like pattern: 8-4-4-4-12 hex chars. 
+ parts := strings.Split(name, "-") + if len(parts) != 5 { + return false + } + expectedLens := []int{8, 4, 4, 4, 12} + for i, part := range parts { + if len(part) != expectedLens[i] { + return false + } + for _, c := range part { + if !((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) { + return false + } + } + } + return true +} + +// isValidRDTClass returns true if the name is a safe resctrl ctrl_group name. +// It rejects path separators, dot-segments, and empty strings to prevent +// path traversal outside the resctrl mount. +func isValidRDTClass(name string) bool { + if name == "" || name == "." || name == ".." { + return false + } + for _, c := range name { + if c == '/' || c == 0 { + return false + } + } + return true +} diff --git a/cmd/plugins/resctrl-mon/resctrl_test.go b/cmd/plugins/resctrl-mon/resctrl_test.go new file mode 100644 index 000000000..9d2dbedb1 --- /dev/null +++ b/cmd/plugins/resctrl-mon/resctrl_test.go @@ -0,0 +1,236 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCreateMonGroup_RootClass(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + + dir, err := r.createMonGroup("", "pod-uid-1") + require.NoError(t, err) + assert.Equal(t, filepath.Join(tmpDir, "mon_groups", "pod-uid-1"), dir) + + // Directory should exist. + info, err := os.Stat(dir) + require.NoError(t, err) + assert.True(t, info.IsDir()) +} + +func TestCreateMonGroup_WithRDTClass(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + require.NoError(t, os.Mkdir(filepath.Join(tmpDir, "BestEffort"), 0755)) + + dir, err := r.createMonGroup("BestEffort", "pod-uid-2") + require.NoError(t, err) + assert.Equal(t, filepath.Join(tmpDir, "BestEffort", "mon_groups", "pod-uid-2"), dir) + + info, err := os.Stat(dir) + require.NoError(t, err) + assert.True(t, info.IsDir()) +} + +func TestCreateMonGroup_MissingCtrlGroup(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + + // Attempt to create a mon_group under a non-existent ctrl_group. + _, err := r.createMonGroup("NoSuchClass", "pod-uid-3") + assert.Error(t, err) + assert.Contains(t, err.Error(), "ctrl_group") + + // Verify the ctrl_group was NOT created. 
+ _, err = os.Stat(filepath.Join(tmpDir, "NoSuchClass")) + assert.True(t, os.IsNotExist(err)) +} + +func TestCreateMonGroup_Idempotent(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + + dir1, err := r.createMonGroup("", "pod-uid-1") + require.NoError(t, err) + + dir2, err := r.createMonGroup("", "pod-uid-1") + require.NoError(t, err) + + assert.Equal(t, dir1, dir2) +} + +func TestRemoveMonGroup(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + + dir, err := r.createMonGroup("", "pod-uid-1") + require.NoError(t, err) + + err = r.removeMonGroup(dir) + require.NoError(t, err) + + _, err = os.Stat(dir) + assert.True(t, os.IsNotExist(err)) +} + +func TestRemoveMonGroup_NotExist(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + + err := r.removeMonGroup(filepath.Join(tmpDir, "mon_groups", "nonexistent")) + assert.NoError(t, err) +} + +func TestWriteTaskPID(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + + dir, err := r.createMonGroup("", "pod-uid-1") + require.NoError(t, err) + + // In real resctrl, the kernel creates the tasks file when the + // mon_group directory is created. Simulate that here. + tasksFile := filepath.Join(dir, "tasks") + require.NoError(t, os.WriteFile(tasksFile, nil, 0644)) + + err = r.writeTaskPID(dir, 12345) + require.NoError(t, err) + + data, err := os.ReadFile(tasksFile) + require.NoError(t, err) + assert.Equal(t, "12345\n", string(data)) +} + +func TestCleanOrphanedMonGroups(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + state := newPodState() + + // Create a mon_group that IS tracked. + trackedUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + dir, err := r.createMonGroup("", trackedUID) + require.NoError(t, err) + state.addPod(trackedUID, dir) + + // Create a mon_group that is NOT tracked (orphan). 
+ orphanUID := "deadbeef-dead-beef-dead-beefdeadbeef" + _, err = r.createMonGroup("", orphanUID) + require.NoError(t, err) + + r.cleanOrphanedMonGroups(state) + + // Tracked should still exist. + _, err = os.Stat(filepath.Join(tmpDir, "mon_groups", trackedUID)) + assert.NoError(t, err) + + // Orphan should be removed. + _, err = os.Stat(filepath.Join(tmpDir, "mon_groups", orphanUID)) + assert.True(t, os.IsNotExist(err)) +} + +func TestCleanOrphanedMonGroups_CtrlGroup(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + state := newPodState() + + // Create orphan under a ctrl_group. + orphanUID := "deadbeef-dead-beef-dead-beefdeadbeef" + require.NoError(t, os.Mkdir(filepath.Join(tmpDir, "BestEffort"), 0755)) + _, err := r.createMonGroup("BestEffort", orphanUID) + require.NoError(t, err) + + r.cleanOrphanedMonGroups(state) + + _, err = os.Stat(filepath.Join(tmpDir, "BestEffort", "mon_groups", orphanUID)) + assert.True(t, os.IsNotExist(err)) +} + +func TestCleanOrphanedMonGroups_StaleLocation(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + state := newPodState() + + podUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + + // Create a mon_group under BestEffort (simulates previous run). + require.NoError(t, os.Mkdir(filepath.Join(tmpDir, "BestEffort"), 0755)) + _, err := r.createMonGroup("BestEffort", podUID) + require.NoError(t, err) + + // Track the pod at the root class (simulates current run with different RDT class). + rootDir, err := r.createMonGroup("", podUID) + require.NoError(t, err) + state.addPod(podUID, rootDir) + + r.cleanOrphanedMonGroups(state) + + // Root mon_group (tracked) should still exist. + _, err = os.Stat(rootDir) + assert.NoError(t, err) + + // BestEffort mon_group (stale) should be removed. 
+ _, err = os.Stat(filepath.Join(tmpDir, "BestEffort", "mon_groups", podUID)) + assert.True(t, os.IsNotExist(err)) +} + +func TestLooksLikePodUID(t *testing.T) { + assert.True(t, looksLikePodUID("a1b2c3d4-e5f6-7890-abcd-ef1234567890")) + assert.True(t, looksLikePodUID("DEADBEEF-DEAD-BEEF-DEAD-BEEFDEADBEEF")) + assert.True(t, looksLikePodUID("00000000-0000-0000-0000-000000000000")) + + assert.False(t, looksLikePodUID("short")) + assert.False(t, looksLikePodUID("not-a-uuid-at-all-nope-notthisone!")) + assert.False(t, looksLikePodUID("a1b2c3d4-e5f6-7890-abcd-ef123456789")) // too short last segment + assert.False(t, looksLikePodUID("g1b2c3d4-e5f6-7890-abcd-ef1234567890")) // 'g' is not hex + assert.False(t, looksLikePodUID("a1b2c3d4-e5f6-7890-abcd-ef1234567890x")) // too long +} + +func TestIsValidRDTClass(t *testing.T) { + assert.True(t, isValidRDTClass("BestEffort")) + assert.True(t, isValidRDTClass("Guaranteed")) + assert.True(t, isValidRDTClass("COS1")) + assert.True(t, isValidRDTClass("my-class_v2")) + + assert.False(t, isValidRDTClass("")) + assert.False(t, isValidRDTClass(".")) + assert.False(t, isValidRDTClass("..")) + assert.False(t, isValidRDTClass("../../etc")) + assert.False(t, isValidRDTClass("foo/bar")) + assert.False(t, isValidRDTClass("class\x00name")) +} + +func TestCreateMonGroup_PathTraversal(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + + _, err := r.createMonGroup("../../etc", "a1b2c3d4-e5f6-7890-abcd-ef1234567890") + assert.Error(t, err) + assert.Contains(t, err.Error(), "invalid RDT class") + + _, err = r.createMonGroup("foo/bar", "a1b2c3d4-e5f6-7890-abcd-ef1234567890") + assert.Error(t, err) + assert.Contains(t, err.Error(), "invalid RDT class") + + _, err = r.createMonGroup("..", "a1b2c3d4-e5f6-7890-abcd-ef1234567890") + assert.Error(t, err) + assert.Contains(t, err.Error(), "invalid RDT class") +} diff --git a/cmd/plugins/resctrl-mon/state.go b/cmd/plugins/resctrl-mon/state.go new file mode 100644 index 
000000000..47dba1f1a --- /dev/null +++ b/cmd/plugins/resctrl-mon/state.go @@ -0,0 +1,109 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import "sync" + +// podInfo tracks the mon_group directory and container set for a single pod. +type podInfo struct { + monGroupDir string + containers map[string]struct{} // container IDs +} + +// podState tracks all pods with active mon_groups. +type podState struct { + mu sync.Mutex + pods map[string]*podInfo // keyed by pod UID +} + +func newPodState() *podState { + return &podState{ + pods: make(map[string]*podInfo), + } +} + +// addPod registers a new pod with its mon_group directory. +// If the pod already exists, the existing entry is preserved. +func (s *podState) addPod(podUID, monGroupDir string) { + s.mu.Lock() + defer s.mu.Unlock() + if _, ok := s.pods[podUID]; ok { + return + } + s.pods[podUID] = &podInfo{ + monGroupDir: monGroupDir, + containers: make(map[string]struct{}), + } +} + +// addContainer adds a container ID to an existing pod's tracking. +func (s *podState) addContainer(podUID, containerID string) { + s.mu.Lock() + defer s.mu.Unlock() + if info, ok := s.pods[podUID]; ok { + info.containers[containerID] = struct{}{} + } +} + +// removeContainer removes a container ID from a pod's tracking. 
+func (s *podState) removeContainer(podUID, containerID string) { + s.mu.Lock() + defer s.mu.Unlock() + if info, ok := s.pods[podUID]; ok { + delete(info.containers, containerID) + } +} + +// removePod removes all tracking for a pod. +func (s *podState) removePod(podUID string) { + s.mu.Lock() + defer s.mu.Unlock() + delete(s.pods, podUID) +} + +// getMonGroupDir returns the mon_group directory for a pod, or empty string. +func (s *podState) getMonGroupDir(podUID string) string { + s.mu.Lock() + defer s.mu.Unlock() + if info, ok := s.pods[podUID]; ok { + return info.monGroupDir + } + return "" +} + +// podHasNoContainers returns true if the pod has no remaining containers. +func (s *podState) podHasNoContainers(podUID string) bool { + s.mu.Lock() + defer s.mu.Unlock() + if info, ok := s.pods[podUID]; ok { + return len(info.containers) == 0 + } + return true +} + +// hasPod returns true if the pod UID is being tracked. +func (s *podState) hasPod(podUID string) bool { + s.mu.Lock() + defer s.mu.Unlock() + _, ok := s.pods[podUID] + return ok +} + +// podCount returns the number of tracked pods. +func (s *podState) podCount() int { + s.mu.Lock() + defer s.mu.Unlock() + return len(s.pods) +} diff --git a/deployment/helm/resctrl-mon/.helmignore b/deployment/helm/resctrl-mon/.helmignore new file mode 100644 index 000000000..bf4d580e7 --- /dev/null +++ b/deployment/helm/resctrl-mon/.helmignore @@ -0,0 +1,20 @@ +# Patterns to ignore when building packages. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/deployment/helm/resctrl-mon/Chart.yaml b/deployment/helm/resctrl-mon/Chart.yaml new file mode 100644 index 000000000..f1e3e0d70 --- /dev/null +++ b/deployment/helm/resctrl-mon/Chart.yaml @@ -0,0 +1,11 @@ +apiVersion: v2 +appVersion: unstable +description: | + The resctrl-mon NRI plugin creates per-pod resctrl monitoring groups + (mon_groups) for Application Energy Telemetry via Kepler passive mode. +name: nri-resctrl-mon +sources: + - https://github.com/containers/nri-plugins +home: https://github.com/containers/nri-plugins +type: application +version: v0.0.0 diff --git a/deployment/helm/resctrl-mon/README.md b/deployment/helm/resctrl-mon/README.md new file mode 100644 index 000000000..206e3eced --- /dev/null +++ b/deployment/helm/resctrl-mon/README.md @@ -0,0 +1,121 @@ +# Resctrl-Mon Plugin + +This chart deploys the resctrl-mon Node Resource Interface (NRI) plugin. The +resctrl-mon NRI plugin creates per-pod resctrl monitoring groups (mon_groups) +to support Application Energy Telemetry (AET) via Kepler passive mode. + +## Prerequisites + +- Kubernetes 1.24+ +- Helm 3.0.0+ +- Intel CPU with RDT monitoring support (CMT/MBM and/or AET) +- resctrl filesystem mounted at `/sys/fs/resctrl` +- Container runtime: + - containerd: + - At least [containerd 1.7.0](https://github.com/containerd/containerd/releases/tag/v1.7.0) + release version to use the NRI feature. + + - Enable NRI feature by following + [these](https://github.com/containerd/containerd/blob/main/docs/NRI.md#enabling-nri-support-in-containerd) + detailed instructions. You can optionally enable the NRI in containerd + using the Helm chart during the chart installation simply by setting the + `nri.runtime.patchConfig` parameter. 
For instance, + + ```sh + helm install my-resctrl-mon nri-plugins/nri-resctrl-mon --set nri.runtime.patchConfig=true --namespace kube-system + ``` + + Enabling `nri.runtime.patchConfig` creates an init container that turns on + the NRI feature in containerd, and the plugin installation proceeds only + after that. + + - CRI-O + - At least [v1.26.0](https://github.com/cri-o/cri-o/releases/tag/v1.26.0) + release version to use the NRI feature + - Enable NRI feature by following + [these](https://github.com/cri-o/cri-o/blob/main/docs/crio.conf.5.md#crionri-table) + detailed instructions. You can optionally enable the NRI in CRI-O using + the Helm chart during the chart installation simply by setting the + `nri.runtime.patchConfig` parameter. For instance, + + ```sh + helm install my-resctrl-mon nri-plugins/nri-resctrl-mon --namespace kube-system --set nri.runtime.patchConfig=true + ``` + +## Installing the Chart + +Path to the chart: `nri-resctrl-mon`. + +```sh +helm repo add nri-plugins https://containers.github.io/nri-plugins +helm install my-resctrl-mon nri-plugins/nri-resctrl-mon --namespace kube-system +``` + +The command above deploys resctrl-mon NRI plugin on the Kubernetes cluster +within the `kube-system` namespace with default configuration. To customize the +available parameters as described in the [Configuration options](#configuration-options) +below, you have two options: you can use the `--set` flag or create a custom +values.yaml file and provide it using the `-f` flag.
For example: + +```sh +# Install the resctrl-mon plugin with custom values provided using the --set option +helm install my-resctrl-mon nri-plugins/nri-resctrl-mon --namespace kube-system --set nri.runtime.patchConfig=true +``` + +```sh +# Install the resctrl-mon plugin with custom values specified in a custom values.yaml file +cat <<EOF > myPath/values.yaml +nri: + runtime: + patchConfig: true + plugin: + index: 90 + +tolerations: +- key: "node-role.kubernetes.io/control-plane" + operator: "Exists" + effect: "NoSchedule" +EOF + +helm install my-resctrl-mon nri-plugins/nri-resctrl-mon --namespace kube-system -f myPath/values.yaml +``` + +## Uninstalling the Chart + +To uninstall the resctrl-mon plugin run the following command: + +```sh +helm delete my-resctrl-mon --namespace kube-system +``` + +## Security + +The DaemonSet runs with `hostPID: true` because the plugin must write +host-namespace PIDs into resctrl `tasks` files. Without host PID +visibility the kernel rejects the write (`ESRCH`). The container also +requires `SYS_ADMIN` and `DAC_OVERRIDE` capabilities to manage resctrl +`mon_group` directories. + +## Configuration options + +The tables below present an overview of the parameters available for users to +customize with their own values, along with the default values.
+ +| Name | Default | Description | +| ------------------------ | ----------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------- | +| `image.name` | [ghcr.io/containers/nri-plugins/nri-resctrl-mon](https://ghcr.io/containers/nri-plugins/nri-resctrl-mon) | container image name | +| `image.tag` | unstable | container image tag | +| `image.pullPolicy` | Always | image pull policy | +| `resources.cpu` | 10m | cpu resources for the Pod | +| `resources.memory` | 50Mi | memory quota for the Pod | +| `nri.runtime.config.pluginRegistrationTimeout` | "" | set NRI plugin registration timeout in NRI config of containerd or CRI-O | +| `nri.runtime.config.pluginRequestTimeout` | "" | set NRI plugin request timeout in NRI config of containerd or CRI-O | +| `nri.runtime.patchConfig` | false | patch NRI configuration in containerd or CRI-O | +| `nri.plugin.index` | 90 | NRI plugin index to register with | +| `initContainerImage.name` | [ghcr.io/containers/nri-plugins/nri-config-manager](https://ghcr.io/containers/nri-plugins/nri-config-manager) | init container image name | +| `initContainerImage.tag` | unstable | init container image tag | +| `initContainerImage.pullPolicy` | Always | init container image pull policy | +| `tolerations` | [] | specify taint toleration key, operator and effect | +| `affinity` | [] | specify node affinity | +| `nodeSelector` | [] | specify node selector labels | +| `podPriorityClassNodeCritical` | true | enable [marking Pod as node critical](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/#marking-pod-as-critical) | diff --git a/deployment/helm/resctrl-mon/templates/_helpers.tpl b/deployment/helm/resctrl-mon/templates/_helpers.tpl new file mode 100644 index 000000000..9b4239372 --- /dev/null +++ b/deployment/helm/resctrl-mon/templates/_helpers.tpl @@ -0,0 +1,16 @@ +{{/* +Common labels 
+*/}} +{{- define "nri-plugin.labels" -}} +helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{ include "nri-plugin.selectorLabels" . }} +{{- end -}} + +{{/* +Selector labels +*/}} +{{- define "nri-plugin.selectorLabels" -}} +app.kubernetes.io/name: nri-resctrl-mon +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} diff --git a/deployment/helm/resctrl-mon/templates/configmap.yaml b/deployment/helm/resctrl-mon/templates/configmap.yaml new file mode 100644 index 000000000..3ad305503 --- /dev/null +++ b/deployment/helm/resctrl-mon/templates/configmap.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: nri-resctrl-mon-config.default + namespace: {{ .Release.Namespace }} + labels: + {{- include "nri-plugin.labels" . | nindent 4 }} +data: + config.yaml: | + resctrlPath: /sys/fs/resctrl + namespaces: [] + labelSelector: {} diff --git a/deployment/helm/resctrl-mon/templates/daemonset.yaml b/deployment/helm/resctrl-mon/templates/daemonset.yaml new file mode 100644 index 000000000..7b3dd4ead --- /dev/null +++ b/deployment/helm/resctrl-mon/templates/daemonset.yaml @@ -0,0 +1,111 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + labels: + {{- include "nri-plugin.labels" . | nindent 4 }} + name: nri-resctrl-mon + namespace: {{ .Release.Namespace }} +spec: + selector: + matchLabels: + {{- include "nri-plugin.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "nri-plugin.labels" . | nindent 8 }} + spec: + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + hostPID: true + nodeSelector: + kubernetes.io/os: "linux" + {{- with .Values.nodeSelector }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- if .Values.nri.runtime.patchConfig }} + initContainers: + - name: patch-runtime + {{- if (not (or (eq .Values.nri.runtime.config nil) (eq .Values.nri.runtime.config.pluginRegistrationTimeout ""))) }} + args: + - -nri-plugin-registration-timeout + - {{ .Values.nri.runtime.config.pluginRegistrationTimeout }} + - -nri-plugin-request-timeout + - {{ .Values.nri.runtime.config.pluginRequestTimeout }} + {{- end }} + image: {{ .Values.initContainerImage.name }}:{{ .Values.initContainerImage.tag | default .Chart.AppVersion }} + imagePullPolicy: {{ .Values.initContainerImage.pullPolicy }} + volumeMounts: + - name: containerd-config + mountPath: /etc/containerd + - name: crio-config + mountPath: /etc/crio/crio.conf.d + - name: dbus-socket + mountPath: /var/run/dbus/system_bus_socket + securityContext: + privileged: true + {{- end }} + containers: + - name: nri-resctrl-mon + command: + - nri-resctrl-mon + - --idx + - "{{ .Values.nri.plugin.index | int | printf "%02d" }}" + - --config + - /etc/nri/resctrl-mon/config.yaml + - -v + image: {{ .Values.image.name }}:{{ .Values.image.tag | default .Chart.AppVersion }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + resources: + requests: + cpu: {{ .Values.resources.cpu }} + memory: {{ .Values.resources.memory }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + add: + - SYS_ADMIN + - DAC_OVERRIDE + volumeMounts: + - name: resctrl-mon-config-vol + mountPath: /etc/nri/resctrl-mon + - name: nrisockets + mountPath: /var/run/nri + - name: resctrlfs + mountPath: /sys/fs/resctrl + {{- if .Values.podPriorityClassNodeCritical }} + priorityClassName: system-node-critical + {{- end }} + volumes: + - name: resctrl-mon-config-vol + configMap: + name: nri-resctrl-mon-config.default + - name: nrisockets + hostPath: + path: /var/run/nri + type: DirectoryOrCreate + - name: resctrlfs + hostPath: + path: /sys/fs/resctrl + type: Directory + {{- if .Values.nri.runtime.patchConfig 
}} + - name: containerd-config + hostPath: + path: /etc/containerd/ + type: DirectoryOrCreate + - name: crio-config + hostPath: + path: /etc/crio/crio.conf.d/ + type: DirectoryOrCreate + - name: dbus-socket + hostPath: + path: /var/run/dbus/system_bus_socket + type: Socket + {{- end }} diff --git a/deployment/helm/resctrl-mon/values.schema.json b/deployment/helm/resctrl-mon/values.schema.json new file mode 100644 index 000000000..07514a2c3 --- /dev/null +++ b/deployment/helm/resctrl-mon/values.schema.json @@ -0,0 +1,117 @@ +{ + "$schema": "http://json-schema.org/schema#", + "required": [ + "image", + "resources" + ], + "properties": { + "image": { + "type": "object", + "required": [ + "name", + "pullPolicy" + ], + "properties": { + "name": { + "type": "string" + }, + "tag": { + "type": "string" + }, + "pullPolicy": { + "type": "string", + "enum": ["Never", "Always", "IfNotPresent"] + } + } + }, + "initContainerImage": { + "type": "object", + "required": [ + "name", + "pullPolicy" + ], + "properties": { + "name": { + "type": "string" + }, + "tag": { + "type": "string" + }, + "pullPolicy": { + "type": "string", + "enum": ["Never", "Always", "IfNotPresent"] + } + } + }, + "resources": { + "type": "object", + "required": [ + "cpu", + "memory" + ], + "properties": { + "cpu": { + "type": "string" + }, + "memory": { + "type": "string" + } + } + }, + "nri": { + "type": "object", + "required": [ + "plugin", + "runtime" + ], + "properties": { + "plugin": { + "type": "object", + "required": [ + "index" + ], + "properties": { + "index": { + "type": "integer", + "minimum": 0, + "maximum": 99 + } + } + }, + "runtime": { + "type": "object", + "required": [ + "patchConfig" + ], + "properties": { + "patchConfig": { + "type": "boolean" + }, + "config": { + "type": "object", + "required": [ + "pluginRegistrationTimeout", + "pluginRequestTimeout" + ], + "properties": { + "pluginRegistrationTimeout": { + "type": "string", + "$comment": "allowed range is 5-30s", + "pattern": 
"^(([5-9])|([1-2][0-9])|(30))s$" + }, + "pluginRequestTimeout": { + "type": "string", + "$comment": "allowed range is 2-30s", + "pattern": "^(([2-9])|([1-2][0-9])|(30))s$" + } + } + } + } + } + } + }, + "podPriorityClassNodeCritical": { + "type": "boolean" + } + } +} diff --git a/deployment/helm/resctrl-mon/values.yaml b/deployment/helm/resctrl-mon/values.yaml new file mode 100644 index 000000000..58e0915bd --- /dev/null +++ b/deployment/helm/resctrl-mon/values.yaml @@ -0,0 +1,66 @@ +# Default values for resctrl-mon. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. +--- +image: + name: ghcr.io/containers/nri-plugins/nri-resctrl-mon + # tag, if defined will use the given image tag, otherwise Chart.AppVersion will be used + #tag: unstable + pullPolicy: Always + +resources: + cpu: 10m + memory: 50Mi + +nri: + plugin: + index: 90 + runtime: + patchConfig: false +# config: +# pluginRegistrationTimeout: 5s +# pluginRequestTimeout: 2s + +initContainerImage: + name: ghcr.io/containers/nri-plugins/nri-config-manager + # If not defined Chart.AppVersion will be used + #tag: unstable + pullPolicy: Always + +tolerations: [] +# +# Example: +# +# tolerations: +# - key: "node-role.kubernetes.io/control-plane" +# operator: "Exists" +# effect: "NoSchedule" + +affinity: [] +# +# Example: +# +# affinity: +# nodeAffinity: +# requiredDuringSchedulingIgnoredDuringExecution: +# nodeSelectorTerms: +# - matchExpressions: +# - key: feature.node.kubernetes.io/cpu-rdt.mon +# operator: In +# values: +# - "true" + +nodeSelector: [] +# +# Example: +# +# nodeSelector: +# feature.node.kubernetes.io/cpu-rdt.mon: "true" + +# NRI plugins should be considered as part of the container runtime. +# By default we make them part of the system-node-critical priority +# class. This should mitigate the potential risk of a plugin getting +# evicted under heavy system load. 
It should also ensure that during +# autoscaling enough new nodes are brought up to leave room for the +# plugin on each new node. +podPriorityClassNodeCritical: true diff --git a/docs/deployment/helm/index.md b/docs/deployment/helm/index.md index 183dacfe0..d34d9d84e 100644 --- a/docs/deployment/helm/index.md +++ b/docs/deployment/helm/index.md @@ -52,5 +52,6 @@ template.md memory-qos.md memtierd.md sgx-epc.md +resctrl-mon.md resource-annotator.md ``` diff --git a/docs/deployment/helm/resctrl-mon.md b/docs/deployment/helm/resctrl-mon.md new file mode 100644 index 000000000..8e87b8257 --- /dev/null +++ b/docs/deployment/helm/resctrl-mon.md @@ -0,0 +1,2 @@ +```{include} ../../../deployment/helm/resctrl-mon/README.md +``` diff --git a/docs/index.md b/docs/index.md index d6e4a7a36..dcb17f837 100644 --- a/docs/index.md +++ b/docs/index.md @@ -8,6 +8,7 @@ caption: Contents introduction.md resource-policy/index.rst memory/index.md +monitoring/index.md deployment/index.md contributing.md Project GitHub repository diff --git a/docs/monitoring/index.md b/docs/monitoring/index.md new file mode 100644 index 000000000..33ef9f208 --- /dev/null +++ b/docs/monitoring/index.md @@ -0,0 +1,9 @@ +# Monitoring plugins + +```{toctree} +--- +maxdepth: 2 +caption: Contents +--- +resctrl-mon.md +``` diff --git a/docs/monitoring/resctrl-mon.md b/docs/monitoring/resctrl-mon.md new file mode 100644 index 000000000..09eebf2d6 --- /dev/null +++ b/docs/monitoring/resctrl-mon.md @@ -0,0 +1,160 @@ +# Resctrl-Mon NRI Plugin + +The resctrl-mon NRI plugin creates per-pod resctrl monitoring groups +(`mon_groups`) to support Kepler's passive mode for Application Energy +Telemetry (AET). + +When a container is created, the plugin assigns its init process to a +`mon_group` before the process starts executing. The Linux kernel then +propagates the RMID (Resource Monitoring ID) to all child processes +automatically, eliminating the fork race that affects userspace-based +approaches. 
+ +## How It Works + +1. The container runtime creates a container process (paused). +2. The NRI `PostCreateContainer` hook fires. +3. The plugin creates a `mon_group` named with the pod's UUID under + the appropriate resctrl control group. +4. The NRI `StartContainer` hook fires with the container's init PID. +5. The plugin writes the init PID to the `mon_group`'s `tasks` file. + (If the PID is not yet available, `PostStartContainer` retries.) +6. The runtime starts the container. All child processes inherit the RMID. +7. Kepler scans the resctrl filesystem and reads monitoring data. +8. When the last container in a pod stops, the plugin removes the `mon_group`. + +The plugin DaemonSet runs with `hostPID: true` so that it can write +host-namespace PIDs to the resctrl `tasks` file. Without `hostPID`, +the kernel rejects the write with `ESRCH` because the PID does not +exist in the plugin's PID namespace. + +## Mon_Group Naming + +Mon_groups are named with the Kubernetes pod UID: + +``` +/sys/fs/resctrl/[<rdt-class>/]mon_groups/<pod-uid>/ +``` + +This enables Kepler to correlate monitoring data with Kubernetes metadata +by querying the K8s API using the pod UID extracted from the directory name. + +## Plugin Configuration + +Configuration is loaded from a YAML file specified with the `-config` flag +or pushed by the container runtime via NRI. + +```yaml +# Path to the resctrl filesystem. Override for testing. +resctrlPath: /sys/fs/resctrl + +# Namespace filter: only create mon_groups for pods in these namespaces. +# Empty list = all namespaces. +namespaces: [] + +# Pod label selector: only create mon_groups for pods matching these labels. +# Empty = all pods. +labelSelector: {} +``` + +## Coexistence with Allocation Plugins + +If an NRI resource allocation plugin (balloons, topology-aware) is running, +it assigns containers to RDT classes via `SetLinuxRDTClass`.
The resctrl-mon +plugin reads the effective RDT class from the NRI container spec and creates +`mon_groups` under the corresponding control group: + +``` +/sys/fs/resctrl/<rdt-class>/mon_groups/<pod-uid>/ +``` + +The container keeps its CLOSID (allocation) and gets a distinct RMID +(monitoring). If no allocation plugin is active, `mon_groups` are created +under the root resctrl directory. + +## RMID Management + +RMID allocation is delegated entirely to the Linux kernel: + +- **Allocation**: `mkdir` on a `mon_group` directory assigns an RMID. If + none are available, the kernel returns `ENOSPC` and the plugin logs a + warning and skips the pod. +- **Deallocation**: `rmdir` releases the RMID. The kernel handles the + hardware recycling window. + +## Developer's Guide + +### Prerequisites + +- Containerd v1.7+ or CRI-O v1.26+ +- Enable NRI in /etc/containerd/config.toml: + + ```toml + [plugins."io.containerd.nri.v1.nri"] + disable = false + disable_connections = false + plugin_config_path = "/etc/nri/conf.d" + plugin_path = "/opt/nri/plugins" + plugin_registration_timeout = "5s" + plugin_request_timeout = "2s" + socket_path = "/var/run/nri/nri.sock" + ``` + +- Intel CPU with RDT monitoring support +- resctrl filesystem mounted at `/sys/fs/resctrl` + +### Build + +```bash +make PLUGINS=nri-resctrl-mon build-plugins +``` + +### Run + +```bash +./build/bin/nri-resctrl-mon -config sample-configs/nri-resctrl-mon.yaml -idx 90 -vv +``` + +### Manual Test + +Verify that `mon_groups` are created when pods start: + +```bash +# Start a test pod +kubectl run test-pod --image=busybox -- sleep 3600 + +# Check that a mon_group was created with the pod UID +POD_UID=$(kubectl get pod test-pod -o jsonpath='{.metadata.uid}') + +# Without an RDT allocation plugin, mon_groups are under the root class: +MON_GROUP_BASE=/sys/fs/resctrl/mon_groups +# With an allocation plugin that assigns an RDT class (e.g.
BestEffort): +# MON_GROUP_BASE=/sys/fs/resctrl/BestEffort/mon_groups + +ls "$MON_GROUP_BASE/$POD_UID/" + +# Verify monitoring data is available +cat "$MON_GROUP_BASE/$POD_UID/mon_data/mon_L3_00/llc_occupancy" +``` + +### Debug + +```bash +go install github.com/go-delve/delve/cmd/dlv@latest +dlv exec build/bin/nri-resctrl-mon -- -config sample-configs/nri-resctrl-mon.yaml -idx 90 +(dlv) break plugin.PostCreateContainer +(dlv) continue +``` + +### Deploy + +Build an image, import it on the node, and deploy the plugin by +running the following in `nri-plugins`: + +```bash +rm -rf build +make clean +make PLUGINS=nri-resctrl-mon IMAGE_VERSION=devel images +ctr -n k8s.io images import build/images/nri-resctrl-mon-image-*.tar +kubectl create -f build/images/nri-resctrl-mon-deployment.yaml +``` diff --git a/sample-configs/nri-resctrl-mon.yaml b/sample-configs/nri-resctrl-mon.yaml new file mode 100644 index 000000000..9cbd339a0 --- /dev/null +++ b/sample-configs/nri-resctrl-mon.yaml @@ -0,0 +1,3 @@ +resctrlPath: /sys/fs/resctrl +namespaces: [] +labelSelector: {} From 491e45c107b879dd14a319bc31b091a4a4de12e6 Mon Sep 17 00:00:00 2001 From: "Christopher M. Cantalupo" Date: Mon, 27 Apr 2026 10:07:28 -0700 Subject: [PATCH 2/2] resctrl-mon: add counter snapshots that mirror the resctrl interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Record initial and final resctrl counter values so that consumers (e.g. Kepler) can account for energy consumed between the last scrape and mon_group removal, and between mon_group creation and the first scrape. 
The snapshot directory mirrors the resctrl mon_data layout: /run/nri-resctrl-mon/<pod-uid>/ created_at completed_at mon_data/mon_PERF_PKG_00/ core_energy → /sys/fs/resctrl/.../core_energy core_energy.begin core_energy.end On first PID write (StartContainer / PostStartContainer / Synchronize), the plugin reads all mon_data event files and writes .begin counter files alongside symlinks to the live resctrl event files. On the last container stop (StopContainer), the counters are read again as .end files. Consumers compute lifetime totals as: end - begin. This format lets existing resctrl consumers discover and use the snapshot data with no new parsing logic — the same cat/read operations work on .begin/.end files as on kernel event files. If the snapshot directory is absent, consumers fall back to direct resctrl scraping. Completed snapshot directories are pruned after a configurable TTL (default 5m). New configuration keys: snapshotDir, snapshotTTL. Signed-off-by: Christopher M. Cantalupo Signed-off-by: Jedrzej Wasiukiewicz --- cmd/plugins/resctrl-mon/plugin.go | 112 +++++++- cmd/plugins/resctrl-mon/plugin_test.go | 108 ++++++- cmd/plugins/resctrl-mon/resctrl.go | 52 ++++ cmd/plugins/resctrl-mon/resctrl_test.go | 41 +++ cmd/plugins/resctrl-mon/snapshot.go | 201 +++++++++++++ cmd/plugins/resctrl-mon/snapshot_test.go | 266 ++++++++++++++++++ cmd/plugins/resctrl-mon/state.go | 24 +- deployment/helm/resctrl-mon/README.md | 2 + .../helm/resctrl-mon/templates/daemonset.yaml | 6 + .../helm/resctrl-mon/values.schema.json | 11 + deployment/helm/resctrl-mon/values.yaml | 3 + docs/monitoring/resctrl-mon.md | 103 +++++++ sample-configs/nri-resctrl-mon.yaml | 2 + 13 files changed, 917 insertions(+), 14 deletions(-) create mode 100644 cmd/plugins/resctrl-mon/snapshot.go create mode 100644 cmd/plugins/resctrl-mon/snapshot_test.go diff --git a/cmd/plugins/resctrl-mon/plugin.go b/cmd/plugins/resctrl-mon/plugin.go index 80c33f19c..3d171c1bc 100644 --- a/cmd/plugins/resctrl-mon/plugin.go +++ 
b/cmd/plugins/resctrl-mon/plugin.go @@ -19,6 +19,8 @@ import ( "fmt" "os" "path/filepath" + "strings" + "time" "sigs.k8s.io/yaml" @@ -28,12 +30,18 @@ import ( // plugin implements the NRI plugin interface for resctrl monitoring groups. type plugin struct { - stub stub.Stub - config *pluginConfig - state *podState - rdt *resctrlOps + stub stub.Stub + config *pluginConfig + state *podState + rdt *resctrlOps + snapshots *snapshotStore } +const ( + defaultSnapshotDir = "/run/nri-resctrl-mon" + defaultSnapshotTTL = 5 * time.Minute +) + // pluginConfig holds the runtime configuration for the plugin. type pluginConfig struct { // ResctrlPath is the mount point of the resctrl filesystem. @@ -46,16 +54,32 @@ type pluginConfig struct { // LabelSelector filters mon_group creation to pods matching these labels. // Empty map means all pods. LabelSelector map[string]string `json:"labelSelector"` + + // SnapshotDir is the directory for counter snapshot files. + // Default: /run/nri-resctrl-mon + SnapshotDir string `json:"snapshotDir"` + + // SnapshotTTL is how long completed snapshots are kept before pruning. + // Parsed as a Go duration string (e.g., "5m", "1h"). 
+ // Default: 5m + SnapshotTTL string `json:"snapshotTTL"` } func newPlugin() *plugin { cfg := &pluginConfig{ ResctrlPath: defaultResctrlPath, + SnapshotDir: defaultSnapshotDir, + SnapshotTTL: defaultSnapshotTTL.String(), + } + snap, err := newSnapshotStore(cfg.SnapshotDir, defaultSnapshotTTL) + if err != nil { + log.Warnf("failed to create snapshot store at %s: %v (snapshots disabled)", cfg.SnapshotDir, err) } return &plugin{ - config: cfg, - state: newPodState(), - rdt: newResctrlOps(cfg.ResctrlPath), + config: cfg, + state: newPodState(), + rdt: newResctrlOps(cfg.ResctrlPath), + snapshots: snap, } } @@ -82,6 +106,8 @@ func (p *plugin) setConfig(data []byte) error { log.Tracef("setConfig: parsing\n---8<---\n%s\n--->8---", data) cfg := pluginConfig{ ResctrlPath: defaultResctrlPath, + SnapshotDir: defaultSnapshotDir, + SnapshotTTL: defaultSnapshotTTL.String(), } if err := yaml.Unmarshal(data, &cfg); err != nil { return fmt.Errorf("setConfig: cannot parse configuration: %w", err) @@ -91,10 +117,37 @@ func (p *plugin) setConfig(data []byte) error { return fmt.Errorf("setConfig: resctrlPath must be an absolute path, got %q", cfg.ResctrlPath) } cfg.ResctrlPath = resctrlPath + + snapshotDir := filepath.Clean(cfg.SnapshotDir) + if snapshotDir == "" || !filepath.IsAbs(snapshotDir) { + return fmt.Errorf("setConfig: snapshotDir must be an absolute path, got %q", cfg.SnapshotDir) + } + // Prevent writing snapshots into the resctrl filesystem or sensitive system dirs. 
+ for _, forbidden := range []string{resctrlPath, "/sys", "/proc", "/dev", "/etc"} { + if snapshotDir == forbidden || strings.HasPrefix(snapshotDir, forbidden+"/") { + return fmt.Errorf("setConfig: snapshotDir must not be under %s, got %q", forbidden, snapshotDir) + } + } + cfg.SnapshotDir = snapshotDir + + ttl, err := time.ParseDuration(cfg.SnapshotTTL) + if err != nil { + return fmt.Errorf("setConfig: invalid snapshotTTL %q: %w", cfg.SnapshotTTL, err) + } + p.config = &cfg p.rdt = newResctrlOps(cfg.ResctrlPath) - log.Debugf("configuration: resctrlPath=%s namespaces=%v labelSelector=%v", - cfg.ResctrlPath, cfg.Namespaces, cfg.LabelSelector) + + snap, err := newSnapshotStore(cfg.SnapshotDir, ttl) + if err != nil { + log.Warnf("setConfig: failed to create snapshot store at %s: %v (snapshots disabled)", cfg.SnapshotDir, err) + p.snapshots = nil + } else { + p.snapshots = snap + } + + log.Debugf("configuration: resctrlPath=%s namespaces=%v labelSelector=%v snapshotDir=%s snapshotTTL=%s", + cfg.ResctrlPath, cfg.Namespaces, cfg.LabelSelector, cfg.SnapshotDir, cfg.SnapshotTTL) return nil } @@ -134,6 +187,7 @@ func (p *plugin) Synchronize(ctx context.Context, pods []*api.PodSandbox, contai log.Warnf("Synchronize: failed to write PID %d for pod %s: %v", pid, podUID, err) } else { log.Debugf("Synchronize: assigned pid %d for pod %s", pid, podUID) + p.takeInitialSnapshot(podUID, monGroupDir) } } } @@ -201,6 +255,7 @@ func (p *plugin) StartContainer(ctx context.Context, pod *api.PodSandbox, ctr *a log.Warnf("StartContainer %s: failed to write PID %d to tasks: %v", ctrName, pid, err) } else { log.Infof("StartContainer %s: assigned pid %d to mon_group %s (pre-start, no threads yet)", ctrName, pid, monGroupDir) + p.takeInitialSnapshot(podUID, monGroupDir) } } else { log.Warnf("StartContainer %s: PID not available at pre-start, will retry in PostStartContainer", ctrName) @@ -235,6 +290,7 @@ func (p *plugin) PostStartContainer(ctx context.Context, pod *api.PodSandbox, ct 
log.Warnf("PostStartContainer %s: failed to write PID %d to tasks: %v", ctrName, pid, err) } else { log.Infof("PostStartContainer %s: fallback assigned pid %d to mon_group %s", ctrName, pid, monGroupDir) + p.takeInitialSnapshot(podUID, monGroupDir) } } else { log.Warnf("PostStartContainer %s: PID still 0 after start, unexpected", ctrName) @@ -259,6 +315,7 @@ func (p *plugin) StopContainer(ctx context.Context, pod *api.PodSandbox, ctr *ap if p.state.podHasNoContainers(podUID) { log.Infof("StopContainer %s: last container, removing mon_group %s", ctrName, monGroupDir) + p.takeFinalSnapshot(podUID, monGroupDir) if err := p.rdt.removeMonGroup(monGroupDir); err != nil { log.Warnf("StopContainer %s: failed to remove mon_group, will retry on next sync: %v", ctrName, err) return nil, nil @@ -339,3 +396,40 @@ func getRDTClass(ctr *api.Container) string { func pprintCtr(pod *api.PodSandbox, ctr *api.Container) string { return fmt.Sprintf("%s/%s:%s", pod.GetNamespace(), pod.GetName(), ctr.GetName()) } + +// takeInitialSnapshot records the initial counter values for a pod's mon_group. +// It is called once per pod, after the first successful PID write. +func (p *plugin) takeInitialSnapshot(podUID, monGroupDir string) { + if p.snapshots == nil || p.state.isInitialSnapshotDone(podUID) { + return + } + counters, err := p.rdt.readMonData(monGroupDir) + if err != nil { + log.Warnf("takeInitialSnapshot: failed to read mon_data for pod %s: %v", podUID, err) + return + } + if err := p.snapshots.writeInitial(podUID, monGroupDir, counters); err != nil { + log.Warnf("takeInitialSnapshot: failed to write snapshot for pod %s: %v", podUID, err) + return + } + p.state.setInitialSnapshotDone(podUID) + log.Infof("takeInitialSnapshot: recorded initial counters for pod %s", podUID) +} + +// takeFinalSnapshot records the final counter values before a mon_group is removed. 
+func (p *plugin) takeFinalSnapshot(podUID, monGroupDir string) { + if p.snapshots == nil { + return + } + counters, err := p.rdt.readMonData(monGroupDir) + if err != nil { + log.Warnf("takeFinalSnapshot: failed to read mon_data for pod %s: %v", podUID, err) + return + } + if err := p.snapshots.writeFinal(podUID, counters); err != nil { + log.Warnf("takeFinalSnapshot: failed to write snapshot for pod %s: %v", podUID, err) + return + } + log.Infof("takeFinalSnapshot: recorded final counters for pod %s", podUID) + p.snapshots.pruneCompleted() +} diff --git a/cmd/plugins/resctrl-mon/plugin_test.go b/cmd/plugins/resctrl-mon/plugin_test.go index e29ba24a5..9863d1ed0 100644 --- a/cmd/plugins/resctrl-mon/plugin_test.go +++ b/cmd/plugins/resctrl-mon/plugin_test.go @@ -18,7 +18,10 @@ import ( "context" "os" "path/filepath" + "strconv" + "strings" "testing" + "time" "github.com/containerd/nri/pkg/api" "github.com/sirupsen/logrus" @@ -34,11 +37,15 @@ func init() { func newTestPlugin(resctrlPath string) *plugin { cfg := &pluginConfig{ ResctrlPath: resctrlPath, + SnapshotDir: filepath.Join(resctrlPath, "_snapshots"), + SnapshotTTL: "5m", } + snap, _ := newSnapshotStore(cfg.SnapshotDir, 5*time.Minute) return &plugin{ - config: cfg, - state: newPodState(), - rdt: newResctrlOps(resctrlPath), + config: cfg, + state: newPodState(), + rdt: newResctrlOps(resctrlPath), + snapshots: snap, } } @@ -272,3 +279,98 @@ func TestEnsureMonGroup_InvalidUID(t *testing.T) { assert.Equal(t, 0, p.state.podCount()) } + +func TestStartContainer_CreatesSnapshot(t *testing.T) { + tmpDir := t.TempDir() + p := newTestPlugin(tmpDir) + podUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + + pod := makePod(podUID, "default", "snap-pod") + ctr := makeContainer("c1", "container1", pod.GetId(), 12345, "") + + // Create the mon_group and fake mon_data so readMonData succeeds. 
+ err := p.ensureMonGroup(podUID, "c1", "") + require.NoError(t, err) + + monDir := p.state.getMonGroupDir(podUID) + monData := filepath.Join(monDir, "mon_data", "mon_PERF_PKG_00") + require.NoError(t, os.MkdirAll(monData, 0755)) + require.NoError(t, os.WriteFile(filepath.Join(monData, "core_energy"), []byte("10.5\n"), 0644)) + // Create a tasks file so writeTaskPID succeeds in the test sandbox. + require.NoError(t, os.WriteFile(filepath.Join(monDir, "tasks"), nil, 0644)) + + // StartContainer triggers takeInitialSnapshot. + err = p.StartContainer(context.Background(), pod, ctr) + require.NoError(t, err) + + // Verify snapshot directory with .begin files and symlink. + snapDir := filepath.Join(p.snapshots.dir, podUID) + beginFile := filepath.Join(snapDir, "mon_data", "mon_PERF_PKG_00", "core_energy.begin") + data, err := os.ReadFile(beginFile) + require.NoError(t, err) + val, err := strconv.ParseFloat(strings.TrimSpace(string(data)), 64) + require.NoError(t, err) + assert.Equal(t, 10.5, val) + + // Verify symlink exists. + linkPath := filepath.Join(snapDir, "mon_data", "mon_PERF_PKG_00", "core_energy") + _, err = os.Readlink(linkPath) + assert.NoError(t, err) + + // Verify created_at exists, completed_at does not. + _, err = os.Stat(filepath.Join(snapDir, "created_at")) + assert.NoError(t, err) + _, err = os.Stat(filepath.Join(snapDir, "completed_at")) + assert.True(t, os.IsNotExist(err)) +} + +func TestStopContainer_WritesFinalSnapshot(t *testing.T) { + tmpDir := t.TempDir() + p := newTestPlugin(tmpDir) + podUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + + pod := makePod(podUID, "default", "snap-pod") + ctr := makeContainer("c1", "container1", pod.GetId(), 12345, "") + + // Set up mon_group + fake mon_data. 
+ err := p.ensureMonGroup(podUID, "c1", "") + require.NoError(t, err) + + monDir := p.state.getMonGroupDir(podUID) + monData := filepath.Join(monDir, "mon_data", "mon_PERF_PKG_00") + require.NoError(t, os.MkdirAll(monData, 0755)) + require.NoError(t, os.WriteFile(filepath.Join(monData, "core_energy"), []byte("10.5\n"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(monDir, "tasks"), nil, 0644)) + + // Take initial snapshot. + err = p.StartContainer(context.Background(), pod, ctr) + require.NoError(t, err) + + // Update the counter value for final read. + require.NoError(t, os.WriteFile(filepath.Join(monData, "core_energy"), []byte("99.9\n"), 0644)) + + // StopContainer triggers takeFinalSnapshot. + _, err = p.StopContainer(context.Background(), pod, ctr) + require.NoError(t, err) + + // Verify .begin and .end files. + snapDir := filepath.Join(p.snapshots.dir, podUID) + beginFile := filepath.Join(snapDir, "mon_data", "mon_PERF_PKG_00", "core_energy.begin") + endFile := filepath.Join(snapDir, "mon_data", "mon_PERF_PKG_00", "core_energy.end") + + beginData, err := os.ReadFile(beginFile) + require.NoError(t, err) + beginVal, err := strconv.ParseFloat(strings.TrimSpace(string(beginData)), 64) + require.NoError(t, err) + assert.Equal(t, 10.5, beginVal) + + endData, err := os.ReadFile(endFile) + require.NoError(t, err) + endVal, err := strconv.ParseFloat(strings.TrimSpace(string(endData)), 64) + require.NoError(t, err) + assert.Equal(t, 99.9, endVal) + + // Verify completed_at exists. + _, err = os.Stat(filepath.Join(snapDir, "completed_at")) + assert.NoError(t, err) +} diff --git a/cmd/plugins/resctrl-mon/resctrl.go b/cmd/plugins/resctrl-mon/resctrl.go index cea0a0c16..985ff4fa0 100644 --- a/cmd/plugins/resctrl-mon/resctrl.go +++ b/cmd/plugins/resctrl-mon/resctrl.go @@ -117,6 +117,58 @@ func (r *resctrlOps) writeTaskPID(monGroupDir string, pid int) error { return nil } +// readMonData reads all monitoring event files from a mon_group's mon_data/ +// directory. 
It returns a nested map: domain → event → value. +// +// For example, a mon_group with PERF_PKG and L3 monitoring will return: +// +// { +// "mon_PERF_PKG_00": {"activity": 1234, "core_energy": 42.123}, +// "mon_L3_00": {"llc_occupancy": 12345, "mbm_local_bytes": 0, "mbm_total_bytes": 67890}, +// } +func (r *resctrlOps) readMonData(monGroupDir string) (map[string]map[string]float64, error) { + monDataDir := filepath.Join(monGroupDir, "mon_data") + domains, err := os.ReadDir(monDataDir) + if err != nil { + return nil, fmt.Errorf("failed to read mon_data directory %s: %w", monDataDir, err) + } + + result := make(map[string]map[string]float64) + for _, domain := range domains { + if !domain.IsDir() || !strings.HasPrefix(domain.Name(), "mon_") { + continue + } + domainDir := filepath.Join(monDataDir, domain.Name()) + events, err := os.ReadDir(domainDir) + if err != nil { + log.Warnf("readMonData: failed to read domain directory %s: %v", domainDir, err) + continue + } + domainMap := make(map[string]float64) + for _, event := range events { + if event.IsDir() || event.Name() == "tasks" { + continue + } + eventFile := filepath.Join(domainDir, event.Name()) + data, err := os.ReadFile(eventFile) + if err != nil { + log.Warnf("readMonData: failed to read %s: %v", eventFile, err) + continue + } + val, err := strconv.ParseFloat(strings.TrimSpace(string(data)), 64) + if err != nil { + log.Warnf("readMonData: failed to parse %s value %q: %v", eventFile, string(data), err) + continue + } + domainMap[event.Name()] = val + } + if len(domainMap) > 0 { + result[domain.Name()] = domainMap + } + } + return result, nil +} + // cleanOrphanedMonGroups removes mon_group directories that are not tracked // in the given state. This handles cleanup after a plugin crash/restart. 
func (r *resctrlOps) cleanOrphanedMonGroups(state *podState) { diff --git a/cmd/plugins/resctrl-mon/resctrl_test.go b/cmd/plugins/resctrl-mon/resctrl_test.go index 9d2dbedb1..760a6f39e 100644 --- a/cmd/plugins/resctrl-mon/resctrl_test.go +++ b/cmd/plugins/resctrl-mon/resctrl_test.go @@ -234,3 +234,44 @@ func TestCreateMonGroup_PathTraversal(t *testing.T) { assert.Error(t, err) assert.Contains(t, err.Error(), "invalid RDT class") } + +func TestReadMonData(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + + // Create a fake mon_group with mon_data directories. + podUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + monGroupDir, err := r.createMonGroup("", podUID) + require.NoError(t, err) + + monData := filepath.Join(monGroupDir, "mon_data") + require.NoError(t, os.MkdirAll(filepath.Join(monData, "mon_PERF_PKG_00"), 0755)) + require.NoError(t, os.MkdirAll(filepath.Join(monData, "mon_PERF_PKG_01"), 0755)) + require.NoError(t, os.MkdirAll(filepath.Join(monData, "mon_L3_00"), 0755)) + + require.NoError(t, os.WriteFile(filepath.Join(monData, "mon_PERF_PKG_00", "core_energy"), []byte("42.123456\n"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(monData, "mon_PERF_PKG_01", "core_energy"), []byte("0.000000\n"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(monData, "mon_L3_00", "llc_occupancy"), []byte("12345\n"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(monData, "mon_L3_00", "mbm_total_bytes"), []byte("67890\n"), 0644)) + + result, err := r.readMonData(monGroupDir) + require.NoError(t, err) + + assert.Equal(t, 42.123456, result["mon_PERF_PKG_00"]["core_energy"]) + assert.Equal(t, 0.0, result["mon_PERF_PKG_01"]["core_energy"]) + assert.Equal(t, 12345.0, result["mon_L3_00"]["llc_occupancy"]) + assert.Equal(t, 67890.0, result["mon_L3_00"]["mbm_total_bytes"]) +} + +func TestReadMonData_NoMonData(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + + // mon_group with no mon_data directory. 
+ podUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + monGroupDir, err := r.createMonGroup("", podUID) + require.NoError(t, err) + + _, err = r.readMonData(monGroupDir) + assert.Error(t, err) +} diff --git a/cmd/plugins/resctrl-mon/snapshot.go b/cmd/plugins/resctrl-mon/snapshot.go new file mode 100644 index 000000000..a6ca93dde --- /dev/null +++ b/cmd/plugins/resctrl-mon/snapshot.go @@ -0,0 +1,201 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "fmt" + "os" + "path/filepath" + "regexp" + "strconv" + "strings" + "time" +) + +// snapshotUIDRe validates that a pod UID is safe for use as a directory name. +var snapshotUIDRe = regexp.MustCompile(`^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$`) + +// snapshotStore manages a directory tree that mirrors the resctrl mon_data +// layout, augmenting it with .begin/.end counter files and symlinks to the +// live kernel counters. +// +// Layout for each pod: +// +// // +// created_at # RFC 3339 timestamp +// completed_at # RFC 3339 timestamp (written on stop) +// mon_data/ +// mon_PERF_PKG_00/ +// core_energy → /sys/fs/resctrl/.../core_energy (symlink) +// core_energy.begin # counter value at mon_group creation +// core_energy.end # counter value at mon_group removal +// mon_L3_00/ +// llc_occupancy → ... 
+// llc_occupancy.begin +// llc_occupancy.end +type snapshotStore struct { + dir string + ttl time.Duration +} + +func newSnapshotStore(dir string, ttl time.Duration) (*snapshotStore, error) { + if err := os.MkdirAll(dir, 0700); err != nil { + return nil, fmt.Errorf("failed to create snapshot directory %s: %w", dir, err) + } + // Tighten permissions in case the directory already existed with looser mode. + if err := os.Chmod(dir, 0700); err != nil { + return nil, fmt.Errorf("failed to chmod snapshot directory %s: %w", dir, err) + } + return &snapshotStore{dir: dir, ttl: ttl}, nil +} + +// podDir returns the validated directory path for a pod's snapshot tree. +func (s *snapshotStore) podDir(podUID string) (string, error) { + if !snapshotUIDRe.MatchString(podUID) { + return "", fmt.Errorf("invalid pod UID for snapshot directory: %q", podUID) + } + return filepath.Join(s.dir, podUID), nil +} + +// writeInitial creates the mirror directory tree with .begin counter files +// and symlinks to the live resctrl event files. The created_at file is +// written last as a commit marker. +func (s *snapshotStore) writeInitial(podUID, monGroupDir string, counters map[string]map[string]float64) error { + podDir, err := s.podDir(podUID) + if err != nil { + return err + } + + for domain, events := range counters { + if !isSafePathComponent(domain) { + log.Warnf("writeInitial: skipping invalid domain name %q", domain) + continue + } + domainDir := filepath.Join(podDir, "mon_data", domain) + if err := os.MkdirAll(domainDir, 0700); err != nil { + return fmt.Errorf("failed to create domain directory %s: %w", domainDir, err) + } + srcDomainDir := filepath.Join(monGroupDir, "mon_data", domain) + for event, val := range events { + if !isSafePathComponent(event) { + log.Warnf("writeInitial: skipping invalid event name %q", event) + continue + } + // Write .begin file. 
+ if err := writeCounterFile(filepath.Join(domainDir, event+".begin"), val); err != nil { + return err + } + // Create symlink to live counter. + linkPath := filepath.Join(domainDir, event) + os.Remove(linkPath) // idempotent: remove stale symlink on re-create + if err := os.Symlink(filepath.Join(srcDomainDir, event), linkPath); err != nil { + log.Warnf("writeInitial: failed to create symlink %s: %v", linkPath, err) + } + } + } + + // Write created_at last as commit marker. + return os.WriteFile(filepath.Join(podDir, "created_at"), + []byte(time.Now().UTC().Format(time.RFC3339Nano)+"\n"), 0600) +} + +// writeFinal writes .end counter files and a completed_at timestamp. +// The completed_at file is written last as a commit marker so consumers +// know all .end files are present. +func (s *snapshotStore) writeFinal(podUID string, counters map[string]map[string]float64) error { + podDir, err := s.podDir(podUID) + if err != nil { + return err + } + + for domain, events := range counters { + if !isSafePathComponent(domain) { + log.Warnf("writeFinal: skipping invalid domain name %q", domain) + continue + } + domainDir := filepath.Join(podDir, "mon_data", domain) + if err := os.MkdirAll(domainDir, 0700); err != nil { + return fmt.Errorf("failed to create domain directory %s: %w", domainDir, err) + } + for event, val := range events { + if !isSafePathComponent(event) { + log.Warnf("writeFinal: skipping invalid event name %q", event) + continue + } + if err := writeCounterFile(filepath.Join(domainDir, event+".end"), val); err != nil { + return err + } + // Remove the symlink to the (now-removed) live counter. + // After .end is written the symlink is dangling and serves no purpose. + linkPath := filepath.Join(domainDir, event) + os.Remove(linkPath) + } + } + + // Write completed_at last as commit marker. 
+ return os.WriteFile(filepath.Join(podDir, "completed_at"), + []byte(time.Now().UTC().Format(time.RFC3339Nano)+"\n"), 0600) +} + +// pruneCompleted removes snapshot directories for completed pods older than the TTL. +func (s *snapshotStore) pruneCompleted() { + entries, err := os.ReadDir(s.dir) + if err != nil { + log.Warnf("pruneCompleted: failed to read snapshot directory %s: %v", s.dir, err) + return + } + + cutoff := time.Now().UTC().Add(-s.ttl) + for _, entry := range entries { + if !entry.IsDir() || !snapshotUIDRe.MatchString(entry.Name()) { + continue + } + podDir := filepath.Join(s.dir, entry.Name()) + data, err := os.ReadFile(filepath.Join(podDir, "completed_at")) + if err != nil { + continue // not completed yet, or not a snapshot dir + } + ts, err := time.Parse(time.RFC3339Nano, strings.TrimSpace(string(data))) + if err != nil { + continue + } + if ts.Before(cutoff) { + if err := os.RemoveAll(podDir); err != nil { + log.Warnf("pruneCompleted: failed to remove %s: %v", podDir, err) + } else { + log.Debugf("pruneCompleted: removed expired snapshot %s", podDir) + } + } + } +} + +// writeCounterFile writes a single counter value in the same text format +// as the kernel's resctrl event files (one number per line, no trailing zeros). +func writeCounterFile(path string, val float64) error { + s := strconv.FormatFloat(val, 'f', -1, 64) + if err := os.WriteFile(path, []byte(s+"\n"), 0600); err != nil { + return fmt.Errorf("failed to write counter file %s: %w", path, err) + } + return nil +} + +// isSafePathComponent returns true if name is a single, non-empty filename +// component with no path separators or traversal sequences. +func isSafePathComponent(name string) bool { + return name != "" && + name != "." && + name != ".." 
&& + !strings.ContainsAny(name, "/\\") +} diff --git a/cmd/plugins/resctrl-mon/snapshot_test.go b/cmd/plugins/resctrl-mon/snapshot_test.go new file mode 100644 index 000000000..95101efbc --- /dev/null +++ b/cmd/plugins/resctrl-mon/snapshot_test.go @@ -0,0 +1,266 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "os" + "path/filepath" + "strconv" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// mustPodDir is a test helper that calls podDir and fails on error. +func mustPodDir(t *testing.T, s *snapshotStore, podUID string) string { + t.Helper() + p, err := s.podDir(podUID) + require.NoError(t, err) + return p +} + +// readCounterFile reads and parses a counter value file in the test. 
+func readCounterFile(t *testing.T, path string) float64 { + t.Helper() + data, err := os.ReadFile(path) + require.NoError(t, err) + val, err := strconv.ParseFloat(strings.TrimSpace(string(data)), 64) + require.NoError(t, err) + return val +} + +func TestPodDir_RejectsTraversal(t *testing.T) { + store, err := newSnapshotStore(t.TempDir(), 5*time.Minute) + require.NoError(t, err) + + for _, bad := range []string{"../etc/passwd", "../../root", "not-a-uid", "", "a1b2c3d4-e5f6-7890-ABCD-ef1234567890"} { + _, err := store.podDir(bad) + assert.Error(t, err, "should reject %q", bad) + } +} + +func TestIsSafePathComponent(t *testing.T) { + for _, safe := range []string{"mon_PERF_PKG_00", "core_energy", "llc_occupancy", "mbm_total_bytes"} { + assert.True(t, isSafePathComponent(safe), "%q should be safe", safe) + } + for _, bad := range []string{"", ".", "..", "../etc", "foo/bar", "foo\\bar"} { + assert.False(t, isSafePathComponent(bad), "%q should be rejected", bad) + } +} + +func TestNewSnapshotStore_DirPermissions(t *testing.T) { + parent := t.TempDir() + dir := filepath.Join(parent, "restricted") + _, err := newSnapshotStore(dir, 5*time.Minute) + require.NoError(t, err) + + info, err := os.Stat(dir) + require.NoError(t, err) + assert.Equal(t, os.FileMode(0700), info.Mode().Perm()) +} + +func TestNewSnapshotStore_CreatesDir(t *testing.T) { + parent := t.TempDir() + dir := filepath.Join(parent, "sub", "dir") + + store, err := newSnapshotStore(dir, 5*time.Minute) + require.NoError(t, err) + assert.Equal(t, dir, store.dir) + + info, err := os.Stat(dir) + require.NoError(t, err) + assert.True(t, info.IsDir()) +} + +func TestWriteInitial(t *testing.T) { + dir := t.TempDir() + store, err := newSnapshotStore(dir, 5*time.Minute) + require.NoError(t, err) + + podUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + monGroupDir := "/sys/fs/resctrl/mon_groups/" + podUID + counters := map[string]map[string]float64{ + "mon_PERF_PKG_00": {"core_energy": 42.123, "activity": 100.5}, + 
"mon_L3_00": {"llc_occupancy": 12345}, + } + + err = store.writeInitial(podUID, monGroupDir, counters) + require.NoError(t, err) + + podDir := mustPodDir(t, store, podUID) + + // Verify .begin files. + assert.Equal(t, 42.123, readCounterFile(t, filepath.Join(podDir, "mon_data", "mon_PERF_PKG_00", "core_energy.begin"))) + assert.Equal(t, 100.5, readCounterFile(t, filepath.Join(podDir, "mon_data", "mon_PERF_PKG_00", "activity.begin"))) + assert.Equal(t, 12345.0, readCounterFile(t, filepath.Join(podDir, "mon_data", "mon_L3_00", "llc_occupancy.begin"))) + + // Verify symlinks point to resctrl. + target, err := os.Readlink(filepath.Join(podDir, "mon_data", "mon_PERF_PKG_00", "core_energy")) + require.NoError(t, err) + assert.Equal(t, filepath.Join(monGroupDir, "mon_data", "mon_PERF_PKG_00", "core_energy"), target) + + // Verify created_at exists and is valid. + data, err := os.ReadFile(filepath.Join(podDir, "created_at")) + require.NoError(t, err) + _, err = time.Parse(time.RFC3339Nano, strings.TrimSpace(string(data))) + assert.NoError(t, err) + + // Verify no completed_at yet. + _, err = os.Stat(filepath.Join(podDir, "completed_at")) + assert.True(t, os.IsNotExist(err)) + + // Verify .begin file permissions. 
+ info, err := os.Stat(filepath.Join(podDir, "mon_data", "mon_PERF_PKG_00", "core_energy.begin")) + require.NoError(t, err) + assert.Equal(t, os.FileMode(0600), info.Mode().Perm()) +} + +func TestWriteFinal(t *testing.T) { + dir := t.TempDir() + store, err := newSnapshotStore(dir, 5*time.Minute) + require.NoError(t, err) + + podUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + initial := map[string]map[string]float64{ + "mon_PERF_PKG_00": {"core_energy": 42.123}, + } + final := map[string]map[string]float64{ + "mon_PERF_PKG_00": {"core_energy": 99.456}, + } + + err = store.writeInitial(podUID, "/fake/mon_groups/"+podUID, initial) + require.NoError(t, err) + + err = store.writeFinal(podUID, final) + require.NoError(t, err) + + podDir := mustPodDir(t, store, podUID) + + // Verify .begin preserved. + assert.Equal(t, 42.123, readCounterFile(t, filepath.Join(podDir, "mon_data", "mon_PERF_PKG_00", "core_energy.begin"))) + // Verify .end written. + assert.Equal(t, 99.456, readCounterFile(t, filepath.Join(podDir, "mon_data", "mon_PERF_PKG_00", "core_energy.end"))) + // Verify symlink removed (was dangling after mon_group removal). + _, err = os.Lstat(filepath.Join(podDir, "mon_data", "mon_PERF_PKG_00", "core_energy")) + assert.True(t, os.IsNotExist(err), "symlink should be removed after writeFinal") + // Verify completed_at exists. 
+ data, err := os.ReadFile(filepath.Join(podDir, "completed_at")) + require.NoError(t, err) + _, err = time.Parse(time.RFC3339Nano, strings.TrimSpace(string(data))) + assert.NoError(t, err) +} + +func TestWriteFinal_NoInitial(t *testing.T) { + dir := t.TempDir() + store, err := newSnapshotStore(dir, 5*time.Minute) + require.NoError(t, err) + + podUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + final := map[string]map[string]float64{ + "mon_PERF_PKG_00": {"core_energy": 99.456}, + } + + err = store.writeFinal(podUID, final) + require.NoError(t, err) + + podDir := mustPodDir(t, store, podUID) + assert.Equal(t, 99.456, readCounterFile(t, filepath.Join(podDir, "mon_data", "mon_PERF_PKG_00", "core_energy.end"))) + + // No .begin file should exist. + _, err = os.Stat(filepath.Join(podDir, "mon_data", "mon_PERF_PKG_00", "core_energy.begin")) + assert.True(t, os.IsNotExist(err)) + + // completed_at should exist. + _, err = os.Stat(filepath.Join(podDir, "completed_at")) + assert.NoError(t, err) +} + +func TestPruneCompleted(t *testing.T) { + dir := t.TempDir() + store, err := newSnapshotStore(dir, 1*time.Millisecond) // very short TTL + require.NoError(t, err) + + // Create a completed snapshot. + podUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + err = store.writeInitial(podUID, "/fake/mon_groups/"+podUID, map[string]map[string]float64{ + "mon_PERF_PKG_00": {"core_energy": 1.0}, + }) + require.NoError(t, err) + err = store.writeFinal(podUID, map[string]map[string]float64{ + "mon_PERF_PKG_00": {"core_energy": 2.0}, + }) + require.NoError(t, err) + + // Create an active snapshot (no completed_at). + activeUID := "deadbeef-dead-beef-dead-beefdeadbeef" + err = store.writeInitial(activeUID, "/fake/mon_groups/"+activeUID, map[string]map[string]float64{ + "mon_PERF_PKG_00": {"core_energy": 5.0}, + }) + require.NoError(t, err) + + // Wait for TTL to expire. + time.Sleep(5 * time.Millisecond) + + store.pruneCompleted() + + // Completed should be removed. 
+ _, err = os.Stat(mustPodDir(t, store, podUID)) + assert.True(t, os.IsNotExist(err)) + + // Active should still exist. + _, err = os.Stat(mustPodDir(t, store, activeUID)) + assert.NoError(t, err) +} + +func TestPruneCompleted_KeepsRecent(t *testing.T) { + dir := t.TempDir() + store, err := newSnapshotStore(dir, 1*time.Hour) // long TTL + require.NoError(t, err) + + podUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + err = store.writeInitial(podUID, "/fake/mon_groups/"+podUID, map[string]map[string]float64{ + "mon_PERF_PKG_00": {"core_energy": 1.0}, + }) + require.NoError(t, err) + err = store.writeFinal(podUID, map[string]map[string]float64{ + "mon_PERF_PKG_00": {"core_energy": 2.0}, + }) + require.NoError(t, err) + + store.pruneCompleted() + + // Should still exist (TTL not expired). + _, err = os.Stat(mustPodDir(t, store, podUID)) + assert.NoError(t, err) +} + +func TestWriteCounterFile_IntegerFormat(t *testing.T) { + f := filepath.Join(t.TempDir(), "test") + require.NoError(t, writeCounterFile(f, 3833856)) + data, err := os.ReadFile(f) + require.NoError(t, err) + assert.Equal(t, "3833856\n", string(data)) +} + +func TestWriteCounterFile_FloatFormat(t *testing.T) { + f := filepath.Join(t.TempDir(), "test") + require.NoError(t, writeCounterFile(f, 792.341167)) + data, err := os.ReadFile(f) + require.NoError(t, err) + assert.Equal(t, "792.341167\n", string(data)) +} diff --git a/cmd/plugins/resctrl-mon/state.go b/cmd/plugins/resctrl-mon/state.go index 47dba1f1a..f3373bc9f 100644 --- a/cmd/plugins/resctrl-mon/state.go +++ b/cmd/plugins/resctrl-mon/state.go @@ -18,8 +18,9 @@ import "sync" // podInfo tracks the mon_group directory and container set for a single pod. type podInfo struct { - monGroupDir string - containers map[string]struct{} // container IDs + monGroupDir string + containers map[string]struct{} // container IDs + initialSnapshotDone bool } // podState tracks all pods with active mon_groups. 
@@ -107,3 +108,22 @@ func (s *podState) podCount() int { defer s.mu.Unlock() return len(s.pods) } + +// setInitialSnapshotDone marks the initial counter snapshot as taken. +func (s *podState) setInitialSnapshotDone(podUID string) { + s.mu.Lock() + defer s.mu.Unlock() + if info, ok := s.pods[podUID]; ok { + info.initialSnapshotDone = true + } +} + +// isInitialSnapshotDone returns true if the initial snapshot was already taken. +func (s *podState) isInitialSnapshotDone(podUID string) bool { + s.mu.Lock() + defer s.mu.Unlock() + if info, ok := s.pods[podUID]; ok { + return info.initialSnapshotDone + } + return false +} diff --git a/deployment/helm/resctrl-mon/README.md b/deployment/helm/resctrl-mon/README.md index 206e3eced..534eafd8a 100644 --- a/deployment/helm/resctrl-mon/README.md +++ b/deployment/helm/resctrl-mon/README.md @@ -119,3 +119,5 @@ customize with their own values, along with the default values. | `affinity` | [] | specify node affinity | | `nodeSelector` | [] | specify node selector labels | | `podPriorityClassNodeCritical` | true | enable [marking Pod as node critical](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/#marking-pod-as-critical) | +| `snapshotDir` | /run/nri-resctrl-mon | host path for counter snapshot files | +| `snapshotTTL` | 5m | how long completed snapshots are kept before pruning | diff --git a/deployment/helm/resctrl-mon/templates/daemonset.yaml b/deployment/helm/resctrl-mon/templates/daemonset.yaml index 7b3dd4ead..cfa895273 100644 --- a/deployment/helm/resctrl-mon/templates/daemonset.yaml +++ b/deployment/helm/resctrl-mon/templates/daemonset.yaml @@ -80,6 +80,8 @@ spec: mountPath: /var/run/nri - name: resctrlfs mountPath: /sys/fs/resctrl + - name: snapshot-dir + mountPath: {{ .Values.snapshotDir | default "/run/nri-resctrl-mon" }} {{- if .Values.podPriorityClassNodeCritical }} priorityClassName: system-node-critical {{- end }} @@ -95,6 +97,10 @@ spec: hostPath: path:
/sys/fs/resctrl type: Directory + - name: snapshot-dir + hostPath: + path: {{ .Values.snapshotDir | default "/run/nri-resctrl-mon" }} + type: DirectoryOrCreate {{- if .Values.nri.runtime.patchConfig }} - name: containerd-config hostPath: diff --git a/deployment/helm/resctrl-mon/values.schema.json b/deployment/helm/resctrl-mon/values.schema.json index 07514a2c3..9fd52aa3d 100644 --- a/deployment/helm/resctrl-mon/values.schema.json +++ b/deployment/helm/resctrl-mon/values.schema.json @@ -112,6 +112,17 @@ }, "podPriorityClassNodeCritical": { "type": "boolean" + }, + "snapshotDir": { + "type": "string", + "description": "Host path for counter snapshot files", + "default": "/run/nri-resctrl-mon" + }, + "snapshotTTL": { + "type": "string", + "description": "Duration to keep completed snapshots (Go duration restricted to a single integer plus unit s, m or h, e.g. 30s, 5m, 1h)", + "default": "5m", + "pattern": "^[0-9]+(s|m|h)$" } } } diff --git a/deployment/helm/resctrl-mon/values.yaml b/deployment/helm/resctrl-mon/values.yaml index 58e0915bd..ac3948f6a 100644 --- a/deployment/helm/resctrl-mon/values.yaml +++ b/deployment/helm/resctrl-mon/values.yaml @@ -12,6 +12,9 @@ resources: cpu: 10m memory: 50Mi +snapshotDir: /run/nri-resctrl-mon +snapshotTTL: 5m + nri: plugin: index: 90 diff --git a/docs/monitoring/resctrl-mon.md b/docs/monitoring/resctrl-mon.md index 09eebf2d6..96fb20735 100644 --- a/docs/monitoring/resctrl-mon.md +++ b/docs/monitoring/resctrl-mon.md @@ -55,6 +55,12 @@ namespaces: [] # Pod label selector: only create mon_groups for pods matching these labels. # Empty = all pods. labelSelector: {} + +# Directory for counter snapshot files (see "Counter Snapshots" below). +snapshotDir: /run/nri-resctrl-mon + +# How long completed snapshots are kept before pruning (Go duration). +snapshotTTL: 5m ``` ## Coexistence with Allocation Plugins @@ -82,6 +88,103 @@ RMID allocation is delegated entirely to the Linux kernel: - **Deallocation**: `rmdir` releases the RMID. The kernel handles the hardware recycling window.
+## Counter Snapshots + +Resctrl monitoring counters in a new `mon_group` do not start at zero — +the kernel seeds them with the current hardware register value. This +creates measurement gaps: + +1. **Start gap**: The delta between mon_group creation and the first + consumer scrape is missed. +2. **End gap**: The delta between the last scrape and mon_group removal + is lost. +3. **Missed pods**: Pods whose entire lifetime falls between two scrape + intervals are never observed. + +To close these gaps the plugin writes a directory tree to a configurable +host path (default `/run/nri-resctrl-mon/`) that mirrors the resctrl +`mon_data` layout: + +- **On first PID write** (`StartContainer`/`PostStartContainer`): the + plugin reads all `mon_data/` event files, writes `.begin` counter + files, and creates symlinks to the live resctrl event files. +- **On last container stop** (`StopContainer`, before `rmdir`): the + plugin reads the counters again and writes `.end` counter files. + +Consumers compute lifetime totals as: + +``` +lifetime = cat <event>.end - cat <event>.begin +``` + +### Snapshot Directory Layout + +Each pod gets a directory named by its UID: + +``` +/run/nri-resctrl-mon/<pod-uid>/ + created_at # RFC 3339 timestamp + completed_at # RFC 3339 timestamp (appears on stop) + mon_data/ + mon_PERF_PKG_00/ + core_energy → /sys/fs/resctrl/.../core_energy (symlink) + core_energy.begin # 792.341167 + core_energy.end # 1285.102834 + activity → ... + activity.begin # 6009.826054 + activity.end # 12019.652108 + mon_L3_00/ + llc_occupancy → ... + llc_occupancy.begin # 3833856 + llc_occupancy.end # 4521984 + mbm_local_bytes.begin # 7912636416 + mbm_local_bytes.end # 15825272832 + ... +``` + +Counter files use the same one-number-per-file text format as the kernel. +Symlinks point to the live resctrl event files while the pod is running. +The plugin removes them when it writes the `.end` files, so a missing +symlink together with `completed_at` signals that the pod has stopped +and `.end` files contain the final values.
+ +Consumers that already read `/sys/fs/resctrl` directly can discover and +use the snapshot data with no new parsing logic. If the snapshot directory +is absent, consumers fall back to their existing resctrl scraping path. + +Completed snapshots are pruned after `snapshotTTL` (default 5 minutes). +The TTL should be set longer than the consumer's scrape interval. + +### Consumer Mount Requirements + +The snapshot directory (`/run/nri-resctrl-mon`) can be mounted at any +container path — `.begin`, `.end`, `created_at`, and `completed_at` are +regular files that work regardless of mount layout. + +The symlinks to live resctrl counters use **absolute host paths** (e.g. +`/sys/fs/resctrl/mon_groups/<pod-uid>/mon_data/.../core_energy`). They +resolve correctly only if the consumer container mounts the resctrl +filesystem at the **same path** as the host: + +```yaml +volumeMounts: + - name: resctrlfs + mountPath: /sys/fs/resctrl # must match the host path + readOnly: true + - name: snapshots + mountPath: /run/nri-resctrl-mon # can be any path + readOnly: true +``` + +If the consumer mounts resctrl at a different path (e.g. +`/host/sys/fs/resctrl`), the symlinks will not resolve. This does not +affect the primary use case: computing lifetime totals from `.begin` and +`.end` files requires no symlink resolution. The symlinks are a +convenience for reading the live counter value mid-lifecycle. + +After a pod stops, the plugin removes its symlinks because the kernel +`mon_group` they pointed to is gone. Their absence, together with +`completed_at`, signals that the `.end` files contain the final counter values. + ## Developer's Guide ### Prerequisites diff --git a/sample-configs/nri-resctrl-mon.yaml b/sample-configs/nri-resctrl-mon.yaml index 9cbd339a0..52f63372c 100644 --- a/sample-configs/nri-resctrl-mon.yaml +++ b/sample-configs/nri-resctrl-mon.yaml @@ -1,3 +1,5 @@ resctrlPath: /sys/fs/resctrl namespaces: [] labelSelector: {} +snapshotDir: /run/nri-resctrl-mon +snapshotTTL: 5m