diff --git a/api/v0/types.go b/api/v0/types.go index 38d5b4f..2a41567 100644 --- a/api/v0/types.go +++ b/api/v0/types.go @@ -62,3 +62,15 @@ type Allocation struct { Aligned *AlignedInfo `json:"aligned,omitempty"` Unaligned *UnalignedInfo `json:"unaligned,omitempty"` } + +type NUMAMapsNodeInfo struct { + Pages int64 `json:"pages"` + SizeKiB int64 `json:"sizeKiB"` +} + +type NUMAMapsInfo struct { + Nodes map[int]NUMAMapsNodeInfo `json:"nodes,omitempty"` + LocalPages int64 `json:"localPages"` + RemotePages int64 `json:"remotePages"` + Local bool `json:"local"` +} diff --git a/internal/cli/alignmem.go b/internal/cli/alignmem.go new file mode 100644 index 0000000..5fe3bba --- /dev/null +++ b/internal/cli/alignmem.go @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: Apache-2.0 + +package cli + +import ( + "encoding/json" + "os" + + "github.com/spf13/cobra" + + "k8s.io/utils/cpuset" + + apiv0 "github.com/ffromani/ctrreschk/api/v0" + "github.com/ffromani/ctrreschk/pkg/cgroups" + "github.com/ffromani/ctrreschk/pkg/environ" + "github.com/ffromani/ctrreschk/pkg/machine" + "github.com/ffromani/ctrreschk/pkg/numamaps" +) + +func NewAlignMemCommand(env *environ.Environ, opts *Options) *cobra.Command { + alignMemCmd := &cobra.Command{ + Use: "alignmem", + Short: "verify actual memory NUMA placement via numa_maps", + RunE: func(cmd *cobra.Command, args []string) error { + cpus, err := cgroups.Cpuset(env) + if err != nil { + return err + } + + mach, err := machine.Discover(env) + if err != nil { + return err + } + + nm, err := numamaps.Read(env) + if err != nil { + return err + } + + cpuNUMANodes := cpuNUMANodesFromTopology(cpus, mach) + env.Log.V(2).Info("alignmem", "cpuNUMANodes", cpuNUMANodes.String()) + + result := buildNUMAMapsInfo(nm, cpuNUMANodes) + + err = json.NewEncoder(os.Stdout).Encode(result) + if err != nil { + return err + } + return MainLoop(opts) + }, + Args: cobra.NoArgs, + } + + alignMemCmd.PersistentFlags().StringVarP(&env.DataPath, "machinedata", "M", "", "read 
fake machine data from path, don't read real data from the system") + + return alignMemCmd +} + +func cpuNUMANodesFromTopology(cpus cpuset.CPUSet, mach machine.Machine) cpuset.CPUSet { + result := cpuset.New() + for _, node := range mach.Topology.Nodes { + for _, core := range node.Cores { + coreCPUs := cpuset.New(core.LogicalProcessors...) + if !cpus.Intersection(coreCPUs).IsEmpty() { + result = result.Union(cpuset.New(node.ID)) + break + } + } + } + return result +} + +func buildNUMAMapsInfo(nm numamaps.NumaMaps, cpuNUMANodes cpuset.CPUSet) apiv0.NUMAMapsInfo { + pagesByNode := nm.TotalPagesByNode() + bytesByNode := nm.TotalBytesByNode() + + info := apiv0.NUMAMapsInfo{ + Nodes: make(map[int]apiv0.NUMAMapsNodeInfo), + } + + for nodeID, pages := range pagesByNode { + info.Nodes[nodeID] = apiv0.NUMAMapsNodeInfo{ + Pages: pages, + SizeKiB: bytesByNode[nodeID] / 1024, + } + if cpuNUMANodes.Contains(nodeID) { + info.LocalPages += pages + } else { + info.RemotePages += pages + } + } + + info.Local = info.RemotePages == 0 && info.LocalPages > 0 + + return info +} diff --git a/internal/cli/root.go b/internal/cli/root.go index 265f380..31ac1ab 100644 --- a/internal/cli/root.go +++ b/internal/cli/root.go @@ -64,6 +64,7 @@ func NewRootCommand(env *environ.Environ, extraCmds ...NewCommandFunc) *cobra.Co root.AddCommand( NewAlignCommand(env, &opts), + NewAlignMemCommand(env, &opts), NewInfoCommand(env, &opts), NewK8SCommand(env, &opts), NewPauseCommand(env, &opts), diff --git a/pkg/environ/environ.go b/pkg/environ/environ.go index 0a0e272..036e66e 100644 --- a/pkg/environ/environ.go +++ b/pkg/environ/environ.go @@ -25,12 +25,14 @@ import ( ) type FS struct { - Sys string + Sys string + Proc string } func DefaultFS() FS { return FS{ - Sys: "/sys", + Sys: "/sys", + Proc: "/proc", } } diff --git a/pkg/numamaps/numamaps.go b/pkg/numamaps/numamaps.go new file mode 100644 index 0000000..98a7e6b --- /dev/null +++ b/pkg/numamaps/numamaps.go @@ -0,0 +1,143 @@ +// 
// numaPageRe matches a per-node page counter field such as "N0=42": capture
// group 1 is the node ID, capture group 2 is the page count.
var numaPageRe = regexp.MustCompile(`^N(\d+)=(\d+)$`)

// VMA holds the fields parseLine extracts from a single numa_maps line.
type VMA struct {
	Address        uint64        // first field, parsed as hexadecimal
	Policy         string        // second field, stored verbatim
	FilePath       string        // value of the "file=" field, empty when absent
	NUMAPages      map[int]int64 // page count per node ID, from "N<node>=<pages>" fields
	KernPageSizeKB int64         // value of "kernelpagesize_kB=", 4 when absent
}

// parseLine parses one whitespace-separated numa_maps line into a VMA.
//
// The first field is the hexadecimal address and the second is the policy.
// Among the remaining fields only "N<node>=<pages>", "file=<path>" and
// "kernelpagesize_kB=<n>" are interpreted; any other field is ignored.
//
// An error is returned when the line has fewer than two fields, when the
// address is not valid hexadecimal, when a node ID or page count does not fit
// its integer type, or when kernelpagesize_kB is not a positive integer. On
// error the returned VMA is the zero value.
func parseLine(line string) (VMA, error) {
	fields := strings.Fields(line)
	if len(fields) < 2 {
		return VMA{}, fmt.Errorf("too few fields")
	}

	addr, err := strconv.ParseUint(fields[0], 16, 64)
	if err != nil {
		return VMA{}, fmt.Errorf("invalid address %q: %w", fields[0], err)
	}

	vma := VMA{
		Address:        addr,
		Policy:         fields[1],
		NUMAPages:      make(map[int]int64),
		KernPageSizeKB: 4, // fallback when the line has no kernelpagesize_kB field
	}

	for _, field := range fields[2:] {
		if m := numaPageRe.FindStringSubmatch(field); m != nil {
			// The regexp guarantees digits only, so the conversions can only
			// fail on overflow. Report that instead of storing the clamped
			// value strconv returns alongside the range error.
			nodeID, err := strconv.Atoi(m[1])
			if err != nil {
				return VMA{}, fmt.Errorf("invalid NUMA node in %q: %w", field, err)
			}
			pages, err := strconv.ParseInt(m[2], 10, 64)
			if err != nil {
				return VMA{}, fmt.Errorf("invalid page count in %q: %w", field, err)
			}
			vma.NUMAPages[nodeID] = pages
			continue
		}

		if rest, ok := strings.CutPrefix(field, "file="); ok {
			vma.FilePath = rest
			continue
		}

		if rest, ok := strings.CutPrefix(field, "kernelpagesize_kB="); ok {
			// A bad page size must not silently fall back to the default:
			// byte totals derived from it would be wrong without any hint.
			size, err := strconv.ParseInt(rest, 10, 64)
			if err != nil {
				return VMA{}, fmt.Errorf("invalid kernel page size in %q: %w", field, err)
			}
			if size <= 0 {
				return VMA{}, fmt.Errorf("invalid kernel page size in %q: must be positive", field)
			}
			vma.KernPageSizeKB = size
		}
	}

	return vma, nil
}
%d", len(nm.VMAs)) + } + nodes := nm.NUMANodes() + if !reflect.DeepEqual(nodes, []int{0}) { + t.Fatalf("expected nodes [0], got %v", nodes) + } + pages := nm.TotalPagesByNode() + if pages[0] != 13 { + t.Fatalf("expected 13 pages on N0, got %d", pages[0]) + } + bytes := nm.TotalBytesByNode() + if bytes[0] != 13*4*1024 { + t.Fatalf("expected %d bytes on N0, got %d", 13*4*1024, bytes[0]) + } + }, + }, + { + name: "two nodes split pages", + content: "55dc822f1000 default file=/usr/bin/sleep mapped=2 active=0 N1=2 kernelpagesize_kB=4\n" + + "55dc822fb000 default file=/usr/bin/sleep anon=1 dirty=1 active=0 N0=1 kernelpagesize_kB=4\n" + + "7fb2a62c7000 default file=/usr/lib/x86_64-linux-gnu/libc.so.6 mapped=40 active=0 N1=40 kernelpagesize_kB=4\n" + + "7fffef2ff000 default stack anon=3 dirty=3 active=1 N0=3 kernelpagesize_kB=4\n", + checkFn: func(t *testing.T, nm NumaMaps) { + if len(nm.VMAs) != 4 { + t.Fatalf("expected 4 VMAs, got %d", len(nm.VMAs)) + } + nodes := nm.NUMANodes() + if !reflect.DeepEqual(nodes, []int{0, 1}) { + t.Fatalf("expected nodes [0 1], got %v", nodes) + } + pages := nm.TotalPagesByNode() + if pages[0] != 4 { + t.Fatalf("expected 4 pages on N0, got %d", pages[0]) + } + if pages[1] != 42 { + t.Fatalf("expected 42 pages on N1, got %d", pages[1]) + } + }, + }, + { + name: "empty VMAs no pages", + content: "7fb2a64be000 default\n" + + "7fb2a64c2000 default\n", + checkFn: func(t *testing.T, nm NumaMaps) { + if len(nm.VMAs) != 2 { + t.Fatalf("expected 2 VMAs, got %d", len(nm.VMAs)) + } + nodes := nm.NUMANodes() + if len(nodes) != 0 { + t.Fatalf("expected no nodes, got %v", nodes) + } + }, + }, + { + name: "mixed page sizes", + content: "400000 default anon=10 N0=10 kernelpagesize_kB=4\n500000 default anon=5 N0=5 kernelpagesize_kB=2048\n", + checkFn: func(t *testing.T, nm NumaMaps) { + pages := nm.TotalPagesByNode() + if pages[0] != 15 { + t.Fatalf("expected 15 pages on N0, got %d", pages[0]) + } + bytes := nm.TotalBytesByNode() + expectedBytes := 
int64(10*4*1024 + 5*2048*1024) + if bytes[0] != expectedBytes { + t.Fatalf("expected %d bytes on N0, got %d", expectedBytes, bytes[0]) + } + }, + }, + { + name: "file path parsed", + content: "55dc822f1000 default file=/usr/bin/ctrreschk mapped=2 N0=2 kernelpagesize_kB=4\n", + checkFn: func(t *testing.T, nm NumaMaps) { + if nm.VMAs[0].FilePath != "/usr/bin/ctrreschk" { + t.Fatalf("expected file path /usr/bin/ctrreschk, got %q", nm.VMAs[0].FilePath) + } + }, + }, + { + name: "bind policy", + content: "400000 bind:0 anon=5 N0=5 kernelpagesize_kB=4\n", + checkFn: func(t *testing.T, nm NumaMaps) { + if nm.VMAs[0].Policy != "bind:0" { + t.Fatalf("expected policy bind:0, got %q", nm.VMAs[0].Policy) + } + }, + }, + } + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + var env environ.Environ + if len(tt.path) > 0 { + env = environ.Environ{ + Root: environ.FS{ + Proc: tt.path, + }, + Log: environ.DefaultLog(), + } + } else if len(tt.content) > 0 { + tmpDir := t.TempDir() + env = environ.Environ{ + Root: environ.FS{ + Proc: tmpDir, + }, + Log: environ.DefaultLog(), + } + tmpPath := NumaMapsPath(&env) + err := os.MkdirAll(filepath.Dir(tmpPath), os.ModePerm) + if err != nil { + t.Fatalf("cannot prepare fake data path at %v: %v", tmpPath, err) + } + err = os.WriteFile(tmpPath, []byte(tt.content), 0o644) + if err != nil { + t.Fatalf("cannot prepare fake data file at %v: %v", tmpPath, err) + } + } else { + t.Fatalf("neither path or content given; wrong test") + } + + got, err := Read(&env) + if tt.expectedErr && err == nil { + t.Fatalf("expected error, got success") + } + if !tt.expectedErr && err != nil { + t.Fatalf("expected success, got err=%v", err) + } + if tt.checkFn != nil { + tt.checkFn(t, got) + } + }) + } +} diff --git a/test/e2e/alignmem_smoke_test.go b/test/e2e/alignmem_smoke_test.go new file mode 100644 index 0000000..ef515ad --- /dev/null +++ b/test/e2e/alignmem_smoke_test.go @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: Apache-2.0 + +package 
e2e + +import ( + "context" + "encoding/json" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + apiv0 "github.com/ffromani/ctrreschk/api/v0" +) + +var _ = Describe("ctrreschk alignmem smoke test", func() { + It("should run and produce valid NUMAMapsInfo JSON", func(ctx context.Context) { + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ctrreschk-alignmem-smoke", + Namespace: testNamespace, + }, + Spec: corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyNever, + Containers: []corev1.Container{ + { + Name: "ctrreschk", + Image: testImage, + ImagePullPolicy: corev1.PullIfNotPresent, + Command: []string{"/ctrreschk", "alignmem"}, + }, + }, + }, + } + + created, deletePod := createPod(ctx, pod) + DeferCleanup(deletePod, ctx) + + finished := waitForPodDone(ctx, created.Namespace, created.Name) + Expect(finished.Status.Phase).To(Equal(corev1.PodSucceeded), "pod should complete successfully") + + logs := getPodLogs(ctx, created.Namespace, created.Name) + Expect(logs).NotTo(BeEmpty(), "pod logs should not be empty") + + var result apiv0.NUMAMapsInfo + err := json.Unmarshal([]byte(logs), &result) + Expect(err).NotTo(HaveOccurred(), "pod output should be valid JSON, got: %s", logs) + + Expect(result.Nodes).NotTo(BeEmpty(), "should report at least one NUMA node with pages") + Expect(result.LocalPages).To(BeNumerically(">", 0), "should have local pages") + }) +})