From 56767d330b8517fc2dd343a0beb0b4cd15f73650 Mon Sep 17 00:00:00 2001
From: Till0196 <16399842+Till0196@users.noreply.github.com>
Date: Tue, 19 May 2026 18:22:29 +0900
Subject: [PATCH] mount: call mount(2)/umount(2) directly when euid==0

fusermount3 exists to let non-root callers mount FUSE filesystems. When
the caller is already root with CAP_SYS_ADMIN -- which is exactly the
case for sysbox-fs running as a systemd-managed daemon -- every check
fusermount3 does is a no-op (libfuse util/fusermount.c:1163,
"if (getuid() == 0) return 0;"), and we still pay a fork+exec plus an
AF_UNIX SCM_RIGHTS round-trip per mount. It also forces a runtime
dependency on the fusermount3 binary, which is the main reason sysbox-fs
has to ship and install the helper on every host (read-only /usr on
Flatcar, distroless images, etc.).

Add an early return at the top of mount() and unmount() that takes a
direct path when running as root: open /dev/fuse, stat the target for
rootmode, call mount(2) with the kernel option set fusermount3 would
have produced (fd, rootmode, user_id, group_id, plus an allowlist of
allow_other / default_permissions / max_read / blksize). Flags are
MS_NOSUID | MS_NODEV. source is the fsname option, type is "fuse".
unmount on the root path becomes syscall.Unmount(dir, 0). The non-root
mount() and the fusermount3-based unmount() fallback are byte-for-byte
unchanged.

Cross-checked against libfuse util/fusermount.c (prepare_mount) and
lib/mount_util.c (fuse_mnt_umount): mount flags, type, source, required
data, kernel-OK opts, and /dev/fuse open mode all match.

Tested on Flatcar Container Linux 4593.2.1 + kernel 6.12.87 +
RKE2 v1.36.0+rke2r1 + containerd 2.2.3-k3s1, with no fusermount3 binary
present on the host: sysbox-fs starts clean, all six sysboxfs FUSE
mounts are established and serve reads, observed mount options match
expectation
(rw,nosuid,nodev,relatime,user_id=0,group_id=0,default_permissions,allow_other),
unmount on pod teardown leaves no leaked mounts.
---
 mount_linux.go   | 117 +++++++++++++++++++++++++++++++++++++++++++++++
 unmount_linux.go |   6 +++
 2 files changed, 123 insertions(+)

diff --git a/mount_linux.go b/mount_linux.go
index 47e375a1..2d0d2ef5 100644
--- a/mount_linux.go
+++ b/mount_linux.go
@@ -60,6 +60,13 @@ func isBoringFusermountError(err error) bool {
 }
 
 func mount(dir string, conf *mountConfig) (fusefd *os.File, err error) {
+	// When running as root we can call mount(2) directly and skip the
+	// fusermount3 helper. This removes the runtime dependency on the
+	// fusermount3 binary for privileged daemons.
+	if os.Geteuid() == 0 {
+		return directMount(dir, conf)
+	}
+
 	fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM, 0)
 	if err != nil {
 		return nil, fmt.Errorf("socketpair error: %v", err)
@@ -154,3 +161,113 @@ func mount(dir string, conf *mountConfig) (fusefd *os.File, err error) {
 	f := os.NewFile(uintptr(gotFds[0]), "/dev/fuse")
 	return f, nil
 }
+
+// kernelMountOptions lists, in a fixed order, the options the kernel FUSE
+// driver parses itself; fusermount3 forwards exactly these in the mount(2)
+// data string. Ranging over a slice rather than the options map keeps the
+// generated data string deterministic.
+var kernelMountOptions = []string{
+	"allow_other",
+	"default_permissions",
+	"max_read",
+	"blksize",
+}
+
+// mountFlagOptions lists the generic mount options that fusermount3 turns
+// into mount(2) flag bits instead of data. Entries are applied in order and
+// the clearing variant of each pair comes first, so when both are present
+// the more restrictive one wins.
+var mountFlagOptions = []struct {
+	name string
+	flag uintptr
+	set  bool
+}{
+	{"rw", syscall.MS_RDONLY, false},
+	{"ro", syscall.MS_RDONLY, true},
+	{"suid", syscall.MS_NOSUID, false},
+	{"nosuid", syscall.MS_NOSUID, true},
+	{"dev", syscall.MS_NODEV, false},
+	{"nodev", syscall.MS_NODEV, true},
+	{"exec", syscall.MS_NOEXEC, false},
+	{"noexec", syscall.MS_NOEXEC, true},
+	{"async", syscall.MS_SYNCHRONOUS, false},
+	{"sync", syscall.MS_SYNCHRONOUS, true},
+	{"atime", syscall.MS_NOATIME, false},
+	{"noatime", syscall.MS_NOATIME, true},
+}
+
+// directMount mounts a FUSE filesystem on dir by calling mount(2) itself
+// instead of going through the fusermount3 helper. It opens /dev/fuse and
+// returns that file on success; on any failure the file is closed and a
+// nil file is returned with the error.
+//
+// Options in conf.options are handled the way fusermount3 handles them for
+// root: fsname and subtype pick the mount source and type, the entries of
+// mountFlagOptions adjust the mount flags, the entries of
+// kernelMountOptions are passed to the kernel, and everything else is
+// dropped.
+func directMount(dir string, conf *mountConfig) (*os.File, error) {
+	f, err := os.OpenFile("/dev/fuse", os.O_RDWR, 0o000)
+	if err != nil {
+		return nil, err
+	}
+
+	var st syscall.Stat_t
+	if err := syscall.Stat(dir, &st); err != nil {
+		f.Close()
+		return nil, fmt.Errorf("stat %s: %w", dir, err)
+	}
+
+	data := []string{
+		fmt.Sprintf("fd=%d", f.Fd()),
+		fmt.Sprintf("rootmode=%o", st.Mode&syscall.S_IFMT),
+		fmt.Sprintf("user_id=%d", os.Getuid()),
+		fmt.Sprintf("group_id=%d", os.Getgid()),
+	}
+	for _, k := range kernelMountOptions {
+		v, ok := conf.options[k]
+		if !ok {
+			continue
+		}
+		if v != "" {
+			k += "=" + v
+		}
+		data = append(data, k)
+	}
+
+	// Start from the defaults fusermount3 uses and let options such as
+	// "ro" or "suid" adjust them; dropping "ro" would silently produce a
+	// writable mount.
+	flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV)
+	for _, o := range mountFlagOptions {
+		if _, ok := conf.options[o.name]; !ok {
+			continue
+		}
+		if o.set {
+			flags |= o.flag
+		} else {
+			flags &^= o.flag
+		}
+	}
+
+	// Type is "fuse" or "fuse.<subtype>"; source falls back from fsname to
+	// subtype to the device path.
+	subtype := conf.options["subtype"]
+	fstype := "fuse"
+	if subtype != "" {
+		fstype += "." + subtype
+	}
+	source := conf.options["fsname"]
+	if source == "" {
+		source = subtype
+	}
+	if source == "" {
+		source = "/dev/fuse"
+	}
+
+	if err := syscall.Mount(source, dir, fstype, flags, strings.Join(data, ",")); err != nil {
+		f.Close()
+		return nil, fmt.Errorf("fuse mount at %s: %w", dir, err)
+	}
+	return f, nil
+}
diff --git a/unmount_linux.go b/unmount_linux.go
index cf133d4e..b3288d14 100644
--- a/unmount_linux.go
+++ b/unmount_linux.go
@@ -3,10 +3,16 @@ package fuse
 import (
 	"bytes"
 	"errors"
+	"os"
 	"os/exec"
+	"syscall"
 )
 
 func unmount(dir string) error {
+	if os.Geteuid() == 0 {
+		return syscall.Unmount(dir, 0)
+	}
+
 	cmd := exec.Command("fusermount3", "-u", dir)
 	output, err := cmd.CombinedOutput()
 	if err != nil {