From 849eb4aa0ff33f5f17cc4b16117029c293bc0038 Mon Sep 17 00:00:00 2001 From: Scott Dodson Date: Tue, 8 Apr 2025 15:02:34 -0400 Subject: [PATCH] Don't use MADV_RANDOM In addition to its explicitly documented behavior in posix_madvise(2), since Linux 6.4 this call also causes the kernel to aggressively free pages from the page cache by short-circuiting the LRU second-chance mechanism. As a result, compaction events that took 900ms now take up to 20s, and a system which generally operated with near-zero major page faults sees 600 or more major faults per second during compaction events. We've tested this change on older kernels and observed no negative impact on typical cloud instances. Fixes #939 --- bolt_unix.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/bolt_unix.go b/bolt_unix.go index d1922c2d9..faf7c9978 100644 --- a/bolt_unix.go +++ b/bolt_unix.go @@ -3,7 +3,6 @@ package bbolt import ( - "fmt" "syscall" "time" "unsafe" @@ -58,13 +57,6 @@ func mmap(db *DB, sz int) error { return err } - // Advise the kernel that the mmap is accessed randomly. - err = unix.Madvise(b, syscall.MADV_RANDOM) - if err != nil && err != syscall.ENOSYS { - // Ignore not implemented error in kernel because it still works. - return fmt.Errorf("madvise: %s", err) - } - // Save the original byte slice and convert to a byte array pointer. db.dataref = b db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))