Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 26 additions & 5 deletions internal/collector/fd.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,11 +188,32 @@ func (c *FDCollector) Snapshot() any {
entries = entries[:MaxFDEntriesPerSnapshot]
}

// Populate CurrentFDs and FDLimit for the top-N entries (capped to avoid
// /proc overhead on systems with thousands of processes).
const procReadCap = 5
for i := range entries {
if i >= procReadCap {
break
}
if n, err := CountProcFDs(entries[i].PID); err == nil {
entries[i].CurrentFDs = n
}
if lim, err := ReadProcFDLimit(entries[i].PID); err == nil && lim > 0 {
entries[i].FDLimit = lim
}
}

topCurrentFDs := 0
if len(entries) > 0 {
topCurrentFDs = entries[0].CurrentFDs
}

return &FDSnapshot{
Entries: entries,
TotalOpens: totalOpens,
TotalCloses: totalCloses,
NetDelta: netDelta,
GrowthRate: growthRate,
Entries: entries,
TotalOpens: totalOpens,
TotalCloses: totalCloses,
NetDelta: netDelta,
GrowthRate: growthRate,
TopPIDCurrentFDs: topCurrentFDs,
}
}
60 changes: 60 additions & 0 deletions internal/collector/proc_fd.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Copyright 2026 Optiqor contributors
// SPDX-License-Identifier: Apache-2.0

package collector

import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
)

// procDir is the root directory under which per-process entries are looked
// up. It is a variable rather than a constant so the package tests can point
// it at a fixture tree.
var procDir = "/proc"

// CountProcFDs reports how many file descriptors pid has open by listing the
// entries of <procDir>/<pid>/fd.
//
// It returns (count, nil) on success. If the directory cannot be opened or
// listed (process exited, permission denied, etc.) it returns (0, err), so
// callers should read such a 0 as "unknown" rather than "zero open fds".
func CountProcFDs(pid uint32) (int, error) {
	fdDir, openErr := os.Open(fmt.Sprintf("%s/%d/fd", procDir, pid)) //nolint:gosec // path is built from the controlled procDir
	if openErr != nil {
		return 0, openErr
	}
	defer fdDir.Close()

	// Only the number of entries matters, so list names instead of using
	// Readdir — no stat per entry.
	entries, listErr := fdDir.Readdirnames(-1)
	if listErr != nil {
		return 0, listErr
	}
	return len(entries), nil
}

// ReadProcFDLimit reads the soft RLIMIT_NOFILE for pid from
// <procDir>/<pid>/limits.
//
// It returns (limit, nil) on success. It returns (0, err) when the file
// cannot be opened or read, when the soft limit is not a valid integer, or
// when the file has no "Max open files" line. A soft limit of "unlimited", or
// a "Max open files" line with fewer than four fields, yields (0, nil);
// callers should treat 0 as "unknown".
func ReadProcFDLimit(pid uint32) (int, error) {
	path := fmt.Sprintf("%s/%d/limits", procDir, pid)
	f, err := os.Open(path) //nolint:gosec // path is constructed from the controlled procDir
	if err != nil {
		return 0, err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := scanner.Text()
		if !strings.HasPrefix(line, "Max open files") {
			continue
		}
		// Format: "Max open files 65536 65536 files". Splitting on whitespace
		// puts the soft limit in fields[3] and the hard limit in fields[4].
		fields := strings.Fields(line)
		if len(fields) < 4 || fields[3] == "unlimited" {
			return 0, nil
		}
		limit, convErr := strconv.Atoi(fields[3])
		if convErr != nil {
			// Atoi returns a clamped, non-zero value on range errors; never
			// leak it alongside an error.
			return 0, fmt.Errorf("parsing soft RLIMIT_NOFILE in %s: %w", path, convErr)
		}
		return limit, nil
	}
	// Scan also stops on read failures; report those as such instead of
	// claiming the line is absent.
	if scanErr := scanner.Err(); scanErr != nil {
		return 0, fmt.Errorf("reading %s: %w", path, scanErr)
	}
	return 0, fmt.Errorf("RLIMIT_NOFILE not found in %s", path)
}
144 changes: 144 additions & 0 deletions internal/collector/proc_fd_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
// Copyright 2026 Optiqor contributors
// SPDX-License-Identifier: Apache-2.0

package collector

import (
"os"
"path/filepath"
"testing"
)

// TestCountProcFDs runs CountProcFDs against a fake proc tree rooted in a
// temporary directory and checks three states in sequence: the fd directory
// is missing, it exists but is empty, and it holds five entries.
func TestCountProcFDs(t *testing.T) {
	// Redirect procDir to a per-test directory. t.TempDir removes the tree
	// when the test ends, and the cleanup puts the original value back.
	oldProcDir := procDir
	procDir = t.TempDir()
	t.Cleanup(func() { procDir = oldProcDir })

	var pid uint32 = 1234
	fdPath := filepath.Join(procDir, "1234", "fd")

	// Test case 1: Directory doesn't exist (unknown/process exited)
	count, err := CountProcFDs(pid)
	if err == nil {
		t.Errorf("expected error for non-existent fd directory, got nil")
	}
	if count != 0 {
		t.Errorf("expected count to be 0 on error, got %d", count)
	}

	// Create the fd directory
	if err := os.MkdirAll(fdPath, 0755); err != nil {
		t.Fatalf("failed to create mock fd dir: %v", err)
	}

	// Test case 2: Empty directory
	count, err = CountProcFDs(pid)
	if err != nil {
		t.Errorf("unexpected error for empty fd directory: %v", err)
	}
	if count != 0 {
		t.Errorf("expected count to be 0 for empty directory, got %d", count)
	}

	// Create mock fd files; only their number matters, not their content.
	files := []string{"0", "1", "2", "3", "4"}
	for _, fname := range files {
		if err := os.WriteFile(filepath.Join(fdPath, fname), []byte(""), 0644); err != nil {
			t.Fatalf("failed to write mock fd file %s: %v", fname, err)
		}
	}

	// Test case 3: 5 open FDs
	count, err = CountProcFDs(pid)
	if err != nil {
		t.Errorf("unexpected error counting fds: %v", err)
	}
	if count != len(files) {
		t.Errorf("expected count to be %d, got %d", len(files), count)
	}
}

// TestReadProcFDLimit runs ReadProcFDLimit against a fake proc tree rooted in
// a temporary directory. It rewrites the same limits file between cases:
// missing file, a numeric soft limit, an "unlimited" soft limit, and a file
// without a "Max open files" line.
func TestReadProcFDLimit(t *testing.T) {
	// Redirect procDir to a per-test directory. t.TempDir removes the tree
	// when the test ends, and the cleanup puts the original value back.
	oldProcDir := procDir
	procDir = t.TempDir()
	t.Cleanup(func() { procDir = oldProcDir })

	var pid uint32 = 1234
	limitsDir := filepath.Join(procDir, "1234")
	if err := os.MkdirAll(limitsDir, 0755); err != nil {
		t.Fatalf("failed to create limits dir: %v", err)
	}
	limitsPath := filepath.Join(limitsDir, "limits")

	// Test case 1: Missing limits file
	lim, err := ReadProcFDLimit(pid)
	if err == nil {
		t.Errorf("expected error for missing limits file, got nil")
	}
	if lim != 0 {
		t.Errorf("expected limit to be 0 on error, got %d", lim)
	}

	// Test case 2: Valid limits file
	mockLimits := `Limit Soft Limit Hard Limit Units
Max cpu time unlimited unlimited seconds
Max open files 65536 65536 files
Max locked memory 8388608 8388608 bytes
`
	if err := os.WriteFile(limitsPath, []byte(mockLimits), 0644); err != nil {
		t.Fatalf("failed to write mock limits: %v", err)
	}

	lim, err = ReadProcFDLimit(pid)
	if err != nil {
		t.Errorf("unexpected error reading limits: %v", err)
	}
	if lim != 65536 {
		t.Errorf("expected limit to be 65536, got %d", lim)
	}

	// Test case 3: Limits file with "unlimited"
	mockLimitsUnlimited := `Limit Soft Limit Hard Limit Units
Max cpu time unlimited unlimited seconds
Max open files unlimited unlimited files
`
	if err := os.WriteFile(limitsPath, []byte(mockLimitsUnlimited), 0644); err != nil {
		t.Fatalf("failed to write mock limits: %v", err)
	}

	lim, err = ReadProcFDLimit(pid)
	if err != nil {
		t.Errorf("unexpected error reading limits: %v", err)
	}
	if lim != 0 {
		t.Errorf("expected limit to be 0 (unknown) for unlimited, got %d", lim)
	}

	// Test case 4: Limits file with no Max open files line
	mockLimitsMissingLine := `Limit Soft Limit Hard Limit Units
Max cpu time unlimited unlimited seconds
`
	if err := os.WriteFile(limitsPath, []byte(mockLimitsMissingLine), 0644); err != nil {
		t.Fatalf("failed to write mock limits: %v", err)
	}

	lim, err = ReadProcFDLimit(pid)
	if err == nil {
		t.Errorf("expected error when Max open files is missing, got nil")
	}
	if lim != 0 {
		t.Errorf("expected limit to be 0, got %d", lim)
	}
}
9 changes: 9 additions & 0 deletions internal/collector/signals.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,10 @@ type FDSnapshot struct {
TotalCloses uint64 `json:"totalCloses"`
NetDelta int64 `json:"netDelta"` // opens - closes
GrowthRate float64 `json:"growthRate"` // FDs per second

// TopPIDCurrentFDs is the actual open-fd count of the top leaker, read from /proc/<pid>/fd.
// 0 means it was not available (process exited, permission denied, etc.).
TopPIDCurrentFDs int `json:"topPidCurrentFDs,omitempty"`
}

// FDEntry represents FD stats for one process.
Expand All @@ -183,6 +187,11 @@ type FDEntry struct {
Closes uint64 `json:"closes"`
NetDelta int64 `json:"netDelta"`
GrowthRate float64 `json:"growthRate"` // FDs per second

// CurrentFDs is the live fd count read from /proc/<pid>/fd at snapshot time. 0 = unavailable.
CurrentFDs int `json:"currentFDs,omitempty"`
// FDLimit is the soft RLIMIT_NOFILE for this PID read from /proc/<pid>/limits. 0 = unavailable.
FDLimit int `json:"fdLimit,omitempty"`
}

// ─── Cgroup Memory Snapshot ──────────────────────────────────────────────────
Expand Down
31 changes: 31 additions & 0 deletions internal/doctor/eta.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,34 @@ func etaDuration(etaSecs float64) (time.Duration, bool) {

return eta, true
}

// fdHeadroom returns (remaining, limit, exact) where remaining is the number
// of file descriptors left before the process hits its limit, limit is the
// value used as the ceiling, and exact is true when remaining was derived
// from a live fd count (currentFDs > 0) rather than a window delta.
//
// Inputs are combined in this priority order:
//  1. currentFDs + fdLimit       — both available: most accurate
//  2. currentFDs + default limit — live count, assumed limit
//  3. netDelta   + fdLimit       — window delta, known limit (rare)
//  4. netDelta   + default limit — worst case: window delta only
//
// fdLimit <= 0 means "unknown" and selects the default limit of 65536.
// A negative netDelta (more closes than opens in the window) is counted as 0
// descriptors in use, so remaining never exceeds limit.
//
// If remaining would be <= 0, 1 is returned instead so callers don't divide
// by zero.
func fdHeadroom(currentFDs, netDelta int64, fdLimit int) (remaining float64, limit float64, exact bool) {
	const defaultLimit = 65536.0

	limit = defaultLimit
	if fdLimit > 0 {
		limit = float64(fdLimit)
	}

	// Prefer the live count; fall back to the window delta as a proxy.
	exact = currentFDs > 0
	used := float64(netDelta)
	if exact {
		used = float64(currentFDs)
	}
	// A negative proxy carries no information about open descriptors; without
	// this clamp the headroom would come out larger than the limit itself.
	if used < 0 {
		used = 0
	}

	remaining = limit - used
	if remaining <= 0 {
		remaining = 1
	}
	return remaining, limit, exact
}
73 changes: 73 additions & 0 deletions internal/doctor/eta_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,76 @@ func TestETADurationAcceptsCeiling(t *testing.T) {
t.Fatalf("expected %s, got %s", maxMeaningfulETA, eta)
}
}

// TestFDHeadroom is a table-driven check of fdHeadroom covering each
// combination of known/unknown live fd count and fd limit, plus the floor
// applied when the computed headroom is not positive.
func TestFDHeadroom(t *testing.T) {
	type outcome struct {
		remaining float64
		limit     float64
		exact     bool
	}
	type scenario struct {
		label      string
		currentFDs int64
		netDelta   int64
		fdLimit    int
		want       outcome
	}

	scenarios := []scenario{
		{
			label:      "Case 1: Both currentFDs and fdLimit set",
			currentFDs: 64000,
			netDelta:   100,
			fdLimit:    70000,
			want:       outcome{remaining: 6000, limit: 70000, exact: true},
		},
		{
			label:      "Case 2: currentFDs set, fdLimit = 0 (defaults to 65536)",
			currentFDs: 64000,
			netDelta:   100,
			fdLimit:    0,
			want:       outcome{remaining: 1536, limit: 65536, exact: true},
		},
		{
			label:      "Case 3: currentFDs = 0, fdLimit set (falls back to netDelta with known limit)",
			currentFDs: 0,
			netDelta:   500,
			fdLimit:    80000,
			want:       outcome{remaining: 79500, limit: 80000, exact: false},
		},
		{
			label:      "Case 4: Neither set (falls back to netDelta with 65536 default limit)",
			currentFDs: 0,
			netDelta:   500,
			fdLimit:    0,
			want:       outcome{remaining: 65036, limit: 65536, exact: false},
		},
		{
			label:      "Case 5: remaining <= 0 (returns remaining = 1)",
			currentFDs: 66000,
			netDelta:   100,
			fdLimit:    65536,
			want:       outcome{remaining: 1, limit: 65536, exact: true},
		},
	}

	for i := range scenarios {
		sc := scenarios[i]
		t.Run(sc.label, func(t *testing.T) {
			gotRemaining, gotLimit, gotExact := fdHeadroom(sc.currentFDs, sc.netDelta, sc.fdLimit)
			if gotRemaining != sc.want.remaining {
				t.Errorf("expected remaining=%v, got %v", sc.want.remaining, gotRemaining)
			}
			if gotLimit != sc.want.limit {
				t.Errorf("expected limit=%v, got %v", sc.want.limit, gotLimit)
			}
			if gotExact != sc.want.exact {
				t.Errorf("expected exact=%v, got %v", sc.want.exact, gotExact)
			}
		})
	}
}
Loading
Loading