Skip to content
Open
7 changes: 1 addition & 6 deletions internal/bpf/c/disk_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,9 @@ int tracepoint_block_rq_complete(struct trace_event_raw_block_rq_completion *ctx
e->latency_ns = latency;
e->sector = sector;
e->dev = (__u32)ctx->dev;
e->nr_bytes = ctx->nr_sector * 512;
e->nr_bytes = (__u64)ctx->nr_sector * 512;
e->pid = (__u32)(bpf_get_current_pid_tgid() >> 32);

// rwbs[0] is the primary op (R/W/D); subsequent positions hold flag
// chars (S=sync, F=FUA, A=ahead, M=meta). Promote fsync'd writes to
// op='S' so the doctor's SyncLatency tracker actually sees them.
//
// The verifier disallows variable-index reads off a tracepoint ctx
// pointer, so copy rwbs into a local buffer via the helper and
// inspect that. With a stack-resident buffer, indexed reads are fine.
Expand All @@ -72,7 +68,6 @@ int tracepoint_block_rq_complete(struct trace_event_raw_block_rq_completion *ctx
}
e->op = op;

// Zero padding.
__builtin_memset(e->_pad, 0, sizeof(e->_pad));
bpf_get_current_comm(&e->comm, sizeof(e->comm));

Expand Down
4 changes: 2 additions & 2 deletions internal/bpf/c/headers/kerno.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,10 @@ struct disk_event {
__u64 latency_ns;
__u64 sector;
__u32 dev; // device number (MKDEV)
__u32 nr_bytes;
__u32 pid;
__u64 nr_bytes; // widened to __u64: a __u32 byte count wraps at 4 GiB (2^23 512-byte sectors)
__u8 op; // 'R' = read, 'W' = write, 'S' = sync
__u8 _pad[3];
__u8 _pad[7]; // re-pad to keep struct size a multiple of 8
char comm[TASK_COMM_LEN];
};

Expand Down
62 changes: 44 additions & 18 deletions internal/bpf/decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,50 @@ func TestDecodeDiskEvent(t *testing.T) {
}
}

// TestDecodeDiskEventLargeIO verifies that NrBytes survives an encode/decode
// round trip when the byte count no longer fits in 32 bits.
//
// A uint32 byte count wraps at 4 GiB, i.e. at 1<<23 sectors of 512 bytes.
// The fixture uses exactly that many sectors, so the expected value is 1<<32:
// the smallest count a 32-bit field cannot represent. A smaller fixture
// (for example 32768 sectors = 16 MiB) still fits in a uint32 and would pass
// even if the field were truncated.
func TestDecodeDiskEventLargeIO(t *testing.T) {
	// 1<<23 sectors * 512 bytes/sector = 1<<32 bytes (4 GiB).
	const nrSector = 1 << 23
	want := DiskEvent{
		TimestampNs: 2,
		LatencyNs:   1_000_000,
		Sector:      8192,
		Dev:         8,
		NrBytes:     uint64(nrSector) * 512,
		PID:         1,
		Op:          'W',
	}
	copy(want.Comm[:], "kworker")

	data := encode(t, &want)
	got, err := DecodeDiskEvent(data)
	if err != nil {
		t.Fatal(err)
	}
	// Spelled out independently of nrSector so that a mistake in the fixture
	// arithmetic cannot mask a decode bug. A 32-bit truncation would yield 0.
	const wantBytes = uint64(4_294_967_296)
	if got.NrBytes != wantBytes {
		t.Errorf("NrBytes = %d, want %d (large I/O was truncated)", got.NrBytes, wantBytes)
	}
}

// TestDiskEventOpStrings checks the label OpString returns for each op byte,
// including the "unknown(...)" form used for an op that is not R, W or S.
func TestDiskEventOpStrings(t *testing.T) {
	type opCase struct {
		op   byte
		want string
	}
	table := []opCase{
		{op: 'R', want: "read"},
		{op: 'W', want: "write"},
		{op: 'S', want: "sync"},
		{op: 'X', want: "unknown(X)"},
	}
	for i := range table {
		tc := table[i]
		ev := DiskEvent{Op: tc.op}
		got := ev.OpString()
		if got == tc.want {
			continue
		}
		t.Errorf("OpString(%c) = %q, want %q", tc.op, got, tc.want)
	}
}

func TestDecodeSchedEvent(t *testing.T) {
validEvent := SchedEvent{
TimestampNs: 1,
Expand Down Expand Up @@ -393,24 +437,6 @@ func TestTCPEventTypeStringRoundTrip(t *testing.T) {
}
}

// TestDiskEventOpStrings exercises OpString over the three known op bytes
// and one unknown byte, comparing each result with its expected label.
func TestDiskEventOpStrings(t *testing.T) {
	expectations := []struct {
		op   byte
		want string
	}{
		{op: 'R', want: "read"},
		{op: 'W', want: "write"},
		{op: 'S', want: "sync"},
		{op: 'X', want: "unknown(X)"},
	}
	for idx := 0; idx < len(expectations); idx++ {
		op, label := expectations[idx].op, expectations[idx].want
		event := DiskEvent{Op: op}
		if actual := event.OpString(); actual != label {
			t.Errorf("OpString(%c) = %q, want %q", op, actual, label)
		}
	}
}

func TestFDOpStringRoundTrip(t *testing.T) {
cases := map[FDOp]string{
FDOpOpen: "open",
Expand Down
4 changes: 2 additions & 2 deletions internal/bpf/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,10 @@ type DiskEvent struct {
LatencyNs uint64
Sector uint64
Dev uint32
NrBytes uint32
PID uint32
NrBytes uint64 // widened from uint32: a uint32 byte count wraps at 4 GiB (2^23 512-byte sectors)
Op byte
Pad0 [3]byte // padding to align Comm
Pad0 [7]byte // re-pad to keep struct size a multiple of 8
Comm [TaskCommLen]byte
}

Expand Down
4 changes: 2 additions & 2 deletions internal/cli/trace_disk.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ func runTraceDisk(ctx context.Context, opts traceDiskOpts) error {
event.OpString(),
formatLatency(event.Latency()),
formatDev(event.Dev),
formatBytes(uint64(event.NrBytes)),
formatBytes(event.NrBytes),
)
}
}
Expand All @@ -177,7 +177,7 @@ type diskEventOut struct {
LatencyNs uint64 `json:"latencyNs"`
Dev string `json:"dev"`
Sector uint64 `json:"sector"`
Bytes uint32 `json:"bytes"`
Bytes uint64 `json:"bytes"`
}

func diskEventJSON(e *bpf.DiskEvent) diskEventOut {
Expand Down
4 changes: 2 additions & 2 deletions internal/collector/disk.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,11 @@ func (c *DiskIOCollector) record(event *bpf.DiskEvent) {
case 'R':
c.readHist.Record(event.LatencyNs)
c.reads++
c.rdBytes += uint64(event.NrBytes)
c.rdBytes += event.NrBytes
case 'W':
c.writeHist.Record(event.LatencyNs)
c.writes++
c.wrBytes += uint64(event.NrBytes)
c.wrBytes += event.NrBytes
case 'S':
c.syncHist.Record(event.LatencyNs)
c.syncs++
Expand Down
2 changes: 1 addition & 1 deletion internal/collector/syscall_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ func TestSyscallCollectorEntriesCapped(t *testing.T) {
}

// makeDiskEvent builds a disk event of the given op type.
func makeDiskEvent(op byte, latencyNs uint64, bytes uint32) *bpf.DiskEvent {
func makeDiskEvent(op byte, latencyNs uint64, bytes uint64) *bpf.DiskEvent {
return &bpf.DiskEvent{
LatencyNs: latencyNs,
NrBytes: bytes,
Expand Down
14 changes: 7 additions & 7 deletions internal/metrics/bridge_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ func TestRecordDiskIO(t *testing.T) {
devLabel string
op byte
opLabel string
bytes uint32
bytes uint64
}{
{
name: "disk_write",
Expand Down Expand Up @@ -298,12 +298,12 @@ func encodeDiskEvent(e *bpf.DiskEvent) []byte {
binary.LittleEndian.PutUint64(buf[8:], e.LatencyNs)
binary.LittleEndian.PutUint64(buf[16:], e.Sector)
binary.LittleEndian.PutUint32(buf[24:], e.Dev)
binary.LittleEndian.PutUint32(buf[28:], e.NrBytes)
binary.LittleEndian.PutUint32(buf[32:], e.PID)
buf[36] = e.Op
// pad [37:40]
copy(buf[40:56], e.Comm[:])
return buf[:56]
binary.LittleEndian.PutUint32(buf[28:], e.PID)
binary.LittleEndian.PutUint64(buf[32:], e.NrBytes)
buf[40] = e.Op
// pad [41:48]
copy(buf[48:64], e.Comm[:])
return buf[:64]
}

func encodeOOMEvent(e *bpf.OOMEvent) []byte {
Expand Down
Loading