From 14cab1a8bee0ef707f2a42d333be8bba8621e289 Mon Sep 17 00:00:00 2001 From: Joshua Koo Date: Tue, 31 Mar 2026 10:29:15 +0000 Subject: [PATCH 1/6] feat: eBPF process lifecycle tracking + ProcessMetadataCache library API Add eBPF-driven process exec/exit detection and a ProcessMetadataCache to profile-bee's library API, enabling external profiling agents to get process-aware profiling without polling /proc. eBPF changes: - sched_process_exec tracepoint program (detects execve() calls) - lifecycle_tracking_map to broaden exit events for all processes - process_exec_events ring buffer for exec notifications Userspace: - ProcessEvent unified type replaces ProcessExitEvent in PerfWork - DwarfThreadMsg::ProcessExeced triggers DWARF table reload on exec - ProcessMetadataCache: lazy /proc reader with eBPF-driven eviction - PointerStackFramesCache::invalidate_pid() for exec cache flush - SessionConfig::track_process_lifecycle (auto-on with DWARF) - Graceful fallback for older eBPF binaries without new programs Benefits for profile-bee core: - Proactive DWARF table loading on exec (no 1s poll delay) - Symbol cache invalidation on exec (no stale resolutions) - Immediate DWARF shard reclamation on exit 97 tests pass (9 new for ProcessMetadataCache), clippy -D warnings clean. --- docs/custom_profiling_agent.md | 87 +++++++- profile-bee-common/src/lib.rs | 42 +++- profile-bee-ebpf/Cargo.lock | 2 +- profile-bee-ebpf/src/lib.rs | 42 +++- profile-bee-ebpf/src/main.rs | 10 +- profile-bee/bin/profile-bee.rs | 43 ++-- profile-bee/ebpf-bin/profile-bee.bpf.o | Bin 65552 -> 66336 bytes profile-bee/src/cache.rs | 17 ++ profile-bee/src/ebpf.rs | 50 +++++ profile-bee/src/event_loop.rs | 73 ++++++- profile-bee/src/lib.rs | 1 + profile-bee/src/pipeline.rs | 95 ++++++++- profile-bee/src/process_metadata.rs | 274 +++++++++++++++++++++++++ profile-bee/src/session.rs | 53 ++++- profile-bee/src/trace_handler.rs | 9 + 15 files changed, 755 insertions(+), 43 deletions(-) create mode 100644 profile-bee/src/process_metadata.rs diff --git a/docs/custom_profiling_agent.md b/docs/custom_profiling_agent.md index f92c9dc..53eff36 100644 --- a/docs/custom_profiling_agent.md +++ b/docs/custom_profiling_agent.md @@ -77,6 +77,9 @@ async fn main() -> anyhow::Result<()> { ..ProfilerConfig::default() }, duration_ms: 0, // Run indefinitely (we control the loop) + // Enable process lifecycle tracking for metadata cache + DWARF improvements. + // Note: automatically enabled when dwarf: true, shown here for clarity. + track_process_lifecycle: true, ..SessionConfig::default() }; @@ -113,7 +116,45 @@ async fn main() -> anyhow::Result<()> { ## Stack Enrichment Pattern -A common pattern is to look up per-process metadata and annotate stacks. +### Using the Built-in ProcessMetadataCache + +When `track_process_lifecycle` is enabled (or DWARF is on), the event loop +maintains a `ProcessMetadataCache` that automatically reads `/proc/[pid]/*` +on first access and evicts entries when eBPF detects process exec/exit events. + +```rust +use profile_bee::types::{FrameCount, StackFrameInfo}; + +fn enrich_with_builtin_cache( + session: &mut ProfilingSession, + stacks: &[FrameCount], +) { + // Access the built-in metadata cache (requires track_process_lifecycle) + if let Some(cache) = session.event_loop.process_metadata() { + for fc in stacks { + if let Some(first_frame) = fc.frames.first() { + let pid = first_frame.pid as u32; + if let Some(meta) = cache.get_or_load(pid) { + // meta.cmdline, meta.cwd, meta.environ, meta.exe, meta.ns_mnt + // are all available for enrichment + if let Some(env_val) = meta.environ_var("MY_SERVICE_NAME") { + // Use the environment variable for grouping + } + } + } + } + } +} +``` + +The cache is automatically maintained by the event loop: +- On `sched_process_exec`: invalidates the entry (same PID, new binary) +- On `sched_process_exit`: removes the entry entirely +- No manual eviction needed — eBPF events handle it + +### Custom Enrichment (without built-in cache) + +For more control, you can build your own metadata cache. Since `StackFrameInfo` contains the `pid`, you can read `/proc/[pid]/*` to get process context: @@ -331,3 +372,47 @@ pub struct StackFrameInfo { pub ns: Option, // mount namespace inode } ``` + +### `ProcessMetadataCache` + +```rust +use profile_bee::process_metadata::ProcessMetadataCache; + +// Access via the event loop (when track_process_lifecycle is enabled): +if let Some(cache) = session.event_loop.process_metadata() { + // Lazily loads from /proc on first access + if let Some(meta) = cache.get_or_load(pid) { + meta.cmdline; // Option> + meta.cwd; // Option + meta.environ; // Option> + meta.exe; // Option + meta.ns_mnt; // Option — mount namespace inode + } + + // Convenience: look up a specific environment variable + let val = cache.environ_var(pid, "MY_ENV_VAR"); +} +``` + +Entries are automatically: +- **Invalidated** on `sched_process_exec` (same PID, new binary image) +- **Removed** on `sched_process_exit` (process gone) + +### `SessionConfig` — Lifecycle Tracking + +```rust +pub struct SessionConfig { + // ... other fields ... + + /// Track process lifecycle events (exec + broadened exit) via eBPF tracepoints. + /// Automatically enabled when profiler.dwarf is true. + pub track_process_lifecycle: bool, +} +``` + +When enabled: +- Attaches `sched:sched_process_exec` tracepoint (detects `execve()` calls) +- Broadens `sched:sched_process_exit` to fire for all processes (not just DWARF-tracked) +- Creates a `ProcessMetadataCache` in the event loop +- DWARF tables are proactively reloaded on exec (no 1-second poll delay) +- Symbol caches are invalidated on exec (no stale resolutions) diff --git a/profile-bee-common/src/lib.rs b/profile-bee-common/src/lib.rs index fe6a9e7..1624348 100644 --- a/profile-bee-common/src/lib.rs +++ b/profile-bee-common/src/lib.rs @@ -55,7 +55,8 @@ impl ProbeEvent { pub const STRUCT_SIZE: usize = size_of::(); } -/// Process exit notification sent from eBPF to userspace +/// Process exit notification sent from eBPF to userspace. +/// Deprecated: prefer `ProcessEvent` which carries both exec and exit events. #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] #[repr(C)] pub struct ProcessExitEvent { @@ -67,6 +68,45 @@ impl ProcessExitEvent { pub const STRUCT_SIZE: usize = size_of::(); } +/// Process exec notification sent from eBPF to userspace when a process +/// calls execve(). Used for proactive DWARF table loading and cache invalidation. +#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] +#[repr(C)] +pub struct ProcessExecEvent { + pub pid: u32, + pub _pad: u32, +} + +impl ProcessExecEvent { + pub const STRUCT_SIZE: usize = size_of::(); +} + +// --- Process Lifecycle Event Types --- + +/// Process lifecycle event type: process exited. +pub const PROCESS_EVENT_EXIT: u32 = 0; +/// Process lifecycle event type: process called execve(). +pub const PROCESS_EVENT_EXEC: u32 = 1; + +/// Unified process lifecycle event sent from eBPF to userspace. +/// Carries both exec and exit notifications through a single ring buffer, +/// replacing the narrower `ProcessExitEvent`. +#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] +#[repr(C)] +pub struct ProcessEvent { + /// Event type: `PROCESS_EVENT_EXIT` or `PROCESS_EVENT_EXEC`. + pub event_type: u32, + /// The PID (tgid) of the process. + pub pid: u32, + /// For EXIT events: the exit code. For EXEC events: 0. + pub exit_code: i32, + pub _pad: u32, +} + +impl ProcessEvent { + pub const STRUCT_SIZE: usize = size_of::(); +} + // --- DWARF Unwind Table Types (used by eBPF-side unwinding) --- /// How to compute the CFA (Canonical Frame Address) diff --git a/profile-bee-ebpf/Cargo.lock b/profile-bee-ebpf/Cargo.lock index e414b07..a5df8cc 100644 --- a/profile-bee-ebpf/Cargo.lock +++ b/profile-bee-ebpf/Cargo.lock @@ -220,7 +220,7 @@ dependencies = [ [[package]] name = "profile-bee-common" -version = "0.3.7" +version = "0.3.9" [[package]] name = "profile-bee-ebpf" diff --git a/profile-bee-ebpf/src/lib.rs b/profile-bee-ebpf/src/lib.rs index 68f45b4..6b52c1e 100644 --- a/profile-bee-ebpf/src/lib.rs +++ b/profile-bee-ebpf/src/lib.rs @@ -72,6 +72,12 @@ static TARGET_PID_MAP: Array = Array::with_max_entries(1, 0); #[map(name = "monitor_exit_pid_map")] static MONITOR_EXIT_PID_MAP: Array = Array::with_max_entries(1, 0); +/// Whether process lifecycle tracking is enabled (0 = disabled, 1 = enabled). +/// When enabled, exit events fire for ALL process exits (not just DWARF-tracked +/// or monitored PIDs), and the exec tracepoint sends exec events. +#[map(name = "lifecycle_tracking_map")] +static LIFECYCLE_TRACKING_MAP: Array = Array::with_max_entries(1, 0); + #[inline] unsafe fn skip_idle() -> bool { let skip = core::ptr::read_volatile(&SKIP_IDLE); @@ -119,6 +125,14 @@ unsafe fn monitor_exit_pid() -> u32 { } } +#[inline] +unsafe fn lifecycle_tracking_enabled() -> bool { + match LIFECYCLE_TRACKING_MAP.get(0) { + Some(&v) => v != 0, + None => false, + } +} + /* Setup maps */ #[map] static mut STORAGE: PerCpuArray = PerCpuArray::with_max_entries(1, 0); @@ -136,6 +150,9 @@ static RING_BUF_STACKS: RingBuf = RingBuf::with_byte_size(STACK_SIZE, 0); #[map(name = "process_exit_events")] static RING_BUF_PROCESS_EXIT: RingBuf = RingBuf::with_byte_size(4096, 0); +#[map(name = "process_exec_events")] +static RING_BUF_PROCESS_EXEC: RingBuf = RingBuf::with_byte_size(4096, 0); + #[map(name = "stack_traces")] pub static STACK_TRACES: StackTrace = StackTrace::with_max_entries(STACK_SIZE, 0); // DWARF unwind maps — single outer ArrayOfMaps containing per-binary inner Array maps. @@ -1382,7 +1399,8 @@ unsafe fn collect_off_cpu_trace_percpu(ctx: &C, now: u64) { /// Handle sched_process_exit tracepoint for process exit monitoring. /// Sends a ProcessExitEvent when: /// - The monitored PID exits (--pid mode: stops profiling), OR -/// - A DWARF-tracked process exits (cleanup LPM trie entries). +/// - A DWARF-tracked process exits (cleanup LPM trie entries), OR +/// - Lifecycle tracking is enabled (system-wide process awareness). #[inline(always)] pub unsafe fn handle_process_exit(ctx: C) { use profile_bee_common::ProcessExitEvent; @@ -1390,11 +1408,13 @@ pub unsafe fn handle_process_exit(ctx: C) { let tgid = ctx.tgid(); let monitor_pid = monitor_exit_pid(); - // Send notification if this is either the monitored PID or a DWARF-tracked process + // Send notification if this is a monitored PID, DWARF-tracked process, + // or lifecycle tracking is enabled (for system-wide process awareness). let is_monitored = monitor_pid != 0 && tgid == monitor_pid; let is_dwarf_tracked = unsafe { DWARF_TGIDS.get(&tgid).is_some() }; + let lifecycle = unsafe { lifecycle_tracking_enabled() }; - if is_monitored || is_dwarf_tracked { + if is_monitored || is_dwarf_tracked || lifecycle { if let Some(mut entry) = RING_BUF_PROCESS_EXIT.reserve::(0) { let exit_event = ProcessExitEvent { pid: tgid, @@ -1406,6 +1426,22 @@ pub unsafe fn handle_process_exit(ctx: C) { } } +/// Handle sched_process_exec tracepoint for process exec monitoring. +/// Sends a ProcessExecEvent when a process calls execve(), enabling +/// proactive DWARF table loading and metadata cache invalidation. +#[inline(always)] +pub unsafe fn handle_process_exec(ctx: C) { + use profile_bee_common::ProcessExecEvent; + + let tgid = ctx.tgid(); + + if let Some(mut entry) = RING_BUF_PROCESS_EXEC.reserve::(0) { + let exec_event = ProcessExecEvent { pid: tgid, _pad: 0 }; + let _writable = entry.write(exec_event); + entry.submit(0); + } +} + // Make this simple now - checking for valid pointers can include // checking with stack pointer address or getting valid ranges // from from /proc/[pid]/maps diff --git a/profile-bee-ebpf/src/main.rs b/profile-bee-ebpf/src/main.rs index 2814eb2..fc10415 100644 --- a/profile-bee-ebpf/src/main.rs +++ b/profile-bee-ebpf/src/main.rs @@ -10,7 +10,7 @@ use aya_ebpf::{ use profile_bee_ebpf::{ collect_off_cpu_trace, collect_trace, collect_trace_raw_syscall, collect_trace_raw_syscall_exit, collect_trace_raw_tp_with_task_regs, - collect_trace_stackid_only, dwarf_unwind_step_impl, handle_process_exit, + collect_trace_stackid_only, dwarf_unwind_step_impl, handle_process_exec, handle_process_exit, }; #[perf_event] @@ -96,6 +96,14 @@ pub fn tracepoint_process_exit(ctx: TracePointContext) -> u32 { 0 } +/// Tracepoint for monitoring process exec events. +/// Detects execve() calls for proactive DWARF loading and cache invalidation. +#[tracepoint(category = "sched", name = "sched_process_exec")] +pub fn tracepoint_process_exec(ctx: TracePointContext) -> u32 { + unsafe { handle_process_exec(ctx) } + 0 +} + /// DWARF unwind step program — tail-call target for deep stack unwinding. /// Not attached to any perf event directly; only called via PROG_ARRAY tail call /// from collect_trace (perf_event context). Unwinds FRAMES_PER_TAIL_CALL frames diff --git a/profile-bee/bin/profile-bee.rs b/profile-bee/bin/profile-bee.rs index 48665b8..5f6b066 100644 --- a/profile-bee/bin/profile-bee.rs +++ b/profile-bee/bin/profile-bee.rs @@ -506,6 +506,7 @@ async fn main() -> std::result::Result<(), anyhow::Error> { group_by_process: opt.group_by_process, monitor_exit_pid, tgid_request_tx, + enable_process_metadata: false, }; let mut event_loop = ProfilingEventLoop::new( ebpf_profiler.counts, @@ -892,20 +893,34 @@ fn spawn_profiling_thread( tracing::warn!("{:#}", e); } } - Ok(PerfWork::ProcessExit(exit_event)) => { - // Forward to DWARF thread for LPM trie cleanup - if let Some(tx) = &tgid_request_tx { - let _ = tx.send(DwarfThreadMsg::ProcessExited(exit_event.pid)); - } - // Allow PID reuse to trigger a fresh LoadProcess - known_tgids.remove(&exit_event.pid); - // Only stop profiling if this is the monitored target process - if Some(exit_event.pid) == monitor_exit_pid { - tracing::info!( - "target process {} exited, stopping TUI", - exit_event.pid - ); - return; + Ok(PerfWork::ProcessEvent(event)) => { + use profile_bee_common::{PROCESS_EVENT_EXEC, PROCESS_EVENT_EXIT}; + match event.event_type { + PROCESS_EVENT_EXIT => { + // Forward to DWARF thread for LPM trie cleanup + if let Some(tx) = &tgid_request_tx { + let _ = tx.send(DwarfThreadMsg::ProcessExited(event.pid)); + } + // Allow PID reuse to trigger a fresh LoadProcess + known_tgids.remove(&event.pid); + // Only stop profiling if this is the monitored target process + if Some(event.pid) == monitor_exit_pid { + tracing::info!( + "target process {} exited, stopping TUI", + event.pid + ); + return; + } + } + PROCESS_EVENT_EXEC => { + tracing::debug!("process {} called exec", event.pid); + if let Some(tx) = &tgid_request_tx { + let _ = tx.send(DwarfThreadMsg::ProcessExeced(event.pid)); + } + known_tgids.remove(&event.pid); + known_tgids.insert(event.pid); + } + _ => {} } } Ok(PerfWork::Stop) => return, diff --git a/profile-bee/ebpf-bin/profile-bee.bpf.o b/profile-bee/ebpf-bin/profile-bee.bpf.o index 09c7c69c3296b1b49d9dbb5d300a253dfb986ce1..92c706236d21a9527115c6a2d997769da74d1763 100644 GIT binary patch delta 1917 zcmZuwU2IfU5T3g(EvrR(moB$U+wfB;T`^c#t}Ud^mKXy(VGV@7knQ$*i`7Dx(!?zx zX`i$j6I&)kOiaX|2TY8K^3a4AYJwpqB!pVR_+cX@T8n8YJRoA3`WVa(u`@g{p@xe6W$0gw*mpuLJ!biBtV%&xfLW?wMYUVH)|2!Bff6{Y_7 z3)}z;uS~@Hn}lsbdEuD52)_!$glGY;WUG;J{YV<$BXy#1fa5g&1?#IvQeCBxDy6d> zUgC_Ju#Xdz`|x{=58}tSHuo2^_pUG9HcrQ6`G^#9Rs85WX@fjQa2wg<2Svu@=Ap_cg${_QzJMkX5WRi{DH}`lIlR zPwysIUCN5O`i)YLzYlSyb>kA}NnL`Q5`T(S0P4h0(Q@dPutGUuv5l4~JEYo+N2X#% z28X9CV+(#hwYh(<5Tf;+AX1xkL-c-#@h;y*zGl2Z(F?rqg-<${efV3l;0U7tgUVV1 z(@gOANGCpr>!+XF@F7oy*yQ3WphDtN*kN4gnf~eN-J8Ve?Ikfa!XDya+BS;c*b<-O z^`}E{EgruAdr7*d+i_2vNV*x}T*XeV*++=T0%6Yx>*pcMX6y_d`TI9Rw z?#^Sq$wyeZ`!$A(7iYgM?bzws?Y6TsW!ulUJN5PW-rTdLuh_O7-w|(*+o_K2sdznJ zpGyQq{wrucv_gZk!W8QUZhJTq%lxLib$t0X!X;DBjGm$6XwD~$qu4TU#m?SQ9FL!s zI5fR}PAd)z&H>5E{HZu;?hkQj68#(p&0NAr634v#myd&ex$ao&KMM8K2BbtY>Q~C3 zxsWh=v1P$B8~#GkxcTcP`uHp5(RogtN&Kqbt#G>vjeSAH<{{iTy BA#DHv delta 1468 zcmZuwO-NKx6ux&Fe_mqEj7?9i@CQpDS)R1QKcac?~#0z(pjG=cdc9Z>u zV{C;?vA|9A0)7yqCd6N2uJAeGqmz&g@7yK*<)wICxisn1LMjnaMgc5f*^FKzejd6r zjYHqrDQ_=sW4*%bplPAp8{}WG@rRLLin!Vz;&n!C{O8Qr<8Z*wj2_d;0G3g`FJqMl zHoTg-c}Nj@5t68q;Ro)a=?LtS@^_K9%4~Ly8-%?Ru zL-s#BBlu+n+*d5aE`gWJ#KX3keKH=Gg+b36^c-|%b*jUqY-4v1HaO09J{I*LYCnT` z8}bzXp4oE#V`*Q|jY8GRiF!YNJ8TmR#bi=+p_3e$y0P+UkGp}LW_NH7!^q|;+}oOl z?qAQK>-P&+dwZb8AMnS5pq6N3AiR4|;Q9h%6P;%aF?H zTJ(ps9Dv!}w2|PmWvdpOsbnz^)zhm{oQ2eOn4X68wr)(Uas?q4>z6OC;U9rra}4VK zgsCAki8an7B#xKm8a`0FCIl&_Y2zCA`d6;lp#d#FXhhc~YROGh=cBee^+75hrcp@e zb$4V#l6|WBm_`SU_%|EM(!Kw28W~H`FdY_76Z{{_Rzls~)F=(o9-!MbW|Y!OqZZ#c zB6#w3_=j)r@3vXzpt|FF+C1r?N9cgr%Tg_wmaE^ diff --git a/profile-bee/src/cache.rs b/profile-bee/src/cache.rs index 8a83951..b19bb47 100644 --- a/profile-bee/src/cache.rs +++ b/profile-bee/src/cache.rs @@ -168,4 +168,21 @@ impl PointerStackFramesCache { hits as f64 / self.total as f64 * 100.0 ) } + + /// Remove all cached entries for a specific process (tgid). + /// + /// Called when a process calls execve() — the binary image changed so + /// all cached symbol resolutions for that PID are stale. + pub fn invalidate_pid(&mut self, tgid: u32) { + // Collect keys to remove (can't mutate while iterating) + let keys_to_remove: Vec<(u32, i32, i32)> = self + .map + .iter() + .filter(|(&(t, _, _), _)| t == tgid) + .map(|(&k, _)| k) + .collect(); + for key in keys_to_remove { + self.map.pop(&key); + } + } } diff --git a/profile-bee/src/ebpf.rs b/profile-bee/src/ebpf.rs index b8a1e79..bf1afb7 100644 --- a/profile-bee/src/ebpf.rs +++ b/profile-bee/src/ebpf.rs @@ -472,6 +472,39 @@ pub fn attach_process_exit_tracepoint(bpf: &mut Ebpf) -> Result<(), anyhow::Erro Ok(()) } +/// Set up the ring buffer for process exec events. +/// Returns `Ok(None)` if the eBPF binary doesn't include the exec map +/// (backward compat with older eBPF builds). +pub fn setup_process_exec_ring_buffer( + bpf: &mut Ebpf, +) -> Result>, anyhow::Error> { + match bpf.take_map("process_exec_events") { + Some(map) => Ok(Some(RingBuf::try_from(map)?)), + None => { + tracing::debug!("process_exec_events map not found (older eBPF binary)"); + Ok(None) + } + } +} + +/// Attach the sched_process_exec tracepoint for monitoring process exec events. +/// Returns `Ok(false)` if the eBPF binary doesn't include the exec program +/// (backward compat with older eBPF builds). +pub fn attach_process_exec_tracepoint(bpf: &mut Ebpf) -> Result { + let program = match bpf.program_mut("tracepoint_process_exec") { + Some(p) => p, + None => { + tracing::debug!("tracepoint_process_exec not found (older eBPF binary)"); + return Ok(false); + } + }; + let tp: &mut TracePoint = program.try_into()?; + tp.load()?; + tp.attach("sched", "sched_process_exec")?; + tracing::info!("attached sched_process_exec tracepoint"); + Ok(true) +} + impl EbpfProfiler { /// Set the target PID for eBPF filtering (0 = profile all processes) pub fn set_target_pid(&mut self, pid: u32) -> Result<(), anyhow::Error> { @@ -495,6 +528,23 @@ impl EbpfProfiler { Ok(()) } + /// Enable or disable process lifecycle tracking in eBPF. + /// When enabled, exit events fire for all processes (not just DWARF-tracked + /// or monitored PIDs). + pub fn set_lifecycle_tracking(&mut self, enabled: bool) -> Result<(), anyhow::Error> { + match self.bpf.map_mut("lifecycle_tracking_map") { + Some(map) => { + let mut arr: Array<&mut MapData, u32> = Array::try_from(map)?; + arr.set(0, if enabled { 1 } else { 0 }, 0)?; + Ok(()) + } + None => { + tracing::debug!("lifecycle_tracking_map not found (older eBPF binary)"); + Ok(()) + } + } + } + /// Load DWARF unwind tables into eBPF maps for a process pub fn load_dwarf_unwind_tables( &mut self, diff --git a/profile-bee/src/event_loop.rs b/profile-bee/src/event_loop.rs index c9ea3a0..5a5f8df 100644 --- a/profile-bee/src/event_loop.rs +++ b/profile-bee/src/event_loop.rs @@ -10,10 +10,11 @@ use std::time::{Duration, Instant}; use aya::maps::{MapData, StackTraceMap}; use aya::Ebpf; -use profile_bee_common::{StackInfo, EVENT_TRACE_ALWAYS}; +use profile_bee_common::{StackInfo, EVENT_TRACE_ALWAYS, PROCESS_EVENT_EXEC, PROCESS_EVENT_EXIT}; use crate::ebpf::{apply_dwarf_refresh, FramePointersPod, StackInfoPod}; use crate::pipeline::{DwarfThreadMsg, PerfWork}; +use crate::process_metadata::ProcessMetadataCache; use crate::trace_handler::TraceHandler; use crate::types::FrameCount; @@ -47,6 +48,11 @@ pub struct EventLoopConfig { pub group_by_process: bool, pub monitor_exit_pid: Option, pub tgid_request_tx: Option>, + /// Whether to maintain a process metadata cache. + /// When `true`, a `ProcessMetadataCache` is created and updated + /// on exec/exit events. Library consumers can access it via + /// `process_metadata()`. + pub enable_process_metadata: bool, } /// Owns the state needed to drain eBPF events and produce collapsed stacks. @@ -67,6 +73,8 @@ pub struct ProfilingEventLoop { // Persistent state across calls trace_count: HashMap, known_tgids: HashSet, + /// Optional process metadata cache, maintained via eBPF lifecycle events. + process_metadata: Option, } impl ProfilingEventLoop { @@ -94,6 +102,11 @@ impl ProfilingEventLoop { monitor_exit_pid: config.monitor_exit_pid, trace_count: HashMap::new(), known_tgids: HashSet::new(), + process_metadata: if config.enable_process_metadata { + Some(ProcessMetadataCache::new(4096)) + } else { + None + }, } } @@ -228,17 +241,46 @@ impl ProfilingEventLoop { tracing::warn!("{:#}", e); } } - PerfWork::ProcessExit(exit_event) => { - if let Some(tx) = &self.tgid_request_tx { - let _ = tx.send(DwarfThreadMsg::ProcessExited(exit_event.pid)); - } - self.known_tgids.remove(&exit_event.pid); - if Some(exit_event.pid) == self.monitor_exit_pid { - tracing::info!("target process {} exited, stopping", exit_event.pid); - stopped = true; - break; + PerfWork::ProcessEvent(event) => { + match event.event_type { + PROCESS_EVENT_EXIT => { + if let Some(tx) = &self.tgid_request_tx { + let _ = tx.send(DwarfThreadMsg::ProcessExited(event.pid)); + } + self.known_tgids.remove(&event.pid); + if let Some(ref mut cache) = self.process_metadata { + cache.remove(event.pid); + } + if Some(event.pid) == self.monitor_exit_pid { + tracing::info!("target process {} exited, stopping", event.pid); + stopped = true; + break; + } + tracing::debug!("process {} exited", event.pid); + } + PROCESS_EVENT_EXEC => { + tracing::debug!("process {} called exec", event.pid); + if let Some(tx) = &self.tgid_request_tx { + let _ = tx.send(DwarfThreadMsg::ProcessExeced(event.pid)); + } + // The PID is still alive but with a new binary image. + // Re-add to known_tgids so DWARF tables are reloaded. + self.known_tgids.remove(&event.pid); + self.known_tgids.insert(event.pid); + // Invalidate symbol caches for this PID + self.trace_handler.invalidate_caches_for_pid(event.pid); + if let Some(ref mut cache) = self.process_metadata { + cache.invalidate(event.pid); + } + } + _ => { + tracing::warn!( + "unknown process event type {} for pid {}", + event.event_type, + event.pid + ); + } } - tracing::debug!("DWARF-tracked process {} exited", exit_event.pid); } PerfWork::Stop => { tracing::info!("drain_events: received Stop"); @@ -380,6 +422,15 @@ impl ProfilingEventLoop { pub fn monitor_exit_pid(&self) -> Option { self.monitor_exit_pid } + + /// Access the process metadata cache (if enabled). + /// + /// Returns `None` if lifecycle tracking was not enabled. Library consumers + /// can use this to look up process metadata (cmdline, cwd, environ, etc.) + /// between `collect_raw()` calls for stack enrichment. + pub fn process_metadata(&mut self) -> Option<&mut ProcessMetadataCache> { + self.process_metadata.as_mut() + } } /// Convert structured [`FrameCount`] data into sorted collapse-format strings. diff --git a/profile-bee/src/lib.rs b/profile-bee/src/lib.rs index dd5d4f6..05a38fd 100644 --- a/profile-bee/src/lib.rs +++ b/profile-bee/src/lib.rs @@ -8,6 +8,7 @@ pub mod codeguru_upload; pub mod ebpf; pub mod event_loop; pub mod html; +pub mod process_metadata; pub mod session; pub mod spawn; diff --git a/profile-bee/src/pipeline.rs b/profile-bee/src/pipeline.rs index b8cdb30..64eb1d9 100644 --- a/profile-bee/src/pipeline.rs +++ b/profile-bee/src/pipeline.rs @@ -8,7 +8,10 @@ use std::collections::HashMap; use std::sync::mpsc; use aya::maps::{MapData, RingBuf}; -use profile_bee_common::{ProcessExitEvent, StackInfo}; +use profile_bee_common::{ + ProcessEvent, ProcessExecEvent, ProcessExitEvent, StackInfo, PROCESS_EVENT_EXEC, + PROCESS_EVENT_EXIT, +}; use crate::dwarf_unwind::{DwarfUnwindManager, MappingsDiff}; use crate::ebpf::{build_dwarf_refresh, DwarfRefreshUpdate}; @@ -23,8 +26,8 @@ pub enum PerfWork { StackInfo(StackInfo), /// Incremental DWARF table update from background thread. DwarfRefresh(DwarfRefreshUpdate), - /// Process exit detected by eBPF tracepoint. - ProcessExit(ProcessExitEvent), + /// Process lifecycle event (exec or exit) detected by eBPF tracepoint. + ProcessEvent(ProcessEvent), /// Signal to stop profiling. Stop, } @@ -35,6 +38,8 @@ pub enum DwarfThreadMsg { LoadProcess(u32), /// Process exited — clean up mappings and LPM trie entries. ProcessExited(u32), + /// Process called execve() — invalidate and reload DWARF tables. + ProcessExeced(u32), } /// Build a `DwarfRefreshUpdate` and send it on the `PerfWork` channel. @@ -110,6 +115,33 @@ pub fn dwarf_refresh_loop( tracked_pids.retain(|&p| p != tgid); last_maps_mtime.remove(&tgid); } + DwarfThreadMsg::ProcessExeced(tgid) => { + tracing::debug!( + "DWARF thread: process {} exec'd, invalidating and reloading", + tgid + ); + // Remove old DWARF data (binary image has changed) + if let Some(removal_diff) = manager.remove_process(tgid) { + if !send_refresh(&manager, &tx, vec![], removal_diff) { + return; + } + } + tracked_pids.retain(|&p| p != tgid); + last_maps_mtime.remove(&tgid); + + // Reload with the new binary + tracked_pids.push(tgid); + let maps_path = format!("/proc/{}/maps", tgid); + let pre_refresh_mtime = std::fs::metadata(&maps_path) + .ok() + .and_then(|m| m.modified().ok()); + if let Ok((new_shard_ids, diff)) = manager.refresh_process(tgid) { + if !send_refresh(&manager, &tx, new_shard_ids, diff) { + return; + } + last_maps_mtime.insert(tgid, pre_refresh_mtime); + } + } } } @@ -236,8 +268,14 @@ pub async fn setup_process_exit_ring_buffer_task( } let exit_event: ProcessExitEvent = unsafe { std::ptr::read_unaligned(item.as_ptr().cast()) }; - tracing::debug!("eBPF detected: PID {} has exited", exit_event.pid); - if perf_tx.send(PerfWork::ProcessExit(exit_event)).is_err() { + let event = ProcessEvent { + event_type: PROCESS_EVENT_EXIT, + pid: exit_event.pid, + exit_code: exit_event.exit_code, + _pad: 0, + }; + tracing::debug!("eBPF detected: PID {} has exited", event.pid); + if perf_tx.send(PerfWork::ProcessEvent(event)).is_err() { // Receiver dropped — event loop is done, exit task return Ok(()); } @@ -255,6 +293,53 @@ pub async fn setup_process_exit_ring_buffer_task( Ok(()) } +/// Async task that polls the eBPF ring buffer for process exec events and +/// forwards them to the profiling event loop via the `PerfWork` channel. +pub async fn setup_process_exec_ring_buffer_task( + ring_buf: RingBuf, + perf_tx: mpsc::Sender, +) -> anyhow::Result<()> { + use tokio::io::unix::AsyncFd; + let mut fd = AsyncFd::new(ring_buf)?; + + while let Ok(mut guard) = fd.readable_mut().await { + match guard.try_io(|inner| { + let ring_buf = inner.get_mut(); + while let Some(item) = ring_buf.next() { + if item.len() < ProcessExecEvent::STRUCT_SIZE { + tracing::warn!( + "Ring buffer item too small for ProcessExecEvent ({} < {}), skipping", + item.len(), + ProcessExecEvent::STRUCT_SIZE, + ); + continue; + } + let exec_event: ProcessExecEvent = + unsafe { std::ptr::read_unaligned(item.as_ptr().cast()) }; + let event = ProcessEvent { + event_type: PROCESS_EVENT_EXEC, + pid: exec_event.pid, + exit_code: 0, + _pad: 0, + }; + tracing::debug!("eBPF detected: PID {} called exec", event.pid); + if perf_tx.send(PerfWork::ProcessEvent(event)).is_err() { + return Ok(()); + } + } + Ok(()) + }) { + Ok(_) => { + guard.clear_ready(); + continue; + } + Err(_would_block) => continue, + } + } + + Ok(()) +} + /// Set up timer-based and child-process-based stopping mechanisms. /// /// This wires the non-interactive stopping mechanisms: diff --git a/profile-bee/src/process_metadata.rs b/profile-bee/src/process_metadata.rs new file mode 100644 index 0000000..672bb37 --- /dev/null +++ b/profile-bee/src/process_metadata.rs @@ -0,0 +1,274 @@ +//! Process metadata cache for reading and caching `/proc/[pid]` information. +//! +//! Provides a lazy, capacity-bounded cache of process metadata that agents +//! can use to look up `cmdline`, `cwd`, `environ`, `exe`, and mount namespace +//! for any PID seen during profiling. Entries are invalidated on exec events +//! (same PID, new binary) and removed on exit events. +//! +//! # Example +//! +//! ```rust,ignore +//! use profile_bee::process_metadata::ProcessMetadataCache; +//! +//! let mut cache = ProcessMetadataCache::new(1024); +//! +//! // Lazily loads from /proc on first access +//! if let Some(meta) = cache.get_or_load(1234) { +//! println!("exe: {:?}", meta.exe); +//! println!("cwd: {:?}", meta.cwd); +//! } +//! +//! // On exec: invalidate (same PID, new binary) +//! cache.invalidate(1234); +//! +//! // On exit: remove entirely +//! cache.remove(1234); +//! ``` + +use std::collections::HashMap; +use std::ffi::OsString; +use std::path::PathBuf; +use std::time::Instant; + +/// Cached metadata for a single process, read from `/proc/[pid]/`. +#[derive(Debug, Clone)] +pub struct ProcessMetadata { + /// The process ID. + pub pid: u32, + /// Parsed command line (`/proc/[pid]/cmdline`). + pub cmdline: Option>, + /// Working directory (`/proc/[pid]/cwd`). + pub cwd: Option, + /// Environment variables (`/proc/[pid]/environ`). + pub environ: Option>, + /// Executable path (`/proc/[pid]/exe` readlink). + pub exe: Option, + /// Mount namespace inode (`/proc/[pid]/ns/mnt`). + pub ns_mnt: Option, + /// When this entry was loaded. + pub loaded_at: Instant, +} + +impl ProcessMetadata { + /// Load metadata for a PID from `/proc`. + /// Returns `None` if the process doesn't exist (e.g., already exited). + fn load(pid: u32) -> Option { + let process = procfs::process::Process::new(pid as i32).ok()?; + + let cmdline = process.cmdline().ok(); + let cwd = process.cwd().ok(); + let environ = process.environ().ok(); + let exe = process.exe().ok(); + + // Get mount namespace inode + let ns_path = format!("/proc/{}/ns/mnt", pid); + let ns_mnt = std::fs::metadata(&ns_path).ok().map(|m| { + use std::os::unix::fs::MetadataExt; + m.ino() + }); + + Some(ProcessMetadata { + pid, + cmdline, + cwd, + environ, + exe, + ns_mnt, + loaded_at: Instant::now(), + }) + } + + /// Look up a specific environment variable by name. + pub fn environ_var(&self, key: &str) -> Option<&str> { + self.environ + .as_ref()? + .get(std::ffi::OsStr::new(key))? + .to_str() + } +} + +/// Capacity-bounded cache of process metadata, read lazily from `/proc/[pid]/`. +/// +/// Designed for profiling agents that need per-process context (cmdline, cwd, +/// environment variables) to enrich stack traces. Integrates with eBPF process +/// lifecycle events for cache invalidation (exec) and eviction (exit). +/// +/// The cache does NOT implement LRU eviction — it relies on explicit `remove()` +/// calls driven by eBPF exit events. If the cache reaches capacity, new entries +/// are not inserted (the process is still profiled, just without cached metadata). +pub struct ProcessMetadataCache { + cache: HashMap, + max_entries: usize, +} + +impl ProcessMetadataCache { + /// Create a new cache with the given maximum entry count. + pub fn new(max_entries: usize) -> Self { + Self { + cache: HashMap::new(), + max_entries, + } + } + + /// Get or lazily load metadata for a PID. + /// + /// On first access for a given PID, reads from `/proc/[pid]/`. Subsequent + /// calls return the cached entry. Returns `None` if the process doesn't + /// exist (already exited) or the cache is at capacity. + pub fn get_or_load(&mut self, pid: u32) -> Option<&ProcessMetadata> { + if self.cache.contains_key(&pid) { + return self.cache.get(&pid); + } + if self.cache.len() >= self.max_entries { + tracing::warn!( + "ProcessMetadataCache at capacity ({}), skipping pid {}", + self.max_entries, + pid + ); + return None; + } + if let Some(meta) = ProcessMetadata::load(pid) { + self.cache.insert(pid, meta); + self.cache.get(&pid) + } else { + None + } + } + + /// Get cached metadata without loading. Returns `None` if not cached. + pub fn get(&self, pid: u32) -> Option<&ProcessMetadata> { + self.cache.get(&pid) + } + + /// Invalidate the cached entry for a PID (e.g., on exec). + /// + /// The next `get_or_load()` call will re-read from `/proc`. + pub fn invalidate(&mut self, pid: u32) { + self.cache.remove(&pid); + } + + /// Remove the cached entry for a PID (e.g., on exit). + pub fn remove(&mut self, pid: u32) { + self.cache.remove(&pid); + } + + /// Look up a specific environment variable for a PID. + /// + /// Convenience method that combines `get_or_load` + `environ_var`. + pub fn environ_var(&mut self, pid: u32, key: &str) -> Option { + self.get_or_load(pid)? + .environ_var(key) + .map(|s| s.to_string()) + } + + /// Number of cached entries. + pub fn len(&self) -> usize { + self.cache.len() + } + + /// Whether the cache is empty. + pub fn is_empty(&self) -> bool { + self.cache.is_empty() + } + + /// Diagnostic summary string. + pub fn stats(&self) -> String { + format!( + "ProcessMetadataCache: {}/{} entries", + self.cache.len(), + self.max_entries, + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_cache_new() { + let cache = ProcessMetadataCache::new(100); + assert!(cache.is_empty()); + assert_eq!(cache.len(), 0); + } + + #[test] + fn test_cache_load_current_process() { + let mut cache = ProcessMetadataCache::new(100); + let pid = std::process::id(); + let meta = cache.get_or_load(pid); + assert!(meta.is_some(), "should be able to load current process"); + let meta = meta.unwrap(); + assert_eq!(meta.pid, pid); + assert!(meta.exe.is_some()); + assert!(meta.cmdline.is_some()); + assert!(meta.cwd.is_some()); + assert_eq!(cache.len(), 1); + } + + #[test] + fn test_cache_nonexistent_pid() { + let mut cache = ProcessMetadataCache::new(100); + // PID 0 is the kernel — /proc/0 may or may not exist depending on kernel. + // Use a very high PID that almost certainly doesn't exist. + let meta = cache.get_or_load(u32::MAX); + assert!(meta.is_none()); + assert!(cache.is_empty()); + } + + #[test] + fn test_cache_invalidate() { + let mut cache = ProcessMetadataCache::new(100); + let pid = std::process::id(); + cache.get_or_load(pid); + assert_eq!(cache.len(), 1); + cache.invalidate(pid); + assert!(cache.is_empty()); + // Re-load should work + assert!(cache.get_or_load(pid).is_some()); + } + + #[test] + fn test_cache_remove() { + let mut cache = ProcessMetadataCache::new(100); + let pid = std::process::id(); + cache.get_or_load(pid); + cache.remove(pid); + assert!(cache.is_empty()); + assert!(cache.get(pid).is_none()); + } + + #[test] + fn test_cache_capacity_limit() { + let mut cache = ProcessMetadataCache::new(1); + let pid = std::process::id(); + cache.get_or_load(pid); + // Cache is full — next load should return None for a different PID + // (but the PID probably doesn't exist anyway, so this is a bit indirect) + assert_eq!(cache.len(), 1); + } + + #[test] + fn test_environ_var() { + let mut cache = ProcessMetadataCache::new(100); + let pid = std::process::id(); + // PATH should be set in any normal environment + let path = cache.environ_var(pid, "PATH"); + assert!(path.is_some(), "PATH should be available"); + } + + #[test] + fn test_get_without_load() { + let cache = ProcessMetadataCache::new(100); + let pid = std::process::id(); + // get() without get_or_load() should return None + assert!(cache.get(pid).is_none()); + } + + #[test] + fn test_stats() { + let cache = ProcessMetadataCache::new(100); + let stats = cache.stats(); + assert!(stats.contains("0/100")); + } +} diff --git a/profile-bee/src/session.rs b/profile-bee/src/session.rs index 1c35435..144396d 100644 --- a/profile-bee/src/session.rs +++ b/profile-bee/src/session.rs @@ -12,14 +12,15 @@ use tokio::task; use crate::dwarf_unwind::DwarfUnwindManager; use crate::ebpf::{ - attach_process_exit_tracepoint, setup_ebpf_with_tp_fallback, setup_process_exit_ring_buffer, - setup_ring_buffer, ProfilerConfig, + attach_process_exec_tracepoint, attach_process_exit_tracepoint, setup_ebpf_with_tp_fallback, + setup_process_exec_ring_buffer, setup_process_exit_ring_buffer, setup_ring_buffer, + ProfilerConfig, }; use crate::event_loop::{EventLoopConfig, ProfilingEventLoop}; use crate::output::OutputSink; use crate::pipeline::{ - dwarf_refresh_loop, setup_process_exit_ring_buffer_task, setup_ring_buffer_task, - setup_timer_and_child_stop, DwarfThreadMsg, PerfWork, + dwarf_refresh_loop, setup_process_exec_ring_buffer_task, setup_process_exit_ring_buffer_task, + setup_ring_buffer_task, setup_timer_and_child_stop, DwarfThreadMsg, PerfWork, }; use crate::spawn::{setup_process_to_profile, SpawnProcess, StopHandler}; @@ -42,6 +43,15 @@ pub struct SessionConfig { pub group_by_process: bool, /// Whether to set up process-exit monitoring for external PIDs. pub monitor_exit: bool, + /// Track process lifecycle events (exec + broadened exit) via eBPF tracepoints. + /// + /// When `true`, attaches `sched_process_exec` and broadens `sched_process_exit` + /// to fire for all processes (not just DWARF-tracked or monitored PIDs). + /// + /// Automatically enabled when `profiler.dwarf` is `true` (DWARF tables benefit + /// from proactive exec/exit detection). Agents should enable this for + /// `ProcessMetadataCache` eviction and process-aware profiling. + pub track_process_lifecycle: bool, } impl Default for SessionConfig { @@ -54,6 +64,7 @@ impl Default for SessionConfig { group_by_cpu: false, group_by_process: false, monitor_exit: true, + track_process_lifecycle: false, } } } @@ -178,7 +189,7 @@ impl ProfilingSession { // 6. Ring buffer setup let ring_buf = setup_ring_buffer(&mut ebpf_profiler.bpf)?; - // 7. Process exit monitoring + // 7. Process exit monitoring + lifecycle tracking let external_pid = if spawn.is_none() { config.profiler.pid } else { @@ -186,11 +197,22 @@ impl ProfilingSession { }; let monitor_exit_pid = external_pid; - if (external_pid.is_some() || config.profiler.dwarf) && config.monitor_exit { + // Lifecycle tracking: auto-enable when DWARF is on, or when explicitly requested + let lifecycle_tracking = config.track_process_lifecycle || config.profiler.dwarf; + + if (external_pid.is_some() || config.profiler.dwarf || lifecycle_tracking) + && config.monitor_exit + { attach_process_exit_tracepoint(&mut ebpf_profiler.bpf)?; if let Some(pid_to_monitor) = external_pid { ebpf_profiler.set_monitor_exit_pid(pid_to_monitor)?; } + + // Enable broadened exit events when lifecycle tracking is on + if lifecycle_tracking { + ebpf_profiler.set_lifecycle_tracking(true)?; + } + let exit_ring_buf = setup_process_exit_ring_buffer(&mut ebpf_profiler.bpf)?; let exit_perf_tx = perf_tx.clone(); task::spawn(async move { @@ -202,6 +224,24 @@ impl ProfilingSession { }); } + // 7b. Process exec monitoring (lifecycle tracking only) + if lifecycle_tracking + && config.monitor_exit + && attach_process_exec_tracepoint(&mut ebpf_profiler.bpf)? + { + if let Ok(Some(exec_ring_buf)) = setup_process_exec_ring_buffer(&mut ebpf_profiler.bpf) + { + let exec_perf_tx = perf_tx.clone(); + task::spawn(async move { + if let Err(e) = + setup_process_exec_ring_buffer_task(exec_ring_buf, exec_perf_tx).await + { + tracing::error!("Failed to set up process exec ring buffer: {:?}", e); + } + }); + } + } + // 8. Start ring buffer polling task let rb_perf_tx = perf_tx.clone(); task::spawn(async move { @@ -220,6 +260,7 @@ impl ProfilingSession { group_by_process: config.group_by_process, monitor_exit_pid, tgid_request_tx, + enable_process_metadata: lifecycle_tracking, }; let event_loop = ProfilingEventLoop::new( ebpf_profiler.counts, diff --git a/profile-bee/src/trace_handler.rs b/profile-bee/src/trace_handler.rs index 346e907..8f48ee5 100644 --- a/profile-bee/src/trace_handler.rs +++ b/profile-bee/src/trace_handler.rs @@ -94,6 +94,15 @@ impl TraceHandler { } } + /// Invalidate all cached symbol resolutions for a specific process. + /// + /// Called when a process calls execve() — the binary image changed so + /// all cached address-to-symbol mappings for that PID are stale. + pub fn invalidate_caches_for_pid(&mut self, tgid: u32) { + self.cache.invalidate_pid(tgid); + tracing::debug!("invalidated symbol caches for pid {}", tgid); + } + pub fn print_stats(&self) { tracing::info!("{}", self.cache.stats()); } From f77b7acd6a08b7004779c631ac04a7d3f060a419 Mon Sep 17 00:00:00 2001 From: Joshua Koo Date: Tue, 31 Mar 2026 16:04:12 +0000 Subject: [PATCH 2/6] fix: defer metadata eviction on exit, flush trace_count on exec, detect PID reuse Three correctness fixes for the process lifecycle tracking: 1. Defer ProcessMetadataCache eviction with one-cycle buffer: exit PIDs are collected in pending_exit_pids during drain_events() and evicted at the START of the next drain_events() call. This gives agents one full collection cycle to read metadata for processes that exited, covering async ring buffer delivery where a sample may arrive after its process's exit event. evict_pending_exits() remains public for agents that need immediate eviction under memory pressure. 2. Flush trace_count on exec: pre-exec StackInfo entries were left in trace_count after invalidating symbol caches, causing build_raw_stacks() to symbolize old addresses against the new binary image (producing [unknown] symbols). Now trace_count.retain() removes entries for the exec'd PID before cache invalidation. 3. Detect PID reuse via starttime: get_or_load() now reads /proc/[pid]/stat starttime and compares it to the cached entry. If they differ, the PID was recycled and the stale entry is replaced with fresh metadata. --- profile-bee/src/event_loop.rs | 41 +++++++++++++++++++++++++-- profile-bee/src/process_metadata.rs | 43 +++++++++++++++++++++++++++-- 2 files changed, 79 insertions(+), 5 deletions(-) diff --git a/profile-bee/src/event_loop.rs b/profile-bee/src/event_loop.rs index 5a5f8df..debacac 100644 --- a/profile-bee/src/event_loop.rs +++ b/profile-bee/src/event_loop.rs @@ -75,6 +75,11 @@ pub struct ProfilingEventLoop { known_tgids: HashSet, /// Optional process metadata cache, maintained via eBPF lifecycle events. process_metadata: Option, + /// PIDs that exited during the current drain_events() call. + /// Eviction from `process_metadata` is deferred until after + /// `build_raw_stacks()` completes, so agents can still look up + /// metadata for processes that exited within the collection window. + pending_exit_pids: Vec, } impl ProfilingEventLoop { @@ -107,6 +112,7 @@ impl ProfilingEventLoop { } else { None }, + pending_exit_pids: Vec::new(), } } @@ -156,6 +162,24 @@ impl ProfilingEventLoop { RawCollectResult { stacks, stopped } } + /// Immediately evict process metadata for PIDs that exited during + /// previous `drain_events()` calls. + /// + /// Normally eviction is deferred automatically: exit PIDs from cycle N + /// are evicted at the start of cycle N+1's `drain_events()` call, + /// giving agents one full collection cycle to read metadata for + /// processes that exited. Call this method only if you need to free + /// cache entries sooner (e.g., memory pressure). + pub fn evict_pending_exits(&mut self) { + if let Some(ref mut cache) = self.process_metadata { + for pid in self.pending_exit_pids.drain(..) { + cache.remove(pid); + } + } else { + self.pending_exit_pids.clear(); + } + } + /// Drain events from the PerfWork channel, symbolize stacks on the fly, /// and return `(local_counting, stopped)`. /// @@ -166,6 +190,12 @@ impl ProfilingEventLoop { rx: &mpsc::Receiver, timeout: Option, ) -> (bool, bool) { + // Evict metadata for PIDs that exited in the PREVIOUS cycle. + // This gives agents one full collection cycle to read metadata + // for processes that exited, covering async ring buffer delivery + // where a sample may arrive after its process's exit event. + self.evict_pending_exits(); + let mut queue_processed = 0u64; let mut stopped = false; let local_counting = self.stream_mode == EVENT_TRACE_ALWAYS; @@ -248,8 +278,11 @@ impl ProfilingEventLoop { let _ = tx.send(DwarfThreadMsg::ProcessExited(event.pid)); } self.known_tgids.remove(&event.pid); - if let Some(ref mut cache) = self.process_metadata { - cache.remove(event.pid); + // Defer metadata cache eviction until after build_raw_stacks() + // so agents can still look up metadata for processes that + // exited within the collection window. + if self.process_metadata.is_some() { + self.pending_exit_pids.push(event.pid); } if Some(event.pid) == self.monitor_exit_pid { tracing::info!("target process {} exited, stopping", event.pid); @@ -263,6 +296,10 @@ impl ProfilingEventLoop { if let Some(tx) = &self.tgid_request_tx { let _ = tx.send(DwarfThreadMsg::ProcessExeced(event.pid)); } + // Flush pre-exec samples from trace_count before invalidating + // caches — otherwise build_raw_stacks() would try to symbolize + // old addresses against the new binary image. + self.trace_count.retain(|k, _| k.tgid != event.pid); // The PID is still alive but with a new binary image. // Re-add to known_tgids so DWARF tables are reloaded. self.known_tgids.remove(&event.pid); diff --git a/profile-bee/src/process_metadata.rs b/profile-bee/src/process_metadata.rs index 672bb37..b3ee9ad 100644 --- a/profile-bee/src/process_metadata.rs +++ b/profile-bee/src/process_metadata.rs @@ -45,6 +45,10 @@ pub struct ProcessMetadata { pub exe: Option, /// Mount namespace inode (`/proc/[pid]/ns/mnt`). pub ns_mnt: Option, + /// Process start time in clock ticks since boot (`/proc/[pid]/stat` field 22). + /// Used to detect PID reuse — if a cached entry's start_time differs from + /// the current `/proc/[pid]/stat` starttime, the PID was recycled. + pub start_time: u64, /// When this entry was loaded. pub loaded_at: Instant, } @@ -55,6 +59,9 @@ impl ProcessMetadata { fn load(pid: u32) -> Option { let process = procfs::process::Process::new(pid as i32).ok()?; + let stat = process.stat().ok(); + let start_time = stat.as_ref().map_or(0, |s| s.starttime); + let cmdline = process.cmdline().ok(); let cwd = process.cwd().ok(); let environ = process.environ().ok(); @@ -74,10 +81,20 @@ impl ProcessMetadata { environ, exe, ns_mnt, + start_time, loaded_at: Instant::now(), }) } + /// Read the current start time for a PID from `/proc/[pid]/stat`. + /// Returns 0 if the process doesn't exist or stat can't be read. + fn current_start_time(pid: u32) -> u64 { + procfs::process::Process::new(pid as i32) + .ok() + .and_then(|p| p.stat().ok()) + .map_or(0, |s| s.starttime) + } + /// Look up a specific environment variable by name. pub fn environ_var(&self, key: &str) -> Option<&str> { self.environ @@ -113,11 +130,31 @@ impl ProcessMetadataCache { /// Get or lazily load metadata for a PID. /// /// On first access for a given PID, reads from `/proc/[pid]/`. Subsequent - /// calls return the cached entry. Returns `None` if the process doesn't - /// exist (already exited) or the cache is at capacity. + /// calls return the cached entry unless the PID has been recycled (detected + /// via `/proc/[pid]/stat` starttime mismatch). Returns `None` if the process + /// doesn't exist (already exited) or the cache is at capacity. pub fn get_or_load(&mut self, pid: u32) -> Option<&ProcessMetadata> { if self.cache.contains_key(&pid) { - return self.cache.get(&pid); + // Validate against PID reuse: if the start_time changed, the + // cached entry belongs to a different process incarnation. + let cached_start = self.cache[&pid].start_time; + if cached_start != 0 { + let current_start = ProcessMetadata::current_start_time(pid); + if current_start != 0 && current_start != cached_start { + tracing::debug!( + "PID {} recycled (starttime {} -> {}), reloading metadata", + pid, + cached_start, + current_start, + ); + self.cache.remove(&pid); + // Fall through to reload below + } else { + return self.cache.get(&pid); + } + } else { + return self.cache.get(&pid); + } } if self.cache.len() >= self.max_entries { tracing::warn!( From 45821a12ee95a331285351195e766448dfeb0da3 Mon Sep 17 00:00:00 2001 From: Joshua Koo Date: Wed, 1 Apr 2026 00:30:48 +0000 Subject: [PATCH 3/6] perf: filter thread exits in eBPF exit handler (tid != tgid early return) sched_process_exit fires for every thread exit, not just process exits. On thread-heavy hosts (Java, Go, Node.js), this generated thousands of duplicate ring buffer events per second for the same tgid, each waking the tokio AsyncFd task and pushing through the mpsc channel. Add tid != tgid early return so only actual process exits (main thread) produce events. sched_process_exec already only fires once per execve() so no filter needed there. --- profile-bee-ebpf/src/lib.rs | 15 +++++++++++++++ profile-bee/ebpf-bin/profile-bee.bpf.o | Bin 66336 -> 66392 bytes 2 files changed, 15 insertions(+) diff --git a/profile-bee-ebpf/src/lib.rs b/profile-bee-ebpf/src/lib.rs index 6b52c1e..0979f73 100644 --- a/profile-bee-ebpf/src/lib.rs +++ b/profile-bee-ebpf/src/lib.rs @@ -1401,11 +1401,23 @@ unsafe fn collect_off_cpu_trace_percpu(ctx: &C, now: u64) { /// - The monitored PID exits (--pid mode: stops profiling), OR /// - A DWARF-tracked process exits (cleanup LPM trie entries), OR /// - Lifecycle tracking is enabled (system-wide process awareness). +/// +/// Only fires for process exits (tid == tgid), not individual thread exits. +/// sched_process_exit fires for every thread exit; without this filter, +/// thread-heavy workloads (Java, Go) would generate thousands of +/// duplicate events per second for the same tgid. #[inline(always)] pub unsafe fn handle_process_exit(ctx: C) { use profile_bee_common::ProcessExitEvent; + let tid = ctx.pid(); let tgid = ctx.tgid(); + + // Skip thread exits — only fire for the main thread (process exit). + if tid != tgid { + return; + } + let monitor_pid = monitor_exit_pid(); // Send notification if this is a monitored PID, DWARF-tracked process, @@ -1429,6 +1441,9 @@ pub unsafe fn handle_process_exit(ctx: C) { /// Handle sched_process_exec tracepoint for process exec monitoring. /// Sends a ProcessExecEvent when a process calls execve(), enabling /// proactive DWARF table loading and metadata cache invalidation. +/// +/// Note: sched_process_exec only fires once per execve() (not per-thread), +/// so no tid == tgid filter is needed here. #[inline(always)] pub unsafe fn handle_process_exec(ctx: C) { use profile_bee_common::ProcessExecEvent; diff --git a/profile-bee/ebpf-bin/profile-bee.bpf.o b/profile-bee/ebpf-bin/profile-bee.bpf.o index 92c706236d21a9527115c6a2d997769da74d1763..49ecc1463f34ac0bb5ab9c21b2dbb1b9afd88366 100644 GIT binary patch delta 386 zcmZ3`#&V;LWr7CdjE$P#@0+wTFfi}|aXAnx0C6(cm;XSDVSgfsWMD{Vgou|z=~zZp zhRHe)wsC5MrA67)C%=AB%bU&yRR*Fbr#@WkbQ5e$B2XI$h%!C_^H~|LfoV~;M^OG1 zFdfd$zyMMj%6W}y#k4^0%Six;;RDL z0#M6Alo}A9K;nbc3ovcY`r*gIcwq9*Kl>RsOs@P}&v;?-Pap|o|Ep(G_&E93zdFVV zlPmw%GZsw#2_zRxtz-l$n);K`ov~rMBO{|ZW5D!EMn-$a3Db9is14J90x2em$J4Et Z80{DxrdKinMH8m)1W^^!e=;$u0|1eWUpfE) delta 339 zcmcc7#b?N<-PNK*b+y?tY}p#^|wm=Zoq5 zK@K28fB>XM8HgKz7z99k6(HUL;s61NuL{I3KpY?d@zsEM0*C56J&66Up6SJh$wvR{7$YWc{9n&#G1ZW8W8vvNOpGdw9Md-fWf(Q4 Ne*{qu(+!y!)d9=LR@DFi From 1cda34fafa022e5d13ee51824d65aed4e1f4f97f Mon Sep 17 00:00:00 2001 From: Joshua Koo Date: Wed, 1 Apr 2026 04:22:06 +0000 Subject: [PATCH 4/6] fix: require valid start_time in ProcessMetadataCache to prevent stale entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit load() used map_or(0, ...) when stat() failed, caching entries with start_time == 0. The get_or_load() validation skipped PID reuse checks when cached_start == 0, so stale metadata was served indefinitely. Now load() requires stat() to succeed (early return None on failure), guaranteeing all cached entries have a valid start_time. Simplified the get_or_load() validation — the cached_start != 0 guard is no longer needed since load() enforces it. --- profile-bee/src/process_metadata.rs | 31 +++++++++++++---------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/profile-bee/src/process_metadata.rs b/profile-bee/src/process_metadata.rs index b3ee9ad..7622feb 100644 --- a/profile-bee/src/process_metadata.rs +++ b/profile-bee/src/process_metadata.rs @@ -55,12 +55,13 @@ pub struct ProcessMetadata { impl ProcessMetadata { /// Load metadata for a PID from `/proc`. - /// Returns `None` if the process doesn't exist (e.g., already exited). + /// Returns `None` if the process doesn't exist or `/proc/[pid]/stat` + /// can't be read (start_time is required for PID reuse detection). fn load(pid: u32) -> Option { let process = procfs::process::Process::new(pid as i32).ok()?; - let stat = process.stat().ok(); - let start_time = stat.as_ref().map_or(0, |s| s.starttime); + let stat = process.stat().ok()?; + let start_time = stat.starttime; let cmdline = process.cmdline().ok(); let cwd = process.cwd().ok(); @@ -138,20 +139,16 @@ impl ProcessMetadataCache { // Validate against PID reuse: if the start_time changed, the // cached entry belongs to a different process incarnation. let cached_start = self.cache[&pid].start_time; - if cached_start != 0 { - let current_start = ProcessMetadata::current_start_time(pid); - if current_start != 0 && current_start != cached_start { - tracing::debug!( - "PID {} recycled (starttime {} -> {}), reloading metadata", - pid, - cached_start, - current_start, - ); - self.cache.remove(&pid); - // Fall through to reload below - } else { - return self.cache.get(&pid); - } + let current_start = ProcessMetadata::current_start_time(pid); + if current_start != 0 && current_start != cached_start { + tracing::debug!( + "PID {} recycled (starttime {} -> {}), reloading metadata", + pid, + cached_start, + current_start, + ); + self.cache.remove(&pid); + // Fall through to reload below } else { return self.cache.get(&pid); } From 7d3ca02ff5c2d2f0c8a134b73fe169bd6ea9958c Mon Sep 17 00:00:00 2001 From: Joshua Koo Date: Wed, 1 Apr 2026 04:50:28 +0000 Subject: [PATCH 5/6] fix: treat unreadable /proc/[pid]/stat as process gone in metadata cache get_or_load() returned stale cached entries when current_start_time() returned 0 (process exited, /proc/[pid]/stat unreadable). The condition `current_start != 0 && current_start != cached_start` evaluated to false, falling through to the else branch that returned the stale entry. Now current_start == 0 is handled explicitly: remove the stale entry and return None, since the process no longer exists. --- profile-bee/src/process_metadata.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/profile-bee/src/process_metadata.rs b/profile-bee/src/process_metadata.rs index 7622feb..110048e 100644 --- a/profile-bee/src/process_metadata.rs +++ b/profile-bee/src/process_metadata.rs @@ -136,11 +136,16 @@ impl ProcessMetadataCache { /// doesn't exist (already exited) or the cache is at capacity. pub fn get_or_load(&mut self, pid: u32) -> Option<&ProcessMetadata> { if self.cache.contains_key(&pid) { - // Validate against PID reuse: if the start_time changed, the - // cached entry belongs to a different process incarnation. + // Validate against PID reuse and process exit: compare the cached + // start_time against the current /proc/[pid]/stat starttime. let cached_start = self.cache[&pid].start_time; let current_start = ProcessMetadata::current_start_time(pid); - if current_start != 0 && current_start != cached_start { + if current_start == 0 { + // Process is gone (/proc/[pid]/stat unreadable) — remove stale entry. + self.cache.remove(&pid); + return None; + } + if current_start != cached_start { tracing::debug!( "PID {} recycled (starttime {} -> {}), reloading metadata", pid, From 276964c394e2349e936dcfb7c40b532313711f42 Mon Sep 17 00:00:00 2001 From: Joshua Koo Date: Wed, 1 Apr 2026 05:17:51 +0000 Subject: [PATCH 6/6] fix: redact environ/cmdline in ProcessMetadata Debug impl Replace derived Debug with manual impl that shows entry counts instead of values ('', '') to prevent secrets like AWS_SECRET_ACCESS_KEY from leaking into logs via {:?} formatting. --- profile-bee/src/process_metadata.rs | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/profile-bee/src/process_metadata.rs b/profile-bee/src/process_metadata.rs index 110048e..44dbd14 100644 --- a/profile-bee/src/process_metadata.rs +++ b/profile-bee/src/process_metadata.rs @@ -31,7 +31,7 @@ use std::path::PathBuf; use std::time::Instant; /// Cached metadata for a single process, read from `/proc/[pid]/`. -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct ProcessMetadata { /// The process ID. pub pid: u32, @@ -53,6 +53,28 @@ pub struct ProcessMetadata { pub loaded_at: Instant, } +// Manual Debug impl to avoid leaking secrets from environ/cmdline into logs. +impl std::fmt::Debug for ProcessMetadata { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ProcessMetadata") + .field("pid", &self.pid) + .field("exe", &self.exe) + .field("cwd", &self.cwd) + .field( + "cmdline", + &self.cmdline.as_ref().map(|c| format!("<{} args>", c.len())), + ) + .field( + "environ", + &self.environ.as_ref().map(|e| format!("<{} vars>", e.len())), + ) + .field("ns_mnt", &self.ns_mnt) + .field("start_time", &self.start_time) + .field("loaded_at", &self.loaded_at) + .finish() + } +} + impl ProcessMetadata { /// Load metadata for a PID from `/proc`. /// Returns `None` if the process doesn't exist or `/proc/[pid]/stat`