diff --git a/docs/custom_profiling_agent.md b/docs/custom_profiling_agent.md index f92c9dc..53eff36 100644 --- a/docs/custom_profiling_agent.md +++ b/docs/custom_profiling_agent.md @@ -77,6 +77,9 @@ async fn main() -> anyhow::Result<()> { ..ProfilerConfig::default() }, duration_ms: 0, // Run indefinitely (we control the loop) + // Enable process lifecycle tracking for metadata cache + DWARF improvements. + // Note: automatically enabled when dwarf: true, shown here for clarity. + track_process_lifecycle: true, ..SessionConfig::default() }; @@ -113,7 +116,45 @@ async fn main() -> anyhow::Result<()> { ## Stack Enrichment Pattern -A common pattern is to look up per-process metadata and annotate stacks. +### Using the Built-in ProcessMetadataCache + +When `track_process_lifecycle` is enabled (or DWARF is on), the event loop +maintains a `ProcessMetadataCache` that automatically reads `/proc/[pid]/*` +on first access and evicts entries when eBPF detects process exec/exit events. + +```rust +use profile_bee::types::{FrameCount, StackFrameInfo}; + +fn enrich_with_builtin_cache( + session: &mut ProfilingSession, + stacks: &[FrameCount], +) { + // Access the built-in metadata cache (requires track_process_lifecycle) + if let Some(cache) = session.event_loop.process_metadata() { + for fc in stacks { + if let Some(first_frame) = fc.frames.first() { + let pid = first_frame.pid as u32; + if let Some(meta) = cache.get_or_load(pid) { + // meta.cmdline, meta.cwd, meta.environ, meta.exe, meta.ns_mnt + // are all available for enrichment + if let Some(env_val) = meta.environ_var("MY_SERVICE_NAME") { + // Use the environment variable for grouping + } + } + } + } + } +} +``` + +The cache is automatically maintained by the event loop: +- On `sched_process_exec`: invalidates the entry (same PID, new binary) +- On `sched_process_exit`: removes the entry entirely +- No manual eviction needed — eBPF events handle it + +### Custom Enrichment (without built-in cache) + +For more control, you can build your own metadata cache. Since `StackFrameInfo` contains the `pid`, you can read `/proc/[pid]/*` to get process context: @@ -331,3 +372,47 @@ pub struct StackFrameInfo { pub ns: Option, // mount namespace inode } ``` + +### `ProcessMetadataCache` + +```rust +use profile_bee::process_metadata::ProcessMetadataCache; + +// Access via the event loop (when track_process_lifecycle is enabled): +if let Some(cache) = session.event_loop.process_metadata() { + // Lazily loads from /proc on first access + if let Some(meta) = cache.get_or_load(pid) { + meta.cmdline; // Option> + meta.cwd; // Option + meta.environ; // Option> + meta.exe; // Option + meta.ns_mnt; // Option — mount namespace inode + } + + // Convenience: look up a specific environment variable + let val = cache.environ_var(pid, "MY_ENV_VAR"); +} +``` + +Entries are automatically: +- **Invalidated** on `sched_process_exec` (same PID, new binary image) +- **Removed** on `sched_process_exit` (process gone) + +### `SessionConfig` — Lifecycle Tracking + +```rust +pub struct SessionConfig { + // ... other fields ... + + /// Track process lifecycle events (exec + broadened exit) via eBPF tracepoints. + /// Automatically enabled when profiler.dwarf is true. + pub track_process_lifecycle: bool, +} +``` + +When enabled: +- Attaches `sched:sched_process_exec` tracepoint (detects `execve()` calls) +- Broadens `sched:sched_process_exit` to fire for all processes (not just DWARF-tracked) +- Creates a `ProcessMetadataCache` in the event loop +- DWARF tables are proactively reloaded on exec (no 1-second poll delay) +- Symbol caches are invalidated on exec (no stale resolutions) diff --git a/profile-bee-common/src/lib.rs b/profile-bee-common/src/lib.rs index fe6a9e7..1624348 100644 --- a/profile-bee-common/src/lib.rs +++ b/profile-bee-common/src/lib.rs @@ -55,7 +55,8 @@ impl ProbeEvent { pub const STRUCT_SIZE: usize = size_of::(); } -/// Process exit notification sent from eBPF to userspace +/// Process exit notification sent from eBPF to userspace. +/// Deprecated: prefer `ProcessEvent` which carries both exec and exit events. #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] #[repr(C)] pub struct ProcessExitEvent { @@ -67,6 +68,45 @@ impl ProcessExitEvent { pub const STRUCT_SIZE: usize = size_of::(); } +/// Process exec notification sent from eBPF to userspace when a process +/// calls execve(). Used for proactive DWARF table loading and cache invalidation. +#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] +#[repr(C)] +pub struct ProcessExecEvent { + pub pid: u32, + pub _pad: u32, +} + +impl ProcessExecEvent { + pub const STRUCT_SIZE: usize = size_of::(); +} + +// --- Process Lifecycle Event Types --- + +/// Process lifecycle event type: process exited. +pub const PROCESS_EVENT_EXIT: u32 = 0; +/// Process lifecycle event type: process called execve(). +pub const PROCESS_EVENT_EXEC: u32 = 1; + +/// Unified process lifecycle event sent from eBPF to userspace. +/// Carries both exec and exit notifications through a single ring buffer, +/// replacing the narrower `ProcessExitEvent`. +#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] +#[repr(C)] +pub struct ProcessEvent { + /// Event type: `PROCESS_EVENT_EXIT` or `PROCESS_EVENT_EXEC`. + pub event_type: u32, + /// The PID (tgid) of the process. + pub pid: u32, + /// For EXIT events: the exit code. For EXEC events: 0. + pub exit_code: i32, + pub _pad: u32, +} + +impl ProcessEvent { + pub const STRUCT_SIZE: usize = size_of::(); +} + // --- DWARF Unwind Table Types (used by eBPF-side unwinding) --- /// How to compute the CFA (Canonical Frame Address) diff --git a/profile-bee-ebpf/Cargo.lock b/profile-bee-ebpf/Cargo.lock index e414b07..a5df8cc 100644 --- a/profile-bee-ebpf/Cargo.lock +++ b/profile-bee-ebpf/Cargo.lock @@ -220,7 +220,7 @@ dependencies = [ [[package]] name = "profile-bee-common" -version = "0.3.7" +version = "0.3.9" [[package]] name = "profile-bee-ebpf" diff --git a/profile-bee-ebpf/src/lib.rs b/profile-bee-ebpf/src/lib.rs index 68f45b4..0979f73 100644 --- a/profile-bee-ebpf/src/lib.rs +++ b/profile-bee-ebpf/src/lib.rs @@ -72,6 +72,12 @@ static TARGET_PID_MAP: Array = Array::with_max_entries(1, 0); #[map(name = "monitor_exit_pid_map")] static MONITOR_EXIT_PID_MAP: Array = Array::with_max_entries(1, 0); +/// Whether process lifecycle tracking is enabled (0 = disabled, 1 = enabled). +/// When enabled, exit events fire for ALL process exits (not just DWARF-tracked +/// or monitored PIDs), and the exec tracepoint sends exec events. +#[map(name = "lifecycle_tracking_map")] +static LIFECYCLE_TRACKING_MAP: Array = Array::with_max_entries(1, 0); + #[inline] unsafe fn skip_idle() -> bool { let skip = core::ptr::read_volatile(&SKIP_IDLE); @@ -119,6 +125,14 @@ unsafe fn monitor_exit_pid() -> u32 { } } +#[inline] +unsafe fn lifecycle_tracking_enabled() -> bool { + match LIFECYCLE_TRACKING_MAP.get(0) { + Some(&v) => v != 0, + None => false, + } +} + /* Setup maps */ #[map] static mut STORAGE: PerCpuArray = PerCpuArray::with_max_entries(1, 0); @@ -136,6 +150,9 @@ static RING_BUF_STACKS: RingBuf = RingBuf::with_byte_size(STACK_SIZE, 0); #[map(name = "process_exit_events")] static RING_BUF_PROCESS_EXIT: RingBuf = RingBuf::with_byte_size(4096, 0); +#[map(name = "process_exec_events")] +static RING_BUF_PROCESS_EXEC: RingBuf = RingBuf::with_byte_size(4096, 0); + #[map(name = "stack_traces")] pub static STACK_TRACES: StackTrace = StackTrace::with_max_entries(STACK_SIZE, 0); // DWARF unwind maps — single outer ArrayOfMaps containing per-binary inner Array maps. @@ -1382,19 +1399,34 @@ unsafe fn collect_off_cpu_trace_percpu(ctx: &C, now: u64) { /// Handle sched_process_exit tracepoint for process exit monitoring. /// Sends a ProcessExitEvent when: /// - The monitored PID exits (--pid mode: stops profiling), OR -/// - A DWARF-tracked process exits (cleanup LPM trie entries). +/// - A DWARF-tracked process exits (cleanup LPM trie entries), OR +/// - Lifecycle tracking is enabled (system-wide process awareness). +/// +/// Only fires for process exits (tid == tgid), not individual thread exits. +/// sched_process_exit fires for every thread exit; without this filter, +/// thread-heavy workloads (Java, Go) would generate thousands of +/// duplicate events per second for the same tgid. #[inline(always)] pub unsafe fn handle_process_exit(ctx: C) { use profile_bee_common::ProcessExitEvent; + let tid = ctx.pid(); let tgid = ctx.tgid(); + + // Skip thread exits — only fire for the main thread (process exit). + if tid != tgid { + return; + } + let monitor_pid = monitor_exit_pid(); - // Send notification if this is either the monitored PID or a DWARF-tracked process + // Send notification if this is a monitored PID, DWARF-tracked process, + // or lifecycle tracking is enabled (for system-wide process awareness). let is_monitored = monitor_pid != 0 && tgid == monitor_pid; let is_dwarf_tracked = unsafe { DWARF_TGIDS.get(&tgid).is_some() }; + let lifecycle = unsafe { lifecycle_tracking_enabled() }; - if is_monitored || is_dwarf_tracked { + if is_monitored || is_dwarf_tracked || lifecycle { if let Some(mut entry) = RING_BUF_PROCESS_EXIT.reserve::(0) { let exit_event = ProcessExitEvent { pid: tgid, @@ -1406,6 +1438,25 @@ pub unsafe fn handle_process_exit(ctx: C) { } } +/// Handle sched_process_exec tracepoint for process exec monitoring. +/// Sends a ProcessExecEvent when a process calls execve(), enabling +/// proactive DWARF table loading and metadata cache invalidation. +/// +/// Note: sched_process_exec only fires once per execve() (not per-thread), +/// so no tid == tgid filter is needed here. +#[inline(always)] +pub unsafe fn handle_process_exec(ctx: C) { + use profile_bee_common::ProcessExecEvent; + + let tgid = ctx.tgid(); + + if let Some(mut entry) = RING_BUF_PROCESS_EXEC.reserve::(0) { + let exec_event = ProcessExecEvent { pid: tgid, _pad: 0 }; + let _writable = entry.write(exec_event); + entry.submit(0); + } +} + // Make this simple now - checking for valid pointers can include // checking with stack pointer address or getting valid ranges // from from /proc/[pid]/maps diff --git a/profile-bee-ebpf/src/main.rs b/profile-bee-ebpf/src/main.rs index 2814eb2..fc10415 100644 --- a/profile-bee-ebpf/src/main.rs +++ b/profile-bee-ebpf/src/main.rs @@ -10,7 +10,7 @@ use aya_ebpf::{ use profile_bee_ebpf::{ collect_off_cpu_trace, collect_trace, collect_trace_raw_syscall, collect_trace_raw_syscall_exit, collect_trace_raw_tp_with_task_regs, - collect_trace_stackid_only, dwarf_unwind_step_impl, handle_process_exit, + collect_trace_stackid_only, dwarf_unwind_step_impl, handle_process_exec, handle_process_exit, }; #[perf_event] @@ -96,6 +96,14 @@ pub fn tracepoint_process_exit(ctx: TracePointContext) -> u32 { 0 } +/// Tracepoint for monitoring process exec events. +/// Detects execve() calls for proactive DWARF loading and cache invalidation. +#[tracepoint(category = "sched", name = "sched_process_exec")] +pub fn tracepoint_process_exec(ctx: TracePointContext) -> u32 { + unsafe { handle_process_exec(ctx) } + 0 +} + /// DWARF unwind step program — tail-call target for deep stack unwinding. /// Not attached to any perf event directly; only called via PROG_ARRAY tail call /// from collect_trace (perf_event context). Unwinds FRAMES_PER_TAIL_CALL frames diff --git a/profile-bee/bin/profile-bee.rs b/profile-bee/bin/profile-bee.rs index 48665b8..5f6b066 100644 --- a/profile-bee/bin/profile-bee.rs +++ b/profile-bee/bin/profile-bee.rs @@ -506,6 +506,7 @@ async fn main() -> std::result::Result<(), anyhow::Error> { group_by_process: opt.group_by_process, monitor_exit_pid, tgid_request_tx, + enable_process_metadata: false, }; let mut event_loop = ProfilingEventLoop::new( ebpf_profiler.counts, @@ -892,20 +893,34 @@ fn spawn_profiling_thread( tracing::warn!("{:#}", e); } } - Ok(PerfWork::ProcessExit(exit_event)) => { - // Forward to DWARF thread for LPM trie cleanup - if let Some(tx) = &tgid_request_tx { - let _ = tx.send(DwarfThreadMsg::ProcessExited(exit_event.pid)); - } - // Allow PID reuse to trigger a fresh LoadProcess - known_tgids.remove(&exit_event.pid); - // Only stop profiling if this is the monitored target process - if Some(exit_event.pid) == monitor_exit_pid { - tracing::info!( - "target process {} exited, stopping TUI", - exit_event.pid - ); - return; + Ok(PerfWork::ProcessEvent(event)) => { + use profile_bee_common::{PROCESS_EVENT_EXEC, PROCESS_EVENT_EXIT}; + match event.event_type { + PROCESS_EVENT_EXIT => { + // Forward to DWARF thread for LPM trie cleanup + if let Some(tx) = &tgid_request_tx { + let _ = tx.send(DwarfThreadMsg::ProcessExited(event.pid)); + } + // Allow PID reuse to trigger a fresh LoadProcess + known_tgids.remove(&event.pid); + // Only stop profiling if this is the monitored target process + if Some(event.pid) == monitor_exit_pid { + tracing::info!( + "target process {} exited, stopping TUI", + event.pid + ); + return; + } + } + PROCESS_EVENT_EXEC => { + tracing::debug!("process {} called exec", event.pid); + if let Some(tx) = &tgid_request_tx { + let _ = tx.send(DwarfThreadMsg::ProcessExeced(event.pid)); + } + known_tgids.remove(&event.pid); + known_tgids.insert(event.pid); + } + _ => {} } } Ok(PerfWork::Stop) => return, diff --git a/profile-bee/ebpf-bin/profile-bee.bpf.o b/profile-bee/ebpf-bin/profile-bee.bpf.o index 09c7c69..49ecc14 100644 Binary files a/profile-bee/ebpf-bin/profile-bee.bpf.o and b/profile-bee/ebpf-bin/profile-bee.bpf.o differ diff --git a/profile-bee/src/cache.rs b/profile-bee/src/cache.rs index 8a83951..b19bb47 100644 --- a/profile-bee/src/cache.rs +++ b/profile-bee/src/cache.rs @@ -168,4 +168,21 @@ impl PointerStackFramesCache { hits as f64 / self.total as f64 * 100.0 ) } + + /// Remove all cached entries for a specific process (tgid). + /// + /// Called when a process calls execve() — the binary image changed so + /// all cached symbol resolutions for that PID are stale. + pub fn invalidate_pid(&mut self, tgid: u32) { + // Collect keys to remove (can't mutate while iterating) + let keys_to_remove: Vec<(u32, i32, i32)> = self + .map + .iter() + .filter(|(&(t, _, _), _)| t == tgid) + .map(|(&k, _)| k) + .collect(); + for key in keys_to_remove { + self.map.pop(&key); + } + } } diff --git a/profile-bee/src/ebpf.rs b/profile-bee/src/ebpf.rs index b8a1e79..bf1afb7 100644 --- a/profile-bee/src/ebpf.rs +++ b/profile-bee/src/ebpf.rs @@ -472,6 +472,39 @@ pub fn attach_process_exit_tracepoint(bpf: &mut Ebpf) -> Result<(), anyhow::Erro Ok(()) } +/// Set up the ring buffer for process exec events. +/// Returns `Ok(None)` if the eBPF binary doesn't include the exec map +/// (backward compat with older eBPF builds). +pub fn setup_process_exec_ring_buffer( + bpf: &mut Ebpf, +) -> Result>, anyhow::Error> { + match bpf.take_map("process_exec_events") { + Some(map) => Ok(Some(RingBuf::try_from(map)?)), + None => { + tracing::debug!("process_exec_events map not found (older eBPF binary)"); + Ok(None) + } + } +} + +/// Attach the sched_process_exec tracepoint for monitoring process exec events. +/// Returns `Ok(false)` if the eBPF binary doesn't include the exec program +/// (backward compat with older eBPF builds). +pub fn attach_process_exec_tracepoint(bpf: &mut Ebpf) -> Result { + let program = match bpf.program_mut("tracepoint_process_exec") { + Some(p) => p, + None => { + tracing::debug!("tracepoint_process_exec not found (older eBPF binary)"); + return Ok(false); + } + }; + let tp: &mut TracePoint = program.try_into()?; + tp.load()?; + tp.attach("sched", "sched_process_exec")?; + tracing::info!("attached sched_process_exec tracepoint"); + Ok(true) +} + impl EbpfProfiler { /// Set the target PID for eBPF filtering (0 = profile all processes) pub fn set_target_pid(&mut self, pid: u32) -> Result<(), anyhow::Error> { @@ -495,6 +528,23 @@ impl EbpfProfiler { Ok(()) } + /// Enable or disable process lifecycle tracking in eBPF. + /// When enabled, exit events fire for all processes (not just DWARF-tracked + /// or monitored PIDs). + pub fn set_lifecycle_tracking(&mut self, enabled: bool) -> Result<(), anyhow::Error> { + match self.bpf.map_mut("lifecycle_tracking_map") { + Some(map) => { + let mut arr: Array<&mut MapData, u32> = Array::try_from(map)?; + arr.set(0, if enabled { 1 } else { 0 }, 0)?; + Ok(()) + } + None => { + tracing::debug!("lifecycle_tracking_map not found (older eBPF binary)"); + Ok(()) + } + } + } + /// Load DWARF unwind tables into eBPF maps for a process pub fn load_dwarf_unwind_tables( &mut self, diff --git a/profile-bee/src/event_loop.rs b/profile-bee/src/event_loop.rs index c9ea3a0..debacac 100644 --- a/profile-bee/src/event_loop.rs +++ b/profile-bee/src/event_loop.rs @@ -10,10 +10,11 @@ use std::time::{Duration, Instant}; use aya::maps::{MapData, StackTraceMap}; use aya::Ebpf; -use profile_bee_common::{StackInfo, EVENT_TRACE_ALWAYS}; +use profile_bee_common::{StackInfo, EVENT_TRACE_ALWAYS, PROCESS_EVENT_EXEC, PROCESS_EVENT_EXIT}; use crate::ebpf::{apply_dwarf_refresh, FramePointersPod, StackInfoPod}; use crate::pipeline::{DwarfThreadMsg, PerfWork}; +use crate::process_metadata::ProcessMetadataCache; use crate::trace_handler::TraceHandler; use crate::types::FrameCount; @@ -47,6 +48,11 @@ pub struct EventLoopConfig { pub group_by_process: bool, pub monitor_exit_pid: Option, pub tgid_request_tx: Option>, + /// Whether to maintain a process metadata cache. + /// When `true`, a `ProcessMetadataCache` is created and updated + /// on exec/exit events. Library consumers can access it via + /// `process_metadata()`. + pub enable_process_metadata: bool, } /// Owns the state needed to drain eBPF events and produce collapsed stacks. @@ -67,6 +73,13 @@ pub struct ProfilingEventLoop { // Persistent state across calls trace_count: HashMap, known_tgids: HashSet, + /// Optional process metadata cache, maintained via eBPF lifecycle events. + process_metadata: Option, + /// PIDs that exited during the current drain_events() call. + /// Eviction from `process_metadata` is deferred until after + /// `build_raw_stacks()` completes, so agents can still look up + /// metadata for processes that exited within the collection window. + pending_exit_pids: Vec, } impl ProfilingEventLoop { @@ -94,6 +107,12 @@ impl ProfilingEventLoop { monitor_exit_pid: config.monitor_exit_pid, trace_count: HashMap::new(), known_tgids: HashSet::new(), + process_metadata: if config.enable_process_metadata { + Some(ProcessMetadataCache::new(4096)) + } else { + None + }, + pending_exit_pids: Vec::new(), } } @@ -143,6 +162,24 @@ impl ProfilingEventLoop { RawCollectResult { stacks, stopped } } + /// Immediately evict process metadata for PIDs that exited during + /// previous `drain_events()` calls. + /// + /// Normally eviction is deferred automatically: exit PIDs from cycle N + /// are evicted at the start of cycle N+1's `drain_events()` call, + /// giving agents one full collection cycle to read metadata for + /// processes that exited. Call this method only if you need to free + /// cache entries sooner (e.g., memory pressure). + pub fn evict_pending_exits(&mut self) { + if let Some(ref mut cache) = self.process_metadata { + for pid in self.pending_exit_pids.drain(..) { + cache.remove(pid); + } + } else { + self.pending_exit_pids.clear(); + } + } + /// Drain events from the PerfWork channel, symbolize stacks on the fly, /// and return `(local_counting, stopped)`. /// @@ -153,6 +190,12 @@ impl ProfilingEventLoop { rx: &mpsc::Receiver, timeout: Option, ) -> (bool, bool) { + // Evict metadata for PIDs that exited in the PREVIOUS cycle. + // This gives agents one full collection cycle to read metadata + // for processes that exited, covering async ring buffer delivery + // where a sample may arrive after its process's exit event. + self.evict_pending_exits(); + let mut queue_processed = 0u64; let mut stopped = false; let local_counting = self.stream_mode == EVENT_TRACE_ALWAYS; @@ -228,17 +271,53 @@ impl ProfilingEventLoop { tracing::warn!("{:#}", e); } } - PerfWork::ProcessExit(exit_event) => { - if let Some(tx) = &self.tgid_request_tx { - let _ = tx.send(DwarfThreadMsg::ProcessExited(exit_event.pid)); - } - self.known_tgids.remove(&exit_event.pid); - if Some(exit_event.pid) == self.monitor_exit_pid { - tracing::info!("target process {} exited, stopping", exit_event.pid); - stopped = true; - break; + PerfWork::ProcessEvent(event) => { + match event.event_type { + PROCESS_EVENT_EXIT => { + if let Some(tx) = &self.tgid_request_tx { + let _ = tx.send(DwarfThreadMsg::ProcessExited(event.pid)); + } + self.known_tgids.remove(&event.pid); + // Defer metadata cache eviction until after build_raw_stacks() + // so agents can still look up metadata for processes that + // exited within the collection window. + if self.process_metadata.is_some() { + self.pending_exit_pids.push(event.pid); + } + if Some(event.pid) == self.monitor_exit_pid { + tracing::info!("target process {} exited, stopping", event.pid); + stopped = true; + break; + } + tracing::debug!("process {} exited", event.pid); + } + PROCESS_EVENT_EXEC => { + tracing::debug!("process {} called exec", event.pid); + if let Some(tx) = &self.tgid_request_tx { + let _ = tx.send(DwarfThreadMsg::ProcessExeced(event.pid)); + } + // Flush pre-exec samples from trace_count before invalidating + // caches — otherwise build_raw_stacks() would try to symbolize + // old addresses against the new binary image. + self.trace_count.retain(|k, _| k.tgid != event.pid); + // The PID is still alive but with a new binary image. + // Re-add to known_tgids so DWARF tables are reloaded. + self.known_tgids.remove(&event.pid); + self.known_tgids.insert(event.pid); + // Invalidate symbol caches for this PID + self.trace_handler.invalidate_caches_for_pid(event.pid); + if let Some(ref mut cache) = self.process_metadata { + cache.invalidate(event.pid); + } + } + _ => { + tracing::warn!( + "unknown process event type {} for pid {}", + event.event_type, + event.pid + ); + } } - tracing::debug!("DWARF-tracked process {} exited", exit_event.pid); } PerfWork::Stop => { tracing::info!("drain_events: received Stop"); @@ -380,6 +459,15 @@ impl ProfilingEventLoop { pub fn monitor_exit_pid(&self) -> Option { self.monitor_exit_pid } + + /// Access the process metadata cache (if enabled). + /// + /// Returns `None` if lifecycle tracking was not enabled. Library consumers + /// can use this to look up process metadata (cmdline, cwd, environ, etc.) + /// between `collect_raw()` calls for stack enrichment. + pub fn process_metadata(&mut self) -> Option<&mut ProcessMetadataCache> { + self.process_metadata.as_mut() + } } /// Convert structured [`FrameCount`] data into sorted collapse-format strings. diff --git a/profile-bee/src/lib.rs b/profile-bee/src/lib.rs index dd5d4f6..05a38fd 100644 --- a/profile-bee/src/lib.rs +++ b/profile-bee/src/lib.rs @@ -8,6 +8,7 @@ pub mod codeguru_upload; pub mod ebpf; pub mod event_loop; pub mod html; +pub mod process_metadata; pub mod session; pub mod spawn; diff --git a/profile-bee/src/pipeline.rs b/profile-bee/src/pipeline.rs index b8cdb30..64eb1d9 100644 --- a/profile-bee/src/pipeline.rs +++ b/profile-bee/src/pipeline.rs @@ -8,7 +8,10 @@ use std::collections::HashMap; use std::sync::mpsc; use aya::maps::{MapData, RingBuf}; -use profile_bee_common::{ProcessExitEvent, StackInfo}; +use profile_bee_common::{ + ProcessEvent, ProcessExecEvent, ProcessExitEvent, StackInfo, PROCESS_EVENT_EXEC, + PROCESS_EVENT_EXIT, +}; use crate::dwarf_unwind::{DwarfUnwindManager, MappingsDiff}; use crate::ebpf::{build_dwarf_refresh, DwarfRefreshUpdate}; @@ -23,8 +26,8 @@ pub enum PerfWork { StackInfo(StackInfo), /// Incremental DWARF table update from background thread. DwarfRefresh(DwarfRefreshUpdate), - /// Process exit detected by eBPF tracepoint. - ProcessExit(ProcessExitEvent), + /// Process lifecycle event (exec or exit) detected by eBPF tracepoint. + ProcessEvent(ProcessEvent), /// Signal to stop profiling. Stop, } @@ -35,6 +38,8 @@ pub enum DwarfThreadMsg { LoadProcess(u32), /// Process exited — clean up mappings and LPM trie entries. ProcessExited(u32), + /// Process called execve() — invalidate and reload DWARF tables. + ProcessExeced(u32), } /// Build a `DwarfRefreshUpdate` and send it on the `PerfWork` channel. @@ -110,6 +115,33 @@ pub fn dwarf_refresh_loop( tracked_pids.retain(|&p| p != tgid); last_maps_mtime.remove(&tgid); } + DwarfThreadMsg::ProcessExeced(tgid) => { + tracing::debug!( + "DWARF thread: process {} exec'd, invalidating and reloading", + tgid + ); + // Remove old DWARF data (binary image has changed) + if let Some(removal_diff) = manager.remove_process(tgid) { + if !send_refresh(&manager, &tx, vec![], removal_diff) { + return; + } + } + tracked_pids.retain(|&p| p != tgid); + last_maps_mtime.remove(&tgid); + + // Reload with the new binary + tracked_pids.push(tgid); + let maps_path = format!("/proc/{}/maps", tgid); + let pre_refresh_mtime = std::fs::metadata(&maps_path) + .ok() + .and_then(|m| m.modified().ok()); + if let Ok((new_shard_ids, diff)) = manager.refresh_process(tgid) { + if !send_refresh(&manager, &tx, new_shard_ids, diff) { + return; + } + last_maps_mtime.insert(tgid, pre_refresh_mtime); + } + } } } @@ -236,8 +268,14 @@ pub async fn setup_process_exit_ring_buffer_task( } let exit_event: ProcessExitEvent = unsafe { std::ptr::read_unaligned(item.as_ptr().cast()) }; - tracing::debug!("eBPF detected: PID {} has exited", exit_event.pid); - if perf_tx.send(PerfWork::ProcessExit(exit_event)).is_err() { + let event = ProcessEvent { + event_type: PROCESS_EVENT_EXIT, + pid: exit_event.pid, + exit_code: exit_event.exit_code, + _pad: 0, + }; + tracing::debug!("eBPF detected: PID {} has exited", event.pid); + if perf_tx.send(PerfWork::ProcessEvent(event)).is_err() { // Receiver dropped — event loop is done, exit task return Ok(()); } @@ -255,6 +293,53 @@ pub async fn setup_process_exit_ring_buffer_task( Ok(()) } +/// Async task that polls the eBPF ring buffer for process exec events and +/// forwards them to the profiling event loop via the `PerfWork` channel. +pub async fn setup_process_exec_ring_buffer_task( + ring_buf: RingBuf, + perf_tx: mpsc::Sender, +) -> anyhow::Result<()> { + use tokio::io::unix::AsyncFd; + let mut fd = AsyncFd::new(ring_buf)?; + + while let Ok(mut guard) = fd.readable_mut().await { + match guard.try_io(|inner| { + let ring_buf = inner.get_mut(); + while let Some(item) = ring_buf.next() { + if item.len() < ProcessExecEvent::STRUCT_SIZE { + tracing::warn!( + "Ring buffer item too small for ProcessExecEvent ({} < {}), skipping", + item.len(), + ProcessExecEvent::STRUCT_SIZE, + ); + continue; + } + let exec_event: ProcessExecEvent = + unsafe { std::ptr::read_unaligned(item.as_ptr().cast()) }; + let event = ProcessEvent { + event_type: PROCESS_EVENT_EXEC, + pid: exec_event.pid, + exit_code: 0, + _pad: 0, + }; + tracing::debug!("eBPF detected: PID {} called exec", event.pid); + if perf_tx.send(PerfWork::ProcessEvent(event)).is_err() { + return Ok(()); + } + } + Ok(()) + }) { + Ok(_) => { + guard.clear_ready(); + continue; + } + Err(_would_block) => continue, + } + } + + Ok(()) +} + /// Set up timer-based and child-process-based stopping mechanisms. /// /// This wires the non-interactive stopping mechanisms: diff --git a/profile-bee/src/process_metadata.rs b/profile-bee/src/process_metadata.rs new file mode 100644 index 0000000..44dbd14 --- /dev/null +++ b/profile-bee/src/process_metadata.rs @@ -0,0 +1,335 @@ +//! Process metadata cache for reading and caching `/proc/[pid]` information. +//! +//! Provides a lazy, capacity-bounded cache of process metadata that agents +//! can use to look up `cmdline`, `cwd`, `environ`, `exe`, and mount namespace +//! for any PID seen during profiling. Entries are invalidated on exec events +//! (same PID, new binary) and removed on exit events. +//! +//! # Example +//! +//! ```rust,ignore +//! use profile_bee::process_metadata::ProcessMetadataCache; +//! +//! let mut cache = ProcessMetadataCache::new(1024); +//! +//! // Lazily loads from /proc on first access +//! if let Some(meta) = cache.get_or_load(1234) { +//! println!("exe: {:?}", meta.exe); +//! println!("cwd: {:?}", meta.cwd); +//! } +//! +//! // On exec: invalidate (same PID, new binary) +//! cache.invalidate(1234); +//! +//! // On exit: remove entirely +//! cache.remove(1234); +//! ``` + +use std::collections::HashMap; +use std::ffi::OsString; +use std::path::PathBuf; +use std::time::Instant; + +/// Cached metadata for a single process, read from `/proc/[pid]/`. +#[derive(Clone)] +pub struct ProcessMetadata { + /// The process ID. + pub pid: u32, + /// Parsed command line (`/proc/[pid]/cmdline`). + pub cmdline: Option>, + /// Working directory (`/proc/[pid]/cwd`). + pub cwd: Option, + /// Environment variables (`/proc/[pid]/environ`). + pub environ: Option>, + /// Executable path (`/proc/[pid]/exe` readlink). + pub exe: Option, + /// Mount namespace inode (`/proc/[pid]/ns/mnt`). + pub ns_mnt: Option, + /// Process start time in clock ticks since boot (`/proc/[pid]/stat` field 22). + /// Used to detect PID reuse — if a cached entry's start_time differs from + /// the current `/proc/[pid]/stat` starttime, the PID was recycled. + pub start_time: u64, + /// When this entry was loaded. + pub loaded_at: Instant, +} + +// Manual Debug impl to avoid leaking secrets from environ/cmdline into logs. +impl std::fmt::Debug for ProcessMetadata { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ProcessMetadata") + .field("pid", &self.pid) + .field("exe", &self.exe) + .field("cwd", &self.cwd) + .field( + "cmdline", + &self.cmdline.as_ref().map(|c| format!("<{} args>", c.len())), + ) + .field( + "environ", + &self.environ.as_ref().map(|e| format!("<{} vars>", e.len())), + ) + .field("ns_mnt", &self.ns_mnt) + .field("start_time", &self.start_time) + .field("loaded_at", &self.loaded_at) + .finish() + } +} + +impl ProcessMetadata { + /// Load metadata for a PID from `/proc`. + /// Returns `None` if the process doesn't exist or `/proc/[pid]/stat` + /// can't be read (start_time is required for PID reuse detection). + fn load(pid: u32) -> Option { + let process = procfs::process::Process::new(pid as i32).ok()?; + + let stat = process.stat().ok()?; + let start_time = stat.starttime; + + let cmdline = process.cmdline().ok(); + let cwd = process.cwd().ok(); + let environ = process.environ().ok(); + let exe = process.exe().ok(); + + // Get mount namespace inode + let ns_path = format!("/proc/{}/ns/mnt", pid); + let ns_mnt = std::fs::metadata(&ns_path).ok().map(|m| { + use std::os::unix::fs::MetadataExt; + m.ino() + }); + + Some(ProcessMetadata { + pid, + cmdline, + cwd, + environ, + exe, + ns_mnt, + start_time, + loaded_at: Instant::now(), + }) + } + + /// Read the current start time for a PID from `/proc/[pid]/stat`. + /// Returns 0 if the process doesn't exist or stat can't be read. + fn current_start_time(pid: u32) -> u64 { + procfs::process::Process::new(pid as i32) + .ok() + .and_then(|p| p.stat().ok()) + .map_or(0, |s| s.starttime) + } + + /// Look up a specific environment variable by name. + pub fn environ_var(&self, key: &str) -> Option<&str> { + self.environ + .as_ref()? + .get(std::ffi::OsStr::new(key))? + .to_str() + } +} + +/// Capacity-bounded cache of process metadata, read lazily from `/proc/[pid]/`. +/// +/// Designed for profiling agents that need per-process context (cmdline, cwd, +/// environment variables) to enrich stack traces. Integrates with eBPF process +/// lifecycle events for cache invalidation (exec) and eviction (exit). +/// +/// The cache does NOT implement LRU eviction — it relies on explicit `remove()` +/// calls driven by eBPF exit events. If the cache reaches capacity, new entries +/// are not inserted (the process is still profiled, just without cached metadata). +pub struct ProcessMetadataCache { + cache: HashMap, + max_entries: usize, +} + +impl ProcessMetadataCache { + /// Create a new cache with the given maximum entry count. + pub fn new(max_entries: usize) -> Self { + Self { + cache: HashMap::new(), + max_entries, + } + } + + /// Get or lazily load metadata for a PID. + /// + /// On first access for a given PID, reads from `/proc/[pid]/`. Subsequent + /// calls return the cached entry unless the PID has been recycled (detected + /// via `/proc/[pid]/stat` starttime mismatch). Returns `None` if the process + /// doesn't exist (already exited) or the cache is at capacity. + pub fn get_or_load(&mut self, pid: u32) -> Option<&ProcessMetadata> { + if self.cache.contains_key(&pid) { + // Validate against PID reuse and process exit: compare the cached + // start_time against the current /proc/[pid]/stat starttime. + let cached_start = self.cache[&pid].start_time; + let current_start = ProcessMetadata::current_start_time(pid); + if current_start == 0 { + // Process is gone (/proc/[pid]/stat unreadable) — remove stale entry. + self.cache.remove(&pid); + return None; + } + if current_start != cached_start { + tracing::debug!( + "PID {} recycled (starttime {} -> {}), reloading metadata", + pid, + cached_start, + current_start, + ); + self.cache.remove(&pid); + // Fall through to reload below + } else { + return self.cache.get(&pid); + } + } + if self.cache.len() >= self.max_entries { + tracing::warn!( + "ProcessMetadataCache at capacity ({}), skipping pid {}", + self.max_entries, + pid + ); + return None; + } + if let Some(meta) = ProcessMetadata::load(pid) { + self.cache.insert(pid, meta); + self.cache.get(&pid) + } else { + None + } + } + + /// Get cached metadata without loading. Returns `None` if not cached. + pub fn get(&self, pid: u32) -> Option<&ProcessMetadata> { + self.cache.get(&pid) + } + + /// Invalidate the cached entry for a PID (e.g., on exec). + /// + /// The next `get_or_load()` call will re-read from `/proc`. + pub fn invalidate(&mut self, pid: u32) { + self.cache.remove(&pid); + } + + /// Remove the cached entry for a PID (e.g., on exit). + pub fn remove(&mut self, pid: u32) { + self.cache.remove(&pid); + } + + /// Look up a specific environment variable for a PID. + /// + /// Convenience method that combines `get_or_load` + `environ_var`. + pub fn environ_var(&mut self, pid: u32, key: &str) -> Option { + self.get_or_load(pid)? + .environ_var(key) + .map(|s| s.to_string()) + } + + /// Number of cached entries. + pub fn len(&self) -> usize { + self.cache.len() + } + + /// Whether the cache is empty. + pub fn is_empty(&self) -> bool { + self.cache.is_empty() + } + + /// Diagnostic summary string. + pub fn stats(&self) -> String { + format!( + "ProcessMetadataCache: {}/{} entries", + self.cache.len(), + self.max_entries, + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_cache_new() { + let cache = ProcessMetadataCache::new(100); + assert!(cache.is_empty()); + assert_eq!(cache.len(), 0); + } + + #[test] + fn test_cache_load_current_process() { + let mut cache = ProcessMetadataCache::new(100); + let pid = std::process::id(); + let meta = cache.get_or_load(pid); + assert!(meta.is_some(), "should be able to load current process"); + let meta = meta.unwrap(); + assert_eq!(meta.pid, pid); + assert!(meta.exe.is_some()); + assert!(meta.cmdline.is_some()); + assert!(meta.cwd.is_some()); + assert_eq!(cache.len(), 1); + } + + #[test] + fn test_cache_nonexistent_pid() { + let mut cache = ProcessMetadataCache::new(100); + // PID 0 is the kernel — /proc/0 may or may not exist depending on kernel. + // Use a very high PID that almost certainly doesn't exist. + let meta = cache.get_or_load(u32::MAX); + assert!(meta.is_none()); + assert!(cache.is_empty()); + } + + #[test] + fn test_cache_invalidate() { + let mut cache = ProcessMetadataCache::new(100); + let pid = std::process::id(); + cache.get_or_load(pid); + assert_eq!(cache.len(), 1); + cache.invalidate(pid); + assert!(cache.is_empty()); + // Re-load should work + assert!(cache.get_or_load(pid).is_some()); + } + + #[test] + fn test_cache_remove() { + let mut cache = ProcessMetadataCache::new(100); + let pid = std::process::id(); + cache.get_or_load(pid); + cache.remove(pid); + assert!(cache.is_empty()); + assert!(cache.get(pid).is_none()); + } + + #[test] + fn test_cache_capacity_limit() { + let mut cache = ProcessMetadataCache::new(1); + let pid = std::process::id(); + cache.get_or_load(pid); + // Cache is full — next load should return None for a different PID + // (but the PID probably doesn't exist anyway, so this is a bit indirect) + assert_eq!(cache.len(), 1); + } + + #[test] + fn test_environ_var() { + let mut cache = ProcessMetadataCache::new(100); + let pid = std::process::id(); + // PATH should be set in any normal environment + let path = cache.environ_var(pid, "PATH"); + assert!(path.is_some(), "PATH should be available"); + } + + #[test] + fn test_get_without_load() { + let cache = ProcessMetadataCache::new(100); + let pid = std::process::id(); + // get() without get_or_load() should return None + assert!(cache.get(pid).is_none()); + } + + #[test] + fn test_stats() { + let cache = ProcessMetadataCache::new(100); + let stats = cache.stats(); + assert!(stats.contains("0/100")); + } +} diff --git a/profile-bee/src/session.rs b/profile-bee/src/session.rs index 1c35435..144396d 100644 --- a/profile-bee/src/session.rs +++ b/profile-bee/src/session.rs @@ -12,14 +12,15 @@ use tokio::task; use crate::dwarf_unwind::DwarfUnwindManager; use crate::ebpf::{ - attach_process_exit_tracepoint, setup_ebpf_with_tp_fallback, setup_process_exit_ring_buffer, - setup_ring_buffer, ProfilerConfig, + attach_process_exec_tracepoint, attach_process_exit_tracepoint, setup_ebpf_with_tp_fallback, + setup_process_exec_ring_buffer, setup_process_exit_ring_buffer, setup_ring_buffer, + ProfilerConfig, }; use crate::event_loop::{EventLoopConfig, ProfilingEventLoop}; use crate::output::OutputSink; use crate::pipeline::{ - dwarf_refresh_loop, setup_process_exit_ring_buffer_task, setup_ring_buffer_task, - setup_timer_and_child_stop, DwarfThreadMsg, PerfWork, + dwarf_refresh_loop, setup_process_exec_ring_buffer_task, setup_process_exit_ring_buffer_task, + setup_ring_buffer_task, setup_timer_and_child_stop, DwarfThreadMsg, PerfWork, }; use crate::spawn::{setup_process_to_profile, SpawnProcess, StopHandler}; @@ -42,6 +43,15 @@ pub struct SessionConfig { pub group_by_process: bool, /// Whether to set up process-exit monitoring for external PIDs. pub monitor_exit: bool, + /// Track process lifecycle events (exec + broadened exit) via eBPF tracepoints. + /// + /// When `true`, attaches `sched_process_exec` and broadens `sched_process_exit` + /// to fire for all processes (not just DWARF-tracked or monitored PIDs). + /// + /// Automatically enabled when `profiler.dwarf` is `true` (DWARF tables benefit + /// from proactive exec/exit detection). Agents should enable this for + /// `ProcessMetadataCache` eviction and process-aware profiling. + pub track_process_lifecycle: bool, } impl Default for SessionConfig { @@ -54,6 +64,7 @@ impl Default for SessionConfig { group_by_cpu: false, group_by_process: false, monitor_exit: true, + track_process_lifecycle: false, } } } @@ -178,7 +189,7 @@ impl ProfilingSession { // 6. Ring buffer setup let ring_buf = setup_ring_buffer(&mut ebpf_profiler.bpf)?; - // 7. Process exit monitoring + // 7. Process exit monitoring + lifecycle tracking let external_pid = if spawn.is_none() { config.profiler.pid } else { @@ -186,11 +197,22 @@ impl ProfilingSession { }; let monitor_exit_pid = external_pid; - if (external_pid.is_some() || config.profiler.dwarf) && config.monitor_exit { + // Lifecycle tracking: auto-enable when DWARF is on, or when explicitly requested + let lifecycle_tracking = config.track_process_lifecycle || config.profiler.dwarf; + + if (external_pid.is_some() || config.profiler.dwarf || lifecycle_tracking) + && config.monitor_exit + { attach_process_exit_tracepoint(&mut ebpf_profiler.bpf)?; if let Some(pid_to_monitor) = external_pid { ebpf_profiler.set_monitor_exit_pid(pid_to_monitor)?; } + + // Enable broadened exit events when lifecycle tracking is on + if lifecycle_tracking { + ebpf_profiler.set_lifecycle_tracking(true)?; + } + let exit_ring_buf = setup_process_exit_ring_buffer(&mut ebpf_profiler.bpf)?; let exit_perf_tx = perf_tx.clone(); task::spawn(async move { @@ -202,6 +224,24 @@ impl ProfilingSession { }); } + // 7b. Process exec monitoring (lifecycle tracking only) + if lifecycle_tracking + && config.monitor_exit + && attach_process_exec_tracepoint(&mut ebpf_profiler.bpf)? + { + if let Ok(Some(exec_ring_buf)) = setup_process_exec_ring_buffer(&mut ebpf_profiler.bpf) + { + let exec_perf_tx = perf_tx.clone(); + task::spawn(async move { + if let Err(e) = + setup_process_exec_ring_buffer_task(exec_ring_buf, exec_perf_tx).await + { + tracing::error!("Failed to set up process exec ring buffer: {:?}", e); + } + }); + } + } + // 8. Start ring buffer polling task let rb_perf_tx = perf_tx.clone(); task::spawn(async move { @@ -220,6 +260,7 @@ impl ProfilingSession { group_by_process: config.group_by_process, monitor_exit_pid, tgid_request_tx, + enable_process_metadata: lifecycle_tracking, }; let event_loop = ProfilingEventLoop::new( ebpf_profiler.counts, diff --git a/profile-bee/src/trace_handler.rs b/profile-bee/src/trace_handler.rs index 346e907..8f48ee5 100644 --- a/profile-bee/src/trace_handler.rs +++ b/profile-bee/src/trace_handler.rs @@ -94,6 +94,15 @@ impl TraceHandler { } } + /// Invalidate all cached symbol resolutions for a specific process. + /// + /// Called when a process calls execve() — the binary image changed so + /// all cached address-to-symbol mappings for that PID are stale. + pub fn invalidate_caches_for_pid(&mut self, tgid: u32) { + self.cache.invalidate_pid(tgid); + tracing::debug!("invalidated symbol caches for pid {}", tgid); + } + pub fn print_stats(&self) { tracing::info!("{}", self.cache.stats()); }