Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 86 additions & 1 deletion docs/custom_profiling_agent.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ async fn main() -> anyhow::Result<()> {
..ProfilerConfig::default()
},
duration_ms: 0, // Run indefinitely (we control the loop)
// Enable process lifecycle tracking for metadata cache + DWARF improvements.
// Note: automatically enabled when dwarf: true, shown here for clarity.
track_process_lifecycle: true,
..SessionConfig::default()
};

Expand Down Expand Up @@ -113,7 +116,45 @@ async fn main() -> anyhow::Result<()> {

## Stack Enrichment Pattern

A common pattern is to look up per-process metadata and annotate stacks.
### Using the Built-in ProcessMetadataCache

When `track_process_lifecycle` is enabled (or DWARF is on), the event loop
maintains a `ProcessMetadataCache` that automatically reads `/proc/[pid]/*`
on first access and evicts entries when eBPF detects process exec/exit events.

```rust
use profile_bee::types::{FrameCount, StackFrameInfo};

fn enrich_with_builtin_cache(
    session: &mut ProfilingSession,
    stacks: &[FrameCount],
) {
    // The built-in metadata cache is only handed out when lifecycle tracking
    // is enabled; with no cache there is nothing to enrich with.
    let cache = match session.event_loop.process_metadata() {
        Some(cache) => cache,
        None => return,
    };

    // Attribute each stack to the PID recorded on its first frame; stacks
    // without any frames are skipped.
    let pids = stacks
        .iter()
        .filter_map(|fc| fc.frames.first())
        .map(|frame| frame.pid as u32);

    for pid in pids {
        if let Some(meta) = cache.get_or_load(pid) {
            // meta.cmdline, meta.cwd, meta.environ, meta.exe and meta.ns_mnt
            // can all be used for enrichment at this point.
            if let Some(env_val) = meta.environ_var("MY_SERVICE_NAME") {
                // Use the environment variable for grouping
            }
        }
    }
}
```

The cache is automatically maintained by the event loop:
- On `sched_process_exec`: invalidates the entry (same PID, new binary)
- On `sched_process_exit`: removes the entry entirely
- No manual eviction needed — eBPF events handle it

### Custom Enrichment (without built-in cache)

For more control, you can build your own metadata cache.
Since `StackFrameInfo` contains the `pid`, you can read `/proc/[pid]/*`
to get process context:

Expand Down Expand Up @@ -331,3 +372,47 @@ pub struct StackFrameInfo {
pub ns: Option<u64>, // mount namespace inode
}
```

### `ProcessMetadataCache`

```rust
use profile_bee::process_metadata::ProcessMetadataCache;

// Access via the event loop (when track_process_lifecycle is enabled):
if let Some(cache) = session.event_loop.process_metadata() {
// Lazily loads from /proc on first access
if let Some(meta) = cache.get_or_load(pid) {
meta.cmdline; // Option<Vec<String>>
meta.cwd; // Option<PathBuf>
meta.environ; // Option<HashMap<OsString, OsString>>
meta.exe; // Option<PathBuf>
meta.ns_mnt; // Option<u64> — mount namespace inode
}

// Convenience: look up a specific environment variable
let val = cache.environ_var(pid, "MY_ENV_VAR");
}
```

Entries are automatically:
- **Invalidated** on `sched_process_exec` (same PID, new binary image)
- **Removed** on `sched_process_exit` (process gone)

### `SessionConfig` — Lifecycle Tracking

```rust
pub struct SessionConfig {
// ... other fields ...

/// Track process lifecycle events (exec + broadened exit) via eBPF tracepoints.
/// Automatically enabled when profiler.dwarf is true.
pub track_process_lifecycle: bool,
}
```

When enabled:
- Attaches `sched:sched_process_exec` tracepoint (detects `execve()` calls)
- Broadens `sched:sched_process_exit` to fire for all processes (not just DWARF-tracked)
- Creates a `ProcessMetadataCache` in the event loop
- DWARF tables are proactively reloaded on exec (no 1-second poll delay)
- Symbol caches are invalidated on exec (no stale resolutions)
42 changes: 41 additions & 1 deletion profile-bee-common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ impl ProbeEvent {
pub const STRUCT_SIZE: usize = size_of::<ProbeEvent>();
}

/// Process exit notification sent from eBPF to userspace
/// Process exit notification sent from eBPF to userspace.
/// Deprecated: prefer `ProcessEvent` which carries both exec and exit events.
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
#[repr(C)]
pub struct ProcessExitEvent {
Expand All @@ -67,6 +68,45 @@ impl ProcessExitEvent {
pub const STRUCT_SIZE: usize = size_of::<ProcessExitEvent>();
}

/// Process exec notification sent from eBPF to userspace when a process
/// calls execve(). Used for proactive DWARF table loading and cache invalidation.
///
/// Laid out with `#[repr(C)]` as two `u32` fields.
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
#[repr(C)]
pub struct ProcessExecEvent {
/// Thread-group ID (tgid) of the process that called execve().
pub pid: u32,
/// Explicit padding field; the eBPF producer writes 0 here.
pub _pad: u32,
}

impl ProcessExecEvent {
/// Size of `ProcessExecEvent` in bytes, as reported by `size_of`.
pub const STRUCT_SIZE: usize = size_of::<ProcessExecEvent>();
}

// --- Process Lifecycle Event Types ---

/// Process lifecycle event type: process exited.
pub const PROCESS_EVENT_EXIT: u32 = 0;
/// Process lifecycle event type: process called execve().
pub const PROCESS_EVENT_EXEC: u32 = 1;

/// Unified process lifecycle event sent from eBPF to userspace.
/// Carries both exec and exit notifications through a single ring buffer,
/// replacing the narrower `ProcessExitEvent`.
///
/// Laid out with `#[repr(C)]` as four 4-byte fields.
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
#[repr(C)]
pub struct ProcessEvent {
/// Event type: `PROCESS_EVENT_EXIT` or `PROCESS_EVENT_EXEC`.
pub event_type: u32,
/// The PID (tgid) of the process.
pub pid: u32,
/// For EXIT events: the exit code. For EXEC events: 0.
pub exit_code: i32,
/// Explicit padding field.
/// NOTE(review): presumably always written as 0 by the producer — confirm.
pub _pad: u32,
}

impl ProcessEvent {
/// Size of `ProcessEvent` in bytes, as reported by `size_of`.
pub const STRUCT_SIZE: usize = size_of::<ProcessEvent>();
}

// --- DWARF Unwind Table Types (used by eBPF-side unwinding) ---

/// How to compute the CFA (Canonical Frame Address)
Expand Down
2 changes: 1 addition & 1 deletion profile-bee-ebpf/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

57 changes: 54 additions & 3 deletions profile-bee-ebpf/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ static TARGET_PID_MAP: Array<u32> = Array::with_max_entries(1, 0);
#[map(name = "monitor_exit_pid_map")]
static MONITOR_EXIT_PID_MAP: Array<u32> = Array::with_max_entries(1, 0);

/// Whether process lifecycle tracking is enabled (0 = disabled, 1 = enabled).
/// When enabled, exit events fire for ALL process exits (not just DWARF-tracked
/// or monitored PIDs), and the exec tracepoint sends exec events.
#[map(name = "lifecycle_tracking_map")]
static LIFECYCLE_TRACKING_MAP: Array<u32> = Array::with_max_entries(1, 0);
Comment on lines +75 to +79

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Gate exec notifications on lifecycle_tracking_enabled().

LIFECYCLE_TRACKING_MAP is documented as the runtime switch for exec notifications, but handle_process_exec() ignores it and emits on every attached exec. That creates avoidable host-wide ring-buffer pressure and cache churn when lifecycle tracking is off.

Suggested guard
 pub unsafe fn handle_process_exec<C: EbpfContext>(ctx: C) {
     use profile_bee_common::ProcessExecEvent;
 
+    if !lifecycle_tracking_enabled() {
+        return;
+    }
+
     let tgid = ctx.tgid();
 
     if let Some(mut entry) = RING_BUF_PROCESS_EXEC.reserve::<ProcessExecEvent>(0) {
         let exec_event = ProcessExecEvent { pid: tgid, _pad: 0 };
         let _writable = entry.write(exec_event);

Also applies to: 1448-1457

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@profile-bee-ebpf/src/lib.rs` around lines 75 - 79, The exec tracepoint
handler emits events regardless of the runtime gate; update handle_process_exec
(and the other exec-emitting sites noted) to check lifecycle_tracking_enabled()
(which reads LIFECYCLE_TRACKING_MAP) before allocating or writing exec
notifications so events are only produced when lifecycle tracking is enabled;
bail out early from handle_process_exec and the same exec emission paths when
lifecycle_tracking_enabled() returns false to avoid host-wide ring-buffer
pressure and cache churn.


#[inline]
unsafe fn skip_idle() -> bool {
let skip = core::ptr::read_volatile(&SKIP_IDLE);
Expand Down Expand Up @@ -119,6 +125,14 @@ unsafe fn monitor_exit_pid() -> u32 {
}
}

/// Reports whether process lifecycle tracking is switched on.
///
/// Reads slot 0 of `LIFECYCLE_TRACKING_MAP`: any non-zero value means
/// enabled, while zero or a failed lookup means disabled.
#[inline]
unsafe fn lifecycle_tracking_enabled() -> bool {
    matches!(LIFECYCLE_TRACKING_MAP.get(0), Some(&flag) if flag != 0)
}

/* Setup maps */
#[map]
static mut STORAGE: PerCpuArray<FramePointers> = PerCpuArray::with_max_entries(1, 0);
Expand All @@ -136,6 +150,9 @@ static RING_BUF_STACKS: RingBuf = RingBuf::with_byte_size(STACK_SIZE, 0);
#[map(name = "process_exit_events")]
static RING_BUF_PROCESS_EXIT: RingBuf = RingBuf::with_byte_size(4096, 0);

#[map(name = "process_exec_events")]
static RING_BUF_PROCESS_EXEC: RingBuf = RingBuf::with_byte_size(4096, 0);

#[map(name = "stack_traces")]
pub static STACK_TRACES: StackTrace = StackTrace::with_max_entries(STACK_SIZE, 0);
// DWARF unwind maps — single outer ArrayOfMaps containing per-binary inner Array maps.
Expand Down Expand Up @@ -1382,19 +1399,34 @@ unsafe fn collect_off_cpu_trace_percpu<C: EbpfContext>(ctx: &C, now: u64) {
/// Handle sched_process_exit tracepoint for process exit monitoring.
/// Sends a ProcessExitEvent when:
/// - The monitored PID exits (--pid mode: stops profiling), OR
/// - A DWARF-tracked process exits (cleanup LPM trie entries).
/// - A DWARF-tracked process exits (cleanup LPM trie entries), OR
/// - Lifecycle tracking is enabled (system-wide process awareness).
///
/// Only fires for process exits (tid == tgid), not individual thread exits.
/// sched_process_exit fires for every thread exit; without this filter,
/// thread-heavy workloads (Java, Go) would generate thousands of
/// duplicate events per second for the same tgid.
#[inline(always)]
pub unsafe fn handle_process_exit<C: EbpfContext>(ctx: C) {
use profile_bee_common::ProcessExitEvent;

let tid = ctx.pid();
let tgid = ctx.tgid();

// Skip thread exits — only fire for the main thread (process exit).
if tid != tgid {
return;
}
Comment on lines +1413 to +1419

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

tid == tgid is not a reliable “process is gone” check.

The thread-group leader can exit via pthread_exit() while other threads in the same tgid keep running. This branch would emit a ProcessExitEvent early, which can stop --pid profiling and evict DWARF/process metadata for a still-live process, while the real last-thread exit may be filtered out later. Please key this off actual thread-group teardown, not leader-thread exit.


let monitor_pid = monitor_exit_pid();

// Send notification if this is either the monitored PID or a DWARF-tracked process
// Send notification if this is a monitored PID, DWARF-tracked process,
// or lifecycle tracking is enabled (for system-wide process awareness).
let is_monitored = monitor_pid != 0 && tgid == monitor_pid;
let is_dwarf_tracked = unsafe { DWARF_TGIDS.get(&tgid).is_some() };
let lifecycle = unsafe { lifecycle_tracking_enabled() };

if is_monitored || is_dwarf_tracked {
if is_monitored || is_dwarf_tracked || lifecycle {
if let Some(mut entry) = RING_BUF_PROCESS_EXIT.reserve::<ProcessExitEvent>(0) {
let exit_event = ProcessExitEvent {
pid: tgid,
Expand All @@ -1406,6 +1438,25 @@ pub unsafe fn handle_process_exit<C: EbpfContext>(ctx: C) {
}
}

/// Handle sched_process_exec tracepoint for process exec monitoring.
/// Sends a ProcessExecEvent when a process calls execve(), enabling
/// proactive DWARF table loading and metadata cache invalidation.
///
/// Note: sched_process_exec only fires once per execve() (not per-thread),
/// so no tid == tgid filter is needed here.
#[inline(always)]
pub unsafe fn handle_process_exec<C: EbpfContext>(ctx: C) {
use profile_bee_common::ProcessExecEvent;

let tgid = ctx.tgid();

if let Some(mut entry) = RING_BUF_PROCESS_EXEC.reserve::<ProcessExecEvent>(0) {
let exec_event = ProcessExecEvent { pid: tgid, _pad: 0 };
let _writable = entry.write(exec_event);
entry.submit(0);
}
}

// Make this simple now - checking for valid pointers can include
// checking with stack pointer address or getting valid ranges
// from /proc/[pid]/maps
Expand Down
10 changes: 9 additions & 1 deletion profile-bee-ebpf/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use aya_ebpf::{
use profile_bee_ebpf::{
collect_off_cpu_trace, collect_trace, collect_trace_raw_syscall,
collect_trace_raw_syscall_exit, collect_trace_raw_tp_with_task_regs,
collect_trace_stackid_only, dwarf_unwind_step_impl, handle_process_exit,
collect_trace_stackid_only, dwarf_unwind_step_impl, handle_process_exec, handle_process_exit,
};

#[perf_event]
Expand Down Expand Up @@ -96,6 +96,14 @@ pub fn tracepoint_process_exit(ctx: TracePointContext) -> u32 {
0
}

/// Tracepoint for monitoring process exec events.
/// Detects execve() calls for proactive DWARF loading and cache invalidation.
///
/// Attached to the `sched:sched_process_exec` tracepoint. All work is
/// delegated to `handle_process_exec`; this wrapper always returns 0.
#[tracepoint(category = "sched", name = "sched_process_exec")]
pub fn tracepoint_process_exec(ctx: TracePointContext) -> u32 {
// `handle_process_exec` is declared `unsafe fn`, hence the unsafe block.
unsafe { handle_process_exec(ctx) }
0
}

/// DWARF unwind step program — tail-call target for deep stack unwinding.
/// Not attached to any perf event directly; only called via PROG_ARRAY tail call
/// from collect_trace (perf_event context). Unwinds FRAMES_PER_TAIL_CALL frames
Expand Down
43 changes: 29 additions & 14 deletions profile-bee/bin/profile-bee.rs
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,7 @@ async fn main() -> std::result::Result<(), anyhow::Error> {
group_by_process: opt.group_by_process,
monitor_exit_pid,
tgid_request_tx,
enable_process_metadata: false,
};
let mut event_loop = ProfilingEventLoop::new(
ebpf_profiler.counts,
Expand Down Expand Up @@ -892,20 +893,34 @@ fn spawn_profiling_thread(
tracing::warn!("{:#}", e);
}
}
Ok(PerfWork::ProcessExit(exit_event)) => {
// Forward to DWARF thread for LPM trie cleanup
if let Some(tx) = &tgid_request_tx {
let _ = tx.send(DwarfThreadMsg::ProcessExited(exit_event.pid));
}
// Allow PID reuse to trigger a fresh LoadProcess
known_tgids.remove(&exit_event.pid);
// Only stop profiling if this is the monitored target process
if Some(exit_event.pid) == monitor_exit_pid {
tracing::info!(
"target process {} exited, stopping TUI",
exit_event.pid
);
return;
Ok(PerfWork::ProcessEvent(event)) => {
use profile_bee_common::{PROCESS_EVENT_EXEC, PROCESS_EVENT_EXIT};
match event.event_type {
PROCESS_EVENT_EXIT => {
// Forward to DWARF thread for LPM trie cleanup
if let Some(tx) = &tgid_request_tx {
let _ = tx.send(DwarfThreadMsg::ProcessExited(event.pid));
}
// Allow PID reuse to trigger a fresh LoadProcess
known_tgids.remove(&event.pid);
// Only stop profiling if this is the monitored target process
if Some(event.pid) == monitor_exit_pid {
tracing::info!(
"target process {} exited, stopping TUI",
event.pid
);
return;
}
}
PROCESS_EVENT_EXEC => {
tracing::debug!("process {} called exec", event.pid);
if let Some(tx) = &tgid_request_tx {
let _ = tx.send(DwarfThreadMsg::ProcessExeced(event.pid));
}
known_tgids.remove(&event.pid);
known_tgids.insert(event.pid);
}
_ => {}
}
}
Ok(PerfWork::Stop) => return,
Expand Down
Binary file modified profile-bee/ebpf-bin/profile-bee.bpf.o
Binary file not shown.
17 changes: 17 additions & 0 deletions profile-bee/src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,4 +168,21 @@ impl PointerStackFramesCache {
hits as f64 / self.total as f64 * 100.0
)
}

/// Drop every cached entry whose key belongs to the given process (tgid).
///
/// Intended for use after a process calls execve(): the binary image has
/// changed, so any symbol resolution cached for that PID is stale.
pub fn invalidate_pid(&mut self, tgid: u32) {
    // Snapshot the matching keys first: the map cannot be mutated while
    // the iterator still borrows it.
    let mut stale: Vec<(u32, i32, i32)> = Vec::new();
    for (key, _) in self.map.iter() {
        if key.0 == tgid {
            stale.push(*key);
        }
    }

    // Now that the borrow has ended, evict each stale key.
    for key in &stale {
        self.map.pop(key);
    }
}
}
Loading
Loading