Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions lib/vm/src/trap/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ mod traphandlers;

pub use trap::Trap;
pub use traphandlers::{
TrapHandlerFn, VMConfig, catch_traps, on_host_stack, raise_lib_trap, raise_user_trap,
set_stack_size, wasmer_call_trampoline,
MAX_STACK_SIZE, TrapHandlerFn, VMConfig, catch_traps, get_stack_size, get_thread_stack_size,
on_host_stack, raise_lib_trap, raise_user_trap, set_stack_size, set_thread_stack_size,
wasmer_call_trampoline,
};
pub use traphandlers::{init_traps, resume_panic};
pub use wasmer_types::TrapCode;
284 changes: 267 additions & 17 deletions lib/vm/src/trap/traphandlers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,47 @@ struct ucontext_t {
#[cfg(all(unix, not(all(target_arch = "aarch64", target_os = "macos"))))]
use libc::ucontext_t;

/// Maximum allowed stack size for a Wasmer coroutine stack (100 MB).
///
/// Both the process-wide default (`set_stack_size`) and the per-thread
/// override (`set_thread_stack_size`) clamp their argument to this upper
/// bound, so no coroutine stack can exceed it.
pub const MAX_STACK_SIZE: usize = 100 * 1024 * 1024;

/// Sets the process-wide default stack size for new Wasmer coroutines.
///
/// The value is clamped to the range [8 KB, `MAX_STACK_SIZE`] so callers
/// can neither configure a stack too small to run on nor a pathologically
/// large one. Relaxed ordering suffices: this is a standalone configuration
/// value with no ordering relationship to other memory.
pub fn set_stack_size(size: usize) {
    DEFAULT_STACK_SIZE.store(size.clamp(8 * 1024, MAX_STACK_SIZE), Ordering::Relaxed);
}

/// Reports the current process-wide default coroutine stack size, in bytes.
///
/// This is the value most recently installed via `set_stack_size` (or the
/// built-in default if it was never changed).
pub fn get_stack_size() -> usize {
    DEFAULT_STACK_SIZE.load(Ordering::Relaxed)
}

thread_local! {
    /// Per-thread override for the coroutine stack size. When `Some`, this
    /// takes precedence over the process-wide `DEFAULT_STACK_SIZE` in
    /// `catch_traps`. This allows a single call site (e.g. the Stylus retry
    /// loop) to request a larger stack without affecting other threads.
    /// Written by `set_thread_stack_size`, read by `get_thread_stack_size`;
    /// a per-call `VMConfig.wasm_stack_size` still takes precedence over it.
    static STACK_SIZE_OVERRIDE: Cell<Option<usize>> = const { Cell::new(None) };
}

/// Installs (or clears) the thread-local stack size override.
///
/// While the override is `Some`, every Wasmer coroutine created on this
/// thread uses the given size instead of the process-wide default; pass
/// `None` to fall back to the default again. The requested size is clamped
/// to [8 KB, MAX_STACK_SIZE], mirroring the behavior of `set_stack_size`.
pub fn set_thread_stack_size(size: Option<usize>) {
    let clamped = size.map(|requested| requested.clamp(8 * 1024, MAX_STACK_SIZE));
    STACK_SIZE_OVERRIDE.with(|cell| cell.set(clamped));
}

/// Returns the current thread-local stack size override, if any.
pub fn get_thread_stack_size() -> Option<usize> {
STACK_SIZE_OVERRIDE.with(|cell| cell.get())
}

/// Pool of pre-allocated coroutine stacks to avoid repeated mmap syscalls.
/// Each entry is tagged with the size it was allocated at so that callers
/// requesting a larger stack can skip undersized entries instead of reusing them.
/// `SegQueue` is a lock-free MPMC queue, so pushes/pops may happen
/// concurrently from any thread without additional synchronization.
static STACK_POOL: LazyLock<crossbeam_queue::SegQueue<(DefaultStack, usize)>> =
    LazyLock::new(crossbeam_queue::SegQueue::new);

cfg_if::cfg_if! {
if #[cfg(unix)] {
/// Function which may handle custom signals while processing traps.
Expand Down Expand Up @@ -725,6 +761,15 @@ pub unsafe fn wasmer_call_trampoline(
}
}

/// Resolves the effective stack size from the three-tier priority chain:
/// per-call VMConfig > thread-local override > process-wide default.
fn resolve_stack_size(config: &VMConfig) -> usize {
config
.wasm_stack_size
.or_else(get_thread_stack_size)
.unwrap_or_else(|| DEFAULT_STACK_SIZE.load(Ordering::Relaxed))
}

/// Catches any wasm traps that happen within the execution of `closure`,
/// returning them as a `Result`.
///
Expand All @@ -741,9 +786,7 @@ where
{
// Ensure that per-thread initialization is done.
lazy_per_thread_init()?;
let stack_size = config
.wasm_stack_size
.unwrap_or_else(|| DEFAULT_STACK_SIZE.load(Ordering::Relaxed));
let stack_size = resolve_stack_size(config);
on_wasm_stack(stack_size, trap_handler, closure).map_err(UnwindReason::into_trap)
}

Expand Down Expand Up @@ -971,20 +1014,35 @@ fn on_wasm_stack<F: FnOnce() -> T + 'static, T: 'static>(
trap_handler: Option<*const TrapHandlerFn<'static>>,
f: F,
) -> Result<T, UnwindReason> {
// Allocating a new stack is pretty expensive since it involves several
// system calls. We therefore keep a cache of pre-allocated stacks which
// allows them to be reused multiple times.
// FIXME(Amanieu): We should refactor this to avoid the lock.
static STACK_POOL: LazyLock<crossbeam_queue::SegQueue<DefaultStack>> =
LazyLock::new(crossbeam_queue::SegQueue::new);

let stack = STACK_POOL
.pop()
.unwrap_or_else(|| DefaultStack::new(stack_size).unwrap());
let mut stack = scopeguard::guard(stack, |stack| STACK_POOL.push(stack));
// Try to reuse a pooled stack that is large enough. Undersized stacks are
// collected and returned to the pool after the search so we don't re-pop
// the same entry in a loop.
let stack = {
let mut found = None;
let mut skipped = Vec::new();
while let Some((s, sz)) = STACK_POOL.pop() {
if sz >= stack_size {
found = Some((s, sz));
break;
}
skipped.push((s, sz));
}
for entry in skipped {
STACK_POOL.push(entry);
}
match found {
Some(entry) => entry,
None => {
let s = DefaultStack::new(stack_size)
.map_err(|_| UnwindReason::LibTrap(Trap::oom()))?;
(s, stack_size)
}
}
};
let mut stack = scopeguard::guard(stack, |entry| STACK_POOL.push(entry));

// Create a coroutine with a new stack to run the function on.
let coro = ScopedCoroutine::with_stack(&mut *stack, move |yielder, ()| {
let coro = ScopedCoroutine::with_stack(&mut stack.0, move |yielder, ()| {
// Save the yielder to TLS so that it can be used later.
YIELDER.with(|cell| cell.set(Some(yielder.into())));

Expand Down Expand Up @@ -1174,3 +1232,195 @@ pub fn lazy_per_thread_init() -> Result<(), Trap> {
}
}
}

#[cfg(test)]
mod tests {
    //! Unit tests for the stack-size configuration API: thread-local
    //! isolation of the override, independence from the process-wide
    //! default, the size-tagged stack pool search, and the three-tier
    //! priority chain used by `catch_traps`.
    use super::*;
    use std::sync::{Arc, Barrier};

    #[test]
    fn thread_local_override_is_isolated() {
        // Each thread's override must be invisible to other threads.
        let original = get_stack_size();
        // 3 parties: t1, t2, and the main thread all rendezvous twice.
        let barrier = Arc::new(Barrier::new(3));

        let b1 = barrier.clone();
        let t1 = std::thread::spawn(move || {
            assert_eq!(get_thread_stack_size(), None);
            set_thread_stack_size(Some(2 * 1024 * 1024));
            assert_eq!(get_thread_stack_size(), Some(2 * 1024 * 1024));
            b1.wait(); // sync: all threads have set their overrides
            // Still our own value — not polluted by t2.
            assert_eq!(get_thread_stack_size(), Some(2 * 1024 * 1024));
            b1.wait(); // sync: all threads have verified
            set_thread_stack_size(None);
            assert_eq!(get_thread_stack_size(), None);
        });

        let b2 = barrier.clone();
        let t2 = std::thread::spawn(move || {
            assert_eq!(get_thread_stack_size(), None);
            set_thread_stack_size(Some(4 * 1024 * 1024));
            assert_eq!(get_thread_stack_size(), Some(4 * 1024 * 1024));
            b2.wait(); // sync
            // Still our own value — not polluted by t1.
            assert_eq!(get_thread_stack_size(), Some(4 * 1024 * 1024));
            b2.wait(); // sync
            set_thread_stack_size(None);
            assert_eq!(get_thread_stack_size(), None);
        });

        // Main thread: no override set, should see None throughout.
        barrier.wait(); // sync: t1 and t2 have set overrides
        assert_eq!(get_thread_stack_size(), None);
        barrier.wait(); // sync: let threads verify

        t1.join().unwrap();
        t2.join().unwrap();

        // Global default must be untouched.
        assert_eq!(get_stack_size(), original);
    }

    #[test]
    fn thread_local_override_does_not_affect_global() {
        let original = get_stack_size();

        set_thread_stack_size(Some(8 * 1024 * 1024));
        // Global is unchanged.
        assert_eq!(get_stack_size(), original);
        assert_eq!(get_thread_stack_size(), Some(8 * 1024 * 1024));

        set_thread_stack_size(None);
        assert_eq!(get_stack_size(), original);
        assert_eq!(get_thread_stack_size(), None);
    }

    #[test]
    fn concurrent_retries_do_not_interfere() {
        // Simulate the stylus_call retry pattern on multiple threads:
        // each thread bumps its thread-local, "retries", then clears it.
        // No thread should see another thread's override.
        let original = get_stack_size();
        let num_threads = 8;
        let barrier = Arc::new(Barrier::new(num_threads));

        let handles: Vec<_> = (0..num_threads)
            .map(|i| {
                let b = barrier.clone();
                std::thread::spawn(move || {
                    let my_size = (i + 1) * 1024 * 1024; // 1MB, 2MB, ..., 8MB

                    // Phase 1: all threads set different overrides simultaneously.
                    set_thread_stack_size(Some(my_size));
                    b.wait();

                    // Phase 2: verify each thread still sees its own value.
                    let seen = get_thread_stack_size();
                    assert_eq!(
                        seen,
                        Some(my_size),
                        "thread {i} expected {my_size}, got {seen:?}"
                    );
                    b.wait();

                    // Phase 3: simulate "retry succeeded" — double and verify.
                    // Max doubled value is 16 MB, well under MAX_STACK_SIZE,
                    // so clamping never changes what we read back.
                    let doubled = my_size * 2;
                    set_thread_stack_size(Some(doubled));
                    b.wait();

                    let seen = get_thread_stack_size();
                    assert_eq!(
                        seen,
                        Some(doubled),
                        "thread {i} after doubling: expected {doubled}, got {seen:?}"
                    );
                    b.wait();

                    // Phase 4: clear (like the drop guard in stylus_call).
                    set_thread_stack_size(None);
                    assert_eq!(get_thread_stack_size(), None);
                })
            })
            .collect();

        for h in handles {
            h.join().unwrap();
        }

        // Global must be untouched after all threads finish.
        assert_eq!(get_stack_size(), original);
    }

    #[test]
    fn pool_returns_correctly_sized_stacks() {
        // Push stacks of different sizes into the pool, then verify that
        // on_wasm_stack picks one that is large enough.
        // NOTE(review): this test mutates the process-global STACK_POOL;
        // it stays sound only while no other test concurrently uses the
        // pool (currently none do).
        let small = 64 * 1024;
        let large = 2 * 1024 * 1024;

        // Seed the pool with a small stack.
        let s = DefaultStack::new(small).unwrap();
        STACK_POOL.push((s, small));

        // Request a large stack — the small one should be skipped.
        // We can't directly call on_wasm_stack (it needs trap init), but we
        // can test the pool search logic by replicating it.
        let mut skipped = Vec::new();
        let mut found = None;
        while let Some((s, sz)) = STACK_POOL.pop() {
            if sz >= large {
                found = Some((s, sz));
                break;
            }
            skipped.push((s, sz));
        }
        // Return undersized entries so the pool is left as we found it.
        for entry in skipped {
            STACK_POOL.push(entry);
        }

        // Should not have found anything large enough.
        assert!(found.is_none(), "pool should not have a stack >= {large}");

        // The small stack should still be in the pool.
        let entry = STACK_POOL.pop();
        assert!(entry.is_some(), "small stack should still be pooled");
        let (_, sz) = entry.unwrap();
        assert_eq!(sz, small);
    }

    #[test]
    fn resolve_stack_size_priority_chain() {
        let global_default = get_stack_size();

        // Case 1: no VMConfig override, no thread-local → uses global default.
        set_thread_stack_size(None);
        let config = VMConfig {
            wasm_stack_size: None,
        };
        assert_eq!(resolve_stack_size(&config), global_default);

        // Case 2: thread-local set, VMConfig None → thread-local wins.
        set_thread_stack_size(Some(4 * 1024 * 1024));
        let config = VMConfig {
            wasm_stack_size: None,
        };
        assert_eq!(resolve_stack_size(&config), 4 * 1024 * 1024);

        // Case 3: both thread-local and VMConfig set → VMConfig wins.
        let config = VMConfig {
            wasm_stack_size: Some(2 * 1024 * 1024),
        };
        assert_eq!(resolve_stack_size(&config), 2 * 1024 * 1024);

        // Case 4: VMConfig set, no thread-local → VMConfig wins.
        set_thread_stack_size(None);
        let config = VMConfig {
            wasm_stack_size: Some(6 * 1024 * 1024),
        };
        assert_eq!(resolve_stack_size(&config), 6 * 1024 * 1024);

        // Cleanup.
        set_thread_stack_size(None);
    }
}
Loading