From 46f22dae9b0ce475bb7e392f3b2983af90b2ccca Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Mon, 18 May 2026 12:19:27 -0400 Subject: [PATCH 1/2] examples/vsock-latency: reproducer for first-connection vsock delay The first host-to-guest vsock round trip after VM start blocks until libkrun's vsock reaper TTL fires (src/devices/src/virtio/vsock/reaper.rs:10, currently 5 s). The first host connect() succeeds at the unix layer immediately, but the first read() blocks for the full reaper window before libkrun closes the unix socket with EOF. A second connect() immediately afterwards uses a fresh proxy and completes in <1 ms. The reproducer is ~400 lines of C in a new examples/vsock-latency/ subdirectory: * guest.c tiny static AF_VSOCK listener; one ack byte per connect * host.c libkrun setup + a timing thread that prints phase: socket-created + phase: first-roundtrip-ok + (attempts=) warm-roundtrip[0..3]: + * Makefile builds both; "make rootfs" pulls a busybox image into ./rootfs/ via docker (a one-file rootfs is not enough for libkrunfw's init) * README.md what it measures, expected output, root cause The host.c timing loop also honours PROBE_TIMEOUT_MS to apply SO_RCVTIMEO per attempt (simulates a caller that wraps each probe in a deadline). Measured trade-off on libkrunfw 5.4.0 / bundled kernel 6.12.87: PROBE_TIMEOUT_MS first-roundtrip-ok attempts unset ~5340 ms 2 2000 ~2355 ms 2 100 ~425 ms 2 10 ~373 ms 3 Co-Authored-By: Claude Opus 4.7 (1M context) --- examples/vsock-latency/.gitignore | 2 + examples/vsock-latency/Makefile | 55 +++++++ examples/vsock-latency/README.md | 146 +++++++++++++++++++ examples/vsock-latency/guest.c | 64 +++++++++ examples/vsock-latency/host.c | 232 ++++++++++++++++++++++++++++++ 5 files changed, 499 insertions(+) create mode 100644 examples/vsock-latency/.gitignore create mode 100644 examples/vsock-latency/Makefile create mode 100644 examples/vsock-latency/README.md create mode 100644 examples/vsock-latency/guest.c create mode 100644 examples/vsock-latency/host.c diff --git a/examples/vsock-latency/.gitignore b/examples/vsock-latency/.gitignore new file mode 100644 index 000000000..2fc58dd33 --- /dev/null +++ b/examples/vsock-latency/.gitignore @@ -0,0 +1,2 @@ +host +rootfs/ diff --git a/examples/vsock-latency/Makefile b/examples/vsock-latency/Makefile new file mode 100644 index 000000000..72435c6b8 --- /dev/null +++ b/examples/vsock-latency/Makefile @@ -0,0 +1,55 @@ +ARCH = $(shell uname -m) +OS = $(shell uname -s) + +LDFLAGS_x86_64_Linux = -lkrun -lpthread +LDFLAGS_aarch64_Linux = -lkrun -lpthread +LDFLAGS_riscv64_Linux = -lkrun -lpthread + +CFLAGS_HOST = -O2 -g -I../../include +CFLAGS_GUEST = -O2 -static + +ROOTFS_DIR = rootfs + +.PHONY: all demo clean + +all: host $(ROOTFS_DIR)/guest + +host: host.c + gcc -o $@ $< $(CFLAGS_HOST) $(LDFLAGS_$(ARCH)_$(OS)) + +# Minimal rootfs: a statically linked guest binary plus the +# conventional /proc, /sys, /dev mountpoints. No userland needed — +# the guest binary doesn't fork/exec or open anything outside vsock, +# so we don't need /bin, /lib, /etc, or any container image. +$(ROOTFS_DIR)/guest: guest.c + mkdir -p $(ROOTFS_DIR)/proc $(ROOTFS_DIR)/sys $(ROOTFS_DIR)/dev + gcc -o $@ $< $(CFLAGS_GUEST) + +# Run the host a few times at different per-probe timeouts and print +# just the per-attempt / first-roundtrip lines from each run. Shows +# the trade-off: shorter caller-side probe timeout → shorter observed +# cold start, because attempt[1] never completes regardless. +# +# Each iteration is one cold VM boot — they can't share a VM, since +# the whole point of the measurement is cold start. Override +# DEMO_TIMEOUTS to add/remove cases, e.g. `make demo DEMO_TIMEOUTS="'' 2000"` +# to see the 5 s reaper-TTL floor explicitly. +DEMO_TIMEOUTS = '' 2000 100 10 +demo: host $(ROOTFS_DIR)/guest + @exec /dev/null 2>/tmp/vsock-demo.err; \ + grep -E 'attempt\[|first-roundtrip-ok' /tmp/vsock-demo.err \ + | grep -v 'deferring proxy'; \ + done + @rm -f /tmp/vsock-demo.err + @printf '\n' + +clean: + rm -rf host $(ROOTFS_DIR) + rm -f /tmp/vsock-latency-*.sock diff --git a/examples/vsock-latency/README.md b/examples/vsock-latency/README.md new file mode 100644 index 000000000..ca4cd0cef --- /dev/null +++ b/examples/vsock-latency/README.md @@ -0,0 +1,146 @@ +# vsock-latency + +Minimal reproducer for a multi-second one-time delay on the **first +host-to-guest vsock connection** through libkrun. After the first +round trip, all subsequent connections on the same VM are sub-millisecond. + +## What it measures + +`host.c` configures a libkrun microVM whose PID 1 is a tiny static +binary (`guest.c`). The guest binds an AF_VSOCK listener on port 1234 +and writes one ack byte to each accepted connection. The host maps +that vsock port to a host UNIX socket via +`krun_add_vsock_port2(..., listen=true)`, then a timing thread breaks +startup into phases: + +1. **socket-created**: time from `krun_start_enter` until libkrun's + `UnixAcceptorProxy` binds the host-side UNIX socket. +2. **first-roundtrip-ok**: time until the first host `connect()` plus + `read(1 byte)` completes end-to-end — i.e. the first byte actually + arrives through the kernel's virtio-vsock RX path. +3. **warm-roundtrip[0..3]**: four more back-to-back round trips for + comparison. + +## Build + +Prereqs: libkrun installed (`pkg-config --modversion libkrun` works), +`/dev/kvm` writable, gcc with `-static` support (`glibc-static` on +Fedora, `musl-dev` on Alpine, etc.). + +``` +make +``` + +Produces `./host` and `./rootfs/guest`. The rootfs is just the static +guest binary plus empty `proc/`, `sys/`, `dev/` mountpoints — no +container image or external rootfs needed. + +## Run + +``` +./host ./rootfs +``` + +The argument is the directory libkrun mounts as the VM's `/` (via +virtio-fs). It must contain a top-level `guest` binary; `make` +produces that at `./rootfs/guest`. + +## Expected output + +On an affected libkrunfw/kernel combination (e.g. libkrunfw 5.4.0 on +host kernel 6.18, libkrunfw-bundled kernel 6.12.87): + +``` +vsock-latency: rootfs=./rootfs sock=/tmp/vsock-latency-12345.sock +guest: listening on vsock:1234 +phase: socket-created + 343.10 ms +phase: first-roundtrip-ok +5365.22 ms (delta-from-socket=5012.58 ms, attempts=2) +guest: accept[0] ack +warm-roundtrip[0]: 0.22 ms +guest: accept[1] ack +warm-roundtrip[1]: 0.28 ms +guest: accept[2] ack +warm-roundtrip[2]: 0.24 ms +guest: accept[3] ack +warm-roundtrip[3]: 0.22 ms +``` + +The diagnostic is **`delta-from-socket`**: the time between libkrun's +unix socket being ready and the first successful end-to-end round +trip. On this kernel it is ~5 seconds; warm round trips on the same +VM are ~0.3 ms. + +With per-attempt timing enabled (uncomment the inner `fprintf` in +`host.c`) the pattern is: + +``` + attempt[1] +5000.83 ms connect=ok read=EOF + attempt[2] +0.39 ms connect=ok read=ack +``` + +The first host `connect()` succeeds at the unix layer immediately, but +the `read()` blocks for **exactly 5 seconds** before libkrun closes +the unix socket with EOF. The second attempt — on a brand-new unix +connection — succeeds in under a millisecond. + +The 5 s is hardcoded in libkrun: `src/devices/src/virtio/vsock/reaper.rs` +sets `TIMEOUT = Duration::new(5, 0)` for the vsock reaper thread, +which holds proxies in `released_map` for 5 s before actually +removing them. The first vsock leg was refused (guest's AF_VSOCK +accept() not yet ready when libkrun tried to forward), libkrun queued +the proxy for `ProxyRemoval::Deferred`, and the unix socket FD stays +associated with that not-yet-reaped proxy for the full 5 s. + +The corresponding `deferring proxy removal: ` WARN messages are +visible at libkrun log level WARN or higher in the muxer thread +(`muxer_thread.rs:100`). + +### Effect of a caller-side per-probe timeout + +A caller that probes for agent readiness typically wraps each attempt +in some kind of deadline — for example `context.WithTimeout(ctx, X)` +around each round trip in a polling loop. That deadline caps how +long the caller waits for the doomed first attempt before retrying. + +Set `PROBE_TIMEOUT_MS` to apply `SO_RCVTIMEO` to each probe attempt +(simulates a per-probe `WithTimeout(ctx, X)`). The reproducer prints +per-attempt outcomes plus the total wall-clock to first success. + +| `PROBE_TIMEOUT_MS` | first-roundtrip-ok | attempts | attempt[1] outcome | +| --- | --- | --- | --- | +| unset (block to EOF) | ~5340 ms | 2 | EOF at 5007 ms | +| `2000` | ~2355 ms | 2 | timeout at 2022 ms | +| `100` | ~425 ms | 2 | timeout at 101 ms | +| `10` | ~373 ms | 3 | timeout, timeout, ack | + +In every case attempt 1 never completes — it either EOFs at libkrun's +5 s reaper TTL or hits the per-attempt timeout earlier. Attempt 2 is +a fresh unix connection, fresh libkrun proxy, and succeeds in <1 ms. + +A caller polling with a 2 s deadline observes a ~2.4 s cold start +that is entirely the duration of *its own* probe deadline — nothing +in the libkrun + guest path benefits from waiting that long. The +same caller switched to a 100 ms deadline observes ~400 ms total +cold start (vsock), matching console cold-start. + +Run it yourself: + +``` +PROBE_TIMEOUT_MS=2000 ./host ./rootfs # ~2.4s cold start +PROBE_TIMEOUT_MS=100 ./host ./rootfs # ~400ms cold start +``` + +Or run all four columns of the table in one shot: + +``` +make demo +``` + +## Cleanup + +``` +make clean +``` + +Removes `host`, the staged `rootfs/`, and any leftover +`/tmp/vsock-latency-*.sock` from prior runs. diff --git a/examples/vsock-latency/guest.c b/examples/vsock-latency/guest.c new file mode 100644 index 000000000..7bdc3d24d --- /dev/null +++ b/examples/vsock-latency/guest.c @@ -0,0 +1,64 @@ +/* + * Guest side of the vsock first-connection latency reproducer. + * + * Runs as PID 1 inside a libkrun microVM. Binds AF_VSOCK on port 1234, + * accepts ITERATIONS connections, and exits — which makes libkrun shut + * the VM down. + * + * Each accepted connection gets exactly one ack byte. The host times + * connect() + read(1) to measure end-to-end vsock round-trip cost, + * since the host-side connect() returns as soon as libkrun's + * UnixAcceptorProxy accepts on the host — NOT when the guest accepts. + * + * Built static so it only depends on one file in the rootfs. + */ + +#include +#include +#include +#include +#include +#include + +#define VSOCK_PORT 1234 +#define ITERATIONS 5 + +static int die(const char *what) +{ + fprintf(stderr, "guest: %s: %s\n", what, strerror(errno)); + return 1; +} + +int main(void) +{ + int s = socket(AF_VSOCK, SOCK_STREAM, 0); + if (s < 0) + return die("socket(AF_VSOCK)"); + + struct sockaddr_vm a; + memset(&a, 0, sizeof a); + a.svm_family = AF_VSOCK; + a.svm_cid = VMADDR_CID_ANY; + a.svm_port = VSOCK_PORT; + + if (bind(s, (struct sockaddr *)&a, sizeof a) < 0) + return die("bind"); + if (listen(s, 8) < 0) + return die("listen"); + + fprintf(stderr, "guest: listening on vsock:%u\n", VSOCK_PORT); + + for (int i = 0; i < ITERATIONS; i++) { + int c = accept(s, NULL, NULL); + if (c < 0) + return die("accept"); + char ack = 'a'; + if (write(c, &ack, 1) != 1) + return die("write ack"); + fprintf(stderr, "guest: accept[%d] ack\n", i); + close(c); + } + + close(s); + return 0; +} diff --git a/examples/vsock-latency/host.c b/examples/vsock-latency/host.c new file mode 100644 index 000000000..b86c482c1 --- /dev/null +++ b/examples/vsock-latency/host.c @@ -0,0 +1,232 @@ +/* + * Reproducer for a multi-second one-time delay on the first + * host-to-guest vsock connection through libkrun. + * + * Observed at the downstream that reported this issue: vsock cold-start + * (mgr.Start through first agent round trip) takes ~2.4 s, while the same + * cold-start over virtio-console takes ~0.5 s. Once the VM is warm, + * per-connection vsock cost matches console (~10 ms). + * + * Lifecycle: + * - configure a libkrun microVM with krun_add_vsock_port2(listen=true) + * - the guest binary (statically linked, staged by the Makefile into + * the rootfs at /guest) binds AF_VSOCK:1234 and accepts connections, + * writing one ack byte per connection + * - the host program spawns a timing thread, then calls krun_start_enter + * - the timing thread breaks startup into three phases (see "phase:" / + * "warm-roundtrip[]:" lines printed to stderr) + * + * On affected libkrunfw/kernel combos the "first-roundtrip-ok" phase + * shows a multi-second jump relative to the "socket-created" phase, + * even though warm-path round trips on the same VM are sub-millisecond. + * + * Usage: + * ./host + * + * The rootfs dir must contain a `guest` binary at its top level + * (libkrun mounts the dir as the guest VM's / via virtio-fs, so the + * binary appears as /guest inside the guest). `make` produces a + * suitable directory at ./rootfs. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define VSOCK_PORT 1234 +#define WARM_ITERS 4 +#define RETRY_USEC 10000 /* 10 ms */ + +struct timing_args { + char sock_path[108]; /* sun_path is 108 bytes on Linux */ +}; + +static double ms_since(struct timespec *t0) +{ + struct timespec t1; + clock_gettime(CLOCK_MONOTONIC, &t1); + return (t1.tv_sec - t0->tv_sec) * 1e3 + + (t1.tv_nsec - t0->tv_nsec) / 1e6; +} + +static void *timing_thread(void *arg) +{ + struct timing_args *args = arg; + const char *sock_path = args->sock_path; + + /* t_start = thread entry, very close to when krun_start_enter began. */ + struct timespec t_start; + clock_gettime(CLOCK_MONOTONIC, &t_start); + + /* Phase 1: wait for libkrun's UnixAcceptorProxy to bind the socket. */ + while (access(sock_path, F_OK) != 0) { + if (errno != ENOENT) { + perror("timing: access"); + return NULL; + } + usleep(RETRY_USEC); + } + double ms_sock_exists = ms_since(&t_start); + + struct sockaddr_un addr; + memset(&addr, 0, sizeof addr); + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path, sock_path, sizeof addr.sun_path - 1); + + /* + * Phase 2: retry connect+ack-read until a full round trip succeeds. + * + * The host's connect() returns as soon as libkrun's + * UnixAcceptorProxy on the host accepts, well before the guest's + * AF_VSOCK accept(). If the guest's listener isn't up yet, the vsock + * leg gets refused and the host's read() returns 0 (EOF). Only a + * complete round trip (connect → byte from guest) proves the + * connection actually traversed the virtio-vsock RX path. + */ + /* + * Per-attempt read timeout, simulating a caller that wraps each + * probe in a deadline (e.g. context.WithTimeout). Set via env to + * study the effect of shorter probe deadlines on observed + * cold-start time. 0 = no timeout (block until libkrun closes + * the socket via the reaper TTL). + */ + int timeout_ms = 0; + if (getenv("PROBE_TIMEOUT_MS")) + timeout_ms = atoi(getenv("PROBE_TIMEOUT_MS")); + struct timeval rcvto = { + .tv_sec = timeout_ms / 1000, + .tv_usec = (timeout_ms % 1000) * 1000, + }; + + int attempts = 0; + for (;;) { + attempts++; + struct timespec ta; + clock_gettime(CLOCK_MONOTONIC, &ta); + + int fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) { perror("timing: socket"); return NULL; } + if (timeout_ms > 0) + setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &rcvto, sizeof rcvto); + + int connect_ok = connect(fd, (struct sockaddr *)&addr, sizeof addr) == 0; + const char *outcome = "skip"; + ssize_t n = -2; + if (connect_ok) { + char ack; + n = read(fd, &ack, 1); + if (n == 1) outcome = "ack"; + else if (n == 0) outcome = "EOF"; + else if (errno == EAGAIN || errno == EWOULDBLOCK) outcome = "timeout"; + else outcome = "err"; + } + close(fd); + fprintf(stderr, " attempt[%d] %+7.2f ms read=%s\n", + attempts, ms_since(&ta), outcome); + if (n == 1) break; + usleep(RETRY_USEC); + } + double ms_first_roundtrip = ms_since(&t_start); + + fprintf(stderr, + "phase: socket-created +%7.2f ms\n", ms_sock_exists); + fprintf(stderr, + "phase: first-roundtrip-ok +%7.2f ms" + " (delta-from-socket=%.2f ms, attempts=%d)\n", + ms_first_roundtrip, + ms_first_roundtrip - ms_sock_exists, + attempts); + + /* Phase 3: warm-path round trips for comparison. */ + for (int i = 0; i < WARM_ITERS; i++) { + struct timespec t0; + clock_gettime(CLOCK_MONOTONIC, &t0); + int fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) { perror("warm: socket"); return NULL; } + if (connect(fd, (struct sockaddr *)&addr, sizeof addr) < 0) { + perror("warm: connect"); close(fd); return NULL; + } + char ack; + if (read(fd, &ack, 1) != 1) { + fprintf(stderr, "warm: short read on iter %d\n", i); + close(fd); + return NULL; + } + close(fd); + fprintf(stderr, "warm-roundtrip[%d]: %7.2f ms\n", + i, ms_since(&t0)); + } + fflush(stderr); + + return NULL; +} + +static int check(int err, const char *what) +{ + if (err) { + errno = -err; + perror(what); + return -1; + } + return 0; +} + +int main(int argc, char *argv[]) +{ + if (argc != 2) { + fprintf(stderr, "usage: %s \n", argv[0]); + return 2; + } + const char *rootfs = argv[1]; + + struct timing_args targs; + snprintf(targs.sock_path, sizeof targs.sock_path, + "/tmp/vsock-latency-%d.sock", (int)getpid()); + /* Clear any stale socket from a previous crashed run. */ + unlink(targs.sock_path); + + krun_set_log_level(2 /* KRUN_LOG_LEVEL_WARN */); + + int ctx = krun_create_ctx(); + if (ctx < 0) { + errno = -ctx; + perror("krun_create_ctx"); + return 1; + } + + if (check(krun_set_vm_config(ctx, 1, 1024), "krun_set_vm_config")) return 1; + if (check(krun_set_root(ctx, rootfs), "krun_set_root")) return 1; + if (check(krun_add_vsock_port2(ctx, VSOCK_PORT, targs.sock_path, true), + "krun_add_vsock_port2")) return 1; + + const char *guest_argv[] = { "guest", NULL }; + const char *guest_envp[] = { NULL }; + if (check(krun_set_exec(ctx, "/guest", guest_argv, guest_envp), + "krun_set_exec")) return 1; + + pthread_t tid; + if (pthread_create(&tid, NULL, timing_thread, &targs) != 0) { + perror("pthread_create"); + return 1; + } + pthread_detach(tid); + + fprintf(stderr, "vsock-latency: rootfs=%s sock=%s\n", + rootfs, targs.sock_path); + + /* + * krun_start_enter never returns: it exits the process when the + * guest exits. The unix socket and any leftover state are cleaned + * up by `make clean`. + */ + if (check(krun_start_enter(ctx), "krun_start_enter")) return 1; + return 0; +} From 12033e75f0f948196f1c144febff7de29c9106d1 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Mon, 18 May 2026 14:12:09 -0400 Subject: [PATCH 2/2] get rid of /dev, /proc /sys, not needed. --- examples/vsock-latency/Makefile | 10 +++++----- examples/vsock-latency/README.md | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/vsock-latency/Makefile b/examples/vsock-latency/Makefile index 72435c6b8..690bc2128 100644 --- a/examples/vsock-latency/Makefile +++ b/examples/vsock-latency/Makefile @@ -17,12 +17,12 @@ all: host $(ROOTFS_DIR)/guest host: host.c gcc -o $@ $< $(CFLAGS_HOST) $(LDFLAGS_$(ARCH)_$(OS)) -# Minimal rootfs: a statically linked guest binary plus the -# conventional /proc, /sys, /dev mountpoints. No userland needed — -# the guest binary doesn't fork/exec or open anything outside vsock, -# so we don't need /bin, /lib, /etc, or any container image. +# Minimal rootfs: just the statically linked guest binary. No userland +# needed — the guest binary doesn't fork/exec or open anything outside +# vsock, and libkrunfw's init doesn't require /proc, /sys, or /dev to +# exist as mountpoints in the rootfs. $(ROOTFS_DIR)/guest: guest.c - mkdir -p $(ROOTFS_DIR)/proc $(ROOTFS_DIR)/sys $(ROOTFS_DIR)/dev + mkdir -p $(ROOTFS_DIR) gcc -o $@ $< $(CFLAGS_GUEST) # Run the host a few times at different per-probe timeouts and print diff --git a/examples/vsock-latency/README.md b/examples/vsock-latency/README.md index ca4cd0cef..93e3102ac 100644 --- a/examples/vsock-latency/README.md +++ b/examples/vsock-latency/README.md @@ -31,9 +31,9 @@ Fedora, `musl-dev` on Alpine, etc.). make ``` -Produces `./host` and `./rootfs/guest`. The rootfs is just the static -guest binary plus empty `proc/`, `sys/`, `dev/` mountpoints — no -container image or external rootfs needed. +Produces `./host` and `./rootfs/guest`. The rootfs is just the +single static guest binary — no userland, no `/proc` or `/dev` +mountpoints, no container image needed. ## Run