Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions init/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -1552,6 +1552,14 @@ int main(int argc, char **argv)
if (enable_dummy_interface() < 0) {
printf("Warning: Couldn't enable dummy interface\n");
}

/* Allow unprivileged ICMP ping sockets (SOCK_DGRAM + IPPROTO_ICMP)
* for all GIDs so that TSI can hijack and proxy them to the host. */
int ping_fd = open("/proc/sys/net/ipv4/ping_group_range", O_WRONLY);
if (ping_fd >= 0) {
write(ping_fd, "0 2147483647\n", 13);
close(ping_fd);
}
}
#endif

Expand Down
3 changes: 2 additions & 1 deletion src/devices/src/virtio/vsock/muxer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ impl VsockMuxer {
}
}
defs::SOCK_DGRAM => {
debug!("proxy create dgram");
debug!("proxy create dgram (protocol={})", req.protocol);
let id = ((req.peer_port as u64) << 32) | (defs::TSI_PROXY_PORT as u64);
if req.family as i32 == libc::AF_UNIX
&& !self.tsi_flags.contains(TsiFlags::HIJACK_UNIX)
Expand All @@ -335,6 +335,7 @@ impl VsockMuxer {
self.cid,
req.family,
req.peer_port,
req.protocol,
mem.clone(),
queue.clone(),
self.rxq.clone(),
Expand Down
34 changes: 22 additions & 12 deletions src/devices/src/virtio/vsock/packet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ pub struct TsiProxyCreate {
pub peer_port: u32,
pub family: u16,
pub _type: u16,
pub protocol: u16,
}

#[repr(C)]
Expand Down Expand Up @@ -625,19 +626,28 @@ impl VsockPacket {
}

pub fn read_proxy_create(&self) -> Option<TsiProxyCreate> {
if self.buf_size >= 6 {
let peer_port: u32 = byte_order::read_le_u32(&self.buf().unwrap()[0..]);
let family: u16 = byte_order::read_le_u16(&self.buf().unwrap()[4..]);
let _type: u16 = byte_order::read_le_u16(&self.buf().unwrap()[6..]);

Some(TsiProxyCreate {
peer_port,
family,
_type,
})
let buf = self.buf()?;
if buf.len() < 8 {
return None;
}

let peer_port: u32 = byte_order::read_le_u32(&buf[0..]);
let family: u16 = byte_order::read_le_u16(&buf[4..]);
let _type: u16 = byte_order::read_le_u16(&buf[6..]);
// Protocol field added for ICMP ping socket support. Old guests
// that don't send it get 0 (= default, same as before).
let protocol: u16 = if buf.len() >= 10 {
byte_order::read_le_u16(&buf[8..])
} else {
None
}
0
};

Some(TsiProxyCreate {
peer_port,
family,
_type,
protocol,
})
}

pub fn read_connect_req(&self) -> Option<TsiConnectReq> {
Expand Down
45 changes: 39 additions & 6 deletions src/devices/src/virtio/vsock/tsi_dgram.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use nix::fcntl::{fcntl, FcntlArg, OFlag};
use nix::sys::socket::UnixAddr;
use nix::sys::socket::{
bind, connect, getpeername, recv, send, sendto, socket, AddressFamily, MsgFlags, SockFlag,
SockType, SockaddrIn, SockaddrLike, SockaddrStorage,
SockProtocol, SockType, SockaddrIn, SockaddrLike, SockaddrStorage,
};

#[cfg(target_os = "macos")]
Expand Down Expand Up @@ -38,6 +38,8 @@ pub struct TsiDgramProxy {
sendto_addr: Option<SockaddrStorage>,
listening: bool,
family: AddressFamily,
#[cfg_attr(not(target_os = "macos"), allow(dead_code))]
protocol: u16,
mem: GuestMemoryMmap,
queue: Arc<Mutex<VirtQueue>>,
rxq: Arc<Mutex<MuxerRxQ>>,
Expand All @@ -48,11 +50,13 @@ pub struct TsiDgramProxy {
}

impl TsiDgramProxy {
#[allow(clippy::too_many_arguments)]
pub fn new(
id: u64,
cid: u64,
family: u16,
peer_port: u32,
protocol: u16,
mem: GuestMemoryMmap,
queue: Arc<Mutex<VirtQueue>>,
rxq: Arc<Mutex<MuxerRxQ>>,
Expand All @@ -65,7 +69,15 @@ impl TsiDgramProxy {
_ => return Err(ProxyError::InvalidFamily),
};

let fd = socket(family, SockType::Datagram, SockFlag::empty(), None)
// When the guest requests IPPROTO_ICMP (1) or IPPROTO_ICMPV6 (58),
// create a ping socket instead of a plain UDP socket.
let sock_protocol = match protocol as _ {
libc::IPPROTO_ICMP => Some(SockProtocol::Icmp),
libc::IPPROTO_ICMPV6 => Some(SockProtocol::IcmpV6),
_ => None,
};

let fd = socket(family, SockType::Datagram, SockFlag::empty(), sock_protocol)
.map_err(ProxyError::CreatingSocket)?;

// macOS forces us to do this here instead of just using SockFlag::SOCK_NONBLOCK above.
Expand Down Expand Up @@ -106,6 +118,7 @@ impl TsiDgramProxy {
sendto_addr: None,
listening: false,
family,
protocol,
mem,
queue,
rxq,
Expand Down Expand Up @@ -170,11 +183,31 @@ impl TsiDgramProxy {
match recv(self.fd.as_raw_fd(), &mut buf[..max_len], MsgFlags::empty()) {
Ok(cnt) => {
debug!("recv cnt={cnt}");
if cnt > 0 {
RecvPkt::Read(cnt)
} else {
RecvPkt::Close
if cnt == 0 {
return RecvPkt::Close;
}

// macOS DGRAM ICMP sockets include the IP header in
// recv, unlike Linux which strips it. Strip the IP
// header (variable length, from the IHL field) so the
// guest sees the same format as a Linux ping socket.
// buf is the guest's RX virtqueue descriptor — writable.
#[cfg(target_os = "macos")]
if matches!(
self.protocol as _,
libc::IPPROTO_ICMP | libc::IPPROTO_ICMPV6
) && cnt >= 20
{
// IHL (Internet Header Length): low 4 bits of first
// byte, in 32-bit words. Typically 5 (= 20 bytes).
let ip_hdr_len = (buf[0] & 0x0F) as usize * 4;
if ip_hdr_len <= cnt {
buf.copy_within(ip_hdr_len..cnt, 0);
return RecvPkt::Read(cnt - ip_hdr_len);
}
}

RecvPkt::Read(cnt)
}
Err(e) => {
debug!("recv_pkt: recv error: {e:?}");
Expand Down
1 change: 1 addition & 0 deletions tests/runner/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ fn run_single_test(
};
let use_buildah_unshare = cfg!(target_os = "linux")
&& std::env::var_os("KRUN_NO_UNSHARE").is_none()
&& !test_case.needs_host_network()
&& has_cmd("buildah")
&& has_cmd("unshare");

Expand Down
14 changes: 14 additions & 0 deletions tests/test_cases/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ use test_net_perf::TestNetPerf;
mod test_multiport_console;
use test_multiport_console::TestMultiportConsole;

mod test_tsi_ping;
use test_tsi_ping::TestTsiPing;

mod test_virtiofs_root_ro;
use test_virtiofs_root_ro::TestVirtiofsRootRo;

Expand Down Expand Up @@ -82,6 +85,7 @@ pub fn test_cases() -> Vec<TestCase> {
TestCase::new("net-tap", Box::new(TestNet::new_tap())),
TestCase::new("net-gvproxy", Box::new(TestNet::new_gvproxy())),
TestCase::new("net-vmnet-helper", Box::new(TestNet::new_vmnet_helper())),
TestCase::new("tsi-ping", Box::new(TestTsiPing)),
TestCase::new("multiport-console", Box::new(TestMultiportConsole)),
TestCase::new("virtiofs-root-ro", Box::new(TestVirtiofsRootRo)),
TestCase::new("virtiofs-misc", Box::new(TestVirtioFsMisc)),
Expand Down Expand Up @@ -221,6 +225,11 @@ pub trait Test {
fn timeout_secs(&self) -> u64 {
15
}

/// Whether this test needs the host's real network (skips unshare --net).
///
/// Defaults to `false`. Tests that must reach addresses outside the host
/// override this to return `true`; the runner then does not wrap the test
/// in `buildah unshare`.
fn needs_host_network(&self) -> bool {
false
}
}

#[guest]
Expand Down Expand Up @@ -257,6 +266,11 @@ impl TestCase {
self.test.timeout_secs()
}

/// Reports whether the wrapped test needs the host's real network.
///
/// Forwards to the wrapped test's `needs_host_network()`.
#[host]
pub fn needs_host_network(&self) -> bool {
self.test.needs_host_network()
}

#[allow(dead_code)]
pub fn name(&self) -> &'static str {
self.name
Expand Down
84 changes: 84 additions & 0 deletions tests/test_cases/src/test_tsi_ping.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
use macros::{guest, host};

/// Test case that runs `ping` against an external IPv4 address from inside
/// the guest and expects the guest to print `OK` when the ping succeeds.
pub struct TestTsiPing;

#[host]
mod host {
    use super::*;
    use crate::common::setup_fs_and_enter;
    use crate::{krun_call, krun_call_u32};
    use crate::{ShouldRun, Test, TestOutcome, TestSetup};
    use krun_sys::*;

    /// Containerfile for the guest root filesystem: a Fedora image with
    /// `iputils` installed so that `/usr/bin/ping` is available.
    const CONTAINERFILE: &str = "\
FROM fedora:44
RUN dnf install -y iputils && dnf clean all
";

    /// Host-side half of the TSI ping test: builds the rootfs, boots the VM
    /// and validates what the guest printed.
    impl Test for TestTsiPing {
        /// The ping is sent to an external address, so the test has to run
        /// with the host's real network.
        fn needs_host_network(&self) -> bool {
            true
        }

        /// Longer than the trait's 15 second default.
        fn timeout_secs(&self) -> u64 {
            30
        }

        /// This test is always scheduled.
        fn should_run(&self) -> ShouldRun {
            ShouldRun::Yes
        }

        /// Root filesystem image definition used for the guest.
        fn rootfs_image(&self) -> Option<&'static str> {
            Some(CONTAINERFILE)
        }

        /// Configures a krun context and enters the VM.
        ///
        /// Any failing krun call is propagated to the caller via `?`.
        fn start_vm(self: Box<Self>, test_setup: TestSetup) -> anyhow::Result<()> {
            unsafe {
                krun_call!(krun_set_log_level(KRUN_LOG_LEVEL_TRACE))?;
                let vm_ctx = krun_call_u32!(krun_create_ctx())?;
                // NOTE(review): presumably 1 vCPU and 512 MiB of RAM - confirm
                // against the krun_set_vm_config signature.
                krun_call!(krun_set_vm_config(vm_ctx, 1, 512))?;
                setup_fs_and_enter(vm_ctx, test_setup)?;
            }
            Ok(())
        }

        /// Passes only when the guest's stdout is exactly `"OK\n"`; stdout
        /// that is not valid UTF-8 is treated as an empty string.
        fn check(self: Box<Self>, stdout: Vec<u8>, _test_setup: TestSetup) -> TestOutcome {
            const EXPECTED: &str = "OK\n";

            let actual = String::from_utf8(stdout).unwrap_or_default();
            if actual != EXPECTED {
                return TestOutcome::Fail(format!("expected {:?}, got {:?}", EXPECTED, actual));
            }
            TestOutcome::Pass
        }
    }
}

#[guest]
mod guest {
    use super::*;
    use crate::Test;
    use std::process::Command;

    /// Guest-side half of the TSI ping test.
    impl Test for TestTsiPing {
        /// Runs `ping` inside the guest and prints `OK` on success.
        ///
        /// Panics, including ping's captured stdout and stderr in the
        /// message, when ping cannot be spawned or exits unsuccessfully.
        fn in_guest(self: Box<Self>) {
            // The target is an external address, so the guest kernel cannot
            // answer it locally and the request has to go through the TSI
            // vsock proxy. If the proxy opened a UDP socket instead of an
            // ICMP one, the ping would time out.
            let ping = Command::new("/usr/bin/ping")
                .args(["-c", "3", "-W", "2", "8.8.8.8"])
                .output()
                .expect("Failed to run ping");

            if !ping.status.success() {
                panic!(
                    "ping failed (exit={}):\nstdout: {}\nstderr: {}",
                    ping.status,
                    String::from_utf8_lossy(&ping.stdout),
                    String::from_utf8_lossy(&ping.stderr)
                );
            }

            println!("OK");
        }
    }
}
Loading