From 8fa4f6fa3296c9a660da19883062596d6c649f75 Mon Sep 17 00:00:00 2001 From: Wayland Yang Date: Tue, 23 Jun 2026 22:09:18 +0800 Subject: [PATCH] fix(py-sdk): robust target parsing + empty-response handling in Sandbox._send MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #256 (thanks @Bug-Keeper — accurate report, exact code citations). Two real defects in sdk/python/forkd/sandbox.py `_send`: 1. `host, _, port_s = self.target.rpartition(":")` mangled IPv6 targets: `[::1]:8888` → host `"[::1]"` (brackets kept), which `socket.create_connection` can't resolve. Latent today (default is `10.42.0.2:8888`) but `target` / `FORKD_TARGET` is user-configurable. Fixed with a `_split_host_port` helper using `urllib.parse.urlsplit`, which strips the brackets and raises a clear ValueError on a malformed target instead of a bare `int()` failure. 2. `return json.loads(buf.decode())` on an empty `buf` raised an opaque JSONDecodeError when the agent closed the connection without replying (crash / mid-response death). Now raises a plain "empty response" RuntimeError — matching the TS SDK's empty-body handling in controller.ts. Also turns a recv timeout into a clear TimeoutError rather than a bare socket.timeout. (The issue title says "loses data with custom timeout" — the actual defects are the two above; no data loss, but the report's substance and proposed fixes were correct.) Adds sdk/python/tests/ (the SDK had no tests — only example.py) with 12 regression cases covering IPv4/IPv6/hostname/malformed parsing and the empty-response + roundtrip wire behavior against an in-process socket server (no firecracker/daemon needed). New `sdk-python` CI job installs the package and runs them so they can't rot. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/ci.yml | 16 ++++++ sdk/python/forkd/sandbox.py | 42 ++++++++++++++-- sdk/python/tests/test_send.py | 94 +++++++++++++++++++++++++++++++++++ 3 files changed, 148 insertions(+), 4 deletions(-) create mode 100644 sdk/python/tests/test_send.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a0c851a..155b754 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,3 +42,19 @@ jobs: python -m py_compile echo_server.py - name: run.sh shellcheck-style parse run: bash -n bench/pause-window/run.sh + + sdk-python: + # SDK unit tests that need no firecracker/daemon — target parsing + # and the _send wire protocol against an in-process socket server. + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + - name: install + pytest + working-directory: sdk/python + run: | + python -m pip install --quiet pytest + python -m pip install --quiet -e . + python -m pytest tests/ -q diff --git a/sdk/python/forkd/sandbox.py b/sdk/python/forkd/sandbox.py index 11e391c..60f7df8 100644 --- a/sdk/python/forkd/sandbox.py +++ b/sdk/python/forkd/sandbox.py @@ -9,7 +9,27 @@ import subprocess import time from dataclasses import dataclass -from typing import Optional, Sequence, Union +from typing import Optional, Sequence, Tuple, Union +from urllib.parse import urlsplit + + +def _split_host_port(target: str) -> Tuple[str, int]: + """Split a ``host:port`` target into ``(host, port)``. + + Uses ``urlsplit`` rather than ``rpartition(":")`` so IPv6 literals + like ``[::1]:8888`` parse correctly — ``urlsplit`` strips the + brackets, handing the bare ``::1`` to ``socket.create_connection`` + (which a naive last-colon split would not, leaving the brackets in + the host string). Raises ``ValueError`` with an actionable message + on a malformed target instead of a bare ``int()`` ValueError. + """ + parts = urlsplit("//" + target) + if parts.hostname is None or parts.port is None: + raise ValueError( + f"invalid forkd target {target!r}; expected host:port " + "(e.g. 10.42.0.2:8888 or [::1]:8888)" + ) + return parts.hostname, parts.port @dataclass @@ -205,16 +225,30 @@ def _spawn(self) -> None: ) def _send(self, msg: dict) -> dict: - host, _, port_s = self.target.rpartition(":") - port = int(port_s) + host, port = _split_host_port(self.target) with socket.create_connection((host, port), timeout=5) as s: s.settimeout(self.timeout + 5) s.sendall((json.dumps(msg) + "\n").encode()) s.shutdown(socket.SHUT_WR) buf = bytearray() while True: - chunk = s.recv(65536) + try: + chunk = s.recv(65536) + except socket.timeout as e: + raise TimeoutError( + f"forkd: no response from guest agent at {self.target} " + f"within {self.timeout + 5}s" + ) from e if not chunk: break buf.extend(chunk) + # An empty body means the agent closed the connection without + # replying (crash / mid-response death). Surface that plainly + # rather than a baffling JSONDecodeError on ``json.loads("")`` — + # matches the TS SDK's empty-body handling in controller.ts. + if not buf: + raise RuntimeError( + f"forkd: empty response from guest agent at {self.target} " + "(connection closed before any data — the agent may have crashed)" + ) return json.loads(buf.decode()) diff --git a/sdk/python/tests/test_send.py b/sdk/python/tests/test_send.py new file mode 100644 index 0000000..03b2fe6 --- /dev/null +++ b/sdk/python/tests/test_send.py @@ -0,0 +1,94 @@ +"""Regression tests for Sandbox._send / target parsing (issue #256). + +These cover the two bugs in the original `_send`: + 1. `rpartition(":")` mangled IPv6 targets — fixed via `_split_host_port`. + 2. an empty response body raised a baffling `JSONDecodeError` instead + of a clear "agent closed the connection" error. + +No live guest agent needed: `_split_host_port` is a pure function, and +`_send` is exercised against a tiny in-process TCP server. `Sandbox` is +constructed with `spawn=False` so nothing forks. +""" +import json +import socket +import threading + +import pytest + +from forkd.sandbox import Sandbox, _split_host_port + + +# ---- _split_host_port (bug 1: IPv6) ---------------------------------------- + +def test_split_ipv4(): + assert _split_host_port("10.42.0.2:8888") == ("10.42.0.2", 8888) + + +def test_split_hostname(): + assert _split_host_port("agent.local:9000") == ("agent.local", 9000) + + +def test_split_ipv6_bracketed(): + # The original rpartition(":") left brackets in the host; urlsplit + # strips them so the bare address reaches create_connection. + assert _split_host_port("[::1]:8888") == ("::1", 8888) + + +def test_split_ipv6_full(): + assert _split_host_port("[2001:db8::1]:443") == ("2001:db8::1", 443) + + +@pytest.mark.parametrize("bad", ["nohost", "10.42.0.2", "host:", ":8888", "host:notaport"]) +def test_split_malformed_raises_valueerror(bad): + with pytest.raises(ValueError): + _split_host_port(bad) + + +# ---- _send against a local server ------------------------------------------ + +def _serve_once(reply: bytes): + """Start a one-shot TCP server on 127.0.0.1; return its 'host:port'. + Each connection gets `reply` then the socket closes.""" + srv = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + srv.bind(("127.0.0.1", 0)) + srv.listen(1) + host, port = srv.getsockname() + + def handle(): + conn, _ = srv.accept() + with conn: + # Drain the request (the client SHUT_WR after sending). + while conn.recv(65536): + pass + if reply: + conn.sendall(reply) + srv.close() + + threading.Thread(target=handle, daemon=True).start() + return f"{host}:{port}" + + +def _sandbox_for(target: str) -> Sandbox: + return Sandbox(target=target, timeout=2, spawn=False) + + +def test_send_roundtrip_ok(): + target = _serve_once((json.dumps({"ok": True, "echo": 1}) + "\n").encode()) + sb = _sandbox_for(target) + assert sb._send({"ping": 1}) == {"ok": True, "echo": 1} + + +def test_send_empty_response_raises_clear_error(): + # bug 2: server closes without replying. The RuntimeError("empty + # response") proves we no longer surface a baffling JSONDecodeError + # from json.loads(""). + target = _serve_once(b"") + sb = _sandbox_for(target) + with pytest.raises(RuntimeError, match="empty response"): + sb._send({"ping": 1}) + + +def test_send_invalid_target_raises_before_connect(): + sb = _sandbox_for("not-a-target") + with pytest.raises(ValueError, match="invalid forkd target"): + sb._send({"ping": 1})