From c6a9d1846adfb2f01ee8d7d53119e562ea01b988 Mon Sep 17 00:00:00 2001 From: Rick Morgans Date: Sat, 14 Mar 2026 22:50:34 +1030 Subject: [PATCH 1/3] fix: retry partial writes in packet and push paths Extract write_all() and use it everywhere a socket write must be complete-or-fail. Fixes spurious disconnects under memory pressure or signal interruption that returns a short count. Co-Authored-By: Claude Opus 4.6 (1M context) --- attach.c | 79 +++++++++++++++++++++++++++----------------------------- 1 file changed, 38 insertions(+), 41 deletions(-) diff --git a/attach.c b/attach.c index 2de74ac..8630e8d 100644 --- a/attach.c +++ b/attach.c @@ -27,8 +27,9 @@ char const *clear_csi_data(void) return "\033[999H\r\n"; } -/* Write buf to fd handling partial writes. Exit on failure. */ -void write_buf_or_fail(int fd, const void *buf, size_t count) +/* Write all of buf to fd, retrying on short writes and EINTR. +** Returns 0 on success, -1 on failure (errno is set). */ +static int write_all(int fd, const void *buf, size_t count) { while (count != 0) { ssize_t ret = write(fd, buf, count); @@ -36,49 +37,49 @@ void write_buf_or_fail(int fd, const void *buf, size_t count) if (ret >= 0) { buf = (const char *)buf + ret; count -= ret; - } else if (ret < 0 && errno == EINTR) + } else if (errno == EINTR) continue; - else { - if (session_start) { - char age[32]; - session_age(age, sizeof(age)); - printf - ("%s[%s: session '%s' write failed after %s]\r\n", - clear_csi_data(), progname, - session_shortname(), age); - } else { - printf("%s[%s: write failed]\r\n", - clear_csi_data(), progname); - } - exit(1); + else + return -1; + } + return 0; +} + +/* Write buf to fd handling partial writes. Exit on failure. */ +void write_buf_or_fail(int fd, const void *buf, size_t count) +{ + if (write_all(fd, buf, count) < 0) { + if (session_start) { + char age[32]; + session_age(age, sizeof(age)); + printf + ("%s[%s: session '%s' write failed after %s]\r\n", + clear_csi_data(), progname, + session_shortname(), age); + } else { + printf("%s[%s: write failed]\r\n", + clear_csi_data(), progname); } + exit(1); } } /* Write pkt to fd. Exit on failure. */ void write_packet_or_fail(int fd, const struct packet *pkt) { - while (1) { - ssize_t ret = write(fd, pkt, sizeof(struct packet)); - - if (ret == sizeof(struct packet)) - return; - else if (ret < 0 && errno == EINTR) - continue; - else { - if (session_start) { - char age[32]; - session_age(age, sizeof(age)); - printf - ("%s[%s: session '%s' write failed after %s]\r\n", - clear_csi_data(), progname, - session_shortname(), age); - } else { - printf("%s[%s: write failed]\r\n", - clear_csi_data(), progname); - } - exit(1); + if (write_all(fd, pkt, sizeof(struct packet)) < 0) { + if (session_start) { + char age[32]; + session_age(age, sizeof(age)); + printf + ("%s[%s: session '%s' write failed after %s]\r\n", + clear_csi_data(), progname, + session_shortname(), age); + } else { + printf("%s[%s: write failed]\r\n", + clear_csi_data(), progname); } + exit(1); } } @@ -497,11 +498,7 @@ int push_main() } pkt.len = len; - len = write(s, &pkt, sizeof(struct packet)); - if (len != sizeof(struct packet)) { - if (len >= 0) - errno = EPIPE; - + if (write_all(s, &pkt, sizeof(struct packet)) < 0) { printf("%s: %s: %s\n", progname, sockname, strerror(errno)); return 1; From 851934a7678f4c6cada8f8a5c3adc8b93355a57e Mon Sep 17 00:00:00 2001 From: Rick Morgans Date: Sat, 14 Mar 2026 23:05:18 +1030 Subject: [PATCH 2/3] fix: use write_all in send_kill (missed in initial fix) send_kill() still used a bare write() for the kill packet. Apply the same write_all() retry loop as push and attach paths. Co-Authored-By: Claude Opus 4.6 (1M context) --- attach.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/attach.c b/attach.c index 8630e8d..f49ac9a 100644 --- a/attach.c +++ b/attach.c @@ -518,9 +518,9 @@ static int send_kill(int sig) memset(&pkt, 0, sizeof(pkt)); pkt.type = MSG_KILL; pkt.len = (unsigned char)sig; - ret = write(s, &pkt, sizeof(pkt)); + ret = write_all(s, &pkt, sizeof(pkt)); close(s); - return (ret == sizeof(pkt)) ? 0 : -1; + return ret; } static int session_gone(void) From 6b8c395e35c6c5d72b3293674f17c71ffd72ceb7 Mon Sep 17 00:00:00 2001 From: Rick Morgans Date: Sat, 14 Mar 2026 23:15:27 +1030 Subject: [PATCH 3/3] fix: treat write()=0 as failure, add regression tests - write_all: return -1 with errno=EIO when write() returns 0 (no progress), preventing an infinite retry loop - Include fault injection tests (preload_short_write.c) that force short socket writes to deterministically verify the retry logic in push and kill paths Co-Authored-By: Claude Opus 4.6 (1M context) --- attach.c | 10 +++++-- tests/preload_short_write.c | 58 +++++++++++++++++++++++++++++++++++++ tests/test.sh | 57 ++++++++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 3 deletions(-) create mode 100644 tests/preload_short_write.c diff --git a/attach.c b/attach.c index f49ac9a..48fcf70 100644 --- a/attach.c +++ b/attach.c @@ -34,13 +34,17 @@ static int write_all(int fd, const void *buf, size_t count) while (count != 0) { ssize_t ret = write(fd, buf, count); - if (ret >= 0) { + if (ret > 0) { buf = (const char *)buf + ret; count -= ret; - } else if (errno == EINTR) + } else if (ret < 0 && errno == EINTR) continue; - else + else { + /* ret == 0 (no progress) or ret < 0 (real error) */ + if (ret == 0) + errno = EIO; return -1; + } } return 0; } diff --git a/tests/preload_short_write.c b/tests/preload_short_write.c new file mode 100644 index 0000000..11828d3 --- /dev/null +++ b/tests/preload_short_write.c @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include + +static int did_inject; + +static ssize_t real_write(int fd, const void *buf, size_t count) +{ + return syscall(SYS_write, fd, buf, count); +} + +static int should_inject(int fd, size_t count) +{ + struct stat st; + + if (did_inject || count <= 1) + return 0; + if (!getenv("ATCH_FAULT_SHORT_WRITE_ONCE")) + return 0; + if (fstat(fd, &st) < 0) + return 0; + return S_ISSOCK(st.st_mode); +} + +static ssize_t short_write_impl(int fd, const void *buf, size_t count) +{ + if (should_inject(fd, count)) { + did_inject = 1; + return real_write(fd, buf, 1); + } + return real_write(fd, buf, count); +} + +#ifdef __APPLE__ +#define DYLD_INTERPOSE(_replacement, _replacee) \ + __attribute__((used)) static struct { \ + const void *replacement; \ + const void *replacee; \ + } _interpose_##_replacee \ + __attribute__((section("__DATA,__interpose"))) = { \ + (const void *)(unsigned long)&_replacement, \ + (const void *)(unsigned long)&_replacee \ + } + +ssize_t interposed_write(int fd, const void *buf, size_t count) +{ + return short_write_impl(fd, buf, count); +} + +DYLD_INTERPOSE(interposed_write, write); +#else +ssize_t write(int fd, const void *buf, size_t count) +{ + return short_write_impl(fd, buf, count); +} +#endif diff --git a/tests/test.sh b/tests/test.sh index 0c954c0..64b7bc7 100644 --- a/tests/test.sh +++ b/tests/test.sh @@ -720,6 +720,63 @@ assert_contains "no args: shows Usage:" "Usage:" "$out" run "$ATCH" --help assert_contains "help: shows tail command" "tail" "$out" +# ── 23. fault injection: short socket writes are retried ─────────────────── +# Force the first packet write to a socket to complete with 1 byte. +# Verifies write_all() retries correctly instead of treating short writes +# as fatal. + +TESTS_DIR=$(CDPATH= cd -- "$(dirname "$0")" && pwd) +OS_NAME=$(uname -s) + +FAULT_LIB= +build_short_write_injector() { + [ -n "$FAULT_LIB" ] && return 0 + case "$OS_NAME" in + Darwin) + FAULT_LIB="$TESTDIR/libshortwrite.dylib" + cc -dynamiclib -O2 -Wall -o "$FAULT_LIB" \ + "$TESTS_DIR/preload_short_write.c" >/dev/null 2>&1 ;; + *) + FAULT_LIB="$TESTDIR/libshortwrite.so" + cc -shared -fPIC -O2 -Wall -o "$FAULT_LIB" \ + "$TESTS_DIR/preload_short_write.c" -ldl >/dev/null 2>&1 ;; + esac +} + +with_short_socket_write() { + build_short_write_injector || return 1 + case "$OS_NAME" in + Darwin) + env DYLD_INSERT_LIBRARIES="$FAULT_LIB" \ + DYLD_FORCE_FLAT_NAMESPACE=1 \ + ATCH_FAULT_SHORT_WRITE_ONCE=1 "$@" ;; + *) + env LD_PRELOAD="$FAULT_LIB" \ + ATCH_FAULT_SHORT_WRITE_ONCE=1 "$@" ;; + esac +} + +"$ATCH" start short-push sh -c 'cat' +wait_socket short-push +out=$(printf 'short-write-marker\n' | with_short_socket_write \ + "$ATCH" push short-push 2>&1) +prc=$? +assert_exit "fault: push retries short socket write" 0 "$prc" +sleep 0.2 +assert_contains "fault: push data reaches session after short write" \ + "short-write-marker" "$(cat "$HOME/.cache/atch/short-push.log" 2>/dev/null)" +tidy short-push + +"$ATCH" start short-kill sleep 999 +wait_socket short-kill +out=$(with_short_socket_write "$ATCH" kill short-kill 2>&1) +krc=$? +assert_exit "fault: kill retries short socket write" 0 "$krc" +run "$ATCH" list +assert_not_contains "fault: session is gone after short-write kill" \ + "short-kill" "$out" +"$ATCH" kill -f short-kill >/dev/null 2>&1 || true + # ── summary ────────────────────────────────────────────────────────────────── printf "\n1..%d\n" "$T"