From f36b0403ebed7de3f2b4a8e65c95e7469d0fec5d Mon Sep 17 00:00:00 2001 From: Auke Kok Date: Fri, 6 Mar 2026 10:35:09 -0800 Subject: [PATCH 01/10] Set BLOCK_BIT_ERROR on bio submit failure. When block_submit_bio() fails, set BLOCK_BIT_ERROR so that waiters in wait_event(uptodate_or_error) will wake up rather than waiting indefinitely for a completion. Signed-off-by: Auke Kok --- kmod/src/block.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kmod/src/block.c b/kmod/src/block.c index 463e1b015..8602f18c8 100644 --- a/kmod/src/block.c +++ b/kmod/src/block.c @@ -624,8 +624,10 @@ static struct block_private *block_read(struct super_block *sb, u64 blkno) if (!test_bit(BLOCK_BIT_UPTODATE, &bp->bits) && test_and_clear_bit(BLOCK_BIT_NEW, &bp->bits)) { ret = block_submit_bio(sb, bp, REQ_OP_READ); - if (ret < 0) + if (ret < 0) { + set_bit(BLOCK_BIT_ERROR, &bp->bits); goto out; + } } wait_event(binf->waitq, uptodate_or_error(bp)); From 40f2446eb6e0b0c229d496e5fbe6009a6415bd29 Mon Sep 17 00:00:00 2001 From: Auke Kok Date: Fri, 6 Mar 2026 10:35:28 -0800 Subject: [PATCH 02/10] Add client timeout to farewell completion wait. Replace unbounded wait_for_completion() with a 120 second timeout to prevent indefinite hangs during unmount if the server never responds to the farewell request. 
Signed-off-by: Auke Kok --- kmod/src/client.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kmod/src/client.c b/kmod/src/client.c index cffd2bfac..246616c38 100644 --- a/kmod/src/client.c +++ b/kmod/src/client.c @@ -646,8 +646,12 @@ void scoutfs_client_destroy(struct super_block *sb) client_farewell_response, NULL, NULL); if (ret == 0) { - wait_for_completion(&client->farewell_comp); - ret = client->farewell_error; + if (!wait_for_completion_timeout(&client->farewell_comp, + 120 * HZ)) { + ret = -ETIMEDOUT; + } else { + ret = client->farewell_error; + } } if (ret) { scoutfs_inc_counter(sb, client_farewell_error); From 0fd79167a72e0999a3f55b556feb997a384626c8 Mon Sep 17 00:00:00 2001 From: Auke Kok Date: Fri, 6 Mar 2026 10:35:48 -0800 Subject: [PATCH 03/10] Wake up lock waiters to prevent hangs during unmount. Add unmounting checks to lock_wait_cond() and lock_key_range() so that lock waiters wake up and new lock requests fail with -ESHUTDOWN during unmount. Replace the unbounded wait_event() with a 60 second timeout to prevent indefinite hangs. Relax the WARN_ON_ONCE at lock_key_range entry to only warn when not unmounting, since late lock attempts during shutdown are expected. Signed-off-by: Auke Kok --- kmod/src/lock.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/kmod/src/lock.c b/kmod/src/lock.c index 63213874b..3bca88ad1 100644 --- a/kmod/src/lock.c +++ b/kmod/src/lock.c @@ -71,6 +71,8 @@ * relative to that lock state we resend. */ +#define CLIENT_LOCK_WAIT_TIMEOUT (60 * HZ) + /* * allocated per-super, freed on unmount. 
*/ @@ -953,6 +955,9 @@ static bool lock_wait_cond(struct super_block *sb, struct scoutfs_lock *lock, !lock->request_pending; spin_unlock(&linfo->lock); + if (!wake) + wake = scoutfs_unmounting(sb); + if (!wake) scoutfs_inc_counter(sb, lock_wait); @@ -997,8 +1002,10 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i return -EINVAL; /* maybe catch _setup() and _shutdown order mistakes */ - if (WARN_ON_ONCE(!linfo || linfo->shutdown)) + if (!linfo || linfo->shutdown) { + WARN_ON_ONCE(!scoutfs_unmounting(sb)); return -ENOLCK; + } /* have to lock before entering transactions */ if (WARN_ON_ONCE(scoutfs_trans_held())) @@ -1024,6 +1031,11 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i break; } + if (scoutfs_unmounting(sb)) { + ret = -ESHUTDOWN; + break; + } + /* the fast path where we can use the granted mode */ if (lock_modes_match(lock->mode, mode)) { lock_inc_count(lock->users, mode); @@ -1067,8 +1079,9 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i if (flags & SCOUTFS_LKF_INTERRUPTIBLE) { ret = wait_event_interruptible(lock->waitq, lock_wait_cond(sb, lock, mode)); - } else { - wait_event(lock->waitq, lock_wait_cond(sb, lock, mode)); + } else if (!wait_event_timeout(lock->waitq, + lock_wait_cond(sb, lock, mode), + CLIENT_LOCK_WAIT_TIMEOUT)) { ret = 0; } From b832c73611165f4c840a0800e1335be353e6bc3d Mon Sep 17 00:00:00 2001 From: Chris Kirby Date: Thu, 29 Jan 2026 08:25:52 -0600 Subject: [PATCH 04/10] Suppress another forced shutdown error message The "server error emptying freed" error was causing a fence-and-reclaim test failure. In this case, the error was -ENOLINK, which we should ignore for messaging purposes. Note that the -ENOLINK test is added to the enclosing if condition, so on -ENOLINK the "goto out" that follows the message is skipped as well and the commit function continues past the failed scoutfs_alloc_empty_list() call. 
Signed-off-by: Chris Kirby --- kmod/src/server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kmod/src/server.c b/kmod/src/server.c index e47919801..d1c808c56 100644 --- a/kmod/src/server.c +++ b/kmod/src/server.c @@ -638,7 +638,7 @@ static void scoutfs_server_commit_func(struct work_struct *work) ret = scoutfs_alloc_empty_list(sb, &server->alloc, &server->wri, server->meta_freed, server->other_freed); - if (ret) { + if (ret && ret != -ENOLINK) { scoutfs_err(sb, "server error emptying freed: %d", ret); goto out; } From e8ff7671b02b4229d2f7816120b8db72258ac66f Mon Sep 17 00:00:00 2001 From: Auke Kok Date: Tue, 17 Mar 2026 17:45:32 -0700 Subject: [PATCH 05/10] net: break out of sync request wait during unmount Replace unbounded wait_for_completion() in scoutfs_net_sync_request() with a 60 second timeout loop that checks scoutfs_unmounting(). Cancel the queued request before returning -ESHUTDOWN so that sync_response cannot fire on freed stack memory after the caller returns. Signed-off-by: Auke Kok --- kmod/src/net.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/kmod/src/net.c b/kmod/src/net.c index 8e7cdb4cf..11ef1dabf 100644 --- a/kmod/src/net.c +++ b/kmod/src/net.c @@ -1990,8 +1990,9 @@ static int sync_response(struct super_block *sb, * buffer. Errors returned can come from the remote request processing * or local failure to send. * - * The wait for the response is interruptible and can return - * -ERESTARTSYS if it is interrupted. + * The wait for the response uses a 60 second timeout loop that + * checks for unmount, returning -ESHUTDOWN if the mount is + * being torn down. * * -EOVERFLOW is returned if the response message's data_length doesn't * match the caller's resp_len buffer. 
@@ -2002,6 +2003,7 @@ int scoutfs_net_sync_request(struct super_block *sb, void *resp, size_t resp_len) { struct sync_request_completion sreq; + struct message_send *msend; int ret; u64 id; @@ -2014,8 +2016,21 @@ int scoutfs_net_sync_request(struct super_block *sb, sync_response, &sreq, &id); if (ret == 0) { - wait_for_completion(&sreq.comp); - ret = sreq.error; + while (!wait_for_completion_timeout(&sreq.comp, 60 * HZ)) { + if (scoutfs_unmounting(sb)) { + ret = -ESHUTDOWN; + break; + } + } + if (ret == -ESHUTDOWN) { + spin_lock(&conn->lock); + msend = find_request(conn, cmd, id); + if (msend) + queue_dead_free(conn, msend); + spin_unlock(&conn->lock); + } else { + ret = sreq.error; + } } return ret; From ee607578574a2aa0dd7cb6ccb687a3e8343e2822 Mon Sep 17 00:00:00 2001 From: Auke Kok Date: Wed, 15 Apr 2026 12:14:51 -0700 Subject: [PATCH 06/10] lock: clear coverage and skip invalidation during unmount During normal unmount, lock_invalidate_worker can hang in scoutfs_trans_sync(sb, 1) because the trans commit path may return network errors that cause an infinite retry loop. Skip full lock_invalidate() during shutdown and unmount, and extract lock_clear_coverage() to still clean up coverage items in those paths and in scoutfs_lock_destroy(). Without this, coverage items can remain attached to locks being freed. Signed-off-by: Auke Kok --- kmod/src/lock.c | 54 ++++++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/kmod/src/lock.c b/kmod/src/lock.c index 3bca88ad1..e653cdc46 100644 --- a/kmod/src/lock.c +++ b/kmod/src/lock.c @@ -159,6 +159,33 @@ static void invalidate_inode(struct super_block *sb, u64 ino) } } +/* + * Remove all coverage items from the lock to tell users that their + * cache is stale. This is lock-internal bookkeeping that is safe to + * call during shutdown and unmount. 
The unconditional unlock/relock + * of cov_list_lock avoids sparse warnings from unbalanced locking in + * the trylock failure path. + */ +static void lock_clear_coverage(struct super_block *sb, + struct scoutfs_lock *lock) +{ + struct scoutfs_lock_coverage *cov; + + spin_lock(&lock->cov_list_lock); + while ((cov = list_first_entry_or_null(&lock->cov_list, + struct scoutfs_lock_coverage, head))) { + if (spin_trylock(&cov->cov_lock)) { + list_del_init(&cov->head); + cov->lock = NULL; + spin_unlock(&cov->cov_lock); + scoutfs_inc_counter(sb, lock_invalidate_coverage); + } + spin_unlock(&lock->cov_list_lock); + spin_lock(&lock->cov_list_lock); + } + spin_unlock(&lock->cov_list_lock); +} + /* * Invalidate caches associated with this lock. Either we're * invalidating a write to a read or we're invalidating to null. We @@ -168,7 +195,6 @@ static void invalidate_inode(struct super_block *sb, u64 ino) static int lock_invalidate(struct super_block *sb, struct scoutfs_lock *lock, enum scoutfs_lock_mode prev, enum scoutfs_lock_mode mode) { - struct scoutfs_lock_coverage *cov; u64 ino, last; int ret = 0; @@ -192,24 +218,7 @@ static int lock_invalidate(struct super_block *sb, struct scoutfs_lock *lock, /* have to invalidate if we're not in the only usable case */ if (!(prev == SCOUTFS_LOCK_WRITE && mode == SCOUTFS_LOCK_READ)) { - /* - * Remove cov items to tell users that their cache is - * stale. The unlock pattern comes from avoiding bad - * sparse warnings when taking else in a failed trylock. 
- */ - spin_lock(&lock->cov_list_lock); - while ((cov = list_first_entry_or_null(&lock->cov_list, - struct scoutfs_lock_coverage, head))) { - if (spin_trylock(&cov->cov_lock)) { - list_del_init(&cov->head); - cov->lock = NULL; - spin_unlock(&cov->cov_lock); - scoutfs_inc_counter(sb, lock_invalidate_coverage); - } - spin_unlock(&lock->cov_list_lock); - spin_lock(&lock->cov_list_lock); - } - spin_unlock(&lock->cov_list_lock); + lock_clear_coverage(sb, lock); /* invalidate inodes after removing coverage so drop/evict aren't covered */ if (lock->start.sk_zone == SCOUTFS_FS_ZONE) { @@ -716,10 +725,12 @@ static void lock_invalidate_worker(struct work_struct *work) ireq = list_first_entry(&lock->inv_list, struct inv_req, head); nl = &ireq->nl; - /* only lock protocol, inv can't call subsystems after shutdown */ - if (!linfo->shutdown) { + /* only lock protocol, inv can't call subsystems after shutdown or unmount */ + if (!linfo->shutdown && !scoutfs_unmounting(sb)) { ret = lock_invalidate(sb, lock, nl->old_mode, nl->new_mode); BUG_ON(ret < 0 && ret != -ENOLINK); + } else { + lock_clear_coverage(sb, lock); } /* respond with the key and modes from the request, server might have died */ @@ -1663,6 +1674,7 @@ void scoutfs_lock_destroy(struct super_block *sb) list_del_init(&lock->inv_head); lock->invalidate_pending = 0; } + lock_clear_coverage(sb, lock); lock_remove(linfo, lock); lock_free(linfo, lock); } From cd076c6f77e42768450ab4ff365868b3f15238be Mon Sep 17 00:00:00 2001 From: Auke Kok Date: Wed, 15 Apr 2026 12:14:51 -0700 Subject: [PATCH 07/10] Break retry_forever loop on normal unmount retry_forever() only checked scoutfs_forcing_unmount(), so a normal unmount with a network error in the commit path would loop forever. Also check scoutfs_unmounting() so the write worker can exit cleanly. 
Signed-off-by: Auke Kok --- kmod/src/trans.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kmod/src/trans.c b/kmod/src/trans.c index d131bfa1e..d4ff36451 100644 --- a/kmod/src/trans.c +++ b/kmod/src/trans.c @@ -195,7 +195,8 @@ static int retry_forever(struct super_block *sb, int (*func)(struct super_block retrying = true; } - if (scoutfs_forcing_unmount(sb)) { + if (scoutfs_forcing_unmount(sb) || + scoutfs_unmounting(sb)) { ret = -ENOLINK; break; } From 98f130012669c45f9aed326cfd693cd562922446 Mon Sep 17 00:00:00 2001 From: Auke Kok Date: Wed, 15 Apr 2026 12:06:36 -0700 Subject: [PATCH 08/10] Validate freed ref consistency in dirty_alloc_blocks Add a WARN_ON_ONCE check that the freed list ref blkno matches the block header blkno after dirtying alloc blocks. Also save and restore freed.first_nr on the error path, and initialize av_old/fr_old to 0 so the diagnostic message has valid values. Signed-off-by: Auke Kok --- kmod/src/alloc.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/kmod/src/alloc.c b/kmod/src/alloc.c index 0ceaf3b8b..e33a3e04c 100644 --- a/kmod/src/alloc.c +++ b/kmod/src/alloc.c @@ -24,6 +24,7 @@ #include "trans.h" #include "alloc.h" #include "counters.h" +#include "msg.h" #include "scoutfs_trace.h" /* @@ -496,10 +497,11 @@ static int dirty_alloc_blocks(struct super_block *sb, struct scoutfs_block *fr_bl = NULL; struct scoutfs_block *bl; bool link_orig = false; + __le32 orig_first_nr; u64 av_peek; - u64 av_old; + u64 av_old = 0; u64 fr_peek; - u64 fr_old; + u64 fr_old = 0; int ret; if (alloc->dirty_avail_bl != NULL) @@ -509,6 +511,7 @@ static int dirty_alloc_blocks(struct super_block *sb, /* undo dirty freed if we get an error after */ orig_freed = alloc->freed.ref; + orig_first_nr = alloc->freed.first_nr; if (alloc->dirty_avail_bl != NULL) { ret = 0; @@ -562,6 +565,17 @@ static int dirty_alloc_blocks(struct super_block *sb, /* sort dirty avail to encourage contiguous sorted meta blocks */ 
list_block_sort(av_bl->data); + lblk = fr_bl->data; + if (WARN_ON_ONCE(alloc->freed.ref.blkno != lblk->hdr.blkno)) { + scoutfs_err(sb, "dirty_alloc freed ref %llu hdr %llu av_old %llu fr_old %llu av_peek %llu fr_peek %llu link_orig %d", + le64_to_cpu(alloc->freed.ref.blkno), + le64_to_cpu(lblk->hdr.blkno), + av_old, fr_old, av_peek, fr_peek, link_orig); + ret = -EIO; + goto out; + } + lblk = NULL; + if (av_old) list_block_add(&alloc->freed, fr_bl->data, av_old); if (fr_old) @@ -578,6 +592,7 @@ static int dirty_alloc_blocks(struct super_block *sb, if (fr_bl) scoutfs_block_writer_forget(sb, wri, fr_bl); alloc->freed.ref = orig_freed; + alloc->freed.first_nr = orig_first_nr; } mutex_unlock(&alloc->mutex); From 0a80704c608af28cfddb726f83ef32247e00d23e Mon Sep 17 00:00:00 2001 From: Auke Kok Date: Wed, 15 Apr 2026 12:06:42 -0700 Subject: [PATCH 09/10] Clear ref_blkno output when block is already dirty block_dirty_ref() skipped setting *ref_blkno when the block was already dirty, leaving the caller with a stale value. Set it to 0 on the already-dirty fast path so callers do not try to free a block that was not allocated. Signed-off-by: Auke Kok --- kmod/src/block.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kmod/src/block.c b/kmod/src/block.c index 8602f18c8..5999681d2 100644 --- a/kmod/src/block.c +++ b/kmod/src/block.c @@ -838,6 +838,8 @@ int scoutfs_block_dirty_ref(struct super_block *sb, struct scoutfs_alloc *alloc, bp = BLOCK_PRIVATE(bl); if (block_is_dirty(bp)) { + if (ref_blkno) + *ref_blkno = 0; ret = 0; goto out; } From a46e70123d9ec35c678cd79e1715721072c756b7 Mon Sep 17 00:00:00 2001 From: Auke Kok Date: Wed, 15 Apr 2026 13:49:03 -0700 Subject: [PATCH 10/10] Warn on block read bio completion timeout Wrap the wait_event() in block_read() in a loop of 120 second wait_event_timeout() calls that issues a WARN each time a timeout expires without the bio completing. The wait itself remains unbounded, but a lost completion no longer hangs silently. 
Signed-off-by: Auke Kok --- kmod/src/block.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kmod/src/block.c b/kmod/src/block.c index 5999681d2..64712b47f 100644 --- a/kmod/src/block.c +++ b/kmod/src/block.c @@ -630,7 +630,9 @@ static struct block_private *block_read(struct super_block *sb, u64 blkno) } } - wait_event(binf->waitq, uptodate_or_error(bp)); + while (!wait_event_timeout(binf->waitq, uptodate_or_error(bp), 120 * HZ)) + WARN(1, "block read blkno %llu waiting for bio completion\n", + bp->bl.blkno); if (test_bit(BLOCK_BIT_ERROR, &bp->bits)) ret = -EIO; else