From d1137ab3fb62ead484adb0ccba2403fea9d0d66d Mon Sep 17 00:00:00 2001 From: satyakwok <119509589+satyakwok@users.noreply.github.com> Date: Thu, 4 Jun 2026 19:16:56 +0200 Subject: [PATCH] fix(trie): refresh prune live-set between node and value GC passes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The periodic prune runs on a background-thread clone whose version is frozen. gc_orphaned_nodes GC'd nodes then values off a single live snapshot, but the nodes pass alone runs 10-20 min on a big chain.db. Blocks committed during that pass write new leaf values the snapshot never saw, so the values pass deleted them — testnet halt at h=6,239,526 (orphan value for a hot account leaf, account 0xd98d..., nonce 54). Split GC into gc_nodes + gc_values and re-augment the live set with roots committed during the nodes pass before running the values pass. Shrinks the race window from the full nodes-pass duration to the gap before the values cursor opens its txn. Full atomicity (walk+delete in one RW txn) is the eventual complete fix; boot-time verify_integrity is the backstop. Bumps workspace to 2.2.30. --- Cargo.lock | 38 ++++---- Cargo.toml | 2 +- crates/sentrix-trie/src/storage.rs | 32 ++++++- crates/sentrix-trie/src/tree.rs | 139 ++++++++++++++++++++++------- 4 files changed, 158 insertions(+), 53 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bf89c67a..8cd81057 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5178,7 +5178,7 @@ dependencies = [ [[package]] name = "sentrix" -version = "2.2.28" +version = "2.2.30" dependencies = [ "aes-gcm", "alloy-consensus", @@ -5226,7 +5226,7 @@ dependencies = [ [[package]] name = "sentrix-bft" -version = "2.2.28" +version = "2.2.30" dependencies = [ "bincode", "hex", @@ -5254,7 +5254,7 @@ dependencies = [ [[package]] name = "sentrix-core" -version = "2.2.28" +version = "2.2.30" dependencies = [ "alloy-consensus", "alloy-eips", @@ -5285,7 +5285,7 @@ dependencies = [ [[package]] name = "sentrix-evm" -version = "2.2.28" +version = "2.2.30" dependencies = [ "alloy-primitives", "hex", @@ -5300,7 +5300,7 @@ dependencies = [ [[package]] name = "sentrix-faucet" -version = "2.2.28" +version = "2.2.30" dependencies = [ "anyhow", "axum", @@ -5324,7 +5324,7 @@ dependencies = [ [[package]] name = "sentrix-grpc" -version = "2.2.28" +version = "2.2.30" dependencies = [ "async-stream", "bincode", @@ -5343,7 +5343,7 @@ dependencies = [ [[package]] name = "sentrix-network" -version = "2.2.28" +version = "2.2.30" dependencies = [ "async-trait", "bincode", @@ -5361,7 +5361,7 @@ dependencies = [ [[package]] name = "sentrix-nft" -version = "2.2.28" +version = "2.2.30" dependencies = [ "hex", "serde", @@ -5372,7 +5372,7 @@ dependencies = [ [[package]] name = "sentrix-node" -version = "2.2.28" +version = "2.2.30" dependencies = [ "anyhow", "axum", @@ -5398,14 +5398,14 @@ dependencies = [ [[package]] name = "sentrix-precompiles" -version = "2.2.28" +version = "2.2.30" dependencies = [ "alloy-primitives", ] [[package]] name = "sentrix-primitives" -version = "2.2.28" +version = "2.2.30" dependencies = [ "hex", "proptest", @@ -5420,7 +5420,7 @@ dependencies = [ [[package]] name = "sentrix-prom-exporter" -version = "2.2.28" +version = "2.2.30" dependencies = [ "http-body-util", "hyper", @@ -5449,7 +5449,7 @@ dependencies = [ [[package]] name = "sentrix-rpc" -version = "2.2.28" +version = "2.2.30" dependencies = [ "alloy-consensus", "alloy-eips", @@ -5482,14 +5482,14 @@ dependencies = [ [[package]] name = "sentrix-rpc-types" -version = "2.2.28" +version = "2.2.30" dependencies = [ "serde_json", ] [[package]] name = "sentrix-staking" -version = "2.2.28" +version = "2.2.30" dependencies = [ "sentrix-primitives", "serde", @@ -5499,7 +5499,7 @@ dependencies = [ [[package]] name = "sentrix-storage" -version = "2.2.28" +version = "2.2.30" dependencies = [ "bincode", "libmdbx", @@ -5514,7 +5514,7 @@ dependencies = [ [[package]] name = "sentrix-trie" -version = "2.2.28" +version = "2.2.30" dependencies = [ "bincode", "blake3", @@ -5531,7 +5531,7 @@ dependencies = [ [[package]] name = "sentrix-wallet" -version = "2.2.28" +version = "2.2.30" dependencies = [ "aes-gcm", "argon2", @@ -5551,7 +5551,7 @@ dependencies = [ [[package]] name = "sentrix-wire" -version = "2.2.28" +version = "2.2.30" dependencies = [ "bincode", "secp256k1 0.31.1", diff --git a/Cargo.toml b/Cargo.toml index ff91841f..903366f2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ members = [".", "crates/sentrix-primitives", "crates/sentrix-wallet", "crates/se # `version.workspace = true`. Same goes for edition/license/repository so # they can't drift across crates. [workspace.package] -version = "2.2.28" +version = "2.2.30" edition = "2024" license = "BUSL-1.1" repository = "https://github.com/sentrix-labs/sentrix" diff --git a/crates/sentrix-trie/src/storage.rs b/crates/sentrix-trie/src/storage.rs index 2f24c91b..315ad597 100644 --- a/crates/sentrix-trie/src/storage.rs +++ b/crates/sentrix-trie/src/storage.rs @@ -311,15 +311,43 @@ impl TrieStorage { } /// Garbage-collect node and value entries not present in `live_hashes`. + /// + /// Prefer [`Self::gc_nodes`] + [`Self::gc_values`] for the periodic prune: + /// the nodes pass alone can run 10–20 min on a big chain.db, during which + /// the apply loop commits new leaf VALUES. Driving both passes from one + /// `live` snapshot lets the values pass delete those freshly-committed + /// (still-live) values — the 2026-06-04 testnet trie corruption. The split + /// methods let the caller refresh `live` between passes. This combined + /// method stays for non-racy callers (one-shot recovery / tests). pub fn gc_orphaned_nodes( &self, live_hashes: &std::collections::HashSet, ) -> SentrixResult { - let node_count = self.gc_table(tables::TABLE_TRIE_NODES, live_hashes)?; - let value_count = self.gc_table(tables::TABLE_TRIE_VALUES, live_hashes)?; + let node_count = self.gc_nodes(live_hashes)?; + let value_count = self.gc_values(live_hashes)?; Ok(node_count + value_count) } + /// GC only the trie_nodes table against `live_hashes`. See + /// [`Self::gc_orphaned_nodes`] for why nodes and values are split. + pub fn gc_nodes( + &self, + live_hashes: &std::collections::HashSet, + ) -> SentrixResult { + self.gc_table(tables::TABLE_TRIE_NODES, live_hashes) + } + + /// GC only the trie_values table against `live_hashes`. The caller MUST + /// refresh `live_hashes` with any roots committed since the nodes pass + /// before calling this, or it will delete leaf values written during the + /// (long) nodes pass. + pub fn gc_values( + &self, + live_hashes: &std::collections::HashSet, + ) -> SentrixResult { + self.gc_table(tables::TABLE_TRIE_VALUES, live_hashes) + } + /// Shared helper: scan an MDBX table for hashes not in `live_hashes` and remove them. /// /// Streams the table via `iter_from` so only the orphan-hash subset diff --git a/crates/sentrix-trie/src/tree.rs b/crates/sentrix-trie/src/tree.rs index 8ca48905..dcd7fa58 100644 --- a/crates/sentrix-trie/src/tree.rs +++ b/crates/sentrix-trie/src/tree.rs @@ -657,46 +657,66 @@ impl SentrixTrie { // the prune built `live` from a snapshot that pre-dated those // writes and then deleted the freshly-committed data. // - // Re-load the highest version currently in TABLE_TRIE_ROOTS and - // walk every root committed AFTER self.version. Shrinks the - // race window from "full walk duration" to "the gap between - // this re-load and the gc_orphaned_nodes commit" — at most one - // block's worth of writes can still slip through, and the B3b - // total_minted self-heal plus boot-time verify_integrity catch - // those as defense in depth. + // Re-load the highest version currently in TABLE_TRIE_ROOTS and walk + // every root committed AFTER self.version, BEFORE the nodes GC. // - // A proper race-free fix requires running the live walk and the - // delete inside the same MDBX RW transaction. That's a bigger - // refactor (exposing the txn handle through TrieStorage) and - // belongs in its own PR. This patch is the minimal targeted - // closer for the observed symptom. - let on_disk_latest = self.cache.storage.latest_version()?.unwrap_or(self.version); - let mut augmented_roots = 0usize; - for version in (self.version + 1)..=on_disk_latest { + // 2026-06-04 follow-up: the original single augment above still left a + // hole. gc_orphaned_nodes GC'd nodes THEN values off this one snapshot, + // and the nodes pass alone runs 10–20 min on a big chain.db. Blocks + // committed during that pass wrote new leaf values the snapshot never + // saw, and the values pass deleted them — orphan value for a hot + // account leaf at testnet h=6,239,526. So we now split the GC and + // re-augment `live` again between the passes (see below). The complete + // race-free fix is still one RW txn around walk+delete (a TrieStorage + // refactor, tracked separately); boot-time verify_integrity is the + // backstop. + let on_disk_latest = self.augment_live_to_latest(&mut live, self.version + 1)?; + + // Pass 1 — nodes. Pass 2 — values, but only after re-walking the roots + // committed DURING pass 1, so freshly-written leaf values survive. + let nodes_gc = self.cache.storage.gc_nodes(&live)?; + self.augment_live_to_latest(&mut live, on_disk_latest + 1)?; + let values_gc = self.cache.storage.gc_values(&live)?; + + tracing::info!( + "trie prune: removed {} old roots, GC'd {} nodes + {} values", + roots_pruned, + nodes_gc, + values_gc + ); + Ok((roots_pruned, nodes_gc + values_gc)) + } + + /// Walk every committed root in `[from_version, on-disk latest]` not yet in + /// `live` and add its reachable hashes. Returns the on-disk latest version + /// observed. Keeps the live set current across the long nodes/values GC + /// passes — the prune runs on a clone whose `self.version` is frozen while + /// the apply loop keeps committing newer roots. + fn augment_live_to_latest( + &self, + live: &mut std::collections::HashSet, + from_version: u64, + ) -> SentrixResult { + let latest = self.cache.storage.latest_version()?.unwrap_or(self.version); + let mut added = 0usize; + for version in from_version..=latest { if let Some(root) = self.cache.storage.load_root(version)? && !live.contains(&root) { - self.collect_reachable(root, 0, &mut live)?; - augmented_roots += 1; + self.collect_reachable(root, 0, live)?; + added += 1; } } - if augmented_roots > 0 { + if added > 0 { tracing::info!( - "trie prune: augmented live set with {} new roots committed during walk \ - (snapshot v{} → on-disk v{})", - augmented_roots, - self.version, - on_disk_latest + "trie prune: augmented live set with {} roots committed during walk \ + (from v{} → on-disk v{})", + added, + from_version, + latest ); } - - let nodes_gc = self.cache.storage.gc_orphaned_nodes(&live)?; - tracing::info!( - "trie prune: removed {} old roots, GC'd {} orphaned entries", - roots_pruned, - nodes_gc - ); - Ok((roots_pruned, nodes_gc)) + Ok(latest) } /// Recursively collect all node hashes reachable from `hash` at `depth`. @@ -1105,6 +1125,63 @@ mod tests { ); } + /// Regression for the 2026-06-04 testnet trie corruption: a leaf value + /// committed DURING the (long) nodes GC pass must survive the values GC + /// pass. The prune runs on a clone whose version is frozen, so its initial + /// `live` set predates that commit; splitting node/value GC and + /// re-augmenting `live` between the passes is what keeps the fresh value. + /// Asserts both directions: the stale `live` would have deleted it (it's + /// absent), and the re-augmented `live` keeps it (it's present). + #[test] + fn test_values_gc_keeps_leaf_committed_during_nodes_pass() { + use std::collections::HashSet; + let (_dir, mdbx) = temp_mdbx(); + let mut trie = SentrixTrie::open(Arc::clone(&mdbx), 0).unwrap(); + + // v1: account A committed — this is the prune snapshot. + let a = address_to_key("0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); + trie.insert(&a, &account_value_bytes(100, 0)).unwrap(); + trie.commit(1).unwrap(); + + // The background prune clones the trie frozen at v1 and builds `live` + // from the v1 root only. + let pruner = SentrixTrie::open(Arc::clone(&mdbx), 1).unwrap(); + let mut live: HashSet<[u8; 32]> = HashSet::new(); + pruner.collect_reachable(pruner.root, 0, &mut live).unwrap(); + + // --- pretend the long nodes GC pass is running here --- + // Meanwhile the apply loop commits account B at v2 on the live trie. + let b = address_to_key("0xbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + let b_val = account_value_bytes(777, 3); + trie.insert(&b, &b_val).unwrap(); + trie.commit(2).unwrap(); + let b_vh = crate::node::hash_leaf(&b, &b_val); + assert!( + pruner.cache.storage.load_value(&b_vh).unwrap().is_some(), + "B's value must be on disk after its commit" + ); + + // The stale snapshot live set does NOT cover B — a values GC here would + // delete a live value (the bug). + assert!( + !live.contains(&b_vh), + "stale live set must not yet contain B's value (this is what caused the bug)" + ); + + // The fix: re-augment `live` to the on-disk latest before the values + // pass, then GC values. + pruner.augment_live_to_latest(&mut live, 2).unwrap(); + assert!( + live.contains(&b_vh), + "re-augment must add B's value hash committed during the nodes pass" + ); + pruner.cache.storage.gc_values(&live).unwrap(); + assert!( + pruner.cache.storage.load_value(&b_vh).unwrap().is_some(), + "leaf value committed during the nodes pass must survive the values GC" + ); + } + /// Updates accumulate internal-node storage between commits; prune() is /// what reclaims it. Inline cleanup of old_internal_hashes (removed /// 2026-04-20) was unsound — see the comment in insert() and the