diff --git a/Cargo.lock b/Cargo.lock index e9db3b2c..7d6b86dc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5178,7 +5178,7 @@ dependencies = [ [[package]] name = "sentrix" -version = "2.2.37" +version = "2.2.38" dependencies = [ "aes-gcm", "alloy-consensus", @@ -5226,7 +5226,7 @@ dependencies = [ [[package]] name = "sentrix-bft" -version = "2.2.37" +version = "2.2.38" dependencies = [ "bincode", "hex", @@ -5254,7 +5254,7 @@ dependencies = [ [[package]] name = "sentrix-core" -version = "2.2.37" +version = "2.2.38" dependencies = [ "alloy-consensus", "alloy-eips", @@ -5286,7 +5286,7 @@ dependencies = [ [[package]] name = "sentrix-evm" -version = "2.2.37" +version = "2.2.38" dependencies = [ "alloy-primitives", "hex", @@ -5301,7 +5301,7 @@ dependencies = [ [[package]] name = "sentrix-faucet" -version = "2.2.37" +version = "2.2.38" dependencies = [ "anyhow", "axum", @@ -5325,14 +5325,14 @@ dependencies = [ [[package]] name = "sentrix-fork-heights" -version = "2.2.37" +version = "2.2.38" dependencies = [ "tracing", ] [[package]] name = "sentrix-grpc" -version = "2.2.37" +version = "2.2.38" dependencies = [ "async-stream", "bincode", @@ -5351,7 +5351,7 @@ dependencies = [ [[package]] name = "sentrix-network" -version = "2.2.37" +version = "2.2.38" dependencies = [ "async-trait", "bincode", @@ -5369,7 +5369,7 @@ dependencies = [ [[package]] name = "sentrix-nft" -version = "2.2.37" +version = "2.2.38" dependencies = [ "hex", "serde", @@ -5380,7 +5380,7 @@ dependencies = [ [[package]] name = "sentrix-node" -version = "2.2.37" +version = "2.2.38" dependencies = [ "anyhow", "axum", @@ -5406,14 +5406,14 @@ dependencies = [ [[package]] name = "sentrix-precompiles" -version = "2.2.37" +version = "2.2.38" dependencies = [ "alloy-primitives", ] [[package]] name = "sentrix-primitives" -version = "2.2.37" +version = "2.2.38" dependencies = [ "hex", "proptest", @@ -5428,7 +5428,7 @@ dependencies = [ [[package]] name = "sentrix-prom-exporter" -version = "2.2.37" +version = "2.2.38" dependencies = [ "http-body-util", "hyper", @@ -5457,7 +5457,7 @@ dependencies = [ [[package]] name = "sentrix-rpc" -version = "2.2.37" +version = "2.2.38" dependencies = [ "alloy-consensus", "alloy-eips", @@ -5490,14 +5490,14 @@ dependencies = [ [[package]] name = "sentrix-rpc-types" -version = "2.2.37" +version = "2.2.38" dependencies = [ "serde_json", ] [[package]] name = "sentrix-staking" -version = "2.2.37" +version = "2.2.38" dependencies = [ "sentrix-primitives", "serde", @@ -5507,7 +5507,7 @@ dependencies = [ [[package]] name = "sentrix-storage" -version = "2.2.37" +version = "2.2.38" dependencies = [ "bincode", "libmdbx", @@ -5522,7 +5522,7 @@ dependencies = [ [[package]] name = "sentrix-trie" -version = "2.2.37" +version = "2.2.38" dependencies = [ "bincode", "blake3", @@ -5539,7 +5539,7 @@ dependencies = [ [[package]] name = "sentrix-wallet" -version = "2.2.37" +version = "2.2.38" dependencies = [ "aes-gcm", "argon2", @@ -5559,7 +5559,7 @@ dependencies = [ [[package]] name = "sentrix-wire" -version = "2.2.37" +version = "2.2.38" dependencies = [ "bincode", "secp256k1 0.31.1", diff --git a/Cargo.toml b/Cargo.toml index 31baf075..ed6a9eaf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ members = [".", "crates/sentrix-primitives", "crates/sentrix-wallet", "crates/se # `version.workspace = true`. Same goes for edition/license/repository so # they can't drift across crates. [workspace.package] -version = "2.2.37" +version = "2.2.38" edition = "2024" license = "BUSL-1.1" repository = "https://github.com/sentrix-labs/sentrix" diff --git a/crates/sentrix-storage/src/tables.rs b/crates/sentrix-storage/src/tables.rs index 9f1c11c6..8d2d0044 100644 --- a/crates/sentrix-storage/src/tables.rs +++ b/crates/sentrix-storage/src/tables.rs @@ -27,6 +27,14 @@ pub const TABLE_TRIE_ROOTS: &str = "trie_roots"; /// Trie committed roots: height (u64 BE) → root_hash (32 bytes) pub const TABLE_TRIE_COMMITTED: &str = "trie_committed_roots"; +/// Trie GC tombstones: key = discriminator byte (b'n' nodes / b'v' values) || +/// hash (32 bytes) → tombstone version (u64 BE). Generational GC: the prune +/// tombstones an orphan here instead of deleting it, and only deletes on a +/// LATER prune if it's still orphan. A hash committed during a long prune +/// (the #791 race) is tombstoned that cycle but back in the live-set next +/// cycle, so it's never deleted. Deletion lags one prune interval. +pub const TABLE_TRIE_TOMBSTONES: &str = "trie_tombstones"; + /// Chain metadata: key string → value bytes (height, hash_index_complete, etc.) pub const TABLE_META: &str = "meta"; @@ -55,6 +63,7 @@ pub const ALL_TABLES: &[&str] = &[ TABLE_TRIE_VALUES, TABLE_TRIE_ROOTS, TABLE_TRIE_COMMITTED, + TABLE_TRIE_TOMBSTONES, TABLE_META, TABLE_LOGS, TABLE_BLOOM, diff --git a/crates/sentrix-trie/src/storage.rs b/crates/sentrix-trie/src/storage.rs index 315ad597..696ac118 100644 --- a/crates/sentrix-trie/src/storage.rs +++ b/crates/sentrix-trie/src/storage.rs @@ -348,6 +348,117 @@ impl TrieStorage { self.gc_table(tables::TABLE_TRIE_VALUES, live_hashes) } + /// Generational GC for the trie_nodes table — see [`Self::gc_table_generational`]. + pub fn gc_nodes_generational( + &self, + live_hashes: &std::collections::HashSet, + version: u64, + ) -> SentrixResult { + self.gc_table_generational(tables::TABLE_TRIE_NODES, b'n', live_hashes, version) + } + + /// Generational GC for the trie_values table — see [`Self::gc_table_generational`]. + pub fn gc_values_generational( + &self, + live_hashes: &std::collections::HashSet, + version: u64, + ) -> SentrixResult { + self.gc_table_generational(tables::TABLE_TRIE_VALUES, b'v', live_hashes, version) + } + + /// Race-free generational GC: defer deletes by one prune cycle via + /// `TABLE_TRIE_TOMBSTONES` (keyed `disc || hash` → tombstone version u64 BE). + /// + /// The old `gc_table` deleted any hash not in the live-set snapshot. But the + /// snapshot is frozen when the background prune is spawned, and blocks keep + /// committing new (live) nodes/values during the multi-minute walk — those + /// aren't in the snapshot, so they were deleted as "orphans" (the recurring + /// "missing node" stalls; #791 only narrowed the window). + /// + /// Generational fix: + /// - **Phase A (reap):** for each existing tombstone of this `disc` — + /// live again → drop tombstone (false orphan, keep entry); still orphan + /// AND tombstoned in an earlier cycle (`tv < version`) → delete entry + + /// tombstone; tombstoned this cycle → leave. + /// - **Phase B (mark):** tombstone any orphan not already tombstoned, at + /// `version`. + /// + /// A hash committed DURING this prune is orphan vs the snapshot, so Phase B + /// tombstones it — but next prune it's a recent live node, so Phase A drops + /// the tombstone instead of deleting it. Worst-case failure mode is benign: + /// under-deletion (storage grows), never deletion of a live entry. Returns + /// the count actually deleted this cycle. + fn gc_table_generational( + &self, + data_table: &str, + disc: u8, + live_hashes: &std::collections::HashSet, + version: u64, + ) -> SentrixResult { + use std::collections::HashSet; + + // Phase A — scan existing tombstones for this discriminator. + let mut reap: Vec = Vec::new(); // still-orphan, prior cycle → delete + let mut clear: Vec = Vec::new(); // resurrected → drop tombstone only + let mut tombstoned: HashSet = HashSet::new(); + self.mdbx + .iter_from(tables::TABLE_TRIE_TOMBSTONES, &[], |k, v| { + if k.len() == 33 && k[0] == disc { + let mut h = [0u8; 32]; + h.copy_from_slice(&k[1..]); + tombstoned.insert(h); + if live_hashes.contains(&h) { + clear.push(h); + } else { + let tv = v.try_into().map(u64::from_be_bytes).unwrap_or(0); + if tv < version { + reap.push(h); + } + } + } + true + }) + .map_err(|e| SentrixError::StorageError(e.to_string()))?; + + let mut tomb_key = [0u8; 33]; + tomb_key[0] = disc; + for h in clear.iter().chain(reap.iter()) { + tomb_key[1..].copy_from_slice(h); + self.mdbx + .delete(tables::TABLE_TRIE_TOMBSTONES, &tomb_key) + .map_err(|e| SentrixError::StorageError(e.to_string()))?; + } + let deleted = reap.len(); + for h in &reap { + self.mdbx + .delete(data_table, h) + .map_err(|e| SentrixError::StorageError(e.to_string()))?; + } + + // Phase B — tombstone new orphans not already tracked. + let mut new_tomb: Vec = Vec::new(); + self.mdbx + .iter_from(data_table, &[], |k, _v| { + if k.len() == 32 { + let mut h = [0u8; 32]; + h.copy_from_slice(k); + if !live_hashes.contains(&h) && !tombstoned.contains(&h) { + new_tomb.push(h); + } + } + true + }) + .map_err(|e| SentrixError::StorageError(e.to_string()))?; + let vbytes = version.to_be_bytes(); + for h in &new_tomb { + tomb_key[1..].copy_from_slice(h); + self.mdbx + .put(tables::TABLE_TRIE_TOMBSTONES, &tomb_key, &vbytes) + .map_err(|e| SentrixError::StorageError(e.to_string()))?; + } + Ok(deleted) + } + /// Shared helper: scan an MDBX table for hashes not in `live_hashes` and remove them. /// /// Streams the table via `iter_from` so only the orphan-hash subset @@ -517,6 +628,75 @@ mod tests { ); } + #[test] + fn test_generational_gc_defers_then_reaps() { + let (_dir, storage) = temp_storage(); + let live_hash = dummy_hash(0x01); + let orphan_hash = dummy_hash(0x02); + let node = TrieNode::Leaf { + key: [0u8; 32], + value_hash: empty_hash(0), + }; + storage.store_node(&live_hash, &node).unwrap(); + storage.store_node(&orphan_hash, &node).unwrap(); + + let mut live: HashSet = HashSet::new(); + live.insert(live_hash); + + // Cycle 1: orphan is tombstoned, NOT deleted (deferred). + let d1 = storage.gc_nodes_generational(&live, 100).unwrap(); + assert_eq!(d1, 0, "first cycle defers all deletes (tombstone only)"); + assert!( + storage.load_node(&orphan_hash).unwrap().is_some(), + "orphan survives the cycle it was first seen" + ); + + // Cycle 2 (higher version): still orphan → now reaped. + let d2 = storage.gc_nodes_generational(&live, 200).unwrap(); + assert_eq!(d2, 1, "second cycle reaps the still-orphan tombstoned node"); + assert!( + storage.load_node(&orphan_hash).unwrap().is_none(), + "stale orphan deleted after a full cycle" + ); + assert!( + storage.load_node(&live_hash).unwrap().is_some(), + "live node always survives" + ); + } + + #[test] + fn test_generational_gc_spares_node_committed_during_prune() { + // The #791 race: a node orphan vs THIS cycle's snapshot (it was + // committed mid-prune) but live NEXT cycle must never be deleted. + // The old gc_table deleted it in cycle 1 → "missing node" stall. + let (_dir, storage) = temp_storage(); + let raced = dummy_hash(0x03); + let node = TrieNode::Leaf { + key: [0u8; 32], + value_hash: empty_hash(0), + }; + storage.store_node(&raced, &node).unwrap(); + + // Cycle 1: snapshot live-set misses it → tombstoned, not deleted. + let empty: HashSet = HashSet::new(); + let d1 = storage.gc_nodes_generational(&empty, 100).unwrap(); + assert_eq!(d1, 0); + assert!( + storage.load_node(&raced).unwrap().is_some(), + "raced node survives cycle 1 (tombstoned, not deleted)" + ); + + // Cycle 2: now it IS live → tombstone dropped, node spared. + let mut live: HashSet = HashSet::new(); + live.insert(raced); + let d2 = storage.gc_nodes_generational(&live, 200).unwrap(); + assert_eq!(d2, 0, "resurrected node must not be deleted"); + assert!( + storage.load_node(&raced).unwrap().is_some(), + "raced/live node survives — #791 race fixed" + ); + } + #[test] fn test_gc_empty_live_set_removes_all() { let (_dir, storage) = temp_storage(); diff --git a/crates/sentrix-trie/src/tree.rs b/crates/sentrix-trie/src/tree.rs index dcd7fa58..4ce77df8 100644 --- a/crates/sentrix-trie/src/tree.rs +++ b/crates/sentrix-trie/src/tree.rs @@ -672,11 +672,23 @@ impl SentrixTrie { // backstop. let on_disk_latest = self.augment_live_to_latest(&mut live, self.version + 1)?; - // Pass 1 — nodes. Pass 2 — values, but only after re-walking the roots - // committed DURING pass 1, so freshly-written leaf values survive. - let nodes_gc = self.cache.storage.gc_nodes(&live)?; + // Generational GC (race-free): nodes/values orphan vs this snapshot are + // TOMBSTONED, not deleted; a hash only gets deleted on a LATER prune if + // it's still orphan then. A hash committed DURING this long prune is + // orphan vs `live` here, but back in the live-set next cycle, so it's + // never deleted — closing the #791 race that the re-augment only + // narrowed. `self.version` is the snapshot height = this cycle's marker. + // The re-augment between passes is kept (cheap, shrinks the tombstone + // churn). Pass 1 — nodes. Pass 2 — values. + let nodes_gc = self + .cache + .storage + .gc_nodes_generational(&live, self.version)?; self.augment_live_to_latest(&mut live, on_disk_latest + 1)?; - let values_gc = self.cache.storage.gc_values(&live)?; + let values_gc = self + .cache + .storage + .gc_values_generational(&live, self.version)?; tracing::info!( "trie prune: removed {} old roots, GC'd {} nodes + {} values", @@ -1007,10 +1019,17 @@ mod tests { "before prune, node count must grow (old leaf is still stored for v1)" ); - // prune(keep=0) retires v1 and GCs nodes only reachable from it. - let (roots_pruned, nodes_gc) = trie.prune(0).unwrap(); + // Generational GC defers deletion one cycle: the first prune tombstones + // the now-unreachable v1 nodes; a later prune at a higher version reaps + // them. Advance the version marker with a no-op commit, then prune again. + let (roots_pruned, _deferred) = trie.prune(0).unwrap(); assert!(roots_pruned >= 1, "must retire at least one old root"); - assert!(nodes_gc >= 1, "must GC at least one unreachable leaf"); + let _ = trie.commit(3).unwrap(); + let (_r2, nodes_gc) = trie.prune(0).unwrap(); + assert!( + nodes_gc >= 1, + "must GC at least one unreachable leaf after the deferral cycle" + ); let nodes_after_prune = mdbx .count(sentrix_storage::tables::TABLE_TRIE_NODES) @@ -1112,9 +1131,15 @@ mod tests { "key must still be retrievable from v1 root after a v2 delete" ); - // prune(keep=0) retires v1; its leaf+value become unreachable. + // Generational GC defers one cycle: the first prune tombstones the + // deleted leaf's storage; a later prune at a higher version reaps it. + let _ = trie.prune(0).unwrap(); + let _ = trie.commit(3).unwrap(); let (_roots_pruned, nodes_gc) = trie.prune(0).unwrap(); - assert!(nodes_gc >= 1, "prune must GC the deleted leaf's storage"); + assert!( + nodes_gc >= 1, + "prune must GC the deleted leaf's storage after the deferral cycle" + ); let values_after_prune = mdbx .count(sentrix_storage::tables::TABLE_TRIE_VALUES) @@ -1223,10 +1248,17 @@ mod tests { let v1_old = trie_v1.get(&k1).unwrap().unwrap(); assert_eq!(account_value_decode(&v1_old).unwrap().0, 100); - // prune(keep=0) reclaims v1's now-unreachable internal nodes. - let (roots_pruned, nodes_gc) = trie.prune(0).unwrap(); + // Generational GC defers one cycle: the first prune tombstones v1's + // now-unreachable internal nodes; a later prune at a higher version + // reaps them. + let (roots_pruned, _deferred) = trie.prune(0).unwrap(); assert!(roots_pruned >= 1, "prune must retire at least v1"); - assert!(nodes_gc >= 1, "prune must GC v1's orphaned internal nodes"); + let _ = trie.commit(3).unwrap(); + let (_r2, nodes_gc) = trie.prune(0).unwrap(); + assert!( + nodes_gc >= 1, + "prune must GC v1's orphaned internal nodes after the deferral cycle" + ); } /// ROOT CAUSE #3 regression guard: insert() must not delete the root node of a