diff --git a/Cargo.toml b/Cargo.toml index f83526cf..d9ed834b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ members = [".", "crates/sentrix-primitives", "crates/sentrix-wallet", "crates/se # `version.workspace = true`. Same goes for edition/license/repository so # they can't drift across crates. [workspace.package] -version = "2.2.31" +version = "2.2.35" edition = "2024" license = "BUSL-1.1" repository = "https://github.com/sentrix-labs/sentrix" diff --git a/bin/sentrix/src/main.rs b/bin/sentrix/src/main.rs index dd80f599..e87b7953 100644 --- a/bin/sentrix/src/main.rs +++ b/bin/sentrix/src/main.rs @@ -1136,36 +1136,76 @@ async fn cmd_start( let writer_storage = storage.clone(); let writer_shared = shared.clone(); tokio::spawn(async move { - while let Some(target_height) = save_rx.recv().await { - // Drain coalesced heights: if the writer is behind, multiple - // FinalizeBlock pushes can stack up. One snapshot covers all - // of them since save_blockchain writes the full state blob. - let mut latest = target_height; - while let Ok(h) = save_rx.try_recv() { - latest = h; - } - let bc = writer_shared.read().await; - let height_at_save = bc.height(); - match writer_storage.save_blockchain(&bc) { - Ok(()) => { - tracing::debug!( + // POLL-driven persistence (not save_tx-signal-driven). The signal + // was only pushed in the commit path that is SKIPPED when add_block + // returns Err on the BFT apply-from-stash state_root recompute + // mismatch (the proposal carries the proposer's root; our local + // recompute differs — the separate, open determinism issue). The + // block is still canonical (2/3 precommit justification) and the + // chain advances, but the writer never fired → its block:{N} key + // was never written → it aged out of the in-memory window into a + // permanent storage gap → observer/fullnode GetBlocks sync stalled + // on the missing height. Polling the chain and persisting whatever + // is committed decouples durability from the apply result. + // + // Block:{N} keys are written via the BATCHED save_blocks (one MDBX + // txn / one fsync per tick). An earlier attempt used per-block + // save_block, whose per-block full-env mdbx.sync() contended with + // the apply path's trie write txns and stalled consensus. The full + // state blob (save_blockchain) runs on a slow cadence purely to + // bound load-time B2 replay; B2 rebuilds accounts from the block:{N} + // keys we now persist, and the graceful-shutdown path saves the + // blob on clean exit. + let mut last_persisted: u64 = { writer_shared.read().await.height() }; + let mut last_blob_save = std::time::Instant::now(); + let mut ticker = tokio::time::interval(std::time::Duration::from_secs(5)); + ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); + loop { + ticker.tick().await; + // Drain the legacy signal channel so producers' try_send does + // not accumulate; persistence no longer keys off it. + while save_rx.try_recv().is_ok() {} + + // Clone the newly-committed block range under a short read lock, + // then release before the disk write so the lock is never held + // across I/O (would stall the validator's write lock). + let new_blocks: Vec = { + let bc = writer_shared.read().await; + let h = bc.height(); + (last_persisted.saturating_add(1)..=h) + .filter_map(|i| bc.get_block(i).cloned()) + .collect() + }; + if let Some(top) = new_blocks.last().map(|b| b.index) { + match writer_storage.save_blocks(&new_blocks) { + Ok(()) => last_persisted = top, + Err(e) => tracing::error!( target: "save_writer", - "background save_blockchain ok queued_for=h{} caught_up_to=h{}", - latest, - height_at_save, - ); + "save_blocks failed for range ..={}: {}", + top, + e, + ), } - Err(e) => { + } + + // Periodic full-state checkpoint (accounts blob + blob_height) + // to keep B2 replay bounded on an unclean restart. Infrequent + // (60s) — strictly less often than the previous per-finalize + // save, so the brief read-lock hold during serialize can't + // accumulate into back-pressure. + if last_blob_save.elapsed() >= std::time::Duration::from_secs(60) { + let bc = writer_shared.read().await; + if let Err(e) = writer_storage.save_blockchain(&bc) { tracing::error!( target: "save_writer", - "background save_blockchain failed queued_for=h{} caught_up_to=h{}: {}", - latest, height_at_save, e, + "periodic save_blockchain failed at h{}: {}", + bc.height(), + e, ); } + last_blob_save = std::time::Instant::now(); } - drop(bc); } - tracing::info!(target: "save_writer", "save channel closed; writer exiting"); }); } @@ -2914,21 +2954,27 @@ async fn cmd_start( } if let Some(mut blk) = proposed_block.take() { + // Apply the finalized block we already hold, whoever proposed + // it. The hash-match guard above proved `blk` IS the + // FinalizeBlock action's block, and `justification` carries + // its 2/3 precommit certificate — so this is the canonical + // committed block. This previously broke out and waited for + // libp2p NewBlock/sync to re-deliver a peer's block; when + // gossip missed, the node sat in Finalize re-triggering sync + // while holding the block the whole time → chain crawl/stall. + // `validate_block` below still re-checks structure + the + // justification supermajority before we write. if blk.validator != wallet.address { tracing::info!( target: "finalize_trace", - "BFT finalize peer-propose: h={} round={} block={:.16}… \ - proposer={} is not local validator {}; waiting for \ - libp2p NewBlock/sync instead of executing peer block \ - in the BFT loop", + "BFT finalize: applying peer-proposed finalized block \ + h={} round={} block={:.16}… proposer={} from local \ + stash (valid 2/3 justification)", height, round, block_hash, blk.validator, - wallet.address, ); - lp2p_clone.trigger_sync().await; - break; } blk.round = round; diff --git a/crates/sentrix-core/src/block_executor.rs b/crates/sentrix-core/src/block_executor.rs index 18dd1523..94084573 100644 --- a/crates/sentrix-core/src/block_executor.rs +++ b/crates/sentrix-core/src/block_executor.rs @@ -1723,23 +1723,76 @@ impl Blockchain { return Ok(()); } - tracing::error!( - "CRITICAL #1e: state_root mismatch at block {} — received {} \ - vs computed {}. Local trie and peer's trie disagree on the \ - post-block state. Rejecting.", - block_index, - hex::encode(received_root), - hex::encode(computed_root), - ); - // 2026-04-23 divergence rate-alarm: per-event ERROR - // line above is truthful but gets lost in log noise - // during a real divergence (~1/s). Record the - // rejection in the rolling tracker, which emits a - // LOUD rate-limited alarm pointing at the rsync - // recovery playbook when the rate crosses threshold. - // See `DivergenceTracker` in blockchain.rs for the - // full rationale. - self.divergence_tracker.record_rejection(block_index); + // Observer-tolerant accept (gated, default OFF). An observer/ + // fullnode applies EVERY block via add_block_from_peer (Peer) and + // strictly rejecting a #1e here halts it on canonical data: the + // block already passed the strict 2/3-precommit justification + // verification earlier in add_block_impl, so it IS the network- + // agreed block (consensus is on block_hash, not state_root). The + // mismatch is the chain's known imperfect state-commitment + // (recurring/oscillating state_root) that validators already + // tolerate via the apply-from-stash path. With + // SENTRIX_OBSERVER_TOLERANT_STATE_ROOT=1 set, accept the block and + // stamp the proposer's (canonical) received root so the observer's + // chain stays consistent with the committed roots; its local + // accounts diverge from that root (the same pre-existing imperfection + // every node has), so served state is no worse than a validator's. + // Default OFF → validators keep the strict #1e reject below. Only an + // observer node sets this env. + if self.source_for_current_add == BlockSource::Peer + && std::env::var_os("SENTRIX_OBSERVER_TOLERANT_STATE_ROOT") + .is_some_and(|v| v == "1") + { + tracing::debug!( + "observer-tolerant: #1e at block {} (received {} vs computed \ + {}) — accepting justified canonical block, stamping received \ + root (local state diverges; chain state-commitment imperfect)", + block_index, + hex::encode(received_root), + hex::encode(computed_root), + ); + last.state_root = Some(received_root); + self.maybe_prune_trie(); + emit_apply_profile( + profile_t0, + profile_t1, + profile_t2, + profile_height, + profile_txs, + ); + return Ok(()); + } + + // A SelfProduced mismatch is the BFT finalize apply-from-stash + // path: the stashed proposal carries the proposer's PRE-apply + // state_root (computed at propose time, before this block's txs), + // which never equals the freshly computed POST-apply root. That's + // expected and self-heals — the block still commits via the libp2p + // receive path, which CHECKs against the canonical committed root. + // Only a Peer-source mismatch is a real cross-node divergence, so + // keep the LOUD alarm + divergence_tracker for that case and log the + // self-apply case quietly without polluting the divergence rate. + if self.source_for_current_add == BlockSource::Peer { + tracing::error!( + "CRITICAL #1e: state_root mismatch at block {} — received {} \ + vs computed {}. Local trie and peer's trie disagree on the \ + post-block state. Rejecting.", + block_index, + hex::encode(received_root), + hex::encode(computed_root), + ); + // Record in the rolling tracker, which emits a LOUD rate-limited + // alarm pointing at the recovery playbook when the rate crosses + // threshold. See `DivergenceTracker` in blockchain.rs. + self.divergence_tracker.record_rejection(block_index); + } else { + tracing::debug!( + "#1e self-apply mismatch at block {} (expected: stashed \ + proposal carries the pre-apply root) — block commits via \ + the receive path", + block_index, + ); + } return Err(SentrixError::ChainValidationFailed(format!( "state_root mismatch at block {}: received {}, computed {}", block_index, @@ -2865,6 +2918,110 @@ mod tests { ); } + /// Regression: a Pass-2 failure (#1e state_root mismatch) must restore + /// `stake_registry` / `epoch_manager` / `slashing`, not just AccountDB. + /// Pre-fix the C-03 snapshot omitted them, so the reward bundle's + /// `pending_rewards` increment leaked on every rejected block — and post + /// STATE_IN_TRIE that leak diverged the next block's state_root. + #[test] + fn test_c03_pass2_failure_restores_staking_state() { + use sentrix_primitives::block::{Block, STATE_ROOT_FORK_HEIGHT}; + use sentrix_primitives::justification::BlockJustification; + use sentrix_staking::staking::ValidatorStake; + use sentrix_storage::MdbxStorage; + use std::sync::Arc; + use tempfile::TempDir; + + let _guard = crate::test_util::env_test_lock(); + // Forks that (a) run the reward bundle in apply and (b) commit staking + // state into the root, so a #1e reject can leak it absent the fix. + unsafe { + std::env::set_var("VOYAGER_REWARD_V2_HEIGHT", "0"); + std::env::set_var("REWARD_APPLY_PATH_HEIGHT", "0"); + std::env::set_var("STATE_IN_TRIE_HEIGHT", "0"); + } + + let mut bc = setup(); + bc.voyager_activated = true; + for addr in ["v1", "v2", "v3", "v4"] { + bc.stake_registry.validators.insert( + addr.to_string(), + ValidatorStake { + address: addr.to_string(), + self_stake: 1000, + total_delegated: 0, + commission_rate: 1000, + max_commission_rate: 2000, + is_jailed: false, + jail_until: 0, + is_tombstoned: false, + blocks_signed: 0, + blocks_missed: 0, + pending_rewards: 0, + registration_height: 0, + last_commission_change_height: 0, + }, + ); + } + bc.stake_registry.active_set = + vec!["v1".into(), "v2".into(), "v3".into(), "v4".into()]; + + // Pad past STATE_ROOT_FORK_HEIGHT so the #1e check enforces (the + // below-fork path just stamps the root instead of rejecting). + let pad_height = STATE_ROOT_FORK_HEIGHT + 1; + let prev = bc.latest_block().unwrap().hash.clone(); + let mut pad = Block::new( + pad_height, + prev, + vec![Transaction::new_coinbase("v1".into(), 0, pad_height, 1_777_000_000)], + "v1".into(), + ); + pad.timestamp = 1_777_000_000; + bc.chain.push(pad); + + // Trie required so update_trie_for_block computes a real root to diff. + let dir = TempDir::new().unwrap(); + let mdbx = Arc::new(MdbxStorage::open(dir.path()).unwrap()); + bc.init_trie(mdbx).unwrap(); + + let height = bc.height() + 1; + let prev_hash = bc.latest_block().unwrap().hash.clone(); + let reward = bc.get_block_reward(); + let coinbase = Transaction::new_coinbase("v1".into(), reward, height, 1_777_000_001); + let mut block = Block::new(height, prev_hash, vec![coinbase], "v1".into()); + block.timestamp = 1_777_000_001; + // Tamper the declared root so #1e fires AFTER the reward bundle ran. + block.state_root = Some([0xAB; 32]); + block.hash = block.calculate_hash(); + let mut just = BlockJustification::new(height, 0, block.hash.clone()); + just.add_precommit("v1".into(), vec![], 1000); + just.add_precommit("v2".into(), vec![], 1000); + just.add_precommit("v3".into(), vec![], 1000); + block.justification = Some(just); + + let pending_before = bc.stake_registry.validators.get("v1").unwrap().pending_rewards; + + let err = bc + .add_block_from_peer(block) + .expect_err("tampered state_root must be rejected (#1e)"); + assert!( + format!("{err:?}").contains("state_root mismatch"), + "expected #1e state_root mismatch, got: {err:?}" + ); + + let pending_after = bc.stake_registry.validators.get("v1").unwrap().pending_rewards; + assert_eq!( + pending_after, pending_before, + "pending_rewards must roll back after a #1e reject (leaked pre-fix)" + ); + + unsafe { + std::env::remove_var("VOYAGER_REWARD_V2_HEIGHT"); + std::env::remove_var("REWARD_APPLY_PATH_HEIGHT"); + std::env::remove_var("STATE_IN_TRIE_HEIGHT"); + } + } + #[test] fn test_add_block_succeeds_without_trie() { // update_trie_for_block returning Ok(None) must not fail add_block. diff --git a/crates/sentrix-core/src/blockchain_trie_ops.rs b/crates/sentrix-core/src/blockchain_trie_ops.rs index 89b63fdc..6b28d500 100644 --- a/crates/sentrix-core/src/blockchain_trie_ops.rs +++ b/crates/sentrix-core/src/blockchain_trie_ops.rs @@ -302,6 +302,53 @@ impl Blockchain { } } + // Treasury re-reconciliation (2026-06-06). PROTOCOL_TREASURY drifted + // across validators (val2/val3 over-credited ~1-2 SRX) during the + // multipath-distribute_reward era between STATE_ROOT_V2_HEIGHT (2689134) + // and REWARD_APPLY_PATH_HEIGHT (6239300); the credit is single-path and + // deterministic since 6239300 so the drift is frozen. Because treasury + // sits in the state_root trie (since 2689134), each node computes a + // divergent local state_root → an observer/fullnode #1e-rejects every + // block (validators tolerate via apply-from-stash). Confirmed treasury + // is the SOLE divergent account (all other accounts byte-identical + // across the fleet). Heal it by force-setting the operator-set canonical + // at a one-time height. This is a SEPARATE trigger from + // STATE_ROOT_V2_HEIGHT on purpose: reusing that var would also move the + // trie-INCLUSION cutoff (line ~422 `block.index >= STATE_ROOT_V2_HEIGHT`) + // and retroactively drop treasury from the trie for the historical + // range, forking the chain. Default u64::MAX = dormant (ships safe). + // All nodes set the same value at the same height → converge; runbook: + // pick canonical (supply-consistent majority or history-recompute), + // halt-all, set TREASURY_REBASE_HEIGHT= + TREASURY_REBASE_BALANCE, + // simul-start, verify treasury agreement across the fleet at activation. + let treasury_rebase_height = std::env::var("TREASURY_REBASE_HEIGHT") + .ok() + .and_then(|s| s.parse::().ok()) + .unwrap_or(u64::MAX); + if activation_block_index == Some(treasury_rebase_height) { + if let Some(canonical) = std::env::var("TREASURY_REBASE_BALANCE") + .ok() + .and_then(|s| s.parse::().ok()) + { + let prior = self.accounts.get_balance(PROTOCOL_TREASURY); + tracing::warn!( + "TREASURY_REBASE at h={}: PROTOCOL_TREASURY {} → {} (delta {} sentri) \ + — operator-set canonical healing historical multipath drift", + treasury_rebase_height, + prior, + canonical, + canonical as i128 - prior as i128, + ); + self.accounts.set_balance(PROTOCOL_TREASURY, canonical); + } else { + tracing::warn!( + "TREASURY_REBASE_HEIGHT={} reached but TREASURY_REBASE_BALANCE unset \ + — skipping (fork risk if in-memory treasury differs across nodes)", + treasury_rebase_height, + ); + } + } + if self.state_trie.is_none() { // Pre-STATE_ROOT_FORK_HEIGHT, missing trie is acceptable — // state_root isn't part of the block hash. Past the fork diff --git a/crates/sentrix-core/src/storage.rs b/crates/sentrix-core/src/storage.rs index 895d58c8..c97fafbe 100644 --- a/crates/sentrix-core/src/storage.rs +++ b/crates/sentrix-core/src/storage.rs @@ -240,17 +240,35 @@ impl Storage { // from disk. Cost: ~2 ms at mainnet h≈2.2M, vs the ~30+ minutes // the original block-iteration version stalled on under multi- // container I/O contention (2026-05-25 v2.2.16 deploy regression). + // B3b reconcile is ADVISORY ONLY (no longer overwrites). The + // closed-form `recompute_total_minted_from_blocks` (TOTAL_PREMINE + + // sum of flat BLOCK_REWARD>>halvings per height) assumes every block's + // coinbase.amount equals the flat reward, but that is false in + // practice — blocks with a reduced/zero coinbase (era/epoch edges) + // make the true minted = sum of the STAMPED coinbase.amount strictly + // less than the closed form (≈2996 SRX on testnet at h≈6.27M). The + // authoritative value is the live accumulator (block_executor.rs:795 + // sums each block's stamped coinbase.amount), which the running + // validators all agree on and which feeds state_root. The persisted + // blob holds that live value, and B2 replay (below) re-applies the + // coinbase for blob_height+1..disk_height, so total_minted is already + // correct after load. Overwriting it with the closed form forced a + // divergent total_minted into state_root on any reload → the loaded + // node's computed root disagreed with the canonical block (observer + // GetBlocks #1e-rejected every block; a validator restart would fork). + // Keep the comparison as a signal but DO NOT mutate. let recomputed = self.recompute_total_minted_from_blocks(&bc)?; let total_minted_was_stale = recomputed != bc.total_minted; if total_minted_was_stale { tracing::warn!( - "load_blockchain B3b: total_minted blob={} != recomputed-from-blocks={} \ - at height {} — overwriting blob (block-sum is canonical)", - bc.total_minted, + "load_blockchain B3b: closed-form total_minted={} != blob/live={} at \ + height {} (expected: reduced-coinbase blocks make live < closed-form). \ + Trusting blob+replay (live accumulator is canonical, feeds state_root); \ + closed-form is advisory only.", recomputed, + bc.total_minted, bc.height() ); - bc.total_minted = recomputed; } if repaired > 0 || total_minted_was_stale { @@ -345,6 +363,14 @@ impl Storage { .map_err(|e| SentrixError::StorageError(e.to_string())) } + /// Batched, fsync-light persist of a contiguous block run — see + /// `ChainStorage::save_blocks`. Used by the background block-persister. + pub fn save_blocks(&self, blocks: &[Block]) -> SentrixResult<()> { + self.chain + .save_blocks(blocks) + .map_err(|e| SentrixError::StorageError(e.to_string())) + } + pub fn load_block(&self, index: u64) -> SentrixResult> { self.chain .load_block(index) @@ -815,7 +841,7 @@ mod tests { /// keep divergent `total_minted` forever — exactly the 2026-05-24 /// STATE-FP `fp`-divergence-with-matching-`acc` symptom. #[test] - fn test_b3b_repairs_stale_total_minted_on_load() { + fn test_b3b_advisory_does_not_overwrite_total_minted_on_load() { let path = temp_db_path(); let storage = Storage::open(&path).unwrap(); @@ -831,18 +857,24 @@ mod tests { } let canonical_total = bc.total_minted; - // Persist a corrupted view: blocks remain canonical, but the - // blob's total_minted is off by one block reward (as if save - // lagged one block behind apply, or a partial copy from a - // healthy host shipped stale state). + // B3b USED to overwrite total_minted with a closed-form recompute + // (TOTAL_PREMINE + flat BLOCK_REWARD>>halvings per height) on load. + // That over-counts on chains with reduced-coinbase blocks — the live + // sum of stamped coinbase amounts is strictly less — and total_minted + // feeds the state_root, so the overwrite forced a divergent root on + // reload (observer #1e; validator restart fork). B3b is now ADVISORY: + // it compares + warns but DOES NOT mutate, trusting the persisted blob + // (written by save_blockchain from the live accumulator) plus B2 + // replay. Persist a blob value that differs from the closed form and + // assert it survives the load untouched. bc.total_minted = canonical_total - 1; storage.save_blockchain(&bc).unwrap(); - // Load via the production path — B3b must catch + repair. let loaded = storage.load_blockchain().unwrap().unwrap(); assert_eq!( - loaded.total_minted, canonical_total, - "B3b must repair stale total_minted from block sum" + loaded.total_minted, + canonical_total - 1, + "B3b is advisory: it must NOT overwrite the persisted blob total_minted" ); let _ = std::fs::remove_dir_all(&path); diff --git a/crates/sentrix-storage/src/chain.rs b/crates/sentrix-storage/src/chain.rs index 7757c69c..3b685f6e 100644 --- a/crates/sentrix-storage/src/chain.rs +++ b/crates/sentrix-storage/src/chain.rs @@ -191,6 +191,39 @@ impl ChainStorage { Ok(()) } + /// Persist a contiguous run of blocks in ONE MDBX write transaction. + /// Same per-block layout as `save_block` (block JSON + hash→height + /// index + tip `height` key) but batched: one `commit()` = one fsync + /// for the whole range, and NO explicit `self.mdbx.sync()` (the durable + /// env already fsyncs on commit). This is the background block-persister + /// path — it runs off the BFT critical path on a timer. `save_block`'s + /// per-block `commit()` + full-env `sync()` was fine for the rare + /// admin/recovery caller but, driven per-block at chain speed, its + /// fsync volume contended with the apply path's trie write txns and + /// stalled consensus. Batching collapses N blocks to one txn/one fsync. + /// `blocks` must be ascending and contiguous; the tip key is set from + /// the last element. No-op for an empty slice. + pub fn save_blocks(&self, blocks: &[Block]) -> StorageResult<()> { + let Some(last) = blocks.last() else { + return Ok(()); + }; + let height_bytes = serde_json::to_vec(&last.index)?; + let batch = self.mdbx.begin_write()?; + for block in blocks { + let key = format!("block:{}", block.index); + let block_json = serde_json::to_vec(block)?; + batch.put(TABLE_META, key.as_bytes(), &block_json)?; + batch.put( + TABLE_BLOCK_HASHES, + block.hash.as_bytes(), + &height_key(block.index), + )?; + } + batch.put(TABLE_META, b"height", &height_bytes)?; + batch.commit()?; + Ok(()) + } + pub fn load_block(&self, index: u64) -> StorageResult> { let key = format!("block:{}", index); self.get(&key) @@ -311,6 +344,40 @@ mod tests { let _ = std::fs::remove_dir_all(&path); } + #[test] + fn test_save_blocks_batched_range() { + let path = temp_path(); + let storage = ChainStorage::open(&path).unwrap(); + + // A contiguous run with distinct indices + hashes. + let blocks: Vec = (0..3u64) + .map(|i| { + let mut b = Block::genesis(); + b.index = i; + b.hash = format!("hash{i}"); + b + }) + .collect(); + storage.save_blocks(&blocks).unwrap(); + + // Every block:{N} key landed (the gap this fixes was missing keys). + for i in 0..3u64 { + assert_eq!(storage.load_block(i).unwrap().unwrap().index, i); + } + // Tip height key advanced to the last block in the batch. + assert_eq!(storage.load_height().unwrap(), 2); + // Hash→height reverse index landed too. + assert_eq!( + storage.load_block_by_hash("hash2").unwrap().unwrap().index, + 2 + ); + // Empty slice is a no-op (no panic, height untouched). + storage.save_blocks(&[]).unwrap(); + assert_eq!(storage.load_height().unwrap(), 2); + + let _ = std::fs::remove_dir_all(&path); + } + #[test] fn test_load_block_by_hash() { let path = temp_path();