From 8ef1e59989a0e2ac12ebb63e389dde0cb6d2296b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 8 Dec 2025 02:50:10 +0000 Subject: [PATCH 1/8] Initial plan From c3b75c708f5b39158ddec9fa51dbbc42266d6103 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 8 Dec 2025 02:55:50 +0000 Subject: [PATCH 2/8] Add ProgressiveMerkleHasher implementation with basic tests Co-authored-by: michaelsproul <4452260+michaelsproul@users.noreply.github.com> --- tree_hash/src/lib.rs | 2 + tree_hash/src/progressive_merkle_hasher.rs | 266 +++++++++++++++++++++ 2 files changed, 268 insertions(+) create mode 100644 tree_hash/src/progressive_merkle_hasher.rs diff --git a/tree_hash/src/lib.rs b/tree_hash/src/lib.rs index e5ac66f..72d4790 100644 --- a/tree_hash/src/lib.rs +++ b/tree_hash/src/lib.rs @@ -2,10 +2,12 @@ pub mod impls; mod merkle_hasher; mod merkleize_padded; mod merkleize_standard; +mod progressive_merkle_hasher; pub use merkle_hasher::{Error, MerkleHasher}; pub use merkleize_padded::merkleize_padded; pub use merkleize_standard::merkleize_standard; +pub use progressive_merkle_hasher::ProgressiveMerkleHasher; use ethereum_hashing::{hash_fixed, ZERO_HASHES, ZERO_HASHES_MAX_INDEX}; use smallvec::SmallVec; diff --git a/tree_hash/src/progressive_merkle_hasher.rs b/tree_hash/src/progressive_merkle_hasher.rs new file mode 100644 index 0000000..f1a19db --- /dev/null +++ b/tree_hash/src/progressive_merkle_hasher.rs @@ -0,0 +1,266 @@ +use crate::{get_zero_hash, merkle_root, Hash256, BYTES_PER_CHUNK}; +use ethereum_hashing::hash32_concat; + +#[derive(Clone, Debug, PartialEq)] +pub enum Error { + /// The maximum number of leaves has been exceeded. + MaximumLeavesExceeded { max_leaves: usize }, +} + +/// A progressive Merkle hasher that implements the semantics of `merkleize_progressive` as +/// defined in EIP-7916. 
+/// +/// The progressive merkle tree has a unique structure where: +/// - At each level, the right child is a binary merkle tree with a specific number of leaves +/// - The left child recursively contains more progressive structure +/// - The number of leaves in each right subtree grows by 4x at each level (1, 4, 16, 64, ...) +/// +/// # Example Tree Structure +/// +/// ```text +/// root +/// /\ +/// / \ +/// /\ 1: chunks[0 ..< 1] +/// / \ +/// /\ 4: chunks[1 ..< 5] +/// / \ +/// /\ 16: chunks[5 ..< 21] +/// / \ +/// 0 64: chunks[21 ..< 85] +/// ``` +/// +/// This structure allows efficient appending and proof generation for growing lists. +pub struct ProgressiveMerkleHasher { + /// All chunks that have been written to the hasher. + chunks: Vec<[u8; BYTES_PER_CHUNK]>, + /// Maximum number of leaves this hasher can accept. + max_leaves: usize, +} + +impl ProgressiveMerkleHasher { + /// Create a new progressive merkle hasher that can accept up to `max_leaves` leaves. + /// + /// # Panics + /// + /// Panics if `max_leaves == 0`. + pub fn with_leaves(max_leaves: usize) -> Self { + assert!(max_leaves > 0, "must have at least one leaf"); + Self { + chunks: Vec::new(), + max_leaves, + } + } + + /// Write bytes to the hasher. + /// + /// The bytes will be split into 32-byte chunks. If the final chunk is incomplete, + /// it will be padded with zeros. + /// + /// # Errors + /// + /// Returns an error if writing these bytes would exceed the maximum number of leaves. 
+ pub fn write(&mut self, bytes: &[u8]) -> Result<(), Error> { + let num_new_leaves = bytes.len().div_ceil(BYTES_PER_CHUNK); + + if self.chunks.len() + num_new_leaves > self.max_leaves { + return Err(Error::MaximumLeavesExceeded { + max_leaves: self.max_leaves, + }); + } + + // Split bytes into 32-byte chunks + for chunk_bytes in bytes.chunks(BYTES_PER_CHUNK) { + let mut chunk = [0u8; BYTES_PER_CHUNK]; + chunk[..chunk_bytes.len()].copy_from_slice(chunk_bytes); + self.chunks.push(chunk); + } + + Ok(()) + } + + /// Finish the hasher and return the progressive merkle root. + /// + /// This implements the recursive merkleize_progressive algorithm: + /// - If no chunks: return zero hash + /// - Otherwise: hash(merkleize_progressive(left), merkleize(right)) + /// where right contains the first num_leaves chunks as a binary tree, + /// and left recursively contains the rest with num_leaves * 4. + pub fn finish(self) -> Result { + Ok(merkleize_progressive(&self.chunks, 1)) + } +} + +/// Recursively compute the progressive merkle root for the given chunks. +/// +/// # Arguments +/// +/// * `chunks` - The chunks to merkleize +/// * `num_leaves` - The number of leaves for the right (binary tree) subtree at this level +/// +/// # Algorithm +/// +/// Following the spec: +/// ```text +/// merkleize_progressive(chunks, num_leaves=1): Given ordered BYTES_PER_CHUNK-byte chunks: +/// The merkleization depends on the number of input chunks and is defined recursively: +/// If len(chunks) == 0: the root is a zero value, Bytes32(). +/// Otherwise: compute the root using hash(a, b) +/// a: Recursively merkleize chunks beyond num_leaves using +/// merkleize_progressive(chunks[num_leaves:], num_leaves * 4). +/// b: Merkleize the first up to num_leaves chunks as a binary tree using +/// merkleize(chunks[:num_leaves], num_leaves). 
+/// ``` +fn merkleize_progressive(chunks: &[[u8; BYTES_PER_CHUNK]], num_leaves: usize) -> Hash256 { + if chunks.is_empty() { + // Base case: no chunks, return zero hash + return Hash256::ZERO; + } + + // Split chunks into right (first num_leaves) and left (rest) + let right_chunks = &chunks[..chunks.len().min(num_leaves)]; + let left_chunks = &chunks[chunks.len().min(num_leaves)..]; + + // Compute right subtree: binary merkle tree with num_leaves leaves + let right_root = if right_chunks.is_empty() { + // If no chunks for right, use zero hash + Hash256::from_slice(get_zero_hash(compute_height(num_leaves))) + } else { + // Use merkle_root to compute binary tree root + let bytes: Vec = right_chunks.iter().flat_map(|c| c.iter().copied()).collect(); + merkle_root(&bytes, num_leaves) + }; + + // Compute left subtree: recursive progressive merkle tree with num_leaves * 4 + let left_root = merkleize_progressive(left_chunks, num_leaves * 4); + + // Combine left and right roots + Hash256::from_slice(&hash32_concat(left_root.as_slice(), right_root.as_slice())) +} + +/// Compute the height of a binary tree with the given number of leaves. 
+fn compute_height(num_leaves: usize) -> usize { + if num_leaves == 0 { + 0 + } else { + // Height is log2(next_power_of_two(num_leaves)) + let power_of_two = num_leaves.next_power_of_two(); + power_of_two.trailing_zeros() as usize + } +} + + + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_empty_tree() { + let hasher = ProgressiveMerkleHasher::with_leaves(1); + let root = hasher.finish().unwrap(); + assert_eq!(root, Hash256::ZERO); + } + + #[test] + fn test_single_chunk() { + let mut hasher = ProgressiveMerkleHasher::with_leaves(1); + let chunk = [1u8; BYTES_PER_CHUNK]; + hasher.write(&chunk).unwrap(); + let root = hasher.finish().unwrap(); + + // For a single chunk, the progressive tree should be: + // hash(merkleize_progressive([], 4), merkleize([chunk], 1)) + // = hash(zero_hash, chunk) + let zero_left = Hash256::ZERO; + let right = Hash256::from_slice(&chunk); + let expected = Hash256::from_slice(&hash32_concat( + zero_left.as_slice(), + right.as_slice() + )); + + assert_eq!(root, expected); + } + + #[test] + fn test_two_chunks() { + let mut hasher = ProgressiveMerkleHasher::with_leaves(5); + let chunk1 = [1u8; BYTES_PER_CHUNK]; + let chunk2 = [2u8; BYTES_PER_CHUNK]; + hasher.write(&chunk1).unwrap(); + hasher.write(&chunk2).unwrap(); + let root = hasher.finish().unwrap(); + + // First chunk goes to right (num_leaves=1) + // Second chunk goes to left recursive call (num_leaves=4) + + // Right: binary tree with 1 leaf = chunk1 + let right = Hash256::from_slice(&chunk1); + + // Left: progressive tree with chunk2 at num_leaves=4 + // At this level: hash(merkleize_progressive([], 16), merkleize([chunk2], 4)) + // = hash(zero_hash, merkle([chunk2], 4)) + let chunk2_padded = merkle_root(&chunk2, 4); + let zero_left_inner = Hash256::ZERO; + let left = Hash256::from_slice(&hash32_concat( + zero_left_inner.as_slice(), + chunk2_padded.as_slice() + )); + + let expected = Hash256::from_slice(&hash32_concat(left.as_slice(), right.as_slice())); + 
assert_eq!(root, expected); + } + + #[test] + fn test_max_leaves_exceeded() { + let mut hasher = ProgressiveMerkleHasher::with_leaves(2); + let chunk = [1u8; BYTES_PER_CHUNK]; + hasher.write(&chunk).unwrap(); + hasher.write(&chunk).unwrap(); + + // Third write should fail + let result = hasher.write(&chunk); + assert!(matches!(result, Err(Error::MaximumLeavesExceeded { .. }))); + } + + #[test] + fn test_partial_chunk() { + let mut hasher = ProgressiveMerkleHasher::with_leaves(1); + let partial = vec![1u8, 2u8, 3u8]; + hasher.write(&partial).unwrap(); + let root = hasher.finish().unwrap(); + + // Partial chunk should be padded with zeros + let mut chunk = [0u8; BYTES_PER_CHUNK]; + chunk[0] = 1; + chunk[1] = 2; + chunk[2] = 3; + + let zero_left = Hash256::ZERO; + let right = Hash256::from_slice(&chunk); + let expected = Hash256::from_slice(&hash32_concat( + zero_left.as_slice(), + right.as_slice() + )); + + assert_eq!(root, expected); + } + + #[test] + fn test_multiple_writes() { + let mut hasher = ProgressiveMerkleHasher::with_leaves(10); + hasher.write(&[1u8; 16]).unwrap(); + hasher.write(&[2u8; 16]).unwrap(); + hasher.write(&[3u8; 32]).unwrap(); + let root = hasher.finish().unwrap(); + + // Should handle multiple writes correctly + assert_ne!(root, Hash256::ZERO); + } + + #[test] + #[should_panic(expected = "must have at least one leaf")] + fn test_zero_leaves_panics() { + ProgressiveMerkleHasher::with_leaves(0); + } +} From 74331be814a1a24c886993d1f134485a13f22a61 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 8 Dec 2025 02:57:58 +0000 Subject: [PATCH 3/8] Add byte buffering and comprehensive tests for ProgressiveMerkleHasher Co-authored-by: michaelsproul <4452260+michaelsproul@users.noreply.github.com> --- tree_hash/src/progressive_merkle_hasher.rs | 166 +++++++++++++++++++-- 1 file changed, 153 insertions(+), 13 deletions(-) diff --git a/tree_hash/src/progressive_merkle_hasher.rs 
b/tree_hash/src/progressive_merkle_hasher.rs index f1a19db..592e17c 100644 --- a/tree_hash/src/progressive_merkle_hasher.rs +++ b/tree_hash/src/progressive_merkle_hasher.rs @@ -36,6 +36,8 @@ pub struct ProgressiveMerkleHasher { chunks: Vec<[u8; BYTES_PER_CHUNK]>, /// Maximum number of leaves this hasher can accept. max_leaves: usize, + /// Buffer for bytes that haven't been completed into a chunk yet. + buffer: Vec, } impl ProgressiveMerkleHasher { @@ -49,31 +51,34 @@ impl ProgressiveMerkleHasher { Self { chunks: Vec::new(), max_leaves, + buffer: Vec::new(), } } /// Write bytes to the hasher. /// - /// The bytes will be split into 32-byte chunks. If the final chunk is incomplete, - /// it will be padded with zeros. + /// The bytes will be split into 32-byte chunks. Bytes are buffered across multiple + /// write calls to ensure proper chunk boundaries. /// /// # Errors /// /// Returns an error if writing these bytes would exceed the maximum number of leaves. pub fn write(&mut self, bytes: &[u8]) -> Result<(), Error> { - let num_new_leaves = bytes.len().div_ceil(BYTES_PER_CHUNK); + // Add bytes to buffer + self.buffer.extend_from_slice(bytes); - if self.chunks.len() + num_new_leaves > self.max_leaves { - return Err(Error::MaximumLeavesExceeded { - max_leaves: self.max_leaves, - }); - } - - // Split bytes into 32-byte chunks - for chunk_bytes in bytes.chunks(BYTES_PER_CHUNK) { + // Process complete chunks from buffer + while self.buffer.len() >= BYTES_PER_CHUNK { + if self.chunks.len() >= self.max_leaves { + return Err(Error::MaximumLeavesExceeded { + max_leaves: self.max_leaves, + }); + } + let mut chunk = [0u8; BYTES_PER_CHUNK]; - chunk[..chunk_bytes.len()].copy_from_slice(chunk_bytes); + chunk.copy_from_slice(&self.buffer[..BYTES_PER_CHUNK]); self.chunks.push(chunk); + self.buffer.drain(..BYTES_PER_CHUNK); } Ok(()) @@ -86,7 +91,22 @@ impl ProgressiveMerkleHasher { /// - Otherwise: hash(merkleize_progressive(left), merkleize(right)) /// where right contains the 
first num_leaves chunks as a binary tree, /// and left recursively contains the rest with num_leaves * 4. - pub fn finish(self) -> Result { + /// + /// Any remaining bytes in the buffer will be padded to form a final chunk. + pub fn finish(mut self) -> Result { + // Process any remaining bytes in the buffer as a final chunk + if !self.buffer.is_empty() { + if self.chunks.len() >= self.max_leaves { + return Err(Error::MaximumLeavesExceeded { + max_leaves: self.max_leaves, + }); + } + + let mut chunk = [0u8; BYTES_PER_CHUNK]; + chunk[..self.buffer.len()].copy_from_slice(&self.buffer); + self.chunks.push(chunk); + } + Ok(merkleize_progressive(&self.chunks, 1)) } } @@ -263,4 +283,124 @@ mod tests { fn test_zero_leaves_panics() { ProgressiveMerkleHasher::with_leaves(0); } + + #[test] + fn test_five_chunks() { + // Test with 5 chunks as per the problem statement structure: + // chunks[0] goes to right at level 1 (1 leaf) + // chunks[1..5] go to left recursive call (4 leaves at level 2) + let mut hasher = ProgressiveMerkleHasher::with_leaves(5); + for i in 0..5 { + let mut chunk = [0u8; BYTES_PER_CHUNK]; + chunk[0] = i as u8; + hasher.write(&chunk).unwrap(); + } + let root = hasher.finish().unwrap(); + + // Manually compute expected root: + // Right: chunks[0] + let mut chunk0 = [0u8; BYTES_PER_CHUNK]; + chunk0[0] = 0; + let right = Hash256::from_slice(&chunk0); + + // Left: merkleize_progressive(chunks[1..5], 4) + // Which is: hash(merkleize_progressive([], 16), merkleize(chunks[1..5], 4)) + let chunks_1_to_4: Vec = (1..5) + .flat_map(|i| { + let mut chunk = [0u8; BYTES_PER_CHUNK]; + chunk[0] = i; + chunk + }) + .collect(); + let right_inner = merkle_root(&chunks_1_to_4, 4); + let left_inner = Hash256::ZERO; + let left = Hash256::from_slice(&hash32_concat( + left_inner.as_slice(), + right_inner.as_slice() + )); + + let expected = Hash256::from_slice(&hash32_concat(left.as_slice(), right.as_slice())); + assert_eq!(root, expected); + } + + #[test] + fn test_21_chunks() { + 
// Test with 21 chunks as per problem statement: + // chunks[0] goes to right at level 1 (1 leaf) + // chunks[1..5] go to right at level 2 (4 leaves) + // chunks[5..21] go to right at level 3 (16 leaves) + let mut hasher = ProgressiveMerkleHasher::with_leaves(21); + for i in 0..21 { + let mut chunk = [0u8; BYTES_PER_CHUNK]; + chunk[0] = i as u8; + hasher.write(&chunk).unwrap(); + } + let root = hasher.finish().unwrap(); + + // Root should not be zero + assert_ne!(root, Hash256::ZERO); + } + + #[test] + fn test_85_chunks() { + // Test with 85 chunks as per problem statement structure: + // chunks[0] at level 1 (1 leaf) + // chunks[1..5] at level 2 (4 leaves) + // chunks[5..21] at level 3 (16 leaves) + // chunks[21..85] at level 4 (64 leaves) + let mut hasher = ProgressiveMerkleHasher::with_leaves(85); + for i in 0..85 { + let mut chunk = [0u8; BYTES_PER_CHUNK]; + chunk[0] = (i % 256) as u8; + hasher.write(&chunk).unwrap(); + } + let root = hasher.finish().unwrap(); + + // Root should not be zero + assert_ne!(root, Hash256::ZERO); + } + + #[test] + fn test_consistency_with_manual_calculation() { + // Test that using ProgressiveMerkleHasher gives the same result as + // manually calling merkleize_progressive + let chunks: Vec<[u8; BYTES_PER_CHUNK]> = (0..10) + .map(|i| { + let mut chunk = [0u8; BYTES_PER_CHUNK]; + chunk[0] = i; + chunk + }) + .collect(); + + // Use ProgressiveMerkleHasher + let mut hasher = ProgressiveMerkleHasher::with_leaves(10); + for chunk in &chunks { + hasher.write(chunk).unwrap(); + } + let hasher_root = hasher.finish().unwrap(); + + // Manually call merkleize_progressive + let manual_root = merkleize_progressive(&chunks, 1); + + assert_eq!(hasher_root, manual_root); + } + + #[test] + fn test_byte_streaming() { + // Test that we can write bytes in various chunk sizes + let data = vec![42u8; BYTES_PER_CHUNK * 3 + 10]; + + // Write all at once + let mut hasher1 = ProgressiveMerkleHasher::with_leaves(10); + hasher1.write(&data).unwrap(); + let 
root1 = hasher1.finish().unwrap(); + + // Write in smaller chunks + let mut hasher2 = ProgressiveMerkleHasher::with_leaves(10); + hasher2.write(&data[0..50]).unwrap(); + hasher2.write(&data[50..]).unwrap(); + let root2 = hasher2.finish().unwrap(); + + assert_eq!(root1, root2); + } } From f5ecf94920c66142a8f6960085c58fc25a24b7f8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 8 Dec 2025 03:00:50 +0000 Subject: [PATCH 4/8] Export ProgressiveMerkleHasherError from lib.rs Co-authored-by: michaelsproul <4452260+michaelsproul@users.noreply.github.com> --- tree_hash/src/lib.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tree_hash/src/lib.rs b/tree_hash/src/lib.rs index 72d4790..996a5cc 100644 --- a/tree_hash/src/lib.rs +++ b/tree_hash/src/lib.rs @@ -7,7 +7,9 @@ mod progressive_merkle_hasher; pub use merkle_hasher::{Error, MerkleHasher}; pub use merkleize_padded::merkleize_padded; pub use merkleize_standard::merkleize_standard; -pub use progressive_merkle_hasher::ProgressiveMerkleHasher; +pub use progressive_merkle_hasher::{ + Error as ProgressiveMerkleHasherError, ProgressiveMerkleHasher, +}; use ethereum_hashing::{hash_fixed, ZERO_HASHES, ZERO_HASHES_MAX_INDEX}; use smallvec::SmallVec; From 3e3051213e36dbe92e777402acd5855dabaaca40 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 8 Dec 2025 03:04:53 +0000 Subject: [PATCH 5/8] Add clarifying comment about hash order in progressive merkleization Co-authored-by: michaelsproul <4452260+michaelsproul@users.noreply.github.com> --- tree_hash/src/progressive_merkle_hasher.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tree_hash/src/progressive_merkle_hasher.rs b/tree_hash/src/progressive_merkle_hasher.rs index 592e17c..d1184b9 100644 --- a/tree_hash/src/progressive_merkle_hasher.rs +++ b/tree_hash/src/progressive_merkle_hasher.rs @@ -154,7 +154,8 @@ fn 
merkleize_progressive(chunks: &[[u8; BYTES_PER_CHUNK]], num_leaves: usize) -> // Compute left subtree: recursive progressive merkle tree with num_leaves * 4 let left_root = merkleize_progressive(left_chunks, num_leaves * 4); - // Combine left and right roots + // Combine left and right roots according to spec: hash(a, b) where + // a = left subtree (recursive progressive), b = right subtree (binary tree) Hash256::from_slice(&hash32_concat(left_root.as_slice(), right_root.as_slice())) } From 07466523c9be0c9478a76e08d1571e28cf16ed3a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 8 Dec 2025 04:40:08 +0000 Subject: [PATCH 6/8] Refactor ProgressiveMerkleHasher for efficiency - hash chunks as they stream in Co-authored-by: michaelsproul <4452260+michaelsproul@users.noreply.github.com> --- tree_hash/src/progressive_merkle_hasher.rs | 205 ++++++++++++--------- 1 file changed, 118 insertions(+), 87 deletions(-) diff --git a/tree_hash/src/progressive_merkle_hasher.rs b/tree_hash/src/progressive_merkle_hasher.rs index d1184b9..d838df5 100644 --- a/tree_hash/src/progressive_merkle_hasher.rs +++ b/tree_hash/src/progressive_merkle_hasher.rs @@ -1,4 +1,4 @@ -use crate::{get_zero_hash, merkle_root, Hash256, BYTES_PER_CHUNK}; +use crate::{merkle_root, Hash256, BYTES_PER_CHUNK}; use ethereum_hashing::hash32_concat; #[derive(Clone, Debug, PartialEq)] @@ -31,13 +31,27 @@ pub enum Error { /// ``` /// /// This structure allows efficient appending and proof generation for growing lists. +/// +/// # Efficiency +/// +/// This implementation hashes chunks as they are streamed in, storing only the minimum +/// necessary state (completed subtree roots). When a level is filled, its binary merkle +/// root is computed and stored, avoiding the need to keep all chunks in memory. pub struct ProgressiveMerkleHasher { - /// All chunks that have been written to the hasher. 
- chunks: Vec<[u8; BYTES_PER_CHUNK]>, - /// Maximum number of leaves this hasher can accept. - max_leaves: usize, + /// Completed subtree roots at each level, stored in reverse order. + /// Index 0 = most recent (smallest) completed level, higher indices = larger levels. + /// Each level i contains 4^i leaves. + completed_roots: Vec, + /// Chunks currently being accumulated for the next level to fill. + current_chunks: Vec<[u8; BYTES_PER_CHUNK]>, + /// The number of leaves expected at the current level (1, 4, 16, 64, ...). + current_level_size: usize, /// Buffer for bytes that haven't been completed into a chunk yet. buffer: Vec, + /// Maximum number of leaves this hasher can accept. + max_leaves: usize, + /// Total number of chunks written so far. + total_chunks: usize, } impl ProgressiveMerkleHasher { @@ -49,16 +63,20 @@ impl ProgressiveMerkleHasher { pub fn with_leaves(max_leaves: usize) -> Self { assert!(max_leaves > 0, "must have at least one leaf"); Self { - chunks: Vec::new(), - max_leaves, + completed_roots: Vec::new(), + current_chunks: Vec::new(), + current_level_size: 1, buffer: Vec::new(), + max_leaves, + total_chunks: 0, } } /// Write bytes to the hasher. /// /// The bytes will be split into 32-byte chunks. Bytes are buffered across multiple - /// write calls to ensure proper chunk boundaries. + /// write calls to ensure proper chunk boundaries. Complete subtrees are hashed + /// immediately as chunks are written. 
/// /// # Errors /// @@ -69,7 +87,7 @@ impl ProgressiveMerkleHasher { // Process complete chunks from buffer while self.buffer.len() >= BYTES_PER_CHUNK { - if self.chunks.len() >= self.max_leaves { + if self.total_chunks >= self.max_leaves { return Err(Error::MaximumLeavesExceeded { max_leaves: self.max_leaves, }); @@ -77,26 +95,46 @@ impl ProgressiveMerkleHasher { let mut chunk = [0u8; BYTES_PER_CHUNK]; chunk.copy_from_slice(&self.buffer[..BYTES_PER_CHUNK]); - self.chunks.push(chunk); self.buffer.drain(..BYTES_PER_CHUNK); + + self.process_chunk(chunk)?; } Ok(()) } + + /// Process a single chunk by adding it to the current level and completing the level if full. + fn process_chunk(&mut self, chunk: [u8; BYTES_PER_CHUNK]) -> Result<(), Error> { + self.current_chunks.push(chunk); + self.total_chunks += 1; + + // Check if current level is complete + if self.current_chunks.len() == self.current_level_size { + // Compute the merkle root for this level + let bytes: Vec = self.current_chunks.iter().flat_map(|c| c.iter().copied()).collect(); + let root = merkle_root(&bytes, self.current_level_size); + + // Store this completed root + self.completed_roots.push(root); + + // Move to next level (4x larger) + self.current_chunks.clear(); + self.current_level_size *= 4; + } + + Ok(()) + } /// Finish the hasher and return the progressive merkle root. /// - /// This implements the recursive merkleize_progressive algorithm: - /// - If no chunks: return zero hash - /// - Otherwise: hash(merkleize_progressive(left), merkleize(right)) - /// where right contains the first num_leaves chunks as a binary tree, - /// and left recursively contains the rest with num_leaves * 4. + /// This completes any partial level and combines all completed subtree roots + /// according to the progressive merkleization algorithm. /// /// Any remaining bytes in the buffer will be padded to form a final chunk. 
pub fn finish(mut self) -> Result { // Process any remaining bytes in the buffer as a final chunk if !self.buffer.is_empty() { - if self.chunks.len() >= self.max_leaves { + if self.total_chunks >= self.max_leaves { return Err(Error::MaximumLeavesExceeded { max_leaves: self.max_leaves, }); @@ -104,74 +142,57 @@ impl ProgressiveMerkleHasher { let mut chunk = [0u8; BYTES_PER_CHUNK]; chunk[..self.buffer.len()].copy_from_slice(&self.buffer); - self.chunks.push(chunk); + self.process_chunk(chunk)?; } - Ok(merkleize_progressive(&self.chunks, 1)) - } -} - -/// Recursively compute the progressive merkle root for the given chunks. -/// -/// # Arguments -/// -/// * `chunks` - The chunks to merkleize -/// * `num_leaves` - The number of leaves for the right (binary tree) subtree at this level -/// -/// # Algorithm -/// -/// Following the spec: -/// ```text -/// merkleize_progressive(chunks, num_leaves=1): Given ordered BYTES_PER_CHUNK-byte chunks: -/// The merkleization depends on the number of input chunks and is defined recursively: -/// If len(chunks) == 0: the root is a zero value, Bytes32(). -/// Otherwise: compute the root using hash(a, b) -/// a: Recursively merkleize chunks beyond num_leaves using -/// merkleize_progressive(chunks[num_leaves:], num_leaves * 4). -/// b: Merkleize the first up to num_leaves chunks as a binary tree using -/// merkleize(chunks[:num_leaves], num_leaves). 
-/// ``` -fn merkleize_progressive(chunks: &[[u8; BYTES_PER_CHUNK]], num_leaves: usize) -> Hash256 { - if chunks.is_empty() { - // Base case: no chunks, return zero hash - return Hash256::ZERO; + // If we have no chunks at all, return zero hash + if self.total_chunks == 0 { + return Ok(Hash256::ZERO); + } + + // If there are chunks in current_chunks (partial level), compute their root + let current_root = if !self.current_chunks.is_empty() { + let bytes: Vec = self.current_chunks.iter().flat_map(|c| c.iter().copied()).collect(); + Some(merkle_root(&bytes, self.current_level_size)) + } else { + None + }; + + // Build the progressive tree from completed roots and current root + // completed_roots are in order: [smallest level, ..., largest level] + // We need to build from right to left in the tree + Ok(self.build_progressive_root(current_root)) } - - // Split chunks into right (first num_leaves) and left (rest) - let right_chunks = &chunks[..chunks.len().min(num_leaves)]; - let left_chunks = &chunks[chunks.len().min(num_leaves)..]; - - // Compute right subtree: binary merkle tree with num_leaves leaves - let right_root = if right_chunks.is_empty() { - // If no chunks for right, use zero hash - Hash256::from_slice(get_zero_hash(compute_height(num_leaves))) - } else { - // Use merkle_root to compute binary tree root - let bytes: Vec = right_chunks.iter().flat_map(|c| c.iter().copied()).collect(); - merkle_root(&bytes, num_leaves) - }; - - // Compute left subtree: recursive progressive merkle tree with num_leaves * 4 - let left_root = merkleize_progressive(left_chunks, num_leaves * 4); - - // Combine left and right roots according to spec: hash(a, b) where - // a = left subtree (recursive progressive), b = right subtree (binary tree) - Hash256::from_slice(&hash32_concat(left_root.as_slice(), right_root.as_slice())) -} - -/// Compute the height of a binary tree with the given number of leaves. 
-fn compute_height(num_leaves: usize) -> usize { - if num_leaves == 0 { - 0 - } else { - // Height is log2(next_power_of_two(num_leaves)) - let power_of_two = num_leaves.next_power_of_two(); - power_of_two.trailing_zeros() as usize + + /// Build the final progressive merkle root by combining completed subtree roots. + /// + /// The progressive tree structure: at each node, hash(left=next_levels, right=this_level). + /// Build from the current (largest/partial) level backwards to the first level. + fn build_progressive_root(&self, current_root: Option) -> Hash256 { + // Start from the leftmost (largest) level + // If there's a current partial level, it needs to be wrapped: hash(ZERO, current_root) + // because the spec applies the structure at every level + let mut result = if let Some(curr) = current_root { + Hash256::from_slice(&hash32_concat(Hash256::ZERO.as_slice(), curr.as_slice())) + } else { + Hash256::ZERO + }; + + // Process completed roots from largest to smallest (reverse order) + // At each step: result = hash(result, completed_root) because + // result is the accumulated left subtree, completed_root is the right subtree at this level + for &completed_root in self.completed_roots.iter().rev() { + result = Hash256::from_slice(&hash32_concat( + result.as_slice(), + completed_root.as_slice(), + )); + } + + result } } - #[cfg(test)] mod tests { use super::*; @@ -362,9 +383,8 @@ mod tests { } #[test] - fn test_consistency_with_manual_calculation() { - // Test that using ProgressiveMerkleHasher gives the same result as - // manually calling merkleize_progressive + fn test_consistency_across_write_patterns() { + // Test that different write patterns produce the same result let chunks: Vec<[u8; BYTES_PER_CHUNK]> = (0..10) .map(|i| { let mut chunk = [0u8; BYTES_PER_CHUNK]; @@ -373,17 +393,28 @@ mod tests { }) .collect(); - // Use ProgressiveMerkleHasher - let mut hasher = ProgressiveMerkleHasher::with_leaves(10); + // Write all chunks individually + let mut 
hasher1 = ProgressiveMerkleHasher::with_leaves(10); for chunk in &chunks { - hasher.write(chunk).unwrap(); + hasher1.write(chunk).unwrap(); } - let hasher_root = hasher.finish().unwrap(); + let root1 = hasher1.finish().unwrap(); - // Manually call merkleize_progressive - let manual_root = merkleize_progressive(&chunks, 1); + // Write all chunks at once + let mut hasher2 = ProgressiveMerkleHasher::with_leaves(10); + let all_bytes: Vec = chunks.iter().flat_map(|c| c.iter().copied()).collect(); + hasher2.write(&all_bytes).unwrap(); + let root2 = hasher2.finish().unwrap(); - assert_eq!(hasher_root, manual_root); + // Write in groups + let mut hasher3 = ProgressiveMerkleHasher::with_leaves(10); + hasher3.write(&all_bytes[..3 * BYTES_PER_CHUNK]).unwrap(); + hasher3.write(&all_bytes[3 * BYTES_PER_CHUNK..7 * BYTES_PER_CHUNK]).unwrap(); + hasher3.write(&all_bytes[7 * BYTES_PER_CHUNK..]).unwrap(); + let root3 = hasher3.finish().unwrap(); + + assert_eq!(root1, root2); + assert_eq!(root1, root3); } #[test] From 16fc4a0368c385f1e116031d589529a775371449 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 8 Dec 2025 04:45:39 +0000 Subject: [PATCH 7/8] Address code review feedback - improve documentation and extract helper method Co-authored-by: michaelsproul <4452260+michaelsproul@users.noreply.github.com> --- tree_hash/src/progressive_merkle_hasher.rs | 34 +++++++++++++--------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/tree_hash/src/progressive_merkle_hasher.rs b/tree_hash/src/progressive_merkle_hasher.rs index d838df5..5cbb1e6 100644 --- a/tree_hash/src/progressive_merkle_hasher.rs +++ b/tree_hash/src/progressive_merkle_hasher.rs @@ -38,9 +38,9 @@ pub enum Error { /// necessary state (completed subtree roots). When a level is filled, its binary merkle /// root is computed and stored, avoiding the need to keep all chunks in memory. 
pub struct ProgressiveMerkleHasher { - /// Completed subtree roots at each level, stored in reverse order. - /// Index 0 = most recent (smallest) completed level, higher indices = larger levels. - /// Each level i contains 4^i leaves. + /// Completed subtree roots at each level, stored in order of completion. + /// Index 0 = first completed level (1 leaf), index 1 = second level (4 leaves), etc. + /// Level i contains 4^i leaves. completed_roots: Vec, /// Chunks currently being accumulated for the next level to fill. current_chunks: Vec<[u8; BYTES_PER_CHUNK]>, @@ -111,8 +111,7 @@ impl ProgressiveMerkleHasher { // Check if current level is complete if self.current_chunks.len() == self.current_level_size { // Compute the merkle root for this level - let bytes: Vec = self.current_chunks.iter().flat_map(|c| c.iter().copied()).collect(); - let root = merkle_root(&bytes, self.current_level_size); + let root = Self::compute_level_root(&self.current_chunks, self.current_level_size); // Store this completed root self.completed_roots.push(root); @@ -124,6 +123,12 @@ impl ProgressiveMerkleHasher { Ok(()) } + + /// Helper to compute the merkle root for a level's chunks. + fn compute_level_root(chunks: &[[u8; BYTES_PER_CHUNK]], num_leaves: usize) -> Hash256 { + let bytes: Vec = chunks.iter().flat_map(|c| c.iter().copied()).collect(); + merkle_root(&bytes, num_leaves) + } /// Finish the hasher and return the progressive merkle root. 
/// @@ -152,8 +157,7 @@ impl ProgressiveMerkleHasher { // If there are chunks in current_chunks (partial level), compute their root let current_root = if !self.current_chunks.is_empty() { - let bytes: Vec<u8> = self.current_chunks.iter().flat_map(|c| c.iter().copied()).collect(); - Some(merkle_root(&bytes, self.current_level_size)) + Some(Self::compute_level_root(&self.current_chunks, self.current_level_size)) } else { None }; @@ -166,12 +170,13 @@ impl ProgressiveMerkleHasher { /// Build the final progressive merkle root by combining completed subtree roots. /// - /// The progressive tree structure: at each node, hash(left=next_levels, right=this_level). - /// Build from the current (largest/partial) level backwards to the first level. + /// The progressive tree structure: at each node, hash(left=deeper_levels, right=this_level). + /// This builds the tree from the largest (leftmost) level backwards to the smallest (rightmost). fn build_progressive_root(&self, current_root: Option<Hash256>) -> Hash256 { - // Start from the leftmost (largest) level - // If there's a current partial level, it needs to be wrapped: hash(ZERO, current_root) - // because the spec applies the structure at every level + // Start from the leftmost (largest/deepest) level + // Per EIP-7916 spec, even partial levels follow the progressive structure: + // merkleize_progressive(chunks, n) = hash(merkleize_progressive(chunks[n:], n*4), merkleize(chunks[:n], n)) + // So a partial level with k chunks becomes: hash(ZERO (no further chunks), merkleize(chunks, n)) let mut result = if let Some(curr) = current_root { Hash256::from_slice(&hash32_concat(Hash256::ZERO.as_slice(), curr.as_slice())) } else { @@ -179,8 +184,9 @@ impl ProgressiveMerkleHasher { }; // Process completed roots from largest to smallest (reverse order) - // At each step: result = hash(result, completed_root) because - // result is the accumulated left subtree, completed_root is the right subtree at this level + // At each step: result =
hash(result, completed_root) + // - result accumulates the left subtree (deeper/larger levels) + // - completed_root is the right subtree at this level for &completed_root in self.completed_roots.iter().rev() { result = Hash256::from_slice(&hash32_concat( result.as_slice(), From 733344a8e0f975713da23f4905f7dc1ff8905646 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 9 Dec 2025 02:35:35 +0000 Subject: [PATCH 8/8] Refactor ProgressiveMerkleHasher to use MerkleHasher internally for binary tree hashing Co-authored-by: michaelsproul <4452260+michaelsproul@users.noreply.github.com> --- tree_hash/src/progressive_merkle_hasher.rs | 60 ++++++++++++++-------- 1 file changed, 40 insertions(+), 20 deletions(-) diff --git a/tree_hash/src/progressive_merkle_hasher.rs b/tree_hash/src/progressive_merkle_hasher.rs index 5cbb1e6..67d93ab 100644 --- a/tree_hash/src/progressive_merkle_hasher.rs +++ b/tree_hash/src/progressive_merkle_hasher.rs @@ -1,4 +1,4 @@ -use crate::{merkle_root, Hash256, BYTES_PER_CHUNK}; +use crate::{Hash256, MerkleHasher, BYTES_PER_CHUNK}; use ethereum_hashing::hash32_concat; #[derive(Clone, Debug, PartialEq)] @@ -42,10 +42,12 @@ pub struct ProgressiveMerkleHasher { /// Index 0 = first completed level (1 leaf), index 1 = second level (4 leaves), etc. /// Level i contains 4^i leaves. completed_roots: Vec<Hash256>, - /// Chunks currently being accumulated for the next level to fill. - current_chunks: Vec<[u8; BYTES_PER_CHUNK]>, + /// MerkleHasher for computing the current level's binary tree root. + current_hasher: MerkleHasher, /// The number of leaves expected at the current level (1, 4, 16, 64, ...). current_level_size: usize, + /// Number of chunks written to the current hasher. + current_level_chunks: usize, /// Buffer for bytes that haven't been completed into a chunk yet. buffer: Vec<u8>, /// Maximum number of leaves this hasher can accept.
@@ -64,8 +66,9 @@ impl ProgressiveMerkleHasher { assert!(max_leaves > 0, "must have at least one leaf"); Self { completed_roots: Vec::new(), - current_chunks: Vec::new(), + current_hasher: MerkleHasher::with_leaves(1), current_level_size: 1, + current_level_chunks: 0, buffer: Vec::new(), max_leaves, total_chunks: 0, @@ -105,30 +108,39 @@ impl ProgressiveMerkleHasher { /// Process a single chunk by adding it to the current level and completing the level if full. fn process_chunk(&mut self, chunk: [u8; BYTES_PER_CHUNK]) -> Result<(), Error> { - self.current_chunks.push(chunk); + // Write the chunk to the current MerkleHasher + self.current_hasher.write(&chunk).map_err(|_| Error::MaximumLeavesExceeded { + max_leaves: self.max_leaves, + })?; + + self.current_level_chunks += 1; self.total_chunks += 1; // Check if current level is complete - if self.current_chunks.len() == self.current_level_size { - // Compute the merkle root for this level - let root = Self::compute_level_root(&self.current_chunks, self.current_level_size); + if self.current_level_chunks == self.current_level_size { + // Move to next level (4x larger) + let next_level_size = self.current_level_size * 4; + + // Replace the current hasher with a new one for the next level + let completed_hasher = std::mem::replace( + &mut self.current_hasher, + MerkleHasher::with_leaves(next_level_size) + ); + + // Finish the completed hasher to get the root + let root = completed_hasher.finish().map_err(|_| Error::MaximumLeavesExceeded { + max_leaves: self.max_leaves, + })?; // Store this completed root self.completed_roots.push(root); - // Move to next level (4x larger) - self.current_chunks.clear(); - self.current_level_size *= 4; + self.current_level_size = next_level_size; + self.current_level_chunks = 0; } Ok(()) } - - /// Helper to compute the merkle root for a level's chunks. 
- fn compute_level_root(chunks: &[[u8; BYTES_PER_CHUNK]], num_leaves: usize) -> Hash256 { - let bytes: Vec<u8> = chunks.iter().flat_map(|c| c.iter().copied()).collect(); - merkle_root(&bytes, num_leaves) - } /// Finish the hasher and return the progressive merkle root. /// @@ -155,9 +167,16 @@ impl ProgressiveMerkleHasher { return Ok(Hash256::ZERO); } - // If there are chunks in current_chunks (partial level), compute their root - let current_root = if !self.current_chunks.is_empty() { - Some(Self::compute_level_root(&self.current_chunks, self.current_level_size)) + // If there are chunks in current level (partial level), compute their root + let current_root = if self.current_level_chunks > 0 { + // Create a temporary hasher to replace the current one (since finish() takes ownership) + let temp_hasher = std::mem::replace( + &mut self.current_hasher, + MerkleHasher::with_leaves(1) // dummy value, won't be used + ); + Some(temp_hasher.finish().map_err(|_| Error::MaximumLeavesExceeded { + max_leaves: self.max_leaves, + })?) } else { None }; @@ -202,6 +221,7 @@ impl ProgressiveMerkleHasher { #[cfg(test)] mod tests { use super::*; + use crate::merkle_root; #[test] fn test_empty_tree() {