Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(root): chunk state updates in state root task #14500

Draft
wants to merge 25 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
813fe72
feat(evm, root): pass state change source to state hook
shekhirin Feb 14, 2025
2d859c7
some comments
shekhirin Feb 14, 2025
31f5b48
log on multiproof
shekhirin Feb 14, 2025
440e870
perf(root): chunk state updates in state root task
shekhirin Feb 14, 2025
e93d465
add todo for prefetch
shekhirin Feb 14, 2025
1ceeb1e
chunk storage slots by 5 as well
shekhirin Feb 14, 2025
5b12022
at least one element
shekhirin Feb 14, 2025
600e6ee
Merge remote-tracking branch 'origin/main' into alexey/evm-state-hook…
shekhirin Feb 14, 2025
f41000e
updates accounting
shekhirin Feb 14, 2025
f40ce0c
fix comment
shekhirin Feb 14, 2025
976d5a2
revertme: print max
shekhirin Feb 14, 2025
a50c462
Merge remote-tracking branch 'origin/alexey/evm-state-hook-source' in…
shekhirin Feb 14, 2025
1b297fe
Merge remote-tracking branch 'origin/main' into alexey/state-root-tas…
shekhirin Feb 14, 2025
ca1e115
revertme: more than 500
shekhirin Feb 14, 2025
c20669f
revert print
shekhirin Feb 14, 2025
89b61de
use btreemap instead
shekhirin Feb 14, 2025
cf1bc7b
use vec again but nicer indexing in it
shekhirin Feb 14, 2025
dbef921
Revert "use vec again but nicer indexing in it"
shekhirin Feb 14, 2025
8af60fb
Revert "use btreemap instead"
shekhirin Feb 14, 2025
e630c80
collect into one vec first
shekhirin Feb 14, 2025
a1115c3
Merge remote-tracking branch 'origin/main' into alexey/state-root-tas…
shekhirin Feb 18, 2025
ea68fa0
Merge remote-tracking branch 'origin/main' into alexey/state-root-tas…
shekhirin Feb 19, 2025
c6f1fcb
comments
shekhirin Feb 21, 2025
898fe1d
iterate all together
shekhirin Feb 21, 2025
ed5ae5d
use separate iterator struct
shekhirin Feb 21, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions crates/engine/tree/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,11 @@ metrics.workspace = true
reth-metrics = { workspace = true, features = ["common"] }

# misc
schnellru.workspace = true
derive_more.workspace = true
itertools.workspace = true
rayon.workspace = true
schnellru.workspace = true
tracing.workspace = true
derive_more.workspace = true

# optional deps for test-utils
reth-prune-types = { workspace = true, optional = true }
Expand Down
166 changes: 132 additions & 34 deletions crates/engine/tree/src/tree/root.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
//! State root task related functionality.

use alloy_primitives::map::HashSet;
use alloy_primitives::{
keccak256,
map::{B256Map, B256Set, HashSet},
B256,
};
use derive_more::derive::Deref;
use itertools::Itertools;
use metrics::Histogram;
use rayon::iter::{ParallelBridge, ParallelIterator};
use reth_errors::{ProviderError, ProviderResult};
Expand All @@ -28,9 +33,8 @@ use reth_trie_sparse::{
errors::{SparseStateTrieResult, SparseTrieErrorKind},
SparseStateTrie,
};
use revm_primitives::{keccak256, B256};
use std::{
collections::{BTreeMap, VecDeque},
collections::{hash_map::Entry, BTreeMap, VecDeque},
sync::{
mpsc::{self, channel, Receiver, Sender},
Arc,
Expand All @@ -42,6 +46,11 @@ use tracing::{debug, error, trace, trace_span};
/// The level below which the sparse trie hashes are calculated in [`update_sparse_trie`].
const SPARSE_TRIE_INCREMENTAL_LEVEL: usize = 2;

/// Maximum number of account targets in a multiproof.
const MULTIPROOF_ACCOUNTS_CHUNK_SIZE: usize = 5;
/// Maximum number of storage slots targets per account in a multiproof.
const MULTIPROOF_STORAGES_CHUNK_SIZE: usize = 5;

/// Determines the size of the rayon thread pool to be used in [`StateRootTask`].
///
/// The value is determined as `max(NUM_THREADS - 2, 3)`:
Expand Down Expand Up @@ -620,14 +629,16 @@ where
let proof_targets = self.get_prefetch_proof_targets(targets);
extend_multi_proof_targets_ref(&mut self.fetched_proof_targets, &proof_targets);

self.multiproof_manager.spawn_or_queue(MultiproofInput {
config: self.config.clone(),
source: None,
hashed_state_update: Default::default(),
proof_targets,
proof_sequence_number: self.proof_sequencer.next_sequence(),
state_root_message_sender: self.tx.clone(),
});
for chunk in ChunkedProofTargets::new(proof_targets).flatten() {
self.multiproof_manager.spawn_or_queue(MultiproofInput {
config: self.config.clone(),
source: None,
hashed_state_update: HashedPostState::default(),
proof_targets: chunk,
proof_sequence_number: self.proof_sequencer.next_sequence(),
state_root_message_sender: self.tx.clone(),
});
}
}

/// Calls `get_proof_targets` with existing proof targets for prefetching.
Expand Down Expand Up @@ -680,27 +691,68 @@ where
targets
}

/// Handles state updates.
/// Handles state update.
///
/// Returns proof targets derived from the state update.
fn on_state_update(
&mut self,
source: StateChangeSource,
update: EvmState,
proof_sequence_number: u64,
) {
let hashed_state_update = evm_state_to_hashed_post_state(update);
let proof_targets = get_proof_targets(&hashed_state_update, &self.fetched_proof_targets);
/// Chunks into multiple state updates, so that each has at most
/// `MULTIPROOF_ACCOUNTS_CHUNK_SIZE` accounts and `MULTIPROOF_STORAGES_CHUNK_SIZE` storage
/// slots per account.
///
/// After chunking, [`MultiproofManager::spawn_or_queue`] is called for each state update.
///
/// Returns number of new updates generated by one state update.
fn on_state_update(&mut self, source: StateChangeSource, update: EvmState) -> u64 {
let mut state_update = evm_state_to_hashed_post_state(update);
let proof_targets = get_proof_targets(&state_update, &self.fetched_proof_targets);
extend_multi_proof_targets_ref(&mut self.fetched_proof_targets, &proof_targets);

self.multiproof_manager.spawn_or_queue(MultiproofInput {
config: self.config.clone(),
source: Some(source),
hashed_state_update,
proof_targets,
proof_sequence_number,
state_root_message_sender: self.tx.clone(),
});
let mut total_updates = 0;

for chunk in ChunkedProofTargets::new(proof_targets).flatten() {
total_updates += 1;

let mut accounts = B256Map::with_capacity_and_hasher(chunk.len(), Default::default());
let mut storages = B256Map::with_capacity_and_hasher(chunk.len(), Default::default());

for (&address, storage_slots) in &chunk {
if let Some(account) = state_update.accounts.remove(&address) {
accounts.insert(address, account);
}

if !storage_slots.is_empty() {
let state_storage = state_update.storages.entry(address);
let mut hashed_storage = HashedStorage::default();
match state_storage {
Entry::Occupied(mut entry) => {
for storage_slot in storage_slots {
let value = entry
.get_mut()
.storage
.remove(storage_slot)
.expect("storage slot should be present");
hashed_storage.storage.insert(*storage_slot, value);
}

if entry.get_mut().storage.is_empty() {
entry.remove();
}
}
Entry::Vacant(_) => unreachable!(),
}
storages.insert(address, hashed_storage);
}
}

self.multiproof_manager.spawn_or_queue(MultiproofInput {
config: self.config.clone(),
source: Some(source),
hashed_state_update: HashedPostState { accounts, storages },
proof_targets: chunk,
proof_sequence_number: self.proof_sequencer.next_sequence(),
state_root_message_sender: self.tx.clone(),
});
}

total_updates as u64
}

/// Handler for new proof calculated, aggregates all the existing sequential proofs.
Expand Down Expand Up @@ -801,16 +853,16 @@ where
}
last_update_time = Some(Instant::now());

updates_received += 1;
let update_size = update.len();
let new_updates = self.on_state_update(source, update);
updates_received += new_updates;
debug!(
target: "engine::root",
?source,
len = update.len(),
update_size,
new_updates,
total_updates = updates_received,
"Received new state update"
);
let next_sequence = self.proof_sequencer.next_sequence();
self.on_state_update(source, update, next_sequence);
}
StateRootMessage::FinishedStateUpdates => {
trace!(target: "engine::root", "processing StateRootMessage::FinishedStateUpdates");
Expand Down Expand Up @@ -1103,6 +1155,52 @@ fn get_proof_targets(
targets
}

/// Splits [`MultiProofTargets`] into bounded-size chunks for multiproof calculation.
///
/// Each yielded item is a [`Vec<B256Map<B256Set>>`] in which every map holds at most
/// [`MULTIPROOF_ACCOUNTS_CHUNK_SIZE`] accounts, and every account maps to at most
/// [`MULTIPROOF_STORAGES_CHUNK_SIZE`] storage slots.
struct ChunkedProofTargets {
    // Remaining targets; entries are consumed as chunks are yielded.
    proof_targets: MultiProofTargets,
}

impl ChunkedProofTargets {
    /// Wraps the given targets for chunked iteration.
    fn new(proof_targets: MultiProofTargets) -> Self {
        ChunkedProofTargets { proof_targets }
    }
}

impl Iterator for ChunkedProofTargets {
type Item = Vec<B256Map<B256Set>>;

fn next(&mut self) -> Option<Self::Item> {
if self.proof_targets.is_empty() {
return None;
}

let mut chunks = vec![B256Map::<B256Set>::default(); 1];

let accounts_chunk: B256Map<B256Set> =
self.proof_targets.drain().take(MULTIPROOF_ACCOUNTS_CHUNK_SIZE).collect();

for (address, storage_slots) in accounts_chunk {
let storage_chunks = storage_slots.into_iter().chunks(MULTIPROOF_STORAGES_CHUNK_SIZE);

for (i, chunk) in storage_chunks.into_iter().enumerate() {
if i >= chunks.len() {
chunks.push(B256Map::default());
}
chunks[i].entry(address).or_default().extend(chunk);
}
}

Some(chunks)
}
}

/// Calculate multiproof for the targets.
#[inline]
fn calculate_multiproof<Factory>(
Expand Down
Loading