From 6e6f9383d53ed6ba4457267abad869b8ed9382fe Mon Sep 17 00:00:00 2001 From: Richard Pringle Date: Thu, 19 Oct 2023 13:26:33 -0400 Subject: [PATCH 1/7] Add path to branch-node Doesn't actually get used yet --- firewood/src/merkle.rs | 180 ++++++++++++++++++++-------- firewood/src/merkle/node.rs | 30 +++-- firewood/src/merkle/partial_path.rs | 7 ++ 3 files changed, 158 insertions(+), 59 deletions(-) diff --git a/firewood/src/merkle.rs b/firewood/src/merkle.rs index 4f2f77c71..ca210b3b6 100644 --- a/firewood/src/merkle.rs +++ b/firewood/src/merkle.rs @@ -87,6 +87,7 @@ impl + Send + Sync> Merkle { self.store .put_item( Node::branch(BranchNode { + path: vec![].into(), children: [None; NBRANCH], value: None, children_encoded: Default::default(), @@ -168,17 +169,18 @@ impl + Send + Sync> Merkle { Ok(()) } + // TODO: replace `split` with a `split_at` function. Handle the logic for matching paths in `insert` instead. #[allow(clippy::too_many_arguments)] - fn split( - &self, - mut node_to_split: ObjRef, - parents: &mut [(ObjRef, u8)], + fn split<'a>( + &'a self, + mut node_to_split: ObjRef<'a>, + parents: &mut [(ObjRef<'a>, u8)], insert_path: &[u8], n_path: Vec, n_value: Option, val: Vec, deleted: &mut Vec, - ) -> Result>, MerkleError> { + ) -> Result, Vec)>, MerkleError> { let node_to_split_address = node_to_split.as_ptr(); let split_index = insert_path .iter() @@ -192,7 +194,7 @@ impl + Send + Sync> Merkle { node_to_split.write(|node| { // TODO: handle unwrap better - let path = node.inner.path_mut().unwrap(); + let path = node.inner.path_mut(); *path = PartialPath(new_split_node_path.to_vec()); @@ -218,6 +220,7 @@ impl + Send + Sync> Merkle { chd[n_path[idx] as usize] = Some(address); let new_branch = Node::branch(BranchNode { + path: PartialPath(matching_path[..idx].to_vec()), children: chd, value: None, children_encoded: Default::default(), @@ -225,22 +228,13 @@ impl + Send + Sync> Merkle { let new_branch_address = self.put_node(new_branch)?.as_ptr(); - if idx > 0 { - self.put_node(Node::from(NodeType::Extension(ExtNode { - path: PartialPath(matching_path[..idx].to_vec()), - child: new_branch_address, - child_encoded: None, - })))? - .as_ptr() - } else { - new_branch_address - } + new_branch_address } else { // paths do not diverge let (leaf_address, prefix, idx, value) = match (insert_path.len().cmp(&n_path.len()), n_value) { // no node-value means this is an extension node and we can therefore continue walking the tree - (Ordering::Greater, None) => return Ok(Some(val)), + (Ordering::Greater, None) => return Ok(Some((node_to_split, val))), // if the paths are equal, we overwrite the data (Ordering::Equal, _) => { @@ -280,7 +274,9 @@ impl + Send + Sync> Merkle { result = Err(e); } } - NodeType::Branch(_) => unreachable!(), + NodeType::Branch(u) => { + u.value = Some(Data(val)); + } } u.rehash(); @@ -298,7 +294,7 @@ impl + Send + Sync> Merkle { node_to_split .write(|u| { // TODO: handle unwraps better - let path = u.inner.path_mut().unwrap(); + let path = u.inner.path_mut(); *path = PartialPath(n_path[insert_path.len() + 1..].to_vec()); u.rehash(); @@ -306,6 +302,7 @@ impl + Send + Sync> Merkle { .unwrap(); let leaf_address = match &node_to_split.inner { + // TODO: handle BranchNode case NodeType::Extension(u) if u.path.len() == 0 => { deleted.push(node_to_split_address); u.chd() @@ -347,22 +344,14 @@ impl + Send + Sync> Merkle { let branch_address = self .put_node(Node::branch(BranchNode { + path: PartialPath(prefix.to_vec()), children, value, children_encoded: Default::default(), }))? .as_ptr(); - if !prefix.is_empty() { - self.put_node(Node::from(NodeType::Extension(ExtNode { - path: PartialPath(prefix.to_vec()), - child: branch_address, - child_encoded: None, - })))? - .as_ptr() - } else { - branch_address - } + branch_address }; // observation: @@ -428,26 +417,6 @@ impl + Send + Sync> Merkle { // For a Branch node, we look at the child pointer. If it points // to another node, we walk down that. Otherwise, we can store our // value as a leaf and we're done - NodeType::Branch(n) => match n.children[current_nibble as usize] { - Some(c) => (node, c), - None => { - // insert the leaf to the empty slot - // create a new leaf - let leaf_ptr = self - .put_node(Node::leaf(PartialPath(key_nibbles.collect()), Data(val)))? - .as_ptr(); - // set the current child to point to this leaf - node.write(|u| { - let uu = u.inner.as_branch_mut().unwrap(); - uu.children[current_nibble as usize] = Some(leaf_ptr); - u.rehash(); - }) - .unwrap(); - - break None; - } - }, - NodeType::Leaf(n) => { // we collided with another key; make a copy // of the stored key to pass into split @@ -468,6 +437,87 @@ impl + Send + Sync> Merkle { break None; } + NodeType::Branch(n) if n.path.len() == 0 => { + match n.children[current_nibble as usize] { + Some(c) => (node, c), + None => { + // insert the leaf to the empty slot + // create a new leaf + let leaf_ptr = self + .put_node(Node::leaf( + PartialPath(key_nibbles.collect()), + Data(val), + ))? + .as_ptr(); + // set the current child to point to this leaf + node.write(|u| { + let uu = u.inner.as_branch_mut().unwrap(); + uu.children[current_nibble as usize] = Some(leaf_ptr); + u.rehash(); + }) + .unwrap(); + + break None; + } + } + } + + NodeType::Branch(n) => { + let n_path = n.path.to_vec(); + let rem_path = once(current_nibble) + .chain(key_nibbles.clone()) + .collect::>(); + let n_path_len = n_path.len(); + let n_value = n.value.clone(); + + // TODO: don't always call split if the paths match (avoids an allocation) + if let Some((mut node, v)) = self.split( + node, + &mut parents, + &rem_path, + n_path, + n_value, + val, + &mut deleted, + )? { + (0..n_path_len).for_each(|_| { + key_nibbles.next(); + }); + + val = v; + + let next_nibble = rem_path[n_path_len] as usize; + // we're already in the match-arm that states that this was a branch-node + // TODO: cleaning up the split-logic should fix this awkwardness + let n_ptr = node.inner.as_branch().unwrap().children[next_nibble]; + + match n_ptr { + Some(n_ptr) => (self.get_node(n_ptr)?, n_ptr), + None => { + // insert the leaf to the empty slot + // create a new leaf + let leaf_ptr = self + .put_node(Node::leaf( + PartialPath(key_nibbles.collect()), + Data(val), + ))? + .as_ptr(); + // set the current child to point to this leaf + node.write(|u| { + let uu = u.inner.as_branch_mut().unwrap(); + uu.children[next_nibble] = Some(leaf_ptr); + u.rehash(); + }) + .unwrap(); + + break None; + } + } + } else { + break None; + } + } + NodeType::Extension(n) => { let n_path = n.path.to_vec(); let n_ptr = n.chd(); @@ -476,7 +526,7 @@ impl + Send + Sync> Merkle { .collect::>(); let n_path_len = n_path.len(); - if let Some(v) = self.split( + if let Some((_ext_node, v)) = self.split( node, &mut parents, &rem_path, @@ -570,6 +620,7 @@ impl + Send + Sync> Merkle { let branch = self .put_node(Node::branch(BranchNode { + path: vec![].into(), children: chd, value: Some(Data(val)), children_encoded: Default::default(), @@ -1003,10 +1054,34 @@ impl + Send + Sync> Merkle { }; let next_ptr = match &node_ref.inner { - NodeType::Branch(n) => match n.children[nib as usize] { + NodeType::Branch(n) if n.path.len() == 0 => match n.children[nib as usize] { Some(c) => c, None => return Ok(None), }, + NodeType::Branch(n) => { + let mut n_path_iter = n.path.iter().copied(); + + if n_path_iter.next() != Some(nib) { + return Ok(None); + } + + let path_matches = n_path_iter + .map(Some) + .all(|n_path_nibble| key_nibbles.next() == n_path_nibble); + + if !path_matches { + return Ok(None); + } + + let Some(nib) = key_nibbles.next() else { + break; + }; + + match n.children[nib as usize] { + Some(c) => c, + None => return Ok(None), + } + } NodeType::Leaf(n) => { let node_ref = if once(nib).chain(key_nibbles).eq(n.0.iter().copied()) { Some(node_ref) @@ -1617,6 +1692,7 @@ mod tests { } Node::branch(BranchNode { + path: vec![].into(), children, value, children_encoded, @@ -1639,7 +1715,7 @@ mod tests { } #[test] - fn insert_and_retrieve() { + fn insert_and_retrieve_one() { let key = b"hello"; let val = b"world"; diff --git a/firewood/src/merkle/node.rs b/firewood/src/merkle/node.rs index 604b38d2e..976108b12 100644 --- a/firewood/src/merkle/node.rs +++ b/firewood/src/merkle/node.rs @@ -59,6 +59,7 @@ impl> Encoded { #[derive(PartialEq, Eq, Clone)] pub struct BranchNode { + pub(super) path: PartialPath, pub(super) children: [Option; NBRANCH], pub(super) value: Option, pub(super) children_encoded: [Option>; NBRANCH], @@ -67,16 +68,20 @@ pub struct BranchNode { impl Debug for BranchNode { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { write!(f, "[Branch")?; + write!(f, " path={:?}", self.path)?; + for (i, c) in self.children.iter().enumerate() { if let Some(c) = c { write!(f, " ({i:x} {c:?})")?; } } + for (i, c) in self.children_encoded.iter().enumerate() { if let Some(c) = c { write!(f, " ({i:x} {:?})", c)?; } } + write!( f, " v={}]", @@ -122,10 +127,14 @@ impl BranchNode { chd_encoded[i] = Some(data).filter(|data| !data.is_empty()); } - Ok(BranchNode::new([None; NBRANCH], value, chd_encoded)) + // TODO: add path + let path = Vec::new().into(); + + Ok(BranchNode::new(path, [None; NBRANCH], value, chd_encoded)) } fn encode>(&self, store: &S) -> Vec { + // TODO: add path to encoded node let mut list = <[Encoded>; NBRANCH + 1]>::default(); for (i, c) in self.children.iter().enumerate() { @@ -175,11 +184,13 @@ impl BranchNode { } pub fn new( + path: PartialPath, chd: [Option; NBRANCH], value: Option>, chd_encoded: [Option>; NBRANCH], ) -> Self { BranchNode { + path, children: chd, value: value.map(Data), children_encoded: chd_encoded, @@ -396,6 +407,7 @@ impl NodeType { })) } } + // TODO: add path BRANCH_NODE_SIZE => Ok(NodeType::Branch(BranchNode::decode(buf)?)), size => Err(Box::new(bincode::ErrorKind::Custom(format!( "invalid size: {size}" @@ -411,14 +423,12 @@ impl NodeType { } } - pub fn path_mut(&mut self) -> Option<&mut PartialPath> { - let path = match self { - NodeType::Branch(_) => return None, + pub fn path_mut(&mut self) -> &mut PartialPath { + match self { + NodeType::Branch(u) => &mut u.path, NodeType::Leaf(node) => &mut node.0, NodeType::Extension(node) => &mut node.path, - }; - - path.into() + } } } @@ -449,6 +459,7 @@ impl Node { is_encoded_longer_than_hash_len: OnceLock::new(), encoded: OnceLock::new(), inner: NodeType::Branch(BranchNode { + path: vec![].into(), children: [Some(DiskAddress::null()); NBRANCH], value: Some(Data(Vec::new())), children_encoded: Default::default(), @@ -551,6 +562,7 @@ impl Storable for Node { }; match meta_raw.as_deref()[33] { Self::BRANCH_NODE => { + // TODO: add path let branch_header_size = NBRANCH as u64 * 8 + 4; let node_raw = mem.get_view(addr + META_SIZE, branch_header_size).ok_or( ShaleError::InvalidCacheView { @@ -619,6 +631,7 @@ impl Storable for Node { root_hash, is_encoded_longer_than_hash_len, NodeType::Branch(BranchNode { + path: vec![].into(), children: chd, value, children_encoded: chd_encoded, @@ -744,6 +757,7 @@ impl Storable for Node { + 1 + match &self.inner { NodeType::Branch(n) => { + // TODO: add path let mut encoded_len = 0; for emcoded in n.children_encoded.iter() { encoded_len += match emcoded { @@ -793,6 +807,7 @@ impl Storable for Node { match &self.inner { NodeType::Branch(n) => { + // TODO: add path cur.write_all(&[Self::BRANCH_NODE]).unwrap(); for c in n.children.iter() { cur.write_all(&match c { @@ -884,6 +899,7 @@ pub(super) mod tests { .unwrap_or_default(); Node::branch(BranchNode { + path: vec![].into(), children, value: value.map(Data), children_encoded, diff --git a/firewood/src/merkle/partial_path.rs b/firewood/src/merkle/partial_path.rs index eb2bd94d8..309f77c19 100644 --- a/firewood/src/merkle/partial_path.rs +++ b/firewood/src/merkle/partial_path.rs @@ -4,6 +4,7 @@ use crate::nibbles::NibblesIterator; use std::fmt::{self, Debug}; +// TODO: use smallvec /// PartialPath keeps a list of nibbles to represent a path on the Trie. #[derive(PartialEq, Eq, Clone)] pub struct PartialPath(pub Vec); @@ -24,6 +25,12 @@ impl std::ops::Deref for PartialPath { } } +impl From> for PartialPath { + fn from(value: Vec) -> Self { + Self(value) + } +} + impl PartialPath { pub fn into_inner(self) -> Vec { self.0 From 652d3ba3fe3ee3754bd0120e3f1cbd4b7f36dfdc Mon Sep 17 00:00:00 2001 From: Richard Pringle Date: Tue, 21 Nov 2023 12:05:35 -0500 Subject: [PATCH 2/7] Used named properties in Leaf-nodes --- firewood/src/merkle.rs | 40 ++++++++++++++++++------------------- firewood/src/merkle/node.rs | 34 ++++++++++++++++++------------- 2 files changed, 40 insertions(+), 34 deletions(-) diff --git a/firewood/src/merkle.rs b/firewood/src/merkle.rs index ca210b3b6..16fec33e7 100644 --- a/firewood/src/merkle.rs +++ b/firewood/src/merkle.rs @@ -245,7 +245,7 @@ impl + Send + Sync> Merkle { node_to_split, |u| { match &mut u.inner { - NodeType::Leaf(u) => u.1 = Data(val), + NodeType::Leaf(u) => u.data = Data(val), NodeType::Extension(u) => { let write_result = self.get_node(u.chd()).and_then(|mut b_ref| { @@ -420,8 +420,8 @@ impl + Send + Sync> Merkle { NodeType::Leaf(n) => { // we collided with another key; make a copy // of the stored key to pass into split - let n_path = n.0.to_vec(); - let n_value = Some(n.1.clone()); + let n_path = n.path.to_vec(); + let n_value = Some(n.data.clone()); let rem_path = once(current_nibble).chain(key_nibbles).collect::>(); self.split( @@ -572,13 +572,13 @@ impl + Send + Sync> Merkle { None } NodeType::Leaf(n) => { - if n.0.len() == 0 { - n.1 = Data(val); + if n.path.len() == 0 { + n.data = Data(val); None } else { - let idx = n.0[0]; - n.0 = PartialPath(n.0[1..].to_vec()); + let idx = n.path[0]; + n.path = PartialPath(n.path[1..].to_vec()); u.rehash(); Some((idx, true, None, val)) @@ -738,7 +738,7 @@ impl + Send + Sync> Merkle { // to: [p: Branch] -> [Leaf/Ext] let write_result = c_ref.write(|c| { let partial_path = match &mut c.inner { - NodeType::Leaf(n) => &mut n.0, + NodeType::Leaf(n) => &mut n.path, NodeType::Extension(n) => &mut n.path, _ => unreachable!(), }; @@ -779,7 +779,7 @@ impl + Send + Sync> Merkle { let mut path = n.path.clone().into_inner(); path.push(idx); let path0 = match &mut c.inner { - NodeType::Leaf(n) => &mut n.0, + NodeType::Leaf(n) => &mut n.path, NodeType::Extension(n) => &mut n.path, _ => unreachable!(), }; @@ -862,7 +862,7 @@ impl + Send + Sync> Merkle { // to: [Branch] -> [Leaf/Ext] let write_result = c_ref.write(|c| { match &mut c.inner { - NodeType::Leaf(n) => &mut n.0, + NodeType::Leaf(n) => &mut n.path, NodeType::Extension(n) => &mut n.path, _ => unreachable!(), } @@ -891,7 +891,7 @@ impl + Send + Sync> Merkle { let mut path = n.path.clone().into_inner(); path.push(idx); let path0 = match &mut c.inner { - NodeType::Leaf(n) => &mut n.0, + NodeType::Leaf(n) => &mut n.path, NodeType::Extension(n) => &mut n.path, _ => unreachable!(), }; @@ -954,7 +954,7 @@ impl + Send + Sync> Merkle { } NodeType::Leaf(n) => { - found = Some(n.1.clone()); + found = Some(n.data.clone()); deleted.push(node_ref.as_ptr()); self.after_remove_leaf(&mut parents, &mut deleted)? } @@ -1083,7 +1083,7 @@ impl + Send + Sync> Merkle { } } NodeType::Leaf(n) => { - let node_ref = if once(nib).chain(key_nibbles).eq(n.0.iter().copied()) { + let node_ref = if once(nib).chain(key_nibbles).eq(n.path.iter().copied()) { Some(node_ref) } else { None @@ -1118,7 +1118,7 @@ impl + Send + Sync> Merkle { // when we're done iterating over nibbles, check if the node we're at has a value let node_ref = match &node_ref.inner { NodeType::Branch(n) if n.value.as_ref().is_some() => Some(node_ref), - NodeType::Leaf(n) if n.0.len() == 0 => Some(node_ref), + NodeType::Leaf(n) if n.path.len() == 0 => Some(node_ref), _ => None, }; @@ -1217,7 +1217,7 @@ impl + Send + Sync> Merkle { } } NodeType::Leaf(n) => { - if n.0.len() == 0 { + if n.path.len() == 0 { nodes.push(u_ref.as_ptr()); } } @@ -1383,7 +1383,7 @@ impl<'a, S: shale::ShaleStore + Send + Sync> Stream for MerkleKeyVal let returned_key_value = match last_node.inner() { NodeType::Branch(branch) => (key, branch.value.to_owned().unwrap().to_vec()), - NodeType::Leaf(leaf) => (key, leaf.1.to_vec()), + NodeType::Leaf(leaf) => (key, leaf.data.to_vec()), NodeType::Extension(_) => todo!(), }; @@ -1513,7 +1513,7 @@ impl<'a, S: shale::ShaleStore + Send + Sync> Stream for MerkleKeyVal } => { let value = match last_node.inner() { NodeType::Branch(branch) => branch.value.to_owned().unwrap().to_vec(), - NodeType::Leaf(leaf) => leaf.1.to_vec(), + NodeType::Leaf(leaf) => leaf.data.to_vec(), NodeType::Extension(_) => todo!(), }; @@ -1537,7 +1537,7 @@ fn key_from_parents_and_leaf(parents: &[(ObjRef, u8)], leaf: &LeafNode) -> Vec std::ops::Deref for Ref<'a> { fn deref(&self) -> &[u8] { match &self.0.inner { NodeType::Branch(n) => n.value.as_ref().unwrap(), - NodeType::Leaf(n) => &n.1, + NodeType::Leaf(n) => &n.data, _ => unreachable!(), } } @@ -1606,7 +1606,7 @@ impl<'a, S: ShaleStore + Send + Sync> RefMut<'a, S> { |u| { modify(match &mut u.inner { NodeType::Branch(n) => &mut n.value.as_mut().unwrap().0, - NodeType::Leaf(n) => &mut n.1 .0, + NodeType::Leaf(n) => &mut n.data.0, _ => unreachable!(), }); u.rehash() diff --git a/firewood/src/merkle/node.rs b/firewood/src/merkle/node.rs index 976108b12..2a5ca8a8b 100644 --- a/firewood/src/merkle/node.rs +++ b/firewood/src/merkle/node.rs @@ -219,11 +219,14 @@ impl BranchNode { } #[derive(PartialEq, Eq, Clone)] -pub struct LeafNode(pub(super) PartialPath, pub(super) Data); +pub struct LeafNode { + pub(super) path: PartialPath, + pub(super) data: Data, +} impl Debug for LeafNode { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { - write!(f, "[Leaf {:?} {}]", self.0, hex::encode(&*self.1)) + write!(f, "[Leaf {:?} {}]", self.path, hex::encode(&*self.data)) } } @@ -232,8 +235,8 @@ impl LeafNode { bincode::DefaultOptions::new() .serialize( [ - Encoded::Raw(from_nibbles(&self.0.encode(true)).collect()), - Encoded::Raw(self.1.to_vec()), + Encoded::Raw(from_nibbles(&self.path.encode(true)).collect()), + Encoded::Raw(self.data.to_vec()), ] .as_slice(), ) @@ -241,15 +244,18 @@ impl LeafNode { } pub fn new(path: Vec, data: Vec) -> Self { - LeafNode(PartialPath(path), Data(data)) + LeafNode { + path: PartialPath(path), + data: Data(data), + } } pub fn path(&self) -> &PartialPath { - &self.0 + &self.path } pub fn data(&self) -> &Data { - &self.1 + &self.data } } @@ -426,7 +432,7 @@ impl NodeType { pub fn path_mut(&mut self) -> &mut PartialPath { match self { NodeType::Branch(u) => &mut u.path, - NodeType::Leaf(node) => &mut node.0, + NodeType::Leaf(node) => &mut node.path, NodeType::Extension(node) => &mut node.path, } } @@ -499,7 +505,7 @@ impl Node { } pub fn leaf(path: PartialPath, data: Data) -> Self { - Self::from(NodeType::Leaf(LeafNode(path, data))) + Self::from(NodeType::Leaf(LeafNode { path, data })) } pub fn inner(&self) -> &NodeType { @@ -745,7 +751,7 @@ impl Storable for Node { Ok(Self::new_from_hash( root_hash, is_encoded_longer_than_hash_len, - NodeType::Leaf(LeafNode(path, value)), + NodeType::Leaf(LeafNode { path, data: value }), )) } _ => Err(ShaleError::InvalidNodeType), @@ -781,7 +787,7 @@ impl Storable for Node { None => 1, } } - NodeType::Leaf(n) => 1 + 4 + n.0.dehydrated_len() + n.1.len() as u64, + NodeType::Leaf(n) => 1 + 4 + n.path.dehydrated_len() + n.data.len() as u64, } } @@ -851,11 +857,11 @@ impl Storable for Node { } NodeType::Leaf(n) => { cur.write_all(&[Self::LEAF_NODE])?; - let path: Vec = from_nibbles(&n.0.encode(true)).collect(); + let path: Vec = from_nibbles(&n.path.encode(true)).collect(); cur.write_all(&[path.len() as u8])?; - cur.write_all(&(n.1.len() as u32).to_le_bytes())?; + cur.write_all(&(n.data.len() as u32).to_le_bytes())?; cur.write_all(&path)?; - cur.write_all(&n.1).map_err(ShaleError::Io) + cur.write_all(&n.data).map_err(ShaleError::Io) } } } From 058ec3ee22f7e4c4242482a49a019d023d2326e6 Mon Sep 17 00:00:00 2001 From: Richard Pringle Date: Tue, 21 Nov 2023 12:07:16 -0500 Subject: [PATCH 3/7] Rename NBRANCH to MAX_CHILDREN --- firewood/src/merkle.rs | 12 +++++----- firewood/src/merkle/node.rs | 48 +++++++++++++++++++++---------------- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/firewood/src/merkle.rs b/firewood/src/merkle.rs index 16fec33e7..c1dc007a0 100644 --- a/firewood/src/merkle.rs +++ b/firewood/src/merkle.rs @@ -20,7 +20,7 @@ mod partial_path; pub(super) mod range_proof; mod trie_hash; -pub use node::{BranchNode, Data, ExtNode, LeafNode, Node, NodeType, NBRANCH}; +pub use node::{BranchNode, Data, ExtNode, LeafNode, Node, NodeType, MAX_CHILDREN}; pub use partial_path::PartialPath; pub use trie_hash::{TrieHash, TRIE_HASH_LEN}; @@ -88,7 +88,7 @@ impl + Send + Sync> Merkle { .put_item( Node::branch(BranchNode { path: vec![].into(), - children: [None; NBRANCH], + children: [None; MAX_CHILDREN], value: None, children_encoded: Default::default(), }), @@ -204,7 +204,7 @@ impl + Send + Sync> Merkle { let new_node = Node::leaf(PartialPath(new_node_path.to_vec()), Data(val)); let leaf_address = self.put_node(new_node)?.as_ptr(); - let mut chd = [None; NBRANCH]; + let mut chd = [None; MAX_CHILDREN]; let last_matching_nibble = matching_path[idx]; chd[last_matching_nibble as usize] = Some(leaf_address); @@ -338,7 +338,7 @@ impl + Send + Sync> Merkle { }; // [parent] (-> [ExtNode]) -> [branch with v] -> [Leaf] - let mut children = [None; NBRANCH]; + let mut children = [None; MAX_CHILDREN]; children[idx] = leaf_address.into(); @@ -607,7 +607,7 @@ impl + Send + Sync> Merkle { }; if let Some((idx, more, ext, val)) = info { - let mut chd = [None; NBRANCH]; + let mut chd = [None; MAX_CHILDREN]; let c_ptr = if more { u_ptr @@ -1685,7 +1685,7 @@ mod tests { fn branch(value: Vec, encoded_child: Option>) -> Node { let children = Default::default(); let value = Some(Data(value)); - let mut children_encoded = <[Option>; NBRANCH]>::default(); + let mut children_encoded = <[Option>; MAX_CHILDREN]>::default(); if let Some(child) = encoded_child { children_encoded[0] = Some(child); diff --git a/firewood/src/merkle/node.rs b/firewood/src/merkle/node.rs index 2a5ca8a8b..362991ce4 100644 --- a/firewood/src/merkle/node.rs +++ b/firewood/src/merkle/node.rs @@ -20,7 +20,7 @@ use crate::nibbles::Nibbles; use super::{from_nibbles, PartialPath, TrieHash, TRIE_HASH_LEN}; -pub const NBRANCH: usize = 16; +pub const MAX_CHILDREN: usize = 16; const EXT_NODE_SIZE: usize = 2; const BRANCH_NODE_SIZE: usize = 17; @@ -60,9 +60,9 @@ impl> Encoded { #[derive(PartialEq, Eq, Clone)] pub struct BranchNode { pub(super) path: PartialPath, - pub(super) children: [Option; NBRANCH], + pub(super) children: [Option; MAX_CHILDREN], pub(super) value: Option, - pub(super) children_encoded: [Option>; NBRANCH], + pub(super) children_encoded: [Option>; MAX_CHILDREN], } impl Debug for BranchNode { @@ -119,7 +119,7 @@ impl BranchNode { let value = Some(data).filter(|data| !data.is_empty()); // encode all children. - let mut chd_encoded: [Option>; NBRANCH] = Default::default(); + let mut chd_encoded: [Option>; MAX_CHILDREN] = Default::default(); // we popped the last element, so their should only be NBRANCH items left for (i, chd) in items.into_iter().enumerate() { @@ -130,12 +130,17 @@ impl BranchNode { // TODO: add path let path = Vec::new().into(); - Ok(BranchNode::new(path, [None; NBRANCH], value, chd_encoded)) + Ok(BranchNode::new( + path, + [None; MAX_CHILDREN], + value, + chd_encoded, + )) } fn encode>(&self, store: &S) -> Vec { // TODO: add path to encoded node - let mut list = <[Encoded>; NBRANCH + 1]>::default(); + let mut list = <[Encoded>; MAX_CHILDREN + 1]>::default(); for (i, c) in self.children.iter().enumerate() { match c { @@ -175,7 +180,8 @@ impl BranchNode { } if let Some(Data(val)) = &self.value { - list[NBRANCH] = Encoded::Data(bincode::DefaultOptions::new().serialize(val).unwrap()); + list[MAX_CHILDREN] = + Encoded::Data(bincode::DefaultOptions::new().serialize(val).unwrap()); } bincode::DefaultOptions::new() @@ -185,9 +191,9 @@ impl BranchNode { pub fn new( path: PartialPath, - chd: [Option; NBRANCH], + chd: [Option; MAX_CHILDREN], value: Option>, - chd_encoded: [Option>; NBRANCH], + chd_encoded: [Option>; MAX_CHILDREN], ) -> Self { BranchNode { path, @@ -201,19 +207,19 @@ impl BranchNode { &self.value } - pub fn chd(&self) -> &[Option; NBRANCH] { + pub fn chd(&self) -> &[Option; MAX_CHILDREN] { &self.children } - pub fn chd_mut(&mut self) -> &mut [Option; NBRANCH] { + pub fn chd_mut(&mut self) -> &mut [Option; MAX_CHILDREN] { &mut self.children } - pub fn chd_encode(&self) -> &[Option>; NBRANCH] { + pub fn chd_encode(&self) -> &[Option>; MAX_CHILDREN] { &self.children_encoded } - pub fn chd_encoded_mut(&mut self) -> &mut [Option>; NBRANCH] { + pub fn chd_encoded_mut(&mut self) -> &mut [Option>; MAX_CHILDREN] { &mut self.children_encoded } } @@ -466,7 +472,7 @@ impl Node { encoded: OnceLock::new(), inner: NodeType::Branch(BranchNode { path: vec![].into(), - children: [Some(DiskAddress::null()); NBRANCH], + children: [Some(DiskAddress::null()); MAX_CHILDREN], value: Some(Data(Vec::new())), children_encoded: Default::default(), }), @@ -569,7 +575,7 @@ impl Storable for Node { match meta_raw.as_deref()[33] { Self::BRANCH_NODE => { // TODO: add path - let branch_header_size = NBRANCH as u64 * 8 + 4; + let branch_header_size = MAX_CHILDREN as u64 * 8 + 4; let node_raw = mem.get_view(addr + META_SIZE, branch_header_size).ok_or( ShaleError::InvalidCacheView { offset: addr + META_SIZE, @@ -577,7 +583,7 @@ impl Storable for Node { }, )?; let mut cur = Cursor::new(node_raw.as_deref()); - let mut chd = [None; NBRANCH]; + let mut chd = [None; MAX_CHILDREN]; let mut buff = [0; 8]; for chd in chd.iter_mut() { cur.read_exact(&mut buff)?; @@ -601,7 +607,7 @@ impl Storable for Node { .as_deref(), )) }; - let mut chd_encoded: [Option>; NBRANCH] = Default::default(); + let mut chd_encoded: [Option>; MAX_CHILDREN] = Default::default(); let offset = if raw_len == u32::MAX as u64 { addr + META_SIZE + branch_header_size as usize } else { @@ -771,7 +777,7 @@ impl Storable for Node { None => 1, } } - NBRANCH as u64 * 8 + MAX_CHILDREN as u64 * 8 + 4 + match &n.value { Some(val) => val.len() as u64, @@ -884,8 +890,8 @@ pub(super) mod tests { value: Option>, repeated_encoded_child: Option>, ) -> Node { - let children: [Option; NBRANCH] = from_fn(|i| { - if i < NBRANCH / 2 { + let children: [Option; MAX_CHILDREN] = from_fn(|i| { + if i < MAX_CHILDREN / 2 { DiskAddress::from(repeated_disk_address).into() } else { None @@ -895,7 +901,7 @@ pub(super) mod tests { let children_encoded = repeated_encoded_child .map(|child| { from_fn(|i| { - if i < NBRANCH / 2 { + if i < MAX_CHILDREN / 2 { child.clone().into() } else { None From 0451a0c27f384f531138a8614667a1cecf2c9da9 Mon Sep 17 00:00:00 2001 From: Richard Pringle Date: Thu, 2 Nov 2023 16:33:33 -0400 Subject: [PATCH 4/7] Split leaf, branch, and extension node files --- firewood/Cargo.toml | 1 + firewood/src/merkle.rs | 25 +- firewood/src/merkle/node.rs | 368 +++--------------- firewood/src/merkle/node/branch.rs | 181 +++++++++ firewood/src/merkle/node/extension.rs | 91 +++++ firewood/src/merkle/node/leaf.rs | 72 ++++ .../src/merkle/{ => node}/partial_path.rs | 83 ++-- firewood/src/shale/mod.rs | 2 +- 8 files changed, 474 insertions(+), 349 deletions(-) create mode 100644 firewood/src/merkle/node/branch.rs create mode 100644 firewood/src/merkle/node/extension.rs create mode 100644 firewood/src/merkle/node/leaf.rs rename firewood/src/merkle/{ => node}/partial_path.rs (50%) diff --git a/firewood/Cargo.toml b/firewood/Cargo.toml index dcbdb0f8c..8ae8ea51f 100644 --- a/firewood/Cargo.toml +++ b/firewood/Cargo.toml @@ -34,6 +34,7 @@ thiserror = "1.0.38" tokio = { version = "1.21.1", features = ["rt", "sync", "macros", "rt-multi-thread"] } typed-builder = "0.18.0" bincode = "1.3.3" +bitflags = "2.4.1" [dev-dependencies] criterion = {version = "0.5.1", features = ["async_tokio"]} diff --git a/firewood/src/merkle.rs b/firewood/src/merkle.rs index c1dc007a0..42118bcf6 100644 --- a/firewood/src/merkle.rs +++ b/firewood/src/merkle.rs @@ -16,12 +16,10 @@ use std::{ use thiserror::Error; mod node; -mod partial_path; pub(super) mod range_proof; mod trie_hash; -pub use node::{BranchNode, Data, ExtNode, LeafNode, Node, NodeType, MAX_CHILDREN}; -pub use partial_path::PartialPath; +pub use node::{BranchNode, Data, ExtNode, LeafNode, Node, NodeType, PartialPath, MAX_CHILDREN}; pub use trie_hash::{TrieHash, TRIE_HASH_LEN}; type ObjRef<'a> = shale::ObjRef<'a, Node>; @@ -226,9 +224,7 @@ impl + Send + Sync> Merkle { children_encoded: Default::default(), }); - let new_branch_address = self.put_node(new_branch)?.as_ptr(); - - new_branch_address + self.put_node(new_branch)?.as_ptr() } else { // paths do not diverge let (leaf_address, prefix, idx, value) = @@ -342,16 +338,13 @@ impl + Send + Sync> Merkle { children[idx] = leaf_address.into(); - let branch_address = self - .put_node(Node::branch(BranchNode { - path: PartialPath(prefix.to_vec()), - children, - value, - children_encoded: Default::default(), - }))? - .as_ptr(); - - branch_address + self.put_node(Node::branch(BranchNode { + path: PartialPath(prefix.to_vec()), + children, + value, + children_encoded: Default::default(), + }))? + .as_ptr() }; // observation: diff --git a/firewood/src/merkle/node.rs b/firewood/src/merkle/node.rs index 362991ce4..dfe905498 100644 --- a/firewood/src/merkle/node.rs +++ b/firewood/src/merkle/node.rs @@ -3,11 +3,12 @@ use crate::shale::{disk_address::DiskAddress, CachedStore, ShaleError, ShaleStore, Storable}; use bincode::{Error, Options}; +use bitflags::bitflags; use enum_as_inner::EnumAsInner; use serde::{de::DeserializeOwned, Deserialize, Serialize}; use sha3::{Digest, Keccak256}; use std::{ - fmt::{self, Debug}, + fmt::Debug, io::{Cursor, Read, Write}, sync::{ atomic::{AtomicBool, Ordering}, @@ -15,15 +16,28 @@ use std::{ }, }; +mod branch; +mod extension; +mod leaf; +mod partial_path; + +pub use branch::{BranchNode, MAX_CHILDREN, SIZE as BRANCH_NODE_SIZE}; +pub use extension::{ExtNode, SIZE as EXTENSION_NODE_SIZE}; +pub use leaf::LeafNode; +pub use partial_path::PartialPath; + use crate::merkle::to_nibble_array; use crate::nibbles::Nibbles; -use super::{from_nibbles, PartialPath, TrieHash, TRIE_HASH_LEN}; - -pub const MAX_CHILDREN: usize = 16; +use super::{from_nibbles, TrieHash, TRIE_HASH_LEN}; -const EXT_NODE_SIZE: usize = 2; -const BRANCH_NODE_SIZE: usize = 17; +bitflags! { + // should only ever be the size of a nibble + struct Flags: u8 { + const TERMINAL = 0b0010; + const ODD_LEN = 0b0001; + } +} #[derive(Debug, PartialEq, Eq, Clone)] pub struct Data(pub(super) Vec); @@ -35,8 +49,14 @@ impl std::ops::Deref for Data { } } +impl From> for Data { + fn from(v: Vec) -> Self { + Self(v) + } +} + #[derive(Serialize, Deserialize, Debug)] -pub(crate) enum Encoded { +enum Encoded { Raw(T), Data(T), } @@ -57,292 +77,6 @@ impl> Encoded { } } -#[derive(PartialEq, Eq, Clone)] -pub struct BranchNode { - pub(super) path: PartialPath, - pub(super) children: [Option; MAX_CHILDREN], - pub(super) value: Option, - pub(super) children_encoded: [Option>; MAX_CHILDREN], -} - -impl Debug for BranchNode { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { - write!(f, "[Branch")?; - write!(f, " path={:?}", self.path)?; - - for (i, c) in self.children.iter().enumerate() { - if let Some(c) = c { - write!(f, " ({i:x} {c:?})")?; - } - } - - for (i, c) in self.children_encoded.iter().enumerate() { - if let Some(c) = c { - write!(f, " ({i:x} {:?})", c)?; - } - } - - write!( - f, - " v={}]", - match &self.value { - Some(v) => hex::encode(&**v), - None => "nil".to_string(), - } - ) - } -} - -impl BranchNode { - pub(super) fn single_child(&self) -> (Option<(DiskAddress, u8)>, bool) { - let mut has_chd = false; - let mut only_chd = None; - for (i, c) in self.children.iter().enumerate() { - if c.is_some() { - has_chd = true; - if only_chd.is_some() { - only_chd = None; - break; - } - only_chd = (*c).map(|e| (e, i as u8)) - } - } - (only_chd, has_chd) - } - - pub fn decode(buf: &[u8]) -> Result { - let mut items: Vec>> = bincode::DefaultOptions::new().deserialize(buf)?; - - // we've already validated the size, that's why we can safely unwrap - let data = items.pop().unwrap().decode()?; - // Extract the value of the branch node and set to None if it's an empty Vec - let value = Some(data).filter(|data| !data.is_empty()); - - // encode all children. - let mut chd_encoded: [Option>; MAX_CHILDREN] = Default::default(); - - // we popped the last element, so their should only be NBRANCH items left - for (i, chd) in items.into_iter().enumerate() { - let data = chd.decode()?; - chd_encoded[i] = Some(data).filter(|data| !data.is_empty()); - } - - // TODO: add path - let path = Vec::new().into(); - - Ok(BranchNode::new( - path, - [None; MAX_CHILDREN], - value, - chd_encoded, - )) - } - - fn encode>(&self, store: &S) -> Vec { - // TODO: add path to encoded node - let mut list = <[Encoded>; MAX_CHILDREN + 1]>::default(); - - for (i, c) in self.children.iter().enumerate() { - match c { - Some(c) => { - let mut c_ref = store.get_item(*c).unwrap(); - - if c_ref.is_encoded_longer_than_hash_len::(store) { - list[i] = Encoded::Data( - bincode::DefaultOptions::new() - .serialize(&&(*c_ref.get_root_hash::(store))[..]) - .unwrap(), - ); - - // See struct docs for ordering requirements - if c_ref.lazy_dirty.load(Ordering::Relaxed) { - c_ref.write(|_| {}).unwrap(); - c_ref.lazy_dirty.store(false, Ordering::Relaxed) - } - } else { - let child_encoded = &c_ref.get_encoded::(store); - list[i] = Encoded::Raw(child_encoded.to_vec()); - } - } - None => { - // Check if there is already a calculated encoded value for the child, which - // can happen when manually constructing a trie from proof. - if let Some(v) = &self.children_encoded[i] { - if v.len() == TRIE_HASH_LEN { - list[i] = - Encoded::Data(bincode::DefaultOptions::new().serialize(v).unwrap()); - } else { - list[i] = Encoded::Raw(v.clone()); - } - } - } - }; - } - - if let Some(Data(val)) = &self.value { - list[MAX_CHILDREN] = - Encoded::Data(bincode::DefaultOptions::new().serialize(val).unwrap()); - } - - bincode::DefaultOptions::new() - .serialize(list.as_slice()) - .unwrap() - } - - pub fn new( - path: PartialPath, - chd: [Option; MAX_CHILDREN], - value: Option>, - chd_encoded: [Option>; MAX_CHILDREN], - ) -> Self { - BranchNode { - path, - children: chd, - value: value.map(Data), - children_encoded: chd_encoded, - } - } - - pub fn value(&self) -> &Option { - &self.value - } - - pub fn chd(&self) -> &[Option; MAX_CHILDREN] { - &self.children - } - - pub fn chd_mut(&mut self) -> &mut [Option; MAX_CHILDREN] { - &mut self.children - } - - pub fn chd_encode(&self) -> &[Option>; MAX_CHILDREN] { - &self.children_encoded - } - - pub fn chd_encoded_mut(&mut self) -> &mut [Option>; MAX_CHILDREN] { - &mut self.children_encoded - } -} - -#[derive(PartialEq, Eq, Clone)] -pub struct LeafNode { - pub(super) path: PartialPath, - pub(super) data: Data, -} - -impl Debug for LeafNode { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { - write!(f, "[Leaf {:?} {}]", self.path, hex::encode(&*self.data)) - } -} - -impl LeafNode { - fn encode(&self) -> Vec { - bincode::DefaultOptions::new() - .serialize( - [ - Encoded::Raw(from_nibbles(&self.path.encode(true)).collect()), - Encoded::Raw(self.data.to_vec()), - ] - .as_slice(), - ) - .unwrap() - } - - pub fn new(path: Vec, data: Vec) -> Self { - LeafNode { - path: PartialPath(path), - data: Data(data), - } - } - - pub fn path(&self) -> &PartialPath { - &self.path - } - - pub fn data(&self) -> &Data { - &self.data - } -} - -#[derive(PartialEq, Eq, Clone)] -pub struct ExtNode { - pub(crate) path: PartialPath, - pub(crate) child: DiskAddress, - pub(crate) child_encoded: Option>, -} - -impl Debug for ExtNode { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { - let Self { - path, - child, - child_encoded, - } = self; - write!(f, "[Extension {path:?} {child:?} {child_encoded:?}]",) - } -} - -impl ExtNode { - fn encode>(&self, store: &S) -> Vec { - let mut list = <[Encoded>; 2]>::default(); - list[0] = Encoded::Data( - bincode::DefaultOptions::new() - .serialize(&from_nibbles(&self.path.encode(false)).collect::>()) - .unwrap(), - ); - - if !self.child.is_null() { - let mut r = store.get_item(self.child).unwrap(); - - if r.is_encoded_longer_than_hash_len(store) { - list[1] = Encoded::Data( - bincode::DefaultOptions::new() - .serialize(&&(*r.get_root_hash(store))[..]) - .unwrap(), - ); - - if r.lazy_dirty.load(Ordering::Relaxed) { - r.write(|_| {}).unwrap(); - r.lazy_dirty.store(false, Ordering::Relaxed); - } - } else { - list[1] = Encoded::Raw(r.get_encoded(store).to_vec()); - } - } else { - // Check if there is already a caclucated encoded value for the child, which - // can happen when manually constructing a trie from proof. - if let Some(v) = &self.child_encoded { - if v.len() == TRIE_HASH_LEN { - list[1] = Encoded::Data(bincode::DefaultOptions::new().serialize(v).unwrap()); - } else { - list[1] = Encoded::Raw(v.clone()); - } - } - } - - bincode::DefaultOptions::new() - .serialize(list.as_slice()) - .unwrap() - } - - pub fn chd(&self) -> DiskAddress { - self.child - } - - pub fn chd_encoded(&self) -> Option<&[u8]> { - self.child_encoded.as_deref() - } - - pub fn chd_mut(&mut self) -> &mut DiskAddress { - &mut self.child - } - - pub fn chd_encoded_mut(&mut self) -> &mut Option> { - &mut self.child_encoded - } -} - #[derive(Debug)] pub struct Node { pub(super) root_hash: OnceLock, @@ -356,6 +90,7 @@ pub struct Node { } impl Eq for Node {} + impl PartialEq for Node { fn eq(&self, other: &Self) -> bool { let Node { @@ -372,6 +107,7 @@ impl PartialEq for Node { && *inner == other.inner } } + impl Clone for Node { fn clone(&self) -> Self { Self { @@ -397,7 +133,7 @@ impl NodeType { let items: Vec>> = bincode::DefaultOptions::new().deserialize(buf)?; match items.len() { - EXT_NODE_SIZE => { + EXTENSION_NODE_SIZE => { let mut items = items.into_iter(); let decoded_key: Vec = items.next().unwrap().decode()?; @@ -405,7 +141,8 @@ impl NodeType { let decoded_key_nibbles = Nibbles::<0>::new(&decoded_key); let (cur_key_path, term) = - PartialPath::from_nibbles(decoded_key_nibbles.into_iter()); + dbg!(PartialPath::from_nibbles(decoded_key_nibbles.into_iter())); + let cur_key = cur_key_path.into_inner(); let data: Vec = items.next().unwrap().decode()?; @@ -550,28 +287,33 @@ impl Node { impl Storable for Node { fn deserialize(addr: usize, mem: &T) -> Result { - const META_SIZE: usize = 32 + 1 + 1; + const META_SIZE: usize = TRIE_HASH_LEN + 1 + 1; + let meta_raw = mem.get_view(addr, META_SIZE as u64) .ok_or(ShaleError::InvalidCacheView { offset: addr, size: META_SIZE as u64, })?; - let attrs = meta_raw.as_deref()[32]; + + let attrs = meta_raw.as_deref()[TRIE_HASH_LEN]; + let root_hash = if attrs & Node::ROOT_HASH_VALID_BIT == 0 { None } else { Some(TrieHash( - meta_raw.as_deref()[0..32] + meta_raw.as_deref()[..TRIE_HASH_LEN] .try_into() .expect("invalid slice"), )) }; + let is_encoded_longer_than_hash_len = if attrs & Node::IS_ENCODED_BIG_VALID == 0 { None } else { Some(attrs & Node::LONG_BIT != 0) }; + match meta_raw.as_deref()[33] { Self::BRANCH_NODE => { // TODO: add path @@ -650,6 +392,7 @@ impl Storable for Node { }), )) } + Self::EXT_NODE => { let ext_header_size = 1 + 8; let node_raw = mem.get_view(addr + META_SIZE, ext_header_size).ok_or( @@ -688,9 +431,12 @@ impl Storable for Node { offset: addr + META_SIZE + ext_header_size as usize + path_len as usize, size: 1, })?; + cur = Cursor::new(encoded_len_raw.as_deref()); cur.read_exact(&mut buff)?; + let encoded_len = buff[0] as u64; + let encoded: Option> = if encoded_len != 0 { let emcoded_raw = mem .get_view( @@ -711,7 +457,7 @@ impl Storable for Node { None }; - Ok(Self::new_from_hash( + let node = Self::new_from_hash( root_hash, is_encoded_longer_than_hash_len, NodeType::Extension(ExtNode { @@ -719,8 +465,11 @@ impl Storable for Node { child: DiskAddress::from(ptr as usize), child_encoded: encoded, }), - )) + ); + + Ok(node) } + Self::LEAF_NODE => { let leaf_header_size = 1 + 4; let node_raw = mem.get_view(addr + META_SIZE, leaf_header_size).ok_or( @@ -729,11 +478,15 @@ impl Storable for Node { size: leaf_header_size, }, )?; + let mut cur = Cursor::new(node_raw.as_deref()); let mut buff = [0; 4]; cur.read_exact(&mut buff[..1])?; + let path_len = buff[0] as u64; + cur.read_exact(&mut buff)?; + let data_len = u32::from_le_bytes(buff) as u64; let remainder = mem .get_view( @@ -753,12 +506,15 @@ impl Storable for Node { .collect(); let (path, _) = PartialPath::decode(&nibbles); - let value = Data(remainder.as_deref()[path_len as usize..].to_vec()); - Ok(Self::new_from_hash( + let data = Data(remainder.as_deref()[path_len as usize..].to_vec()); + + let node = Self::new_from_hash( root_hash, is_encoded_longer_than_hash_len, - NodeType::Leaf(LeafNode { path, data: value }), - )) + NodeType::Leaf(LeafNode { path, data }), + ); + + Ok(node) } _ => Err(ShaleError::InvalidNodeType), } @@ -787,13 +543,13 @@ impl Storable for Node { } NodeType::Extension(n) => { 1 + 8 - + n.path.dehydrated_len() + + n.path.serialized_len() + match n.chd_encoded() { Some(v) => 1 + v.len() as u64, None => 1, } } - NodeType::Leaf(n) => 1 + 4 + n.path.dehydrated_len() + n.data.len() as u64, + NodeType::Leaf(n) => 1 + 4 + n.path.serialized_len() + n.data.len() as u64, } } diff --git a/firewood/src/merkle/node/branch.rs b/firewood/src/merkle/node/branch.rs new file mode 100644 index 000000000..6d9a421a0 --- /dev/null +++ b/firewood/src/merkle/node/branch.rs @@ -0,0 +1,181 @@ +use super::{Data, Encoded, Node}; +use crate::{ + merkle::{PartialPath, TRIE_HASH_LEN}, + shale::DiskAddress, + shale::ShaleStore, +}; +use bincode::{Error, Options}; +use std::{ + fmt::{Debug, Error as FmtError, Formatter}, + sync::atomic::Ordering, +}; + +pub const MAX_CHILDREN: usize = 16; +pub const SIZE: usize = MAX_CHILDREN + 1; + +#[derive(PartialEq, Eq, Clone)] +pub struct BranchNode { + pub(crate) path: PartialPath, + pub(crate) children: [Option; MAX_CHILDREN], + pub(crate) value: Option, + pub(crate) children_encoded: [Option>; MAX_CHILDREN], +} + +impl Debug for BranchNode { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), FmtError> { + write!(f, "[Branch")?; + write!(f, " path={:?}", self.path)?; + + for (i, c) in self.children.iter().enumerate() { + if let Some(c) = c { + write!(f, " ({i:x} {c:?})")?; + } + } + + for (i, c) in self.children_encoded.iter().enumerate() { + if let Some(c) = c { + write!(f, " ({i:x} {:?})", c)?; + } + } + + write!( + f, + " v={}]", + match &self.value { + Some(v) => hex::encode(&**v), + None => "nil".to_string(), + } + ) + } +} + +impl BranchNode { + pub fn new( + path: PartialPath, + chd: [Option; MAX_CHILDREN], + value: Option>, + chd_encoded: [Option>; MAX_CHILDREN], + ) -> Self { + BranchNode { + path, + children: chd, + value: value.map(Data), + children_encoded: chd_encoded, + } + } + + pub fn value(&self) -> &Option { + &self.value + } + + pub fn chd(&self) -> &[Option; MAX_CHILDREN] { + &self.children + } + + pub fn chd_mut(&mut self) -> &mut [Option; MAX_CHILDREN] { + &mut self.children + } + + pub fn chd_encode(&self) -> &[Option>; MAX_CHILDREN] { + &self.children_encoded + } + + pub fn chd_encoded_mut(&mut self) -> &mut [Option>; MAX_CHILDREN] { + &mut self.children_encoded + } + + pub(crate) fn single_child(&self) -> (Option<(DiskAddress, u8)>, bool) { + let mut has_chd = false; + let mut only_chd = None; + for (i, c) in self.children.iter().enumerate() { + if c.is_some() { + has_chd = true; + if only_chd.is_some() { + only_chd = None; + break; + } + only_chd = (*c).map(|e| (e, i as u8)) + } + } + (only_chd, has_chd) + } + + pub(super) fn decode(buf: &[u8]) -> Result { + let mut items: Vec>> = bincode::DefaultOptions::new().deserialize(buf)?; + + // we've already validated the size, that's why we can safely unwrap + let data = items.pop().unwrap().decode()?; + // Extract the value of the branch node and set to None if it's an empty Vec + let value = Some(data).filter(|data| !data.is_empty()); + + // encode all children. + let mut chd_encoded: [Option>; MAX_CHILDREN] = Default::default(); + + // we popped the last element, so their should only be NBRANCH items left + for (i, chd) in items.into_iter().enumerate() { + let data = chd.decode()?; + chd_encoded[i] = Some(data).filter(|data| !data.is_empty()); + } + + // TODO: add path + let path = Vec::new().into(); + + Ok(BranchNode::new( + path, + [None; MAX_CHILDREN], + value, + chd_encoded, + )) + } + + pub(super) fn encode>(&self, store: &S) -> Vec { + // TODO: add path to encoded node + let mut list = <[Encoded>; MAX_CHILDREN + 1]>::default(); + + for (i, c) in self.children.iter().enumerate() { + match c { + Some(c) => { + let mut c_ref = store.get_item(*c).unwrap(); + + if c_ref.is_encoded_longer_than_hash_len::(store) { + list[i] = Encoded::Data( + bincode::DefaultOptions::new() + .serialize(&&(*c_ref.get_root_hash::(store))[..]) + .unwrap(), + ); + + // See struct docs for ordering requirements + if c_ref.lazy_dirty.load(Ordering::Relaxed) { + c_ref.write(|_| {}).unwrap(); + c_ref.lazy_dirty.store(false, Ordering::Relaxed) + } + } else { + let child_encoded = &c_ref.get_encoded::(store); + list[i] = Encoded::Raw(child_encoded.to_vec()); + } + } + None => { + // Check if there is already a calculated encoded value for the child, which + // can happen when manually constructing a trie from proof. + if let Some(v) = &self.children_encoded[i] { + if v.len() == TRIE_HASH_LEN { + list[i] = + Encoded::Data(bincode::DefaultOptions::new().serialize(v).unwrap()); + } else { + list[i] = Encoded::Raw(v.clone()); + } + } + } + }; + } + + if let Some(Data(val)) = &self.value { + list[MAX_CHILDREN] = + Encoded::Data(bincode::DefaultOptions::new().serialize(val).unwrap()); + } + + bincode::DefaultOptions::new() + .serialize(list.as_slice()) + .unwrap() + } +} diff --git a/firewood/src/merkle/node/extension.rs b/firewood/src/merkle/node/extension.rs new file mode 100644 index 000000000..5aa6a0d9b --- /dev/null +++ b/firewood/src/merkle/node/extension.rs @@ -0,0 +1,91 @@ +use bincode::Options; + +use super::{Encoded, Node}; +use crate::{ + merkle::{from_nibbles, PartialPath, TRIE_HASH_LEN}, + shale::{DiskAddress, ShaleStore}, +}; +use std::{ + fmt::{Debug, Error as FmtError, Formatter}, + sync::atomic::Ordering, +}; + +pub const SIZE: usize = 2; + +#[derive(PartialEq, Eq, Clone)] +pub struct ExtNode { + pub(crate) path: PartialPath, + pub(crate) child: DiskAddress, + pub(crate) child_encoded: Option>, +} + +impl Debug for ExtNode { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), FmtError> { + let Self { + path, + child, + child_encoded, + } = self; + write!(f, "[Extension {path:?} {child:?} {child_encoded:?}]",) + } +} + +impl ExtNode { + pub(super) fn encode>(&self, store: &S) -> Vec { + let mut list = <[Encoded>; 2]>::default(); + list[0] = Encoded::Data( + bincode::DefaultOptions::new() + .serialize(&from_nibbles(&self.path.encode(false)).collect::>()) + .unwrap(), + ); + + if !self.child.is_null() { + let mut r = store.get_item(self.child).unwrap(); + + if r.is_encoded_longer_than_hash_len(store) { + list[1] = Encoded::Data( + bincode::DefaultOptions::new() + .serialize(&&(*r.get_root_hash(store))[..]) + .unwrap(), + ); + + if r.lazy_dirty.load(Ordering::Relaxed) { + r.write(|_| {}).unwrap(); + r.lazy_dirty.store(false, Ordering::Relaxed); + } + } else { + list[1] = Encoded::Raw(r.get_encoded(store).to_vec()); + } + } else { + // Check if there is already a caclucated encoded value for the child, which + // can happen when manually constructing a trie from proof. + if let Some(v) = &self.child_encoded { + if v.len() == TRIE_HASH_LEN { + list[1] = Encoded::Data(bincode::DefaultOptions::new().serialize(v).unwrap()); + } else { + list[1] = Encoded::Raw(v.clone()); + } + } + } + + bincode::DefaultOptions::new() + .serialize(list.as_slice()) + .unwrap() + } + + pub fn chd(&self) -> DiskAddress { + self.child + } + + pub fn chd_encoded(&self) -> Option<&[u8]> { + self.child_encoded.as_deref() + } + + pub fn chd_mut(&mut self) -> &mut DiskAddress { + &mut self.child + } + + pub fn chd_encoded_mut(&mut self) -> &mut Option> { + &mut self.child_encoded + } +} diff --git a/firewood/src/merkle/node/leaf.rs b/firewood/src/merkle/node/leaf.rs new file mode 100644 index 000000000..4fd2c8f30 --- /dev/null +++ b/firewood/src/merkle/node/leaf.rs @@ -0,0 +1,72 @@ +use std::fmt::{Debug, Error as FmtError, Formatter}; + +use bincode::Options; + +use super::{Data, Encoded}; +use crate::merkle::{from_nibbles, PartialPath}; + +#[derive(PartialEq, Eq, Clone)] +pub struct LeafNode { + pub(crate) path: PartialPath, + pub(crate) data: Data, +} + +impl Debug for LeafNode { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), FmtError> { + write!(f, "[Leaf {:?} {}]", self.path, hex::encode(&*self.data)) + } +} + +impl LeafNode { + pub fn new, D: Into>(path: P, data: D) -> Self { + Self { + path: path.into(), + data: data.into(), + } + } + + pub fn path(&self) -> &PartialPath { + &self.path + } + + pub fn data(&self) -> &Data { + &self.data + } + + pub(super) fn encode(&self) -> Vec { + bincode::DefaultOptions::new() + .serialize( + [ + Encoded::Raw(from_nibbles(&self.path.encode(true)).collect()), + Encoded::Raw(self.data.to_vec()), + ] + .as_slice(), + ) + .unwrap() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use test_case::test_case; + + // these tests will fail if the encoding mechanism changes and should be updated accordingly + #[test_case(0b10 << 4, vec![0x12, 0x34], vec![1, 2, 3, 4]; "even length")] + // first nibble is part of the prefix + #[test_case((0b11 << 4) + 2, vec![0x34], vec![2, 3, 4]; "odd length")] + fn encode_regression_test(prefix: u8, path: Vec, nibbles: Vec) { + let data = vec![5, 6, 7, 8]; + + let serialized_path = [vec![prefix], path.clone()].concat(); + // 0 represents Encoded::Raw + let serialized_path = [vec![0, serialized_path.len() as u8], serialized_path].concat(); + let serialized_data = [vec![0, data.len() as u8], data.clone()].concat(); + + let serialized = [vec![2], serialized_path, serialized_data].concat(); + + let node = LeafNode::new(nibbles, data.clone()); + + assert_eq!(node.encode(), serialized); + } +} diff --git a/firewood/src/merkle/partial_path.rs b/firewood/src/merkle/node/partial_path.rs similarity index 50% rename from firewood/src/merkle/partial_path.rs rename to firewood/src/merkle/node/partial_path.rs index 309f77c19..d213dd05d 100644 --- a/firewood/src/merkle/partial_path.rs +++ b/firewood/src/merkle/node/partial_path.rs @@ -1,8 +1,12 @@ // Copyright (C) 2023, Ava Labs, Inc. All rights reserved. // See the file LICENSE.md for licensing terms. +use super::Flags; use crate::nibbles::NibblesIterator; -use std::fmt::{self, Debug}; +use std::{ + fmt::{self, Debug}, + iter::once, +}; // TODO: use smallvec /// PartialPath keeps a list of nibbles to represent a path on the Trie. @@ -36,16 +40,27 @@ impl PartialPath { self.0 } - pub(super) fn encode(&self, term: bool) -> Vec { - let odd_len = (self.0.len() & 1) as u8; - let flags = if term { 2 } else { 0 } + odd_len; - let mut res = if odd_len == 1 { - vec![flags] + pub(super) fn encode(&self, is_terminal: bool) -> Vec { + let mut flags = Flags::empty(); + + if is_terminal { + flags.insert(Flags::TERMINAL); + } + + let has_odd_len = self.0.len() & 1 == 1; + + let extra_byte = if has_odd_len { + flags.insert(Flags::ODD_LEN); + + None } else { - vec![flags, 0x0] + Some(0) }; - res.extend(&self.0); - res + + once(flags.bits()) + .chain(extra_byte) + .chain(self.0.iter().copied()) + .collect() } // TODO: remove all non `Nibbles` usages and delete this function. @@ -53,35 +68,46 @@ impl PartialPath { // /// returns a tuple of the decoded partial path and whether the path is terminal pub fn decode(raw: &[u8]) -> (Self, bool) { - let prefix = raw[0]; - let is_odd = (prefix & 1) as usize; - let decoded = raw.iter().skip(1).skip(1 - is_odd).copied().collect(); + let mut raw = raw.iter().copied(); + let flags = Flags::from_bits_retain(raw.next().unwrap_or_default()); + + if !flags.contains(Flags::ODD_LEN) { + let _ = raw.next(); + } - (Self(decoded), prefix > 1) + (Self(raw.collect()), flags.contains(Flags::TERMINAL)) } /// returns a tuple of the decoded partial path and whether the path is terminal pub fn from_nibbles(mut nibbles: NibblesIterator<'_, N>) -> (Self, bool) { - let prefix = nibbles.next().unwrap(); - let is_odd = (prefix & 1) as usize; - let decoded = nibbles.skip(1 - is_odd).collect(); + let flags = Flags::from_bits_retain(nibbles.next().unwrap_or_default()); - (Self(decoded), prefix > 1) + if !flags.contains(Flags::ODD_LEN) { + let _ = nibbles.next(); + } + + (Self(nibbles.collect()), flags.contains(Flags::TERMINAL)) } - pub(super) fn dehydrated_len(&self) -> u64 { - let len = self.0.len() as u64; - if len & 1 == 1 { - (len + 1) >> 1 + pub(super) fn serialized_len(&self) -> u64 { + let len = self.0.len(); + + // if len is even the prefix takes an extra byte + // otherwise is combined with the first nibble + let len = if len & 1 == 1 { + (len + 1) / 2 } else { - (len >> 1) + 1 - } + len / 2 + 1 + }; + + len as u64 } } #[cfg(test)] mod tests { use super::*; + use std::ops::Deref; use test_case::test_case; #[test_case(&[1, 2, 3, 4], true)] @@ -90,9 +116,14 @@ mod tests { #[test_case(&[1, 2], true)] #[test_case(&[1], true)] fn test_encoding(steps: &[u8], term: bool) { - let path = PartialPath(steps.to_vec()).encode(term); - let (decoded, decoded_term) = PartialPath::decode(&path); - assert_eq!(&decoded.0, &steps); + let path = PartialPath(steps.to_vec()); + let encoded = path.encode(term); + + assert_eq!(encoded.len(), path.serialized_len() as usize * 2); + + let (decoded, decoded_term) = PartialPath::decode(&encoded); + + assert_eq!(&decoded.deref(), &steps); assert_eq!(decoded_term, term); } } diff --git a/firewood/src/shale/mod.rs b/firewood/src/shale/mod.rs index 7d1d549d6..44ae9b3af 100644 --- a/firewood/src/shale/mod.rs +++ b/firewood/src/shale/mod.rs @@ -1,7 +1,7 @@ // Copyright (C) 2023, Ava Labs, Inc. All rights reserved. // See the file LICENSE.md for licensing terms. -use disk_address::DiskAddress; +pub use disk_address::DiskAddress; use std::any::type_name; use std::collections::{HashMap, HashSet}; use std::fmt::{self, Debug, Formatter}; From 028be27d17aa9ee99e9d9bbcf659d8aed52254ff Mon Sep 17 00:00:00 2001 From: Richard Pringle Date: Tue, 21 Nov 2023 12:26:54 -0500 Subject: [PATCH 5/7] Remove dbg! and add licence header --- firewood/src/merkle/node.rs | 2 +- firewood/src/merkle/node/branch.rs | 3 +++ firewood/src/merkle/node/extension.rs | 3 +++ firewood/src/merkle/node/leaf.rs | 3 +++ 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/firewood/src/merkle/node.rs b/firewood/src/merkle/node.rs index dfe905498..077025bb8 100644 --- a/firewood/src/merkle/node.rs +++ b/firewood/src/merkle/node.rs @@ -141,7 +141,7 @@ impl NodeType { let decoded_key_nibbles = Nibbles::<0>::new(&decoded_key); let (cur_key_path, term) = - dbg!(PartialPath::from_nibbles(decoded_key_nibbles.into_iter())); + PartialPath::from_nibbles(decoded_key_nibbles.into_iter()); let cur_key = cur_key_path.into_inner(); let data: Vec = items.next().unwrap().decode()?; diff --git a/firewood/src/merkle/node/branch.rs b/firewood/src/merkle/node/branch.rs index 6d9a421a0..d94e81ad6 100644 --- a/firewood/src/merkle/node/branch.rs +++ b/firewood/src/merkle/node/branch.rs @@ -1,3 +1,6 @@ +// Copyright (C) 2023, Ava Labs, Inc. All rights reserved. +// See the file LICENSE.md for licensing terms. + use super::{Data, Encoded, Node}; use crate::{ merkle::{PartialPath, TRIE_HASH_LEN}, diff --git a/firewood/src/merkle/node/extension.rs b/firewood/src/merkle/node/extension.rs index 5aa6a0d9b..28f9549cd 100644 --- a/firewood/src/merkle/node/extension.rs +++ b/firewood/src/merkle/node/extension.rs @@ -1,3 +1,6 @@ +// Copyright (C) 2023, Ava Labs, Inc. All rights reserved. +// See the file LICENSE.md for licensing terms. + use bincode::Options; use super::{Encoded, Node}; diff --git a/firewood/src/merkle/node/leaf.rs b/firewood/src/merkle/node/leaf.rs index 4fd2c8f30..0418ab888 100644 --- a/firewood/src/merkle/node/leaf.rs +++ b/firewood/src/merkle/node/leaf.rs @@ -1,3 +1,6 @@ +// Copyright (C) 2023, Ava Labs, Inc. All rights reserved. +// See the file LICENSE.md for licensing terms. + use std::fmt::{Debug, Error as FmtError, Formatter}; use bincode::Options; From 34199e5169205ffd9df8344bc1152b03d827e258 Mon Sep 17 00:00:00 2001 From: Richard Pringle Date: Tue, 21 Nov 2023 15:22:06 -0500 Subject: [PATCH 6/7] Limit DiskAddress visibility --- firewood/src/shale/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firewood/src/shale/mod.rs b/firewood/src/shale/mod.rs index 44ae9b3af..1a2d69a92 100644 --- a/firewood/src/shale/mod.rs +++ b/firewood/src/shale/mod.rs @@ -1,7 +1,7 @@ // Copyright (C) 2023, Ava Labs, Inc. All rights reserved. // See the file LICENSE.md for licensing terms. -pub use disk_address::DiskAddress; +pub(crate) use disk_address::DiskAddress; use std::any::type_name; use std::collections::{HashMap, HashSet}; use std::fmt::{self, Debug, Formatter}; From f12df2be58f3a9977155a3c2b0faa50550038f9b Mon Sep 17 00:00:00 2001 From: Richard Pringle Date: Mon, 20 Nov 2023 15:09:22 -0500 Subject: [PATCH 7/7] Remove path again --- firewood/src/merkle.rs | 149 +++++++++-------------------- firewood/src/merkle/node.rs | 8 +- firewood/src/merkle/node/branch.rs | 8 +- 3 files changed, 52 insertions(+), 113 deletions(-) diff --git a/firewood/src/merkle.rs b/firewood/src/merkle.rs index 4a00606e2..10fb47869 100644 --- a/firewood/src/merkle.rs +++ b/firewood/src/merkle.rs @@ -85,7 +85,7 @@ impl + Send + Sync> Merkle { self.store .put_item( Node::branch(BranchNode { - path: vec![].into(), + // path: vec![].into(), children: [None; MAX_CHILDREN], value: None, children_encoded: Default::default(), @@ -167,18 +167,17 @@ impl + Send + Sync> Merkle { Ok(()) } - // TODO: replace `split` with a `split_at` function. Handle the logic for matching paths in `insert` instead. #[allow(clippy::too_many_arguments)] - fn split<'a>( - &'a self, - mut node_to_split: ObjRef<'a>, - parents: &mut [(ObjRef<'a>, u8)], + fn split( + &self, + mut node_to_split: ObjRef, + parents: &mut [(ObjRef, u8)], insert_path: &[u8], n_path: Vec, n_value: Option, val: Vec, deleted: &mut Vec, - ) -> Result, Vec)>, MerkleError> { + ) -> Result>, MerkleError> { let node_to_split_address = node_to_split.as_ptr(); let split_index = insert_path .iter() @@ -218,19 +217,30 @@ impl + Send + Sync> Merkle { chd[n_path[idx] as usize] = Some(address); let new_branch = Node::branch(BranchNode { - path: PartialPath(matching_path[..idx].to_vec()), + // path: PartialPath(matching_path[..idx].to_vec()), children: chd, value: None, children_encoded: Default::default(), }); - self.put_node(new_branch)?.as_ptr() + let new_branch_address = self.put_node(new_branch)?.as_ptr(); + + if idx > 0 { + self.put_node(Node::from(NodeType::Extension(ExtNode { + path: PartialPath(matching_path[..idx].to_vec()), + child: new_branch_address, + child_encoded: None, + })))? + .as_ptr() + } else { + new_branch_address + } } else { // paths do not diverge let (leaf_address, prefix, idx, value) = match (insert_path.len().cmp(&n_path.len()), n_value) { // no node-value means this is an extension node and we can therefore continue walking the tree - (Ordering::Greater, None) => return Ok(Some((node_to_split, val))), + (Ordering::Greater, None) => return Ok(Some(val)), // if the paths are equal, we overwrite the data (Ordering::Equal, _) => { @@ -270,9 +280,7 @@ impl + Send + Sync> Merkle { result = Err(e); } } - NodeType::Branch(u) => { - u.value = Some(Data(val)); - } + NodeType::Branch(_) => unreachable!(), } u.rehash(); @@ -338,13 +346,24 @@ impl + Send + Sync> Merkle { children[idx] = leaf_address.into(); - self.put_node(Node::branch(BranchNode { - path: PartialPath(prefix.to_vec()), - children, - value, - children_encoded: Default::default(), - }))? - .as_ptr() + let branch_address = self + .put_node(Node::branch(BranchNode { + children, + value, + children_encoded: Default::default(), + }))? + .as_ptr(); + + if !prefix.is_empty() { + self.put_node(Node::from(NodeType::Extension(ExtNode { + path: PartialPath(prefix.to_vec()), + child: branch_address, + child_encoded: None, + })))? + .as_ptr() + } else { + branch_address + } }; // observation: @@ -430,7 +449,7 @@ impl + Send + Sync> Merkle { break None; } - NodeType::Branch(n) if n.path.len() == 0 => { + NodeType::Branch(n) => { match n.children[current_nibble as usize] { Some(c) => (node, c), None => { @@ -455,62 +474,6 @@ impl + Send + Sync> Merkle { } } - NodeType::Branch(n) => { - let n_path = n.path.to_vec(); - let rem_path = once(current_nibble) - .chain(key_nibbles.clone()) - .collect::>(); - let n_path_len = n_path.len(); - let n_value = n.value.clone(); - - // TODO: don't always call split if the paths match (avoids an allocation) - if let Some((mut node, v)) = self.split( - node, - &mut parents, - &rem_path, - n_path, - n_value, - val, - &mut deleted, - )? { - (0..n_path_len).for_each(|_| { - key_nibbles.next(); - }); - - val = v; - - let next_nibble = rem_path[n_path_len] as usize; - // we're already in the match-arm that states that this was a branch-node - // TODO: cleaning up the split-logic should fix this awkwardness - let n_ptr = node.inner.as_branch().unwrap().children[next_nibble]; - - match n_ptr { - Some(n_ptr) => (self.get_node(n_ptr)?, n_ptr), - None => { - // insert the leaf to the empty slot - // create a new leaf - let leaf_ptr = self - .put_node(Node::leaf( - PartialPath(key_nibbles.collect()), - Data(val), - ))? - .as_ptr(); - // set the current child to point to this leaf - node.write(|u| { - let uu = u.inner.as_branch_mut().unwrap(); - uu.children[next_nibble] = Some(leaf_ptr); - u.rehash(); - }) - .unwrap(); - - break None; - } - } - } else { - break None; - } - } - NodeType::Extension(n) => { let n_path = n.path.to_vec(); let n_ptr = n.chd(); @@ -519,7 +482,7 @@ impl + Send + Sync> Merkle { .collect::>(); let n_path_len = n_path.len(); - if let Some((_ext_node, v)) = self.split( + if let Some(v) = self.split( node, &mut parents, &rem_path, @@ -613,7 +576,7 @@ impl + Send + Sync> Merkle { let branch = self .put_node(Node::branch(BranchNode { - path: vec![].into(), + // path: vec![].into(), children: chd, value: Some(Data(val)), children_encoded: Default::default(), @@ -1047,34 +1010,10 @@ impl + Send + Sync> Merkle { }; let next_ptr = match &node_ref.inner { - NodeType::Branch(n) if n.path.len() == 0 => match n.children[nib as usize] { + NodeType::Branch(n) => match n.children[nib as usize] { Some(c) => c, None => return Ok(None), }, - NodeType::Branch(n) => { - let mut n_path_iter = n.path.iter().copied(); - - if n_path_iter.next() != Some(nib) { - return Ok(None); - } - - let path_matches = n_path_iter - .map(Some) - .all(|n_path_nibble| key_nibbles.next() == n_path_nibble); - - if !path_matches { - return Ok(None); - } - - let Some(nib) = key_nibbles.next() else { - break; - }; - - match n.children[nib as usize] { - Some(c) => c, - None => return Ok(None), - } - } NodeType::Leaf(n) => { let node_ref = if once(nib).chain(key_nibbles).eq(n.path.iter().copied()) { Some(node_ref) @@ -1758,7 +1697,7 @@ mod tests { } Node::branch(BranchNode { - path: vec![].into(), + // path: vec![].into(), children, value, children_encoded, diff --git a/firewood/src/merkle/node.rs b/firewood/src/merkle/node.rs index 077025bb8..0b2453e46 100644 --- a/firewood/src/merkle/node.rs +++ b/firewood/src/merkle/node.rs @@ -174,7 +174,7 @@ impl NodeType { pub fn path_mut(&mut self) -> &mut PartialPath { match self { - NodeType::Branch(u) => &mut u.path, + NodeType::Branch(_u) => todo!(), NodeType::Leaf(node) => &mut node.path, NodeType::Extension(node) => &mut node.path, } @@ -208,7 +208,7 @@ impl Node { is_encoded_longer_than_hash_len: OnceLock::new(), encoded: OnceLock::new(), inner: NodeType::Branch(BranchNode { - path: vec![].into(), + // path: vec![].into(), children: [Some(DiskAddress::null()); MAX_CHILDREN], value: Some(Data(Vec::new())), children_encoded: Default::default(), @@ -385,7 +385,7 @@ impl Storable for Node { root_hash, is_encoded_longer_than_hash_len, NodeType::Branch(BranchNode { - path: vec![].into(), + // path: vec![].into(), children: chd, value, children_encoded: chd_encoded, @@ -667,7 +667,7 @@ pub(super) mod tests { .unwrap_or_default(); Node::branch(BranchNode { - path: vec![].into(), + // path: vec![].into(), children, value: value.map(Data), children_encoded, diff --git a/firewood/src/merkle/node/branch.rs b/firewood/src/merkle/node/branch.rs index d94e81ad6..9a6469511 100644 --- a/firewood/src/merkle/node/branch.rs +++ b/firewood/src/merkle/node/branch.rs @@ -18,7 +18,7 @@ pub const SIZE: usize = MAX_CHILDREN + 1; #[derive(PartialEq, Eq, Clone)] pub struct BranchNode { - pub(crate) path: PartialPath, + // pub(crate) path: PartialPath, pub(crate) children: [Option; MAX_CHILDREN], pub(crate) value: Option, pub(crate) children_encoded: [Option>; MAX_CHILDREN], @@ -27,7 +27,7 @@ pub struct BranchNode { impl Debug for BranchNode { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), FmtError> { write!(f, "[Branch")?; - write!(f, " path={:?}", self.path)?; + // write!(f, " path={:?}", self.path)?; for (i, c) in self.children.iter().enumerate() { if let Some(c) = c { @@ -54,13 +54,13 @@ impl Debug for BranchNode { impl BranchNode { pub fn new( - path: PartialPath, + _path: PartialPath, chd: [Option; MAX_CHILDREN], value: Option>, chd_encoded: [Option>; MAX_CHILDREN], ) -> Self { BranchNode { - path, + // path, children: chd, value: value.map(Data), children_encoded: chd_encoded,