From 89a298808e5854f0d05f122ece24b000d4c07bf2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philippe=20Laferri=C3=A8re?=
Date: Fri, 19 Jan 2024 16:07:35 -0500
Subject: [PATCH] Implement `Smt` struct (replacement to `TieredSmt`) (#254)

---
 src/merkle/mod.rs            |   2 +-
 src/merkle/smt/full/mod.rs   | 381 +++++++++++++++++++++++++++++++++++
 src/merkle/smt/full/tests.rs | 283 ++++++++++++++++++++++++++
 src/merkle/smt/mod.rs        |   3 +
 4 files changed, 668 insertions(+), 1 deletion(-)
 create mode 100644 src/merkle/smt/full/mod.rs
 create mode 100644 src/merkle/smt/full/tests.rs

diff --git a/src/merkle/mod.rs b/src/merkle/mod.rs
index a7acb369..021d256d 100644
--- a/src/merkle/mod.rs
+++ b/src/merkle/mod.rs
@@ -22,7 +22,7 @@ mod path;
 pub use path::{MerklePath, RootPath, ValuePath};
 
 mod smt;
-pub use smt::{LeafIndex, SimpleSmt, SMT_MAX_DEPTH, SMT_MIN_DEPTH};
+pub use smt::{LeafIndex, SimpleSmt, Smt, SmtLeaf, SMT_DEPTH, SMT_MAX_DEPTH, SMT_MIN_DEPTH};
 
 mod tiered_smt;
 pub use tiered_smt::{TieredSmt, TieredSmtProof, TieredSmtProofError};
diff --git a/src/merkle/smt/full/mod.rs b/src/merkle/smt/full/mod.rs
new file mode 100644
index 00000000..0a796a7e
--- /dev/null
+++ b/src/merkle/smt/full/mod.rs
@@ -0,0 +1,381 @@
+use core::cmp::Ordering;
+
+use winter_math::StarkField;
+
+use crate::hash::rpo::Rpo256;
+use crate::merkle::EmptySubtreeRoots;
+use crate::utils::{
+    collections::{BTreeMap, BTreeSet, Vec},
+    vec,
+};
+use crate::{Felt, EMPTY_WORD};
+
+use super::{
+    InnerNode, LeafIndex, MerkleError, MerklePath, NodeIndex, RpoDigest, SparseMerkleTree, Word,
+};
+
+#[cfg(test)]
+mod tests;
+
+// CONSTANTS
+// ================================================================================================
+
+pub const SMT_DEPTH: u8 = 64;
+
+// SMT
+// ================================================================================================
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+pub struct Smt {
+    root: RpoDigest,
+    leaves: BTreeMap<u64, SmtLeaf>,
+    inner_nodes: BTreeMap<NodeIndex, InnerNode>,
+}
+
+impl Smt {
+    // CONSTANTS
+    // --------------------------------------------------------------------------------------------
+    /// The default value used to compute the hash of empty leaves
+    pub const EMPTY_VALUE: Word = <Self as SparseMerkleTree<SMT_DEPTH>>::EMPTY_VALUE;
+
+    // CONSTRUCTORS
+    // --------------------------------------------------------------------------------------------
+
+    /// Returns a new [Smt].
+    ///
+    /// All leaves in the returned tree are set to [Self::EMPTY_VALUE].
+    pub fn new() -> Self {
+        let root = *EmptySubtreeRoots::entry(SMT_DEPTH, 0);
+
+        Self {
+            root,
+            leaves: BTreeMap::new(),
+            inner_nodes: BTreeMap::new(),
+        }
+    }
+
+    /// Returns a new [Smt] instantiated with leaves set as specified by the provided entries.
+    ///
+    /// All leaves omitted from the entries list are set to [Self::EMPTY_VALUE].
+    ///
+    /// # Errors
+    /// Returns an error if the provided entries contain multiple values for the same key.
+    pub fn with_entries(
+        entries: impl IntoIterator<Item = (RpoDigest, Word)>,
+    ) -> Result<Self, MerkleError> {
+        // create an empty tree
+        let mut tree = Self::new();
+
+        // This being a sparse data structure, the EMPTY_WORD is not assigned to the `BTreeMap`, so
+        // entries with the empty value need additional tracking.
+        let mut key_set_to_zero = BTreeSet::new();
+
+        for (key, value) in entries {
+            let old_value = tree.insert(key, value);
+
+            if old_value != EMPTY_WORD || key_set_to_zero.contains(&key) {
+                return Err(MerkleError::DuplicateValuesForIndex(
+                    LeafIndex::<SMT_DEPTH>::from(key).value(),
+                ));
+            }
+
+            if value == EMPTY_WORD {
+                key_set_to_zero.insert(key);
+            };
+        }
+        Ok(tree)
+    }
+
+    // PUBLIC ACCESSORS
+    // --------------------------------------------------------------------------------------------
+
+    /// Returns the depth of the tree
+    pub const fn depth(&self) -> u8 {
+        SMT_DEPTH
+    }
+
+    /// Returns the root of the tree
+    pub fn root(&self) -> RpoDigest {
+        <Self as SparseMerkleTree<SMT_DEPTH>>::root(self)
+    }
+
+    /// Returns the leaf to which `key` maps ([SmtLeaf::Empty] if nothing is stored there).
+    pub fn get_leaf(&self, key: &RpoDigest) -> SmtLeaf {
+        <Self as SparseMerkleTree<SMT_DEPTH>>::get_leaf(self, key)
+    }
+
+    /// Returns an opening of the leaf associated with `key`. Conceptually, an opening is a Merkle
+    /// path to the leaf, as well as the leaf itself.
+    pub fn open(&self, key: &RpoDigest) -> (MerklePath, SmtLeaf) {
+        <Self as SparseMerkleTree<SMT_DEPTH>>::open(self, key)
+    }
+
+    // STATE MUTATORS
+    // --------------------------------------------------------------------------------------------
+
+    /// Inserts a value at the specified key, returning the previous value associated with that key.
+    /// Recall that by definition, any key that hasn't been updated is associated with
+    /// [`Self::EMPTY_VALUE`].
+    ///
+    /// This also recomputes all hashes between the leaf (associated with the key) and the root,
+    /// updating the root itself.
+    pub fn insert(&mut self, key: RpoDigest, value: Word) -> Word {
+        <Self as SparseMerkleTree<SMT_DEPTH>>::insert(self, key, value)
+    }
+
+    // HELPERS
+    // --------------------------------------------------------------------------------------------
+
+    /// Inserts `value` at leaf index pointed to by `key`. `value` is guaranteed to not be the empty
+    /// value, such that this is indeed an insertion.
+    fn perform_insert(&mut self, key: RpoDigest, value: Word) -> Option<Word> {
+        debug_assert_ne!(value, Self::EMPTY_VALUE);
+
+        let leaf_index: LeafIndex<SMT_DEPTH> = Self::key_to_leaf_index(&key);
+
+        match self.leaves.get_mut(&leaf_index.value()) {
+            Some(leaf) => leaf.insert(key, value),
+            None => {
+                self.leaves.insert(leaf_index.value(), SmtLeaf::Single((key, value)));
+
+                None
+            }
+        }
+    }
+
+    /// Removes key-value pair at leaf index pointed to by `key` if it exists.
+    fn perform_remove(&mut self, key: RpoDigest) -> Option<Word> {
+        let leaf_index: LeafIndex<SMT_DEPTH> = Self::key_to_leaf_index(&key);
+
+        if let Some(leaf) = self.leaves.get_mut(&leaf_index.value()) {
+            let (old_value, is_empty) = leaf.remove(key);
+            if is_empty {
+                self.leaves.remove(&leaf_index.value());
+            }
+            old_value
+        } else {
+            // there's nothing stored at the leaf; nothing to update
+            None
+        }
+    }
+}
+
+impl SparseMerkleTree<SMT_DEPTH> for Smt {
+    type Key = RpoDigest;
+    type Value = Word;
+    type Leaf = SmtLeaf;
+    type Opening = (MerklePath, SmtLeaf);
+
+    const EMPTY_VALUE: Self::Value = EMPTY_WORD;
+
+    fn root(&self) -> RpoDigest {
+        self.root
+    }
+
+    fn set_root(&mut self, root: RpoDigest) {
+        self.root = root;
+    }
+
+    fn get_inner_node(&self, index: NodeIndex) -> InnerNode {
+        self.inner_nodes.get(&index).cloned().unwrap_or_else(|| {
+            let node = EmptySubtreeRoots::entry(SMT_DEPTH, index.depth() + 1);
+
+            InnerNode { left: *node, right: *node }
+        })
+    }
+
+    fn insert_inner_node(&mut self, index: NodeIndex, inner_node: InnerNode) {
+        self.inner_nodes.insert(index, inner_node);
+    }
+
+    fn insert_value(&mut self, key: Self::Key, value: Self::Value) -> Option<Self::Value> {
+        // inserting an `EMPTY_VALUE` is equivalent to removing any value associated with `key`
+        if value != Self::EMPTY_VALUE {
+            self.perform_insert(key, value)
+        } else {
+            self.perform_remove(key)
+        }
+    }
+
+    fn get_leaf(&self, key: &RpoDigest) -> Self::Leaf {
+        let leaf_pos = LeafIndex::<SMT_DEPTH>::from(*key).value();
+
+        match self.leaves.get(&leaf_pos) {
+            Some(leaf) => leaf.clone(),
+            None => SmtLeaf::Empty,
+        }
+    }
+
+    fn hash_leaf(leaf: &Self::Leaf) -> RpoDigest {
+        leaf.hash()
+    }
+
+    fn key_to_leaf_index(key: &RpoDigest) -> LeafIndex<SMT_DEPTH> {
+        let most_significant_felt = key[3];
+        LeafIndex::new_max_depth(most_significant_felt.as_int())
+    }
+}
+
+impl Default for Smt {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+// LEAF
+// ================================================================================================
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
+pub enum SmtLeaf {
+    Empty,
+    Single((RpoDigest, Word)),
+    Multiple(Vec<(RpoDigest, Word)>),
+}
+
+impl SmtLeaf {
+    /// Converts a leaf to a list of field elements
+    pub fn to_elements(&self) -> Vec<Felt> {
+        self.clone().into_elements()
+    }
+
+    /// Consumes the leaf and converts it into a list of field elements
+    pub fn into_elements(self) -> Vec<Felt> {
+        match self {
+            SmtLeaf::Empty => Vec::new(),
+            SmtLeaf::Single(kv_pair) => kv_to_elements(kv_pair).collect(),
+            SmtLeaf::Multiple(kv_pairs) => kv_pairs.into_iter().flat_map(kv_to_elements).collect(),
+        }
+    }
+
+    /// Computes the hash of the leaf
+    pub fn hash(&self) -> RpoDigest {
+        match self {
+            SmtLeaf::Empty => EMPTY_WORD.into(),
+            SmtLeaf::Single((key, value)) => Rpo256::merge(&[*key, value.into()]),
+            SmtLeaf::Multiple(kvs) => {
+                let elements: Vec<Felt> = kvs.iter().copied().flat_map(kv_to_elements).collect();
+                Rpo256::hash_elements(&elements)
+            }
+        }
+    }
+
+    // HELPERS
+    // ---------------------------------------------------------------------------------------------
+
+    /// Inserts key-value pair into the leaf; returns the previous value associated with `key`, if
+    /// any.
+    fn insert(&mut self, key: RpoDigest, value: Word) -> Option<Word> {
+        match self {
+            SmtLeaf::Empty => {
+                *self = SmtLeaf::Single((key, value));
+                None
+            }
+            SmtLeaf::Single(kv_pair) => {
+                if kv_pair.0 == key {
+                    // the key is already in this leaf. Update the value and return the previous
+                    // value
+                    let old_value = kv_pair.1;
+                    kv_pair.1 = value;
+                    Some(old_value)
+                } else {
+                    // Another entry is present in this leaf. Transform the entry into a list
+                    // entry, and make sure the key-value pairs are sorted by key
+                    let mut pairs = vec![*kv_pair, (key, value)];
+                    pairs.sort_by(|(key_1, _), (key_2, _)| cmp_keys(*key_1, *key_2));
+
+                    *self = SmtLeaf::Multiple(pairs);
+
+                    None
+                }
+            }
+            SmtLeaf::Multiple(kv_pairs) => {
+                match kv_pairs.binary_search_by(|kv_pair| cmp_keys(kv_pair.0, key)) {
+                    Ok(pos) => {
+                        let old_value = kv_pairs[pos].1;
+                        kv_pairs[pos].1 = value;
+
+                        Some(old_value)
+                    }
+                    Err(pos) => {
+                        kv_pairs.insert(pos, (key, value));
+
+                        None
+                    }
+                }
+            }
+        }
+    }
+
+    /// Removes key-value pair from the leaf stored at key; returns the previous value associated
+    /// with `key`, if any. Also returns an `is_empty` flag, indicating whether the leaf became
+    /// empty, and must be removed from the data structure it is contained in.
+    fn remove(&mut self, key: RpoDigest) -> (Option<Word>, bool) {
+        match self {
+            SmtLeaf::Empty => (None, false),
+            SmtLeaf::Single((key_at_leaf, value_at_leaf)) => {
+                if *key_at_leaf == key {
+                    // our key was indeed stored in the leaf, so we return the value that was stored
+                    // in it, and indicate that the leaf should be removed
+                    let old_value = *value_at_leaf;
+
+                    // Note: this is not strictly needed, since the caller is expected to drop this
+                    // `SmtLeaf` object.
+                    *self = SmtLeaf::Empty;
+
+                    (Some(old_value), true)
+                } else {
+                    // another key is stored at leaf; nothing to update
+                    (None, false)
+                }
+            }
+            SmtLeaf::Multiple(kv_pairs) => {
+                match kv_pairs.binary_search_by(|kv_pair| cmp_keys(kv_pair.0, key)) {
+                    Ok(pos) => {
+                        let old_value = kv_pairs[pos].1;
+
+                        kv_pairs.remove(pos);
+                        debug_assert!(!kv_pairs.is_empty());
+
+                        if kv_pairs.len() == 1 {
+                            // convert the leaf into `Single`
+                            *self = SmtLeaf::Single(kv_pairs[0]);
+                        }
+
+                        (Some(old_value), false)
+                    }
+                    Err(_) => {
+                        // other keys are stored at leaf; nothing to update
+                        (None, false)
+                    }
+                }
+            }
+        }
+    }
+}
+
+// HELPER FUNCTIONS
+// ================================================================================================
+
+/// Converts a key-value tuple to an iterator of `Felt`s
+fn kv_to_elements((key, value): (RpoDigest, Word)) -> impl Iterator<Item = Felt> {
+    let key_elements = key.into_iter();
+    let value_elements = value.into_iter();
+
+    key_elements.chain(value_elements)
+}
+
+/// Compares two keys, compared element-by-element using their integer representations starting with
+/// the most significant element.
+fn cmp_keys(key_1: RpoDigest, key_2: RpoDigest) -> Ordering {
+    for (v1, v2) in key_1.iter().zip(key_2.iter()).rev() {
+        let v1 = v1.as_int();
+        let v2 = v2.as_int();
+        if v1 != v2 {
+            return v1.cmp(&v2);
+        }
+    }
+
+    Ordering::Equal
+}
diff --git a/src/merkle/smt/full/tests.rs b/src/merkle/smt/full/tests.rs
new file mode 100644
index 00000000..b526cbf0
--- /dev/null
+++ b/src/merkle/smt/full/tests.rs
@@ -0,0 +1,283 @@
+use super::*;
+use crate::{
+    merkle::{EmptySubtreeRoots, MerkleStore},
+    ONE, WORD_SIZE,
+};
+
+/// This test checks that inserting twice at the same key functions as expected. The test covers
+/// only the case where the key is alone in its leaf
+#[test]
+fn test_smt_insert_at_same_key() {
+    let mut smt = Smt::default();
+    let mut store: MerkleStore = MerkleStore::default();
+
+    assert_eq!(smt.root(), *EmptySubtreeRoots::entry(SMT_DEPTH, 0));
+
+    let key_1: RpoDigest = {
+        let raw = 0b_01101001_01101100_00011111_11111111_10010110_10010011_11100000_00000000_u64;
+
+        RpoDigest::from([ONE, ONE, ONE, Felt::new(raw)])
+    };
+    let key_1_index: NodeIndex = LeafIndex::<SMT_DEPTH>::from(key_1).into();
+
+    let value_1 = [ONE; WORD_SIZE];
+    let value_2 = [ONE + ONE; WORD_SIZE];
+
+    // Insert value 1 and ensure root is as expected
+    {
+        let leaf_node = build_single_leaf_node(key_1, value_1);
+        let tree_root = store.set_node(smt.root(), key_1_index, leaf_node).unwrap().root;
+
+        let old_value_1 = smt.insert(key_1, value_1);
+        assert_eq!(old_value_1, EMPTY_WORD);
+
+        assert_eq!(smt.root(), tree_root);
+    }
+
+    // Insert value 2 and ensure root is as expected
+    {
+        let leaf_node = build_single_leaf_node(key_1, value_2);
+        let tree_root = store.set_node(smt.root(), key_1_index, leaf_node).unwrap().root;
+
+        let old_value_2 = smt.insert(key_1, value_2);
+        assert_eq!(old_value_2, value_1);
+
+        assert_eq!(smt.root(), tree_root);
+    }
+}
+
+/// This test checks that inserting twice at the same key functions as expected. The test covers
+/// only the case where the leaf type is `SmtLeaf::Multiple`
+#[test]
+fn test_smt_insert_at_same_key_2() {
+    // The most significant u64 used for both keys (to ensure they map to the same leaf)
+    let key_msb: u64 = 42;
+
+    let key_already_present: RpoDigest =
+        RpoDigest::from([2_u64.into(), 2_u64.into(), 2_u64.into(), Felt::new(key_msb)]);
+    let key_already_present_index: NodeIndex =
+        LeafIndex::<SMT_DEPTH>::from(key_already_present).into();
+    let value_already_present = [ONE + ONE + ONE; WORD_SIZE];
+
+    let mut smt =
+        Smt::with_entries(core::iter::once((key_already_present, value_already_present))).unwrap();
+    let mut store: MerkleStore = {
+        let mut store = MerkleStore::default();
+
+        let leaf_node = build_single_leaf_node(key_already_present, value_already_present);
+        store
+            .set_node(*EmptySubtreeRoots::entry(SMT_DEPTH, 0), key_already_present_index, leaf_node)
+            .unwrap();
+        store
+    };
+
+    let key_1: RpoDigest = RpoDigest::from([ONE, ONE, ONE, Felt::new(key_msb)]);
+    let key_1_index: NodeIndex = LeafIndex::<SMT_DEPTH>::from(key_1).into();
+
+    assert_eq!(key_1_index, key_already_present_index);
+
+    let value_1 = [ONE; WORD_SIZE];
+    let value_2 = [ONE + ONE; WORD_SIZE];
+
+    // Insert value 1 and ensure root is as expected
+    {
+        // Note: key_1 comes first because it is smaller
+        let leaf_node = build_multiple_leaf_node(&[
+            (key_1, value_1),
+            (key_already_present, value_already_present),
+        ]);
+        let tree_root = store.set_node(smt.root(), key_1_index, leaf_node).unwrap().root;
+
+        let old_value_1 = smt.insert(key_1, value_1);
+        assert_eq!(old_value_1, EMPTY_WORD);
+
+        assert_eq!(smt.root(), tree_root);
+    }
+
+    // Insert value 2 and ensure root is as expected
+    {
+        let leaf_node = build_multiple_leaf_node(&[
+            (key_1, value_2),
+            (key_already_present, value_already_present),
+        ]);
+        let tree_root = store.set_node(smt.root(), key_1_index, leaf_node).unwrap().root;
+
+        let old_value_2 = smt.insert(key_1, value_2);
+        assert_eq!(old_value_2, value_1);
+
+        assert_eq!(smt.root(), tree_root);
+    }
+}
+
+/// This test ensures that the root of the tree is as expected when we add 3 items at 3 different
+/// keys. This also tests that the merkle paths produced are as expected.
+#[test]
+fn test_smt_insert_multiple_values() {
+    let mut smt = Smt::default();
+    let mut store: MerkleStore = MerkleStore::default();
+
+    assert_eq!(smt.root(), *EmptySubtreeRoots::entry(SMT_DEPTH, 0));
+
+    let key_1: RpoDigest = {
+        let raw = 0b_01101001_01101100_00011111_11111111_10010110_10010011_11100000_00000000_u64;
+
+        RpoDigest::from([ONE, ONE, ONE, Felt::new(raw)])
+    };
+
+    let key_2: RpoDigest = {
+        let raw = 0b_11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111_u64;
+
+        RpoDigest::from([ONE, ONE, ONE, Felt::new(raw)])
+    };
+
+    let key_3: RpoDigest = {
+        let raw = 0b_00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000_u64;
+
+        RpoDigest::from([ONE, ONE, ONE, Felt::new(raw)])
+    };
+
+    let value_1 = [ONE; WORD_SIZE];
+    let value_2 = [ONE + ONE; WORD_SIZE];
+    let value_3 = [ONE + ONE + ONE; WORD_SIZE];
+
+    let key_values = [(key_1, value_1), (key_2, value_2), (key_3, value_3)];
+
+    for (key, value) in key_values {
+        let key_index: NodeIndex = LeafIndex::<SMT_DEPTH>::from(key).into();
+
+        let leaf_node = build_single_leaf_node(key, value);
+        let tree_root = store.set_node(smt.root(), key_index, leaf_node).unwrap().root;
+
+        let old_value = smt.insert(key, value);
+        assert_eq!(old_value, EMPTY_WORD);
+
+        assert_eq!(smt.root(), tree_root);
+
+        let expected_path = store.get_path(tree_root, key_index).unwrap();
+        assert_eq!(smt.open(&key).0, expected_path.path);
+    }
+}
+
+/// This tests that inserting the empty value does indeed remove the key-value contained at the
+/// leaf. We insert & remove 3 values at the same leaf to ensure that all cases are covered (empty,
+/// single, multiple).
+#[test]
+fn test_smt_removal() {
+    let mut smt = Smt::default();
+
+    let raw = 0b_01101001_01101100_00011111_11111111_10010110_10010011_11100000_00000000_u64;
+
+    let key_1: RpoDigest = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw)]);
+    let key_2: RpoDigest =
+        RpoDigest::from([2_u64.into(), 2_u64.into(), 2_u64.into(), Felt::new(raw)]);
+    let key_3: RpoDigest =
+        RpoDigest::from([3_u64.into(), 3_u64.into(), 3_u64.into(), Felt::new(raw)]);
+
+    let value_1 = [ONE; WORD_SIZE];
+    let value_2 = [2_u64.into(); WORD_SIZE];
+    let value_3: [Felt; 4] = [3_u64.into(); WORD_SIZE];
+
+    // insert key-value 1
+    {
+        let old_value_1 = smt.insert(key_1, value_1);
+        assert_eq!(old_value_1, EMPTY_WORD);
+
+        assert_eq!(smt.get_leaf(&key_1), SmtLeaf::Single((key_1, value_1)));
+    }
+
+    // insert key-value 2
+    {
+        let old_value_2 = smt.insert(key_2, value_2);
+        assert_eq!(old_value_2, EMPTY_WORD);
+
+        assert_eq!(
+            smt.get_leaf(&key_2),
+            SmtLeaf::Multiple(vec![(key_1, value_1), (key_2, value_2)])
+        );
+    }
+
+    // insert key-value 3
+    {
+        let old_value_3 = smt.insert(key_3, value_3);
+        assert_eq!(old_value_3, EMPTY_WORD);
+
+        assert_eq!(
+            smt.get_leaf(&key_3),
+            SmtLeaf::Multiple(vec![(key_1, value_1), (key_2, value_2), (key_3, value_3)])
+        );
+    }
+
+    // remove key 3
+    {
+        let old_value_3 = smt.insert(key_3, EMPTY_WORD);
+        assert_eq!(old_value_3, value_3);
+
+        assert_eq!(
+            smt.get_leaf(&key_3),
+            SmtLeaf::Multiple(vec![(key_1, value_1), (key_2, value_2)])
+        );
+    }
+
+    // remove key 2
+    {
+        let old_value_2 = smt.insert(key_2, EMPTY_WORD);
+        assert_eq!(old_value_2, value_2);
+
+        assert_eq!(smt.get_leaf(&key_2), SmtLeaf::Single((key_1, value_1)));
+    }
+
+    // remove key 1
+    {
+        let old_value_1 = smt.insert(key_1, EMPTY_WORD);
+        assert_eq!(old_value_1, value_1);
+
+        assert_eq!(smt.get_leaf(&key_1), SmtLeaf::Empty);
+    }
+}
+
+/// Tests that 2 key-value pairs stored in the same leaf have the same path
+#[test]
+fn test_smt_path_to_keys_in_same_leaf_are_equal() {
+    let raw = 0b_01101001_01101100_00011111_11111111_10010110_10010011_11100000_00000000_u64;
+
+    let key_1: RpoDigest = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw)]);
+    let key_2: RpoDigest =
+        RpoDigest::from([2_u64.into(), 2_u64.into(), 2_u64.into(), Felt::new(raw)]);
+
+    let value_1 = [ONE; WORD_SIZE];
+    let value_2 = [2_u64.into(); WORD_SIZE];
+
+    let smt = Smt::with_entries([(key_1, value_1), (key_2, value_2)]).unwrap();
+
+    assert_eq!(smt.open(&key_1), smt.open(&key_2));
+}
+
+/// Tests that an empty leaf hashes to the empty word
+#[test]
+fn test_empty_leaf_hash() {
+    let smt = Smt::default();
+
+    let leaf = smt.get_leaf(&RpoDigest::default());
+    assert_eq!(leaf.hash(), EMPTY_WORD.into());
+}
+
+// HELPERS
+// --------------------------------------------------------------------------------------------
+
+fn build_single_leaf_node(key: RpoDigest, value: Word) -> RpoDigest {
+    SmtLeaf::Single((key, value)).hash()
+}
+
+fn build_multiple_leaf_node(kv_pairs: &[(RpoDigest, Word)]) -> RpoDigest {
+    let elements: Vec<Felt> = kv_pairs
+        .iter()
+        .flat_map(|(key, value)| {
+            let key_elements = key.into_iter();
+            let value_elements = (*value).into_iter();
+
+            key_elements.chain(value_elements)
+        })
+        .collect();
+
+    Rpo256::hash_elements(&elements)
+}
diff --git a/src/merkle/smt/mod.rs b/src/merkle/smt/mod.rs
index 67bb8167..63e6ac89 100644
--- a/src/merkle/smt/mod.rs
+++ b/src/merkle/smt/mod.rs
@@ -7,6 +7,9 @@ use crate::{
 
 use super::{MerkleError, MerklePath, NodeIndex, Vec};
 
+mod full;
+pub use full::{Smt, SmtLeaf, SMT_DEPTH};
+
 mod simple;
 pub use simple::SimpleSmt;
 