From 30f61f2e16bd26c468f39bd013f24bc15d66e997 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Tue, 3 Dec 2024 04:12:37 +0000 Subject: [PATCH 01/42] Cleanup triples interface insert --- .../node/src/protocol/presignature.rs | 4 +- chain-signatures/node/src/protocol/triple.rs | 39 ++++++------------- .../node/src/storage/triple_storage.rs | 32 +++++++-------- .../chain-signatures/tests/cases/mod.rs | 8 ++-- 4 files changed, 31 insertions(+), 52 deletions(-) diff --git a/chain-signatures/node/src/protocol/presignature.rs b/chain-signatures/node/src/protocol/presignature.rs index 865df519..96d70b6a 100644 --- a/chain-signatures/node/src/protocol/presignature.rs +++ b/chain-signatures/node/src/protocol/presignature.rs @@ -436,8 +436,8 @@ impl PresignatureManager { // Insert back the triples to be used later since this active set of // participants were not able to make use of these triples. - triple_manager.insert_mine(triple0).await; - triple_manager.insert_mine(triple1).await; + triple_manager.insert(triple0, true).await; + triple_manager.insert(triple1, true).await; } else { self.generate( &presig_participants, diff --git a/chain-signatures/node/src/protocol/triple.rs b/chain-signatures/node/src/protocol/triple.rs index 2e80d338..22749858 100644 --- a/chain-signatures/node/src/protocol/triple.rs +++ b/chain-signatures/node/src/protocol/triple.rs @@ -144,19 +144,11 @@ impl TripleManager { } } - pub async fn insert(&mut self, triple: Triple) { - tracing::debug!(id = triple.id, "inserting triple"); + pub async fn insert(&mut self, triple: Triple, mine: bool) { + tracing::debug!(id = triple.id, mine, "inserting triple"); self.gc.remove(&triple.id); - if let Err(e) = self.triple_storage.insert(triple).await { - tracing::warn!(?e, "failed to insert triple"); - } - } - - pub async fn insert_mine(&mut self, triple: Triple) { - tracing::debug!(id = triple.id, "inserting mine triple"); - self.gc.remove(&triple.id); - if let Err(e) = self.triple_storage.insert_mine(triple).await { - tracing::warn!(?e, "failed to insert mine triple"); + if let Err(e) = self.triple_storage.insert(triple, mine).await { + tracing::warn!(?e, mine, "failed to insert triple"); } } @@ -209,7 +201,7 @@ impl TripleManager { let triple_1 = match triples.take(&id1).await { Ok(Some(triple)) => triple, Ok(None) => { - if let Err(e) = triples.insert(triple_0).await { + if let Err(e) = triples.insert(triple_0, false).await { tracing::warn!(id0, ?e, "failed to insert triple back"); } if self.generators.contains_key(&id1) { @@ -225,7 +217,7 @@ impl TripleManager { } Err(e) => { tracing::warn!(id1, ?e, "failed to take triple"); - if let Err(e) = triples.insert(triple_0).await { + if let Err(e) = triples.insert(triple_0, false).await { tracing::warn!(id0, ?e, "failed to insert triple back"); } return Err(GenerationError::TripleIsMissing(id1)); @@ -264,7 +256,7 @@ impl TripleManager { let triple_1 = match triples.take_mine().await { Ok(Some(triple)) => triple, Ok(None) => { - if let Err(e) = triples.insert_mine(triple_0).await { + if let Err(e) = triples.insert(triple_0, true).await { tracing::warn!(?e, "failed to insert mine triple back"); } tracing::warn!("no mine triple left"); @@ -272,7 +264,7 @@ impl TripleManager { } Err(e) => { tracing::warn!(?e, "failed to take mine triple"); - if let Err(e) = triples.insert_mine(triple_0).await { + if let Err(e) = triples.insert(triple_0, true).await { tracing::warn!(?e, "failed to insert mine triple back"); } return None; @@ -460,8 +452,7 @@ impl TripleManager { let mut messages = Vec::new(); let 
mut errors = Vec::new(); - let mut new_triples = Vec::new(); - let mut new_mine_triples = Vec::new(); + let mut triples = Vec::new(); self.generators.retain(|id, generator| { if !self.ongoing.contains(id) { // If the protocol is not ongoing, we should retain it for the next time @@ -564,13 +555,11 @@ impl TripleManager { triple_owner == self.me }; + triples.push((triple, triple_is_mine)); if triple_is_mine { - new_mine_triples.push(triple.clone()); crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATIONS_MINE_SUCCESS .with_label_values(&[self.my_account_id.as_str()]) .inc(); - } else { - new_triples.push(triple.clone()); } // Protocol done, remove it from the ongoing pool. @@ -583,12 +572,8 @@ impl TripleManager { } }); - for triple in new_triples { - self.insert(triple).await; - } - - for triple in new_mine_triples { - self.insert_mine(triple).await; + for (triple, mine) in triples { + self.insert(triple, mine).await; } if !errors.is_empty() { diff --git a/chain-signatures/node/src/storage/triple_storage.rs b/chain-signatures/node/src/storage/triple_storage.rs index a62944fa..b8dfcd9b 100644 --- a/chain-signatures/node/src/storage/triple_storage.rs +++ b/chain-signatures/node/src/storage/triple_storage.rs @@ -5,8 +5,6 @@ use redis::{AsyncCommands, FromRedisValue, RedisWrite, ToRedisArgs}; use near_account_id::AccountId; -type TripleResult = std::result::Result; - // Can be used to "clear" redis storage in case of a breaking change const TRIPLE_STORAGE_VERSION: &str = "v2"; @@ -24,37 +22,33 @@ pub struct TripleStorage { } impl TripleStorage { - pub async fn insert(&self, triple: Triple) -> TripleResult<()> { + pub async fn insert(&self, triple: Triple, mine: bool) -> anyhow::Result<()> { let mut conn = self.redis_pool.get().await?; - conn.hset::<&str, TripleId, Triple, ()>(&self.triple_key(), triple.id, triple) + if mine { + conn.sadd::<&str, TripleId, ()>(&self.mine_key(), triple.id) .await?; - Ok(()) - } - - pub async fn insert_mine(&self, triple: Triple) -> TripleResult<()> { - let mut conn = self.redis_pool.get().await?; - conn.sadd::<&str, TripleId, ()>(&self.mine_key(), triple.id) + } + conn.hset::<&str, TripleId, Triple, ()>(&self.triple_key(), triple.id, triple) .await?; - self.insert(triple).await?; Ok(()) } - pub async fn contains(&self, id: &TripleId) -> TripleResult { + pub async fn contains(&self, id: &TripleId) -> anyhow::Result { let mut conn = self.redis_pool.get().await?; let result: bool = conn.hexists(self.triple_key(), id).await?; Ok(result) } - pub async fn contains_mine(&self, id: &TripleId) -> TripleResult { + pub async fn contains_mine(&self, id: &TripleId) -> anyhow::Result { let mut conn = self.redis_pool.get().await?; let result: bool = conn.sismember(self.mine_key(), id).await?; Ok(result) } - pub async fn take(&self, id: &TripleId) -> TripleResult> { + pub async fn take(&self, id: &TripleId) -> anyhow::Result> { let mut conn = self.redis_pool.get().await?; if self.contains_mine(id).await? 
{ - tracing::error!("Can not take mine triple as foreign: {:?}", id); + tracing::error!("cannot take mine triple as foreign: {:?}", id); return Ok(None); } let result: Option = conn.hget(self.triple_key(), id).await?; @@ -68,7 +62,7 @@ impl TripleStorage { } } - pub async fn take_mine(&self) -> TripleResult> { + pub async fn take_mine(&self) -> anyhow::Result> { let mut conn = self.redis_pool.get().await?; let id: Option = conn.spop(self.mine_key()).await?; match id { @@ -77,19 +71,19 @@ impl TripleStorage { } } - pub async fn len_generated(&self) -> TripleResult { + pub async fn len_generated(&self) -> anyhow::Result { let mut conn = self.redis_pool.get().await?; let result: usize = conn.hlen(self.triple_key()).await?; Ok(result) } - pub async fn len_mine(&self) -> TripleResult { + pub async fn len_mine(&self) -> anyhow::Result { let mut conn = self.redis_pool.get().await?; let result: usize = conn.scard(self.mine_key()).await?; Ok(result) } - pub async fn clear(&self) -> TripleResult<()> { + pub async fn clear(&self) -> anyhow::Result<()> { let mut conn = self.redis_pool.get().await?; conn.del::<&str, ()>(&self.triple_key()).await?; conn.del::<&str, ()>(&self.mine_key()).await?; diff --git a/integration-tests/chain-signatures/tests/cases/mod.rs b/integration-tests/chain-signatures/tests/cases/mod.rs index 4c352913..7122b698 100644 --- a/integration-tests/chain-signatures/tests/cases/mod.rs +++ b/integration-tests/chain-signatures/tests/cases/mod.rs @@ -240,8 +240,8 @@ async fn test_triple_persistence() -> anyhow::Result<()> { assert!(triple_manager.is_empty().await); assert_eq!(triple_manager.len_potential().await, 0); - triple_manager.insert(triple_1).await; - triple_manager.insert(triple_2).await; + triple_manager.insert(triple_1, false).await; + triple_manager.insert(triple_2, false).await; // Check that the storage contains the foreign triple assert!(triple_manager.contains(&triple_id_1).await); @@ -271,8 +271,8 @@ async fn test_triple_persistence() -> anyhow::Result<()> { let mine_triple_2 = dummy_triple(mine_id_2); // Add mine triple and check that it is in the storage - triple_manager.insert_mine(mine_triple_1).await; - triple_manager.insert_mine(mine_triple_2).await; + triple_manager.insert(mine_triple_1, true).await; + triple_manager.insert(mine_triple_2, true).await; assert!(triple_manager.contains(&mine_id_1).await); assert!(triple_manager.contains(&mine_id_2).await); assert!(triple_manager.contains_mine(&mine_id_1).await); From 02e6a8e888171a1e3dd89a65b8c127a28556f8af Mon Sep 17 00:00:00 2001 From: Phuong N Date: Tue, 3 Dec 2024 05:29:48 +0000 Subject: [PATCH 02/42] Added triple store error --- chain-signatures/node/src/protocol/triple.rs | 90 +++++++------------ chain-signatures/node/src/storage/error.rs | 20 +++++ chain-signatures/node/src/storage/mod.rs | 1 + .../node/src/storage/triple_storage.rs | 72 ++++++++------- 4 files changed, 91 insertions(+), 92 deletions(-) create mode 100644 chain-signatures/node/src/storage/error.rs diff --git a/chain-signatures/node/src/protocol/triple.rs b/chain-signatures/node/src/protocol/triple.rs index 22749858..89fe9603 100644 --- a/chain-signatures/node/src/protocol/triple.rs +++ b/chain-signatures/node/src/protocol/triple.rs @@ -2,6 +2,7 @@ use super::contract::primitives::Participants; use super::cryptography::CryptographicError; use super::message::TripleMessage; use super::presignature::GenerationError; +use crate::storage::error::StoreError; use crate::storage::triple_storage::TripleStorage; use crate::types::TripleProtocol; use 
crate::util::AffinePointExt; @@ -168,6 +169,27 @@ impl TripleManager { .unwrap_or(false) } + async fn take(&self, id: &TripleId) -> Result { + self.triple_storage.take(id).await.map_err(|err| match err { + StoreError::TripleIsMissing(_) | StoreError::TripleDenied(_, _) => { + if self.generators.contains_key(id) { + tracing::warn!(id, "triple is generating"); + return GenerationError::TripleIsGenerating(*id); + } else if self.gc.contains_key(id) { + tracing::warn!(id, "triple is garbage collected"); + return GenerationError::TripleIsGarbageCollected(*id); + } else { + tracing::warn!(id, "triple is missing"); + return GenerationError::TripleIsMissing(*id); + } + } + e => { + tracing::warn!(id, ?e, "failed to take triple"); + return GenerationError::TripleIsMissing(*id); + } + }) + } + /// Take two unspent triple by theirs id with no way to return it. Only takes /// if both of them are present. /// It is very important to NOT reuse the same triple twice for two different @@ -177,56 +199,18 @@ impl TripleManager { id0: TripleId, id1: TripleId, ) -> Result<(Triple, Triple), GenerationError> { - let triples = &self.triple_storage; - let triple_0 = match triples.take(&id0).await { - Ok(Some(triple)) => triple, - Ok(None) => { - if self.generators.contains_key(&id0) { - tracing::warn!(id0, "triple is generating"); - return Err(GenerationError::TripleIsGenerating(id0)); - } else if self.gc.contains_key(&id0) { - tracing::warn!(id0, "triple is garbage collected"); - return Err(GenerationError::TripleIsGarbageCollected(id0)); - } else { - tracing::warn!(id0, "triple is missing"); - return Err(GenerationError::TripleIsMissing(id0)); + let triple_0 = self.take(&id0).await?; + let triple_1 = match self.take(&id1).await { + Ok(triple) => triple, + Err(err) => { + if let Err(e) = self.triple_storage.insert(triple_0, false).await { + tracing::warn!(?e, id0, "failed to insert triple back"); } - } - Err(e) => { - tracing::warn!(id0, ?e, "failed to take triple"); - return Err(GenerationError::TripleIsMissing(id0)); + return Err(err); } }; - - let triple_1 = match triples.take(&id1).await { - Ok(Some(triple)) => triple, - Ok(None) => { - if let Err(e) = triples.insert(triple_0, false).await { - tracing::warn!(id0, ?e, "failed to insert triple back"); - } - if self.generators.contains_key(&id1) { - tracing::warn!(id1, "triple is generating"); - return Err(GenerationError::TripleIsGenerating(id1)); - } else if self.gc.contains_key(&id1) { - tracing::warn!(id1, "triple is garbage collected"); - return Err(GenerationError::TripleIsGarbageCollected(id1)); - } else { - tracing::warn!(id1, "triple is missing"); - return Err(GenerationError::TripleIsMissing(id1)); - } - } - Err(e) => { - tracing::warn!(id1, ?e, "failed to take triple"); - if let Err(e) = triples.insert(triple_0, false).await { - tracing::warn!(id0, ?e, "failed to insert triple back"); - } - return Err(GenerationError::TripleIsMissing(id1)); - } - }; - self.gc.insert(id0, Instant::now()); self.gc.insert(id1, Instant::now()); - tracing::debug!(id0, id1, "took two triples"); Ok((triple_0, triple_1)) @@ -242,26 +226,14 @@ impl TripleManager { return None; } let triple_0 = match triples.take_mine().await { - Ok(Some(triple)) => triple, - Ok(None) => { - tracing::warn!("no mine triple left"); - return None; - } + Ok(triple) => triple, Err(e) => { tracing::warn!(?e, "failed to take mine triple"); return None; } }; - let triple_1 = match triples.take_mine().await { - Ok(Some(triple)) => triple, - Ok(None) => { - if let Err(e) = triples.insert(triple_0, 
true).await { - tracing::warn!(?e, "failed to insert mine triple back"); - } - tracing::warn!("no mine triple left"); - return None; - } + Ok(triple) => triple, Err(e) => { tracing::warn!(?e, "failed to take mine triple"); if let Err(e) = triples.insert(triple_0, true).await { diff --git a/chain-signatures/node/src/storage/error.rs b/chain-signatures/node/src/storage/error.rs new file mode 100644 index 00000000..112db343 --- /dev/null +++ b/chain-signatures/node/src/storage/error.rs @@ -0,0 +1,20 @@ +use crate::protocol::presignature::PresignatureId; +use crate::protocol::triple::TripleId; + +pub type StoreResult = std::result::Result; + +#[derive(Debug, thiserror::Error)] +pub enum StoreError { + #[error("redis error: {0}")] + Redis(#[from] redis::RedisError), + #[error("storage connection error: {0}")] + Connect(#[from] anyhow::Error), + #[error("missing triple: id={0}")] + TripleIsMissing(TripleId), + #[error("invalid triple: id={0}, {1}")] + TripleDenied(TripleId, &'static str), + #[error("missing presignature: {0}")] + PresignatureIsMissing(PresignatureId), + #[error("empty: {0}")] + Empty(&'static str), +} diff --git a/chain-signatures/node/src/storage/mod.rs b/chain-signatures/node/src/storage/mod.rs index 55231df9..7aa24eb5 100644 --- a/chain-signatures/node/src/storage/mod.rs +++ b/chain-signatures/node/src/storage/mod.rs @@ -2,6 +2,7 @@ pub mod app_data_storage; pub mod presignature_storage; pub mod secret_storage; pub mod triple_storage; +pub mod error; /// Configures storage. #[derive(Debug, Clone, clap::Parser)] diff --git a/chain-signatures/node/src/storage/triple_storage.rs b/chain-signatures/node/src/storage/triple_storage.rs index b8dfcd9b..1c3a7461 100644 --- a/chain-signatures/node/src/storage/triple_storage.rs +++ b/chain-signatures/node/src/storage/triple_storage.rs @@ -1,6 +1,7 @@ use crate::protocol::triple::{Triple, TripleId}; +use crate::storage::error::{StoreError, StoreResult}; -use deadpool_redis::Pool; +use deadpool_redis::{Connection, Pool}; use redis::{AsyncCommands, FromRedisValue, RedisWrite, ToRedisArgs}; use near_account_id::AccountId; @@ -22,69 +23,74 @@ pub struct TripleStorage { } impl TripleStorage { - pub async fn insert(&self, triple: Triple, mine: bool) -> anyhow::Result<()> { - let mut conn = self.redis_pool.get().await?; + async fn connect(&self) -> StoreResult { + self.redis_pool + .get() + .await + .map_err(anyhow::Error::new) + .map_err(StoreError::Connect) + } + + pub async fn insert(&self, triple: Triple, mine: bool) -> StoreResult<()> { + let mut conn = self.connect().await?; if mine { conn.sadd::<&str, TripleId, ()>(&self.mine_key(), triple.id) - .await?; + .await?; } conn.hset::<&str, TripleId, Triple, ()>(&self.triple_key(), triple.id, triple) .await?; Ok(()) } - pub async fn contains(&self, id: &TripleId) -> anyhow::Result { - let mut conn = self.redis_pool.get().await?; + pub async fn contains(&self, id: &TripleId) -> StoreResult { + let mut conn = self.connect().await?; let result: bool = conn.hexists(self.triple_key(), id).await?; Ok(result) } - pub async fn contains_mine(&self, id: &TripleId) -> anyhow::Result { - let mut conn = self.redis_pool.get().await?; + pub async fn contains_mine(&self, id: &TripleId) -> StoreResult { + let mut conn = self.connect().await?; let result: bool = conn.sismember(self.mine_key(), id).await?; Ok(result) } - pub async fn take(&self, id: &TripleId) -> anyhow::Result> { - let mut conn = self.redis_pool.get().await?; + pub async fn take(&self, id: &TripleId) -> StoreResult { + let mut conn = 
self.connect().await?; if self.contains_mine(id).await? { - tracing::error!("cannot take mine triple as foreign: {:?}", id); - return Ok(None); - } - let result: Option = conn.hget(self.triple_key(), id).await?; - match result { - Some(triple) => { - conn.hdel::<&str, TripleId, ()>(&self.triple_key(), *id) - .await?; - Ok(Some(triple)) - } - None => Ok(None), + tracing::error!(?id, "cannot take mine triple as foreign owned"); + return Err(StoreError::TripleDenied( + *id, + "cannot take mine triple as foreign owned", + )); } + let triple: Option = conn.hget(self.triple_key(), id).await?; + let triple = triple.ok_or_else(|| StoreError::TripleIsMissing(*id))?; + conn.hdel::<&str, TripleId, ()>(&self.triple_key(), *id) + .await?; + Ok(triple) } - pub async fn take_mine(&self) -> anyhow::Result> { - let mut conn = self.redis_pool.get().await?; + pub async fn take_mine(&self) -> StoreResult { + let mut conn = self.connect().await?; let id: Option = conn.spop(self.mine_key()).await?; - match id { - Some(id) => self.take(&id).await, - None => Ok(None), - } + let id = id.ok_or_else(|| StoreError::Empty("mine triple stockpile"))?; + self.take(&id).await } - pub async fn len_generated(&self) -> anyhow::Result { - let mut conn = self.redis_pool.get().await?; + pub async fn len_generated(&self) -> StoreResult { + let mut conn = self.connect().await?; let result: usize = conn.hlen(self.triple_key()).await?; Ok(result) } - pub async fn len_mine(&self) -> anyhow::Result { - let mut conn = self.redis_pool.get().await?; + pub async fn len_mine(&self) -> StoreResult { + let mut conn = self.connect().await?; let result: usize = conn.scard(self.mine_key()).await?; Ok(result) } - pub async fn clear(&self) -> anyhow::Result<()> { - let mut conn = self.redis_pool.get().await?; + pub async fn clear(&self) -> StoreResult<()> { + let mut conn = self.connect().await?; conn.del::<&str, ()>(&self.triple_key()).await?; conn.del::<&str, ()>(&self.mine_key()).await?; Ok(()) From d1f65e1a6aee8e70e755849b15ed527f30f332d1 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Tue, 3 Dec 2024 06:09:20 +0000 Subject: [PATCH 03/42] Cleaned presignature store --- .../node/src/protocol/presignature.rs | 86 ++++++-------- .../node/src/protocol/signature.rs | 4 +- chain-signatures/node/src/protocol/triple.rs | 31 ++--- chain-signatures/node/src/storage/error.rs | 4 +- .../node/src/storage/presignature_storage.rs | 112 +++++++++--------- .../node/src/storage/triple_storage.rs | 4 +- .../chain-signatures/tests/cases/mod.rs | 4 +- 7 files changed, 110 insertions(+), 135 deletions(-) diff --git a/chain-signatures/node/src/protocol/presignature.rs b/chain-signatures/node/src/protocol/presignature.rs index 96d70b6a..c2e3ca9f 100644 --- a/chain-signatures/node/src/protocol/presignature.rs +++ b/chain-signatures/node/src/protocol/presignature.rs @@ -185,21 +185,14 @@ impl PresignatureManager { } } - pub async fn insert(&mut self, presignature: Presignature) { - tracing::debug!(id = ?presignature.id, "inserting presignature"); - // Remove from taken list if it was there - self.gc.remove(&presignature.id); - if let Err(e) = self.presignature_storage.insert(presignature).await { - tracing::error!(?e, "failed to insert presignature"); - } - } - - pub async fn insert_mine(&mut self, presignature: Presignature) { - tracing::debug!(id = ?presignature.id, "inserting mine presignature"); - // Remove from taken list if it was there - self.gc.remove(&presignature.id); - if let Err(e) = self.presignature_storage.insert_mine(presignature).await { - 
tracing::error!(?e, "failed to insert mine presignature"); + pub async fn insert(&mut self, presignature: Presignature, mine: bool) { + let id = presignature.id; + tracing::debug!(id, mine, "inserting presignature"); + if let Err(store_err) = self.presignature_storage.insert(presignature, mine).await { + tracing::error!(?store_err, mine, "failed to insert presignature"); + } else { + // Remove from taken list if it was there + self.gc.remove(&id); } } @@ -226,41 +219,39 @@ impl PresignatureManager { } pub async fn take(&mut self, id: PresignatureId) -> Result { - if let Some(presignature) = self.presignature_storage.take(&id).await.map_err(|e| { - tracing::error!(?e, "failed to look for presignature"); - GenerationError::PresignatureIsMissing(id) - })? { - self.gc.insert(id, Instant::now()); - tracing::debug!(id, "took presignature"); - return Ok(presignature); - }; + let presignature = self + .presignature_storage + .take(&id) + .await + .map_err(|store_err| { + if self.generators.contains_key(&id) { + tracing::warn!(id, ?store_err, "presignature is still generating"); + GenerationError::PresignatureIsGenerating(id) + } else if self.gc.contains_key(&id) { + tracing::warn!(id, ?store_err, "presignature was garbage collected"); + GenerationError::PresignatureIsGarbageCollected(id) + } else { + tracing::warn!(id, ?store_err, "presignature is missing"); + GenerationError::PresignatureIsMissing(id) + } + })?; - if self.generators.contains_key(&id) { - tracing::warn!(id, "presignature is still generating"); - return Err(GenerationError::PresignatureIsGenerating(id)); - } - if self.gc.contains_key(&id) { - tracing::warn!(id, "presignature was garbage collected"); - return Err(GenerationError::PresignatureIsGarbageCollected(id)); - } - tracing::warn!(id, "presignature is missing"); - Err(GenerationError::PresignatureIsMissing(id)) + self.gc.insert(id, Instant::now()); + tracing::debug!(id, "took presignature"); + Ok(presignature) } pub async fn take_mine(&mut self) -> Option { - if let Some(presignature) = self + let presignature = self .presignature_storage .take_mine() .await .map_err(|e| { tracing::error!(?e, "failed to look for mine presignature"); }) - .ok()? - { - tracing::debug!(id = ?presignature.id, "took presignature of mine"); - return Some(presignature); - } - None + .ok()?; + tracing::debug!(id = ?presignature.id, "took presignature of mine"); + return Some(presignature); } /// Returns the number of unspent presignatures available in the manager. 
@@ -554,8 +545,7 @@ impl PresignatureManager { pub async fn poke(&mut self) -> Vec<(Participant, PresignatureMessage)> { let mut messages = Vec::new(); let mut errors = Vec::new(); - let mut new_presignatures = Vec::new(); - let mut new_mine_presignatures = Vec::new(); + let mut presignatures = Vec::new(); self.generators.retain(|id, generator| { loop { let action = match generator.poke() { @@ -618,13 +608,11 @@ impl PresignatureManager { }; if generator.mine { tracing::info!(id, "assigning presignature to myself"); - new_mine_presignatures.push(presignature); crate::metrics::NUM_TOTAL_HISTORICAL_PRESIGNATURE_GENERATORS_MINE_SUCCESS .with_label_values(&[self.my_account_id.as_str()]) .inc(); - } else { - new_presignatures.push(presignature); } + presignatures.push((presignature, generator.mine)); self.introduced.remove(id); crate::metrics::PRESIGNATURE_LATENCY @@ -640,12 +628,8 @@ impl PresignatureManager { } }); - for presignature in new_presignatures { - self.insert(presignature).await; - } - - for presignature in new_mine_presignatures { - self.insert_mine(presignature).await; + for (presignature, mine) in presignatures { + self.insert(presignature, mine).await; } if !errors.is_empty() { diff --git a/chain-signatures/node/src/protocol/signature.rs b/chain-signatures/node/src/protocol/signature.rs index ea9f0087..e1690565 100644 --- a/chain-signatures/node/src/protocol/signature.rs +++ b/chain-signatures/node/src/protocol/signature.rs @@ -486,7 +486,7 @@ impl SignatureManager { ) { Ok(generator) => generator, Err((presignature, err @ InitializationError::BadParameters(_))) => { - presignature_manager.insert_mine(presignature).await; + presignature_manager.insert(presignature, true).await; tracing::warn!(sign_request = ?sign_request_identifier, presignature_id, ?err, "failed to start signature generation"); return Err(GenerationError::CaitSithInitializationError(err)); } @@ -703,7 +703,7 @@ impl SignatureManager { // add back the failed presignatures that were incompatible to be made into // signatures due to failures or lack of participants. 
for presignature in failed_presigs { - presignature_manager.insert_mine(presignature).await; + presignature_manager.insert(presignature, true).await; } } diff --git a/chain-signatures/node/src/protocol/triple.rs b/chain-signatures/node/src/protocol/triple.rs index 89fe9603..6ea709a2 100644 --- a/chain-signatures/node/src/protocol/triple.rs +++ b/chain-signatures/node/src/protocol/triple.rs @@ -2,7 +2,6 @@ use super::contract::primitives::Participants; use super::cryptography::CryptographicError; use super::message::TripleMessage; use super::presignature::GenerationError; -use crate::storage::error::StoreError; use crate::storage::triple_storage::TripleStorage; use crate::types::TripleProtocol; use crate::util::AffinePointExt; @@ -170,22 +169,16 @@ impl TripleManager { } async fn take(&self, id: &TripleId) -> Result { - self.triple_storage.take(id).await.map_err(|err| match err { - StoreError::TripleIsMissing(_) | StoreError::TripleDenied(_, _) => { - if self.generators.contains_key(id) { - tracing::warn!(id, "triple is generating"); - return GenerationError::TripleIsGenerating(*id); - } else if self.gc.contains_key(id) { - tracing::warn!(id, "triple is garbage collected"); - return GenerationError::TripleIsGarbageCollected(*id); - } else { - tracing::warn!(id, "triple is missing"); - return GenerationError::TripleIsMissing(*id); - } - } - e => { - tracing::warn!(id, ?e, "failed to take triple"); - return GenerationError::TripleIsMissing(*id); + self.triple_storage.take(id).await.map_err(|store_err| { + if self.generators.contains_key(id) { + tracing::warn!(id, ?store_err, "triple is generating"); + GenerationError::TripleIsGenerating(*id) + } else if self.gc.contains_key(id) { + tracing::warn!(id, ?store_err, "triple is garbage collected"); + GenerationError::TripleIsGarbageCollected(*id) + } else { + tracing::warn!(id, ?store_err, "triple is missing"); + GenerationError::TripleIsMissing(*id) } }) } @@ -203,8 +196,8 @@ impl TripleManager { let triple_1 = match self.take(&id1).await { Ok(triple) => triple, Err(err) => { - if let Err(e) = self.triple_storage.insert(triple_0, false).await { - tracing::warn!(?e, id0, "failed to insert triple back"); + if let Err(store_err) = self.triple_storage.insert(triple_0, false).await { + tracing::warn!(?store_err, id0, "failed to insert triple back"); } return Err(err); } diff --git a/chain-signatures/node/src/storage/error.rs b/chain-signatures/node/src/storage/error.rs index 112db343..0cbcec23 100644 --- a/chain-signatures/node/src/storage/error.rs +++ b/chain-signatures/node/src/storage/error.rs @@ -11,10 +11,12 @@ pub enum StoreError { Connect(#[from] anyhow::Error), #[error("missing triple: id={0}")] TripleIsMissing(TripleId), - #[error("invalid triple: id={0}, {1}")] + #[error("triple access denied: id={0}, {1}")] TripleDenied(TripleId, &'static str), #[error("missing presignature: {0}")] PresignatureIsMissing(PresignatureId), + #[error("presignature access denied: id={0}, {1}")] + PresignatureDenied(PresignatureId, &'static str), #[error("empty: {0}")] Empty(&'static str), } diff --git a/chain-signatures/node/src/storage/presignature_storage.rs b/chain-signatures/node/src/storage/presignature_storage.rs index 159edaea..e8be876d 100644 --- a/chain-signatures/node/src/storage/presignature_storage.rs +++ b/chain-signatures/node/src/storage/presignature_storage.rs @@ -1,11 +1,9 @@ -use anyhow::Ok; -use deadpool_redis::Pool; +use deadpool_redis::{Connection, Pool}; use near_sdk::AccountId; use redis::{AsyncCommands, FromRedisValue, RedisWrite, 
ToRedisArgs}; use crate::protocol::presignature::{Presignature, PresignatureId}; - -type PresigResult = std::result::Result; +use crate::storage::error::{StoreError, StoreResult}; // Can be used to "clear" redis storage in case of a breaking change const PRESIGNATURE_STORAGE_VERSION: &str = "v2"; @@ -24,82 +22,80 @@ pub struct PresignatureStorage { } impl PresignatureStorage { - pub async fn insert(&self, presignature: Presignature) -> PresigResult<()> { - let mut connection = self.redis_pool.get().await?; - connection - .hset::<&str, PresignatureId, Presignature, ()>( - &self.presig_key(), - presignature.id, - presignature, - ) - .await?; - Ok(()) + async fn connect(&self) -> StoreResult { + self.redis_pool + .get() + .await + .map_err(anyhow::Error::new) + .map_err(StoreError::Connect) } - pub async fn insert_mine(&self, presignature: Presignature) -> PresigResult<()> { - let mut connection = self.redis_pool.get().await?; - connection - .sadd::<&str, PresignatureId, ()>(&self.mine_key(), presignature.id) - .await?; - self.insert(presignature).await?; + pub async fn insert(&self, presignature: Presignature, mine: bool) -> StoreResult<()> { + let mut conn = self.connect().await?; + if mine { + conn.sadd::<&str, PresignatureId, ()>(&self.mine_key(), presignature.id) + .await?; + } + conn.hset::<&str, PresignatureId, Presignature, ()>( + &self.presig_key(), + presignature.id, + presignature, + ) + .await?; Ok(()) } - pub async fn contains(&self, id: &PresignatureId) -> PresigResult { - let mut connection = self.redis_pool.get().await?; - let result: bool = connection.hexists(self.presig_key(), id).await?; + pub async fn contains(&self, id: &PresignatureId) -> StoreResult { + let mut conn = self.connect().await?; + let result: bool = conn.hexists(self.presig_key(), id).await?; Ok(result) } - pub async fn contains_mine(&self, id: &PresignatureId) -> PresigResult { - let mut connection = self.redis_pool.get().await?; + pub async fn contains_mine(&self, id: &PresignatureId) -> StoreResult { + let mut connection = self.connect().await?; let result: bool = connection.sismember(self.mine_key(), id).await?; Ok(result) } - pub async fn take(&self, id: &PresignatureId) -> PresigResult> { - let mut connection = self.redis_pool.get().await?; + pub async fn take(&self, id: &PresignatureId) -> StoreResult { + let mut conn = self.connect().await?; if self.contains_mine(id).await? 
{ - tracing::error!("Can not take mine presignature as foreign: {:?}", id); - return Ok(None); - } - let result: Option = connection.hget(self.presig_key(), id).await?; - match result { - Some(presignature) => { - connection - .hdel::<&str, PresignatureId, ()>(&self.presig_key(), *id) - .await?; - Ok(Some(presignature)) - } - None => Ok(None), + tracing::error!(?id, "cannot take mine presignature as foreign owned"); + return Err(StoreError::PresignatureDenied( + *id, + "cannot take mine presignature as foreign owned", + )); } + let presignature: Option = conn.hget(self.presig_key(), id).await?; + let presignature = presignature.ok_or_else(|| StoreError::PresignatureIsMissing(*id))?; + conn.hdel::<&str, PresignatureId, ()>(&self.presig_key(), *id) + .await?; + Ok(presignature) } - pub async fn take_mine(&self) -> PresigResult> { - let mut connection = self.redis_pool.get().await?; - let id: Option = connection.spop(self.mine_key()).await?; - match id { - Some(id) => self.take(&id).await, - None => Ok(None), - } + pub async fn take_mine(&self) -> StoreResult { + let mut conn = self.connect().await?; + let id: Option = conn.spop(self.mine_key()).await?; + let id = id.ok_or_else(|| StoreError::Empty("mine presignature stockpile"))?; + self.take(&id).await } - pub async fn len_generated(&self) -> PresigResult { - let mut connection = self.redis_pool.get().await?; - let result: usize = connection.hlen(self.presig_key()).await?; + pub async fn len_generated(&self) -> StoreResult { + let mut conn = self.connect().await?; + let result: usize = conn.hlen(self.presig_key()).await?; Ok(result) } - pub async fn len_mine(&self) -> PresigResult { - let mut connection = self.redis_pool.get().await?; - let result: usize = connection.scard(self.mine_key()).await?; + pub async fn len_mine(&self) -> StoreResult { + let mut conn = self.connect().await?; + let result: usize = conn.scard(self.mine_key()).await?; Ok(result) } - pub async fn clear(&self) -> PresigResult<()> { - let mut connection = self.redis_pool.get().await?; - connection.del::<&str, ()>(&self.presig_key()).await?; - connection.del::<&str, ()>(&self.mine_key()).await?; + pub async fn clear(&self) -> StoreResult<()> { + let mut conn = self.connect().await?; + conn.del::<&str, ()>(&self.presig_key()).await?; + conn.del::<&str, ()>(&self.mine_key()).await?; Ok(()) } @@ -124,7 +120,7 @@ impl ToRedisArgs for Presignature { W: ?Sized + RedisWrite, { match serde_json::to_string(self) { - std::result::Result::Ok(json) => out.write_arg(json.as_bytes()), + Ok(json) => out.write_arg(json.as_bytes()), Err(e) => { tracing::error!("Failed to serialize Presignature: {}", e); out.write_arg("failed_to_serialize".as_bytes()) @@ -135,7 +131,7 @@ impl ToRedisArgs for Presignature { impl FromRedisValue for Presignature { fn from_redis_value(v: &redis::Value) -> redis::RedisResult { - let json: String = String::from_redis_value(v)?; + let json = String::from_redis_value(v)?; serde_json::from_str(&json).map_err(|e| { redis::RedisError::from(( diff --git a/chain-signatures/node/src/storage/triple_storage.rs b/chain-signatures/node/src/storage/triple_storage.rs index 1c3a7461..9974a80b 100644 --- a/chain-signatures/node/src/storage/triple_storage.rs +++ b/chain-signatures/node/src/storage/triple_storage.rs @@ -117,7 +117,7 @@ impl ToRedisArgs for Triple { W: ?Sized + RedisWrite, { match serde_json::to_string(self) { - std::result::Result::Ok(json) => out.write_arg(json.as_bytes()), + Ok(json) => out.write_arg(json.as_bytes()), Err(e) => { tracing::error!("Failed to 
serialize Triple: {}", e); out.write_arg("failed_to_serialize".as_bytes()) @@ -128,7 +128,7 @@ impl ToRedisArgs for Triple { impl FromRedisValue for Triple { fn from_redis_value(v: &redis::Value) -> redis::RedisResult { - let json: String = String::from_redis_value(v)?; + let json = String::from_redis_value(v)?; serde_json::from_str(&json).map_err(|e| { redis::RedisError::from(( diff --git a/integration-tests/chain-signatures/tests/cases/mod.rs b/integration-tests/chain-signatures/tests/cases/mod.rs index 7122b698..d8f9e489 100644 --- a/integration-tests/chain-signatures/tests/cases/mod.rs +++ b/integration-tests/chain-signatures/tests/cases/mod.rs @@ -327,7 +327,7 @@ async fn test_presignature_persistence() -> anyhow::Result<()> { assert!(presignature_manager.is_empty().await); assert_eq!(presignature_manager.len_potential().await, 0); - presignature_manager.insert(presignature).await; + presignature_manager.insert(presignature, false).await; // Check that the storage contains the foreign presignature assert!(presignature_manager.contains(&presignature_id).await); @@ -348,7 +348,7 @@ async fn test_presignature_persistence() -> anyhow::Result<()> { let mine_presig_id: PresignatureId = mine_presignature.id; // Add mine presignature and check that it is in the storage - presignature_manager.insert_mine(mine_presignature).await; + presignature_manager.insert(mine_presignature, true).await; assert!(presignature_manager.contains(&mine_presig_id).await); assert!(presignature_manager.contains_mine(&mine_presig_id).await); assert_eq!(presignature_manager.len_generated().await, 1); From adeb91d4a087cc353746d8525fbbfd2097b9dac6 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Wed, 4 Dec 2024 02:53:53 +0000 Subject: [PATCH 04/42] Upgrade bollard/testcontainers for better test interface --- integration-tests/chain-signatures/Cargo.lock | 236 +++++++++++------ integration-tests/chain-signatures/Cargo.toml | 4 +- .../chain-signatures/src/containers.rs | 246 ++++++++++-------- integration-tests/chain-signatures/src/lib.rs | 54 ++-- .../chain-signatures/src/local.rs | 6 +- .../chain-signatures/tests/actions/mod.rs | 22 +- .../tests/actions/wait_for.rs | 24 +- .../chain-signatures/tests/cases/mod.rs | 4 +- .../chain-signatures/tests/lib.rs | 10 +- 9 files changed, 358 insertions(+), 248 deletions(-) diff --git a/integration-tests/chain-signatures/Cargo.lock b/integration-tests/chain-signatures/Cargo.lock index df129962..eb692747 100644 --- a/integration-tests/chain-signatures/Cargo.lock +++ b/integration-tests/chain-signatures/Cargo.lock @@ -1017,40 +1017,52 @@ dependencies = [ [[package]] name = "bollard" -version = "0.13.0" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82e7850583ead5f8bbef247e2a3c37a19bd576e8420cd262a6711921827e1e5" +checksum = "d41711ad46fda47cd701f6908e59d1bd6b9a2b7464c0d0aeab95c6d37096ff8a" dependencies = [ - "base64 0.13.1", + "base64 0.22.1", "bollard-stubs", "bytes", "futures-core", "futures-util", "hex", - "http 0.2.12", - "hyper 0.14.29", + "home", + "http 1.1.0", + "http-body-util", + "hyper 1.3.1", + "hyper-named-pipe", + "hyper-rustls 0.27.2", + "hyper-util", "hyperlocal", "log", "pin-project-lite", + "rustls 0.23.10", + "rustls-native-certs 0.7.0", + "rustls-pemfile 2.1.2", + "rustls-pki-types", "serde", "serde_derive", "serde_json", + "serde_repr", "serde_urlencoded", "thiserror", "tokio", "tokio-util", + "tower-service", "url 2.5.1", "winapi", ] [[package]] name = "bollard-stubs" -version = "1.42.0-rc.3" +version = 
"1.45.0-rc.26.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed59b5c00048f48d7af971b71f800fdf23e858844a6f9e4d32ca72e9399e7864" +checksum = "6d7c5415e3a6bc6d3e99eff6268e488fd4ee25e7b28c10f08fa6760bd9de16e4" dependencies = [ "serde", - "serde_with 1.14.0", + "serde_repr", + "serde_with", ] [[package]] @@ -1612,16 +1624,6 @@ dependencies = [ "syn 2.0.66", ] -[[package]] -name = "darling" -version = "0.13.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a01d95850c592940db9b8194bc39f4bc0e89dee5c4265e4b1807c34a9aba453c" -dependencies = [ - "darling_core 0.13.4", - "darling_macro 0.13.4", -] - [[package]] name = "darling" version = "0.14.4" @@ -1642,20 +1644,6 @@ dependencies = [ "darling_macro 0.20.9", ] -[[package]] -name = "darling_core" -version = "0.13.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "859d65a907b6852c9361e3185c862aae7fafd2887876799fa55f5f99dc40d610" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim 0.10.0", - "syn 1.0.109", -] - [[package]] name = "darling_core" version = "0.14.4" @@ -1684,17 +1672,6 @@ dependencies = [ "syn 2.0.66", ] -[[package]] -name = "darling_macro" -version = "0.13.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c972679f83bdf9c42bd905396b6c3588a843a17f0f16dfcfa3e2c5d57441835" -dependencies = [ - "darling_core 0.13.4", - "quote", - "syn 1.0.109", -] - [[package]] name = "darling_macro" version = "0.14.4" @@ -1900,6 +1877,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0bc8fbe9441c17c9f46f75dfe27fa1ddb6c68a461ccaed0481419219d4f10d3" +[[package]] +name = "docker_credential" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31951f49556e34d90ed28342e1df7e1cb7a229c4cab0aecc627b5d91edd41d07" +dependencies = [ + "base64 0.21.7", + "serde", + "serde_json", +] + [[package]] name = "dunce" version = "1.0.4" @@ -2090,6 +2078,17 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "etcetera" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" +dependencies = [ + "cfg-if 1.0.0", + "home", + "windows-sys 0.48.0", +] + [[package]] name = "ethabi" version = "18.0.0" @@ -2573,7 +2572,7 @@ dependencies = [ "percent-encoding 2.3.1", "serde", "serde_json", - "serde_with 3.8.1", + "serde_with", "tokio", "tower-service", "url 2.5.1", @@ -2917,6 +2916,7 @@ dependencies = [ "http 1.1.0", "http-body 1.0.0", "httparse", + "httpdate", "itoa", "pin-project-lite", "smallvec", @@ -2924,6 +2924,21 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-named-pipe" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73b7d8abf35697b81a825e386fc151e0d503e8cb5fcb93cc8669c376dfd6f278" +dependencies = [ + "hex", + "hyper 1.3.1", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", + "winapi", +] + [[package]] name = "hyper-rustls" version = "0.24.2" @@ -3026,15 +3041,17 @@ dependencies = [ [[package]] name = "hyperlocal" -version = "0.8.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fafdf7b2b2de7c9784f76e02c0935e65a8117ec3b768644379983ab333ac98c" +checksum = "986c5ce3b994526b3cd75578e62554abd09f0899d6206de48b3e96ab34ccc8c7" dependencies = [ - "futures-util", "hex", - "hyper 0.14.29", - 
"pin-project", + "http-body-util", + "hyper 1.3.1", + "hyper-util", + "pin-project-lite", "tokio", + "tower-service", ] [[package]] @@ -4197,7 +4214,7 @@ dependencies = [ "reed-solomon-erasure", "serde", "serde_json", - "serde_with 3.8.1", + "serde_with", "sha3", "smart-default", "strum 0.24.1", @@ -4240,7 +4257,7 @@ dependencies = [ "rand_chacha 0.3.1", "serde", "serde_json", - "serde_with 3.8.1", + "serde_with", "sha3", "smart-default", "strum 0.24.1", @@ -4925,6 +4942,31 @@ dependencies = [ "windows-targets 0.52.5", ] +[[package]] +name = "parse-display" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "914a1c2265c98e2446911282c6ac86d8524f495792c38c5bd884f80499c7538a" +dependencies = [ + "parse-display-derive", + "regex", + "regex-syntax 0.8.4", +] + +[[package]] +name = "parse-display-derive" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ae7800a4c974efd12df917266338e79a7a74415173caf7e70aa0a0707345281" +dependencies = [ + "proc-macro2", + "quote", + "regex", + "regex-syntax 0.8.4", + "structmeta", + "syn 2.0.66", +] + [[package]] name = "password-hash" version = "0.4.2" @@ -5441,6 +5483,15 @@ dependencies = [ "url 2.5.1", ] +[[package]] +name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "redox_syscall" version = "0.4.1" @@ -6161,16 +6212,6 @@ dependencies = [ "serde", ] -[[package]] -name = "serde_with" -version = "1.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "678b5a069e50bf00ecd22d0cd8ddf7c236f68581b03db652061ed5eb13a312ff" -dependencies = [ - "serde", - "serde_with_macros 1.5.2", -] - [[package]] name = "serde_with" version = "3.8.1" @@ -6185,22 +6226,10 @@ dependencies = [ "serde", "serde_derive", "serde_json", - "serde_with_macros 3.8.1", + "serde_with_macros", "time", ] -[[package]] -name = "serde_with_macros" -version = "1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e182d6ec6f05393cc0e5ed1bf81ad6db3a8feedf8ee515ecdd369809bcce8082" -dependencies = [ - "darling 0.13.4", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "serde_with_macros" version = "3.8.1" @@ -6485,6 +6514,29 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "structmeta" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e1575d8d40908d70f6fd05537266b90ae71b15dbbe7a8b7dffa2b759306d329" +dependencies = [ + "proc-macro2", + "quote", + "structmeta-derive", + "syn 2.0.66", +] + +[[package]] +name = "structmeta-derive" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "152a0b65a590ff6c3da95cabe2353ee04e6167c896b28e3b14478c2636c922fc" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "strum" version = "0.24.1" @@ -6722,22 +6774,31 @@ dependencies = [ [[package]] name = "testcontainers" -version = "0.15.0" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d2931d7f521af5bae989f716c3fa43a6af9af7ec7a5e21b59ae40878cec00" +checksum = "5f40cc2bd72e17f328faf8ca7687fe337e61bccd8acf9674fa78dd3792b045e1" dependencies = [ 
"async-trait", "bollard", "bollard-stubs", + "bytes", + "docker_credential", + "either", + "etcetera", "futures", - "hex", - "hmac", "log", - "rand 0.8.5", + "memchr", + "parse-display", + "pin-project-lite", "serde", "serde_json", - "sha2", + "serde_with", + "thiserror", "tokio", + "tokio-stream", + "tokio-tar", + "tokio-util", + "url 2.5.1", ] [[package]] @@ -6931,6 +6992,21 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-tar" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d5714c010ca3e5c27114c1cdeb9d14641ace49874aa5626d7149e47aedace75" +dependencies = [ + "filetime", + "futures-core", + "libc", + "redox_syscall 0.3.5", + "tokio", + "tokio-stream", + "xattr", +] + [[package]] name = "tokio-util" version = "0.7.11" diff --git a/integration-tests/chain-signatures/Cargo.toml b/integration-tests/chain-signatures/Cargo.toml index f6eeb76e..35e7b788 100644 --- a/integration-tests/chain-signatures/Cargo.toml +++ b/integration-tests/chain-signatures/Cargo.toml @@ -8,7 +8,7 @@ publish = false [dependencies] anyhow = { version = "1.0", features = ["backtrace"] } async-process = "1" -bollard = "0.13" +bollard = "0.17.0" clap = { version = "4.5.4", features = ["derive"] } futures = "0.3" generic-array = { version = "0.14.7", default-features = false } @@ -22,7 +22,7 @@ reqwest = "0.11.16" serde = "1" serde_json = "1" shell-escape = "0.1.5" -testcontainers = { version = "0.15", features = ["experimental"] } +testcontainers = { version = "0.23.1" } tokio = { version = "1.28", features = ["full"] } tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/integration-tests/chain-signatures/src/containers.rs b/integration-tests/chain-signatures/src/containers.rs index 3c449cc6..a978eb34 100644 --- a/integration-tests/chain-signatures/src/containers.rs +++ b/integration-tests/chain-signatures/src/containers.rs @@ -3,28 +3,33 @@ use std::path::Path; use super::{local::NodeConfig, utils, MultichainConfig}; use anyhow::{anyhow, Context}; use async_process::Child; +use bollard::container::LogsOptions; use bollard::exec::CreateExecOptions; -use bollard::{container::LogsOptions, network::CreateNetworkOptions, service::Ipam, Docker}; +use bollard::network::CreateNetworkOptions; +use bollard::secret::Ipam; +use bollard::Docker; use futures::{lock::Mutex, StreamExt}; use mpc_keys::hpke; use mpc_node::config::OverrideConfig; use near_workspaces::Account; use once_cell::sync::Lazy; use serde_json::json; -use testcontainers::clients::Cli; -use testcontainers::core::Port; -use testcontainers::Image; +use testcontainers::core::ExecCommand; +use testcontainers::ContainerAsync; use testcontainers::{ - core::{ExecCommand, WaitFor}, - Container, GenericImage, RunnableImage, + core::{IntoContainerPort, WaitFor}, + runners::AsyncRunner, + GenericImage, ImageExt, }; use tokio::io::AsyncWriteExt; use tracing; +pub type Container = ContainerAsync; + static NETWORK_MUTEX: Lazy> = Lazy::new(|| Mutex::new(0)); -pub struct Node<'a> { - pub container: Container<'a, GenericImage>, +pub struct Node { + pub container: Container, pub address: String, pub account: Account, pub local_address: String, @@ -36,12 +41,12 @@ pub struct Node<'a> { near_rpc: String, } -impl<'a> Node<'a> { +impl Node { // Container port used for the docker network, does not have to be unique const CONTAINER_PORT: u16 = 3000; pub async fn run( - ctx: &super::Context<'a>, + ctx: &super::Context, cfg: &MultichainConfig, account: &Account, ) -> anyhow::Result { @@ 
-54,14 +59,16 @@ impl<'a> Node<'a> { let near_rpc = ctx.lake_indexer.rpc_host_address.clone(); let proxy_name = format!("rpc_from_node_{}", account.id()); let rpc_port_proxied = utils::pick_unused_port().await?; - let rpc_address_proxied = format!("{}:{}", near_rpc, rpc_port_proxied); + let rpc_address_proxied = format!("{near_rpc}:{rpc_port_proxied}"); tracing::info!( "Proxy RPC address {} accessed by node@{} to {}", near_rpc, account.id(), rpc_address_proxied ); - LakeIndexer::populate_proxy(&proxy_name, true, &rpc_address_proxied, &near_rpc).await?; + LakeIndexer::populate_proxy(&proxy_name, true, &rpc_address_proxied, &near_rpc) + .await + .unwrap(); Self::spawn( ctx, @@ -78,8 +85,8 @@ impl<'a> Node<'a> { .await } - pub fn kill(self) -> NodeConfig { - self.container.stop(); + pub async fn kill(self) -> NodeConfig { + self.container.stop().await.unwrap(); NodeConfig { web_port: Self::CONTAINER_PORT, account: self.account, @@ -91,7 +98,7 @@ impl<'a> Node<'a> { } } - pub async fn spawn(ctx: &super::Context<'a>, config: NodeConfig) -> anyhow::Result { + pub async fn spawn(ctx: &super::Context, config: NodeConfig) -> anyhow::Result { let indexer_options = mpc_node::indexer::Options { s3_bucket: ctx.localstack.s3_bucket.clone(), s3_region: ctx.localstack.s3_region.clone(), @@ -120,24 +127,33 @@ impl<'a> Node<'a> { message_options: ctx.message_options.clone(), } .into_str_args(); - let image: GenericImage = GenericImage::new("near/mpc-node", "latest") + let container = GenericImage::new("near/mpc-node", "latest") .with_wait_for(WaitFor::Nothing) - .with_exposed_port(Self::CONTAINER_PORT) + .with_exposed_port(Self::CONTAINER_PORT.tcp()) .with_env_var("RUST_LOG", "mpc_node=DEBUG") - .with_env_var("RUST_BACKTRACE", "1"); - let image: RunnableImage = (image, args).into(); - let image = image.with_network(&ctx.docker_network); - let container = ctx.docker_client.cli.run(image); + .with_env_var("RUST_BACKTRACE", "1") + .with_network(&ctx.docker_network) + .with_cmd(args) + .start() + .await + .unwrap(); + let ip_address = ctx .docker_client .get_network_ip_address(&container, &ctx.docker_network) - .await?; - let host_port = container.get_host_port_ipv4(Self::CONTAINER_PORT); - - container.exec(ExecCommand { - cmd: format!("bash -c 'while [[ \"$(curl -s -o /dev/null -w ''%{{http_code}}'' localhost:{})\" != \"200\" ]]; do sleep 1; done'", Self::CONTAINER_PORT), - ready_conditions: vec![WaitFor::message_on_stdout("node is ready to accept connections")] - }); + .await + .unwrap(); + let host_port = container + .get_host_port_ipv4(Self::CONTAINER_PORT) + .await + .unwrap(); + + container.exec(ExecCommand::new( + format!("bash -c 'while [[ \"$(curl -s -o /dev/null -w ''%{{http_code}}'' localhost:{})\" != \"200\" ]]; do sleep 1; done'", Self::CONTAINER_PORT) + .split_whitespace() + ) + .with_container_ready_conditions(vec![WaitFor::message_on_stdout("node is ready to accept connections")]) + ).await.unwrap(); let full_address = format!("http://{ip_address}:{}", Self::CONTAINER_PORT); tracing::info!( @@ -159,8 +175,8 @@ impl<'a> Node<'a> { } } -pub struct LocalStack<'a> { - pub container: Container<'a, GenericImage>, +pub struct LocalStack { + pub container: Container, pub address: String, pub s3_address: String, pub s3_host_address: String, @@ -168,24 +184,26 @@ pub struct LocalStack<'a> { pub s3_region: String, } -impl<'a> LocalStack<'a> { +impl LocalStack { const S3_CONTAINER_PORT: u16 = 4566; pub async fn run( - docker_client: &'a DockerClient, + docker_client: &DockerClient, network: &str, s3_bucket: 
&str, s3_region: &str, - ) -> anyhow::Result> { + ) -> Self { tracing::info!("running LocalStack container..."); - let image = GenericImage::new("localstack/localstack", "3.5.0") - .with_wait_for(WaitFor::message_on_stdout("Ready.")); - let image: RunnableImage = image.into(); - let image = image.with_network(network); - let container = docker_client.cli.run(image); + let container = GenericImage::new("localstack/localstack", "3.5.0") + .with_wait_for(WaitFor::message_on_stdout("Ready.")) + .with_network(network) + .start() + .await + .unwrap(); let address = docker_client .get_network_ip_address(&container, network) - .await?; + .await + .unwrap(); // Create the bucket let create_result = docker_client @@ -207,11 +225,13 @@ impl<'a> LocalStack<'a> { ..Default::default() }, ) - .await?; + .await + .unwrap(); let result = docker_client .docker .start_exec(&create_result.id, None) - .await?; + .await + .unwrap(); tracing::info!(?result, s3_bucket, s3_region, "localstack created bucket"); let s3_address = format!("http://{}:{}", address, Self::S3_CONTAINER_PORT); @@ -222,7 +242,10 @@ impl<'a> LocalStack<'a> { }; #[cfg(target_arch = "x86_64")] let s3_host_address = { - let s3_host_port = container.get_host_port_ipv6(Self::S3_CONTAINER_PORT); + let s3_host_port = container + .get_host_port_ipv6(Self::S3_CONTAINER_PORT) + .await + .unwrap(); format!("http://[::1]:{s3_host_port}") }; @@ -231,19 +254,19 @@ impl<'a> LocalStack<'a> { s3_host_address, "LocalStack container is running" ); - Ok(LocalStack { + LocalStack { container, address, s3_address, s3_host_address, s3_bucket: s3_bucket.to_string(), s3_region: s3_region.to_string(), - }) + } } } -pub struct LakeIndexer<'a> { - pub container: Container<'a, GenericImage>, +pub struct LakeIndexer { + pub container: Container, pub bucket_name: String, pub region: String, pub rpc_address: String, @@ -253,10 +276,10 @@ pub struct LakeIndexer<'a> { // Child process is used for proxy host (local node) to container pub toxi_server_process: Child, // Container toxi server is used for proxy container to container - pub toxi_server_container: Container<'a, GenericImage>, + pub toxi_server_container: Container, } -impl<'a> LakeIndexer<'a> { +impl LakeIndexer { pub const CONTAINER_RPC_PORT: u16 = 3030; pub const S3_PORT_PROXIED: u16 = 4566; @@ -279,22 +302,24 @@ impl<'a> LakeIndexer<'a> { Ok(toxi_server) } - async fn spin_up_toxi_server_container( - docker_client: &'a DockerClient, - network: &str, - ) -> anyhow::Result> { - let image = GenericImage::new("ghcr.io/shopify/toxiproxy", "2.9.0") - .with_exposed_port(Self::CONTAINER_RPC_PORT); - let image: RunnableImage = image.into(); - let image = image.with_network(network).with_mapped_port(Port { - local: Self::TOXI_SERVER_EXPOSE_PORT, - internal: Self::TOXI_SERVER_PROCESS_PORT, - }); - let container = docker_client.cli.run(image); - container.exec(ExecCommand { - cmd: format!("bash -c 'while [[ \"$(curl -s -o /dev/null -w ''%{{http_code}}'' localhost:{}/version)\" != \"200\" ]]; do sleep 1; done'", Self::TOXI_SERVER_PROCESS_PORT), - ready_conditions: vec![WaitFor::message_on_stdout("version")] - }); + async fn spin_up_toxi_server_container(network: &str) -> anyhow::Result { + let container = GenericImage::new("ghcr.io/shopify/toxiproxy", "2.9.0") + .with_exposed_port(Self::CONTAINER_RPC_PORT.tcp()) + .with_network(network) + .with_mapped_port( + Self::TOXI_SERVER_EXPOSE_PORT, + Self::TOXI_SERVER_PROCESS_PORT.tcp(), + ) + .start() + .await + .unwrap(); + + container.exec(ExecCommand::new( + format!("bash -c 
'while [[ \"$(curl -s -o /dev/null -w ''%{{http_code}}'' localhost:{})\" != \"200\" ]]; do sleep 1; done'", Self::TOXI_SERVER_PROCESS_PORT) + .split_whitespace() + ) + .with_container_ready_conditions(vec![WaitFor::message_on_stdout("version")]) + ).await.unwrap(); Ok(container) } @@ -345,19 +370,19 @@ impl<'a> LakeIndexer<'a> { } pub async fn run( - docker_client: &'a DockerClient, + docker_client: &DockerClient, network: &str, s3_address: &str, bucket_name: &str, region: &str, - ) -> anyhow::Result> { + ) -> LakeIndexer { tracing::info!("initializing toxi proxy servers"); - let toxi_server_process = Self::spin_up_toxi_server_process().await?; - let toxi_server_container = - Self::spin_up_toxi_server_container(docker_client, network).await?; + let toxi_server_process = Self::spin_up_toxi_server_process().await.unwrap(); + let toxi_server_container = Self::spin_up_toxi_server_container(network).await.unwrap(); let toxi_server_container_address = docker_client .get_network_ip_address(&toxi_server_container, network) - .await?; + .await + .unwrap(); let s3_address_proxied = format!( "{}:{}", &toxi_server_container_address, @@ -368,7 +393,9 @@ impl<'a> LakeIndexer<'a> { s3_address_proxied, "Proxy S3 access from Lake Indexer" ); - Self::populate_proxy("lake-s3", false, &s3_address_proxied, s3_address).await?; + Self::populate_proxy("lake-s3", false, &s3_address_proxied, s3_address) + .await + .unwrap(); tracing::info!( network, @@ -378,14 +405,13 @@ impl<'a> LakeIndexer<'a> { "running NEAR Lake Indexer container..." ); - let image = GenericImage::new("ghcr.io/near/near-lake-indexer", "node-2.3.0") + let container = GenericImage::new("ghcr.io/near/near-lake-indexer", "node-2.3.0") + .with_wait_for(WaitFor::message_on_stderr("Starting Streamer")) + .with_exposed_port(Self::CONTAINER_RPC_PORT.tcp()) .with_env_var("AWS_ACCESS_KEY_ID", "FAKE_LOCALSTACK_KEY_ID") .with_env_var("AWS_SECRET_ACCESS_KEY", "FAKE_LOCALSTACK_ACCESS_KEY") - .with_wait_for(WaitFor::message_on_stderr("Starting Streamer")) - .with_exposed_port(Self::CONTAINER_RPC_PORT); - let image: RunnableImage = ( - image, - vec![ + .with_network(network) + .with_cmd(vec![ "--endpoint".to_string(), format!("http://{}", s3_address_proxied), "--bucket".to_string(), @@ -394,16 +420,20 @@ impl<'a> LakeIndexer<'a> { region.to_string(), "--stream-while-syncing".to_string(), "sync-from-latest".to_string(), - ], - ) - .into(); - let image = image.with_network(network); - let container = docker_client.cli.run(image); + ]) + .start() + .await + .unwrap(); + let address = docker_client .get_network_ip_address(&container, network) - .await?; + .await + .unwrap(); let rpc_address = format!("http://{}:{}", address, Self::CONTAINER_RPC_PORT); - let rpc_host_port = container.get_host_port_ipv4(Self::CONTAINER_RPC_PORT); + let rpc_host_port = container + .get_host_port_ipv4(Self::CONTAINER_RPC_PORT) + .await + .unwrap(); let rpc_host_address = format!("http://127.0.0.1:{rpc_host_port}"); tracing::info!( @@ -413,7 +443,7 @@ impl<'a> LakeIndexer<'a> { rpc_host_address, "NEAR Lake Indexer container is running" ); - Ok(LakeIndexer { + LakeIndexer { container, bucket_name: bucket_name.to_string(), region: region.to_string(), @@ -421,19 +451,19 @@ impl<'a> LakeIndexer<'a> { rpc_host_address, toxi_server_process, toxi_server_container, - }) + } } } +#[derive(Clone)] pub struct DockerClient { pub docker: Docker, - pub cli: Cli, } impl DockerClient { - pub async fn get_network_ip_address( + pub async fn get_network_ip_address( &self, - container: &Container<'_, I>, + 
container: &Container, network: &str, ) -> anyhow::Result { let network_settings = self @@ -557,47 +587,51 @@ impl Default for DockerClient { bollard::API_DEFAULT_VERSION, ) .unwrap(), - cli: Default::default(), } } } -pub struct Redis<'a> { - pub container: Container<'a, GenericImage>, +pub struct Redis { + pub container: Container, pub internal_address: String, pub external_address: String, } -impl<'a> Redis<'a> { +impl Redis { const DEFAULT_REDIS_PORT: u16 = 6379; - pub async fn run(docker_client: &'a DockerClient, network: &str) -> anyhow::Result> { + pub async fn run(docker_client: &DockerClient, network: &str) -> Self { tracing::info!("Running Redis container..."); - let image = GenericImage::new("redis", "7.0.15") - .with_exposed_port(Self::DEFAULT_REDIS_PORT) - .with_wait_for(WaitFor::message_on_stdout("Ready to accept connections")); - let image: RunnableImage = image.into(); - let image = image.with_network(network); - let container = docker_client.cli.run(image); + let container = GenericImage::new("redis", "7.0.15") + .with_exposed_port(Self::DEFAULT_REDIS_PORT.tcp()) + .with_wait_for(WaitFor::message_on_stdout("Ready to accept connections")) + .with_network(network) + .start() + .await + .unwrap(); let network_ip = docker_client .get_network_ip_address(&container, network) - .await?; + .await + .unwrap(); let external_address = format!("redis://{}:{}", network_ip, Self::DEFAULT_REDIS_PORT); - let host_port = container.get_host_port_ipv4(Self::DEFAULT_REDIS_PORT); + let host_port = container + .get_host_port_ipv4(Self::DEFAULT_REDIS_PORT) + .await + .unwrap(); let internal_address = format!("redis://127.0.0.1:{host_port}"); tracing::info!( - "Redis container is running. External address: {}. Internal address: {}", external_address, - internal_address + internal_address, + "Redis container is running", ); - Ok(Redis { + Self { container, internal_address, external_address, - }) + } } } diff --git a/integration-tests/chain-signatures/src/lib.rs b/integration-tests/chain-signatures/src/lib.rs index 08e4fd54..0b093b18 100644 --- a/integration-tests/chain-signatures/src/lib.rs +++ b/integration-tests/chain-signatures/src/lib.rs @@ -3,6 +3,7 @@ pub mod execute; pub mod local; pub mod utils; +use containers::Container; use deadpool_redis::Pool; use std::collections::HashMap; @@ -25,7 +26,6 @@ use near_workspaces::network::{Sandbox, ValidatorKey}; use near_workspaces::types::{KeyType, SecretKey}; use near_workspaces::{Account, AccountId, Contract, Worker}; use serde_json::json; -use testcontainers::{Container, GenericImage}; const NETWORK: &str = "mpc_it_network"; @@ -58,18 +58,18 @@ impl Default for MultichainConfig { } } -pub enum Nodes<'a> { +pub enum Nodes { Local { - ctx: Context<'a>, + ctx: Context, nodes: Vec, }, Docker { - ctx: Context<'a>, - nodes: Vec>, + ctx: Context, + nodes: Vec, }, } -impl Nodes<'_> { +impl Nodes { pub fn len(&self) -> usize { match self { Nodes::Local { nodes, .. 
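// A minimal, self-contained sketch of the async testcontainers pattern the hunks above
// migrate to: images are configured with `with_wait_for`/`with_network`, started with
// `.start().await`, and host ports are resolved asynchronously. The import paths are an
// assumption based on the post-0.17 testcontainers layout this patch appears to target.
use testcontainers::core::{IntoContainerPort, WaitFor};
use testcontainers::runners::AsyncRunner;
use testcontainers::{GenericImage, ImageExt};

const REDIS_PORT: u16 = 6379;

async fn run_redis_sketch(network: &str) -> anyhow::Result<String> {
    let container = GenericImage::new("redis", "7.0.15")
        .with_exposed_port(REDIS_PORT.tcp())
        .with_wait_for(WaitFor::message_on_stdout("Ready to accept connections"))
        .with_network(network)
        .start()
        .await?;
    // `get_host_port_ipv4` is awaited in the new API instead of being read synchronously.
    let host_port = container.get_host_port_ipv4(REDIS_PORT).await?;
    Ok(format!("redis://127.0.0.1:{host_port}"))
}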
} => nodes.len(), @@ -134,7 +134,7 @@ impl Nodes<'_> { .iter() .position(|node| node.account.id() == account_id) .unwrap(); - nodes.remove(index).kill() + nodes.remove(index).kill().await } }; @@ -191,22 +191,22 @@ impl Nodes<'_> { } } -pub struct Context<'a> { - pub docker_client: &'a DockerClient, +pub struct Context { + pub docker_client: DockerClient, pub docker_network: String, pub release: bool, - pub localstack: crate::containers::LocalStack<'a>, - pub lake_indexer: crate::containers::LakeIndexer<'a>, + pub localstack: crate::containers::LocalStack, + pub lake_indexer: crate::containers::LakeIndexer, pub worker: Worker, pub mpc_contract: Contract, - pub redis: crate::containers::Redis<'a>, + pub redis: crate::containers::Redis, pub storage_options: storage::Options, pub mesh_options: mesh::Options, pub message_options: http_client::Options, } -pub async fn setup(docker_client: &DockerClient) -> anyhow::Result> { +pub async fn setup(docker_client: &DockerClient) -> anyhow::Result { let release = true; let docker_network = NETWORK; docker_client.create_network(docker_network).await?; @@ -226,7 +226,7 @@ pub async fn setup(docker_client: &DockerClient) -> anyhow::Result> .await?; tracing::info!(contract_id = %mpc_contract.id(), "deployed mpc contract"); - let redis = crate::containers::Redis::run(docker_client, docker_network).await?; + let redis = crate::containers::Redis::run(docker_client, docker_network).await; let redis_url = redis.internal_address.clone(); let sk_share_local_path = "multichain-integration-secret-manager".to_string(); @@ -246,7 +246,7 @@ pub async fn setup(docker_client: &DockerClient) -> anyhow::Result> let message_options = http_client::Options { timeout: 1000 }; Ok(Context { - docker_client, + docker_client: docker_client.clone(), docker_network: docker_network.to_string(), release, localstack, @@ -427,8 +427,8 @@ pub async fn dry_run( } async fn fetch_from_validator( - docker_client: &containers::DockerClient, - container: &Container<'_, GenericImage>, + docker_client: &DockerClient, + container: &Container, path: &str, ) -> anyhow::Result> { tracing::info!(path, "fetching data from validator"); @@ -465,8 +465,8 @@ async fn fetch_from_validator( } async fn fetch_validator_keys( - docker_client: &containers::DockerClient, - container: &Container<'_, GenericImage>, + docker_client: &DockerClient, + container: &Container, ) -> anyhow::Result { let _span = tracing::info_span!("fetch_validator_keys"); let key_data = @@ -474,19 +474,19 @@ async fn fetch_validator_keys( Ok(serde_json::from_slice(&key_data)?) 
} -pub struct LakeIndexerCtx<'a> { - pub localstack: containers::LocalStack<'a>, - pub lake_indexer: containers::LakeIndexer<'a>, +pub struct LakeIndexerCtx { + pub localstack: containers::LocalStack, + pub lake_indexer: containers::LakeIndexer, pub worker: Worker, } -pub async fn initialize_lake_indexer<'a>( - docker_client: &'a containers::DockerClient, +pub async fn initialize_lake_indexer( + docker_client: &DockerClient, network: &str, -) -> anyhow::Result> { +) -> anyhow::Result { let s3_bucket = "near-lake-custom"; let s3_region = "us-east-1"; - let localstack = LocalStack::run(docker_client, network, s3_bucket, s3_region).await?; + let localstack = LocalStack::run(docker_client, network, s3_bucket, s3_region).await; let lake_indexer = containers::LakeIndexer::run( docker_client, @@ -495,7 +495,7 @@ pub async fn initialize_lake_indexer<'a>( s3_bucket, s3_region, ) - .await?; + .await; let validator_key = fetch_validator_keys(docker_client, &lake_indexer.container).await?; diff --git a/integration-tests/chain-signatures/src/local.rs b/integration-tests/chain-signatures/src/local.rs index fca763ed..e49f7d35 100644 --- a/integration-tests/chain-signatures/src/local.rs +++ b/integration-tests/chain-signatures/src/local.rs @@ -37,7 +37,7 @@ pub struct NodeConfig { impl Node { pub async fn dry_run( - ctx: &super::Context<'_>, + ctx: &super::Context, account: &Account, cfg: &MultichainConfig, ) -> anyhow::Result { @@ -103,7 +103,7 @@ impl Node { } pub async fn run( - ctx: &super::Context<'_>, + ctx: &super::Context, cfg: &MultichainConfig, account: &Account, ) -> anyhow::Result { @@ -140,7 +140,7 @@ impl Node { .await } - pub async fn spawn(ctx: &super::Context<'_>, config: NodeConfig) -> anyhow::Result { + pub async fn spawn(ctx: &super::Context, config: NodeConfig) -> anyhow::Result { let web_port = config.web_port; let indexer_options = mpc_node::indexer::Options { s3_bucket: ctx.localstack.s3_bucket.clone(), diff --git a/integration-tests/chain-signatures/tests/actions/mod.rs b/integration-tests/chain-signatures/tests/actions/mod.rs index 478259ef..7a7e582e 100644 --- a/integration-tests/chain-signatures/tests/actions/mod.rs +++ b/integration-tests/chain-signatures/tests/actions/mod.rs @@ -1,6 +1,6 @@ pub mod wait_for; -use crate::MultichainTestContext; +use crate::TestContext; use cait_sith::FullSignature; use crypto_shared::ScalarExt; @@ -41,7 +41,7 @@ use k256::{ use serde_json::json; pub async fn request_sign( - ctx: &MultichainTestContext<'_>, + ctx: &TestContext, ) -> anyhow::Result<([u8; 32], [u8; 32], Account, AsyncTransactionStatus)> { let worker = &ctx.nodes.ctx().worker; let account = worker.dev_create_account().await?; @@ -74,7 +74,7 @@ pub async fn request_sign( } pub async fn request_batch_random_sign( - ctx: &MultichainTestContext<'_>, + ctx: &TestContext, ) -> anyhow::Result<(Vec<([u8; 32], [u8; 32])>, Account, AsyncTransactionStatus)> { let worker = &ctx.nodes.ctx().worker; let account = worker.dev_create_account().await?; @@ -110,7 +110,7 @@ pub async fn request_batch_random_sign( } pub async fn request_batch_duplicate_sign( - ctx: &MultichainTestContext<'_>, + ctx: &TestContext, ) -> anyhow::Result<([u8; 32], u32, Account, AsyncTransactionStatus)> { let worker = &ctx.nodes.ctx().worker; let account = worker.dev_create_account().await?; @@ -159,7 +159,7 @@ pub async fn assert_signature( // A normal signature, but we try to insert a bad response which fails and the signature is generated pub async fn single_signature_rogue_responder( - ctx: &MultichainTestContext<'_>, + 
ctx: &TestContext, state: &RunningContractState, ) -> anyhow::Result<()> { let (_, payload_hash, account, status) = request_sign(ctx).await?; @@ -189,7 +189,7 @@ pub async fn single_signature_rogue_responder( } pub async fn single_signature_production( - ctx: &MultichainTestContext<'_>, + ctx: &TestContext, state: &RunningContractState, ) -> anyhow::Result<()> { let (_, payload_hash, account, status) = request_sign(ctx).await?; @@ -203,7 +203,7 @@ pub async fn single_signature_production( } pub async fn rogue_respond( - ctx: &MultichainTestContext<'_>, + ctx: &TestContext, payload_hash: [u8; 32], predecessor: &near_workspaces::AccountId, path: &str, @@ -254,7 +254,7 @@ pub async fn rogue_respond( } pub async fn request_sign_non_random( - ctx: &MultichainTestContext<'_>, + ctx: &TestContext, account: Account, payload: [u8; 32], payload_hashed: [u8; 32], @@ -296,7 +296,7 @@ pub async fn request_sign_non_random( } pub async fn single_payload_signature_production( - ctx: &MultichainTestContext<'_>, + ctx: &TestContext, state: &RunningContractState, ) -> anyhow::Result<()> { let (payload, payload_hash, account, status) = request_sign(ctx).await?; @@ -377,7 +377,7 @@ pub async fn clear_toxics() -> anyhow::Result<()> { } pub async fn batch_random_signature_production( - ctx: &MultichainTestContext<'_>, + ctx: &TestContext, state: &RunningContractState, ) -> anyhow::Result<()> { let (payloads, account, status) = request_batch_random_sign(ctx).await?; @@ -396,7 +396,7 @@ pub async fn batch_random_signature_production( } pub async fn batch_duplicate_signature_production( - ctx: &MultichainTestContext<'_>, + ctx: &TestContext, _state: &RunningContractState, ) -> anyhow::Result<()> { let (_, _, _, status) = request_batch_duplicate_sign(ctx).await?; diff --git a/integration-tests/chain-signatures/tests/actions/wait_for.rs b/integration-tests/chain-signatures/tests/actions/wait_for.rs index 19bfabc2..c897f39e 100644 --- a/integration-tests/chain-signatures/tests/actions/wait_for.rs +++ b/integration-tests/chain-signatures/tests/actions/wait_for.rs @@ -2,7 +2,7 @@ use std::task::Poll; use std::time::Duration; use crate::actions; -use crate::MultichainTestContext; +use crate::TestContext; use anyhow::Context; use backon::Retryable; @@ -23,8 +23,8 @@ use near_workspaces::Account; use std::collections::HashMap; use url::Url; -pub async fn running_mpc<'a>( - ctx: &MultichainTestContext<'a>, +pub async fn running_mpc( + ctx: &TestContext, epoch: Option, ) -> anyhow::Result { let is_running = || async { @@ -60,8 +60,8 @@ pub async fn running_mpc<'a>( .with_context(|| err_msg) } -pub async fn has_at_least_triples<'a>( - ctx: &MultichainTestContext<'a>, +pub async fn has_at_least_triples( + ctx: &TestContext, expected_triple_count: usize, ) -> anyhow::Result> { let is_enough_triples = |id| { @@ -108,8 +108,8 @@ pub async fn has_at_least_triples<'a>( Ok(state_views) } -pub async fn has_at_least_mine_triples<'a>( - ctx: &MultichainTestContext<'a>, +pub async fn has_at_least_mine_triples( + ctx: &TestContext, expected_mine_triple_count: usize, ) -> anyhow::Result> { let is_enough_mine_triples = |id| { @@ -150,8 +150,8 @@ pub async fn has_at_least_mine_triples<'a>( Ok(state_views) } -pub async fn has_at_least_presignatures<'a>( - ctx: &MultichainTestContext<'a>, +pub async fn has_at_least_presignatures( + ctx: &TestContext, expected_presignature_count: usize, ) -> anyhow::Result> { let is_enough_presignatures = |id| { @@ -192,8 +192,8 @@ pub async fn has_at_least_presignatures<'a>( Ok(state_views) } -pub async fn 
has_at_least_mine_presignatures<'a>( - ctx: &MultichainTestContext<'a>, +pub async fn has_at_least_mine_presignatures( + ctx: &TestContext, expected_mine_presignature_count: usize, ) -> anyhow::Result> { let is_enough_mine_presignatures = |id| { @@ -302,7 +302,7 @@ pub async fn signature_responded( } pub async fn signature_payload_responded( - ctx: &MultichainTestContext<'_>, + ctx: &TestContext, account: Account, payload: [u8; 32], payload_hashed: [u8; 32], diff --git a/integration-tests/chain-signatures/tests/cases/mod.rs b/integration-tests/chain-signatures/tests/cases/mod.rs index d8f9e489..7c2185b7 100644 --- a/integration-tests/chain-signatures/tests/cases/mod.rs +++ b/integration-tests/chain-signatures/tests/cases/mod.rs @@ -212,7 +212,7 @@ async fn test_triple_persistence() -> anyhow::Result<()> { let docker_client = DockerClient::default(); let docker_network = "test-triple-persistence"; docker_client.create_network(docker_network).await?; - let redis = containers::Redis::run(&docker_client, docker_network).await?; + let redis = containers::Redis::run(&docker_client, docker_network).await; let redis_url = Url::parse(redis.internal_address.as_str())?; let redis_cfg = deadpool_redis::Config::from_url(redis_url); let redis_pool = redis_cfg.create_pool(Some(Runtime::Tokio1)).unwrap(); @@ -300,7 +300,7 @@ async fn test_presignature_persistence() -> anyhow::Result<()> { let docker_client = DockerClient::default(); let docker_network = "test-presignature-persistence"; docker_client.create_network(docker_network).await?; - let redis = containers::Redis::run(&docker_client, docker_network).await?; + let redis = containers::Redis::run(&docker_client, docker_network).await; let redis_url = Url::parse(redis.internal_address.as_str())?; let redis_cfg = deadpool_redis::Config::from_url(redis_url); let redis_pool = redis_cfg.create_pool(Some(Runtime::Tokio1)).unwrap(); diff --git a/integration-tests/chain-signatures/tests/lib.rs b/integration-tests/chain-signatures/tests/lib.rs index 74fab552..5642f952 100644 --- a/integration-tests/chain-signatures/tests/lib.rs +++ b/integration-tests/chain-signatures/tests/lib.rs @@ -19,14 +19,14 @@ const CURRENT_CONTRACT_DEPLOY_DEPOSIT: NearToken = NearToken::from_millinear(900 const CURRENT_CONTRACT_FILE_PATH: &str = "../../target/wasm32-unknown-unknown/release/mpc_contract.wasm"; -pub struct MultichainTestContext<'a> { - nodes: Nodes<'a>, +pub struct TestContext { + nodes: Nodes, rpc_client: near_fetch::Client, http_client: reqwest::Client, cfg: MultichainConfig, } -impl MultichainTestContext<'_> { +impl TestContext { pub fn contract(&self) -> &Contract { self.nodes.contract() } @@ -186,7 +186,7 @@ impl MultichainTestContext<'_> { pub async fn with_multichain_nodes(cfg: MultichainConfig, f: F) -> anyhow::Result<()> where - F: for<'a> FnOnce(MultichainTestContext<'a>) -> BoxFuture<'a, anyhow::Result<()>>, + F: FnOnce(TestContext) -> BoxFuture<'static, anyhow::Result<()>>, { let docker_client = DockerClient::default(); let nodes = run(cfg.clone(), &docker_client).await?; @@ -196,7 +196,7 @@ where let connector = near_jsonrpc_client::JsonRpcClient::new_client(); let jsonrpc_client = connector.connect(&nodes.ctx().lake_indexer.rpc_host_address); let rpc_client = near_fetch::Client::from_client(jsonrpc_client); - let result = f(MultichainTestContext { + let result = f(TestContext { nodes, rpc_client, http_client: reqwest::Client::default(), From a4d12588a1cc79bd7b976266f765bbb6dca6e347 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Wed, 4 Dec 2024 21:59:26 +0000 
Subject: [PATCH 05/42] Added node cluster interface --- chain-signatures/node/src/protocol/triple.rs | 2 +- .../chain-signatures/tests/actions/mod.rs | 85 +++++++ .../chain-signatures/tests/actions/sign.rs | 3 + .../chain-signatures/tests/actions/wait.rs | 229 ++++++++++++++++++ .../chain-signatures/tests/cases/mod.rs | 18 +- .../chain-signatures/tests/cluster/mod.rs | 122 ++++++++++ .../chain-signatures/tests/cluster/spawner.rs | 180 ++++++++++++++ .../chain-signatures/tests/lib.rs | 1 + 8 files changed, 628 insertions(+), 12 deletions(-) create mode 100644 integration-tests/chain-signatures/tests/actions/sign.rs create mode 100644 integration-tests/chain-signatures/tests/actions/wait.rs create mode 100644 integration-tests/chain-signatures/tests/cluster/mod.rs create mode 100644 integration-tests/chain-signatures/tests/cluster/spawner.rs diff --git a/chain-signatures/node/src/protocol/triple.rs b/chain-signatures/node/src/protocol/triple.rs index 6ea709a2..517a9ee7 100644 --- a/chain-signatures/node/src/protocol/triple.rs +++ b/chain-signatures/node/src/protocol/triple.rs @@ -215,7 +215,7 @@ impl TripleManager { pub async fn take_two_mine(&mut self) -> Option<(Triple, Triple)> { let triples = &self.triple_storage; if triples.len_mine().await.unwrap_or(0) < 2 { - tracing::warn!("not enough mine triples"); + tracing::debug!("not enough mine triples"); return None; } let triple_0 = match triples.take_mine().await { diff --git a/integration-tests/chain-signatures/tests/actions/mod.rs b/integration-tests/chain-signatures/tests/actions/mod.rs index 7a7e582e..f51e1c4b 100644 --- a/integration-tests/chain-signatures/tests/actions/mod.rs +++ b/integration-tests/chain-signatures/tests/actions/mod.rs @@ -1,5 +1,8 @@ pub mod wait_for; +pub mod wait; +pub mod sign; +use crate::cluster::Cluster; use crate::TestContext; use cait_sith::FullSignature; @@ -73,6 +76,38 @@ pub async fn request_sign( Ok((payload, payload_hashed, account, status)) } +pub async fn request_sign_( + nodes: &Cluster, +) -> anyhow::Result<([u8; 32], [u8; 32], Account, AsyncTransactionStatus)> { + let account = nodes.worker().dev_create_account().await?; + let payload: [u8; 32] = rand::thread_rng().gen(); + let payload_hashed = web3::signing::keccak256(&payload); + + let signer = InMemorySigner { + account_id: account.id().clone(), + public_key: account.secret_key().public_key().to_string().parse()?, + secret_key: account.secret_key().to_string().parse()?, + }; + + let request = SignRequest { + payload: payload_hashed, + path: "test".to_string(), + key_version: 0, + }; + let status = nodes + .rpc_client + .call(&signer, nodes.contract().id(), "sign") + .args_json(serde_json::json!({ + "request": request, + })) + .gas(Gas::from_tgas(50)) + .deposit(NearToken::from_yoctonear(1)) + .transact_async() + .await?; + // tokio::time::sleep(Duration::from_secs(1)).await; + Ok((payload, payload_hashed, account, status)) +} + pub async fn request_batch_random_sign( ctx: &TestContext, ) -> anyhow::Result<(Vec<([u8; 32], [u8; 32])>, Account, AsyncTransactionStatus)> { @@ -253,6 +288,56 @@ pub async fn rogue_respond( Ok(status) } +pub async fn rogue_respond_( + nodes: &Cluster, + payload_hash: [u8; 32], + predecessor: &near_workspaces::AccountId, + path: &str, +) -> anyhow::Result { + let account = nodes.worker().dev_create_account().await?; + + let signer = InMemorySigner { + account_id: account.id().clone(), + public_key: account.secret_key().public_key().clone().into(), + secret_key: account.secret_key().to_string().parse()?, + }; + let 
epsilon = derive_epsilon(predecessor, path); + + let request = SignatureRequest { + payload_hash: Scalar::from_bytes(payload_hash).unwrap().into(), + epsilon: SerializableScalar { scalar: epsilon }, + }; + + let big_r = serde_json::from_value( + "02EC7FA686BB430A4B700BDA07F2E07D6333D9E33AEEF270334EB2D00D0A6FEC6C".into(), + )?; // Fake BigR + let s = serde_json::from_value( + "20F90C540EE00133C911EA2A9ADE2ABBCC7AD820687F75E011DFEEC94DB10CD6".into(), + )?; // Fake S + + let response = SignatureResponse { + big_r: SerializableAffinePoint { + affine_point: big_r, + }, + s: SerializableScalar { scalar: s }, + recovery_id: 0, + }; + + let status = nodes + .rpc_client + .call(&signer, nodes.contract().id(), "respond") + .args_json(serde_json::json!({ + "request": request, + "response": response, + })) + .max_gas() + .transact_async() + .await?; + + tokio::time::sleep(Duration::from_secs(1)).await; + Ok(status) +} + pub async fn request_sign_non_random( ctx: &TestContext, account: Account, diff --git a/integration-tests/chain-signatures/tests/actions/sign.rs b/integration-tests/chain-signatures/tests/actions/sign.rs new file mode 100644 index 00000000..8fd1dc55 --- /dev/null +++ b/integration-tests/chain-signatures/tests/actions/sign.rs @@ -0,0 +1,3 @@ + +pub struct SignAction {} + diff --git a/integration-tests/chain-signatures/tests/actions/wait.rs b/integration-tests/chain-signatures/tests/actions/wait.rs new file mode 100644 index 00000000..f384618e --- /dev/null +++ b/integration-tests/chain-signatures/tests/actions/wait.rs @@ -0,0 +1,229 @@ +use std::future::{Future, IntoFuture}; + +use anyhow::Context; +use backon::{ConstantBuilder, ExponentialBuilder, Retryable}; +use mpc_contract::{ProtocolContractState, RunningContractState}; +use mpc_node::web::StateView; + +use crate::cluster::Cluster; + +enum WaitActions { + Running, + MinTriples(usize), + MinMineTriples(usize), + MinPresignatures(usize), + MinMinePresignatures(usize), + ReadyToSign(usize), +} + +pub struct WaitAction<'a> { + nodes: &'a Cluster, + actions: Vec, +} + +impl<'a> WaitAction<'a> { + pub fn new(nodes: &'a Cluster) -> Self { + Self { + nodes, + actions: Vec::new(), + } + } +} + +impl WaitAction<'_> { + pub fn running(mut self) -> Self { + self.actions.push(WaitActions::Running); + self + } + + pub fn min_triples(mut self, min_triples: usize) -> Self { + self.actions.push(WaitActions::MinTriples(min_triples)); + self + } + + pub fn min_mine_triples(mut self, min_mine_triples: usize) -> Self { + self.actions + .push(WaitActions::MinMineTriples(min_mine_triples)); + self + } + + pub fn min_presignatures(mut self, min_presignatures: usize) -> Self { + self.actions + .push(WaitActions::MinPresignatures(min_presignatures)); + self + } + + pub fn min_mine_presignatures(mut self, min_mine_presignatures: usize) -> Self { + self.actions + .push(WaitActions::MinMinePresignatures(min_mine_presignatures)); + self + } + + pub fn ready_to_sign(mut self) -> Self { + self.actions.push(WaitActions::ReadyToSign(1)); + self + } + + pub fn ready_to_sign_many(mut self, count: usize) -> Self { + self.actions.push(WaitActions::ReadyToSign(count)); + self + } +} + +impl<'a> IntoFuture for WaitAction<'a> { + type Output = anyhow::Result<()>; + type IntoFuture = std::pin::Pin + Send + 'a>>; + + fn into_future(self) -> Self::IntoFuture { + Box::pin(async move { + for action in self.actions { + match action { + WaitActions::Running => { + running_mpc(self.nodes, None).await?; + } + WaitActions::MinTriples(expected) => { + require_triples(self.nodes, 
expected, false).await?; + } + WaitActions::MinMineTriples(expected) => { + require_triples(self.nodes, expected, true).await?; + } + WaitActions::MinPresignatures(expected) => { + require_presignatures(self.nodes, expected, false).await?; + } + WaitActions::MinMinePresignatures(expected) => { + require_presignatures(self.nodes, expected, true).await?; + } + WaitActions::ReadyToSign(count) => { + require_presignatures(self.nodes, count, true).await?; + } + } + } + + Ok(()) + }) + } +} + +pub async fn running_mpc( + nodes: &Cluster, + epoch: Option, +) -> anyhow::Result { + let is_running = || async { + match nodes.contract_state().await? { + ProtocolContractState::Running(running) => match epoch { + None => Ok(running), + Some(expected_epoch) if running.epoch >= expected_epoch => Ok(running), + Some(_) => { + anyhow::bail!("running with an older epoch: {}", running.epoch) + } + }, + _ => anyhow::bail!("not running"), + } + }; + let err_msg = format!( + "mpc did not reach {} in time", + if epoch.is_some() { + "expected epoch" + } else { + "running state" + } + ); + is_running + .retry(&ExponentialBuilder::default().with_max_times(6)) + .await + .with_context(|| err_msg) +} + +pub async fn require_presignatures( + nodes: &Cluster, + expected: usize, + mine: bool, +) -> anyhow::Result> { + let is_enough = || async { + let state_views = nodes.fetch_states().await?; + let enough = state_views + .iter() + .filter(|state| match state { + StateView::Running { + presignature_mine_count, + presignature_count, + .. + } => { + if mine { + *presignature_mine_count >= expected + } else { + *presignature_count >= expected + } + } + _ => { + tracing::warn!("state=NotRunning while checking presignatures"); + false + } + }) + .count(); + if enough >= nodes.len() { + Ok(state_views) + } else { + anyhow::bail!("not enough nodes with presignatures") + } + }; + + let strategy = ConstantBuilder::default() + .with_delay(std::time::Duration::from_secs(5)) + .with_max_times(expected * 100); + + let state_views = is_enough.retry(&strategy).await.with_context(|| { + format!( + "mpc nodes failed to generate {} presignatures before deadline", + expected + ) + })?; + + Ok(state_views) +} + +pub async fn require_triples( + nodes: &Cluster, + expected: usize, + mine: bool, +) -> anyhow::Result> { + let is_enough = || async { + let state_views = nodes.fetch_states().await?; + let enough = state_views + .iter() + .filter(|state| match state { + StateView::Running { + triple_mine_count, + triple_count, + .. 
+ } => { + if mine { + *triple_mine_count >= expected + } else { + *triple_count >= expected + } + } + _ => { + tracing::warn!("state=NotRunning while checking triples"); + false + } + }) + .count(); + if enough >= nodes.len() { + Ok(state_views) + } else { + anyhow::bail!("not enough nodes with triples") + } + }; + let state_views = is_enough + .retry(&ExponentialBuilder::default().with_max_times(12)) + .await + .with_context(|| { + format!( + "mpc nodes failed to generate {} triples before deadline", + expected + ) + })?; + + Ok(state_views) +} diff --git a/integration-tests/chain-signatures/tests/cases/mod.rs b/integration-tests/chain-signatures/tests/cases/mod.rs index 7c2185b7..6c5b24a6 100644 --- a/integration-tests/chain-signatures/tests/cases/mod.rs +++ b/integration-tests/chain-signatures/tests/cases/mod.rs @@ -1,7 +1,7 @@ use std::str::FromStr; use crate::actions::{self, add_latency, wait_for}; -use crate::with_multichain_nodes; +use crate::{cluster, with_multichain_nodes}; use cait_sith::protocol::Participant; use cait_sith::triples::{TriplePub, TripleShare}; @@ -98,16 +98,12 @@ async fn test_triples_and_presignatures() -> anyhow::Result<()> { #[test(tokio::test)] async fn test_signature_basic() -> anyhow::Result<()> { - with_multichain_nodes(MultichainConfig::default(), |ctx| { - Box::pin(async move { - let state_0 = wait_for::running_mpc(&ctx, Some(0)).await?; - assert_eq!(state_0.participants.len(), 3); - wait_for::has_at_least_triples(&ctx, 2).await?; - wait_for::has_at_least_presignatures(&ctx, 2).await?; - actions::single_signature_rogue_responder(&ctx, &state_0).await - }) - }) - .await + let nodes = cluster::spawn().wait_for_running().await?; + + nodes.wait().ready_to_sign().await?; + let _ = nodes.sign().await?; + + Ok(()) } #[test(tokio::test)] diff --git a/integration-tests/chain-signatures/tests/cluster/mod.rs b/integration-tests/chain-signatures/tests/cluster/mod.rs new file mode 100644 index 00000000..cb4818d6 --- /dev/null +++ b/integration-tests/chain-signatures/tests/cluster/mod.rs @@ -0,0 +1,122 @@ +mod spawner; + +use near_workspaces::network::Sandbox; +use spawner::ClusterSpawner; + +use mpc_contract::{errors, ProtocolContractState, RunningContractState}; +use mpc_node::web::StateView; + +use anyhow::Context; +use integration_tests_chain_signatures::containers::DockerClient; +use integration_tests_chain_signatures::{utils, MultichainConfig, Nodes}; +use near_workspaces::{Contract, Worker}; +use url::Url; + +use crate::actions::wait::WaitAction; +use crate::actions::{self, wait_for}; + +pub fn spawn() -> ClusterSpawner { + ClusterSpawner { + wait_for_running: false, + cfg: MultichainConfig { + nodes: 3, + threshold: 2, + protocol: Default::default(), + }, + } +} + +pub struct Cluster { + pub cfg: MultichainConfig, + pub docker_client: DockerClient, + pub rpc_client: near_fetch::Client, + http_client: reqwest::Client, + nodes: Nodes, +} + +impl Cluster { + pub fn len(&self) -> usize { + self.nodes.len() + } + + pub fn url(&self, id: usize) -> Url { + Url::parse(self.nodes.url(id)).unwrap() + } + + pub async fn fetch_state(&self, id: usize) -> anyhow::Result { + let url = self.url(id).join("/state").unwrap(); + let state_view: StateView = self.http_client.get(url).send().await?.json().await?; + Ok(state_view) + } + + pub async fn fetch_states(&self) -> anyhow::Result> { + let tasks = (0..self.len()).map(|id| self.fetch_state(id)); + futures::future::try_join_all(tasks).await + } + + pub fn wait(&self) -> WaitAction<'_> { + WaitAction::new(self) + } + + pub async 
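// A usage sketch for the WaitAction builder defined above: each chained call queues one
// condition, and awaiting the builder retries until every queued condition holds across
// the whole cluster. The `nodes` handle is assumed to come from `cluster::spawn()` as in
// the rewritten `test_signature_basic` above.
async fn wait_sketch(nodes: &crate::cluster::Cluster) -> anyhow::Result<()> {
    nodes
        .wait()
        .running()                 // contract reports the Running state
        .min_triples(2)            // every node holds at least 2 triples
        .min_mine_triples(1)       // ...at least 1 of which it owns
        .min_presignatures(2)
        .min_mine_presignatures(1)
        .await?;
    // ready_to_sign() is the shorthand the signing tests use for the same kind of check.
    nodes.wait().ready_to_sign().await?;
    Ok(())
}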
fn sign(&self) -> anyhow::Result<()> { + let state = self.expect_running().await?; + let (_, payload_hash, account, status) = actions::request_sign_(self).await?; + // We have to use seperate transactions because one could fail. + // This leads to a potential race condition where this transaction could get sent after the signature completes, but I think that's unlikely + let rogue_status = + actions::rogue_respond_(self, payload_hash, account.id(), "test").await?; + let err = wait_for::rogue_message_responded(rogue_status).await?; + + assert!(err.contains(&errors::RespondError::InvalidSignature.to_string())); + let signature = wait_for::signature_responded(status).await?; + + let mut mpc_pk_bytes = vec![0x04]; + mpc_pk_bytes.extend_from_slice(&state.public_key.as_bytes()[1..]); + + // Useful for populating the "signatures_havent_changed" test's hardcoded values + // dbg!( + // hex::encode(signature.big_r.to_encoded_point(true).to_bytes()), + // hex::encode(signature.s.to_bytes()), + // hex::encode(&mpc_pk_bytes), + // hex::encode(&payload_hash), + // account.id(), + // ); + actions::assert_signature(account.id(), &mpc_pk_bytes, payload_hash, &signature).await; + + Ok(()) + } + + pub fn worker(&self) -> &Worker { + &self.nodes.ctx().worker + } + + pub fn contract(&self) -> &Contract { + self.nodes.contract() + } + + pub async fn contract_state(&self) -> anyhow::Result { + let state: ProtocolContractState = self + .contract() + .view("state") + .await + .with_context(|| "could not view state")? + .json()?; + Ok(state) + } + + pub async fn expect_running(&self) -> anyhow::Result { + let state = self.contract_state().await?; + if let ProtocolContractState::Running(state) = state { + Ok(state) + } else { + anyhow::bail!("expected running state, got {:?}", state) + } + } +} + +impl Drop for Cluster { + fn drop(&mut self) { + let sk_local_path = self.nodes.ctx().storage_options.sk_share_local_path.clone(); + let _ = tokio::task::spawn(utils::clear_local_sk_shares(sk_local_path)); + } +} diff --git a/integration-tests/chain-signatures/tests/cluster/spawner.rs b/integration-tests/chain-signatures/tests/cluster/spawner.rs new file mode 100644 index 00000000..ba52e60b --- /dev/null +++ b/integration-tests/chain-signatures/tests/cluster/spawner.rs @@ -0,0 +1,180 @@ +use anyhow::Context; +use backon::{ExponentialBuilder, Retryable}; +use mpc_contract::config::ProtocolConfig; +use mpc_contract::{ProtocolContractState, RunningContractState}; +use mpc_node::web::StateView; + +use std::future::{Future, IntoFuture}; + +use integration_tests_chain_signatures::containers::DockerClient; +use integration_tests_chain_signatures::{run, MultichainConfig}; + +// use crate::actions::wait_for; +use crate::cluster::Cluster; + +pub struct ClusterSpawner { + pub(crate) cfg: MultichainConfig, + pub(crate) wait_for_running: bool, +} + +impl ClusterSpawner { + pub fn nodes(mut self, nodes: usize) -> Self { + self.cfg.nodes = nodes; + self + } + + pub fn threshold(mut self, threshold: usize) -> Self { + self.cfg.threshold = threshold; + self + } + + pub fn protocol(mut self, protocol: ProtocolConfig) -> Self { + self.cfg.protocol = protocol; + self + } + + pub fn with_config(mut self, call: impl FnOnce(&mut MultichainConfig)) -> Self { + call(&mut self.cfg); + self + } + + pub fn wait_for_running(mut self) -> Self { + self.wait_for_running = true; + self + } +} + +impl IntoFuture for ClusterSpawner { + type Output = anyhow::Result; + type IntoFuture = std::pin::Pin + Send>>; + + fn into_future(self) -> Self::IntoFuture { + 
Box::pin(async move { + let docker_client = DockerClient::default(); + let nodes = run(self.cfg.clone(), &docker_client).await?; + let connector = near_jsonrpc_client::JsonRpcClient::new_client(); + let jsonrpc_client = connector.connect(&nodes.ctx().lake_indexer.rpc_host_address); + let rpc_client = near_fetch::Client::from_client(jsonrpc_client); + + let cluster = Cluster { + cfg: self.cfg, + rpc_client, + http_client: reqwest::Client::default(), + docker_client, + nodes, + }; + + if self.wait_for_running { + running_mpc(&cluster, Some(0)).await?; + } + + Ok(cluster) + }) + } +} + +pub async fn running_mpc( + nodes: &Cluster, + epoch: Option, +) -> anyhow::Result { + let is_running = || async { + match nodes.contract_state().await? { + ProtocolContractState::Running(running) => match epoch { + None => Ok(running), + Some(expected_epoch) if running.epoch >= expected_epoch => Ok(running), + Some(_) => { + anyhow::bail!("running with an older epoch: {}", running.epoch) + } + }, + _ => anyhow::bail!("not running"), + } + }; + let err_msg = format!( + "mpc did not reach {} in time", + if epoch.is_some() { + "expected epoch" + } else { + "running state" + } + ); + is_running + .retry(&ExponentialBuilder::default().with_max_times(6)) + .await + .with_context(|| err_msg) +} + +pub async fn require_mine_presignatures( + nodes: &Cluster, + expected: usize, +) -> anyhow::Result> { + let is_enough = || async { + let state_views = nodes.fetch_states().await?; + let enough = state_views + .iter() + .filter(|state| match state { + StateView::Running { + presignature_mine_count, + .. + } => *presignature_mine_count >= expected, + _ => { + tracing::warn!("state=NotRunning while checking mine presignatures"); + false + } + }) + .count(); + if enough >= nodes.len() { + Ok(state_views) + } else { + anyhow::bail!("not enough nodes with mine presignatures") + } + }; + + let state_views = is_enough + .retry(&ExponentialBuilder::default().with_max_times(15)) + .await + .with_context(|| { + format!( + "mpc nodes failed to generate {} presignatures before deadline", + expected + ) + })?; + + Ok(state_views) +} + +pub async fn require_mine_triples( + nodes: &Cluster, + expected: usize, +) -> anyhow::Result> { + let is_enough = || async { + let state_views = nodes.fetch_states().await?; + let enough = state_views + .iter() + .filter(|state| match state { + StateView::Running { + triple_mine_count, .. 
+ } => *triple_mine_count >= expected, + _ => { + tracing::warn!("state=NotRunning while checking mine triples"); + false + } + }) + .count(); + if enough >= nodes.len() { + Ok(state_views) + } else { + anyhow::bail!("not enough nodes with mine triples") + } + }; + let state_views = is_enough + .retry(&ExponentialBuilder::default().with_max_times(12)) + .await + .with_context(|| { + format!( + "mpc nodes failed to generate {} triples before deadline", + expected + ) + })?; + + Ok(state_views) +} diff --git a/integration-tests/chain-signatures/tests/lib.rs b/integration-tests/chain-signatures/tests/lib.rs index 5642f952..0311276e 100644 --- a/integration-tests/chain-signatures/tests/lib.rs +++ b/integration-tests/chain-signatures/tests/lib.rs @@ -1,5 +1,6 @@ mod actions; mod cases; +pub mod cluster; use crate::actions::wait_for; use mpc_contract::update::{ProposeUpdateArgs, UpdateId}; From 2b72dc59a931632c286440f435892e0531c025bb Mon Sep 17 00:00:00 2001 From: Phuong N Date: Wed, 4 Dec 2024 22:51:16 +0000 Subject: [PATCH 06/42] Added SignAction interface --- .../chain-signatures/tests/actions/mod.rs | 32 --- .../chain-signatures/tests/actions/sign.rs | 189 +++++++++++++++++- .../chain-signatures/tests/cluster/mod.rs | 32 +-- 3 files changed, 192 insertions(+), 61 deletions(-) diff --git a/integration-tests/chain-signatures/tests/actions/mod.rs b/integration-tests/chain-signatures/tests/actions/mod.rs index f51e1c4b..c8d5b6ae 100644 --- a/integration-tests/chain-signatures/tests/actions/mod.rs +++ b/integration-tests/chain-signatures/tests/actions/mod.rs @@ -76,38 +76,6 @@ pub async fn request_sign( Ok((payload, payload_hashed, account, status)) } -pub async fn request_sign_( - nodes: &Cluster, -) -> anyhow::Result<([u8; 32], [u8; 32], Account, AsyncTransactionStatus)> { - let account = nodes.worker().dev_create_account().await?; - let payload: [u8; 32] = rand::thread_rng().gen(); - let payload_hashed = web3::signing::keccak256(&payload); - - let signer = InMemorySigner { - account_id: account.id().clone(), - public_key: account.secret_key().public_key().to_string().parse()?, - secret_key: account.secret_key().to_string().parse()?, - }; - - let request = SignRequest { - payload: payload_hashed, - path: "test".to_string(), - key_version: 0, - }; - let status = nodes - .rpc_client - .call(&signer, nodes.contract().id(), "sign") - .args_json(serde_json::json!({ - "request": request, - })) - .gas(Gas::from_tgas(50)) - .deposit(NearToken::from_yoctonear(1)) - .transact_async() - .await?; - // tokio::time::sleep(Duration::from_secs(1)).await; - Ok((payload, payload_hashed, account, status)) -} - pub async fn request_batch_random_sign( ctx: &TestContext, ) -> anyhow::Result<(Vec<([u8; 32], [u8; 32])>, Account, AsyncTransactionStatus)> { diff --git a/integration-tests/chain-signatures/tests/actions/sign.rs b/integration-tests/chain-signatures/tests/actions/sign.rs index 8fd1dc55..a970f4df 100644 --- a/integration-tests/chain-signatures/tests/actions/sign.rs +++ b/integration-tests/chain-signatures/tests/actions/sign.rs @@ -1,3 +1,190 @@ +use std::future::IntoFuture; -pub struct SignAction {} +use cait_sith::FullSignature; +use k256::Secp256k1; +use mpc_contract::errors; +use mpc_contract::primitives::SignRequest; +use near_crypto::InMemorySigner; +use near_fetch::ops::AsyncTransactionStatus; +use near_workspaces::types::{Gas, NearToken}; +use near_workspaces::Account; +use rand::Rng; +use crate::actions::{self, wait_for}; +use crate::cluster::Cluster; + +pub const SIGN_GAS: Gas = Gas::from_tgas(50); 
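// A short sketch of spinning up a test cluster through the ClusterSpawner builder from the
// hunks above: the chained calls adjust MultichainConfig, and awaiting the builder runs the
// docker nodes and wraps them in a Cluster; `wait_for_running` additionally blocks until
// the contract reaches the Running state.
async fn spawn_sketch() -> anyhow::Result<()> {
    let nodes = crate::cluster::spawn()
        .nodes(3)
        .threshold(2)
        .with_config(|cfg| {
            // ProtocolConfig overrides for the test would go here.
            cfg.protocol = Default::default();
        })
        .wait_for_running()
        .await?;
    assert_eq!(nodes.len(), 3);
    Ok(())
}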
+pub const SIGN_DEPOSIT: NearToken = NearToken::from_yoctonear(1); + +pub struct SignAction<'a> { + nodes: &'a Cluster, + count: usize, + account: Option, + payload: Option<[u8; 32]>, + path: String, + key_version: u32, + gas: Gas, + deposit: NearToken, +} + +impl<'a> SignAction<'a> { + pub fn new(nodes: &'a Cluster) -> Self { + Self { + nodes, + count: 1, + account: None, + payload: None, + path: "test".into(), + key_version: 0, + gas: SIGN_GAS, + deposit: SIGN_DEPOSIT, + } + } +} + +impl SignAction<'_> { + /// Specify how many sign calls to be performed sequentially. If not specified, only + /// one sign call will be performed. + pub fn many(mut self, count: usize) -> Self { + self.count = count; + self + } + + /// Set the account to sign with. If not set, a new account will be created. + pub fn account(mut self, account: Account) -> Self { + self.account = Some(account); + self + } + + /// Set the payload of this sign call. The keccak hash of this payload will be signed. + pub fn payload(mut self, payload: [u8; 32]) -> Self { + self.payload = Some(payload); + self + } + + /// Set the derivation path of this sign call. + pub fn path(mut self, path: &str) -> Self { + self.path = path.into(); + self + } + + /// Set the key version of this sign call. If not set, the default key version will be used. + pub fn key_version(mut self, key_version: u32) -> Self { + self.key_version = key_version; + self + } + + /// Set the gas for this sign call. If not set, the default gas will be used. + pub fn gas(mut self, gas: Gas) -> Self { + self.gas = gas; + self + } + + /// Set the deposit for this sign call. If not set, the default deposit will be used. + pub fn deposit(mut self, deposit: NearToken) -> Self { + self.deposit = deposit; + self + } +} + +impl<'a> IntoFuture for SignAction<'a> { + type Output = anyhow::Result; + type IntoFuture = + std::pin::Pin + Send + 'a>>; + + fn into_future(self) -> Self::IntoFuture { + let Self { nodes, .. } = self; + + Box::pin(async move { + let state = nodes.expect_running().await?; + let account = self.account_or_new().await; + let (payload, payload_hash) = self.payload_or_random().await; + let status = self.transact_async(&account, payload_hash).await?; + + // We have to use seperate transactions because one could fail. 
+ // This leads to a potential race condition where this transaction could get sent after the signature completes, but I think that's unlikely + let rogue_status = + actions::rogue_respond_(nodes, payload_hash, account.id(), "test").await?; + let err = wait_for::rogue_message_responded(rogue_status).await?; + + assert!(err.contains(&errors::RespondError::InvalidSignature.to_string())); + let signature = wait_for::signature_responded(status).await?; + + let mut mpc_pk_bytes = vec![0x04]; + mpc_pk_bytes.extend_from_slice(&state.public_key.as_bytes()[1..]); + + // Useful for populating the "signatures_havent_changed" test's hardcoded values + // dbg!( + // hex::encode(signature.big_r.to_encoded_point(true).to_bytes()), + // hex::encode(signature.s.to_bytes()), + // hex::encode(&mpc_pk_bytes), + // hex::encode(&payload_hash), + // account.id(), + // ); + actions::assert_signature(account.id(), &mpc_pk_bytes, payload_hash, &signature).await; + + Ok(SignResult { + account, + signature, + payload, + payload_hash, + }) + }) + } +} + +// Helper methods for the SignAction +impl SignAction<'_> { + async fn account_or_new(&self) -> Account { + if let Some(account) = &self.account { + account.clone() + } else { + self.nodes.worker().dev_create_account().await.unwrap() + } + } + + async fn payload_or_random(&self) -> ([u8; 32], [u8; 32]) { + let payload = if let Some(payload) = &self.payload { + *payload + } else { + rand::thread_rng().gen() + }; + (payload, web3::signing::keccak256(&payload)) + } + + async fn transact_async( + &self, + account: &Account, + payload_hashed: [u8; 32], + ) -> anyhow::Result { + let signer = InMemorySigner { + account_id: account.id().clone(), + public_key: account.secret_key().public_key().to_string().parse()?, + secret_key: account.secret_key().to_string().parse()?, + }; + let request = SignRequest { + payload: payload_hashed, + path: self.path.clone(), + key_version: self.key_version, + }; + let status = self + .nodes + .rpc_client + .call(&signer, self.nodes.contract().id(), "sign") + .args_json(serde_json::json!({ + "request": request, + })) + .gas(self.gas) + .deposit(self.deposit) + .transact_async() + .await?; + Ok(status) + } +} + +pub struct SignResult { + pub account: Account, + pub payload: [u8; 32], + pub payload_hash: [u8; 32], + pub signature: FullSignature, +} diff --git a/integration-tests/chain-signatures/tests/cluster/mod.rs b/integration-tests/chain-signatures/tests/cluster/mod.rs index cb4818d6..45fc66ce 100644 --- a/integration-tests/chain-signatures/tests/cluster/mod.rs +++ b/integration-tests/chain-signatures/tests/cluster/mod.rs @@ -3,7 +3,7 @@ mod spawner; use near_workspaces::network::Sandbox; use spawner::ClusterSpawner; -use mpc_contract::{errors, ProtocolContractState, RunningContractState}; +use mpc_contract::{ProtocolContractState, RunningContractState}; use mpc_node::web::StateView; use anyhow::Context; @@ -12,8 +12,8 @@ use integration_tests_chain_signatures::{utils, MultichainConfig, Nodes}; use near_workspaces::{Contract, Worker}; use url::Url; +use crate::actions::sign::SignAction; use crate::actions::wait::WaitAction; -use crate::actions::{self, wait_for}; pub fn spawn() -> ClusterSpawner { ClusterSpawner { @@ -58,32 +58,8 @@ impl Cluster { WaitAction::new(self) } - pub async fn sign(&self) -> anyhow::Result<()> { - let state = self.expect_running().await?; - let (_, payload_hash, account, status) = actions::request_sign_(self).await?; - // We have to use seperate transactions because one could fail. 
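// A usage sketch for the SignAction builder introduced in this patch: the chained calls set
// up the request, and awaiting the action submits the sign transaction, checks that the
// rogue response is rejected, and verifies the resulting signature before handing back a
// SignResult. Gas, deposit and path fall back to the defaults at the top of sign.rs.
use near_workspaces::types::NearToken;

async fn sign_sketch(nodes: &crate::cluster::Cluster) -> anyhow::Result<()> {
    let result = nodes
        .sign()
        .path("test")
        .key_version(0)
        .deposit(NearToken::from_yoctonear(1))
        .await?;
    println!(
        "signed payload {:?} for {}",
        result.payload_hash,
        result.account.id()
    );
    Ok(())
}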
- // This leads to a potential race condition where this transaction could get sent after the signature completes, but I think that's unlikely - let rogue_status = - actions::rogue_respond_(self, payload_hash, account.id(), "test").await?; - let err = wait_for::rogue_message_responded(rogue_status).await?; - - assert!(err.contains(&errors::RespondError::InvalidSignature.to_string())); - let signature = wait_for::signature_responded(status).await?; - - let mut mpc_pk_bytes = vec![0x04]; - mpc_pk_bytes.extend_from_slice(&state.public_key.as_bytes()[1..]); - - // Useful for populating the "signatures_havent_changed" test's hardcoded values - // dbg!( - // hex::encode(signature.big_r.to_encoded_point(true).to_bytes()), - // hex::encode(signature.s.to_bytes()), - // hex::encode(&mpc_pk_bytes), - // hex::encode(&payload_hash), - // account.id(), - // ); - actions::assert_signature(account.id(), &mpc_pk_bytes, payload_hash, &signature).await; - - Ok(()) + pub fn sign(&self) -> SignAction<'_> { + SignAction::new(self) } pub fn worker(&self) -> &Worker { From e7876ca8d4738342004c0b2e8a4cf0d04267ac90 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Wed, 4 Dec 2024 23:13:43 +0000 Subject: [PATCH 07/42] Added rogue respond to sign interface --- .../chain-signatures/tests/actions/mod.rs | 55 +----------- .../chain-signatures/tests/actions/sign.rs | 86 ++++++++++++++++--- 2 files changed, 74 insertions(+), 67 deletions(-) diff --git a/integration-tests/chain-signatures/tests/actions/mod.rs b/integration-tests/chain-signatures/tests/actions/mod.rs index c8d5b6ae..9adc6561 100644 --- a/integration-tests/chain-signatures/tests/actions/mod.rs +++ b/integration-tests/chain-signatures/tests/actions/mod.rs @@ -1,8 +1,7 @@ -pub mod wait_for; -pub mod wait; pub mod sign; +pub mod wait; +pub mod wait_for; -use crate::cluster::Cluster; use crate::TestContext; use cait_sith::FullSignature; @@ -256,56 +255,6 @@ pub async fn rogue_respond( Ok(status) } -pub async fn rogue_respond_( - nodes: &Cluster, - payload_hash: [u8; 32], - predecessor: &near_workspaces::AccountId, - path: &str, -) -> anyhow::Result { - let account = nodes.worker().dev_create_account().await?; - - let signer = InMemorySigner { - account_id: account.id().clone(), - public_key: account.secret_key().public_key().clone().into(), - secret_key: account.secret_key().to_string().parse()?, - }; - let epsilon = derive_epsilon(predecessor, path); - - let request = SignatureRequest { - payload_hash: Scalar::from_bytes(payload_hash).unwrap().into(), - epsilon: SerializableScalar { scalar: epsilon }, - }; - - let big_r = serde_json::from_value( - "02EC7FA686BB430A4B700BDA07F2E07D6333D9E33AEEF270334EB2D00D0A6FEC6C".into(), - )?; // Fake BigR - let s = serde_json::from_value( - "20F90C540EE00133C911EA2A9ADE2ABBCC7AD820687F75E011DFEEC94DB10CD6".into(), - )?; // Fake S - - let response = SignatureResponse { - big_r: SerializableAffinePoint { - affine_point: big_r, - }, - s: SerializableScalar { scalar: s }, - recovery_id: 0, - }; - - let status = nodes - .rpc_client - .call(&signer, nodes.contract().id(), "respond") - .args_json(serde_json::json!({ - "request": request, - "response": response, - })) - .max_gas() - .transact_async() - .await?; - - tokio::time::sleep(Duration::from_secs(1)).await; - Ok(status) -} - pub async fn request_sign_non_random( ctx: &TestContext, account: Account, diff --git a/integration-tests/chain-signatures/tests/actions/sign.rs b/integration-tests/chain-signatures/tests/actions/sign.rs index a970f4df..00639285 100644 --- 
a/integration-tests/chain-signatures/tests/actions/sign.rs +++ b/integration-tests/chain-signatures/tests/actions/sign.rs @@ -1,13 +1,16 @@ use std::future::IntoFuture; use cait_sith::FullSignature; -use k256::Secp256k1; +use crypto_shared::{ + derive_epsilon, ScalarExt as _, SerializableAffinePoint, SerializableScalar, SignatureResponse, +}; +use k256::{Scalar, Secp256k1}; use mpc_contract::errors; -use mpc_contract::primitives::SignRequest; +use mpc_contract::primitives::{SignRequest, SignatureRequest}; use near_crypto::InMemorySigner; use near_fetch::ops::AsyncTransactionStatus; use near_workspaces::types::{Gas, NearToken}; -use near_workspaces::Account; +use near_workspaces::{Account, AccountId}; use rand::Rng; use crate::actions::{self, wait_for}; @@ -92,19 +95,19 @@ impl<'a> IntoFuture for SignAction<'a> { type IntoFuture = std::pin::Pin + Send + 'a>>; - fn into_future(self) -> Self::IntoFuture { + fn into_future(mut self) -> Self::IntoFuture { let Self { nodes, .. } = self; Box::pin(async move { let state = nodes.expect_running().await?; let account = self.account_or_new().await; - let (payload, payload_hash) = self.payload_or_random().await; + let payload = self.payload_or_random(); + let payload_hash = self.payload_hash(); let status = self.transact_async(&account, payload_hash).await?; // We have to use seperate transactions because one could fail. // This leads to a potential race condition where this transaction could get sent after the signature completes, but I think that's unlikely - let rogue_status = - actions::rogue_respond_(nodes, payload_hash, account.id(), "test").await?; + let (rogue, rogue_status) = self.rogue_respond(payload_hash, account.id()).await?; let err = wait_for::rogue_message_responded(rogue_status).await?; assert!(err.contains(&errors::RespondError::InvalidSignature.to_string())); @@ -125,6 +128,7 @@ impl<'a> IntoFuture for SignAction<'a> { Ok(SignResult { account, + rogue, signature, payload, payload_hash, @@ -143,13 +147,14 @@ impl SignAction<'_> { } } - async fn payload_or_random(&self) -> ([u8; 32], [u8; 32]) { - let payload = if let Some(payload) = &self.payload { - *payload - } else { - rand::thread_rng().gen() - }; - (payload, web3::signing::keccak256(&payload)) + fn payload_or_random(&mut self) -> [u8; 32] { + let payload = self.payload.unwrap_or_else(|| rand::thread_rng().gen()); + self.payload = Some(payload); + payload + } + + fn payload_hash(&mut self) -> [u8; 32] { + web3::signing::keccak256(&self.payload_or_random()) } async fn transact_async( @@ -180,10 +185,63 @@ impl SignAction<'_> { .await?; Ok(status) } + + async fn rogue_respond( + &self, + payload_hash: [u8; 32], + predecessor: &AccountId, + ) -> anyhow::Result<(Account, AsyncTransactionStatus)> { + let rogue = self.nodes.worker().dev_create_account().await?; + let signer = InMemorySigner { + account_id: rogue.id().clone(), + public_key: rogue.secret_key().public_key().clone().into(), + secret_key: rogue.secret_key().to_string().parse()?, + }; + let epsilon = derive_epsilon(predecessor, &self.path); + + let request = SignatureRequest { + payload_hash: Scalar::from_bytes(payload_hash).unwrap().into(), + epsilon: SerializableScalar { scalar: epsilon }, + }; + + let big_r = serde_json::from_value( + "02EC7FA686BB430A4B700BDA07F2E07D6333D9E33AEEF270334EB2D00D0A6FEC6C".into(), + )?; // Fake BigR + let s = serde_json::from_value( + "20F90C540EE00133C911EA2A9ADE2ABBCC7AD820687F75E011DFEEC94DB10CD6".into(), + )?; // Fake S + + let response = SignatureResponse { + big_r: 
SerializableAffinePoint { + affine_point: big_r, + }, + s: SerializableScalar { scalar: s }, + recovery_id: 0, + }; + + let status = self + .nodes + .rpc_client + .call(&signer, self.nodes.contract().id(), "respond") + .args_json(serde_json::json!({ + "request": request, + "response": response, + })) + .max_gas() + .transact_async() + .await?; + + Ok((rogue, status)) + } } pub struct SignResult { + /// The account that signed the payload. pub account: Account, + + /// Underlying rogue account that responded to the signature request. + pub rogue: Account, + pub payload: [u8; 32], pub payload_hash: [u8; 32], pub signature: FullSignature, From 7570a2df1441026b50259a308014b80f5f5323b0 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Thu, 5 Dec 2024 01:37:43 +0000 Subject: [PATCH 08/42] Reformat all test with new interface --- integration-tests/chain-signatures/src/lib.rs | 4 +- .../chain-signatures/src/local.rs | 14 + .../chain-signatures/tests/actions/mod.rs | 73 ++- .../chain-signatures/tests/actions/sign.rs | 57 +- .../tests/actions/wait_for.rs | 361 ++++++------ .../chain-signatures/tests/cases/mod.rs | 522 ++++++++---------- .../chain-signatures/tests/cases/nightly.rs | 65 +-- .../chain-signatures/tests/cluster/mod.rs | 24 +- .../chain-signatures/tests/cluster/spawner.rs | 109 +--- .../chain-signatures/tests/lib.rs | 47 +- 10 files changed, 544 insertions(+), 732 deletions(-) diff --git a/integration-tests/chain-signatures/src/lib.rs b/integration-tests/chain-signatures/src/lib.rs index 0b093b18..c72e043b 100644 --- a/integration-tests/chain-signatures/src/lib.rs +++ b/integration-tests/chain-signatures/src/lib.rs @@ -139,7 +139,7 @@ impl Nodes { }; // wait for the node to be removed from the network - tokio::time::sleep(std::time::Duration::from_secs(1)).await; + tokio::time::sleep(std::time::Duration::from_secs(3)).await; killed_node_config } @@ -151,7 +151,7 @@ impl Nodes { Nodes::Docker { ctx, nodes } => nodes.push(containers::Node::spawn(ctx, config).await?), } // wait for the node to be added to the network - tokio::time::sleep(std::time::Duration::from_secs(1)).await; + tokio::time::sleep(std::time::Duration::from_secs(3)).await; Ok(()) } diff --git a/integration-tests/chain-signatures/src/local.rs b/integration-tests/chain-signatures/src/local.rs index e49f7d35..2bb83dd7 100644 --- a/integration-tests/chain-signatures/src/local.rs +++ b/integration-tests/chain-signatures/src/local.rs @@ -1,3 +1,5 @@ +use std::fmt; + use crate::{execute, utils, MultichainConfig}; use crate::containers::LakeIndexer; @@ -35,6 +37,18 @@ pub struct NodeConfig { pub near_rpc: String, } +impl fmt::Debug for NodeConfig { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("NodeConfig") + .field("web_port", &self.web_port) + .field("account", &self.account) + .field("cipher_pk", &self.cipher_pk) + .field("cfg", &self.cfg) + .field("near_rpc", &self.near_rpc) + .finish() + } +} + impl Node { pub async fn dry_run( ctx: &super::Context, diff --git a/integration-tests/chain-signatures/tests/actions/mod.rs b/integration-tests/chain-signatures/tests/actions/mod.rs index 9adc6561..918b1bbf 100644 --- a/integration-tests/chain-signatures/tests/actions/mod.rs +++ b/integration-tests/chain-signatures/tests/actions/mod.rs @@ -2,7 +2,7 @@ pub mod sign; pub mod wait; pub mod wait_for; -use crate::TestContext; +use crate::cluster::Cluster; use cait_sith::FullSignature; use crypto_shared::ScalarExt; @@ -43,10 +43,9 @@ use k256::{ use serde_json::json; pub async fn request_sign( - ctx: &TestContext, 
+ nodes: &Cluster, ) -> anyhow::Result<([u8; 32], [u8; 32], Account, AsyncTransactionStatus)> { - let worker = &ctx.nodes.ctx().worker; - let account = worker.dev_create_account().await?; + let account = nodes.worker().dev_create_account().await?; let payload: [u8; 32] = rand::thread_rng().gen(); let payload_hashed = web3::signing::keccak256(&payload); @@ -61,9 +60,9 @@ pub async fn request_sign( path: "test".to_string(), key_version: 0, }; - let status = ctx + let status = nodes .rpc_client - .call(&signer, ctx.contract().id(), "sign") + .call(&signer, nodes.contract().id(), "sign") .args_json(serde_json::json!({ "request": request, })) @@ -76,10 +75,9 @@ pub async fn request_sign( } pub async fn request_batch_random_sign( - ctx: &TestContext, + nodes: &Cluster, ) -> anyhow::Result<(Vec<([u8; 32], [u8; 32])>, Account, AsyncTransactionStatus)> { - let worker = &ctx.nodes.ctx().worker; - let account = worker.dev_create_account().await?; + let account = nodes.worker().dev_create_account().await?; let signer = InMemorySigner { account_id: account.id().clone(), public_key: account.secret_key().public_key().to_string().parse()?, @@ -87,7 +85,7 @@ pub async fn request_batch_random_sign( }; let mut payloads: Vec<([u8; 32], [u8; 32])> = vec![]; - let mut tx = ctx.rpc_client.batch(&signer, ctx.contract().id()); + let mut tx = nodes.rpc_client.batch(&signer, nodes.contract().id()); for _ in 0..3 { let payload: [u8; 32] = rand::thread_rng().gen(); let payload_hashed = web3::signing::keccak256(&payload); @@ -112,17 +110,16 @@ pub async fn request_batch_random_sign( } pub async fn request_batch_duplicate_sign( - ctx: &TestContext, + nodes: &Cluster, ) -> anyhow::Result<([u8; 32], u32, Account, AsyncTransactionStatus)> { - let worker = &ctx.nodes.ctx().worker; - let account = worker.dev_create_account().await?; + let account = nodes.worker().dev_create_account().await?; let signer = InMemorySigner { account_id: account.id().clone(), public_key: account.secret_key().public_key().to_string().parse()?, secret_key: account.secret_key().to_string().parse()?, }; - let mut tx = ctx.rpc_client.batch(&signer, ctx.contract().id()); + let mut tx = nodes.rpc_client.batch(&signer, nodes.contract().id()); let payload: [u8; 32] = rand::thread_rng().gen(); let payload_hashed = web3::signing::keccak256(&payload); let sign_call_cnt = 2; @@ -161,14 +158,14 @@ pub async fn assert_signature( // A normal signature, but we try to insert a bad response which fails and the signature is generated pub async fn single_signature_rogue_responder( - ctx: &TestContext, + nodes: &Cluster, state: &RunningContractState, ) -> anyhow::Result<()> { - let (_, payload_hash, account, status) = request_sign(ctx).await?; + let (_, payload_hash, account, status) = request_sign(nodes).await?; // We have to use seperate transactions because one could fail. 
// This leads to a potential race condition where this transaction could get sent after the signature completes, but I think that's unlikely - let rogue_status = rogue_respond(ctx, payload_hash, account.id(), "test").await?; + let rogue_status = rogue_respond(nodes, payload_hash, account.id(), "test").await?; let err = wait_for::rogue_message_responded(rogue_status).await?; assert!(err.contains(&errors::RespondError::InvalidSignature.to_string())); @@ -191,10 +188,10 @@ pub async fn single_signature_rogue_responder( } pub async fn single_signature_production( - ctx: &TestContext, + nodes: &Cluster, state: &RunningContractState, ) -> anyhow::Result<()> { - let (_, payload_hash, account, status) = request_sign(ctx).await?; + let (_, payload_hash, account, status) = request_sign(nodes).await?; let signature = wait_for::signature_responded(status).await?; let mut mpc_pk_bytes = vec![0x04]; @@ -205,13 +202,12 @@ pub async fn single_signature_production( } pub async fn rogue_respond( - ctx: &TestContext, + nodes: &Cluster, payload_hash: [u8; 32], predecessor: &near_workspaces::AccountId, path: &str, ) -> anyhow::Result { - let worker = &ctx.nodes.ctx().worker; - let account = worker.dev_create_account().await?; + let account = nodes.worker().dev_create_account().await?; let signer = InMemorySigner { account_id: account.id().clone(), @@ -240,9 +236,9 @@ pub async fn rogue_respond( recovery_id: 0, }; - let status = ctx + let status = nodes .rpc_client - .call(&signer, ctx.contract().id(), "respond") + .call(&signer, nodes.contract().id(), "respond") .args_json(serde_json::json!({ "request": request, "response": response, @@ -256,7 +252,7 @@ pub async fn rogue_respond( } pub async fn request_sign_non_random( - ctx: &TestContext, + nodes: &Cluster, account: Account, payload: [u8; 32], payload_hashed: [u8; 32], @@ -282,9 +278,9 @@ pub async fn request_sign_non_random( key_version: 0, }; - let status = ctx + let status = nodes .rpc_client - .call(&signer, ctx.contract().id(), "sign") + .call(&signer, nodes.contract().id(), "sign") .args_json(serde_json::json!({ "request": request, })) @@ -298,16 +294,16 @@ pub async fn request_sign_non_random( } pub async fn single_payload_signature_production( - ctx: &TestContext, + nodes: &Cluster, state: &RunningContractState, ) -> anyhow::Result<()> { - let (payload, payload_hash, account, status) = request_sign(ctx).await?; + let (payload, payload_hash, account, status) = request_sign(nodes).await?; let first_tx_result = wait_for::signature_responded(status).await; let signature = match first_tx_result { Ok(sig) => sig, Err(error) => { println!("single_payload_signature_production: first sign tx err out with {error:?}"); - wait_for::signature_payload_responded(ctx, account.clone(), payload, payload_hash) + wait_for::signature_payload_responded(nodes, account.clone(), payload, payload_hash) .await? 
} }; @@ -378,15 +374,13 @@ pub async fn clear_toxics() -> anyhow::Result<()> { Ok(()) } -pub async fn batch_random_signature_production( - ctx: &TestContext, - state: &RunningContractState, -) -> anyhow::Result<()> { - let (payloads, account, status) = request_batch_random_sign(ctx).await?; +pub async fn batch_random_signature_production(nodes: &Cluster) -> anyhow::Result<()> { + let (payloads, account, status) = request_batch_random_sign(nodes).await?; let signatures = wait_for::batch_signature_responded(status).await?; + let mpc_pk = nodes.root_public_key().await?; let mut mpc_pk_bytes = vec![0x04]; - mpc_pk_bytes.extend_from_slice(&state.public_key.as_bytes()[1..]); + mpc_pk_bytes.extend_from_slice(&mpc_pk.as_bytes()[1..]); assert_eq!(payloads.len(), signatures.len()); for i in 0..payloads.len() { let (_, payload_hash) = payloads.get(i).unwrap(); @@ -397,11 +391,8 @@ pub async fn batch_random_signature_production( Ok(()) } -pub async fn batch_duplicate_signature_production( - ctx: &TestContext, - _state: &RunningContractState, -) -> anyhow::Result<()> { - let (_, _, _, status) = request_batch_duplicate_sign(ctx).await?; +pub async fn batch_duplicate_signature_production(nodes: &Cluster) -> anyhow::Result<()> { + let (_, _, _, status) = request_batch_duplicate_sign(nodes).await?; let result = wait_for::batch_signature_responded(status).await; match result { Err(WaitForError::Signature(SignatureError::Failed(err_msg))) => { diff --git a/integration-tests/chain-signatures/tests/actions/sign.rs b/integration-tests/chain-signatures/tests/actions/sign.rs index 00639285..df6a0b47 100644 --- a/integration-tests/chain-signatures/tests/actions/sign.rs +++ b/integration-tests/chain-signatures/tests/actions/sign.rs @@ -19,6 +19,19 @@ use crate::cluster::Cluster; pub const SIGN_GAS: Gas = Gas::from_tgas(50); pub const SIGN_DEPOSIT: NearToken = NearToken::from_yoctonear(1); +pub struct SignOutcome { + /// The account that signed the payload. + pub account: Account, + + /// Underlying rogue account that responded to the signature request if we wanted + /// to test the rogue behavior. + pub rogue: Option, + + pub payload: [u8; 32], + pub payload_hash: [u8; 32], + pub signature: FullSignature, +} + pub struct SignAction<'a> { nodes: &'a Cluster, count: usize, @@ -28,6 +41,7 @@ pub struct SignAction<'a> { key_version: u32, gas: Gas, deposit: NearToken, + execute_rogue: bool, } impl<'a> SignAction<'a> { @@ -41,6 +55,7 @@ impl<'a> SignAction<'a> { key_version: 0, gas: SIGN_GAS, deposit: SIGN_DEPOSIT, + execute_rogue: false, } } } @@ -88,10 +103,15 @@ impl SignAction<'_> { self.deposit = deposit; self } + + pub fn rogue_responder(mut self) -> Self { + self.execute_rogue = true; + self + } } impl<'a> IntoFuture for SignAction<'a> { - type Output = anyhow::Result; + type Output = anyhow::Result; type IntoFuture = std::pin::Pin + Send + 'a>>; @@ -103,16 +123,23 @@ impl<'a> IntoFuture for SignAction<'a> { let account = self.account_or_new().await; let payload = self.payload_or_random(); let payload_hash = self.payload_hash(); - let status = self.transact_async(&account, payload_hash).await?; + let status = self.transact_sign(&account, payload_hash).await?; // We have to use seperate transactions because one could fail. 
// This leads to a potential race condition where this transaction could get sent after the signature completes, but I think that's unlikely - let (rogue, rogue_status) = self.rogue_respond(payload_hash, account.id()).await?; - let err = wait_for::rogue_message_responded(rogue_status).await?; + let rogue = if self.execute_rogue { + let (rogue, rogue_status) = self + .transact_rogue_respond(payload_hash, account.id()) + .await?; + let err = wait_for::rogue_message_responded(rogue_status).await?; + + assert!(err.contains(&errors::RespondError::InvalidSignature.to_string())); + Some(rogue) + } else { + None + }; - assert!(err.contains(&errors::RespondError::InvalidSignature.to_string())); let signature = wait_for::signature_responded(status).await?; - let mut mpc_pk_bytes = vec![0x04]; mpc_pk_bytes.extend_from_slice(&state.public_key.as_bytes()[1..]); @@ -126,7 +153,7 @@ impl<'a> IntoFuture for SignAction<'a> { // ); actions::assert_signature(account.id(), &mpc_pk_bytes, payload_hash, &signature).await; - Ok(SignResult { + Ok(SignOutcome { account, rogue, signature, @@ -157,7 +184,7 @@ impl SignAction<'_> { web3::signing::keccak256(&self.payload_or_random()) } - async fn transact_async( + async fn transact_sign( &self, account: &Account, payload_hashed: [u8; 32], @@ -186,7 +213,7 @@ impl SignAction<'_> { Ok(status) } - async fn rogue_respond( + async fn transact_rogue_respond( &self, payload_hash: [u8; 32], predecessor: &AccountId, @@ -234,15 +261,3 @@ impl SignAction<'_> { Ok((rogue, status)) } } - -pub struct SignResult { - /// The account that signed the payload. - pub account: Account, - - /// Underlying rogue account that responded to the signature request. - pub rogue: Account, - - pub payload: [u8; 32], - pub payload_hash: [u8; 32], - pub signature: FullSignature, -} diff --git a/integration-tests/chain-signatures/tests/actions/wait_for.rs b/integration-tests/chain-signatures/tests/actions/wait_for.rs index c897f39e..ba55fcd7 100644 --- a/integration-tests/chain-signatures/tests/actions/wait_for.rs +++ b/integration-tests/chain-signatures/tests/actions/wait_for.rs @@ -2,7 +2,7 @@ use std::task::Poll; use std::time::Duration; use crate::actions; -use crate::TestContext; +use crate::cluster::Cluster; use anyhow::Context; use backon::Retryable; @@ -24,13 +24,13 @@ use std::collections::HashMap; use url::Url; pub async fn running_mpc( - ctx: &TestContext, + nodes: &Cluster, epoch: Option, ) -> anyhow::Result { let is_running = || async { - let state: ProtocolContractState = ctx + let state: ProtocolContractState = nodes .rpc_client - .view(ctx.contract().id(), "state") + .view(nodes.contract().id(), "state") .await .map_err(|err| anyhow::anyhow!("could not view state {err:?}"))? .json()?; @@ -60,180 +60,180 @@ pub async fn running_mpc( .with_context(|| err_msg) } -pub async fn has_at_least_triples( - ctx: &TestContext, - expected_triple_count: usize, -) -> anyhow::Result> { - let is_enough_triples = |id| { - move || async move { - let state_view: StateView = ctx - .http_client - .get( - Url::parse(ctx.nodes.url(id)) - .unwrap() - .join("/state") - .unwrap(), - ) - .send() - .await? - .json() - .await?; - - tracing::debug!( - "has_at_least_triples state_view from {}: {:?}", - id, - state_view - ); - - match state_view { - StateView::Running { triple_count, .. } - if triple_count >= expected_triple_count => - { - Ok(state_view) - } - StateView::Running { .. 
} => anyhow::bail!("node does not have enough triples yet"), - state => anyhow::bail!("node is not running {state:?}"), - } - } - }; - - let mut state_views = Vec::new(); - for id in 0..ctx.nodes.len() { - let state_view = is_enough_triples(id) - .retry(&ExponentialBuilder::default().with_max_times(6)) - .await - .with_context(|| format!("mpc node '{id}' failed to generate '{expected_triple_count}' triples before deadline"))?; - state_views.push(state_view); - } - Ok(state_views) -} - -pub async fn has_at_least_mine_triples( - ctx: &TestContext, - expected_mine_triple_count: usize, -) -> anyhow::Result> { - let is_enough_mine_triples = |id| { - move || async move { - let state_view: StateView = ctx - .http_client - .get( - Url::parse(ctx.nodes.url(id)) - .unwrap() - .join("/state") - .unwrap(), - ) - .send() - .await? - .json() - .await?; - - match state_view { - StateView::Running { - triple_mine_count, .. - } if triple_mine_count >= expected_mine_triple_count => Ok(state_view), - StateView::Running { .. } => { - anyhow::bail!("node does not have enough mine triples yet") - } - state => anyhow::bail!("node is not running {state:?}"), - } - } - }; - - let mut state_views = Vec::new(); - for id in 0..ctx.nodes.len() { - let state_view = is_enough_mine_triples(id) - .retry(&ExponentialBuilder::default().with_max_times(15)) - .await - .with_context(|| format!("mpc node '{id}' failed to generate '{expected_mine_triple_count}' triples before deadline"))?; - state_views.push(state_view); - } - Ok(state_views) -} - -pub async fn has_at_least_presignatures( - ctx: &TestContext, - expected_presignature_count: usize, -) -> anyhow::Result> { - let is_enough_presignatures = |id| { - move || async move { - let state_view: StateView = ctx - .http_client - .get( - Url::parse(ctx.nodes.url(id)) - .unwrap() - .join("/state") - .unwrap(), - ) - .send() - .await? - .json() - .await?; - - match state_view { - StateView::Running { - presignature_count, .. - } if presignature_count >= expected_presignature_count => Ok(state_view), - StateView::Running { .. } => { - anyhow::bail!("node does not have enough presignatures yet") - } - state => anyhow::bail!("node is not running {state:?}"), - } - } - }; - - let mut state_views = Vec::new(); - for id in 0..ctx.nodes.len() { - let state_view = is_enough_presignatures(id) - .retry(&ExponentialBuilder::default().with_max_times(6)) - .await - .with_context(|| format!("mpc node '{id}' failed to generate '{expected_presignature_count}' presignatures before deadline"))?; - state_views.push(state_view); - } - Ok(state_views) -} - -pub async fn has_at_least_mine_presignatures( - ctx: &TestContext, - expected_mine_presignature_count: usize, -) -> anyhow::Result> { - let is_enough_mine_presignatures = |id| { - move || async move { - let state_view: StateView = ctx - .http_client - .get( - Url::parse(ctx.nodes.url(id)) - .unwrap() - .join("/state") - .unwrap(), - ) - .send() - .await? - .json() - .await?; - - match state_view { - StateView::Running { - presignature_mine_count, - .. - } if presignature_mine_count >= expected_mine_presignature_count => Ok(state_view), - StateView::Running { .. 
} => { - anyhow::bail!("node does not have enough mine presignatures yet") - } - state => anyhow::bail!("node is not running {state:?}"), - } - } - }; - - let mut state_views = Vec::new(); - for id in 0..ctx.nodes.len() { - let state_view = is_enough_mine_presignatures(id) - .retry(&ExponentialBuilder::default().with_max_times(6)) - .await - .with_context(|| format!("mpc node '{id}' failed to generate '{expected_mine_presignature_count}' presignatures before deadline"))?; - state_views.push(state_view); - } - Ok(state_views) -} +// pub async fn has_at_least_triples( +// ctx: &TestContext, +// expected_triple_count: usize, +// ) -> anyhow::Result> { +// let is_enough_triples = |id| { +// move || async move { +// let state_view: StateView = ctx +// .http_client +// .get( +// Url::parse(ctx.nodes.url(id)) +// .unwrap() +// .join("/state") +// .unwrap(), +// ) +// .send() +// .await? +// .json() +// .await?; + +// tracing::debug!( +// "has_at_least_triples state_view from {}: {:?}", +// id, +// state_view +// ); + +// match state_view { +// StateView::Running { triple_count, .. } +// if triple_count >= expected_triple_count => +// { +// Ok(state_view) +// } +// StateView::Running { .. } => anyhow::bail!("node does not have enough triples yet"), +// state => anyhow::bail!("node is not running {state:?}"), +// } +// } +// }; + +// let mut state_views = Vec::new(); +// for id in 0..ctx.nodes.len() { +// let state_view = is_enough_triples(id) +// .retry(&ExponentialBuilder::default().with_max_times(6)) +// .await +// .with_context(|| format!("mpc node '{id}' failed to generate '{expected_triple_count}' triples before deadline"))?; +// state_views.push(state_view); +// } +// Ok(state_views) +// } + +// pub async fn has_at_least_mine_triples( +// ctx: &TestContext, +// expected_mine_triple_count: usize, +// ) -> anyhow::Result> { +// let is_enough_mine_triples = |id| { +// move || async move { +// let state_view: StateView = ctx +// .http_client +// .get( +// Url::parse(ctx.nodes.url(id)) +// .unwrap() +// .join("/state") +// .unwrap(), +// ) +// .send() +// .await? +// .json() +// .await?; + +// match state_view { +// StateView::Running { +// triple_mine_count, .. +// } if triple_mine_count >= expected_mine_triple_count => Ok(state_view), +// StateView::Running { .. } => { +// anyhow::bail!("node does not have enough mine triples yet") +// } +// state => anyhow::bail!("node is not running {state:?}"), +// } +// } +// }; + +// let mut state_views = Vec::new(); +// for id in 0..ctx.nodes.len() { +// let state_view = is_enough_mine_triples(id) +// .retry(&ExponentialBuilder::default().with_max_times(15)) +// .await +// .with_context(|| format!("mpc node '{id}' failed to generate '{expected_mine_triple_count}' triples before deadline"))?; +// state_views.push(state_view); +// } +// Ok(state_views) +// } + +// pub async fn has_at_least_presignatures( +// ctx: &TestContext, +// expected_presignature_count: usize, +// ) -> anyhow::Result> { +// let is_enough_presignatures = |id| { +// move || async move { +// let state_view: StateView = ctx +// .http_client +// .get( +// Url::parse(ctx.nodes.url(id)) +// .unwrap() +// .join("/state") +// .unwrap(), +// ) +// .send() +// .await? +// .json() +// .await?; + +// match state_view { +// StateView::Running { +// presignature_count, .. +// } if presignature_count >= expected_presignature_count => Ok(state_view), +// StateView::Running { .. 
} => { +// anyhow::bail!("node does not have enough presignatures yet") +// } +// state => anyhow::bail!("node is not running {state:?}"), +// } +// } +// }; + +// let mut state_views = Vec::new(); +// for id in 0..ctx.nodes.len() { +// let state_view = is_enough_presignatures(id) +// .retry(&ExponentialBuilder::default().with_max_times(6)) +// .await +// .with_context(|| format!("mpc node '{id}' failed to generate '{expected_presignature_count}' presignatures before deadline"))?; +// state_views.push(state_view); +// } +// Ok(state_views) +// } + +// pub async fn has_at_least_mine_presignatures( +// ctx: &TestContext, +// expected_mine_presignature_count: usize, +// ) -> anyhow::Result> { +// let is_enough_mine_presignatures = |id| { +// move || async move { +// let state_view: StateView = ctx +// .http_client +// .get( +// Url::parse(ctx.nodes.url(id)) +// .unwrap() +// .join("/state") +// .unwrap(), +// ) +// .send() +// .await? +// .json() +// .await?; + +// match state_view { +// StateView::Running { +// presignature_mine_count, +// .. +// } if presignature_mine_count >= expected_mine_presignature_count => Ok(state_view), +// StateView::Running { .. } => { +// anyhow::bail!("node does not have enough mine presignatures yet") +// } +// state => anyhow::bail!("node is not running {state:?}"), +// } +// } +// }; + +// let mut state_views = Vec::new(); +// for id in 0..ctx.nodes.len() { +// let state_view = is_enough_mine_presignatures(id) +// .retry(&ExponentialBuilder::default().with_max_times(6)) +// .await +// .with_context(|| format!("mpc node '{id}' failed to generate '{expected_mine_presignature_count}' presignatures before deadline"))?; +// state_views.push(state_view); +// } +// Ok(state_views) +// } #[derive(Debug, thiserror::Error)] pub enum SignatureError { @@ -302,14 +302,15 @@ pub async fn signature_responded( } pub async fn signature_payload_responded( - ctx: &TestContext, + nodes: &Cluster, account: Account, payload: [u8; 32], payload_hashed: [u8; 32], ) -> Result, WaitForError> { let is_signature_ready = || async { let (_, _, _, status) = - actions::request_sign_non_random(ctx, account.clone(), payload, payload_hashed).await?; + actions::request_sign_non_random(nodes, account.clone(), payload, payload_hashed) + .await?; let result = signature_responded(status).await; if let Err(err) = &result { println!("failed to produce signature: {err:?}"); diff --git a/integration-tests/chain-signatures/tests/cases/mod.rs b/integration-tests/chain-signatures/tests/cases/mod.rs index 6c5b24a6..b8a2ee62 100644 --- a/integration-tests/chain-signatures/tests/cases/mod.rs +++ b/integration-tests/chain-signatures/tests/cases/mod.rs @@ -1,7 +1,7 @@ use std::str::FromStr; use crate::actions::{self, add_latency, wait_for}; -use crate::{cluster, with_multichain_nodes}; +use crate::cluster; use cait_sith::protocol::Participant; use cait_sith::triples::{TriplePub, TripleShare}; @@ -10,7 +10,6 @@ use crypto_shared::{self, derive_epsilon, derive_key, x_coordinate, ScalarExt}; use deadpool_redis::Runtime; use elliptic_curve::CurveArithmetic; use integration_tests_chain_signatures::containers::{self, DockerClient}; -use integration_tests_chain_signatures::MultichainConfig; use k256::elliptic_curve::point::AffineCoordinates; use k256::Secp256k1; use mpc_contract::config::Config; @@ -19,7 +18,7 @@ use mpc_node::kdf::into_eth_sig; use mpc_node::protocol::presignature::{Presignature, PresignatureId, PresignatureManager}; use mpc_node::protocol::triple::{Triple, TripleManager}; use mpc_node::storage; -use 
mpc_node::util::NearPublicKeyExt; +use mpc_node::util::NearPublicKeyExt as _; use near_account_id::AccountId; use test_log::test; use url::Url; @@ -28,179 +27,151 @@ pub mod nightly; #[test(tokio::test)] async fn test_multichain_reshare() -> anyhow::Result<()> { - let config = MultichainConfig::default(); - with_multichain_nodes(config.clone(), |mut ctx| { - Box::pin(async move { - let state = wait_for::running_mpc(&ctx, Some(0)).await?; - wait_for::has_at_least_triples(&ctx, 2).await?; - wait_for::has_at_least_presignatures(&ctx, 2).await?; - actions::single_signature_production(&ctx, &state).await?; - - tracing::info!("!!! Add participant 3"); - assert!(ctx.add_participant(None).await.is_ok()); - let state = wait_for::running_mpc(&ctx, None).await?; - wait_for::has_at_least_triples(&ctx, 2).await?; - wait_for::has_at_least_presignatures(&ctx, 2).await?; - actions::single_signature_production(&ctx, &state).await?; - - tracing::info!("!!! Remove participant 0 and participant 2"); - let account_2 = near_workspaces::types::AccountId::from_str( - state.participants.keys().nth(2).unwrap().clone().as_ref(), - ) - .unwrap(); - assert!(ctx.remove_participant(Some(&account_2)).await.is_ok()); - let account_0 = near_workspaces::types::AccountId::from_str( - state.participants.keys().next().unwrap().clone().as_ref(), - ) - .unwrap(); - let node_cfg_0 = ctx.remove_participant(Some(&account_0)).await; - assert!(node_cfg_0.is_ok()); - let node_cfg_0 = node_cfg_0.unwrap(); - let state = wait_for::running_mpc(&ctx, None).await?; - wait_for::has_at_least_triples(&ctx, 2).await?; - wait_for::has_at_least_presignatures(&ctx, 2).await?; - actions::single_signature_production(&ctx, &state).await?; - - tracing::info!("!!! Try remove participant 3, should fail due to threshold"); - assert!(ctx.remove_participant(None).await.is_err()); - - tracing::info!("!!! Add participant 5"); - assert!(ctx.add_participant(None).await.is_ok()); - let state = wait_for::running_mpc(&ctx, None).await?; - wait_for::has_at_least_triples(&ctx, 2).await?; - wait_for::has_at_least_presignatures(&ctx, 2).await?; - actions::single_signature_production(&ctx, &state).await?; - - tracing::info!("!!! Add back participant 0"); - assert!(ctx.add_participant(Some(node_cfg_0)).await.is_ok()); - let state = wait_for::running_mpc(&ctx, None).await?; - wait_for::has_at_least_triples(&ctx, 2).await?; - wait_for::has_at_least_presignatures(&ctx, 2).await?; - actions::single_signature_production(&ctx, &state).await - }) - }) - .await -} + let mut nodes = cluster::spawn().wait_for_running().await?; + nodes.wait().ready_to_sign().await?; + let _ = nodes.sign().await?; -#[test(tokio::test)] -async fn test_triples_and_presignatures() -> anyhow::Result<()> { - with_multichain_nodes(MultichainConfig::default(), |ctx| { - Box::pin(async move { - let state_0 = wait_for::running_mpc(&ctx, Some(0)).await?; - assert_eq!(state_0.participants.len(), 3); - wait_for::has_at_least_triples(&ctx, 2).await?; - wait_for::has_at_least_presignatures(&ctx, 2).await?; - Ok(()) - }) - }) - .await + tracing::info!("!!! Add participant 3"); + nodes.add_participant(None).await.unwrap(); + nodes.wait().running().ready_to_sign().await.unwrap(); + let _ = nodes.sign().await.unwrap(); + + let state = nodes.expect_running().await.unwrap(); + tracing::info!("!!! 
Remove participant 0 and participant 2"); + let account_2 = near_workspaces::types::AccountId::from_str( + state.participants.keys().nth(2).unwrap().clone().as_ref(), + ) + .unwrap(); + nodes.remove_participant(Some(&account_2)).await.unwrap(); + let account_0 = near_workspaces::types::AccountId::from_str( + state.participants.keys().next().unwrap().clone().as_ref(), + ) + .unwrap(); + let node_cfg_0 = nodes.remove_participant(Some(&account_0)).await.unwrap(); + nodes.wait().running().ready_to_sign().await.unwrap(); + let _ = nodes.sign().await.unwrap(); + + tracing::info!("!!! Try remove participant 3, should fail due to threshold"); + nodes.remove_participant(None).await.unwrap_err(); + + tracing::info!("!!! Add participant 5"); + nodes.add_participant(None).await.unwrap(); + nodes.wait().running().ready_to_sign().await.unwrap(); + let _ = nodes.sign().await.unwrap(); + + tracing::info!("!!! Add back participant 0"); + nodes.add_participant(Some(node_cfg_0)).await.unwrap(); + nodes.wait().running().ready_to_sign().await.unwrap(); + let _ = nodes.sign().await.unwrap(); + + Ok(()) } #[test(tokio::test)] async fn test_signature_basic() -> anyhow::Result<()> { let nodes = cluster::spawn().wait_for_running().await?; + nodes.wait().ready_to_sign().await?; + nodes.sign().await?; + Ok(()) +} + +#[test(tokio::test)] +async fn test_signature_rogue() -> anyhow::Result<()> { + let nodes = cluster::spawn().wait_for_running().await?; nodes.wait().ready_to_sign().await?; - let _ = nodes.sign().await?; + nodes.sign().rogue_responder().await?; Ok(()) } #[test(tokio::test)] async fn test_signature_offline_node() -> anyhow::Result<()> { - with_multichain_nodes(MultichainConfig::default(), |mut ctx| { - Box::pin(async move { - let state_0 = wait_for::running_mpc(&ctx, Some(0)).await?; - assert_eq!(state_0.participants.len(), 3); - wait_for::has_at_least_triples(&ctx, 6).await?; - wait_for::has_at_least_mine_triples(&ctx, 2).await?; - - // Kill the node then have presignatures and signature generation only use the active set of nodes - // to start generating presignatures and signatures. - let account_id = near_workspaces::types::AccountId::from_str( - state_0.participants.keys().last().unwrap().clone().as_ref(), - ) - .unwrap(); - ctx.nodes.kill_node(&account_id).await; - - // This could potentially fail and timeout the first time if the participant set picked up is the - // one with the offline node. This is expected behavior for now if a user submits a request in between - // a node going offline and the system hasn't detected it yet. - let presig_res = wait_for::has_at_least_mine_presignatures(&ctx, 1).await; - let sig_res = actions::single_signature_production(&ctx, &state_0).await; - - // Try again if the first attempt failed. This second portion should not be needed when the NEP - // comes in for resumeable MPC. - if presig_res.is_err() || sig_res.is_err() { - // Retry if the first attempt failed. - wait_for::has_at_least_mine_presignatures(&ctx, 1).await?; - actions::single_signature_production(&ctx, &state_0).await?; - } - - Ok(()) - }) - }) - .await + let mut nodes = cluster::spawn().wait_for_running().await?; + nodes.wait().ready_to_sign().await?; + let _ = nodes.sign().await?; + + // Kill the node then have presignatures and signature generation only use the active set of nodes + // to start generating presignatures and signatures. + let account_id: near_workspaces::types::AccountId = nodes + .participants() + .await? 
+ .keys() + .last() + .unwrap() + .as_str() + .parse()?; + + nodes.remove_participant(Some(&account_id)).await.unwrap(); + + // This could potentially fail and timeout the first time if the participant set picked up is the + // one with the offline node. This is expected behavior for now if a user submits a request in between + // a node going offline and the system hasn't detected it yet. + nodes.wait().ready_to_sign().await.unwrap(); + let outcome = nodes.sign().await; + + // Try again if the first attempt failed. This second portion should not be needed when the NEP + // comes in for resumeable MPC. + if outcome.is_err() { + // Retry if the first attempt failed. + nodes.wait().ready_to_sign().await.unwrap(); + let _outcome = nodes.sign().await.unwrap(); + } + + Ok(()) } #[test(tokio::test)] async fn test_key_derivation() -> anyhow::Result<()> { - with_multichain_nodes(MultichainConfig::default(), |ctx| { - Box::pin(async move { - let state_0 = wait_for::running_mpc(&ctx, Some(0)).await?; - assert_eq!(state_0.participants.len(), 3); - wait_for::has_at_least_triples(&ctx, 6).await?; - wait_for::has_at_least_presignatures(&ctx, 3).await?; - - for _ in 0..3 { - let mpc_pk: k256::AffinePoint = state_0.public_key.clone().into_affine_point(); - let (_, payload_hashed, account, status) = actions::request_sign(&ctx).await?; - let sig = wait_for::signature_responded(status).await?; - - let hd_path = "test"; - let derivation_epsilon = derive_epsilon(account.id(), hd_path); - let user_pk = derive_key(mpc_pk, derivation_epsilon); - let multichain_sig = into_eth_sig( - &user_pk, - &sig.big_r, - &sig.s, - k256::Scalar::from_bytes(payload_hashed).unwrap(), - ) - .unwrap(); - - // start recovering the address and compare them: - let user_pk_x = x_coordinate(&user_pk); - let user_pk_y_parity = match user_pk.y_is_odd().unwrap_u8() { - 1 => secp256k1::Parity::Odd, - 0 => secp256k1::Parity::Even, - _ => unreachable!(), - }; - let user_pk_x = - secp256k1::XOnlyPublicKey::from_slice(&user_pk_x.to_bytes()).unwrap(); - let user_secp_pk = - secp256k1::PublicKey::from_x_only_public_key(user_pk_x, user_pk_y_parity); - let user_addr = actions::public_key_to_address(&user_secp_pk); - let r = x_coordinate(&multichain_sig.big_r.affine_point); - let s = multichain_sig.s; - let signature_for_recovery: [u8; 64] = { - let mut signature = [0u8; 64]; - signature[..32].copy_from_slice(&r.to_bytes()); - signature[32..].copy_from_slice(&s.scalar.to_bytes()); - signature - }; - let recovered_addr = web3::signing::recover( - &payload_hashed, - &signature_for_recovery, - multichain_sig.recovery_id as i32, - ) - .unwrap(); - assert_eq!(user_addr, recovered_addr); - } - - Ok(()) - }) - }) - .await + let nodes = cluster::spawn().wait_for_running().await?; + nodes.wait().ready_to_sign().await?; + let _ = nodes.sign().await?; + + let mpc_pk: k256::AffinePoint = nodes.root_public_key().await?.into_affine_point(); + for _ in 0..3 { + let (_, payload_hashed, account, status) = actions::request_sign(&nodes).await?; + let sig = wait_for::signature_responded(status).await?; + + let hd_path = "test"; + let derivation_epsilon = derive_epsilon(account.id(), hd_path); + let user_pk = derive_key(mpc_pk, derivation_epsilon); + let multichain_sig = into_eth_sig( + &user_pk, + &sig.big_r, + &sig.s, + k256::Scalar::from_bytes(payload_hashed).unwrap(), + ) + .unwrap(); + + // start recovering the address and compare them: + let user_pk_x = x_coordinate(&user_pk); + let user_pk_y_parity = match user_pk.y_is_odd().unwrap_u8() { + 1 => 
secp256k1::Parity::Odd, + 0 => secp256k1::Parity::Even, + _ => unreachable!(), + }; + let user_pk_x = secp256k1::XOnlyPublicKey::from_slice(&user_pk_x.to_bytes()).unwrap(); + let user_secp_pk = + secp256k1::PublicKey::from_x_only_public_key(user_pk_x, user_pk_y_parity); + let user_addr = actions::public_key_to_address(&user_secp_pk); + let r = x_coordinate(&multichain_sig.big_r.affine_point); + let s = multichain_sig.s; + let signature_for_recovery: [u8; 64] = { + let mut signature = [0u8; 64]; + signature[..32].copy_from_slice(&r.to_bytes()); + signature[32..].copy_from_slice(&s.scalar.to_bytes()); + signature + }; + let recovered_addr = web3::signing::recover( + &payload_hashed, + &signature_for_recovery, + multichain_sig.recovery_id as i32, + ) + .unwrap(); + assert_eq!(user_addr, recovered_addr); + } + + Ok(()) } #[test(tokio::test)] @@ -395,162 +366,123 @@ fn dummy_triple(id: u64) -> Triple { #[test(tokio::test)] async fn test_signature_offline_node_back_online() -> anyhow::Result<()> { - with_multichain_nodes(MultichainConfig::default(), |mut ctx| { - Box::pin(async move { - let state_0 = wait_for::running_mpc(&ctx, Some(0)).await?; - assert_eq!(state_0.participants.len(), 3); - wait_for::has_at_least_triples(&ctx, 6).await?; - wait_for::has_at_least_mine_triples(&ctx, 2).await?; - wait_for::has_at_least_mine_presignatures(&ctx, 1).await?; - - // Kill node 2 - let account_id = near_workspaces::types::AccountId::from_str( - state_0.participants.keys().last().unwrap().clone().as_ref(), - ) - .unwrap(); - let killed_node_config = ctx.nodes.kill_node(&account_id).await; - - tokio::time::sleep(std::time::Duration::from_secs(2)).await; - - // Start the killed node again - ctx.nodes.restart_node(killed_node_config).await?; - - tokio::time::sleep(std::time::Duration::from_secs(2)).await; - - wait_for::has_at_least_mine_triples(&ctx, 2).await?; - wait_for::has_at_least_mine_presignatures(&ctx, 1).await?; - // retry the same payload multiple times because we might pick many presignatures not present in node 2 repeatedly until yield/resume time out - actions::single_payload_signature_production(&ctx, &state_0).await?; - - Ok(()) - }) - }) - .await + let mut nodes = cluster::spawn().wait_for_running().await?; + nodes.wait().ready_to_sign().await?; + let _ = nodes.sign().await?; + + // Kill node 2 + let account_id: near_workspaces::types::AccountId = nodes + .participants() + .await? 
+        .keys()
+        .last()
+        .unwrap()
+        .as_str()
+        .parse()?;
+    let killed = nodes.kill_node(&account_id).await;
+
+    // Start the killed node again
+    nodes.restart_node(killed).await?;
+
+    // Check that we can sign again
+    nodes.wait().ready_to_sign().await?;
+    let _ = nodes.sign().await?;
+
+    Ok(())
 }

 #[test(tokio::test)]
 async fn test_lake_congestion() -> anyhow::Result<()> {
-    with_multichain_nodes(MultichainConfig::default(), |ctx| {
-        Box::pin(async move {
-            // Currently, with a 10+-1 latency it cannot generate enough tripplets in time
-            // with a 5+-1 latency it fails to wait for signature response
-            add_latency(&ctx.nodes.proxy_name_for_node(0), true, 1.0, 2_000, 200).await?;
-            add_latency(&ctx.nodes.proxy_name_for_node(1), true, 1.0, 2_000, 200).await?;
-            add_latency(&ctx.nodes.proxy_name_for_node(2), true, 1.0, 2_000, 200).await?;
-
-            // Also mock lake indexer in high load that it becomes slower to finish process
-            // sig req and write to s3
-            // with a 1s latency it fails to wait for signature response in time
-            add_latency("lake-s3", false, 1.0, 100, 10).await?;
-
-            let state_0 = wait_for::running_mpc(&ctx, Some(0)).await?;
-            assert_eq!(state_0.participants.len(), 3);
-            wait_for::has_at_least_triples(&ctx, 2).await?;
-            wait_for::has_at_least_presignatures(&ctx, 2).await?;
-            actions::single_signature_rogue_responder(&ctx, &state_0).await?;
-            Ok(())
-        })
-    })
-    .await
+    let nodes = cluster::spawn().await?;
+    // Currently, with a 10+-1 latency it cannot generate enough triplets in time
+    // with a 5+-1 latency it fails to wait for signature response
+    add_latency(&nodes.nodes.proxy_name_for_node(0), true, 1.0, 2_000, 200).await?;
+    add_latency(&nodes.nodes.proxy_name_for_node(1), true, 1.0, 2_000, 200).await?;
+    add_latency(&nodes.nodes.proxy_name_for_node(2), true, 1.0, 2_000, 200).await?;
+
+    // Also mock lake indexer in high load that it becomes slower to finish process
+    // sig req and write to s3
+    // with a 1s latency it fails to wait for signature response in time
+    add_latency("lake-s3", false, 1.0, 100, 10).await?;
+
+    nodes.wait().running().ready_to_sign().await?;
+    nodes.sign().await.unwrap();
+
+    Ok(())
 }

 #[test(tokio::test)]
 async fn test_multichain_reshare_with_lake_congestion() -> anyhow::Result<()> {
-    let config = MultichainConfig::default();
-    with_multichain_nodes(config.clone(), |mut ctx| {
-        Box::pin(async move {
-            let state = wait_for::running_mpc(&ctx, Some(0)).await?;
-            assert!(state.threshold == 2);
-            assert!(state.participants.len() == 3);
-
-            // add latency to node1->rpc, but not node0->rpc
-            add_latency(&ctx.nodes.proxy_name_for_node(1), true, 1.0, 1_000, 100).await?;
-            // remove node2, node0 and node1 should still reach concensus
-            // this fails if the latency above is too long (10s)
-            assert!(ctx.remove_participant(None).await.is_ok());
-            let state = wait_for::running_mpc(&ctx, Some(0)).await?;
-            assert!(state.participants.len() == 2);
-            // Going below T should error out
-            assert!(ctx.remove_participant(None).await.is_err());
-            let state = wait_for::running_mpc(&ctx, Some(0)).await?;
-            assert!(state.participants.len() == 2);
-            assert!(ctx.add_participant(None).await.is_ok());
-            // add latency to node2->rpc
-            add_latency(&ctx.nodes.proxy_name_for_node(2), true, 1.0, 1_000, 100).await?;
-            let state = wait_for::running_mpc(&ctx, Some(0)).await?;
-            assert!(state.participants.len() == 3);
-            assert!(ctx.remove_participant(None).await.is_ok());
-            let state = wait_for::running_mpc(&ctx, Some(0)).await?;
-            assert!(state.participants.len() == 2);
-            // make sure 
signing works after reshare
-            let new_state = wait_for::running_mpc(&ctx, None).await?;
-            wait_for::has_at_least_triples(&ctx, 2).await?;
-            wait_for::has_at_least_presignatures(&ctx, 2).await?;
-            actions::single_payload_signature_production(&ctx, &new_state).await
-        })
-    })
-    .await
+    let mut nodes = cluster::spawn().await?;
+
+    // add latency to node1->rpc, but not node0->rpc
+    add_latency(&nodes.nodes.proxy_name_for_node(1), true, 1.0, 1_000, 100).await?;
+    // remove node2, node0 and node1 should still reach consensus
+    // this fails if the latency above is too long (10s)
+    nodes.remove_participant(None).await.unwrap();
+
+    nodes.wait().running().await?;
+    assert!(nodes.expect_running().await?.participants.len() == 2);
+
+    // Going below T should error out
+    nodes.remove_participant(None).await.unwrap_err();
+    nodes.wait().running().await?;
+    assert!(nodes.expect_running().await?.participants.len() == 2);
+
+    nodes.add_participant(None).await.unwrap();
+    // add latency to node2->rpc
+    add_latency(&nodes.nodes.proxy_name_for_node(2), true, 1.0, 1_000, 100).await?;
+    nodes.wait().running().await?;
+    assert!(nodes.expect_running().await?.participants.len() == 3);
+
+    nodes.remove_participant(None).await.unwrap();
+    nodes.wait().running().await?;
+    assert!(nodes.expect_running().await?.participants.len() == 2);
+
+    // make sure signing works after reshare
+    nodes.wait().ready_to_sign().await?;
+    nodes.sign().await.unwrap();
+
+    Ok(())
 }

 #[test(tokio::test)]
 async fn test_multichain_update_contract() -> anyhow::Result<()> {
-    let config = MultichainConfig::default();
-    with_multichain_nodes(config.clone(), |ctx| {
-        Box::pin(async move {
-            // Get into running state and produce a singular signature.
-            let state = wait_for::running_mpc(&ctx, Some(0)).await?;
-            wait_for::has_at_least_mine_triples(&ctx, 2).await?;
-            wait_for::has_at_least_mine_presignatures(&ctx, 1).await?;
-            actions::single_payload_signature_production(&ctx, &state).await?;
-
-            // Perform update to the contract and see that the nodes are still properly running and picking
-            // up the new contract by first upgrading the contract, then trying to generate a new signature.
-            let id = ctx.propose_update_contract_default().await;
-            ctx.vote_update(id).await;
-            tokio::time::sleep(std::time::Duration::from_secs(3)).await;
-            wait_for::has_at_least_mine_presignatures(&ctx, 1).await?;
-            actions::single_payload_signature_production(&ctx, &state).await?;
-
-            // Now do a config update and see if that also updates the same:
-            let id = ctx
-                .propose_update(ProposeUpdateArgs {
-                    code: None,
-                    config: Some(Config::default()),
-                })
-                .await;
-            ctx.vote_update(id).await;
-            tokio::time::sleep(std::time::Duration::from_secs(3)).await;
-            wait_for::has_at_least_mine_presignatures(&ctx, 1).await?;
-            actions::single_payload_signature_production(&ctx, &state).await?;
-
-            Ok(())
+    let nodes = cluster::spawn().wait_for_running().await?;
+    nodes.wait().ready_to_sign().await?;
+    nodes.sign().await.unwrap();
+
+    // Perform update to the contract and see that the nodes are still properly running and picking
+    // up the new contract by first upgrading the contract, then trying to generate a new signature. 
+ let id = nodes.propose_update_contract_default().await; + nodes.vote_update(id).await; + nodes.wait().ready_to_sign().await?; + nodes.sign().await.unwrap(); + + // Now do a config update and see if that also updates the same: + let id = nodes + .propose_update(ProposeUpdateArgs { + code: None, + config: Some(Config::default()), }) - }) - .await + .await; + nodes.vote_update(id).await; + nodes.wait().ready_to_sign().await?; + nodes.sign().await.unwrap(); + + Ok(()) } #[test(tokio::test)] async fn test_batch_random_signature() -> anyhow::Result<()> { - with_multichain_nodes(MultichainConfig::default(), |ctx| { - Box::pin(async move { - let state_0 = wait_for::running_mpc(&ctx, Some(0)).await?; - assert_eq!(state_0.participants.len(), 3); - actions::batch_random_signature_production(&ctx, &state_0).await?; - Ok(()) - }) - }) - .await + let nodes = cluster::spawn().wait_for_running().await?; + actions::batch_random_signature_production(&nodes).await?; + Ok(()) } #[test(tokio::test)] async fn test_batch_duplicate_signature() -> anyhow::Result<()> { - with_multichain_nodes(MultichainConfig::default(), |ctx| { - Box::pin(async move { - let state_0 = wait_for::running_mpc(&ctx, Some(0)).await?; - assert_eq!(state_0.participants.len(), 3); - actions::batch_duplicate_signature_production(&ctx, &state_0).await?; - Ok(()) - }) - }) - .await + let nodes = cluster::spawn().wait_for_running().await?; + actions::batch_duplicate_signature_production(&nodes).await?; + Ok(()) } diff --git a/integration-tests/chain-signatures/tests/cases/nightly.rs b/integration-tests/chain-signatures/tests/cases/nightly.rs index 5d119729..3b1d6567 100644 --- a/integration-tests/chain-signatures/tests/cases/nightly.rs +++ b/integration-tests/chain-signatures/tests/cases/nightly.rs @@ -1,9 +1,6 @@ -use integration_tests_chain_signatures::MultichainConfig; -use mpc_contract::config::{ProtocolConfig, TripleConfig}; use test_log::test; -use crate::actions::{self, wait_for}; -use crate::with_multichain_nodes; +use crate::cluster; #[test(tokio::test)] #[ignore = "This is triggered by the nightly Github Actions pipeline"] @@ -14,43 +11,27 @@ async fn test_nightly_signature_production() -> anyhow::Result<()> { const MIN_TRIPLES: u32 = 10; const MAX_TRIPLES: u32 = 2 * NODES as u32 * MIN_TRIPLES; - let config = MultichainConfig { - nodes: NODES, - threshold: THRESHOLD, - protocol: ProtocolConfig { - triple: TripleConfig { - min_triples: MIN_TRIPLES, - max_triples: MAX_TRIPLES, - ..Default::default() - }, - ..Default::default() - }, - }; - - with_multichain_nodes(config, |ctx| { - Box::pin(async move { - let state_0 = wait_for::running_mpc(&ctx, Some(0)).await?; - assert_eq!(state_0.participants.len(), NODES); - - for i in 0..SIGNATURE_AMOUNT { - if let Err(err) = wait_for::has_at_least_mine_triples(&ctx, 4).await { - tracing::error!(?err, "Failed to wait for triples"); - continue; - } - - if let Err(err) = wait_for::has_at_least_mine_presignatures(&ctx, 2).await { - tracing::error!(?err, "Failed to wait for presignatures"); - continue; - } - - tracing::info!(at_signature = i, "Producing signature..."); - if let Err(err) = actions::single_signature_production(&ctx, &state_0).await { - tracing::error!(?err, "Failed to produce signature"); - } - } - - Ok(()) + let nodes = cluster::spawn() + .with_config(|config| { + config.nodes = NODES; + config.threshold = THRESHOLD; + config.protocol.triple.min_triples = MIN_TRIPLES; + config.protocol.triple.max_triples = MAX_TRIPLES; }) - }) - .await + .wait_for_running() + .await?; + + for i in 
0..SIGNATURE_AMOUNT { + if let Err(err) = nodes.wait().ready_to_sign().await { + tracing::error!(?err, "Failed to be ready to sign"); + continue; + } + + tracing::info!(at_signature = i, "Producing signature..."); + if let Err(err) = nodes.sign().await { + tracing::error!(?err, "Failed to produce signature"); + } + } + + Ok(()) } diff --git a/integration-tests/chain-signatures/tests/cluster/mod.rs b/integration-tests/chain-signatures/tests/cluster/mod.rs index 45fc66ce..c5a82def 100644 --- a/integration-tests/chain-signatures/tests/cluster/mod.rs +++ b/integration-tests/chain-signatures/tests/cluster/mod.rs @@ -1,5 +1,7 @@ mod spawner; +use integration_tests_chain_signatures::local::NodeConfig; +use mpc_contract::primitives::Participants; use near_workspaces::network::Sandbox; use spawner::ClusterSpawner; @@ -9,7 +11,7 @@ use mpc_node::web::StateView; use anyhow::Context; use integration_tests_chain_signatures::containers::DockerClient; use integration_tests_chain_signatures::{utils, MultichainConfig, Nodes}; -use near_workspaces::{Contract, Worker}; +use near_workspaces::{AccountId, Contract, Worker}; use url::Url; use crate::actions::sign::SignAction; @@ -31,7 +33,7 @@ pub struct Cluster { pub docker_client: DockerClient, pub rpc_client: near_fetch::Client, http_client: reqwest::Client, - nodes: Nodes, + pub(crate) nodes: Nodes, } impl Cluster { @@ -88,6 +90,24 @@ impl Cluster { anyhow::bail!("expected running state, got {:?}", state) } } + + pub async fn participants(&self) -> anyhow::Result { + let state = self.expect_running().await?; + Ok(state.participants) + } + + pub async fn root_public_key(&self) -> anyhow::Result { + let state: RunningContractState = self.expect_running().await?; + Ok(state.public_key) + } + + pub async fn kill_node(&mut self, account_id: &AccountId) -> NodeConfig { + self.nodes.kill_node(account_id).await + } + + pub async fn restart_node(&mut self, config: NodeConfig) -> anyhow::Result<()> { + self.nodes.restart_node(config).await + } } impl Drop for Cluster { diff --git a/integration-tests/chain-signatures/tests/cluster/spawner.rs b/integration-tests/chain-signatures/tests/cluster/spawner.rs index ba52e60b..b7010d71 100644 --- a/integration-tests/chain-signatures/tests/cluster/spawner.rs +++ b/integration-tests/chain-signatures/tests/cluster/spawner.rs @@ -2,7 +2,6 @@ use anyhow::Context; use backon::{ExponentialBuilder, Retryable}; use mpc_contract::config::ProtocolConfig; use mpc_contract::{ProtocolContractState, RunningContractState}; -use mpc_node::web::StateView; use std::future::{Future, IntoFuture}; @@ -65,116 +64,10 @@ impl IntoFuture for ClusterSpawner { }; if self.wait_for_running { - running_mpc(&cluster, Some(0)).await?; + cluster.wait().running().await?; } Ok(cluster) }) } } - -pub async fn running_mpc( - nodes: &Cluster, - epoch: Option, -) -> anyhow::Result { - let is_running = || async { - match nodes.contract_state().await? 
{ - ProtocolContractState::Running(running) => match epoch { - None => Ok(running), - Some(expected_epoch) if running.epoch >= expected_epoch => Ok(running), - Some(_) => { - anyhow::bail!("running with an older epoch: {}", running.epoch) - } - }, - _ => anyhow::bail!("not running"), - } - }; - let err_msg = format!( - "mpc did not reach {} in time", - if epoch.is_some() { - "expected epoch" - } else { - "running state" - } - ); - is_running - .retry(&ExponentialBuilder::default().with_max_times(6)) - .await - .with_context(|| err_msg) -} - -pub async fn require_mine_presignatures( - nodes: &Cluster, - expected: usize, -) -> anyhow::Result> { - let is_enough = || async { - let state_views = nodes.fetch_states().await?; - let enough = state_views - .iter() - .filter(|state| match state { - StateView::Running { - presignature_mine_count, - .. - } => *presignature_mine_count >= expected, - _ => { - tracing::warn!("state=NotRunning while checking mine presignatures"); - false - } - }) - .count(); - if enough >= nodes.len() { - Ok(state_views) - } else { - anyhow::bail!("not enough nodes with mine presignatures") - } - }; - - let state_views = is_enough - .retry(&ExponentialBuilder::default().with_max_times(15)) - .await - .with_context(|| { - format!( - "mpc nodes failed to generate {} presignatures before deadline", - expected - ) - })?; - - Ok(state_views) -} - -pub async fn require_mine_triples( - nodes: &Cluster, - expected: usize, -) -> anyhow::Result> { - let is_enough = || async { - let state_views = nodes.fetch_states().await?; - let enough = state_views - .iter() - .filter(|state| match state { - StateView::Running { - triple_mine_count, .. - } => *triple_mine_count >= expected, - _ => { - tracing::warn!("state=NotRunning while checking mine triples"); - false - } - }) - .count(); - if enough >= nodes.len() { - Ok(state_views) - } else { - anyhow::bail!("not enough nodes with mine triples") - } - }; - let state_views = is_enough - .retry(&ExponentialBuilder::default().with_max_times(12)) - .await - .with_context(|| { - format!( - "mpc nodes failed to generate {} triples before deadline", - expected - ) - })?; - - Ok(state_views) -} diff --git a/integration-tests/chain-signatures/tests/lib.rs b/integration-tests/chain-signatures/tests/lib.rs index 0311276e..f213a4e4 100644 --- a/integration-tests/chain-signatures/tests/lib.rs +++ b/integration-tests/chain-signatures/tests/lib.rs @@ -3,15 +3,13 @@ mod cases; pub mod cluster; use crate::actions::wait_for; +use cluster::Cluster; use mpc_contract::update::{ProposeUpdateArgs, UpdateId}; -use futures::future::BoxFuture; -use integration_tests_chain_signatures::containers::DockerClient; use integration_tests_chain_signatures::utils::{vote_join, vote_leave}; -use integration_tests_chain_signatures::{run, utils, MultichainConfig, Nodes}; use near_workspaces::types::NearToken; -use near_workspaces::{Account, AccountId, Contract}; +use near_workspaces::{Account, AccountId}; use integration_tests_chain_signatures::local::NodeConfig; use std::collections::HashSet; @@ -20,18 +18,7 @@ const CURRENT_CONTRACT_DEPLOY_DEPOSIT: NearToken = NearToken::from_millinear(900 const CURRENT_CONTRACT_FILE_PATH: &str = "../../target/wasm32-unknown-unknown/release/mpc_contract.wasm"; -pub struct TestContext { - nodes: Nodes, - rpc_client: near_fetch::Client, - http_client: reqwest::Client, - cfg: MultichainConfig, -} - -impl TestContext { - pub fn contract(&self) -> &Contract { - self.nodes.contract() - } - +impl Cluster { pub async fn participant_accounts(&self) -> 
anyhow::Result> { let state = wait_for::running_mpc(self, None).await?; let participant_ids = state.participants.keys().collect::>(); @@ -54,7 +41,7 @@ impl TestContext { node.account } None => { - let account = self.nodes.ctx().worker.dev_create_account().await?; + let account = self.worker().dev_create_account().await?; tracing::info!(node_account_id = %account.id(), "adding new participant"); account } @@ -182,29 +169,7 @@ impl TestContext { success >= self.cfg.threshold, "did not successfully vote for update" ); - } -} -pub async fn with_multichain_nodes(cfg: MultichainConfig, f: F) -> anyhow::Result<()> -where - F: FnOnce(TestContext) -> BoxFuture<'static, anyhow::Result<()>>, -{ - let docker_client = DockerClient::default(); - let nodes = run(cfg.clone(), &docker_client).await?; - - let sk_local_path = nodes.ctx().storage_options.sk_share_local_path.clone(); - - let connector = near_jsonrpc_client::JsonRpcClient::new_client(); - let jsonrpc_client = connector.connect(&nodes.ctx().lake_indexer.rpc_host_address); - let rpc_client = near_fetch::Client::from_client(jsonrpc_client); - let result = f(TestContext { - nodes, - rpc_client, - http_client: reqwest::Client::default(), - cfg, - }) - .await; - utils::clear_local_sk_shares(sk_local_path).await?; - - result + tokio::time::sleep(std::time::Duration::from_secs(3)).await; + } } From 5d08a93e19ba4b3cd790ddef371a08e955a6fd69 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Thu, 5 Dec 2024 02:05:14 +0000 Subject: [PATCH 09/42] No more need for wait_for::running_mpc --- .../chain-signatures/tests/actions/sign.rs | 4 +- .../chain-signatures/tests/actions/wait.rs | 88 ++++--- .../tests/actions/wait_for.rs | 214 ------------------ .../chain-signatures/tests/cases/mod.rs | 19 +- .../chain-signatures/tests/cluster/mod.rs | 18 +- .../chain-signatures/tests/lib.rs | 20 +- 6 files changed, 88 insertions(+), 275 deletions(-) diff --git a/integration-tests/chain-signatures/tests/actions/sign.rs b/integration-tests/chain-signatures/tests/actions/sign.rs index df6a0b47..dd3093cf 100644 --- a/integration-tests/chain-signatures/tests/actions/sign.rs +++ b/integration-tests/chain-signatures/tests/actions/sign.rs @@ -116,10 +116,8 @@ impl<'a> IntoFuture for SignAction<'a> { std::pin::Pin + Send + 'a>>; fn into_future(mut self) -> Self::IntoFuture { - let Self { nodes, .. 
} = self; - Box::pin(async move { - let state = nodes.expect_running().await?; + let state = self.nodes.expect_running().await?; let account = self.account_or_new().await; let payload = self.payload_or_random(); let payload_hash = self.payload_hash(); diff --git a/integration-tests/chain-signatures/tests/actions/wait.rs b/integration-tests/chain-signatures/tests/actions/wait.rs index f384618e..58eaf661 100644 --- a/integration-tests/chain-signatures/tests/actions/wait.rs +++ b/integration-tests/chain-signatures/tests/actions/wait.rs @@ -7,8 +7,10 @@ use mpc_node::web::StateView; use crate::cluster::Cluster; +type Epoch = u64; + enum WaitActions { - Running, + Running(Epoch), MinTriples(usize), MinMineTriples(usize), MinPresignatures(usize), @@ -16,24 +18,34 @@ enum WaitActions { ReadyToSign(usize), } -pub struct WaitAction<'a> { +pub struct WaitAction<'a, R> { nodes: &'a Cluster, actions: Vec, + _phantom: std::marker::PhantomData, } -impl<'a> WaitAction<'a> { +impl<'a> WaitAction<'a, ()> { pub fn new(nodes: &'a Cluster) -> Self { Self { nodes, actions: Vec::new(), + _phantom: std::marker::PhantomData, } } } -impl WaitAction<'_> { - pub fn running(mut self) -> Self { - self.actions.push(WaitActions::Running); - self +impl<'a, R> WaitAction<'a, R> { + pub fn running(self) -> WaitAction<'a, RunningContractState> { + self.running_on_epoch(0) + } + + pub fn running_on_epoch(mut self, epoch: Epoch) -> WaitAction<'a, RunningContractState> { + self.actions.push(WaitActions::Running(epoch)); + WaitAction { + nodes: self.nodes, + actions: self.actions, + _phantom: std::marker::PhantomData, + } } pub fn min_triples(mut self, min_triples: usize) -> Self { @@ -68,42 +80,56 @@ impl WaitAction<'_> { self.actions.push(WaitActions::ReadyToSign(count)); self } + + async fn execute(self) -> anyhow::Result<&'a Cluster> { + for action in self.actions { + match action { + WaitActions::Running(epoch) => { + running_mpc(self.nodes, Some(epoch)).await?; + } + WaitActions::MinTriples(expected) => { + require_triples(self.nodes, expected, false).await?; + } + WaitActions::MinMineTriples(expected) => { + require_triples(self.nodes, expected, true).await?; + } + WaitActions::MinPresignatures(expected) => { + require_presignatures(self.nodes, expected, false).await?; + } + WaitActions::MinMinePresignatures(expected) => { + require_presignatures(self.nodes, expected, true).await?; + } + WaitActions::ReadyToSign(count) => { + require_presignatures(self.nodes, count, true).await?; + } + } + } + + Ok(self.nodes) + } } -impl<'a> IntoFuture for WaitAction<'a> { +impl<'a> IntoFuture for WaitAction<'a, ()> { type Output = anyhow::Result<()>; type IntoFuture = std::pin::Pin + Send + 'a>>; fn into_future(self) -> Self::IntoFuture { Box::pin(async move { - for action in self.actions { - match action { - WaitActions::Running => { - running_mpc(self.nodes, None).await?; - } - WaitActions::MinTriples(expected) => { - require_triples(self.nodes, expected, false).await?; - } - WaitActions::MinMineTriples(expected) => { - require_triples(self.nodes, expected, true).await?; - } - WaitActions::MinPresignatures(expected) => { - require_presignatures(self.nodes, expected, false).await?; - } - WaitActions::MinMinePresignatures(expected) => { - require_presignatures(self.nodes, expected, true).await?; - } - WaitActions::ReadyToSign(count) => { - require_presignatures(self.nodes, count, true).await?; - } - } - } - + self.execute().await?; Ok(()) }) } } +impl<'a> IntoFuture for WaitAction<'a, RunningContractState> { + type Output = 
anyhow::Result; + type IntoFuture = std::pin::Pin + Send + 'a>>; + + fn into_future(self) -> Self::IntoFuture { + Box::pin(async move { self.execute().await?.expect_running().await }) + } +} + pub async fn running_mpc( nodes: &Cluster, epoch: Option, diff --git a/integration-tests/chain-signatures/tests/actions/wait_for.rs b/integration-tests/chain-signatures/tests/actions/wait_for.rs index ba55fcd7..38b0ca9c 100644 --- a/integration-tests/chain-signatures/tests/actions/wait_for.rs +++ b/integration-tests/chain-signatures/tests/actions/wait_for.rs @@ -12,7 +12,6 @@ use crypto_shared::SignatureResponse; use k256::Secp256k1; use mpc_contract::ProtocolContractState; use mpc_contract::RunningContractState; -use mpc_node::web::StateView; use near_fetch::ops::AsyncTransactionStatus; use near_lake_primitives::CryptoHash; use near_primitives::errors::ActionErrorKind; @@ -21,219 +20,6 @@ use near_primitives::views::ExecutionStatusView; use near_primitives::views::FinalExecutionStatus; use near_workspaces::Account; use std::collections::HashMap; -use url::Url; - -pub async fn running_mpc( - nodes: &Cluster, - epoch: Option, -) -> anyhow::Result { - let is_running = || async { - let state: ProtocolContractState = nodes - .rpc_client - .view(nodes.contract().id(), "state") - .await - .map_err(|err| anyhow::anyhow!("could not view state {err:?}"))? - .json()?; - - match state { - ProtocolContractState::Running(running) => match epoch { - None => Ok(running), - Some(expected_epoch) if running.epoch >= expected_epoch => Ok(running), - Some(_) => { - anyhow::bail!("running with an older epoch: {}", running.epoch) - } - }, - _ => anyhow::bail!("not running"), - } - }; - let err_msg = format!( - "mpc did not reach {} in time", - if epoch.is_some() { - "expected epoch" - } else { - "running state" - } - ); - is_running - .retry(&ExponentialBuilder::default().with_max_times(6)) - .await - .with_context(|| err_msg) -} - -// pub async fn has_at_least_triples( -// ctx: &TestContext, -// expected_triple_count: usize, -// ) -> anyhow::Result> { -// let is_enough_triples = |id| { -// move || async move { -// let state_view: StateView = ctx -// .http_client -// .get( -// Url::parse(ctx.nodes.url(id)) -// .unwrap() -// .join("/state") -// .unwrap(), -// ) -// .send() -// .await? -// .json() -// .await?; - -// tracing::debug!( -// "has_at_least_triples state_view from {}: {:?}", -// id, -// state_view -// ); - -// match state_view { -// StateView::Running { triple_count, .. } -// if triple_count >= expected_triple_count => -// { -// Ok(state_view) -// } -// StateView::Running { .. } => anyhow::bail!("node does not have enough triples yet"), -// state => anyhow::bail!("node is not running {state:?}"), -// } -// } -// }; - -// let mut state_views = Vec::new(); -// for id in 0..ctx.nodes.len() { -// let state_view = is_enough_triples(id) -// .retry(&ExponentialBuilder::default().with_max_times(6)) -// .await -// .with_context(|| format!("mpc node '{id}' failed to generate '{expected_triple_count}' triples before deadline"))?; -// state_views.push(state_view); -// } -// Ok(state_views) -// } - -// pub async fn has_at_least_mine_triples( -// ctx: &TestContext, -// expected_mine_triple_count: usize, -// ) -> anyhow::Result> { -// let is_enough_mine_triples = |id| { -// move || async move { -// let state_view: StateView = ctx -// .http_client -// .get( -// Url::parse(ctx.nodes.url(id)) -// .unwrap() -// .join("/state") -// .unwrap(), -// ) -// .send() -// .await? 
-// .json() -// .await?; - -// match state_view { -// StateView::Running { -// triple_mine_count, .. -// } if triple_mine_count >= expected_mine_triple_count => Ok(state_view), -// StateView::Running { .. } => { -// anyhow::bail!("node does not have enough mine triples yet") -// } -// state => anyhow::bail!("node is not running {state:?}"), -// } -// } -// }; - -// let mut state_views = Vec::new(); -// for id in 0..ctx.nodes.len() { -// let state_view = is_enough_mine_triples(id) -// .retry(&ExponentialBuilder::default().with_max_times(15)) -// .await -// .with_context(|| format!("mpc node '{id}' failed to generate '{expected_mine_triple_count}' triples before deadline"))?; -// state_views.push(state_view); -// } -// Ok(state_views) -// } - -// pub async fn has_at_least_presignatures( -// ctx: &TestContext, -// expected_presignature_count: usize, -// ) -> anyhow::Result> { -// let is_enough_presignatures = |id| { -// move || async move { -// let state_view: StateView = ctx -// .http_client -// .get( -// Url::parse(ctx.nodes.url(id)) -// .unwrap() -// .join("/state") -// .unwrap(), -// ) -// .send() -// .await? -// .json() -// .await?; - -// match state_view { -// StateView::Running { -// presignature_count, .. -// } if presignature_count >= expected_presignature_count => Ok(state_view), -// StateView::Running { .. } => { -// anyhow::bail!("node does not have enough presignatures yet") -// } -// state => anyhow::bail!("node is not running {state:?}"), -// } -// } -// }; - -// let mut state_views = Vec::new(); -// for id in 0..ctx.nodes.len() { -// let state_view = is_enough_presignatures(id) -// .retry(&ExponentialBuilder::default().with_max_times(6)) -// .await -// .with_context(|| format!("mpc node '{id}' failed to generate '{expected_presignature_count}' presignatures before deadline"))?; -// state_views.push(state_view); -// } -// Ok(state_views) -// } - -// pub async fn has_at_least_mine_presignatures( -// ctx: &TestContext, -// expected_mine_presignature_count: usize, -// ) -> anyhow::Result> { -// let is_enough_mine_presignatures = |id| { -// move || async move { -// let state_view: StateView = ctx -// .http_client -// .get( -// Url::parse(ctx.nodes.url(id)) -// .unwrap() -// .join("/state") -// .unwrap(), -// ) -// .send() -// .await? -// .json() -// .await?; - -// match state_view { -// StateView::Running { -// presignature_mine_count, -// .. -// } if presignature_mine_count >= expected_mine_presignature_count => Ok(state_view), -// StateView::Running { .. } => { -// anyhow::bail!("node does not have enough mine presignatures yet") -// } -// state => anyhow::bail!("node is not running {state:?}"), -// } -// } -// }; - -// let mut state_views = Vec::new(); -// for id in 0..ctx.nodes.len() { -// let state_view = is_enough_mine_presignatures(id) -// .retry(&ExponentialBuilder::default().with_max_times(6)) -// .await -// .with_context(|| format!("mpc node '{id}' failed to generate '{expected_mine_presignature_count}' presignatures before deadline"))?; -// state_views.push(state_view); -// } -// Ok(state_views) -// } #[derive(Debug, thiserror::Error)] pub enum SignatureError { diff --git a/integration-tests/chain-signatures/tests/cases/mod.rs b/integration-tests/chain-signatures/tests/cases/mod.rs index b8a2ee62..030ebdf1 100644 --- a/integration-tests/chain-signatures/tests/cases/mod.rs +++ b/integration-tests/chain-signatures/tests/cases/mod.rs @@ -33,10 +33,9 @@ async fn test_multichain_reshare() -> anyhow::Result<()> { tracing::info!("!!! 
Add participant 3"); nodes.add_participant(None).await.unwrap(); - nodes.wait().running().ready_to_sign().await.unwrap(); + let state = nodes.wait().running().ready_to_sign().await.unwrap(); let _ = nodes.sign().await.unwrap(); - let state = nodes.expect_running().await.unwrap(); tracing::info!("!!! Remove participant 0 and participant 2"); let account_2 = near_workspaces::types::AccountId::from_str( state.participants.keys().nth(2).unwrap().clone().as_ref(), @@ -421,23 +420,23 @@ async fn test_multichain_reshare_with_lake_congestion() -> anyhow::Result<()> { // this fails if the latency above is too long (10s) nodes.remove_participant(None).await.unwrap(); - nodes.wait().running().await?; - assert!(nodes.expect_running().await?.participants.len() == 2); + let state = nodes.wait().running().await?; + assert!(state.participants.len() == 2); // Going below T should error out nodes.remove_participant(None).await.unwrap_err(); - nodes.wait().running().await?; - assert!(nodes.expect_running().await?.participants.len() == 2); + let state = nodes.wait().running().await?; + assert!(state.participants.len() == 2); nodes.add_participant(None).await.unwrap(); // add latency to node2->rpc add_latency(&nodes.nodes.proxy_name_for_node(2), true, 1.0, 1_000, 100).await?; - nodes.wait().running().await?; - assert!(nodes.expect_running().await?.participants.len() == 3); + let state = nodes.wait().running().await?; + assert!(state.participants.len() == 3); nodes.remove_participant(None).await.unwrap(); - nodes.wait().running().await?; - assert!(nodes.expect_running().await?.participants.len() == 2); + let state = nodes.wait().running().await?; + assert!(state.participants.len() == 2); // make sure signing works after reshare nodes.wait().ready_to_sign().await?; diff --git a/integration-tests/chain-signatures/tests/cluster/mod.rs b/integration-tests/chain-signatures/tests/cluster/mod.rs index c5a82def..dc9baae0 100644 --- a/integration-tests/chain-signatures/tests/cluster/mod.rs +++ b/integration-tests/chain-signatures/tests/cluster/mod.rs @@ -1,5 +1,7 @@ mod spawner; +use std::collections::HashSet; + use integration_tests_chain_signatures::local::NodeConfig; use mpc_contract::primitives::Participants; use near_workspaces::network::Sandbox; @@ -11,7 +13,7 @@ use mpc_node::web::StateView; use anyhow::Context; use integration_tests_chain_signatures::containers::DockerClient; use integration_tests_chain_signatures::{utils, MultichainConfig, Nodes}; -use near_workspaces::{AccountId, Contract, Worker}; +use near_workspaces::{Account, AccountId, Contract, Worker}; use url::Url; use crate::actions::sign::SignAction; @@ -56,7 +58,7 @@ impl Cluster { futures::future::try_join_all(tasks).await } - pub fn wait(&self) -> WaitAction<'_> { + pub fn wait(&self) -> WaitAction<'_, ()> { WaitAction::new(self) } @@ -96,6 +98,18 @@ impl Cluster { Ok(state.participants) } + pub async fn participant_ids(&self) -> anyhow::Result> { + let participants = self.participants().await?; + Ok(participants.keys().cloned().collect()) + } + + pub async fn participant_accounts(&self) -> anyhow::Result> { + let participant_ids = self.participant_ids().await?; + let mut node_accounts = self.nodes.near_accounts(); + node_accounts.retain(|a| participant_ids.contains(a.id())); + Ok(node_accounts) + } + pub async fn root_public_key(&self) -> anyhow::Result { let state: RunningContractState = self.expect_running().await?; Ok(state.public_key) diff --git a/integration-tests/chain-signatures/tests/lib.rs 
b/integration-tests/chain-signatures/tests/lib.rs index f213a4e4..f4591ad9 100644 --- a/integration-tests/chain-signatures/tests/lib.rs +++ b/integration-tests/chain-signatures/tests/lib.rs @@ -2,36 +2,26 @@ mod actions; mod cases; pub mod cluster; -use crate::actions::wait_for; use cluster::Cluster; use mpc_contract::update::{ProposeUpdateArgs, UpdateId}; use integration_tests_chain_signatures::utils::{vote_join, vote_leave}; use near_workspaces::types::NearToken; -use near_workspaces::{Account, AccountId}; +use near_workspaces::AccountId; use integration_tests_chain_signatures::local::NodeConfig; -use std::collections::HashSet; const CURRENT_CONTRACT_DEPLOY_DEPOSIT: NearToken = NearToken::from_millinear(9000); const CURRENT_CONTRACT_FILE_PATH: &str = "../../target/wasm32-unknown-unknown/release/mpc_contract.wasm"; impl Cluster { - pub async fn participant_accounts(&self) -> anyhow::Result> { - let state = wait_for::running_mpc(self, None).await?; - let participant_ids = state.participants.keys().collect::>(); - let mut node_accounts = self.nodes.near_accounts(); - node_accounts.retain(|a| participant_ids.contains(a.id())); - Ok(node_accounts) - } - pub async fn add_participant( &mut self, existing_node: Option, ) -> anyhow::Result<()> { - let state = wait_for::running_mpc(self, None).await?; + let state = self.expect_running().await?; let node_account = match existing_node { Some(node) => { tracing::info!( @@ -66,7 +56,7 @@ impl Cluster { .await .is_ok()); - let new_state = wait_for::running_mpc(self, Some(state.epoch + 1)).await?; + let new_state = self.wait().running_on_epoch(state.epoch + 1).await?; assert_eq!(new_state.participants.len(), state.participants.len() + 1); assert_eq!( state.public_key, new_state.public_key, @@ -80,7 +70,7 @@ impl Cluster { &mut self, kick: Option<&AccountId>, ) -> anyhow::Result { - let state = wait_for::running_mpc(self, None).await?; + let state = self.expect_running().await?; let participant_accounts = self.participant_accounts().await?; let kick = kick .unwrap_or_else(|| participant_accounts.last().unwrap().id()) @@ -103,7 +93,7 @@ impl Cluster { anyhow::bail!("failed to vote_leave"); } - let new_state = wait_for::running_mpc(self, Some(state.epoch + 1)).await?; + let new_state = self.wait().running_on_epoch(state.epoch + 1).await?; tracing::info!( "Getting new state, old {} {:?}, new {} {:?}", state.participants.len(), From d26d5b760a0580b3a22e787597bab9e4b36f2c71 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Thu, 5 Dec 2024 02:18:01 +0000 Subject: [PATCH 10/42] Rename to signable --- .../chain-signatures/tests/actions/wait.rs | 4 +-- .../chain-signatures/tests/cases/mod.rs | 36 +++++++++---------- .../chain-signatures/tests/cases/nightly.rs | 2 +- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/integration-tests/chain-signatures/tests/actions/wait.rs b/integration-tests/chain-signatures/tests/actions/wait.rs index 58eaf661..31441c36 100644 --- a/integration-tests/chain-signatures/tests/actions/wait.rs +++ b/integration-tests/chain-signatures/tests/actions/wait.rs @@ -71,12 +71,12 @@ impl<'a, R> WaitAction<'a, R> { self } - pub fn ready_to_sign(mut self) -> Self { + pub fn signable(mut self) -> Self { self.actions.push(WaitActions::ReadyToSign(1)); self } - pub fn ready_to_sign_many(mut self, count: usize) -> Self { + pub fn signable_many(mut self, count: usize) -> Self { self.actions.push(WaitActions::ReadyToSign(count)); self } diff --git a/integration-tests/chain-signatures/tests/cases/mod.rs 
b/integration-tests/chain-signatures/tests/cases/mod.rs index 030ebdf1..96e5da90 100644 --- a/integration-tests/chain-signatures/tests/cases/mod.rs +++ b/integration-tests/chain-signatures/tests/cases/mod.rs @@ -28,12 +28,12 @@ pub mod nightly; #[test(tokio::test)] async fn test_multichain_reshare() -> anyhow::Result<()> { let mut nodes = cluster::spawn().wait_for_running().await?; - nodes.wait().ready_to_sign().await?; + nodes.wait().signable().await?; let _ = nodes.sign().await?; tracing::info!("!!! Add participant 3"); nodes.add_participant(None).await.unwrap(); - let state = nodes.wait().running().ready_to_sign().await.unwrap(); + let state = nodes.wait().running().signable().await.unwrap(); let _ = nodes.sign().await.unwrap(); tracing::info!("!!! Remove participant 0 and participant 2"); @@ -47,7 +47,7 @@ async fn test_multichain_reshare() -> anyhow::Result<()> { ) .unwrap(); let node_cfg_0 = nodes.remove_participant(Some(&account_0)).await.unwrap(); - nodes.wait().running().ready_to_sign().await.unwrap(); + nodes.wait().running().signable().await.unwrap(); let _ = nodes.sign().await.unwrap(); tracing::info!("!!! Try remove participant 3, should fail due to threshold"); @@ -55,12 +55,12 @@ async fn test_multichain_reshare() -> anyhow::Result<()> { tracing::info!("!!! Add participant 5"); nodes.add_participant(None).await.unwrap(); - nodes.wait().running().ready_to_sign().await.unwrap(); + nodes.wait().running().signable().await.unwrap(); let _ = nodes.sign().await.unwrap(); tracing::info!("!!! Add back participant 0"); nodes.add_participant(Some(node_cfg_0)).await.unwrap(); - nodes.wait().running().ready_to_sign().await.unwrap(); + nodes.wait().running().signable().await.unwrap(); let _ = nodes.sign().await.unwrap(); Ok(()) @@ -69,7 +69,7 @@ async fn test_multichain_reshare() -> anyhow::Result<()> { #[test(tokio::test)] async fn test_signature_basic() -> anyhow::Result<()> { let nodes = cluster::spawn().wait_for_running().await?; - nodes.wait().ready_to_sign().await?; + nodes.wait().signable().await?; nodes.sign().await?; Ok(()) @@ -78,7 +78,7 @@ async fn test_signature_basic() -> anyhow::Result<()> { #[test(tokio::test)] async fn test_signature_rogue() -> anyhow::Result<()> { let nodes = cluster::spawn().wait_for_running().await?; - nodes.wait().ready_to_sign().await?; + nodes.wait().signable().await?; nodes.sign().rogue_responder().await?; Ok(()) @@ -87,7 +87,7 @@ async fn test_signature_rogue() -> anyhow::Result<()> { #[test(tokio::test)] async fn test_signature_offline_node() -> anyhow::Result<()> { let mut nodes = cluster::spawn().wait_for_running().await?; - nodes.wait().ready_to_sign().await?; + nodes.wait().signable().await?; let _ = nodes.sign().await?; // Kill the node then have presignatures and signature generation only use the active set of nodes @@ -106,14 +106,14 @@ async fn test_signature_offline_node() -> anyhow::Result<()> { // This could potentially fail and timeout the first time if the participant set picked up is the // one with the offline node. This is expected behavior for now if a user submits a request in between // a node going offline and the system hasn't detected it yet. - nodes.wait().ready_to_sign().await.unwrap(); + nodes.wait().signable().await.unwrap(); let outcome = nodes.sign().await; // Try again if the first attempt failed. This second portion should not be needed when the NEP // comes in for resumeable MPC. if outcome.is_err() { // Retry if the first attempt failed. 
- nodes.wait().ready_to_sign().await.unwrap(); + nodes.wait().signable().await.unwrap(); let _outcome = nodes.sign().await.unwrap(); } @@ -123,7 +123,7 @@ async fn test_signature_offline_node() -> anyhow::Result<()> { #[test(tokio::test)] async fn test_key_derivation() -> anyhow::Result<()> { let nodes = cluster::spawn().wait_for_running().await?; - nodes.wait().ready_to_sign().await?; + nodes.wait().signable().await?; let _ = nodes.sign().await?; let mpc_pk: k256::AffinePoint = nodes.root_public_key().await?.into_affine_point(); @@ -366,7 +366,7 @@ fn dummy_triple(id: u64) -> Triple { #[test(tokio::test)] async fn test_signature_offline_node_back_online() -> anyhow::Result<()> { let mut nodes = cluster::spawn().wait_for_running().await?; - nodes.wait().ready_to_sign().await?; + nodes.wait().signable().await?; let _ = nodes.sign().await?; // Kill node 2 @@ -384,7 +384,7 @@ async fn test_signature_offline_node_back_online() -> anyhow::Result<()> { nodes.restart_node(killed).await?; // Check that we can sign again - nodes.wait().ready_to_sign().await?; + nodes.wait().signable().await?; let _ = nodes.sign().await?; Ok(()) @@ -404,7 +404,7 @@ async fn test_lake_congestion() -> anyhow::Result<()> { // with a 1s latency it fails to wait for signature response in time add_latency("lake-s3", false, 1.0, 100, 10).await?; - nodes.wait().running().ready_to_sign().await?; + nodes.wait().running().signable().await?; nodes.sign().await.unwrap(); Ok(()) @@ -439,7 +439,7 @@ async fn test_multichain_reshare_with_lake_congestion() -> anyhow::Result<()> { assert!(state.participants.len() == 2); // make sure signing works after reshare - nodes.wait().ready_to_sign().await?; + nodes.wait().signable().await?; nodes.sign().await.unwrap(); Ok(()) @@ -448,14 +448,14 @@ async fn test_multichain_reshare_with_lake_congestion() -> anyhow::Result<()> { #[test(tokio::test)] async fn test_multichain_update_contract() -> anyhow::Result<()> { let nodes = cluster::spawn().wait_for_running().await?; - nodes.wait().ready_to_sign().await?; + nodes.wait().signable().await?; nodes.sign().await.unwrap(); // Perform update to the contract and see that the nodes are still properly running and picking // up the new contract by first upgrading the contract, then trying to generate a new signature. 
let id = nodes.propose_update_contract_default().await; nodes.vote_update(id).await; - nodes.wait().ready_to_sign().await?; + nodes.wait().signable().await?; nodes.sign().await.unwrap(); // Now do a config update and see if that also updates the same: @@ -466,7 +466,7 @@ async fn test_multichain_update_contract() -> anyhow::Result<()> { }) .await; nodes.vote_update(id).await; - nodes.wait().ready_to_sign().await?; + nodes.wait().signable().await?; nodes.sign().await.unwrap(); Ok(()) diff --git a/integration-tests/chain-signatures/tests/cases/nightly.rs b/integration-tests/chain-signatures/tests/cases/nightly.rs index 3b1d6567..d8db8430 100644 --- a/integration-tests/chain-signatures/tests/cases/nightly.rs +++ b/integration-tests/chain-signatures/tests/cases/nightly.rs @@ -22,7 +22,7 @@ async fn test_nightly_signature_production() -> anyhow::Result<()> { .await?; for i in 0..SIGNATURE_AMOUNT { - if let Err(err) = nodes.wait().ready_to_sign().await { + if let Err(err) = nodes.wait().signable().await { tracing::error!(?err, "Failed to be ready to sign"); continue; } From 87f9ee5a5d535a97d3f3c14a702ddddc67bf4a4a Mon Sep 17 00:00:00 2001 From: Phuong N Date: Thu, 5 Dec 2024 02:30:02 +0000 Subject: [PATCH 11/42] Remove unnecessary functions --- .../chain-signatures/tests/actions/mod.rs | 170 +----------------- .../tests/actions/wait_for.rs | 31 +--- .../chain-signatures/tests/cases/mod.rs | 18 +- .../chain-signatures/tests/cluster/spawner.rs | 3 - 4 files changed, 10 insertions(+), 212 deletions(-) diff --git a/integration-tests/chain-signatures/tests/actions/mod.rs b/integration-tests/chain-signatures/tests/actions/mod.rs index 918b1bbf..bb0d4fd1 100644 --- a/integration-tests/chain-signatures/tests/actions/mod.rs +++ b/integration-tests/chain-signatures/tests/actions/mod.rs @@ -6,8 +6,7 @@ use crate::cluster::Cluster; use cait_sith::FullSignature; use crypto_shared::ScalarExt; -use crypto_shared::SerializableAffinePoint; -use crypto_shared::{derive_epsilon, derive_key, SerializableScalar, SignatureResponse}; +use crypto_shared::{derive_epsilon, derive_key}; use elliptic_curve::sec1::ToEncodedPoint; use k256::ecdsa::VerifyingKey; use k256::elliptic_curve::ops::{Invert, Reduce}; @@ -15,11 +14,8 @@ use k256::elliptic_curve::point::AffineCoordinates; use k256::elliptic_curve::sec1::FromEncodedPoint; use k256::elliptic_curve::ProjectivePoint; use k256::{AffinePoint, EncodedPoint, Scalar, Secp256k1}; -use mpc_contract::errors; use mpc_contract::errors::SignError; use mpc_contract::primitives::SignRequest; -use mpc_contract::primitives::SignatureRequest; -use mpc_contract::RunningContractState; use mpc_node::kdf::into_eth_sig; use near_crypto::InMemorySigner; use near_fetch::ops::AsyncTransactionStatus; @@ -156,170 +152,6 @@ pub async fn assert_signature( assert!(signature.verify(&user_pk, &Scalar::from_bytes(payload).unwrap(),)); } -// A normal signature, but we try to insert a bad response which fails and the signature is generated -pub async fn single_signature_rogue_responder( - nodes: &Cluster, - state: &RunningContractState, -) -> anyhow::Result<()> { - let (_, payload_hash, account, status) = request_sign(nodes).await?; - - // We have to use seperate transactions because one could fail. 
- // This leads to a potential race condition where this transaction could get sent after the signature completes, but I think that's unlikely - let rogue_status = rogue_respond(nodes, payload_hash, account.id(), "test").await?; - let err = wait_for::rogue_message_responded(rogue_status).await?; - - assert!(err.contains(&errors::RespondError::InvalidSignature.to_string())); - let signature = wait_for::signature_responded(status).await?; - - let mut mpc_pk_bytes = vec![0x04]; - mpc_pk_bytes.extend_from_slice(&state.public_key.as_bytes()[1..]); - - // Useful for populating the "signatures_havent_changed" test's hardcoded values - // dbg!( - // hex::encode(signature.big_r.to_encoded_point(true).to_bytes()), - // hex::encode(signature.s.to_bytes()), - // hex::encode(&mpc_pk_bytes), - // hex::encode(&payload_hash), - // account.id(), - // ); - assert_signature(account.id(), &mpc_pk_bytes, payload_hash, &signature).await; - - Ok(()) -} - -pub async fn single_signature_production( - nodes: &Cluster, - state: &RunningContractState, -) -> anyhow::Result<()> { - let (_, payload_hash, account, status) = request_sign(nodes).await?; - let signature = wait_for::signature_responded(status).await?; - - let mut mpc_pk_bytes = vec![0x04]; - mpc_pk_bytes.extend_from_slice(&state.public_key.as_bytes()[1..]); - assert_signature(account.id(), &mpc_pk_bytes, payload_hash, &signature).await; - - Ok(()) -} - -pub async fn rogue_respond( - nodes: &Cluster, - payload_hash: [u8; 32], - predecessor: &near_workspaces::AccountId, - path: &str, -) -> anyhow::Result { - let account = nodes.worker().dev_create_account().await?; - - let signer = InMemorySigner { - account_id: account.id().clone(), - public_key: account.secret_key().public_key().clone().into(), - secret_key: account.secret_key().to_string().parse()?, - }; - let epsilon = derive_epsilon(predecessor, path); - - let request = SignatureRequest { - payload_hash: Scalar::from_bytes(payload_hash).unwrap().into(), - epsilon: SerializableScalar { scalar: epsilon }, - }; - - let big_r = serde_json::from_value( - "02EC7FA686BB430A4B700BDA07F2E07D6333D9E33AEEF270334EB2D00D0A6FEC6C".into(), - )?; // Fake BigR - let s = serde_json::from_value( - "20F90C540EE00133C911EA2A9ADE2ABBCC7AD820687F75E011DFEEC94DB10CD6".into(), - )?; // Fake S - - let response = SignatureResponse { - big_r: SerializableAffinePoint { - affine_point: big_r, - }, - s: SerializableScalar { scalar: s }, - recovery_id: 0, - }; - - let status = nodes - .rpc_client - .call(&signer, nodes.contract().id(), "respond") - .args_json(serde_json::json!({ - "request": request, - "response": response, - })) - .max_gas() - .transact_async() - .await?; - - tokio::time::sleep(Duration::from_secs(1)).await; - Ok(status) -} - -pub async fn request_sign_non_random( - nodes: &Cluster, - account: Account, - payload: [u8; 32], - payload_hashed: [u8; 32], -) -> Result<([u8; 32], [u8; 32], Account, AsyncTransactionStatus), WaitForError> { - let signer = InMemorySigner { - account_id: account.id().clone(), - public_key: account - .secret_key() - .public_key() - .to_string() - .parse() - .map_err(|_| WaitForError::Parsing)?, - secret_key: account - .secret_key() - .to_string() - .parse() - .map_err(|_| WaitForError::Parsing)?, - }; - - let request = SignRequest { - payload: payload_hashed, - path: "test".to_string(), - key_version: 0, - }; - - let status = nodes - .rpc_client - .call(&signer, nodes.contract().id(), "sign") - .args_json(serde_json::json!({ - "request": request, - })) - .gas(Gas::from_tgas(50)) - 
.deposit(NearToken::from_yoctonear(1)) - .transact_async() - .await - .map_err(|error| WaitForError::JsonRpc(format!("{error:?}")))?; - tokio::time::sleep(Duration::from_secs(1)).await; - Ok((payload, payload_hashed, account, status)) -} - -pub async fn single_payload_signature_production( - nodes: &Cluster, - state: &RunningContractState, -) -> anyhow::Result<()> { - let (payload, payload_hash, account, status) = request_sign(nodes).await?; - let first_tx_result = wait_for::signature_responded(status).await; - let signature = match first_tx_result { - Ok(sig) => sig, - Err(error) => { - println!("single_payload_signature_production: first sign tx err out with {error:?}"); - wait_for::signature_payload_responded(nodes, account.clone(), payload, payload_hash) - .await? - } - }; - let mut mpc_pk_bytes = vec![0x04]; - mpc_pk_bytes.extend_from_slice(&state.public_key.as_bytes()[1..]); - assert_signature( - account.clone().id(), - &mpc_pk_bytes, - payload_hash, - &signature, - ) - .await; - - Ok(()) -} - // add one of toxic to the toxiproxy-server to make indexer rpc slow down, congested, or unstable // available toxics and params: https://github.com/Shopify/toxiproxy?tab=readme-ov-file#toxic-fields pub async fn add_toxic(proxy: &str, host: bool, toxic: serde_json::Value) -> anyhow::Result<()> { diff --git a/integration-tests/chain-signatures/tests/actions/wait_for.rs b/integration-tests/chain-signatures/tests/actions/wait_for.rs index 38b0ca9c..7b6ed1fc 100644 --- a/integration-tests/chain-signatures/tests/actions/wait_for.rs +++ b/integration-tests/chain-signatures/tests/actions/wait_for.rs @@ -1,24 +1,18 @@ use std::task::Poll; use std::time::Duration; -use crate::actions; -use crate::cluster::Cluster; - use anyhow::Context; +use backon::ConstantBuilder; use backon::Retryable; -use backon::{ConstantBuilder, ExponentialBuilder}; use cait_sith::FullSignature; use crypto_shared::SignatureResponse; use k256::Secp256k1; -use mpc_contract::ProtocolContractState; -use mpc_contract::RunningContractState; use near_fetch::ops::AsyncTransactionStatus; use near_lake_primitives::CryptoHash; use near_primitives::errors::ActionErrorKind; use near_primitives::views::ExecutionOutcomeWithIdView; use near_primitives::views::ExecutionStatusView; use near_primitives::views::FinalExecutionStatus; -use near_workspaces::Account; use std::collections::HashMap; #[derive(Debug, thiserror::Error)] @@ -37,8 +31,6 @@ pub enum WaitForError { Signature(SignatureError), #[error("Serde json error: {0}")] SerdeJson(String), - #[error("Parsing error")] - Parsing, } /// Used locally for testing to circumvent retrying on all errors. 
This will avoid retrying @@ -87,27 +79,6 @@ pub async fn signature_responded( } } -pub async fn signature_payload_responded( - nodes: &Cluster, - account: Account, - payload: [u8; 32], - payload_hashed: [u8; 32], -) -> Result, WaitForError> { - let is_signature_ready = || async { - let (_, _, _, status) = - actions::request_sign_non_random(nodes, account.clone(), payload, payload_hashed) - .await?; - let result = signature_responded(status).await; - if let Err(err) = &result { - println!("failed to produce signature: {err:?}"); - } - result - }; - - let strategy = ConstantBuilder::default().with_max_times(3); - is_signature_ready.retry(&strategy).await -} - // Check that the rogue message failed pub async fn rogue_message_responded(status: AsyncTransactionStatus) -> anyhow::Result { let is_tx_ready = || async { diff --git a/integration-tests/chain-signatures/tests/cases/mod.rs b/integration-tests/chain-signatures/tests/cases/mod.rs index 96e5da90..93c9943a 100644 --- a/integration-tests/chain-signatures/tests/cases/mod.rs +++ b/integration-tests/chain-signatures/tests/cases/mod.rs @@ -123,22 +123,20 @@ async fn test_signature_offline_node() -> anyhow::Result<()> { #[test(tokio::test)] async fn test_key_derivation() -> anyhow::Result<()> { let nodes = cluster::spawn().wait_for_running().await?; - nodes.wait().signable().await?; - let _ = nodes.sign().await?; + let hd_path = "test"; let mpc_pk: k256::AffinePoint = nodes.root_public_key().await?.into_affine_point(); for _ in 0..3 { - let (_, payload_hashed, account, status) = actions::request_sign(&nodes).await?; - let sig = wait_for::signature_responded(status).await?; + nodes.wait().signable().await?; + let outcome = nodes.sign().path(hd_path).await?; - let hd_path = "test"; - let derivation_epsilon = derive_epsilon(account.id(), hd_path); + let derivation_epsilon = derive_epsilon(outcome.account.id(), hd_path); let user_pk = derive_key(mpc_pk, derivation_epsilon); let multichain_sig = into_eth_sig( &user_pk, - &sig.big_r, - &sig.s, - k256::Scalar::from_bytes(payload_hashed).unwrap(), + &outcome.signature.big_r, + &outcome.signature.s, + k256::Scalar::from_bytes(outcome.payload_hash).unwrap(), ) .unwrap(); @@ -162,7 +160,7 @@ async fn test_key_derivation() -> anyhow::Result<()> { signature }; let recovered_addr = web3::signing::recover( - &payload_hashed, + &outcome.payload_hash, &signature_for_recovery, multichain_sig.recovery_id as i32, ) diff --git a/integration-tests/chain-signatures/tests/cluster/spawner.rs b/integration-tests/chain-signatures/tests/cluster/spawner.rs index b7010d71..9fb1ba6c 100644 --- a/integration-tests/chain-signatures/tests/cluster/spawner.rs +++ b/integration-tests/chain-signatures/tests/cluster/spawner.rs @@ -1,7 +1,4 @@ -use anyhow::Context; -use backon::{ExponentialBuilder, Retryable}; use mpc_contract::config::ProtocolConfig; -use mpc_contract::{ProtocolContractState, RunningContractState}; use std::future::{Future, IntoFuture}; From bf4295668472ecaafa664a1922d1dec910de3354 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Thu, 5 Dec 2024 02:30:28 +0000 Subject: [PATCH 12/42] signable rename --- integration-tests/chain-signatures/tests/actions/wait.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/integration-tests/chain-signatures/tests/actions/wait.rs b/integration-tests/chain-signatures/tests/actions/wait.rs index 31441c36..1110a766 100644 --- a/integration-tests/chain-signatures/tests/actions/wait.rs +++ b/integration-tests/chain-signatures/tests/actions/wait.rs @@ -15,7 +15,7 @@ 
enum WaitActions { MinMineTriples(usize), MinPresignatures(usize), MinMinePresignatures(usize), - ReadyToSign(usize), + Signable(usize), } pub struct WaitAction<'a, R> { @@ -72,12 +72,12 @@ impl<'a, R> WaitAction<'a, R> { } pub fn signable(mut self) -> Self { - self.actions.push(WaitActions::ReadyToSign(1)); + self.actions.push(WaitActions::Signable(1)); self } pub fn signable_many(mut self, count: usize) -> Self { - self.actions.push(WaitActions::ReadyToSign(count)); + self.actions.push(WaitActions::Signable(count)); self } @@ -99,7 +99,7 @@ impl<'a, R> WaitAction<'a, R> { WaitActions::MinMinePresignatures(expected) => { require_presignatures(self.nodes, expected, true).await?; } - WaitActions::ReadyToSign(count) => { + WaitActions::Signable(count) => { require_presignatures(self.nodes, count, true).await?; } } From b2fee59eed1fbbbac4be224497315998fefe17bc Mon Sep 17 00:00:00 2001 From: Phuong N Date: Thu, 5 Dec 2024 02:33:34 +0000 Subject: [PATCH 13/42] Remove unnecssary functions --- .../chain-signatures/tests/actions/mod.rs | 32 ------------------- .../chain-signatures/tests/cases/mod.rs | 2 +- .../chain-signatures/tests/cluster/mod.rs | 6 +++- 3 files changed, 6 insertions(+), 34 deletions(-) diff --git a/integration-tests/chain-signatures/tests/actions/mod.rs b/integration-tests/chain-signatures/tests/actions/mod.rs index bb0d4fd1..f1007b70 100644 --- a/integration-tests/chain-signatures/tests/actions/mod.rs +++ b/integration-tests/chain-signatures/tests/actions/mod.rs @@ -38,38 +38,6 @@ use k256::{ }; use serde_json::json; -pub async fn request_sign( - nodes: &Cluster, -) -> anyhow::Result<([u8; 32], [u8; 32], Account, AsyncTransactionStatus)> { - let account = nodes.worker().dev_create_account().await?; - let payload: [u8; 32] = rand::thread_rng().gen(); - let payload_hashed = web3::signing::keccak256(&payload); - - let signer = InMemorySigner { - account_id: account.id().clone(), - public_key: account.secret_key().public_key().to_string().parse()?, - secret_key: account.secret_key().to_string().parse()?, - }; - - let request = SignRequest { - payload: payload_hashed, - path: "test".to_string(), - key_version: 0, - }; - let status = nodes - .rpc_client - .call(&signer, nodes.contract().id(), "sign") - .args_json(serde_json::json!({ - "request": request, - })) - .gas(Gas::from_tgas(50)) - .deposit(NearToken::from_yoctonear(1)) - .transact_async() - .await?; - tokio::time::sleep(Duration::from_secs(1)).await; - Ok((payload, payload_hashed, account, status)) -} - pub async fn request_batch_random_sign( nodes: &Cluster, ) -> anyhow::Result<(Vec<([u8; 32], [u8; 32])>, Account, AsyncTransactionStatus)> { diff --git a/integration-tests/chain-signatures/tests/cases/mod.rs b/integration-tests/chain-signatures/tests/cases/mod.rs index 93c9943a..419b64de 100644 --- a/integration-tests/chain-signatures/tests/cases/mod.rs +++ b/integration-tests/chain-signatures/tests/cases/mod.rs @@ -1,6 +1,6 @@ use std::str::FromStr; -use crate::actions::{self, add_latency, wait_for}; +use crate::actions::{self, add_latency}; use crate::cluster; use cait_sith::protocol::Participant; diff --git a/integration-tests/chain-signatures/tests/cluster/mod.rs b/integration-tests/chain-signatures/tests/cluster/mod.rs index dc9baae0..506266b5 100644 --- a/integration-tests/chain-signatures/tests/cluster/mod.rs +++ b/integration-tests/chain-signatures/tests/cluster/mod.rs @@ -43,6 +43,10 @@ impl Cluster { self.nodes.len() } + pub fn is_empty(&self) -> bool { + self.nodes.is_empty() + } + pub fn url(&self, id: usize) -> 
Url { Url::parse(self.nodes.url(id)).unwrap() } @@ -127,6 +131,6 @@ impl Cluster { impl Drop for Cluster { fn drop(&mut self) { let sk_local_path = self.nodes.ctx().storage_options.sk_share_local_path.clone(); - let _ = tokio::task::spawn(utils::clear_local_sk_shares(sk_local_path)); + let _task = tokio::task::spawn(utils::clear_local_sk_shares(sk_local_path)); } } From 0edb22a444bc0c85e716f027c650c72595ec1c05 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Thu, 5 Dec 2024 03:10:08 +0000 Subject: [PATCH 14/42] Resolved comments --- .../node/src/protocol/presignature.rs | 12 +++++++ chain-signatures/node/src/protocol/triple.rs | 36 ++++++++++++------- chain-signatures/node/src/storage/error.rs | 4 --- .../node/src/storage/presignature_storage.rs | 7 ---- .../node/src/storage/triple_storage.rs | 7 ---- 5 files changed, 35 insertions(+), 31 deletions(-) diff --git a/chain-signatures/node/src/protocol/presignature.rs b/chain-signatures/node/src/protocol/presignature.rs index c2e3ca9f..c0010673 100644 --- a/chain-signatures/node/src/protocol/presignature.rs +++ b/chain-signatures/node/src/protocol/presignature.rs @@ -137,12 +137,16 @@ pub enum GenerationError { TripleIsGenerating(TripleId), #[error("triple {0} is in garbage collection")] TripleIsGarbageCollected(TripleId), + #[error("triple access denied: id={0}, {1}")] + TripleDenied(TripleId, &'static str), #[error("presignature {0} is generating")] PresignatureIsGenerating(PresignatureId), #[error("presignature {0} is missing")] PresignatureIsMissing(PresignatureId), #[error("presignature {0} is in garbage collection")] PresignatureIsGarbageCollected(TripleId), + #[error("presignature access denied: id={0}, {1}")] + PresignatureDenied(PresignatureId, &'static str), #[error("presignature bad parameters")] PresignatureBadParameters, } @@ -219,6 +223,14 @@ impl PresignatureManager { } pub async fn take(&mut self, id: PresignatureId) -> Result { + if self.contains_mine(&id).await { + tracing::error!(?id, "cannot take mine presignature as foreign owned"); + return Err(GenerationError::PresignatureDenied( + id, + "cannot take mine presignature as foreign owned", + )); + } + let presignature = self .presignature_storage .take(&id) diff --git a/chain-signatures/node/src/protocol/triple.rs b/chain-signatures/node/src/protocol/triple.rs index 6ea709a2..5f93f9c8 100644 --- a/chain-signatures/node/src/protocol/triple.rs +++ b/chain-signatures/node/src/protocol/triple.rs @@ -145,10 +145,12 @@ impl TripleManager { } pub async fn insert(&mut self, triple: Triple, mine: bool) { - tracing::debug!(id = triple.id, mine, "inserting triple"); - self.gc.remove(&triple.id); + let id = triple.id; + tracing::debug!(id, mine, "inserting triple"); if let Err(e) = self.triple_storage.insert(triple, mine).await { tracing::warn!(?e, mine, "failed to insert triple"); + } else { + self.gc.remove(&id); } } @@ -168,8 +170,16 @@ impl TripleManager { .unwrap_or(false) } - async fn take(&self, id: &TripleId) -> Result { - self.triple_storage.take(id).await.map_err(|store_err| { + async fn take(&mut self, id: &TripleId) -> Result { + if self.contains_mine(id).await { + tracing::error!(?id, "cannot take mine triple as foreign owned"); + return Err(GenerationError::TripleDenied( + *id, + "cannot take mine triple as foreign owned", + )); + } + + let result = self.triple_storage.take(id).await.map_err(|store_err| { if self.generators.contains_key(id) { tracing::warn!(id, ?store_err, "triple is generating"); GenerationError::TripleIsGenerating(*id) @@ -180,7 +190,13 @@ impl 
TripleManager { tracing::warn!(id, ?store_err, "triple is missing"); GenerationError::TripleIsMissing(*id) } - }) + }); + + if result.is_ok() { + self.gc.insert(*id, Instant::now()); + } + + result } /// Take two unspent triple by theirs id with no way to return it. Only takes @@ -196,14 +212,10 @@ impl TripleManager { let triple_1 = match self.take(&id1).await { Ok(triple) => triple, Err(err) => { - if let Err(store_err) = self.triple_storage.insert(triple_0, false).await { - tracing::warn!(?store_err, id0, "failed to insert triple back"); - } + self.insert(triple_0, false).await; return Err(err); } }; - self.gc.insert(id0, Instant::now()); - self.gc.insert(id1, Instant::now()); tracing::debug!(id0, id1, "took two triples"); Ok((triple_0, triple_1)) @@ -229,9 +241,7 @@ impl TripleManager { Ok(triple) => triple, Err(e) => { tracing::warn!(?e, "failed to take mine triple"); - if let Err(e) = triples.insert(triple_0, true).await { - tracing::warn!(?e, "failed to insert mine triple back"); - } + self.insert(triple_0, true).await; return None; } }; diff --git a/chain-signatures/node/src/storage/error.rs b/chain-signatures/node/src/storage/error.rs index 0cbcec23..9de789ea 100644 --- a/chain-signatures/node/src/storage/error.rs +++ b/chain-signatures/node/src/storage/error.rs @@ -11,12 +11,8 @@ pub enum StoreError { Connect(#[from] anyhow::Error), #[error("missing triple: id={0}")] TripleIsMissing(TripleId), - #[error("triple access denied: id={0}, {1}")] - TripleDenied(TripleId, &'static str), #[error("missing presignature: {0}")] PresignatureIsMissing(PresignatureId), - #[error("presignature access denied: id={0}, {1}")] - PresignatureDenied(PresignatureId, &'static str), #[error("empty: {0}")] Empty(&'static str), } diff --git a/chain-signatures/node/src/storage/presignature_storage.rs b/chain-signatures/node/src/storage/presignature_storage.rs index e8be876d..fc5c8724 100644 --- a/chain-signatures/node/src/storage/presignature_storage.rs +++ b/chain-signatures/node/src/storage/presignature_storage.rs @@ -59,13 +59,6 @@ impl PresignatureStorage { pub async fn take(&self, id: &PresignatureId) -> StoreResult { let mut conn = self.connect().await?; - if self.contains_mine(id).await? { - tracing::error!(?id, "cannot take mine presignature as foreign owned"); - return Err(StoreError::PresignatureDenied( - *id, - "cannot take mine presignature as foreign owned", - )); - } let presignature: Option = conn.hget(self.presig_key(), id).await?; let presignature = presignature.ok_or_else(|| StoreError::PresignatureIsMissing(*id))?; conn.hdel::<&str, PresignatureId, ()>(&self.presig_key(), *id) diff --git a/chain-signatures/node/src/storage/triple_storage.rs b/chain-signatures/node/src/storage/triple_storage.rs index 9974a80b..8acc8cd3 100644 --- a/chain-signatures/node/src/storage/triple_storage.rs +++ b/chain-signatures/node/src/storage/triple_storage.rs @@ -56,13 +56,6 @@ impl TripleStorage { pub async fn take(&self, id: &TripleId) -> StoreResult { let mut conn = self.connect().await?; - if self.contains_mine(id).await? 
{ - tracing::error!(?id, "cannot take mine triple as foreign owned"); - return Err(StoreError::TripleDenied( - *id, - "cannot take mine triple as foreign owned", - )); - } let triple: Option = conn.hget(self.triple_key(), id).await?; let triple = triple.ok_or_else(|| StoreError::TripleIsMissing(*id))?; conn.hdel::<&str, TripleId, ()>(&self.triple_key(), *id) From 33fdbd09fa06ec9f52b8819223a91cf3c16280b5 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Mon, 9 Dec 2024 21:57:45 +0000 Subject: [PATCH 15/42] fmt --- chain-signatures/node/src/storage/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chain-signatures/node/src/storage/mod.rs b/chain-signatures/node/src/storage/mod.rs index 7aa24eb5..0d8d2f92 100644 --- a/chain-signatures/node/src/storage/mod.rs +++ b/chain-signatures/node/src/storage/mod.rs @@ -1,8 +1,8 @@ pub mod app_data_storage; +pub mod error; pub mod presignature_storage; pub mod secret_storage; pub mod triple_storage; -pub mod error; /// Configures storage. #[derive(Debug, Clone, clap::Parser)] From 4cf6240215c67b9e38775555ead858b11589409d Mon Sep 17 00:00:00 2001 From: Phuong N Date: Mon, 9 Dec 2024 22:40:00 +0000 Subject: [PATCH 16/42] clippy --- chain-signatures/node/src/protocol/presignature.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chain-signatures/node/src/protocol/presignature.rs b/chain-signatures/node/src/protocol/presignature.rs index c0010673..8672235d 100644 --- a/chain-signatures/node/src/protocol/presignature.rs +++ b/chain-signatures/node/src/protocol/presignature.rs @@ -263,7 +263,7 @@ impl PresignatureManager { }) .ok()?; tracing::debug!(id = ?presignature.id, "took presignature of mine"); - return Some(presignature); + Some(presignature) } /// Returns the number of unspent presignatures available in the manager. From 6fbe3d31bc604c69c2ee9b97822bd808530fb4f8 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Mon, 9 Dec 2024 23:13:21 +0000 Subject: [PATCH 17/42] Rename vote/leave --- .../chain-signatures/tests/cases/mod.rs | 22 +++++++++---------- .../chain-signatures/tests/lib.rs | 10 ++------- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/integration-tests/chain-signatures/tests/cases/mod.rs b/integration-tests/chain-signatures/tests/cases/mod.rs index 419b64de..c3994ded 100644 --- a/integration-tests/chain-signatures/tests/cases/mod.rs +++ b/integration-tests/chain-signatures/tests/cases/mod.rs @@ -32,7 +32,7 @@ async fn test_multichain_reshare() -> anyhow::Result<()> { let _ = nodes.sign().await?; tracing::info!("!!! Add participant 3"); - nodes.add_participant(None).await.unwrap(); + nodes.join(None).await.unwrap(); let state = nodes.wait().running().signable().await.unwrap(); let _ = nodes.sign().await.unwrap(); @@ -41,25 +41,25 @@ async fn test_multichain_reshare() -> anyhow::Result<()> { state.participants.keys().nth(2).unwrap().clone().as_ref(), ) .unwrap(); - nodes.remove_participant(Some(&account_2)).await.unwrap(); + nodes.leave(Some(&account_2)).await.unwrap(); let account_0 = near_workspaces::types::AccountId::from_str( state.participants.keys().next().unwrap().clone().as_ref(), ) .unwrap(); - let node_cfg_0 = nodes.remove_participant(Some(&account_0)).await.unwrap(); + let node_cfg_0 = nodes.leave(Some(&account_0)).await.unwrap(); nodes.wait().running().signable().await.unwrap(); let _ = nodes.sign().await.unwrap(); tracing::info!("!!! 
Try remove participant 3, should fail due to threshold"); - nodes.remove_participant(None).await.unwrap_err(); + nodes.leave(None).await.unwrap_err(); tracing::info!("!!! Add participant 5"); - nodes.add_participant(None).await.unwrap(); + nodes.join(None).await.unwrap(); nodes.wait().running().signable().await.unwrap(); let _ = nodes.sign().await.unwrap(); tracing::info!("!!! Add back participant 0"); - nodes.add_participant(Some(node_cfg_0)).await.unwrap(); + nodes.join(Some(node_cfg_0)).await.unwrap(); nodes.wait().running().signable().await.unwrap(); let _ = nodes.sign().await.unwrap(); @@ -101,7 +101,7 @@ async fn test_signature_offline_node() -> anyhow::Result<()> { .as_str() .parse()?; - nodes.remove_participant(Some(&account_id)).await.unwrap(); + nodes.leave(Some(&account_id)).await.unwrap(); // This could potentially fail and timeout the first time if the participant set picked up is the // one with the offline node. This is expected behavior for now if a user submits a request in between @@ -416,23 +416,23 @@ async fn test_multichain_reshare_with_lake_congestion() -> anyhow::Result<()> { add_latency(&nodes.nodes.proxy_name_for_node(1), true, 1.0, 1_000, 100).await?; // remove node2, node0 and node1 should still reach concensus // this fails if the latency above is too long (10s) - nodes.remove_participant(None).await.unwrap(); + nodes.leave(None).await.unwrap(); let state = nodes.wait().running().await?; assert!(state.participants.len() == 2); // Going below T should error out - nodes.remove_participant(None).await.unwrap_err(); + nodes.leave(None).await.unwrap_err(); let state = nodes.wait().running().await?; assert!(state.participants.len() == 2); - nodes.add_participant(None).await.unwrap(); + nodes.join(None).await.unwrap(); // add latency to node2->rpc add_latency(&nodes.nodes.proxy_name_for_node(2), true, 1.0, 1_000, 100).await?; let state = nodes.wait().running().await?; assert!(state.participants.len() == 3); - nodes.remove_participant(None).await.unwrap(); + nodes.leave(None).await.unwrap(); let state = nodes.wait().running().await?; assert!(state.participants.len() == 2); diff --git a/integration-tests/chain-signatures/tests/lib.rs b/integration-tests/chain-signatures/tests/lib.rs index f4591ad9..f623683b 100644 --- a/integration-tests/chain-signatures/tests/lib.rs +++ b/integration-tests/chain-signatures/tests/lib.rs @@ -17,10 +17,7 @@ const CURRENT_CONTRACT_FILE_PATH: &str = "../../target/wasm32-unknown-unknown/release/mpc_contract.wasm"; impl Cluster { - pub async fn add_participant( - &mut self, - existing_node: Option, - ) -> anyhow::Result<()> { + pub async fn join(&mut self, existing_node: Option) -> anyhow::Result<()> { let state = self.expect_running().await?; let node_account = match existing_node { Some(node) => { @@ -66,10 +63,7 @@ impl Cluster { Ok(()) } - pub async fn remove_participant( - &mut self, - kick: Option<&AccountId>, - ) -> anyhow::Result { + pub async fn leave(&mut self, kick: Option<&AccountId>) -> anyhow::Result { let state = self.expect_running().await?; let participant_accounts = self.participant_accounts().await?; let kick = kick From 695a3ade92c5bec367268ff0f2c81aedde1bf7e8 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Tue, 10 Dec 2024 00:07:17 +0000 Subject: [PATCH 18/42] Rename proper NodeConfig and NodeEnvConfig --- .../chain-signatures/src/containers.rs | 14 +++++----- integration-tests/chain-signatures/src/lib.rs | 28 ++++++++----------- .../chain-signatures/src/local.rs | 26 ++++++++--------- .../chain-signatures/src/main.rs | 6 
++-- .../chain-signatures/tests/cluster/mod.rs | 12 ++++---- .../chain-signatures/tests/cluster/spawner.rs | 6 ++-- .../chain-signatures/tests/lib.rs | 6 ++-- 7 files changed, 46 insertions(+), 52 deletions(-) diff --git a/integration-tests/chain-signatures/src/containers.rs b/integration-tests/chain-signatures/src/containers.rs index a978eb34..a8e385d9 100644 --- a/integration-tests/chain-signatures/src/containers.rs +++ b/integration-tests/chain-signatures/src/containers.rs @@ -1,6 +1,6 @@ use std::path::Path; -use super::{local::NodeConfig, utils, MultichainConfig}; +use super::{local::NodeEnvConfig, utils, NodeConfig}; use anyhow::{anyhow, Context}; use async_process::Child; use bollard::container::LogsOptions; @@ -36,7 +36,7 @@ pub struct Node { pub cipher_pk: hpke::PublicKey, pub cipher_sk: hpke::SecretKey, pub sign_sk: near_crypto::SecretKey, - cfg: MultichainConfig, + cfg: NodeConfig, // near rpc address, after proxy near_rpc: String, } @@ -47,7 +47,7 @@ impl Node { pub async fn run( ctx: &super::Context, - cfg: &MultichainConfig, + cfg: &NodeConfig, account: &Account, ) -> anyhow::Result { tracing::info!(id = %account.id(), "running node container"); @@ -72,7 +72,7 @@ impl Node { Self::spawn( ctx, - NodeConfig { + NodeEnvConfig { web_port: Self::CONTAINER_PORT, account: account.clone(), cipher_pk, @@ -85,9 +85,9 @@ impl Node { .await } - pub async fn kill(self) -> NodeConfig { + pub async fn kill(self) -> NodeEnvConfig { self.container.stop().await.unwrap(); - NodeConfig { + NodeEnvConfig { web_port: Self::CONTAINER_PORT, account: self.account, cipher_pk: self.cipher_pk, @@ -98,7 +98,7 @@ impl Node { } } - pub async fn spawn(ctx: &super::Context, config: NodeConfig) -> anyhow::Result { + pub async fn spawn(ctx: &super::Context, config: NodeEnvConfig) -> anyhow::Result { let indexer_options = mpc_node::indexer::Options { s3_bucket: ctx.localstack.s3_bucket.clone(), s3_region: ctx.localstack.s3_region.clone(), diff --git a/integration-tests/chain-signatures/src/lib.rs b/integration-tests/chain-signatures/src/lib.rs index c72e043b..138880d9 100644 --- a/integration-tests/chain-signatures/src/lib.rs +++ b/integration-tests/chain-signatures/src/lib.rs @@ -7,7 +7,7 @@ use containers::Container; use deadpool_redis::Pool; use std::collections::HashMap; -use self::local::NodeConfig; +use self::local::NodeEnvConfig; use crate::containers::DockerClient; use crate::containers::LocalStack; @@ -30,13 +30,13 @@ use serde_json::json; const NETWORK: &str = "mpc_it_network"; #[derive(Clone, Debug)] -pub struct MultichainConfig { +pub struct NodeConfig { pub nodes: usize, pub threshold: usize, pub protocol: ProtocolConfig, } -impl Default for MultichainConfig { +impl Default for NodeConfig { fn default() -> Self { Self { nodes: 3, @@ -104,7 +104,7 @@ impl Nodes { pub async fn start_node( &mut self, - cfg: &MultichainConfig, + cfg: &NodeConfig, new_account: &Account, ) -> anyhow::Result<()> { tracing::info!(id = %new_account.id(), "adding one more node"); @@ -120,7 +120,7 @@ impl Nodes { Ok(()) } - pub async fn kill_node(&mut self, account_id: &AccountId) -> NodeConfig { + pub async fn kill_node(&mut self, account_id: &AccountId) -> NodeEnvConfig { let killed_node_config = match self { Nodes::Local { nodes, .. 
} => { let index = nodes @@ -144,7 +144,7 @@ impl Nodes { killed_node_config } - pub async fn restart_node(&mut self, config: NodeConfig) -> anyhow::Result<()> { + pub async fn restart_node(&mut self, config: NodeEnvConfig) -> anyhow::Result<()> { tracing::info!(node_account_id = %config.account.id(), "restarting node"); match self { Nodes::Local { ctx, nodes } => nodes.push(local::Node::spawn(ctx, config).await?), @@ -260,7 +260,7 @@ pub async fn setup(docker_client: &DockerClient) -> anyhow::Result { }) } -pub async fn docker(cfg: MultichainConfig, docker_client: &DockerClient) -> anyhow::Result { +pub async fn docker(cfg: NodeConfig, docker_client: &DockerClient) -> anyhow::Result { let ctx = setup(docker_client).await?; let accounts = @@ -306,10 +306,7 @@ pub async fn docker(cfg: MultichainConfig, docker_client: &DockerClient) -> anyh Ok(Nodes::Docker { ctx, nodes }) } -pub async fn dry_host( - cfg: MultichainConfig, - docker_client: &DockerClient, -) -> anyhow::Result { +pub async fn dry_host(cfg: NodeConfig, docker_client: &DockerClient) -> anyhow::Result { let ctx = setup(docker_client).await?; let accounts = @@ -362,7 +359,7 @@ pub async fn dry_host( Ok(ctx) } -pub async fn host(cfg: MultichainConfig, docker_client: &DockerClient) -> anyhow::Result { +pub async fn host(cfg: NodeConfig, docker_client: &DockerClient) -> anyhow::Result { let ctx = setup(docker_client).await?; let accounts = @@ -407,7 +404,7 @@ pub async fn host(cfg: MultichainConfig, docker_client: &DockerClient) -> anyhow Ok(Nodes::Local { ctx, nodes }) } -pub async fn run(cfg: MultichainConfig, docker_client: &DockerClient) -> anyhow::Result { +pub async fn run(cfg: NodeConfig, docker_client: &DockerClient) -> anyhow::Result { #[cfg(feature = "docker-test")] return docker(cfg, docker_client).await; @@ -415,10 +412,7 @@ pub async fn run(cfg: MultichainConfig, docker_client: &DockerClient) -> anyhow: return host(cfg, docker_client).await; } -pub async fn dry_run( - cfg: MultichainConfig, - docker_client: &DockerClient, -) -> anyhow::Result { +pub async fn dry_run(cfg: NodeConfig, docker_client: &DockerClient) -> anyhow::Result { #[cfg(feature = "docker-test")] unimplemented!("dry_run only works with native node"); diff --git a/integration-tests/chain-signatures/src/local.rs b/integration-tests/chain-signatures/src/local.rs index 2bb83dd7..48a62d7e 100644 --- a/integration-tests/chain-signatures/src/local.rs +++ b/integration-tests/chain-signatures/src/local.rs @@ -1,6 +1,6 @@ use std::fmt; -use crate::{execute, utils, MultichainConfig}; +use crate::{execute, utils, NodeConfig}; use crate::containers::LakeIndexer; use crate::execute::executable; @@ -17,7 +17,7 @@ pub struct Node { pub sign_sk: near_crypto::SecretKey, pub cipher_pk: hpke::PublicKey, cipher_sk: hpke::SecretKey, - cfg: MultichainConfig, + cfg: NodeConfig, web_port: u16, // process held so it's not dropped. Once dropped, process will be killed. 
@@ -26,18 +26,18 @@ pub struct Node { pub near_rpc: String, } -pub struct NodeConfig { +pub struct NodeEnvConfig { pub web_port: u16, pub account: Account, pub cipher_pk: hpke::PublicKey, pub cipher_sk: hpke::SecretKey, pub sign_sk: near_crypto::SecretKey, - pub cfg: MultichainConfig, + pub cfg: NodeConfig, // near rpc address, after proxy pub near_rpc: String, } -impl fmt::Debug for NodeConfig { +impl fmt::Debug for NodeEnvConfig { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("NodeConfig") .field("web_port", &self.web_port) @@ -53,8 +53,8 @@ impl Node { pub async fn dry_run( ctx: &super::Context, account: &Account, - cfg: &MultichainConfig, - ) -> anyhow::Result { + cfg: &NodeConfig, + ) -> anyhow::Result { let account_id = account.id(); let account_sk = account.secret_key(); let web_port = utils::pick_unused_port().await?; @@ -104,7 +104,7 @@ impl Node { cmd.to_str().unwrap(), escaped_args.join(" ") ); - let node_config = NodeConfig { + let node_config = NodeEnvConfig { web_port, account: account.clone(), cipher_pk, @@ -118,7 +118,7 @@ impl Node { pub async fn run( ctx: &super::Context, - cfg: &MultichainConfig, + cfg: &NodeConfig, account: &Account, ) -> anyhow::Result { let web_port = utils::pick_unused_port().await?; @@ -141,7 +141,7 @@ impl Node { Self::spawn( ctx, - NodeConfig { + NodeEnvConfig { web_port, account: account.clone(), cipher_pk, @@ -154,7 +154,7 @@ impl Node { .await } - pub async fn spawn(ctx: &super::Context, config: NodeConfig) -> anyhow::Result { + pub async fn spawn(ctx: &super::Context, config: NodeEnvConfig) -> anyhow::Result { let web_port = config.web_port; let indexer_options = mpc_node::indexer::Options { s3_bucket: ctx.localstack.s3_bucket.clone(), @@ -203,11 +203,11 @@ impl Node { }) } - pub fn kill(self) -> NodeConfig { + pub fn kill(self) -> NodeEnvConfig { // NOTE: process gets killed after this function completes via the drop, due to taking ownership of self. 
tracing::info!(id = %self.account.id(), ?self.address, "node killed"); - NodeConfig { + NodeEnvConfig { web_port: self.web_port, account: self.account.clone(), cipher_pk: self.cipher_pk.clone(), diff --git a/integration-tests/chain-signatures/src/main.rs b/integration-tests/chain-signatures/src/main.rs index e928497f..9de9cc3b 100644 --- a/integration-tests/chain-signatures/src/main.rs +++ b/integration-tests/chain-signatures/src/main.rs @@ -5,7 +5,7 @@ use std::vec; use clap::Parser; use integration_tests_chain_signatures::containers::DockerClient; -use integration_tests_chain_signatures::{dry_run, run, utils, MultichainConfig}; +use integration_tests_chain_signatures::{dry_run, run, utils, NodeConfig}; use near_account_id::AccountId; use near_crypto::PublicKey; use serde_json::json; @@ -43,7 +43,7 @@ async fn main() -> anyhow::Result<()> { "Setting up an environment with {} nodes, {} threshold ...", nodes, threshold ); - let config = MultichainConfig { + let config = NodeConfig { nodes, threshold, ..Default::default() @@ -82,7 +82,7 @@ async fn main() -> anyhow::Result<()> { } Cli::DepServices => { println!("Setting up dependency services"); - let config = MultichainConfig::default(); + let config = NodeConfig::default(); let _ctx = dry_run(config.clone(), &docker_client).await?; println!("Press Ctrl-C to stop dependency services"); diff --git a/integration-tests/chain-signatures/tests/cluster/mod.rs b/integration-tests/chain-signatures/tests/cluster/mod.rs index 506266b5..cd8609cf 100644 --- a/integration-tests/chain-signatures/tests/cluster/mod.rs +++ b/integration-tests/chain-signatures/tests/cluster/mod.rs @@ -2,7 +2,7 @@ mod spawner; use std::collections::HashSet; -use integration_tests_chain_signatures::local::NodeConfig; +use integration_tests_chain_signatures::local::NodeEnvConfig; use mpc_contract::primitives::Participants; use near_workspaces::network::Sandbox; use spawner::ClusterSpawner; @@ -12,7 +12,7 @@ use mpc_node::web::StateView; use anyhow::Context; use integration_tests_chain_signatures::containers::DockerClient; -use integration_tests_chain_signatures::{utils, MultichainConfig, Nodes}; +use integration_tests_chain_signatures::{utils, NodeConfig, Nodes}; use near_workspaces::{Account, AccountId, Contract, Worker}; use url::Url; @@ -22,7 +22,7 @@ use crate::actions::wait::WaitAction; pub fn spawn() -> ClusterSpawner { ClusterSpawner { wait_for_running: false, - cfg: MultichainConfig { + cfg: NodeConfig { nodes: 3, threshold: 2, protocol: Default::default(), @@ -31,7 +31,7 @@ pub fn spawn() -> ClusterSpawner { } pub struct Cluster { - pub cfg: MultichainConfig, + pub cfg: NodeConfig, pub docker_client: DockerClient, pub rpc_client: near_fetch::Client, http_client: reqwest::Client, @@ -119,11 +119,11 @@ impl Cluster { Ok(state.public_key) } - pub async fn kill_node(&mut self, account_id: &AccountId) -> NodeConfig { + pub async fn kill_node(&mut self, account_id: &AccountId) -> NodeEnvConfig { self.nodes.kill_node(account_id).await } - pub async fn restart_node(&mut self, config: NodeConfig) -> anyhow::Result<()> { + pub async fn restart_node(&mut self, config: NodeEnvConfig) -> anyhow::Result<()> { self.nodes.restart_node(config).await } } diff --git a/integration-tests/chain-signatures/tests/cluster/spawner.rs b/integration-tests/chain-signatures/tests/cluster/spawner.rs index 9fb1ba6c..16ef9424 100644 --- a/integration-tests/chain-signatures/tests/cluster/spawner.rs +++ b/integration-tests/chain-signatures/tests/cluster/spawner.rs @@ -3,13 +3,13 @@ use 
mpc_contract::config::ProtocolConfig; use std::future::{Future, IntoFuture}; use integration_tests_chain_signatures::containers::DockerClient; -use integration_tests_chain_signatures::{run, MultichainConfig}; +use integration_tests_chain_signatures::{run, NodeConfig}; // use crate::actions::wait_for; use crate::cluster::Cluster; pub struct ClusterSpawner { - pub(crate) cfg: MultichainConfig, + pub(crate) cfg: NodeConfig, pub(crate) wait_for_running: bool, } @@ -29,7 +29,7 @@ impl ClusterSpawner { self } - pub fn with_config(mut self, call: impl FnOnce(&mut MultichainConfig)) -> Self { + pub fn with_config(mut self, call: impl FnOnce(&mut NodeConfig)) -> Self { call(&mut self.cfg); self } diff --git a/integration-tests/chain-signatures/tests/lib.rs b/integration-tests/chain-signatures/tests/lib.rs index f623683b..a6bdc5f7 100644 --- a/integration-tests/chain-signatures/tests/lib.rs +++ b/integration-tests/chain-signatures/tests/lib.rs @@ -10,14 +10,14 @@ use integration_tests_chain_signatures::utils::{vote_join, vote_leave}; use near_workspaces::types::NearToken; use near_workspaces::AccountId; -use integration_tests_chain_signatures::local::NodeConfig; +use integration_tests_chain_signatures::local::NodeEnvConfig; const CURRENT_CONTRACT_DEPLOY_DEPOSIT: NearToken = NearToken::from_millinear(9000); const CURRENT_CONTRACT_FILE_PATH: &str = "../../target/wasm32-unknown-unknown/release/mpc_contract.wasm"; impl Cluster { - pub async fn join(&mut self, existing_node: Option) -> anyhow::Result<()> { + pub async fn join(&mut self, existing_node: Option) -> anyhow::Result<()> { let state = self.expect_running().await?; let node_account = match existing_node { Some(node) => { @@ -63,7 +63,7 @@ impl Cluster { Ok(()) } - pub async fn leave(&mut self, kick: Option<&AccountId>) -> anyhow::Result { + pub async fn leave(&mut self, kick: Option<&AccountId>) -> anyhow::Result { let state = self.expect_running().await?; let participant_accounts = self.participant_accounts().await?; let kick = kick From 41db4643a2e8c4f62ee6d02f5a904582ba900f64 Mon Sep 17 00:00:00 2001 From: Phuong Date: Wed, 11 Dec 2024 17:08:21 -0800 Subject: [PATCH 19/42] Bump join to 20secs --- .../chain-signatures/tests/actions/wait.rs | 31 ++++++++++++------- .../chain-signatures/tests/lib.rs | 2 +- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/integration-tests/chain-signatures/tests/actions/wait.rs b/integration-tests/chain-signatures/tests/actions/wait.rs index 1110a766..42b21954 100644 --- a/integration-tests/chain-signatures/tests/actions/wait.rs +++ b/integration-tests/chain-signatures/tests/actions/wait.rs @@ -146,18 +146,22 @@ pub async fn running_mpc( _ => anyhow::bail!("not running"), } }; - let err_msg = format!( - "mpc did not reach {} in time", - if epoch.is_some() { - "expected epoch" - } else { - "running state" - } - ); + + let strategy = ConstantBuilder::default() + .with_delay(std::time::Duration::from_secs(3)) + .with_max_times(100); + is_running - .retry(&ExponentialBuilder::default().with_max_times(6)) + .retry(&strategy) .await - .with_context(|| err_msg) + .with_context(|| format!( + "mpc did not reach {} in time", + if epoch.is_some() { + "expected epoch" + } else { + "running state" + } + )) } pub async fn require_presignatures( @@ -241,8 +245,13 @@ pub async fn require_triples( anyhow::bail!("not enough nodes with triples") } }; + + let strategy = ConstantBuilder::default() + .with_delay(std::time::Duration::from_secs(5)) + .with_max_times(expected * 100); + let state_views = is_enough - 
.retry(&ExponentialBuilder::default().with_max_times(12)) + .retry(&strategy) .await .with_context(|| { format!( diff --git a/integration-tests/chain-signatures/tests/lib.rs b/integration-tests/chain-signatures/tests/lib.rs index a6bdc5f7..7c5b931e 100644 --- a/integration-tests/chain-signatures/tests/lib.rs +++ b/integration-tests/chain-signatures/tests/lib.rs @@ -36,7 +36,7 @@ impl Cluster { self.nodes.start_node(&self.cfg, &node_account).await?; // Wait for new node to add itself as a candidate - tokio::time::sleep(tokio::time::Duration::from_secs(10)).await; + tokio::time::sleep(tokio::time::Duration::from_secs(20)).await; // T number of participants should vote let participants = self.participant_accounts().await?; From ff88bfe3a5781ae094f2e17ac59a67cffeb34dfa Mon Sep 17 00:00:00 2001 From: Phuong Date: Wed, 11 Dec 2024 18:36:18 -0800 Subject: [PATCH 20/42] clippy & fmt --- .../chain-signatures/tests/actions/wait.rs | 26 ++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/integration-tests/chain-signatures/tests/actions/wait.rs b/integration-tests/chain-signatures/tests/actions/wait.rs index 42b21954..dbee5e25 100644 --- a/integration-tests/chain-signatures/tests/actions/wait.rs +++ b/integration-tests/chain-signatures/tests/actions/wait.rs @@ -1,7 +1,7 @@ use std::future::{Future, IntoFuture}; use anyhow::Context; -use backon::{ConstantBuilder, ExponentialBuilder, Retryable}; +use backon::{ConstantBuilder, Retryable}; use mpc_contract::{ProtocolContractState, RunningContractState}; use mpc_node::web::StateView; @@ -151,17 +151,16 @@ pub async fn running_mpc( .with_delay(std::time::Duration::from_secs(3)) .with_max_times(100); - is_running - .retry(&strategy) - .await - .with_context(|| format!( + is_running.retry(&strategy).await.with_context(|| { + format!( "mpc did not reach {} in time", if epoch.is_some() { "expected epoch" } else { "running state" } - )) + ) + }) } pub async fn require_presignatures( @@ -250,15 +249,12 @@ pub async fn require_triples( .with_delay(std::time::Duration::from_secs(5)) .with_max_times(expected * 100); - let state_views = is_enough - .retry(&strategy) - .await - .with_context(|| { - format!( - "mpc nodes failed to generate {} triples before deadline", - expected - ) - })?; + let state_views = is_enough.retry(&strategy).await.with_context(|| { + format!( + "mpc nodes failed to generate {} triples before deadline", + expected + ) + })?; Ok(state_views) } From 20619dd817b3f9d17371bae08a1bc508e63c60ba Mon Sep 17 00:00:00 2001 From: Phuong N Date: Thu, 12 Dec 2024 04:51:01 +0000 Subject: [PATCH 21/42] Internalize triple manager RwLock and tasks to crypto loop --- .../node/src/protocol/consensus.rs | 8 +- .../node/src/protocol/cryptography.rs | 238 ++++--- chain-signatures/node/src/protocol/message.rs | 42 +- .../node/src/protocol/presignature.rs | 4 +- chain-signatures/node/src/protocol/state.rs | 2 +- chain-signatures/node/src/protocol/triple.rs | 593 ++++++++++-------- chain-signatures/node/src/types.rs | 2 +- chain-signatures/node/src/web/mod.rs | 7 +- .../chain-signatures/tests/cases/mod.rs | 3 +- 9 files changed, 523 insertions(+), 376 deletions(-) diff --git a/chain-signatures/node/src/protocol/consensus.rs b/chain-signatures/node/src/protocol/consensus.rs index 895e4950..b9782373 100644 --- a/chain-signatures/node/src/protocol/consensus.rs +++ b/chain-signatures/node/src/protocol/consensus.rs @@ -127,13 +127,13 @@ impl ConsensusProtocol for StartedState { tracing::info!( "started: contract state is running and we are 
already a participant" ); - let triple_manager = Arc::new(RwLock::new(TripleManager::new( + let triple_manager = TripleManager::new( me, contract_state.threshold, epoch, ctx.my_account_id(), ctx.triple_storage(), - ))); + ); let presignature_manager = Arc::new(RwLock::new(PresignatureManager::new( @@ -370,13 +370,13 @@ impl ConsensusProtocol for WaitingForConsensusState { ); } - let triple_manager = Arc::new(RwLock::new(TripleManager::new( + let triple_manager = TripleManager::new( me, self.threshold, self.epoch, ctx.my_account_id(), ctx.triple_storage(), - ))); + ); let presignature_manager = Arc::new(RwLock::new(PresignatureManager::new( me, diff --git a/chain-signatures/node/src/protocol/cryptography.rs b/chain-signatures/node/src/protocol/cryptography.rs index 601d578d..28b7bb47 100644 --- a/chain-signatures/node/src/protocol/cryptography.rs +++ b/chain-signatures/node/src/protocol/cryptography.rs @@ -361,8 +361,7 @@ impl CryptographicProtocol for RunningState { cfg: Config, mesh_state: MeshState, ) -> Result { - let protocol_cfg = &cfg.protocol; - let active = &mesh_state.active_participants; + let active = mesh_state.active_participants.clone(); if active.len() < self.threshold { tracing::warn!( active = ?active.keys_vec(), @@ -371,117 +370,176 @@ impl CryptographicProtocol for RunningState { return Ok(NodeState::Running(self)); } - let mut messages = self.messages.write().await; - let mut triple_manager = self.triple_manager.write().await; - let my_account_id = triple_manager.my_account_id.clone(); - crate::metrics::MESSAGE_QUEUE_SIZE - .with_label_values(&[my_account_id.as_str()]) - .set(messages.len() as i64); - if let Err(err) = triple_manager.stockpile(active, protocol_cfg).await { - tracing::warn!(?err, "running: failed to stockpile triples"); - } - for (p, msg) in triple_manager.poke(protocol_cfg).await { - let info = self.fetch_participant(&p)?; - messages.push(info.clone(), MpcMessage::Triple(msg)); - } + let participant_map = active + .iter() + .map(|(p, info)| (p.clone(), info.clone())) + .collect::>(); - crate::metrics::NUM_TRIPLES_MINE - .with_label_values(&[my_account_id.as_str()]) - .set(triple_manager.len_mine().await as i64); - crate::metrics::NUM_TRIPLES_TOTAL - .with_label_values(&[my_account_id.as_str()]) - .set(triple_manager.len_generated().await as i64); - crate::metrics::NUM_TRIPLE_GENERATORS_INTRODUCED - .with_label_values(&[my_account_id.as_str()]) - .set(triple_manager.introduced.len() as i64); - crate::metrics::NUM_TRIPLE_GENERATORS_TOTAL - .with_label_values(&[my_account_id.as_str()]) - .set(triple_manager.ongoing.len() as i64); + let my_account_id = self.triple_manager.my_account_id.clone(); + let protocol_cfg = cfg.protocol.clone(); + let messages = self.messages.clone(); + let triple_par = participant_map.clone(); + let triple_manager = self.triple_manager.clone(); + let triple_task = tokio::task::spawn(async move { + let participant_map = triple_par; + let my_account_id = triple_manager.my_account_id.clone(); + // crate::metrics::MESSAGE_QUEUE_SIZE + // .with_label_values(&[my_account_id.as_str()]) + // .set(messages.len() as i64); + if let Err(err) = triple_manager.stockpile(&active, &protocol_cfg).await { + tracing::warn!(?err, "running: failed to stockpile triples"); + } + let mut messages = messages.write().await; + for (p, msg) in triple_manager.poke(&protocol_cfg).await { + messages.push( + participant_map.get(&p).unwrap().clone(), + MpcMessage::Triple(msg), + ); + } + drop(messages); - let mut presignature_manager = 
self.presignature_manager.write().await; - if let Err(err) = presignature_manager - .stockpile( - active, - &self.public_key, - &self.private_share, - &mut triple_manager, - protocol_cfg, - ) - .await - { - tracing::warn!(?err, "running: failed to stockpile presignatures"); - } - drop(triple_manager); - for (p, msg) in presignature_manager.poke().await { - let info = self.fetch_participant(&p)?; - messages.push(info.clone(), MpcMessage::Presignature(msg)); - } + crate::metrics::NUM_TRIPLES_MINE + .with_label_values(&[my_account_id.as_str()]) + .set(triple_manager.len_mine().await as i64); + crate::metrics::NUM_TRIPLES_TOTAL + .with_label_values(&[my_account_id.as_str()]) + .set(triple_manager.len_generated().await as i64); + // crate::metrics::NUM_TRIPLE_GENERATORS_INTRODUCED + // .with_label_values(&[my_account_id.as_str()]) + // .set(triple_manager.introduced.len() as i64); + // crate::metrics::NUM_TRIPLE_GENERATORS_TOTAL + // .with_label_values(&[my_account_id.as_str()]) + // .set(triple_manager.ongoing.len() as i64); + }); - crate::metrics::NUM_PRESIGNATURES_MINE - .with_label_values(&[my_account_id.as_str()]) - .set(presignature_manager.len_mine().await as i64); - crate::metrics::NUM_PRESIGNATURES_TOTAL - .with_label_values(&[my_account_id.as_str()]) - .set(presignature_manager.len_generated().await as i64); - crate::metrics::NUM_PRESIGNATURE_GENERATORS_TOTAL - .with_label_values(&[my_account_id.as_str()]) - .set( - presignature_manager.len_potential().await as i64 - - presignature_manager.len_generated().await as i64, - ); + let messages = self.messages.clone(); + let triple_manager = self.triple_manager.clone(); + let presignature_manager = self.presignature_manager.clone(); + let presig_par = participant_map.clone(); + let active = mesh_state.active_participants.clone(); + let protocol_cfg = cfg.protocol.clone(); + let presig_task = tokio::task::spawn(async move { + let participant_map = presig_par; + let mut presignature_manager = presignature_manager.write().await; + if let Err(err) = presignature_manager + .stockpile( + &active, + &self.public_key, + &self.private_share, + &triple_manager, + &protocol_cfg, + ) + .await + { + tracing::warn!(?err, "running: failed to stockpile presignatures"); + } + let my_account_id = triple_manager.my_account_id.clone(); + drop(triple_manager); + + let mut messages = messages.write().await; + for (p, msg) in presignature_manager.poke().await { + messages.push( + participant_map.get(&p).unwrap().clone(), + MpcMessage::Presignature(msg), + ); + } + drop(messages); + + crate::metrics::NUM_PRESIGNATURES_MINE + .with_label_values(&[my_account_id.as_str()]) + .set(presignature_manager.len_mine().await as i64); + crate::metrics::NUM_PRESIGNATURES_TOTAL + .with_label_values(&[my_account_id.as_str()]) + .set(presignature_manager.len_generated().await as i64); + crate::metrics::NUM_PRESIGNATURE_GENERATORS_TOTAL + .with_label_values(&[my_account_id.as_str()]) + .set( + presignature_manager.len_potential().await as i64 + - presignature_manager.len_generated().await as i64, + ); + }); // NOTE: signatures should only use stable and not active participants. The difference here is that // stable participants utilizes more than the online status of a node, such as whether or not their // block height is up to date, such that they too can process signature requests. If they cannot // then they are considered unstable and should not be a part of signature generation this round. 
- let stable = mesh_state.stable_participants; + let stable = mesh_state.stable_participants.clone(); tracing::debug!(?stable, "stable participants"); - let mut sign_queue = self.sign_queue.write().await; - crate::metrics::SIGN_QUEUE_SIZE - .with_label_values(&[my_account_id.as_str()]) - .set(sign_queue.len() as i64); + // let mut sign_queue = self.sign_queue.write().await; + // crate::metrics::SIGN_QUEUE_SIZE + // .with_label_values(&[my_account_id.as_str()]) + // .set(sign_queue.len() as i64); + let presignature_manager = self.presignature_manager.clone(); + let signature_manager = self.signature_manager.clone(); + let messages = self.messages.clone(); + let protocol_cfg = cfg.protocol.clone(); + let sign_queue = self.sign_queue.clone(); let me = ctx.me().await; - sign_queue.organize(self.threshold, &stable, me, &my_account_id); + let rpc_client = ctx.rpc_client().clone(); + let signer = ctx.signer().clone(); + let mpc_contract_id = ctx.mpc_contract_id().clone(); + let sig_task = tokio::task::spawn(async move { + let participant_map = participant_map.clone(); + tracing::debug!(?stable, "stable participants"); - let my_requests = sign_queue.my_requests(me); - crate::metrics::SIGN_QUEUE_MINE_SIZE - .with_label_values(&[my_account_id.as_str()]) - .set(my_requests.len() as i64); + let mut sign_queue = sign_queue.write().await; + // crate::metrics::SIGN_QUEUE_SIZE + // .with_label_values(&[my_account_id.as_str()]) + // .set(sign_queue.len() as i64); + sign_queue.organize(self.threshold, &stable, me, &my_account_id); - let mut signature_manager = self.signature_manager.write().await; - signature_manager - .handle_requests( - self.threshold, - &stable, - my_requests, - &mut presignature_manager, - protocol_cfg, - ) - .await; - drop(sign_queue); - drop(presignature_manager); + let my_requests = sign_queue.my_requests(me); + // crate::metrics::SIGN_QUEUE_MINE_SIZE + // .with_label_values(&[my_account_id.as_str()]) + // .set(my_requests.len() as i64); - for (p, msg) in signature_manager.poke() { - let info = self.fetch_participant(&p)?; - messages.push(info.clone(), MpcMessage::Signature(msg)); + let mut presignature_manager = presignature_manager.write().await; + let mut signature_manager = signature_manager.write().await; + signature_manager + .handle_requests( + self.threshold, + &stable, + my_requests, + &mut presignature_manager, + &protocol_cfg, + ) + .await; + drop(presignature_manager); + + let mut messages = messages.write().await; + for (p, msg) in signature_manager.poke() { + messages.push( + participant_map.get(&p).unwrap().clone(), + MpcMessage::Signature(msg), + ); + } + drop(messages); + signature_manager + .publish(&rpc_client, &signer, &mpc_contract_id) + .await; + }); + + match tokio::try_join!(triple_task, presig_task, sig_task) { + Ok(_result) => (), + Err(err) => { + tracing::warn!(?err, "running: failed to progress cryptographic protocol"); + } } - signature_manager - .publish(ctx.rpc_client(), ctx.signer(), ctx.mpc_contract_id()) - .await; - drop(signature_manager); + + let mut messages = self.messages.write().await; let failures = messages .send_encrypted( - ctx.me().await, + me, &cfg.local.network.sign_sk, ctx.http_client(), - active, - protocol_cfg, + &mesh_state.active_participants, + &cfg.protocol, ) .await; if !failures.is_empty() { tracing::warn!( - active = ?active.keys_vec(), + active = ?mesh_state.active_participants.keys_vec(), "running: failed to send encrypted message; {failures:?}" ); } diff --git a/chain-signatures/node/src/protocol/message.rs 
b/chain-signatures/node/src/protocol/message.rs index 64e47439..8ac6bacb 100644 --- a/chain-signatures/node/src/protocol/message.rs +++ b/chain-signatures/node/src/protocol/message.rs @@ -245,29 +245,29 @@ impl MessageHandler for RunningState { ) -> Result<(), MessageHandleError> { let protocol_cfg = &cfg.protocol; let participants = &mesh_state.active_participants; - let mut triple_manager = self.triple_manager.write().await; // remove the triple_id that has already failed or taken from the triple_bins // and refresh the timestamp of failed and taken let triple_messages = queue.triple_bins.entry(self.epoch).or_default(); - triple_messages.retain(|id, queue| { - if queue.is_empty() - || queue.iter().any(|msg| { - util::is_elapsed_longer_than_timeout( - msg.timestamp, - protocol_cfg.triple.generation_timeout, - ) - }) - { - return false; - } - - // if triple id is in GC, remove these messages because the triple is currently - // being GC'ed, where this particular triple has previously failed or been utilized. - !triple_manager.refresh_gc(id) - }); + // triple_messages.retain(|id, queue| { + // if queue.is_empty() + // || queue.iter().any(|msg| { + // util::is_elapsed_longer_than_timeout( + // msg.timestamp, + // protocol_cfg.triple.generation_timeout, + // ) + // }) + // { + // return false; + // } + + // // if triple id is in GC, remove these messages because the triple is currently + // // being GC'ed, where this particular triple has previously failed or been utilized. + // !triple_manager.refresh_gc(id) + // }); for (id, queue) in triple_messages { - let protocol = match triple_manager + let protocol = match self + .triple_manager .get_or_start_generation(*id, participants, protocol_cfg) .await { @@ -282,7 +282,7 @@ impl MessageHandler for RunningState { if let Some(protocol) = protocol { while let Some(message) = queue.pop_front() { - protocol.message(message.from, message.data); + protocol.message(message.from, message.data).await; } } } @@ -328,7 +328,7 @@ impl MessageHandler for RunningState { *id, *triple0, *triple1, - &mut triple_manager, + &self.triple_manager, &self.public_key, &self.private_share, protocol_cfg, @@ -489,7 +489,7 @@ impl MessageHandler for RunningState { protocol.message(message.from, message.data); } } - triple_manager.garbage_collect(protocol_cfg); + self.triple_manager.garbage_collect(protocol_cfg).await; presignature_manager.garbage_collect(protocol_cfg); signature_manager.garbage_collect(protocol_cfg); Ok(()) diff --git a/chain-signatures/node/src/protocol/presignature.rs b/chain-signatures/node/src/protocol/presignature.rs index 8672235d..f5d16692 100644 --- a/chain-signatures/node/src/protocol/presignature.rs +++ b/chain-signatures/node/src/protocol/presignature.rs @@ -407,7 +407,7 @@ impl PresignatureManager { active: &Participants, pk: &PublicKey, sk_share: &SecretKeyShare, - triple_manager: &mut TripleManager, + triple_manager: &TripleManager, cfg: &ProtocolConfig, ) -> Result<(), InitializationError> { let not_enough_presignatures = { @@ -471,7 +471,7 @@ impl PresignatureManager { id: PresignatureId, triple0: TripleId, triple1: TripleId, - triple_manager: &mut TripleManager, + triple_manager: &TripleManager, public_key: &PublicKey, private_share: &SecretKeyShare, cfg: &ProtocolConfig, diff --git a/chain-signatures/node/src/protocol/state.rs b/chain-signatures/node/src/protocol/state.rs index dcf54f7a..e1fe1b91 100644 --- a/chain-signatures/node/src/protocol/state.rs +++ b/chain-signatures/node/src/protocol/state.rs @@ -92,7 +92,7 @@ pub struct 
RunningState { pub private_share: SecretKeyShare, pub public_key: PublicKey, pub sign_queue: Arc>, - pub triple_manager: Arc>, + pub triple_manager: TripleManager, pub presignature_manager: Arc>, pub signature_manager: Arc>, pub messages: Arc>, diff --git a/chain-signatures/node/src/protocol/triple.rs b/chain-signatures/node/src/protocol/triple.rs index 2265abac..9a6a2f54 100644 --- a/chain-signatures/node/src/protocol/triple.rs +++ b/chain-signatures/node/src/protocol/triple.rs @@ -6,7 +6,7 @@ use crate::storage::triple_storage::TripleStorage; use crate::types::TripleProtocol; use crate::util::AffinePointExt; -use cait_sith::protocol::{Action, InitializationError, Participant, ProtocolError}; +use cait_sith::protocol::{Action, InitializationError, MessageData, Participant, ProtocolError}; use cait_sith::triples::{TripleGenerationOutput, TriplePub, TripleShare}; use chrono::Utc; use highway::{HighwayHash, HighwayHasher}; @@ -17,7 +17,9 @@ use serde::{Deserialize, Serialize}; use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet, VecDeque}; use std::fmt; +use std::sync::Arc; use std::time::{Duration, Instant}; +use tokio::sync::RwLock; use near_account_id::AccountId; @@ -35,53 +37,62 @@ pub struct Triple { pub public: TriplePub, } +#[derive(Clone)] pub struct TripleGenerator { pub id: TripleId, pub participants: Vec, - pub protocol: TripleProtocol, - pub timestamp: Option, + pub protocol: Arc, + pub timestamp: Arc>>, pub timeout: Duration, } impl TripleGenerator { pub fn new( + me: Participant, + threshold: usize, id: TripleId, participants: Vec, - protocol: TripleProtocol, timeout: u64, - ) -> Self { - Self { + ) -> Result { + let protocol = Arc::new(RwLock::new( + cait_sith::triples::generate_triple::(&participants, me, threshold)?, + )); + + Ok(Self { id, participants, protocol, - timestamp: None, + timestamp: Arc::new(RwLock::new(None)), timeout: Duration::from_millis(timeout), - } + }) } - pub fn poke(&mut self) -> Result>, ProtocolError> { - let timestamp = self.timestamp.get_or_insert_with(Instant::now); - if timestamp.elapsed() > self.timeout { - tracing::warn!( - id = self.id, - elapsed = ?timestamp.elapsed(), - "triple protocol timed out" - ); + pub async fn poke( + &mut self, + ) -> Result>, ProtocolError> { + let elapsed = { + let mut timestamp = self.timestamp.write().await; + let timestamp = timestamp.get_or_insert_with(Instant::now); + timestamp.elapsed() + }; + if elapsed > self.timeout { + tracing::warn!(id = self.id, ?elapsed, "triple protocol timed out"); return Err(ProtocolError::Other( anyhow::anyhow!("triple protocol timed out").into(), )); } - self.protocol.poke() + let mut protocol = self.protocol.write().await; + protocol.poke() } -} -/// Abstracts how triples are generated by providing a way to request a new triple that will be -/// complete some time in the future and a way to take an already generated triple. -pub struct TripleManager { - /// Triple Storage - pub triple_storage: TripleStorage, + pub async fn message(&self, from: Participant, data: MessageData) { + let mut protocol = self.protocol.write().await; + protocol.message(from, data); + } +} +pub struct TripleTasks { /// The pool of triple protocols that have yet to be completed. pub generators: HashMap, @@ -95,10 +106,228 @@ pub struct TripleManager { /// The set of triples that were introduced to the system by the current node. 
pub introduced: HashSet, +} + +impl std::fmt::Debug for TripleTasks { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("TripleTasks") + .field("generators", &self.generators.keys().collect::>()) + .field("queued", &self.queued) + .field("ongoing", &self.ongoing) + .field("introduced", &self.introduced) + .finish() + } +} + +impl TripleTasks { + pub fn new() -> Self { + Self { + generators: HashMap::new(), + queued: VecDeque::new(), + ongoing: HashSet::new(), + introduced: HashSet::new(), + } + } + + pub fn entry( + &mut self, + me: Participant, + threshold: usize, + id: TripleId, + potential_len: usize, + cfg: &ProtocolConfig, + participants: &Participants, + my_account_id: &AccountId, + ) -> Result, CryptographicError> { + match self.generators.entry(id) { + Entry::Vacant(e) => { + if potential_len >= cfg.triple.max_triples as usize { + // We are at the maximum amount of triples, we cannot generate more. So just in case a node + // sends more triple generation requests, reject them and have them tiemout. + return Ok(None); + } + + tracing::info!(id, "joining protocol to generate a new triple"); + let participants = participants.keys_vec(); + let generator = e.insert(TripleGenerator::new( + me, + threshold, + id, + participants, + cfg.triple.generation_timeout, + )?); + self.queued.push_back(id); + crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATORS + .with_label_values(&[my_account_id.as_str()]) + .inc(); + Ok(Some(generator.clone())) + } + Entry::Occupied(e) => Ok(Some(e.get().clone())), + } + } + + pub async fn poke( + &mut self, + me: Participant, + my_account_id: &AccountId, + epoch: u64, + cfg: &ProtocolConfig, + ) -> ( + Vec<(Triple, bool)>, + Vec<(Participant, TripleMessage)>, + HashMap, + ) { + // Add more protocols to the ongoing pool if there is space. 
+ let to_generate_len = cfg.max_concurrent_generation as usize - self.ongoing.len(); + if !self.queued.is_empty() && to_generate_len > 0 { + for _ in 0..to_generate_len { + self.queued.pop_front().map(|id| self.ongoing.insert(id)); + } + } + + let mut triples = Vec::new(); + let mut messages = Vec::new(); + let mut errors = HashMap::new(); + let mut to_remove = Vec::new(); + + for (id, generator) in &mut self.generators { + if !self.ongoing.contains(id) { + continue; + } + + loop { + let action = match generator.poke().await { + Ok(action) => action, + Err(e) => { + errors.insert(*id, e); + crate::metrics::TRIPLE_GENERATOR_FAILURES + .with_label_values(&[my_account_id.as_str()]) + .inc(); + self.ongoing.remove(id); + self.introduced.remove(id); + to_remove.push(*id); + // tracing::warn!( + // elapsed = ?generator.timestamp.unwrap().elapsed(), + // "added {id} to failed triples" + // ); + break; + } + }; + + match action { + Action::Wait => { + tracing::debug!("triple: waiting"); + // Retain protocol until we are finished + break; + } + Action::SendMany(data) => { + for p in &generator.participants { + messages.push(( + *p, + TripleMessage { + id: *id, + epoch, + from: me, + data: data.clone(), + timestamp: Utc::now().timestamp() as u64, + }, + )) + } + } + Action::SendPrivate(p, data) => messages.push(( + p, + TripleMessage { + id: *id, + epoch, + from: me, + data, + timestamp: Utc::now().timestamp() as u64, + }, + )), + Action::Return(output) => { + // elapsed = ?generator.timestamp.unwrap().elapsed(), + tracing::info!( + id, + ?me, + big_a = ?output.1.big_a.to_base58(), + big_b = ?output.1.big_b.to_base58(), + big_c = ?output.1.big_c.to_base58(), + "completed triple generation" + ); + + // if let Some(start_time) = generator.timestamp { + // crate::metrics::TRIPLE_LATENCY + // .with_label_values(&[my_account_id.as_str()]) + // .observe(start_time.elapsed().as_secs_f64()); + // } + + crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATORS_SUCCESS + .with_label_values(&[my_account_id.as_str()]) + .inc(); + + let triple = Triple { + id: *id, + share: output.0, + public: output.1, + }; + + // After creation the triple is assigned to a random node, which is NOT necessarily the one that initiated it's creation + let triple_is_mine = { + // This is an entirely unpredictable value to all participants because it's a combination of big_c_i + // It is the same value across all participants + let big_c = triple.public.big_c; + + // We turn this into a u64 in a way not biased to the structure of the byte serialisation so we hash it + // We use Highway Hash because the DefaultHasher doesn't guarantee a consistent output across versions + let entropy = + HighwayHasher::default().hash64(&big_c.to_bytes()) as usize; + + let num_participants = generator.participants.len(); + // This has a *tiny* bias towards lower indexed participants, they're up to (1 + num_participants / u64::MAX)^2 times more likely to be selected + // This is acceptably small that it will likely never result in a biased selection happening + let triple_owner = generator.participants[entropy % num_participants]; + + triple_owner == me + }; + + triples.push((triple, triple_is_mine)); + if triple_is_mine { + crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATIONS_MINE_SUCCESS + .with_label_values(&[my_account_id.as_str()]) + .inc(); + } + + // Protocol done, remove it from the ongoing pool. 
+ self.ongoing.remove(id); + self.introduced.remove(id); + // Do not retain the protocol + to_remove.push(*id); + break; + } + } + } + } + + for id in to_remove { + self.generators.remove(&id); + } + + (triples, messages, errors) + } +} + +/// Abstracts how triples are generated by providing a way to request a new triple that will be +/// complete some time in the future and a way to take an already generated triple. +#[derive(Clone)] +pub struct TripleManager { + /// Triple Storage + pub triple_storage: TripleStorage, + + pub tasks: Arc>, /// The set of triple ids that were already taken or failed. This will be maintained for at most /// triple timeout period just so messages are cycled through the system. - pub gc: HashMap, + pub gc: Arc>>, pub me: Participant, pub threshold: usize, @@ -109,11 +338,8 @@ pub struct TripleManager { impl fmt::Debug for TripleManager { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("TripleManager") - .field("generators", &self.generators.keys().collect::>()) - .field("queued", &self.queued) - .field("ongoing", &self.ongoing) - .field("introduced", &self.introduced) - .field("gc", &self.gc.keys().collect::>()) + .field("tasks", &self.tasks) + .field("gc", &self.gc) .field("me", &self.me) .field("threshold", &self.threshold) .field("epoch", &self.epoch) @@ -131,11 +357,8 @@ impl TripleManager { storage: &TripleStorage, ) -> Self { Self { - generators: HashMap::new(), - queued: VecDeque::new(), - ongoing: HashSet::new(), - introduced: HashSet::new(), - gc: HashMap::new(), + tasks: Arc::new(RwLock::new(TripleTasks::new())), + gc: Arc::new(RwLock::new(HashMap::new())), me, threshold, epoch, @@ -144,13 +367,13 @@ impl TripleManager { } } - pub async fn insert(&mut self, triple: Triple, mine: bool) { + pub async fn insert(&self, triple: Triple, mine: bool) { let id = triple.id; tracing::debug!(id, mine, "inserting triple"); if let Err(e) = self.triple_storage.insert(triple, mine).await { tracing::warn!(?e, mine, "failed to insert triple"); } else { - self.gc.remove(&id); + self.gc.write().await.remove(&id); } } @@ -170,7 +393,7 @@ impl TripleManager { .unwrap_or(false) } - async fn take(&mut self, id: &TripleId) -> Result { + async fn take(&self, id: &TripleId) -> Result { if self.contains_mine(id).await { tracing::error!(?id, "cannot take mine triple as foreign owned"); return Err(GenerationError::TripleDenied( @@ -179,24 +402,25 @@ impl TripleManager { )); } - let result = self.triple_storage.take(id).await.map_err(|store_err| { - if self.generators.contains_key(id) { - tracing::warn!(id, ?store_err, "triple is generating"); - GenerationError::TripleIsGenerating(*id) - } else if self.gc.contains_key(id) { - tracing::warn!(id, ?store_err, "triple is garbage collected"); - GenerationError::TripleIsGarbageCollected(*id) - } else { - tracing::warn!(id, ?store_err, "triple is missing"); - GenerationError::TripleIsMissing(*id) + match self.triple_storage.take(id).await { + Ok(result) => { + self.gc.write().await.insert(*id, Instant::now()); + Ok(result) + } + Err(store_err) => { + let tasks = self.tasks.read().await; + if tasks.generators.contains_key(id) { + tracing::warn!(id, ?store_err, "triple is generating"); + Err(GenerationError::TripleIsGenerating(*id)) + } else if self.gc.read().await.contains_key(id) { + tracing::warn!(id, ?store_err, "triple is garbage collected"); + Err(GenerationError::TripleIsGarbageCollected(*id)) + } else { + tracing::warn!(id, ?store_err, "triple is missing"); + Err(GenerationError::TripleIsMissing(*id)) + } 
} - }); - - if result.is_ok() { - self.gc.insert(*id, Instant::now()); } - - result } /// Take two unspent triple by theirs id with no way to return it. Only takes @@ -204,7 +428,7 @@ impl TripleManager { /// It is very important to NOT reuse the same triple twice for two different /// protocols. pub async fn take_two( - &mut self, + &self, id0: TripleId, id1: TripleId, ) -> Result<(Triple, Triple), GenerationError> { @@ -224,7 +448,7 @@ impl TripleManager { /// Take two random unspent triple generated by this node. Either takes both or none. /// It is very important to NOT reuse the same triple twice for two different /// protocols. - pub async fn take_two_mine(&mut self) -> Option<(Triple, Triple)> { + pub async fn take_two_mine(&self) -> Option<(Triple, Triple)> { let triples = &self.triple_storage; if triples.len_mine().await.unwrap_or(0) < 2 { tracing::debug!("not enough mine triples"); @@ -232,8 +456,8 @@ impl TripleManager { } let triple_0 = match triples.take_mine().await { Ok(triple) => triple, - Err(e) => { - tracing::warn!(?e, "failed to take mine triple"); + Err(store_err) => { + tracing::warn!(?store_err, "failed to take mine triple"); return None; } }; @@ -246,8 +470,11 @@ impl TripleManager { } }; - self.gc.insert(triple_0.id, Instant::now()); - self.gc.insert(triple_1.id, Instant::now()); + { + let mut gc = self.gc.write().await; + gc.insert(triple_0.id, Instant::now()); + gc.insert(triple_1.id, Instant::now()); + } tracing::debug!(triple_0.id, triple_1.id, "took two mine triples"); @@ -272,7 +499,7 @@ impl TripleManager { /// Returns the number of unspent triples we will have in the manager once /// all ongoing generation protocols complete. pub async fn len_potential(&self) -> usize { - self.len_generated().await + self.generators.len() + self.len_generated().await + self.tasks.read().await.generators.len() } pub async fn has_min_triples(&self, cfg: &ProtocolConfig) -> bool { @@ -280,12 +507,11 @@ impl TripleManager { } /// Clears an entry from failed triples if that triple protocol was created more than 2 hrs ago - pub fn garbage_collect(&mut self, cfg: &ProtocolConfig) { - let before = self.gc.len(); - self.gc.retain(|_, timestamp| { - timestamp.elapsed() < Duration::from_millis(cfg.garbage_timeout) - }); - let garbage_collected = before.saturating_sub(self.gc.len()); + pub async fn garbage_collect(&self, cfg: &ProtocolConfig) { + let mut gc = self.gc.write().await; + let before = gc.len(); + gc.retain(|_, timestamp| timestamp.elapsed() < Duration::from_millis(cfg.garbage_timeout)); + let garbage_collected = before.saturating_sub(gc.len()); if garbage_collected > 0 { tracing::debug!("garbage collected {} triples", garbage_collected); } @@ -293,24 +519,28 @@ impl TripleManager { /// Refresh item in the garbage collection. If it is present, return true and update internally /// the timestamp for gabage collection. - pub fn refresh_gc(&mut self, id: &TripleId) -> bool { - let entry = self.gc.entry(*id).and_modify(|e| *e = Instant::now()); + pub async fn refresh_gc(&self, id: &TripleId) -> bool { + let mut gc = self.gc.write().await; + let entry = gc.entry(*id).and_modify(|e| *e = Instant::now()); matches!(entry, Entry::Occupied(_)) } /// Starts a new Beaver triple generation protocol. 
pub async fn generate( - &mut self, + &self, participants: &Participants, timeout: u64, ) -> Result<(), InitializationError> { let id = rand::random(); + let id_collision = { + let tasks = self.tasks.read().await; + tasks.generators.contains_key(&id) + || self.contains(&id).await + || self.gc.read().await.contains_key(&id) + }; // Check if the `id` is already in the system. Error out and have the next cycle try again. - if self.generators.contains_key(&id) - || self.contains(&id).await - || self.gc.contains_key(&id) - { + if id_collision { tracing::warn!(id, "triple id collision"); return Err(InitializationError::BadParameters(format!( "id collision: triple_id={id}" @@ -319,17 +549,15 @@ impl TripleManager { tracing::debug!(id, "starting protocol to generate a new triple"); let participants: Vec<_> = participants.keys().cloned().collect(); - let protocol: TripleProtocol = Box::new(cait_sith::triples::generate_triple::( - &participants, - self.me, - self.threshold, - )?); - self.generators.insert( - id, - TripleGenerator::new(id, participants, protocol, timeout), - ); - self.queued.push_back(id); - self.introduced.insert(id); + { + let mut tasks = self.tasks.write().await; + tasks.generators.insert( + id, + TripleGenerator::new(self.me, self.threshold, id, participants, timeout)?, + ); + tasks.queued.push_back(id); + tasks.introduced.insert(id); + } crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATORS .with_label_values(&[self.my_account_id.as_str()]) .inc(); @@ -339,7 +567,7 @@ impl TripleManager { /// Stockpile triples if the amount of unspent triples is below the minimum /// and the maximum number of all ongoing generation protocols is below the maximum. pub async fn stockpile( - &mut self, + &self, participants: &Participants, cfg: &ProtocolConfig, ) -> Result<(), InitializationError> { @@ -350,10 +578,11 @@ impl TripleManager { if self.len_potential().await >= cfg.triple.max_triples as usize { false } else { + let tasks = self.tasks.read().await; // We will always try to generate a new triple if we have less than the minimum self.len_mine().await < cfg.triple.min_triples as usize - && self.introduced.len() < cfg.max_concurrent_introduction as usize - && self.generators.len() < cfg.max_concurrent_generation as usize + && tasks.introduced.len() < cfg.max_concurrent_introduction as usize + && tasks.generators.len() < cfg.max_concurrent_generation as usize } }; @@ -371,44 +600,25 @@ impl TripleManager { /// 3) Has never been seen by the manager in which case start a new protocol and returns `Some(protocol)` // TODO: What if the triple completed generation and is already spent? pub async fn get_or_start_generation( - &mut self, + &self, id: TripleId, participants: &Participants, cfg: &ProtocolConfig, - ) -> Result, CryptographicError> { - if self.contains(&id).await || self.gc.contains_key(&id) { + ) -> Result, CryptographicError> { + if self.contains(&id).await || self.gc.read().await.contains_key(&id) { Ok(None) } else { let potential_len = self.len_potential().await; - match self.generators.entry(id) { - Entry::Vacant(e) => { - if potential_len >= cfg.triple.max_triples as usize { - // We are at the maximum amount of triples, we cannot generate more. So just in case a node - // sends more triple generation requests, reject them and have them tiemout. 
- return Ok(None); - } - - tracing::info!(id, "joining protocol to generate a new triple"); - let participants = participants.keys_vec(); - let protocol = Box::new(cait_sith::triples::generate_triple::( - &participants, - self.me, - self.threshold, - )?); - let generator = e.insert(TripleGenerator::new( - id, - participants, - protocol, - cfg.triple.generation_timeout, - )); - self.queued.push_back(id); - crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATORS - .with_label_values(&[self.my_account_id.as_str()]) - .inc(); - Ok(Some(&mut generator.protocol)) - } - Entry::Occupied(e) => Ok(Some(&mut e.into_mut().protocol)), - } + let mut tasks = self.tasks.write().await; + tasks.entry( + self.me, + self.threshold, + id, + potential_len, + cfg, + participants, + &self.my_account_id, + ) } } @@ -416,145 +626,26 @@ impl TripleManager { /// messages to be sent to the respective participant. /// /// An empty vector means we cannot progress until we receive a new message. - pub async fn poke(&mut self, cfg: &ProtocolConfig) -> Vec<(Participant, TripleMessage)> { - // Add more protocols to the ongoing pool if there is space. - let to_generate_len = cfg.max_concurrent_generation as usize - self.ongoing.len(); - if !self.queued.is_empty() && to_generate_len > 0 { - for _ in 0..to_generate_len { - self.queued.pop_front().map(|id| self.ongoing.insert(id)); - } - } - - let mut messages = Vec::new(); - let mut errors = Vec::new(); - let mut triples = Vec::new(); - self.generators.retain(|id, generator| { - if !self.ongoing.contains(id) { - // If the protocol is not ongoing, we should retain it for the next time - // it is in the ongoing pool. - return true; - } - - loop { - let action = match generator.poke() { - Ok(action) => action, - Err(e) => { - errors.push(e); - crate::metrics::TRIPLE_GENERATOR_FAILURES - .with_label_values(&[self.my_account_id.as_str()]) - .inc(); - self.gc.insert(*id, Instant::now()); - self.ongoing.remove(id); - self.introduced.remove(id); - tracing::warn!( - elapsed = ?generator.timestamp.unwrap().elapsed(), - "added {id} to failed triples" - ); - break false; - } - }; - - match action { - Action::Wait => { - tracing::debug!("triple: waiting"); - // Retain protocol until we are finished - break true; - } - Action::SendMany(data) => { - for p in &generator.participants { - messages.push(( - *p, - TripleMessage { - id: *id, - epoch: self.epoch, - from: self.me, - data: data.clone(), - timestamp: Utc::now().timestamp() as u64, - }, - )) - } - } - Action::SendPrivate(p, data) => messages.push(( - p, - TripleMessage { - id: *id, - epoch: self.epoch, - from: self.me, - data, - timestamp: Utc::now().timestamp() as u64, - }, - )), - Action::Return(output) => { - tracing::info!( - id, - me = ?self.me, - elapsed = ?generator.timestamp.unwrap().elapsed(), - big_a = ?output.1.big_a.to_base58(), - big_b = ?output.1.big_b.to_base58(), - big_c = ?output.1.big_c.to_base58(), - "completed triple generation" - ); - - if let Some(start_time) = generator.timestamp { - crate::metrics::TRIPLE_LATENCY - .with_label_values(&[self.my_account_id.as_str()]) - .observe(start_time.elapsed().as_secs_f64()); - } - - crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATORS_SUCCESS - .with_label_values(&[self.my_account_id.as_str()]) - .inc(); - - let triple = Triple { - id: *id, - share: output.0, - public: output.1, - }; - - // After creation the triple is assigned to a random node, which is NOT necessarily the one that initiated it's creation - let triple_is_mine = { - // This is an entirely unpredictable 
value to all participants because it's a combination of big_c_i - // It is the same value across all participants - let big_c = triple.public.big_c; - - // We turn this into a u64 in a way not biased to the structure of the byte serialisation so we hash it - // We use Highway Hash because the DefaultHasher doesn't guarantee a consistent output across versions - let entropy = - HighwayHasher::default().hash64(&big_c.to_bytes()) as usize; - - let num_participants = generator.participants.len(); - // This has a *tiny* bias towards lower indexed participants, they're up to (1 + num_participants / u64::MAX)^2 times more likely to be selected - // This is acceptably small that it will likely never result in a biased selection happening - let triple_owner = generator.participants[entropy % num_participants]; - - triple_owner == self.me - }; - - triples.push((triple, triple_is_mine)); - if triple_is_mine { - crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATIONS_MINE_SUCCESS - .with_label_values(&[self.my_account_id.as_str()]) - .inc(); - } + pub async fn poke(&self, cfg: &ProtocolConfig) -> Vec<(Participant, TripleMessage)> { + let (triples, messages, errors) = { + let mut tasks = self.tasks.write().await; + tasks + .poke(self.me, &self.my_account_id, self.epoch, cfg) + .await + }; - // Protocol done, remove it from the ongoing pool. - self.ongoing.remove(id); - self.introduced.remove(id); - // Do not retain the protocol - break false; - } - } + { + let mut gc = self.gc.write().await; + for (id, err) in errors.into_iter() { + tracing::warn!(id, ?err, "failed to generate triple"); + gc.insert(id, Instant::now()); } - }); + } for (triple, mine) in triples { self.insert(triple, mine).await; } - if !errors.is_empty() { - tracing::warn!(?errors, "faled to generate some triples"); - } - messages } } diff --git a/chain-signatures/node/src/types.rs b/chain-signatures/node/src/types.rs index f190c34a..3a8b4ef2 100644 --- a/chain-signatures/node/src/types.rs +++ b/chain-signatures/node/src/types.rs @@ -12,7 +12,7 @@ use crate::protocol::contract::ResharingContractState; pub type SecretKeyShare = ::Scalar; pub type TripleProtocol = - Box> + Send + Sync>; + RwLock> + Send + Sync>; pub type PresignatureProtocol = Box> + Send + Sync>; pub type SignatureProtocol = Box> + Send + Sync>; diff --git a/chain-signatures/node/src/web/mod.rs b/chain-signatures/node/src/web/mod.rs index 63326751..d624ffc8 100644 --- a/chain-signatures/node/src/web/mod.rs +++ b/chain-signatures/node/src/web/mod.rs @@ -137,10 +137,9 @@ async fn state(Extension(state): Extension>) -> Result { - let triple_manager_read = state.triple_manager.read().await; - let triple_potential_count = triple_manager_read.len_potential().await; - let triple_count = triple_manager_read.len_generated().await; - let triple_mine_count = triple_manager_read.len_mine().await; + let triple_potential_count = state.triple_manager.len_potential().await; + let triple_count = state.triple_manager.len_generated().await; + let triple_mine_count = state.triple_manager.len_mine().await; let presignature_read = state.presignature_manager.read().await; let presignature_count = presignature_read.len_generated().await; let presignature_mine_count = presignature_read.len_mine().await; diff --git a/integration-tests/chain-signatures/tests/cases/mod.rs b/integration-tests/chain-signatures/tests/cases/mod.rs index f91f197c..afe8733f 100644 --- a/integration-tests/chain-signatures/tests/cases/mod.rs +++ b/integration-tests/chain-signatures/tests/cases/mod.rs @@ -22,7 +22,6 @@ use 
mpc_node::util::NearPublicKeyExt as _; use near_account_id::AccountId; use test_log::test; use url::Url; - pub mod nightly; #[test(tokio::test)] @@ -169,7 +168,7 @@ async fn test_triple_persistence() -> anyhow::Result<()> { let triple_storage = storage::triple_storage::init(&redis_pool, &AccountId::from_str("test.near").unwrap()); - let mut triple_manager = TripleManager::new( + let triple_manager = TripleManager::new( Participant::from(0), 5, 123, From 9123d0452dec047225c573962eee258904e57370 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Thu, 12 Dec 2024 23:36:43 +0000 Subject: [PATCH 22/42] Made each generator a task --- .../node/src/protocol/cryptography.rs | 87 ++-- chain-signatures/node/src/protocol/triple.rs | 377 ++++++++++++------ 2 files changed, 304 insertions(+), 160 deletions(-) diff --git a/chain-signatures/node/src/protocol/cryptography.rs b/chain-signatures/node/src/protocol/cryptography.rs index 28b7bb47..01e57a5e 100644 --- a/chain-signatures/node/src/protocol/cryptography.rs +++ b/chain-signatures/node/src/protocol/cryptography.rs @@ -470,54 +470,57 @@ impl CryptographicProtocol for RunningState { // crate::metrics::SIGN_QUEUE_SIZE // .with_label_values(&[my_account_id.as_str()]) // .set(sign_queue.len() as i64); - let presignature_manager = self.presignature_manager.clone(); - let signature_manager = self.signature_manager.clone(); - let messages = self.messages.clone(); - let protocol_cfg = cfg.protocol.clone(); - let sign_queue = self.sign_queue.clone(); let me = ctx.me().await; - let rpc_client = ctx.rpc_client().clone(); - let signer = ctx.signer().clone(); - let mpc_contract_id = ctx.mpc_contract_id().clone(); - let sig_task = tokio::task::spawn(async move { + let sig_task = tokio::task::spawn({ + let presignature_manager = self.presignature_manager.clone(); + let signature_manager = self.signature_manager.clone(); + let messages = self.messages.clone(); + let protocol_cfg = cfg.protocol.clone(); + let sign_queue = self.sign_queue.clone(); + let rpc_client = ctx.rpc_client().clone(); + let signer = ctx.signer().clone(); + let mpc_contract_id = ctx.mpc_contract_id().clone(); let participant_map = participant_map.clone(); - tracing::debug!(?stable, "stable participants"); - let mut sign_queue = sign_queue.write().await; - // crate::metrics::SIGN_QUEUE_SIZE - // .with_label_values(&[my_account_id.as_str()]) - // .set(sign_queue.len() as i64); - sign_queue.organize(self.threshold, &stable, me, &my_account_id); + tokio::task::unconstrained(async move { + tracing::debug!(?stable, "stable participants"); - let my_requests = sign_queue.my_requests(me); - // crate::metrics::SIGN_QUEUE_MINE_SIZE - // .with_label_values(&[my_account_id.as_str()]) - // .set(my_requests.len() as i64); + let mut sign_queue = sign_queue.write().await; + // crate::metrics::SIGN_QUEUE_SIZE + // .with_label_values(&[my_account_id.as_str()]) + // .set(sign_queue.len() as i64); + sign_queue.organize(self.threshold, &stable, me, &my_account_id); - let mut presignature_manager = presignature_manager.write().await; - let mut signature_manager = signature_manager.write().await; - signature_manager - .handle_requests( - self.threshold, - &stable, - my_requests, - &mut presignature_manager, - &protocol_cfg, - ) - .await; - drop(presignature_manager); + let my_requests = sign_queue.my_requests(me); + // crate::metrics::SIGN_QUEUE_MINE_SIZE + // .with_label_values(&[my_account_id.as_str()]) + // .set(my_requests.len() as i64); - let mut messages = messages.write().await; - for (p, msg) in 
signature_manager.poke() { - messages.push( - participant_map.get(&p).unwrap().clone(), - MpcMessage::Signature(msg), - ); - } - drop(messages); - signature_manager - .publish(&rpc_client, &signer, &mpc_contract_id) - .await; + let mut presignature_manager = presignature_manager.write().await; + let mut signature_manager = signature_manager.write().await; + signature_manager + .handle_requests( + self.threshold, + &stable, + my_requests, + &mut presignature_manager, + &protocol_cfg, + ) + .await; + drop(presignature_manager); + + let mut messages = messages.write().await; + for (p, msg) in signature_manager.poke() { + messages.push( + participant_map.get(&p).unwrap().clone(), + MpcMessage::Signature(msg), + ); + } + drop(messages); + signature_manager + .publish(&rpc_client, &signer, &mpc_contract_id) + .await; + }) }); match tokio::try_join!(triple_task, presig_task, sig_task) { diff --git a/chain-signatures/node/src/protocol/triple.rs b/chain-signatures/node/src/protocol/triple.rs index 9a6a2f54..4cc84c20 100644 --- a/chain-signatures/node/src/protocol/triple.rs +++ b/chain-signatures/node/src/protocol/triple.rs @@ -46,6 +46,11 @@ pub struct TripleGenerator { pub timeout: Duration, } +type PokeTaskOutcome = ( + TripleId, + Result<(Vec<(Participant, TripleMessage)>, Option<(Triple, bool)>), ProtocolError>, +); + impl TripleGenerator { pub fn new( me: Participant, @@ -90,9 +95,138 @@ impl TripleGenerator { let mut protocol = self.protocol.write().await; protocol.message(from, data); } + + pub fn spawn_poke_task( + &self, + me: Participant, + epoch: u64, + ) -> tokio::task::JoinHandle { + tokio::task::spawn({ + let mut generator = self.clone(); + async move { generator.poke_handle(me, epoch).await } + }) + } + + pub async fn poke_handle(&mut self, me: Participant, epoch: u64) -> PokeTaskOutcome { + let mut messages = Vec::new(); + loop { + let action = match self.poke().await { + Ok(action) => action, + Err(e) => { + // errors.insert(*id, e); + // crate::metrics::TRIPLE_GENERATOR_FAILURES + // .with_label_values(&[my_account_id.as_str()]) + // .inc(); + // self.ongoing.remove(id); + // self.introduced.remove(id); + // to_remove.push(*id); + // tracing::warn!( + // elapsed = ?generator.timestamp.unwrap().elapsed(), + // "added {id} to failed triples" + // ); + break (self.id, Err(e)); + } + }; + + match action { + Action::Wait => { + tracing::debug!("triple: waiting"); + // Retain protocol until we are finished + break (self.id, Ok((messages, None))); + } + Action::SendMany(data) => { + for p in &self.participants { + messages.push(( + *p, + TripleMessage { + id: self.id, + epoch, + from: me, + data: data.clone(), + timestamp: Utc::now().timestamp() as u64, + }, + )) + } + } + Action::SendPrivate(p, data) => messages.push(( + p, + TripleMessage { + id: self.id, + epoch, + from: me, + data, + timestamp: Utc::now().timestamp() as u64, + }, + )), + Action::Return(output) => { + // elapsed = ?generator.timestamp.unwrap().elapsed(), + tracing::info!( + id = self.id, + ?me, + big_a = ?output.1.big_a.to_base58(), + big_b = ?output.1.big_b.to_base58(), + big_c = ?output.1.big_c.to_base58(), + "completed triple generation" + ); + + // if let Some(start_time) = generator.timestamp { + // crate::metrics::TRIPLE_LATENCY + // .with_label_values(&[my_account_id.as_str()]) + // .observe(start_time.elapsed().as_secs_f64()); + // } + + // crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATORS_SUCCESS + // .with_label_values(&[my_account_id.as_str()]) + // .inc(); + + let triple = Triple { + id: self.id, + 
share: output.0, + public: output.1, + }; + + // After creation the triple is assigned to a random node, which is NOT necessarily the one that initiated it's creation + let triple_is_mine = { + // This is an entirely unpredictable value to all participants because it's a combination of big_c_i + // It is the same value across all participants + let big_c = triple.public.big_c; + + // We turn this into a u64 in a way not biased to the structure of the byte serialisation so we hash it + // We use Highway Hash because the DefaultHasher doesn't guarantee a consistent output across versions + let entropy = HighwayHasher::default().hash64(&big_c.to_bytes()) as usize; + + let num_participants = self.participants.len(); + // This has a *tiny* bias towards lower indexed participants, they're up to (1 + num_participants / u64::MAX)^2 times more likely to be selected + // This is acceptably small that it will likely never result in a biased selection happening + let triple_owner = self.participants[entropy % num_participants]; + + triple_owner == me + }; + + // triples.push((triple, triple_is_mine)); + // if triple_is_mine { + // crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATIONS_MINE_SUCCESS + // .with_label_values(&[my_account_id.as_str()]) + // .inc(); + // } + + // // Protocol done, remove it from the ongoing pool. + // self.ongoing.remove(id); + // self.introduced.remove(id); + // // Do not retain the protocol + // to_remove.push(*id); + break (self.id, Ok((messages, Some((triple, triple_is_mine))))); + } + } + } + } } pub struct TripleTasks { + /// The maximum amount of time the whole of the triple tasks can take before yielding + /// back to the main loop. + budget: Duration, + /// The pool of triple protocols that have yet to be completed. pub generators: HashMap, @@ -104,6 +238,8 @@ pub struct TripleTasks { /// they are completed or timed out. pub ongoing: HashSet, + pub ongoing_tasks: VecDeque<(TripleId, tokio::task::JoinHandle)>, + /// The set of triples that were introduced to the system by the current node. 
pub introduced: HashSet, } @@ -120,11 +256,14 @@ impl std::fmt::Debug for TripleTasks { } impl TripleTasks { - pub fn new() -> Self { + pub fn new(budget: Duration) -> Self { + // pub fn new() -> Self { Self { + budget, generators: HashMap::new(), queued: VecDeque::new(), ongoing: HashSet::new(), + ongoing_tasks: VecDeque::new(), introduced: HashSet::new(), } } @@ -181,137 +320,114 @@ impl TripleTasks { let to_generate_len = cfg.max_concurrent_generation as usize - self.ongoing.len(); if !self.queued.is_empty() && to_generate_len > 0 { for _ in 0..to_generate_len { - self.queued.pop_front().map(|id| self.ongoing.insert(id)); + // self.queued.pop_front().map(|id| self.ongoing.insert(id)); + if let Some(id) = self.queued.pop_front() { + tracing::info!( + id, + len = self.ongoing_tasks.len(), + "spawning triple generation protocol" + ); + self.ongoing.insert(id); + let generator = self.generators.get(&id).unwrap(); + self.ongoing_tasks + .push_back((id, generator.spawn_poke_task(me, epoch))); + } } } + // spawn these tasks again if they already completed with Action::Wait: + for id in &self.ongoing { + if !self + .ongoing_tasks + .iter() + .any(|(running_id, _)| running_id == id) + { + let generator = self.generators.get(&id).unwrap(); + self.ongoing_tasks + .push_back((*id, generator.spawn_poke_task(me, epoch))); + } + } + + // self.ongoing_tasks.extend( + // self.generators + // .iter() + // .filter(|(id, _)| self.ongoing.contains(id) && self.ongoing_tasks) + // .map(|(id, generator)| (*id, generator.spawn_poke_task(me, epoch))), + // ); + let mut triples = Vec::new(); let mut messages = Vec::new(); let mut errors = HashMap::new(); - let mut to_remove = Vec::new(); - for (id, generator) in &mut self.generators { - if !self.ongoing.contains(id) { - continue; - } - - loop { - let action = match generator.poke().await { - Ok(action) => action, - Err(e) => { - errors.insert(*id, e); - crate::metrics::TRIPLE_GENERATOR_FAILURES - .with_label_values(&[my_account_id.as_str()]) - .inc(); - self.ongoing.remove(id); - self.introduced.remove(id); - to_remove.push(*id); - // tracing::warn!( - // elapsed = ?generator.timestamp.unwrap().elapsed(), - // "added {id} to failed triples" - // ); - break; - } - }; - - match action { - Action::Wait => { - tracing::debug!("triple: waiting"); - // Retain protocol until we are finished - break; - } - Action::SendMany(data) => { - for p in &generator.participants { - messages.push(( - *p, - TripleMessage { - id: *id, - epoch, - from: me, - data: data.clone(), - timestamp: Utc::now().timestamp() as u64, - }, - )) + // let mut interval = tokio::time::interval(Duration::from_millis(5)); + let started = Instant::now(); + while let Some((id, task)) = self.ongoing_tasks.pop_front() { + if task.is_finished() { + match task.await { + Ok((id, result)) => match result { + Ok((mut msgs, triple)) => { + if let Some((triple, mine)) = triple { + tracing::info!(id, "triple completed with triple"); + self.generators.remove(&id); + self.ongoing.remove(&id); + self.introduced.remove(&id); + triples.push((triple, mine)); + } + messages.append(&mut msgs); } - } - Action::SendPrivate(p, data) => messages.push(( - p, - TripleMessage { - id: *id, - epoch, - from: me, - data, - timestamp: Utc::now().timestamp() as u64, - }, - )), - Action::Return(output) => { - // elapsed = ?generator.timestamp.unwrap().elapsed(), - tracing::info!( - id, - ?me, - big_a = ?output.1.big_a.to_base58(), - big_b = ?output.1.big_b.to_base58(), - big_c = ?output.1.big_c.to_base58(), - "completed triple 
generation" - ); - - // if let Some(start_time) = generator.timestamp { - // crate::metrics::TRIPLE_LATENCY - // .with_label_values(&[my_account_id.as_str()]) - // .observe(start_time.elapsed().as_secs_f64()); - // } - - crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATORS_SUCCESS - .with_label_values(&[my_account_id.as_str()]) - .inc(); - - let triple = Triple { - id: *id, - share: output.0, - public: output.1, - }; - - // After creation the triple is assigned to a random node, which is NOT necessarily the one that initiated it's creation - let triple_is_mine = { - // This is an entirely unpredictable value to all participants because it's a combination of big_c_i - // It is the same value across all participants - let big_c = triple.public.big_c; - - // We turn this into a u64 in a way not biased to the structure of the byte serialisation so we hash it - // We use Highway Hash because the DefaultHasher doesn't guarantee a consistent output across versions - let entropy = - HighwayHasher::default().hash64(&big_c.to_bytes()) as usize; - - let num_participants = generator.participants.len(); - // This has a *tiny* bias towards lower indexed participants, they're up to (1 + num_participants / u64::MAX)^2 times more likely to be selected - // This is acceptably small that it will likely never result in a biased selection happening - let triple_owner = generator.participants[entropy % num_participants]; - - triple_owner == me - }; - - triples.push((triple, triple_is_mine)); - if triple_is_mine { - crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATIONS_MINE_SUCCESS - .with_label_values(&[my_account_id.as_str()]) - .inc(); + Err(e) => { + tracing::info!(id, ?e, "triple completed with error"); + self.generators.remove(&id); + self.ongoing.remove(&id); + self.introduced.remove(&id); + errors.insert(id, e); } - - // Protocol done, remove it from the ongoing pool. - self.ongoing.remove(id); - self.introduced.remove(id); - // Do not retain the protocol - to_remove.push(*id); - break; + }, + Err(e) => { + tracing::info!(id, ?e, "triple completed with error=join"); + self.generators.remove(&id); + self.ongoing.remove(&id); + self.introduced.remove(&id); + errors.insert(id, ProtocolError::Other(e.into())); } } + } else { + self.ongoing_tasks.push_back((id, task)); } - } - for id in to_remove { - self.generators.remove(&id); + // interval.tick().await; + tokio::time::sleep(Duration::from_millis(5)).await; + if started.elapsed() > self.budget { + break; + } } + // let instant = Instant::now(); + // let check = async { + + // }; + // tokio::pin!(check); + + // tokio::select! { + // _ = tokio::time::sleep(self.budget) => { + // tracing::debug!("triple generation budget exhausted"); + // } + // results = &mut check => { + + // } + // } + // 'generators: + // for (id, generator) in &mut self.generators { + // if !self.ongoing.contains(id) { + // continue; + // } + + // // if instant.elapsed() > self.budget { + // // break 'generators; + // // } + // generator.spawn_poke_task(me, epoch); + // } + (triples, messages, errors) } } @@ -323,8 +439,10 @@ pub struct TripleManager { /// Triple Storage pub triple_storage: TripleStorage, + /// The set of ongoing triple generation protocols. pub tasks: Arc>, + // poke_task: Arc>>>, /// The set of triple ids that were already taken or failed. This will be maintained for at most /// triple timeout period just so messages are cycled through the system. 
pub gc: Arc>>, @@ -357,7 +475,8 @@ impl TripleManager { storage: &TripleStorage, ) -> Self { Self { - tasks: Arc::new(RwLock::new(TripleTasks::new())), + tasks: Arc::new(RwLock::new(TripleTasks::new(Duration::from_millis(100)))), + // poke_task: Arc::new(RwLock::new(None)), gc: Arc::new(RwLock::new(HashMap::new())), me, threshold, @@ -627,12 +746,34 @@ impl TripleManager { /// /// An empty vector means we cannot progress until we receive a new message. pub async fn poke(&self, cfg: &ProtocolConfig) -> Vec<(Participant, TripleMessage)> { + // let poke = tokio::task::spawn({ + // let tasks = self.tasks.clone(); + // let me = self.me.clone(); + // let my_account_id = self.my_account_id.clone(); + // let epoch = self.epoch.clone(); + // let cfg = cfg.clone(); + // async move { + // let mut tasks = tasks.write().await; + // tasks.poke(me, &my_account_id, epoch, &cfg).await + // } + // }); + let (triples, messages, errors) = { let mut tasks = self.tasks.write().await; tasks .poke(self.me, &self.my_account_id, self.epoch, cfg) .await }; + // let poke = tasks.poke(self.me, &self.my_account_id, self.epoch, cfg); + + // let (triples, messages, errors) = poke.await; + // match poke.await { + // Ok(result) => result, + // Err(e) => { + // tracing::error!(?e, "failed to poke triple generation protocols"); + // return Vec::new(); + // } + // }; { let mut gc = self.gc.write().await; From 7e40cd29041396eaa8047705cafb08e549742efc Mon Sep 17 00:00:00 2001 From: Phuong N Date: Fri, 13 Dec 2024 05:08:28 +0000 Subject: [PATCH 23/42] Cleaned up triple generation task --- .../node/src/protocol/cryptography.rs | 34 +-- chain-signatures/node/src/protocol/message.rs | 39 +-- chain-signatures/node/src/protocol/triple.rs | 286 ++++++++---------- 3 files changed, 162 insertions(+), 197 deletions(-) diff --git a/chain-signatures/node/src/protocol/cryptography.rs b/chain-signatures/node/src/protocol/cryptography.rs index 01e57a5e..b2bf3757 100644 --- a/chain-signatures/node/src/protocol/cryptography.rs +++ b/chain-signatures/node/src/protocol/cryptography.rs @@ -383,9 +383,6 @@ impl CryptographicProtocol for RunningState { let triple_task = tokio::task::spawn(async move { let participant_map = triple_par; let my_account_id = triple_manager.my_account_id.clone(); - // crate::metrics::MESSAGE_QUEUE_SIZE - // .with_label_values(&[my_account_id.as_str()]) - // .set(messages.len() as i64); if let Err(err) = triple_manager.stockpile(&active, &protocol_cfg).await { tracing::warn!(?err, "running: failed to stockpile triples"); } @@ -396,6 +393,9 @@ impl CryptographicProtocol for RunningState { MpcMessage::Triple(msg), ); } + crate::metrics::MESSAGE_QUEUE_SIZE + .with_label_values(&[my_account_id.as_str()]) + .set(messages.len() as i64); drop(messages); crate::metrics::NUM_TRIPLES_MINE @@ -404,12 +404,12 @@ impl CryptographicProtocol for RunningState { crate::metrics::NUM_TRIPLES_TOTAL .with_label_values(&[my_account_id.as_str()]) .set(triple_manager.len_generated().await as i64); - // crate::metrics::NUM_TRIPLE_GENERATORS_INTRODUCED - // .with_label_values(&[my_account_id.as_str()]) - // .set(triple_manager.introduced.len() as i64); - // crate::metrics::NUM_TRIPLE_GENERATORS_TOTAL - // .with_label_values(&[my_account_id.as_str()]) - // .set(triple_manager.ongoing.len() as i64); + crate::metrics::NUM_TRIPLE_GENERATORS_INTRODUCED + .with_label_values(&[my_account_id.as_str()]) + .set(triple_manager.len_introduced().await as i64); + crate::metrics::NUM_TRIPLE_GENERATORS_TOTAL + .with_label_values(&[my_account_id.as_str()]) + 
.set(triple_manager.len_ongoing().await as i64); }); let messages = self.messages.clone(); @@ -466,10 +466,6 @@ impl CryptographicProtocol for RunningState { let stable = mesh_state.stable_participants.clone(); tracing::debug!(?stable, "stable participants"); - // let mut sign_queue = self.sign_queue.write().await; - // crate::metrics::SIGN_QUEUE_SIZE - // .with_label_values(&[my_account_id.as_str()]) - // .set(sign_queue.len() as i64); let me = ctx.me().await; let sig_task = tokio::task::spawn({ let presignature_manager = self.presignature_manager.clone(); @@ -486,15 +482,15 @@ impl CryptographicProtocol for RunningState { tracing::debug!(?stable, "stable participants"); let mut sign_queue = sign_queue.write().await; - // crate::metrics::SIGN_QUEUE_SIZE - // .with_label_values(&[my_account_id.as_str()]) - // .set(sign_queue.len() as i64); + crate::metrics::SIGN_QUEUE_SIZE + .with_label_values(&[my_account_id.as_str()]) + .set(sign_queue.len() as i64); sign_queue.organize(self.threshold, &stable, me, &my_account_id); let my_requests = sign_queue.my_requests(me); - // crate::metrics::SIGN_QUEUE_MINE_SIZE - // .with_label_values(&[my_account_id.as_str()]) - // .set(my_requests.len() as i64); + crate::metrics::SIGN_QUEUE_MINE_SIZE + .with_label_values(&[my_account_id.as_str()]) + .set(my_requests.len() as i64); let mut presignature_manager = presignature_manager.write().await; let mut signature_manager = signature_manager.write().await; diff --git a/chain-signatures/node/src/protocol/message.rs b/chain-signatures/node/src/protocol/message.rs index 8ac6bacb..ad5f19d6 100644 --- a/chain-signatures/node/src/protocol/message.rs +++ b/chain-signatures/node/src/protocol/message.rs @@ -248,27 +248,28 @@ impl MessageHandler for RunningState { // remove the triple_id that has already failed or taken from the triple_bins // and refresh the timestamp of failed and taken - let triple_messages = queue.triple_bins.entry(self.epoch).or_default(); - // triple_messages.retain(|id, queue| { - // if queue.is_empty() - // || queue.iter().any(|msg| { - // util::is_elapsed_longer_than_timeout( - // msg.timestamp, - // protocol_cfg.triple.generation_timeout, - // ) - // }) - // { - // return false; - // } - - // // if triple id is in GC, remove these messages because the triple is currently - // // being GC'ed, where this particular triple has previously failed or been utilized. - // !triple_manager.refresh_gc(id) - // }); - for (id, queue) in triple_messages { + let triple_messages = queue.triple_bins.remove(&self.epoch).unwrap_or_default(); + for (id, mut queue) in triple_messages { + if queue.is_empty() + || queue.iter().any(|msg| { + util::is_elapsed_longer_than_timeout( + msg.timestamp, + protocol_cfg.triple.generation_timeout, + ) + }) + { + continue; + } + + // if triple id is in GC, remove these messages because the triple is currently + // being GC'ed, where this particular triple has previously failed or been utilized. 
+ if self.triple_manager.refresh_gc(id).await { + continue; + } + let protocol = match self .triple_manager - .get_or_start_generation(*id, participants, protocol_cfg) + .get_or_start_generation(id, participants, protocol_cfg) .await { Ok(protocol) => protocol, diff --git a/chain-signatures/node/src/protocol/triple.rs b/chain-signatures/node/src/protocol/triple.rs index 4cc84c20..2023ab32 100644 --- a/chain-signatures/node/src/protocol/triple.rs +++ b/chain-signatures/node/src/protocol/triple.rs @@ -20,6 +20,7 @@ use std::fmt; use std::sync::Arc; use std::time::{Duration, Instant}; use tokio::sync::RwLock; +use tokio::task::JoinHandle; use near_account_id::AccountId; @@ -28,6 +29,11 @@ use near_account_id::AccountId; /// messages. pub type TripleId = u64; +type GeneratorOutcome = ( + TripleId, + Result<(Vec<(Participant, TripleMessage)>, Option<(Triple, bool)>), ProtocolError>, +); + // TODO: why do we have Clone here? Triples can not be reused. /// A completed triple. #[derive(Clone, Serialize, Deserialize, Debug)] @@ -46,11 +52,6 @@ pub struct TripleGenerator { pub timeout: Duration, } -type PokeTaskOutcome = ( - TripleId, - Result<(Vec<(Participant, TripleMessage)>, Option<(Triple, bool)>), ProtocolError>, -); - impl TripleGenerator { pub fn new( me: Participant, @@ -72,9 +73,32 @@ impl TripleGenerator { }) } - pub async fn poke( - &mut self, - ) -> Result>, ProtocolError> { + pub async fn message(&self, from: Participant, data: MessageData) { + let mut protocol = self.protocol.write().await; + protocol.message(from, data); + } + + pub async fn messages(&self, from: Participant, data: Vec) { + let mut protocol = self.protocol.write().await; + for data in data { + protocol.message(from, data); + } + } + + pub fn spawn_execution( + &self, + me: Participant, + my_account_id: &AccountId, + epoch: u64, + ) -> JoinHandle { + tokio::task::spawn({ + let mut generator = self.clone(); + let my_account_id = my_account_id.clone(); + async move { generator.execute(me, &my_account_id, epoch).await } + }) + } + + async fn poke(&mut self) -> Result>, ProtocolError> { let elapsed = { let mut timestamp = self.timestamp.write().await; let timestamp = timestamp.get_or_insert_with(Instant::now); @@ -91,39 +115,33 @@ impl TripleGenerator { protocol.poke() } - pub async fn message(&self, from: Participant, data: MessageData) { - let mut protocol = self.protocol.write().await; - protocol.message(from, data); - } - - pub fn spawn_poke_task( - &self, + async fn execute( + &mut self, me: Participant, + my_account_id: &AccountId, epoch: u64, - ) -> tokio::task::JoinHandle { - tokio::task::spawn({ - let mut generator = self.clone(); - async move { generator.poke_handle(me, epoch).await } - }) - } - - pub async fn poke_handle(&mut self, me: Participant, epoch: u64) -> PokeTaskOutcome { + ) -> GeneratorOutcome { let mut messages = Vec::new(); loop { let action = match self.poke().await { Ok(action) => action, Err(e) => { - // errors.insert(*id, e); - // crate::metrics::TRIPLE_GENERATOR_FAILURES - // .with_label_values(&[my_account_id.as_str()]) - // .inc(); - // self.ongoing.remove(id); - // self.introduced.remove(id); - // to_remove.push(*id); - // tracing::warn!( - // elapsed = ?generator.timestamp.unwrap().elapsed(), - // "added {id} to failed triples" - // ); + crate::metrics::TRIPLE_GENERATOR_FAILURES + .with_label_values(&[my_account_id.as_str()]) + .inc(); + + { + let timestamp = self.timestamp.read().await; + if let Some(start_time) = &*timestamp { + tracing::warn!( + id = self.id, + err = ?e, + elapsed = 
?start_time.elapsed(), + "triple failed" + ); + } + } + break (self.id, Err(e)); } }; @@ -169,15 +187,18 @@ impl TripleGenerator { "completed triple generation" ); - // if let Some(start_time) = generator.timestamp { - // crate::metrics::TRIPLE_LATENCY - // .with_label_values(&[my_account_id.as_str()]) - // .observe(start_time.elapsed().as_secs_f64()); - // } + { + let timestamp = self.timestamp.read().await; + if let Some(start_time) = &*timestamp { + crate::metrics::TRIPLE_LATENCY + .with_label_values(&[my_account_id.as_str()]) + .observe(start_time.elapsed().as_secs_f64()); + } + } - // crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATORS_SUCCESS - // .with_label_values(&[my_account_id.as_str()]) - // .inc(); + crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATORS_SUCCESS + .with_label_values(&[my_account_id.as_str()]) + .inc(); let triple = Triple { id: self.id, @@ -203,18 +224,12 @@ impl TripleGenerator { triple_owner == me }; - // triples.push((triple, triple_is_mine)); - // if triple_is_mine { - // crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATIONS_MINE_SUCCESS - // .with_label_values(&[my_account_id.as_str()]) - // .inc(); - // } - - // // Protocol done, remove it from the ongoing pool. - // self.ongoing.remove(id); - // self.introduced.remove(id); - // // Do not retain the protocol - // to_remove.push(*id); + if triple_is_mine { + crate::metrics::NUM_TOTAL_HISTORICAL_TRIPLE_GENERATIONS_MINE_SUCCESS + .with_label_values(&[my_account_id.as_str()]) + .inc(); + } + break (self.id, Ok((messages, Some((triple, triple_is_mine))))); } } @@ -225,7 +240,7 @@ impl TripleGenerator { pub struct TripleTasks { /// The maximum amount of time the whole of the triple tasks can take before yielding /// back to the main loop. - budget: Duration, + protocol_budget: Duration, /// The pool of triple protocols that have yet to be completed. pub generators: HashMap, @@ -238,7 +253,8 @@ pub struct TripleTasks { /// they are completed or timed out. pub ongoing: HashSet, - pub ongoing_tasks: VecDeque<(TripleId, tokio::task::JoinHandle)>, + /// The set of ongoing triple generation tasks. + pub ongoing_tasks: VecDeque<(TripleId, JoinHandle)>, /// The set of triples that were introduced to the system by the current node. 
pub introduced: HashSet, @@ -257,9 +273,8 @@ impl std::fmt::Debug for TripleTasks { impl TripleTasks { pub fn new(budget: Duration) -> Self { - // pub fn new() -> Self { Self { - budget, + protocol_budget: budget, generators: HashMap::new(), queued: VecDeque::new(), ongoing: HashSet::new(), @@ -268,6 +283,12 @@ impl TripleTasks { } } + fn remove(&mut self, id: TripleId) { + self.generators.remove(&id); + self.ongoing.remove(&id); + self.introduced.remove(&id); + } + pub fn entry( &mut self, me: Participant, @@ -320,17 +341,12 @@ impl TripleTasks { let to_generate_len = cfg.max_concurrent_generation as usize - self.ongoing.len(); if !self.queued.is_empty() && to_generate_len > 0 { for _ in 0..to_generate_len { - // self.queued.pop_front().map(|id| self.ongoing.insert(id)); if let Some(id) = self.queued.pop_front() { - tracing::info!( - id, - len = self.ongoing_tasks.len(), - "spawning triple generation protocol" - ); + tracing::info!(id, "spawning triple generation task"); self.ongoing.insert(id); let generator = self.generators.get(&id).unwrap(); self.ongoing_tasks - .push_back((id, generator.spawn_poke_task(me, epoch))); + .push_back((id, generator.spawn_execution(me, my_account_id, epoch))); } } } @@ -344,90 +360,57 @@ impl TripleTasks { { let generator = self.generators.get(&id).unwrap(); self.ongoing_tasks - .push_back((*id, generator.spawn_poke_task(me, epoch))); + .push_back((*id, generator.spawn_execution(me, my_account_id, epoch))); } } - // self.ongoing_tasks.extend( - // self.generators - // .iter() - // .filter(|(id, _)| self.ongoing.contains(id) && self.ongoing_tasks) - // .map(|(id, generator)| (*id, generator.spawn_poke_task(me, epoch))), - // ); - let mut triples = Vec::new(); let mut messages = Vec::new(); let mut errors = HashMap::new(); - // let mut interval = tokio::time::interval(Duration::from_millis(5)); + let mut interval = tokio::time::interval(Duration::from_millis(5)); let started = Instant::now(); + + // Go through each running task and see if it's done. This will apply a protocol_budget which will + // yield back control to the main loop if the time is up. If it is done, remove it from the ongoing_tasks. + // If the TripleGenerator is not done after this, a new task will be spawned in the next iteration + // in the case that the TripleGenerator is waiting. 
while let Some((id, task)) = self.ongoing_tasks.pop_front() { - if task.is_finished() { - match task.await { - Ok((id, result)) => match result { - Ok((mut msgs, triple)) => { - if let Some((triple, mine)) = triple { - tracing::info!(id, "triple completed with triple"); - self.generators.remove(&id); - self.ongoing.remove(&id); - self.introduced.remove(&id); - triples.push((triple, mine)); - } - messages.append(&mut msgs); - } - Err(e) => { - tracing::info!(id, ?e, "triple completed with error"); - self.generators.remove(&id); - self.ongoing.remove(&id); - self.introduced.remove(&id); - errors.insert(id, e); - } - }, - Err(e) => { - tracing::info!(id, ?e, "triple completed with error=join"); - self.generators.remove(&id); - self.ongoing.remove(&id); - self.introduced.remove(&id); - errors.insert(id, ProtocolError::Other(e.into())); - } - } - } else { + interval.tick().await; + if started.elapsed() > self.protocol_budget { + self.ongoing_tasks.push_back((id, task)); + break; + } + if !task.is_finished() { self.ongoing_tasks.push_back((id, task)); + continue; } - // interval.tick().await; - tokio::time::sleep(Duration::from_millis(5)).await; - if started.elapsed() > self.budget { - break; + let outcome = match task.await { + Ok((_, result)) => result, + Err(e) => { + tracing::info!(id, ?e, "triple completed with cancellation"); + self.remove(id); + errors.insert(id, ProtocolError::Other(e.into())); + continue; + } + }; + match outcome { + Ok((mut msgs, triple)) => { + if let Some((triple, mine)) = triple { + self.remove(id); + triples.push((triple, mine)); + } + messages.append(&mut msgs); + } + Err(e) => { + tracing::info!(id, ?e, "triple completed with error"); + self.remove(id); + errors.insert(id, e); + } } } - // let instant = Instant::now(); - // let check = async { - - // }; - // tokio::pin!(check); - - // tokio::select! { - // _ = tokio::time::sleep(self.budget) => { - // tracing::debug!("triple generation budget exhausted"); - // } - // results = &mut check => { - - // } - // } - // 'generators: - // for (id, generator) in &mut self.generators { - // if !self.ongoing.contains(id) { - // continue; - // } - - // // if instant.elapsed() > self.budget { - // // break 'generators; - // // } - // generator.spawn_poke_task(me, epoch); - // } - (triples, messages, errors) } } @@ -476,7 +459,6 @@ impl TripleManager { ) -> Self { Self { tasks: Arc::new(RwLock::new(TripleTasks::new(Duration::from_millis(100)))), - // poke_task: Arc::new(RwLock::new(None)), gc: Arc::new(RwLock::new(HashMap::new())), me, threshold, @@ -610,6 +592,14 @@ impl TripleManager { self.triple_storage.len_mine().await.unwrap_or(0) } + pub async fn len_ongoing(&self) -> usize { + self.tasks.read().await.ongoing.len() + } + + pub async fn len_introduced(&self) -> usize { + self.tasks.read().await.introduced.len() + } + /// Returns if there's any unspent triple in the manager. pub async fn is_empty(&self) -> bool { self.len_generated().await == 0 @@ -638,9 +628,9 @@ impl TripleManager { /// Refresh item in the garbage collection. If it is present, return true and update internally /// the timestamp for gabage collection. 
- pub async fn refresh_gc(&self, id: &TripleId) -> bool { + pub async fn refresh_gc(&self, id: TripleId) -> bool { let mut gc = self.gc.write().await; - let entry = gc.entry(*id).and_modify(|e| *e = Instant::now()); + let entry = gc.entry(id).and_modify(|e| *e = Instant::now()); matches!(entry, Entry::Occupied(_)) } @@ -746,34 +736,12 @@ impl TripleManager { /// /// An empty vector means we cannot progress until we receive a new message. pub async fn poke(&self, cfg: &ProtocolConfig) -> Vec<(Participant, TripleMessage)> { - // let poke = tokio::task::spawn({ - // let tasks = self.tasks.clone(); - // let me = self.me.clone(); - // let my_account_id = self.my_account_id.clone(); - // let epoch = self.epoch.clone(); - // let cfg = cfg.clone(); - // async move { - // let mut tasks = tasks.write().await; - // tasks.poke(me, &my_account_id, epoch, &cfg).await - // } - // }); - let (triples, messages, errors) = { let mut tasks = self.tasks.write().await; tasks .poke(self.me, &self.my_account_id, self.epoch, cfg) .await }; - // let poke = tasks.poke(self.me, &self.my_account_id, self.epoch, cfg); - - // let (triples, messages, errors) = poke.await; - // match poke.await { - // Ok(result) => result, - // Err(e) => { - // tracing::error!(?e, "failed to poke triple generation protocols"); - // return Vec::new(); - // } - // }; { let mut gc = self.gc.write().await; From 46da477b7ce8c601e45b5744bd831e60e80d91b4 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Fri, 13 Dec 2024 18:18:51 +0000 Subject: [PATCH 24/42] Added wait for running/joining/resharing on per node --- integration-tests/chain-signatures/src/lib.rs | 10 +-- .../chain-signatures/tests/actions/wait.rs | 64 +++++++++++++++++++ .../chain-signatures/tests/lib.rs | 4 +- 3 files changed, 71 insertions(+), 7 deletions(-) diff --git a/integration-tests/chain-signatures/src/lib.rs b/integration-tests/chain-signatures/src/lib.rs index 138880d9..e4c1e786 100644 --- a/integration-tests/chain-signatures/src/lib.rs +++ b/integration-tests/chain-signatures/src/lib.rs @@ -106,18 +106,18 @@ impl Nodes { &mut self, cfg: &NodeConfig, new_account: &Account, - ) -> anyhow::Result<()> { + ) -> anyhow::Result { tracing::info!(id = %new_account.id(), "adding one more node"); match self { Nodes::Local { ctx, nodes } => { - nodes.push(local::Node::run(ctx, cfg, new_account).await?) + nodes.push(local::Node::run(ctx, cfg, new_account).await?); + Ok(nodes.len() - 1) } Nodes::Docker { ctx, nodes } => { - nodes.push(containers::Node::run(ctx, cfg, new_account).await?) 
+ nodes.push(containers::Node::run(ctx, cfg, new_account).await?); + Ok(nodes.len() - 1) } } - - Ok(()) } pub async fn kill_node(&mut self, account_id: &AccountId) -> NodeEnvConfig { diff --git a/integration-tests/chain-signatures/tests/actions/wait.rs b/integration-tests/chain-signatures/tests/actions/wait.rs index dbee5e25..a5674ece 100644 --- a/integration-tests/chain-signatures/tests/actions/wait.rs +++ b/integration-tests/chain-signatures/tests/actions/wait.rs @@ -9,6 +9,14 @@ use crate::cluster::Cluster; type Epoch = u64; +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum NodeState { + Running, + Resharing, + Joining, + NotRunning, +} + enum WaitActions { Running(Epoch), MinTriples(usize), @@ -16,6 +24,7 @@ enum WaitActions { MinPresignatures(usize), MinMinePresignatures(usize), Signable(usize), + NodeState(NodeState, usize), } pub struct WaitAction<'a, R> { @@ -81,6 +90,24 @@ impl<'a, R> WaitAction<'a, R> { self } + pub fn node_running(mut self, id: usize) -> Self { + self.actions + .push(WaitActions::NodeState(NodeState::Running, id)); + self + } + + pub fn node_resharing(mut self, id: usize) -> Self { + self.actions + .push(WaitActions::NodeState(NodeState::Resharing, id)); + self + } + + pub fn node_joining(mut self, id: usize) -> Self { + self.actions + .push(WaitActions::NodeState(NodeState::Joining, id)); + self + } + async fn execute(self) -> anyhow::Result<&'a Cluster> { for action in self.actions { match action { @@ -102,6 +129,9 @@ impl<'a, R> WaitAction<'a, R> { WaitActions::Signable(count) => { require_presignatures(self.nodes, count, true).await?; } + WaitActions::NodeState(node_state, id) => { + node_ready(self.nodes, node_state, id).await?; + } } } @@ -130,6 +160,40 @@ impl<'a> IntoFuture for WaitAction<'a, RunningContractState> { } } +async fn node_ready(nodes: &Cluster, state: NodeState, id: usize) -> anyhow::Result<()> { + let is_ready = || async { + let node_state = match nodes.fetch_state(id).await? { + StateView::Running { .. } => NodeState::Running, + StateView::Resharing { .. } => NodeState::Resharing, + StateView::Joining { .. 
} => NodeState::Joining,
+            StateView::NotRunning => NodeState::NotRunning,
+            _ => anyhow::bail!("unexpected variant for checking node state"),
+        };
+
+        if node_state != state {
+            anyhow::bail!("node not ready yet {:?} != {:?}", node_state, state);
+        }
+
+        Ok(state)
+    };
+
+    let strategy = ConstantBuilder::default()
+        .with_delay(std::time::Duration::from_secs(3))
+        .with_max_times(100);
+
+    let state = is_ready
+        .retry(&strategy)
+        .await
+        .context("did not reach node state in time")?;
+
+    if matches!(state, NodeState::Joining) {
+        // wait a bit longer for voting to join
+        tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;
+    }
+
+    Ok(())
+}
+
 pub async fn running_mpc(
     nodes: &Cluster,
     epoch: Option,
diff --git a/integration-tests/chain-signatures/tests/lib.rs b/integration-tests/chain-signatures/tests/lib.rs
index 7c5b931e..f14c5199 100644
--- a/integration-tests/chain-signatures/tests/lib.rs
+++ b/integration-tests/chain-signatures/tests/lib.rs
@@ -34,9 +34,9 @@ impl Cluster {
            }
        };
-        self.nodes.start_node(&self.cfg, &node_account).await?;
         // Wait for new node to add itself as a candidate
-        tokio::time::sleep(tokio::time::Duration::from_secs(20)).await;
+        let id = self.nodes.start_node(&self.cfg, &node_account).await?;
+        self.wait().node_joining(id).await?;
         // T number of participants should vote
         let participants = self.participant_accounts().await?;
From 31e4174a69c807611948fa0ff1ccd1bdad2a56fe Mon Sep 17 00:00:00 2001
From: Phuong
Date: Fri, 13 Dec 2024 15:52:44 -0800
Subject: [PATCH 25/42] Fix mac build

---
 integration-tests/chain-signatures/src/containers.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/integration-tests/chain-signatures/src/containers.rs b/integration-tests/chain-signatures/src/containers.rs
index a8e385d9..35410754 100644
--- a/integration-tests/chain-signatures/src/containers.rs
+++ b/integration-tests/chain-signatures/src/containers.rs
@@ -237,7 +237,10 @@ impl LocalStack {
         let s3_address = format!("http://{}:{}", address, Self::S3_CONTAINER_PORT);
         #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
         let s3_host_address = {
-            let s3_host_port = container.get_host_port_ipv4(Self::S3_CONTAINER_PORT);
+            let s3_host_port = container
+                .get_host_port_ipv4(Self::S3_CONTAINER_PORT)
+                .await
+                .unwrap();
             format!("http://127.0.0.1:{s3_host_port}")
         };
         #[cfg(target_arch = "x86_64")]
From 4cba6014cf618ebf875cc8562b01850387910266 Mon Sep 17 00:00:00 2001
From: Phuong
Date: Fri, 13 Dec 2024 18:32:24 -0800
Subject: [PATCH 26/42] Added candidate check

---
 .../chain-signatures/tests/actions/wait.rs | 93 +++++++++++++++++--
 .../chain-signatures/tests/cases/mod.rs | 2 +-
 .../chain-signatures/tests/lib.rs | 5 +-
 3 files changed, 91 insertions(+), 9 deletions(-)

diff --git a/integration-tests/chain-signatures/tests/actions/wait.rs b/integration-tests/chain-signatures/tests/actions/wait.rs
index a5674ece..511791fa 100644
--- a/integration-tests/chain-signatures/tests/actions/wait.rs
+++ b/integration-tests/chain-signatures/tests/actions/wait.rs
@@ -4,10 +4,17 @@ use anyhow::Context;
 use backon::{ConstantBuilder, Retryable};
 use mpc_contract::{ProtocolContractState, RunningContractState};
 use mpc_node::web::StateView;
+use near_sdk::AccountId;

 use crate::cluster::Cluster;

 type Epoch = u64;
+type Present = bool;
+
+enum ContractState {
+    Candidate(AccountId, Present),
+    Participant(AccountId, Present),
+}
+
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 enum NodeState {
     Running,
     Resharing,
     Joining,
     NotRunning,
 }

 enum WaitActions {
     Running(Epoch),
     MinTriples(usize),
@@ -16,6 +24,7 @@ enum WaitActions {
     MinPresignatures(usize),
     MinMinePresignatures(usize),
     Signable(usize),
NodeState(NodeState, usize), + ContractState(ContractState), } pub struct WaitAction<'a, R> { @@ -108,11 +116,47 @@ impl<'a, R> WaitAction<'a, R> { self } + pub fn candidate_present(mut self, candidate: &AccountId) -> Self { + self.actions + .push(WaitActions::ContractState(ContractState::Candidate( + candidate.clone(), + true, + ))); + self + } + + pub fn candidate_missing(mut self, candidate: &AccountId) -> Self { + self.actions + .push(WaitActions::ContractState(ContractState::Candidate( + candidate.clone(), + false, + ))); + self + } + + pub fn participant_present(mut self, participant: &AccountId) -> Self { + self.actions + .push(WaitActions::ContractState(ContractState::Participant( + participant.clone(), + true, + ))); + self + } + + pub fn participant_missing(mut self, participant: &AccountId) -> Self { + self.actions + .push(WaitActions::ContractState(ContractState::Participant( + participant.clone(), + false, + ))); + self + } + async fn execute(self) -> anyhow::Result<&'a Cluster> { for action in self.actions { match action { WaitActions::Running(epoch) => { - running_mpc(self.nodes, Some(epoch)).await?; + running_mpc(self.nodes, if epoch > 0 { Some(epoch) } else { None }).await?; } WaitActions::MinTriples(expected) => { require_triples(self.nodes, expected, false).await?; @@ -129,8 +173,11 @@ impl<'a, R> WaitAction<'a, R> { WaitActions::Signable(count) => { require_presignatures(self.nodes, count, true).await?; } - WaitActions::NodeState(node_state, id) => { - node_ready(self.nodes, node_state, id).await?; + WaitActions::NodeState(state, id) => { + node_ready(self.nodes, state, id).await?; + } + WaitActions::ContractState(state) => { + require_contract_state(self.nodes, state).await?; } } } @@ -194,6 +241,38 @@ async fn node_ready(nodes: &Cluster, state: NodeState, id: usize) -> anyhow::Res Ok(()) } +async fn require_contract_state(nodes: &Cluster, state: ContractState) -> anyhow::Result<()> { + let is_ready = || async { + let current_state = running_mpc(nodes, None).await?; + + match &state { + ContractState::Candidate(candidate, present) => { + if *present == current_state.candidates.contains_key(candidate) { + anyhow::bail!("candidate not found in contract state"); + } + } + ContractState::Participant(participant, present) => { + if *present == current_state.participants.contains_key(participant) { + anyhow::bail!("participant not found in contract state"); + } + } + } + + Ok(()) + }; + + let strategy = ConstantBuilder::default() + .with_delay(std::time::Duration::from_secs(3)) + .with_max_times(100); + + let state = is_ready + .retry(&strategy) + .await + .context("did not reach contract state in time")?; + + Ok(state) +} + pub async fn running_mpc( nodes: &Cluster, epoch: Option, @@ -213,15 +292,15 @@ pub async fn running_mpc( let strategy = ConstantBuilder::default() .with_delay(std::time::Duration::from_secs(3)) - .with_max_times(100); + .with_max_times(if epoch.is_some() { 200 } else { 100 }); is_running.retry(&strategy).await.with_context(|| { format!( "mpc did not reach {} in time", - if epoch.is_some() { - "expected epoch" + if let Some(epoch) = epoch { + format!("expected epoch={epoch}") } else { - "running state" + "running state".to_string() } ) }) diff --git a/integration-tests/chain-signatures/tests/cases/mod.rs b/integration-tests/chain-signatures/tests/cases/mod.rs index 0f42fe46..b28f41c0 100644 --- a/integration-tests/chain-signatures/tests/cases/mod.rs +++ b/integration-tests/chain-signatures/tests/cases/mod.rs @@ -389,7 +389,7 @@ async fn 
test_lake_congestion() -> anyhow::Result<()> { #[test(tokio::test)] async fn test_multichain_reshare_with_lake_congestion() -> anyhow::Result<()> { - let mut nodes = cluster::spawn().await?; + let mut nodes = cluster::spawn().wait_for_running().await?; // add latency to node1->rpc, but not node0->rpc add_latency(&nodes.nodes.proxy_name_for_node(1), true, 1.0, 1_000, 100).await?; diff --git a/integration-tests/chain-signatures/tests/lib.rs b/integration-tests/chain-signatures/tests/lib.rs index f14c5199..1fc8cd7d 100644 --- a/integration-tests/chain-signatures/tests/lib.rs +++ b/integration-tests/chain-signatures/tests/lib.rs @@ -36,7 +36,10 @@ impl Cluster { // Wait for new node to add itself as a candidate let id = self.nodes.start_node(&self.cfg, &node_account).await?; - self.wait().node_joining(id).await?; + self.wait() + .node_joining(id) + .candidate_present(node_account.id()) + .await?; // T number of participants should vote let participants = self.participant_accounts().await?; From 4098eb9ca3756a67e7ed99c9dcab45bfa487c95a Mon Sep 17 00:00:00 2001 From: Phuong N Date: Mon, 16 Dec 2024 19:58:10 +0000 Subject: [PATCH 27/42] clippy and better err messages --- integration-tests/chain-signatures/tests/actions/wait.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/integration-tests/chain-signatures/tests/actions/wait.rs b/integration-tests/chain-signatures/tests/actions/wait.rs index 511791fa..4157b52a 100644 --- a/integration-tests/chain-signatures/tests/actions/wait.rs +++ b/integration-tests/chain-signatures/tests/actions/wait.rs @@ -248,12 +248,12 @@ async fn require_contract_state(nodes: &Cluster, state: ContractState) -> anyhow match &state { ContractState::Candidate(candidate, present) => { if *present == current_state.candidates.contains_key(candidate) { - anyhow::bail!("candidate not found in contract state"); + anyhow::bail!("candidate invalid in contract state: expect_present={present} for {candidate:?}"); } } ContractState::Participant(participant, present) => { if *present == current_state.participants.contains_key(participant) { - anyhow::bail!("participant not found in contract state"); + anyhow::bail!("participant invalid in contract state: expect_present={present} for {participant:?}"); } } } @@ -265,12 +265,12 @@ async fn require_contract_state(nodes: &Cluster, state: ContractState) -> anyhow .with_delay(std::time::Duration::from_secs(3)) .with_max_times(100); - let state = is_ready + is_ready .retry(&strategy) .await .context("did not reach contract state in time")?; - Ok(state) + Ok(()) } pub async fn running_mpc( From 60984c4341362064d90fc5f9261b4c5ab922bf22 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Mon, 16 Dec 2024 20:52:07 +0000 Subject: [PATCH 28/42] Made http_client use participant instead of info for less clone --- chain-signatures/node/src/http_client.rs | 34 +++++---- chain-signatures/node/src/mesh/connection.rs | 35 ++++----- .../node/src/protocol/cryptography.rs | 73 ++++--------------- chain-signatures/node/src/protocol/triple.rs | 42 +++++++++++ 4 files changed, 93 insertions(+), 91 deletions(-) diff --git a/chain-signatures/node/src/http_client.rs b/chain-signatures/node/src/http_client.rs index c9975049..44bf82b7 100644 --- a/chain-signatures/node/src/http_client.rs +++ b/chain-signatures/node/src/http_client.rs @@ -1,4 +1,4 @@ -use crate::protocol::contract::primitives::{ParticipantInfo, Participants}; +use crate::protocol::contract::primitives::Participants; use crate::protocol::message::SignedMessage; use 
crate::protocol::MpcMessage; use cait_sith::protocol::Participant; @@ -95,7 +95,7 @@ pub async fn send_encrypted( // TODO: add in retry logic either in struct or at call site. // TODO: add check for participant list to see if the messages to be sent are still valid. pub struct MessageQueue { - deque: VecDeque<(ParticipantInfo, MpcMessage, Instant)>, + deque: VecDeque<(Participant, MpcMessage, Instant)>, seen_counts: HashSet, message_options: Options, } @@ -117,8 +117,13 @@ impl MessageQueue { self.deque.is_empty() } - pub fn push(&mut self, info: ParticipantInfo, msg: MpcMessage) { - self.deque.push_back((info, msg, Instant::now())); + pub fn push(&mut self, node: Participant, msg: MpcMessage) { + self.deque.push_back((node, msg, Instant::now())); + } + + pub fn extend(&mut self, other: impl IntoIterator) { + self.deque + .extend(other.into_iter().map(|(i, msg)| (i, msg, Instant::now()))); } pub async fn send_encrypted( @@ -126,7 +131,7 @@ impl MessageQueue { from: Participant, sign_sk: &near_crypto::SecretKey, client: &Client, - participants: &Participants, + active: &Participants, cfg: &ProtocolConfig, ) -> Vec { let mut failed = VecDeque::new(); @@ -136,21 +141,22 @@ impl MessageQueue { let outer = Instant::now(); let uncompacted = self.deque.len(); let mut encrypted = HashMap::new(); - while let Some((info, msg, instant)) = self.deque.pop_front() { + while let Some((id, msg, instant)) = self.deque.pop_front() { if instant.elapsed() > timeout(&msg, cfg) { errors.push(SendError::Timeout(format!( - "{} message has timed out: {info:?}", + "{} message has timed out for node={id:?}", msg.typename(), ))); continue; } - if !participants.contains_key(&Participant::from(info.id)) { - let counter = participant_counter.entry(info.id).or_insert(0); + let Some(info) = active.get(&id) else { + let counter = participant_counter.entry(id).or_insert(0); *counter += 1; - failed.push_back((info, msg, instant)); + failed.push_back((id, msg, instant)); continue; - } + }; + let encrypted_msg = match SignedMessage::encrypt(&msg, from, sign_sk, &info.cipher_pk) { Ok(encrypted) => encrypted, Err(err) => { @@ -159,7 +165,7 @@ impl MessageQueue { } }; let encrypted = encrypted.entry(info.id).or_insert_with(Vec::new); - encrypted.push((encrypted_msg, (info, msg, instant))); + encrypted.push((encrypted_msg, (id, msg, instant))); } let mut compacted = 0; @@ -167,7 +173,7 @@ impl MessageQueue { for partition in partition_ciphered_256kb(encrypted) { let (encrypted_partition, msgs): (Vec<_>, Vec<_>) = partition.into_iter().unzip(); // guaranteed to unwrap due to our previous loop check: - let info = participants.get(&Participant::from(id)).unwrap(); + let info = active.get(&Participant::from(id)).unwrap(); let account_id = &info.account_id; let number_of_messages = encrypted_partition.len() as f64; @@ -231,7 +237,7 @@ impl MessageQueue { /// Encrypted message with a reference to the old message. Only the ciphered portion of this /// type will be sent over the wire, while the original message is kept just in case things /// go wrong somewhere and the message needs to be requeued to be sent later. 
-type EncryptedMessage = (Ciphered, (ParticipantInfo, MpcMessage, Instant)); +type EncryptedMessage = (Ciphered, (Participant, MpcMessage, Instant)); fn partition_ciphered_256kb(encrypted: Vec) -> Vec> { let mut result = Vec::new(); diff --git a/chain-signatures/node/src/mesh/connection.rs b/chain-signatures/node/src/mesh/connection.rs index 7f521427..a889db53 100644 --- a/chain-signatures/node/src/mesh/connection.rs +++ b/chain-signatures/node/src/mesh/connection.rs @@ -71,18 +71,19 @@ impl Pool { } } - let connections = self.connections.read().await.clone(); // Clone connections for iteration - let mut join_set = JoinSet::new(); + let connections = { + let conn = self.connections.read().await; + conn.clone() + }; // Spawn tasks for each participant - for (participant, info) in connections.iter() { - let participant = *participant; - let info = info.clone(); - let self_clone = Arc::clone(&self); // Clone Arc for use inside tasks + let mut join_set = JoinSet::new(); + for (participant, info) in connections.into_iter() { + let pool = Arc::clone(&self); join_set.spawn(async move { - match self_clone.fetch_participant_state(&info).await { - Ok(state) => match self_clone.send_empty_msg(&participant, &info).await { + match pool.fetch_participant_state(&info).await { + Ok(state) => match pool.send_empty_msg(&participant, &info).await { Ok(()) => Ok((participant, state, info)), Err(e) => { tracing::warn!( @@ -139,19 +140,19 @@ impl Pool { } } - let connections = self.potential_connections.read().await; - - let mut join_set = JoinSet::new(); + let connections = { + let conn = self.potential_connections.read().await; + conn.clone() + }; // Spawn tasks for each participant - for (participant, info) in connections.iter() { - let participant = *participant; - let info = info.clone(); - let self_clone = Arc::clone(&self); // Clone Arc for use inside tasks + let mut join_set = JoinSet::new(); + for (participant, info) in connections.into_iter() { + let pool = Arc::clone(&self); // Clone Arc for use inside tasks join_set.spawn(async move { - match self_clone.fetch_participant_state(&info).await { - Ok(state) => match self_clone.send_empty_msg(&participant, &info).await { + match pool.fetch_participant_state(&info).await { + Ok(state) => match pool.send_empty_msg(&participant, &info).await { Ok(()) => Ok((participant, state, info)), Err(e) => { tracing::warn!( diff --git a/chain-signatures/node/src/protocol/cryptography.rs b/chain-signatures/node/src/protocol/cryptography.rs index b2bf3757..0a3f625f 100644 --- a/chain-signatures/node/src/protocol/cryptography.rs +++ b/chain-signatures/node/src/protocol/cryptography.rs @@ -121,7 +121,7 @@ impl CryptographicProtocol for GeneratingState { continue; } messages.push( - info.clone(), + Participant::from(info.id), MpcMessage::Generating(GeneratingMessage { from: ctx.me().await, data: data.clone(), @@ -131,9 +131,8 @@ impl CryptographicProtocol for GeneratingState { } Action::SendPrivate(to, data) => { tracing::debug!("generating: sending a private message to {to:?}"); - let info = self.fetch_participant(&to)?; self.messages.write().await.push( - info.clone(), + to, MpcMessage::Generating(GeneratingMessage { from: ctx.me().await, data, @@ -277,14 +276,14 @@ impl CryptographicProtocol for ResharingState { tracing::debug!("resharing: sending a message to all participants"); let me = ctx.me().await; let mut messages = self.messages.write().await; - for (p, info) in self.new_participants.iter() { + for (p, _info) in self.new_participants.iter() { if p == &me { // 
Skip yourself, cait-sith never sends messages to oneself continue; } messages.push( - info.clone(), + p.clone(), MpcMessage::Resharing(ResharingMessage { epoch: self.old_epoch, from: me, @@ -296,8 +295,8 @@ impl CryptographicProtocol for ResharingState { Action::SendPrivate(to, data) => { tracing::debug!("resharing: sending a private message to {to:?}"); match self.new_participants.get(&to) { - Some(info) => self.messages.write().await.push( - info.clone(), + Some(_) => self.messages.write().await.push( + to, MpcMessage::Resharing(ResharingMessage { epoch: self.old_epoch, from: ctx.me().await, @@ -370,56 +369,17 @@ impl CryptographicProtocol for RunningState { return Ok(NodeState::Running(self)); } - let participant_map = active - .iter() - .map(|(p, info)| (p.clone(), info.clone())) - .collect::>(); - - let my_account_id = self.triple_manager.my_account_id.clone(); - let protocol_cfg = cfg.protocol.clone(); - let messages = self.messages.clone(); - let triple_par = participant_map.clone(); - let triple_manager = self.triple_manager.clone(); - let triple_task = tokio::task::spawn(async move { - let participant_map = triple_par; - let my_account_id = triple_manager.my_account_id.clone(); - if let Err(err) = triple_manager.stockpile(&active, &protocol_cfg).await { - tracing::warn!(?err, "running: failed to stockpile triples"); - } - let mut messages = messages.write().await; - for (p, msg) in triple_manager.poke(&protocol_cfg).await { - messages.push( - participant_map.get(&p).unwrap().clone(), - MpcMessage::Triple(msg), - ); - } - crate::metrics::MESSAGE_QUEUE_SIZE - .with_label_values(&[my_account_id.as_str()]) - .set(messages.len() as i64); - drop(messages); - - crate::metrics::NUM_TRIPLES_MINE - .with_label_values(&[my_account_id.as_str()]) - .set(triple_manager.len_mine().await as i64); - crate::metrics::NUM_TRIPLES_TOTAL - .with_label_values(&[my_account_id.as_str()]) - .set(triple_manager.len_generated().await as i64); - crate::metrics::NUM_TRIPLE_GENERATORS_INTRODUCED - .with_label_values(&[my_account_id.as_str()]) - .set(triple_manager.len_introduced().await as i64); - crate::metrics::NUM_TRIPLE_GENERATORS_TOTAL - .with_label_values(&[my_account_id.as_str()]) - .set(triple_manager.len_ongoing().await as i64); - }); + let triple_task = + self.triple_manager + .clone() + .execute(&active, &cfg.protocol, self.messages.clone()); let messages = self.messages.clone(); let triple_manager = self.triple_manager.clone(); let presignature_manager = self.presignature_manager.clone(); - let presig_par = participant_map.clone(); let active = mesh_state.active_participants.clone(); let protocol_cfg = cfg.protocol.clone(); let presig_task = tokio::task::spawn(async move { - let participant_map = presig_par; let mut presignature_manager = presignature_manager.write().await; if let Err(err) = presignature_manager .stockpile( @@ -434,14 +394,10 @@ impl CryptographicProtocol for RunningState { tracing::warn!(?err, "running: failed to stockpile presignatures"); } let my_account_id = triple_manager.my_account_id.clone(); - drop(triple_manager); let mut messages = messages.write().await; for (p, msg) in presignature_manager.poke().await { - messages.push( - participant_map.get(&p).unwrap().clone(), - MpcMessage::Presignature(msg), - ); + messages.push(p, MpcMessage::Presignature(msg)); } drop(messages); @@ -465,6 +421,7 @@ impl CryptographicProtocol for RunningState { // then they are considered unstable and should not be a part of signature generation this round. 
let stable = mesh_state.stable_participants.clone(); tracing::debug!(?stable, "stable participants"); + let my_account_id = self.triple_manager.my_account_id.clone(); let me = ctx.me().await; let sig_task = tokio::task::spawn({ @@ -476,7 +433,6 @@ impl CryptographicProtocol for RunningState { let rpc_client = ctx.rpc_client().clone(); let signer = ctx.signer().clone(); let mpc_contract_id = ctx.mpc_contract_id().clone(); - let participant_map = participant_map.clone(); tokio::task::unconstrained(async move { tracing::debug!(?stable, "stable participants"); @@ -507,10 +463,7 @@ impl CryptographicProtocol for RunningState { let mut messages = messages.write().await; for (p, msg) in signature_manager.poke() { - messages.push( - participant_map.get(&p).unwrap().clone(), - MpcMessage::Signature(msg), - ); + messages.push(p, MpcMessage::Signature(msg)); } drop(messages); signature_manager diff --git a/chain-signatures/node/src/protocol/triple.rs b/chain-signatures/node/src/protocol/triple.rs index 3773d5b2..83f781e7 100644 --- a/chain-signatures/node/src/protocol/triple.rs +++ b/chain-signatures/node/src/protocol/triple.rs @@ -2,6 +2,7 @@ use super::contract::primitives::Participants; use super::cryptography::CryptographicError; use super::message::TripleMessage; use super::presignature::GenerationError; +use crate::protocol::MpcMessage; use crate::storage::triple_storage::TripleStorage; use crate::types::TripleProtocol; use crate::util::AffinePointExt; @@ -744,4 +745,45 @@ impl TripleManager { messages } + + pub fn execute( + self, + active: &Participants, + protocol_cfg: &ProtocolConfig, + messages: Arc>, + ) -> JoinHandle<()> { + let active = active.clone(); + let protocol_cfg = protocol_cfg.clone(); + + tokio::task::spawn(async move { + if let Err(err) = self.stockpile(&active, &protocol_cfg).await { + tracing::warn!(?err, "running: failed to stockpile triples"); + } + + let mut messages = messages.write().await; + messages.extend( + self.poke(&protocol_cfg) + .await + .into_iter() + .map(|(p, msg)| (p, MpcMessage::Triple(msg))), + ); + crate::metrics::MESSAGE_QUEUE_SIZE + .with_label_values(&[self.my_account_id.as_str()]) + .set(messages.len() as i64); + drop(messages); + + crate::metrics::NUM_TRIPLES_MINE + .with_label_values(&[self.my_account_id.as_str()]) + .set(self.len_mine().await as i64); + crate::metrics::NUM_TRIPLES_TOTAL + .with_label_values(&[self.my_account_id.as_str()]) + .set(self.len_generated().await as i64); + crate::metrics::NUM_TRIPLE_GENERATORS_INTRODUCED + .with_label_values(&[self.my_account_id.as_str()]) + .set(self.len_introduced().await as i64); + crate::metrics::NUM_TRIPLE_GENERATORS_TOTAL + .with_label_values(&[self.my_account_id.as_str()]) + .set(self.len_ongoing().await as i64); + }) + } } From 2a640268f37615caffecb22d3bcbeaae78177cdd Mon Sep 17 00:00:00 2001 From: Phuong N Date: Mon, 16 Dec 2024 21:02:45 +0000 Subject: [PATCH 29/42] Bump reshare test to 900secs --- integration-tests/chain-signatures/tests/actions/wait.rs | 2 +- integration-tests/chain-signatures/tests/lib.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/integration-tests/chain-signatures/tests/actions/wait.rs b/integration-tests/chain-signatures/tests/actions/wait.rs index 4157b52a..cc2ef8f5 100644 --- a/integration-tests/chain-signatures/tests/actions/wait.rs +++ b/integration-tests/chain-signatures/tests/actions/wait.rs @@ -292,7 +292,7 @@ pub async fn running_mpc( let strategy = ConstantBuilder::default() .with_delay(std::time::Duration::from_secs(3)) - 
.with_max_times(if epoch.is_some() { 200 } else { 100 }); + .with_max_times(if epoch.is_some() { 300 } else { 100 }); is_running.retry(&strategy).await.with_context(|| { format!( diff --git a/integration-tests/chain-signatures/tests/lib.rs b/integration-tests/chain-signatures/tests/lib.rs index 1fc8cd7d..fa2ac214 100644 --- a/integration-tests/chain-signatures/tests/lib.rs +++ b/integration-tests/chain-signatures/tests/lib.rs @@ -79,7 +79,7 @@ impl Cluster { .cloned() .collect::>(); - tracing::info!(?voting_accounts, %kick, "kicking participant"); + tracing::info!(?voting_accounts, %kick, at_epoch = state.epoch, "kicking participant"); let results = vote_leave(&voting_accounts, self.contract().id(), &kick).await; // Check if any result has failures, and return early with an error if so if results From 37451f628b95e2166788e52e8c364c305baf39fd Mon Sep 17 00:00:00 2001 From: Phuong N Date: Mon, 16 Dec 2024 21:57:37 +0000 Subject: [PATCH 30/42] Made presignature and signature have execute functions --- .../node/src/protocol/cryptography.rs | 106 ++---------------- chain-signatures/node/src/protocol/mod.rs | 4 + .../node/src/protocol/presignature.rs | 49 ++++++++ .../node/src/protocol/signature.rs | 66 +++++++++++ chain-signatures/node/src/protocol/triple.rs | 20 ++-- 5 files changed, 139 insertions(+), 106 deletions(-) diff --git a/chain-signatures/node/src/protocol/cryptography.rs b/chain-signatures/node/src/protocol/cryptography.rs index 0a3f625f..27eed432 100644 --- a/chain-signatures/node/src/protocol/cryptography.rs +++ b/chain-signatures/node/src/protocol/cryptography.rs @@ -1,10 +1,12 @@ use std::sync::PoisonError; +use super::signature::SignatureManager; use super::state::{GeneratingState, NodeState, ResharingState, RunningState}; use crate::config::Config; use crate::gcp::error::SecretStorageError; use crate::http_client::SendError; use crate::protocol::message::{GeneratingMessage, ResharingMessage}; +use crate::protocol::presignature::PresignatureManager; use crate::protocol::state::{PersistentNodeData, WaitingForConsensusState}; use crate::protocol::MeshState; use crate::protocol::MpcMessage; @@ -23,6 +25,7 @@ pub trait CryptographicCtx { fn signer(&self) -> &InMemorySigner; fn mpc_contract_id(&self) -> &AccountId; fn secret_storage(&mut self) -> &mut SecretNodeStorageBox; + fn my_account_id(&self) -> &AccountId; } #[derive(thiserror::Error, Debug)] @@ -374,103 +377,12 @@ impl CryptographicProtocol for RunningState { .clone() .execute(&active, &cfg.protocol, self.messages.clone()); - let messages = self.messages.clone(); - let triple_manager = self.triple_manager.clone(); - let presignature_manager = self.presignature_manager.clone(); - let active = mesh_state.active_participants.clone(); - let protocol_cfg = cfg.protocol.clone(); - let presig_task = tokio::task::spawn(async move { - let mut presignature_manager = presignature_manager.write().await; - if let Err(err) = presignature_manager - .stockpile( - &active, - &self.public_key, - &self.private_share, - &triple_manager, - &protocol_cfg, - ) - .await - { - tracing::warn!(?err, "running: failed to stockpile presignatures"); - } - let my_account_id = triple_manager.my_account_id.clone(); - - let mut messages = messages.write().await; - for (p, msg) in presignature_manager.poke().await { - messages.push(p, MpcMessage::Presignature(msg)); - } - drop(messages); - - crate::metrics::NUM_PRESIGNATURES_MINE - .with_label_values(&[my_account_id.as_str()]) - .set(presignature_manager.len_mine().await as i64); - 
crate::metrics::NUM_PRESIGNATURES_TOTAL - .with_label_values(&[my_account_id.as_str()]) - .set(presignature_manager.len_generated().await as i64); - crate::metrics::NUM_PRESIGNATURE_GENERATORS_TOTAL - .with_label_values(&[my_account_id.as_str()]) - .set( - presignature_manager.len_potential().await as i64 - - presignature_manager.len_generated().await as i64, - ); - }); - - // NOTE: signatures should only use stable and not active participants. The difference here is that - // stable participants utilizes more than the online status of a node, such as whether or not their - // block height is up to date, such that they too can process signature requests. If they cannot - // then they are considered unstable and should not be a part of signature generation this round. - let stable = mesh_state.stable_participants.clone(); - tracing::debug!(?stable, "stable participants"); - let my_account_id = self.triple_manager.my_account_id.clone(); + let presig_task = PresignatureManager::execute(&self, &active, &cfg.protocol); let me = ctx.me().await; - let sig_task = tokio::task::spawn({ - let presignature_manager = self.presignature_manager.clone(); - let signature_manager = self.signature_manager.clone(); - let messages = self.messages.clone(); - let protocol_cfg = cfg.protocol.clone(); - let sign_queue = self.sign_queue.clone(); - let rpc_client = ctx.rpc_client().clone(); - let signer = ctx.signer().clone(); - let mpc_contract_id = ctx.mpc_contract_id().clone(); - - tokio::task::unconstrained(async move { - tracing::debug!(?stable, "stable participants"); - - let mut sign_queue = sign_queue.write().await; - crate::metrics::SIGN_QUEUE_SIZE - .with_label_values(&[my_account_id.as_str()]) - .set(sign_queue.len() as i64); - sign_queue.organize(self.threshold, &stable, me, &my_account_id); - - let my_requests = sign_queue.my_requests(me); - crate::metrics::SIGN_QUEUE_MINE_SIZE - .with_label_values(&[my_account_id.as_str()]) - .set(my_requests.len() as i64); - - let mut presignature_manager = presignature_manager.write().await; - let mut signature_manager = signature_manager.write().await; - signature_manager - .handle_requests( - self.threshold, - &stable, - my_requests, - &mut presignature_manager, - &protocol_cfg, - ) - .await; - drop(presignature_manager); - - let mut messages = messages.write().await; - for (p, msg) in signature_manager.poke() { - messages.push(p, MpcMessage::Signature(msg)); - } - drop(messages); - signature_manager - .publish(&rpc_client, &signer, &mpc_contract_id) - .await; - }) - }); + let stable = mesh_state.stable_participants; + tracing::debug!(?stable, "stable participants"); + let sig_task = SignatureManager::execute(&self, &stable, me, &cfg.protocol, &ctx); match tokio::try_join!(triple_task, presig_task, sig_task) { Ok(_result) => (), @@ -480,6 +392,10 @@ impl CryptographicProtocol for RunningState { } let mut messages = self.messages.write().await; + crate::metrics::MESSAGE_QUEUE_SIZE + .with_label_values(&[ctx.my_account_id().as_str()]) + .set(messages.len() as i64); + let failures = messages .send_encrypted( me, diff --git a/chain-signatures/node/src/protocol/mod.rs b/chain-signatures/node/src/protocol/mod.rs index 33a2b0b2..2327bf2c 100644 --- a/chain-signatures/node/src/protocol/mod.rs +++ b/chain-signatures/node/src/protocol/mod.rs @@ -124,6 +124,10 @@ impl CryptographicCtx for &mut MpcSignProtocol { &self.ctx.mpc_contract_id } + fn my_account_id(&self) -> &AccountId { + &self.ctx.account_id + } + fn secret_storage(&mut self) -> &mut SecretNodeStorageBox { &mut 
self.ctx.secret_storage } diff --git a/chain-signatures/node/src/protocol/presignature.rs b/chain-signatures/node/src/protocol/presignature.rs index 4c409a6a..e287dfcb 100644 --- a/chain-signatures/node/src/protocol/presignature.rs +++ b/chain-signatures/node/src/protocol/presignature.rs @@ -1,4 +1,5 @@ use super::message::PresignatureMessage; +use super::state::RunningState; use super::triple::{Triple, TripleId, TripleManager}; use crate::protocol::contract::primitives::Participants; use crate::storage::presignature_storage::PresignatureStorage; @@ -650,6 +651,54 @@ impl PresignatureManager { messages } + + pub fn execute( + state: &RunningState, + active: &Participants, + protocol_cfg: &ProtocolConfig, + ) -> tokio::task::JoinHandle<()> { + let triple_manager = state.triple_manager.clone(); + let presignature_manager = state.presignature_manager.clone(); + let active = active.clone(); + let protocol_cfg = protocol_cfg.clone(); + let pk = state.public_key.clone(); + let sk_share = state.private_share.clone(); + let messages = state.messages.clone(); + + tokio::task::spawn(async move { + let mut presignature_manager = presignature_manager.write().await; + if let Err(err) = presignature_manager + .stockpile(&active, &pk, &sk_share, &triple_manager, &protocol_cfg) + .await + { + tracing::warn!(?err, "running: failed to stockpile presignatures"); + } + + { + let mut messages = messages.write().await; + messages.extend( + presignature_manager + .poke() + .await + .into_iter() + .map(|(p, msg)| (p, super::MpcMessage::Presignature(msg))), + ); + } + + crate::metrics::NUM_PRESIGNATURES_MINE + .with_label_values(&[presignature_manager.my_account_id.as_str()]) + .set(presignature_manager.len_mine().await as i64); + crate::metrics::NUM_PRESIGNATURES_TOTAL + .with_label_values(&[presignature_manager.my_account_id.as_str()]) + .set(presignature_manager.len_generated().await as i64); + crate::metrics::NUM_PRESIGNATURE_GENERATORS_TOTAL + .with_label_values(&[presignature_manager.my_account_id.as_str()]) + .set( + presignature_manager.len_potential().await as i64 + - presignature_manager.len_generated().await as i64, + ); + }) + } } pub fn hash_as_id(triple0: TripleId, triple1: TripleId) -> PresignatureId { diff --git a/chain-signatures/node/src/protocol/signature.rs b/chain-signatures/node/src/protocol/signature.rs index e1690565..b202268d 100644 --- a/chain-signatures/node/src/protocol/signature.rs +++ b/chain-signatures/node/src/protocol/signature.rs @@ -1,6 +1,7 @@ use super::contract::primitives::Participants; use super::message::SignatureMessage; use super::presignature::{GenerationError, Presignature, PresignatureId, PresignatureManager}; +use super::state::RunningState; use crate::indexer::ContractSignRequest; use crate::kdf::{derive_delta, into_eth_sig}; use crate::types::SignatureProtocol; @@ -811,4 +812,69 @@ impl SignatureManager { .and_modify(|e| *e = Instant::now()); matches!(entry, Entry::Occupied(_)) } + + pub fn execute( + state: &RunningState, + stable: &Participants, + me: Participant, + protocol_cfg: &ProtocolConfig, + ctx: &impl super::cryptography::CryptographicCtx, + ) -> tokio::task::JoinHandle<()> { + let threshold = state.threshold; + let my_account_id = state.triple_manager.my_account_id.clone(); + let presignature_manager = state.presignature_manager.clone(); + let signature_manager = state.signature_manager.clone(); + let messages = state.messages.clone(); + let stable = stable.clone(); + let protocol_cfg = protocol_cfg.clone(); + let sign_queue = state.sign_queue.clone(); + 
let rpc_client = ctx.rpc_client().clone(); + let signer = ctx.signer().clone(); + let mpc_contract_id = ctx.mpc_contract_id().clone(); + + // NOTE: signatures should only use stable and not active participants. The difference here is that + // stable participants utilizes more than the online status of a node, such as whether or not their + // block height is up to date, such that they too can process signature requests. If they cannot + // then they are considered unstable and should not be a part of signature generation this round. + + tokio::task::spawn(tokio::task::unconstrained(async move { + let mut sign_queue = sign_queue.write().await; + crate::metrics::SIGN_QUEUE_SIZE + .with_label_values(&[my_account_id.as_str()]) + .set(sign_queue.len() as i64); + sign_queue.organize(threshold, &stable, me, &my_account_id); + + let my_requests = sign_queue.my_requests(me); + crate::metrics::SIGN_QUEUE_MINE_SIZE + .with_label_values(&[my_account_id.as_str()]) + .set(my_requests.len() as i64); + + let mut presignature_manager = presignature_manager.write().await; + let mut signature_manager = signature_manager.write().await; + signature_manager + .handle_requests( + threshold, + &stable, + my_requests, + &mut presignature_manager, + &protocol_cfg, + ) + .await; + drop(presignature_manager); + + { + let mut messages = messages.write().await; + messages.extend( + signature_manager + .poke() + .into_iter() + .map(|(p, msg)| (p, crate::protocol::MpcMessage::Signature(msg))), + ); + } + + signature_manager + .publish(&rpc_client, &signer, &mpc_contract_id) + .await; + })) + } } diff --git a/chain-signatures/node/src/protocol/triple.rs b/chain-signatures/node/src/protocol/triple.rs index 83f781e7..bbe939dc 100644 --- a/chain-signatures/node/src/protocol/triple.rs +++ b/chain-signatures/node/src/protocol/triple.rs @@ -760,17 +760,15 @@ impl TripleManager { tracing::warn!(?err, "running: failed to stockpile triples"); } - let mut messages = messages.write().await; - messages.extend( - self.poke(&protocol_cfg) - .await - .into_iter() - .map(|(p, msg)| (p, MpcMessage::Triple(msg))), - ); - crate::metrics::MESSAGE_QUEUE_SIZE - .with_label_values(&[self.my_account_id.as_str()]) - .set(messages.len() as i64); - drop(messages); + { + let mut messages = messages.write().await; + messages.extend( + self.poke(&protocol_cfg) + .await + .into_iter() + .map(|(p, msg)| (p, MpcMessage::Triple(msg))), + ); + } crate::metrics::NUM_TRIPLES_MINE .with_label_values(&[self.my_account_id.as_str()]) From c435100f196f965fcc979cae0b3d7fadd5f461ac Mon Sep 17 00:00:00 2001 From: Phuong N Date: Mon, 16 Dec 2024 22:27:34 +0000 Subject: [PATCH 31/42] Better errors on vote_{join, leave} --- .../chain-signatures/src/utils.rs | 96 ++++++++++++++----- .../chain-signatures/tests/lib.rs | 20 ++-- 2 files changed, 78 insertions(+), 38 deletions(-) diff --git a/integration-tests/chain-signatures/src/utils.rs b/integration-tests/chain-signatures/src/utils.rs index 1303519f..26c1da9a 100644 --- a/integration-tests/chain-signatures/src/utils.rs +++ b/integration-tests/chain-signatures/src/utils.rs @@ -1,6 +1,5 @@ use anyhow::Context; use hyper::{Body, Client, Method, Request, StatusCode, Uri}; -use near_workspaces::result::ExecutionFinalResult; use near_workspaces::{Account, AccountId}; use std::fs; @@ -9,29 +8,43 @@ pub async fn vote_join( mpc_contract: &AccountId, account_id: &AccountId, ) -> anyhow::Result<()> { - let vote_futures = accounts - .iter() - .map(|account| { - tracing::info!( - "{} voting for new participant: {}", - account.id(), 
- account_id - ); - account - .call(mpc_contract, "vote_join") - .args_json(serde_json::json!({ - "candidate": account_id - })) - .transact() - }) - .collect::>(); + let vote_futures = accounts.iter().map(|account| { + tracing::info!( + "{} voting for new participant: {}", + account.id(), + account_id + ); + account + .call(mpc_contract, "vote_join") + .args_json(serde_json::json!({ + "candidate": account_id + })) + .transact() + }); - futures::future::join_all(vote_futures) - .await - .iter() - .for_each(|result| { - assert!(result.as_ref().unwrap().failures().is_empty()); - }); + let mut errs = Vec::new(); + for result in futures::future::join_all(vote_futures).await { + let outcome = match result { + Ok(outcome) => outcome, + Err(err) => { + errs.push(anyhow::anyhow!("workspaces/rpc failed: {err:?}")); + continue; + } + }; + + if !outcome.failures().is_empty() { + errs.push(anyhow::anyhow!( + "contract(vote_join) failure: {:?}", + outcome.failures() + )) + } + } + + if !errs.is_empty() { + let err = format!("failed to vote_join: {errs:#?}"); + tracing::warn!(err); + anyhow::bail!(err); + } Ok(()) } @@ -40,7 +53,7 @@ pub async fn vote_leave( accounts: &[&Account], mpc_contract: &AccountId, account_id: &AccountId, -) -> Vec> { +) -> anyhow::Result<()> { let vote_futures = accounts .iter() .filter(|account| account.id() != account_id) @@ -54,7 +67,40 @@ pub async fn vote_leave( }) .collect::>(); - futures::future::join_all(vote_futures).await + let mut kicked = false; + let mut errs = Vec::new(); + for result in futures::future::join_all(vote_futures).await { + let outcome = match result { + Ok(outcome) => outcome, + Err(err) => { + errs.push(anyhow::anyhow!("workspaces/rpc failed: {err:?}")); + continue; + } + }; + + if !outcome.failures().is_empty() { + errs.push(anyhow::anyhow!( + "contract(vote_leave) failure: {:?}", + outcome.failures() + )) + } else { + kicked = kicked || outcome.json::().unwrap(); + } + } + + if !errs.is_empty() { + let err = format!("failed to vote_leave: {errs:#?}"); + tracing::warn!(err); + anyhow::bail!(err); + } + + if !kicked { + let err = "failed to vote_leave on number of votes"; + tracing::warn!(err); + anyhow::bail!(err); + } + + Ok(()) } pub async fn get(uri: U) -> anyhow::Result diff --git a/integration-tests/chain-signatures/tests/lib.rs b/integration-tests/chain-signatures/tests/lib.rs index fa2ac214..f333b17e 100644 --- a/integration-tests/chain-signatures/tests/lib.rs +++ b/integration-tests/chain-signatures/tests/lib.rs @@ -48,13 +48,12 @@ impl Cluster { .take(state.threshold) .cloned() .collect::>(); - assert!(vote_join( + vote_join( &voting_participants, self.contract().id(), node_account.id(), ) - .await - .is_ok()); + .await?; let new_state = self.wait().running_on_epoch(state.epoch + 1).await?; assert_eq!(new_state.participants.len(), state.participants.len() + 1); @@ -80,15 +79,7 @@ impl Cluster { .collect::>(); tracing::info!(?voting_accounts, %kick, at_epoch = state.epoch, "kicking participant"); - let results = vote_leave(&voting_accounts, self.contract().id(), &kick).await; - // Check if any result has failures, and return early with an error if so - if results - .iter() - .any(|result| !result.as_ref().unwrap().failures().is_empty()) - { - tracing::error!(?voting_accounts, "failed to vote"); - anyhow::bail!("failed to vote_leave"); - } + vote_leave(&voting_accounts, self.contract().id(), &kick).await?; let new_state = self.wait().running_on_epoch(state.epoch + 1).await?; tracing::info!( @@ -106,7 +97,10 @@ impl Cluster { "public key must 
stay the same" ); - Ok(self.nodes.kill_node(&kick).await) + let node_config = self.nodes.kill_node(&kick).await; + self.wait().running().await?; + + Ok(node_config) } pub async fn propose_update(&self, args: ProposeUpdateArgs) -> mpc_contract::update::UpdateId { From f320346cff098917b00dbb1106107ce1f0074239 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Mon, 16 Dec 2024 23:19:22 +0000 Subject: [PATCH 32/42] Wait on finality final for running mpc --- integration-tests/chain-signatures/tests/actions/wait.rs | 2 +- integration-tests/chain-signatures/tests/cluster/mod.rs | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/integration-tests/chain-signatures/tests/actions/wait.rs b/integration-tests/chain-signatures/tests/actions/wait.rs index cc2ef8f5..cd84e764 100644 --- a/integration-tests/chain-signatures/tests/actions/wait.rs +++ b/integration-tests/chain-signatures/tests/actions/wait.rs @@ -292,7 +292,7 @@ pub async fn running_mpc( let strategy = ConstantBuilder::default() .with_delay(std::time::Duration::from_secs(3)) - .with_max_times(if epoch.is_some() { 300 } else { 100 }); + .with_max_times(100); is_running.retry(&strategy).await.with_context(|| { format!( diff --git a/integration-tests/chain-signatures/tests/cluster/mod.rs b/integration-tests/chain-signatures/tests/cluster/mod.rs index e2dbff23..551fbe43 100644 --- a/integration-tests/chain-signatures/tests/cluster/mod.rs +++ b/integration-tests/chain-signatures/tests/cluster/mod.rs @@ -5,6 +5,7 @@ use std::collections::HashSet; use integration_tests_chain_signatures::local::NodeEnvConfig; use mpc_contract::primitives::Participants; use near_workspaces::network::Sandbox; +use near_workspaces::types::Finality; use spawner::ClusterSpawner; use mpc_contract::{ProtocolContractState, RunningContractState}; @@ -90,6 +91,7 @@ impl Cluster { let state: ProtocolContractState = self .contract() .view("state") + .finality(Finality::Final) .await .with_context(|| "could not view state")? 
.json()?; From b7f1f674d0a5a6212c46fc89e0fc99f772a6ab6d Mon Sep 17 00:00:00 2001 From: Phuong N Date: Mon, 16 Dec 2024 23:24:42 +0000 Subject: [PATCH 33/42] minor optimization --- chain-signatures/contract/src/primitives.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/chain-signatures/contract/src/primitives.rs b/chain-signatures/contract/src/primitives.rs index 9fc321b8..dcd1441f 100644 --- a/chain-signatures/contract/src/primitives.rs +++ b/chain-signatures/contract/src/primitives.rs @@ -125,8 +125,8 @@ impl Default for Participants { impl From for Participants { fn from(candidates: Candidates) -> Self { let mut participants = Participants::new(); - for (account_id, candidate_info) in candidates.iter() { - participants.insert(account_id.clone(), candidate_info.clone().into()); + for (account_id, candidate_info) in candidates.into_iter() { + participants.insert(account_id, candidate_info.into()); } participants } @@ -216,6 +216,10 @@ impl Candidates { pub fn iter(&self) -> impl Iterator { self.candidates.iter() } + + pub fn into_iter(self) -> impl IntoIterator { + self.candidates.into_iter() + } } #[derive(BorshDeserialize, BorshSerialize, Serialize, Deserialize, Debug)] From 5d61c1429185f559b0015c158dcfe01e8a9320c6 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Tue, 17 Dec 2024 00:21:27 +0000 Subject: [PATCH 34/42] Added debug impl for SignOutcome --- chain-signatures/node/src/protocol/triple.rs | 1 - .../chain-signatures/tests/actions/sign.rs | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/chain-signatures/node/src/protocol/triple.rs b/chain-signatures/node/src/protocol/triple.rs index bbe939dc..8a356d6f 100644 --- a/chain-signatures/node/src/protocol/triple.rs +++ b/chain-signatures/node/src/protocol/triple.rs @@ -343,7 +343,6 @@ impl TripleTasks { if !self.queued.is_empty() && to_generate_len > 0 { for _ in 0..to_generate_len { if let Some(id) = self.queued.pop_front() { - tracing::info!(id, "spawning triple generation task"); self.ongoing.insert(id); let generator = self.generators.get(&id).unwrap(); self.ongoing_tasks diff --git a/integration-tests/chain-signatures/tests/actions/sign.rs b/integration-tests/chain-signatures/tests/actions/sign.rs index dd3093cf..2afdf50c 100644 --- a/integration-tests/chain-signatures/tests/actions/sign.rs +++ b/integration-tests/chain-signatures/tests/actions/sign.rs @@ -1,3 +1,4 @@ +use std::fmt; use std::future::IntoFuture; use cait_sith::FullSignature; @@ -32,6 +33,19 @@ pub struct SignOutcome { pub signature: FullSignature, } +impl fmt::Debug for SignOutcome { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SignOutcome") + .field("account", &self.account) + .field("rogue", &self.rogue) + .field("payload", &self.payload) + .field("payload_hash", &self.payload_hash) + .field("signature_big_r", &self.signature.big_r) + .field("signature_s", &self.signature.s) + .finish() + } +} + pub struct SignAction<'a> { nodes: &'a Cluster, count: usize, From 9ccf81c393f0706c4e4db7748e3fc8e9e40dac2b Mon Sep 17 00:00:00 2001 From: Phuong N Date: Tue, 17 Dec 2024 00:47:00 +0000 Subject: [PATCH 35/42] Fix offline test --- .../chain-signatures/tests/cases/mod.rs | 16 +++------------- integration-tests/chain-signatures/tests/lib.rs | 11 +++++++---- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/integration-tests/chain-signatures/tests/cases/mod.rs b/integration-tests/chain-signatures/tests/cases/mod.rs index b28f41c0..34c4e35b 100644 --- 
a/integration-tests/chain-signatures/tests/cases/mod.rs +++ b/integration-tests/chain-signatures/tests/cases/mod.rs @@ -87,21 +87,11 @@ async fn test_signature_offline_node() -> anyhow::Result<()> { // Kill the node then have presignatures and signature generation only use the active set of nodes // to start generating presignatures and signatures. let account_id = nodes.account_ids().into_iter().last().unwrap().clone(); - nodes.leave(Some(&account_id)).await.unwrap(); + nodes.stop(&account_id).await.unwrap(); - // This could potentially fail and timeout the first time if the participant set picked up is the - // one with the offline node. This is expected behavior for now if a user submits a request in between - // a node going offline and the system hasn't detected it yet. nodes.wait().signable().await.unwrap(); - let outcome = nodes.sign().await; - - // Try again if the first attempt failed. This second portion should not be needed when the NEP - // comes in for resumeable MPC. - if outcome.is_err() { - // Retry if the first attempt failed. - nodes.wait().signable().await.unwrap(); - let _outcome = nodes.sign().await.unwrap(); - } + let outcome = nodes.sign().await.unwrap(); + dbg!(outcome); Ok(()) } diff --git a/integration-tests/chain-signatures/tests/lib.rs b/integration-tests/chain-signatures/tests/lib.rs index f333b17e..31dd8e9d 100644 --- a/integration-tests/chain-signatures/tests/lib.rs +++ b/integration-tests/chain-signatures/tests/lib.rs @@ -65,6 +65,12 @@ impl Cluster { Ok(()) } + pub async fn stop(&mut self, node: &AccountId) -> anyhow::Result { + let config = self.nodes.kill_node(node).await; + self.wait().running().await?; + Ok(config) + } + pub async fn leave(&mut self, kick: Option<&AccountId>) -> anyhow::Result { let state = self.expect_running().await?; let participant_accounts = self.participant_accounts().await?; @@ -97,10 +103,7 @@ impl Cluster { "public key must stay the same" ); - let node_config = self.nodes.kill_node(&kick).await; - self.wait().running().await?; - - Ok(node_config) + self.stop(&kick).await } pub async fn propose_update(&self, args: ProposeUpdateArgs) -> mpc_contract::update::UpdateId { From b066ae679c5e070ada4abb2f067acc1d14e3c667 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Tue, 17 Dec 2024 01:28:53 +0000 Subject: [PATCH 36/42] Updated nightly to run 100 sigs --- .../chain-signatures/tests/cases/nightly.rs | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/integration-tests/chain-signatures/tests/cases/nightly.rs b/integration-tests/chain-signatures/tests/cases/nightly.rs index d8db8430..010bde4e 100644 --- a/integration-tests/chain-signatures/tests/cases/nightly.rs +++ b/integration-tests/chain-signatures/tests/cases/nightly.rs @@ -1,3 +1,4 @@ +use near_sdk::NearToken; use test_log::test; use crate::cluster; @@ -5,7 +6,7 @@ use crate::cluster; #[test(tokio::test)] #[ignore = "This is triggered by the nightly Github Actions pipeline"] async fn test_nightly_signature_production() -> anyhow::Result<()> { - const SIGNATURE_AMOUNT: usize = 1000; + const SIGNATURE_AMOUNT: usize = 100; const NODES: usize = 8; const THRESHOLD: usize = 4; const MIN_TRIPLES: u32 = 10; @@ -21,16 +22,12 @@ async fn test_nightly_signature_production() -> anyhow::Result<()> { .wait_for_running() .await?; - for i in 0..SIGNATURE_AMOUNT { - if let Err(err) = nodes.wait().signable().await { - tracing::error!(?err, "Failed to be ready to sign"); - continue; - } + let tasks = (0..SIGNATURE_AMOUNT) + .map(|_| async { 
nodes.sign().deposit(NearToken::from_near(1)).await }); + let outcomes = futures::future::join_all(tasks).await; - tracing::info!(at_signature = i, "Producing signature..."); - if let Err(err) = nodes.sign().await { - tracing::error!(?err, "Failed to produce signature"); - } + for outcome in outcomes { + println!("produce signature {outcome:?}"); } Ok(()) From 6082db59110f37806bba7ab78b64adfb0111664e Mon Sep 17 00:00:00 2001 From: Phuong N Date: Fri, 20 Dec 2024 01:33:30 +0000 Subject: [PATCH 37/42] Added iter impls for candidates and participants --- chain-signatures/contract/src/primitives.rs | 66 +++++++++++++++++++-- 1 file changed, 62 insertions(+), 4 deletions(-) diff --git a/chain-signatures/contract/src/primitives.rs b/chain-signatures/contract/src/primitives.rs index dcd1441f..58712fbd 100644 --- a/chain-signatures/contract/src/primitives.rs +++ b/chain-signatures/contract/src/primitives.rs @@ -3,7 +3,7 @@ use k256::Scalar; use near_sdk::borsh::{self, BorshDeserialize, BorshSerialize}; use near_sdk::serde::{Deserialize, Serialize}; use near_sdk::{AccountId, BorshStorageKey, CryptoHash, NearToken, PublicKey}; -use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::{btree_map, BTreeMap, HashMap, HashSet}; pub mod hpke { pub type PublicKey = [u8; 32]; @@ -162,10 +162,14 @@ impl Participants { self.participants.get(account_id) } - pub fn iter(&self) -> impl Iterator { + pub fn iter(&self) -> btree_map::Iter<'_, AccountId, ParticipantInfo> { self.participants.iter() } + pub fn iter_mut(&mut self) -> btree_map::IterMut<'_, AccountId, ParticipantInfo> { + self.participants.iter_mut() + } + pub fn keys(&self) -> impl Iterator { self.participants.keys() } @@ -179,6 +183,33 @@ impl Participants { } } +impl<'a> IntoIterator for &'a Participants { + type Item = (&'a AccountId, &'a ParticipantInfo); + type IntoIter = btree_map::Iter<'a, AccountId, ParticipantInfo>; + + fn into_iter(self) -> Self::IntoIter { + self.participants.iter() + } +} + +impl<'a> IntoIterator for &'a mut Participants { + type Item = (&'a AccountId, &'a mut ParticipantInfo); + type IntoIter = btree_map::IterMut<'a, AccountId, ParticipantInfo>; + + fn into_iter(self) -> Self::IntoIter { + self.participants.iter_mut() + } +} + +impl IntoIterator for Participants { + type Item = (AccountId, ParticipantInfo); + type IntoIter = btree_map::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.participants.into_iter() + } +} + #[derive(BorshDeserialize, BorshSerialize, Serialize, Deserialize, Debug, Clone)] pub struct Candidates { pub candidates: BTreeMap, @@ -213,11 +244,38 @@ impl Candidates { self.candidates.get(account_id) } - pub fn iter(&self) -> impl Iterator { + pub fn iter(&self) -> btree_map::Iter<'_, AccountId, CandidateInfo> { + self.candidates.iter() + } + + pub fn iter_mut(&mut self) -> btree_map::IterMut<'_, AccountId, CandidateInfo> { + self.candidates.iter_mut() + } +} + +impl<'a> IntoIterator for &'a Candidates { + type Item = (&'a AccountId, &'a CandidateInfo); + type IntoIter = btree_map::Iter<'a, AccountId, CandidateInfo>; + + fn into_iter(self) -> Self::IntoIter { self.candidates.iter() } +} + +impl<'a> IntoIterator for &'a mut Candidates { + type Item = (&'a AccountId, &'a mut CandidateInfo); + type IntoIter = btree_map::IterMut<'a, AccountId, CandidateInfo>; + + fn into_iter(self) -> Self::IntoIter { + self.candidates.iter_mut() + } +} + +impl IntoIterator for Candidates { + type Item = (AccountId, CandidateInfo); + type IntoIter = btree_map::IntoIter; - pub fn into_iter(self) -> 
impl IntoIterator { + fn into_iter(self) -> Self::IntoIter { self.candidates.into_iter() } } From 64e49c4c94a793f8a789a7a90d6d7ebc455d154d Mon Sep 17 00:00:00 2001 From: Phuong N Date: Sat, 4 Jan 2025 00:22:49 +0000 Subject: [PATCH 38/42] Clippy --- .../node/src/protocol/cryptography.rs | 8 +++--- .../node/src/protocol/presignature.rs | 4 +-- chain-signatures/node/src/protocol/triple.rs | 28 +++++++++++-------- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/chain-signatures/node/src/protocol/cryptography.rs b/chain-signatures/node/src/protocol/cryptography.rs index 27eed432..3621b5cf 100644 --- a/chain-signatures/node/src/protocol/cryptography.rs +++ b/chain-signatures/node/src/protocol/cryptography.rs @@ -286,7 +286,7 @@ impl CryptographicProtocol for ResharingState { } messages.push( - p.clone(), + *p, MpcMessage::Resharing(ResharingMessage { epoch: self.old_epoch, from: me, @@ -363,7 +363,7 @@ impl CryptographicProtocol for RunningState { cfg: Config, mesh_state: MeshState, ) -> Result { - let active = mesh_state.active_participants.clone(); + let active = mesh_state.active_participants; if active.len() < self.threshold { tracing::warn!( active = ?active.keys_vec(), @@ -401,13 +401,13 @@ impl CryptographicProtocol for RunningState { me, &cfg.local.network.sign_sk, ctx.http_client(), - &mesh_state.active_participants, + &active, &cfg.protocol, ) .await; if !failures.is_empty() { tracing::warn!( - active = ?mesh_state.active_participants.keys_vec(), + active = ?active.keys_vec(), "running: failed to send encrypted message; {failures:?}" ); } diff --git a/chain-signatures/node/src/protocol/presignature.rs b/chain-signatures/node/src/protocol/presignature.rs index 39b0b15c..c0f79771 100644 --- a/chain-signatures/node/src/protocol/presignature.rs +++ b/chain-signatures/node/src/protocol/presignature.rs @@ -659,8 +659,8 @@ impl PresignatureManager { let presignature_manager = state.presignature_manager.clone(); let active = active.clone(); let protocol_cfg = protocol_cfg.clone(); - let pk = state.public_key.clone(); - let sk_share = state.private_share.clone(); + let pk = state.public_key; + let sk_share = state.private_share; let messages = state.messages.clone(); tokio::task::spawn(async move { diff --git a/chain-signatures/node/src/protocol/triple.rs b/chain-signatures/node/src/protocol/triple.rs index 563c2430..8f6e6cb4 100644 --- a/chain-signatures/node/src/protocol/triple.rs +++ b/chain-signatures/node/src/protocol/triple.rs @@ -55,9 +55,9 @@ pub struct TripleGenerator { impl TripleGenerator { pub fn new( + id: TripleId, me: Participant, threshold: usize, - id: TripleId, participants: Vec, timeout: u64, ) -> Result { @@ -243,6 +243,10 @@ pub struct TripleTasks { /// back to the main loop. protocol_budget: Duration, + /// The threshold for the number of participants required to generate a triple. This is + /// the same as the threshold for signing: we maintain a copy here for easy access. + threshold: usize, + /// The pool of triple protocols that have yet to be completed. 
pub generators: HashMap, @@ -273,9 +277,10 @@ impl std::fmt::Debug for TripleTasks { } impl TripleTasks { - pub fn new(budget: Duration) -> Self { + pub fn new(threshold: usize, protocol_budget: Duration) -> Self { Self { - protocol_budget: budget, + protocol_budget, + threshold, generators: HashMap::new(), queued: VecDeque::new(), ongoing: HashSet::new(), @@ -293,7 +298,6 @@ impl TripleTasks { pub fn entry( &mut self, me: Participant, - threshold: usize, id: TripleId, potential_len: usize, cfg: &ProtocolConfig, @@ -311,9 +315,9 @@ impl TripleTasks { tracing::info!(id, "joining protocol to generate a new triple"); let participants = participants.keys_vec(); let generator = e.insert(TripleGenerator::new( - me, - threshold, id, + me, + self.threshold, participants, cfg.triple.generation_timeout, )?); @@ -358,7 +362,7 @@ impl TripleTasks { .iter() .any(|(running_id, _)| running_id == id) { - let generator = self.generators.get(&id).unwrap(); + let generator = self.generators.get(id).unwrap(); self.ongoing_tasks .push_back((*id, generator.spawn_execution(me, my_account_id, epoch))); } @@ -458,7 +462,10 @@ impl TripleManager { storage: &TripleStorage, ) -> Self { Self { - tasks: Arc::new(RwLock::new(TripleTasks::new(Duration::from_millis(100)))), + tasks: Arc::new(RwLock::new(TripleTasks::new( + threshold, + Duration::from_millis(100), + ))), gc: Arc::new(RwLock::new(HashMap::new())), me, threshold, @@ -644,12 +651,12 @@ impl TripleManager { } tracing::debug!(id, "starting protocol to generate a new triple"); - let participants: Vec<_> = participants.keys().cloned().collect(); { + let participants = participants.keys_vec(); let mut tasks = self.tasks.write().await; tasks.generators.insert( id, - TripleGenerator::new(self.me, self.threshold, id, participants, timeout)?, + TripleGenerator::new(id, self.me, self.threshold, participants, timeout)?, ); tasks.queued.push_back(id); tasks.introduced.insert(id); @@ -716,7 +723,6 @@ impl TripleManager { let mut tasks = self.tasks.write().await; tasks.entry( self.me, - self.threshold, id, potential_len, cfg, From cf825bf6efee57eb4d4ad0399ca244ad30a88060 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Tue, 7 Jan 2025 22:28:08 +0000 Subject: [PATCH 39/42] Made sign queue indexer use sign_tx for sending requests --- chain-signatures/node/src/cli.rs | 4 +- chain-signatures/node/src/indexer.rs | 22 ++++++---- chain-signatures/node/src/protocol/mod.rs | 4 +- .../node/src/protocol/signature.rs | 42 ++++++++++++------- 4 files changed, 44 insertions(+), 28 deletions(-) diff --git a/chain-signatures/node/src/cli.rs b/chain-signatures/node/src/cli.rs index 5d6bdebc..3ab31d10 100644 --- a/chain-signatures/node/src/cli.rs +++ b/chain-signatures/node/src/cli.rs @@ -191,7 +191,7 @@ pub fn run(cmd: Cli) -> anyhow::Result<()> { mesh_options, message_options, } => { - let sign_queue = Arc::new(RwLock::new(SignQueue::new())); + let (sign_tx, sign_queue) = SignQueue::new(); let rt = tokio::runtime::Builder::new_multi_thread() .enable_all() .build()?; @@ -221,7 +221,7 @@ pub fn run(cmd: Cli) -> anyhow::Result<()> { &indexer_options, &mpc_contract_id, &account_id, - &sign_queue, + sign_tx, app_data_storage, rpc_client.clone(), )?; diff --git a/chain-signatures/node/src/indexer.rs b/chain-signatures/node/src/indexer.rs index bd15a3e7..7fdd5eb3 100644 --- a/chain-signatures/node/src/indexer.rs +++ b/chain-signatures/node/src/indexer.rs @@ -1,4 +1,4 @@ -use crate::protocol::{SignQueue, SignRequest}; +use crate::protocol::SignRequest; use 
crate::storage::app_data_storage::AppDataStorage; use crypto_shared::{derive_epsilon, ScalarExt}; use k256::Scalar; @@ -13,7 +13,7 @@ use std::ops::Mul; use std::sync::Arc; use std::thread::JoinHandle; use std::time::{Duration, Instant}; -use tokio::sync::RwLock; +use tokio::sync::{mpsc, RwLock}; /// Configures indexer. #[derive(Debug, Clone, clap::Parser)] @@ -169,7 +169,7 @@ impl Indexer { struct Context { mpc_contract_id: AccountId, node_account_id: AccountId, - queue: Arc>, + sign_tx: mpsc::Sender, indexer: Indexer, } @@ -267,14 +267,20 @@ async fn handle_block( // Add the requests after going through the whole block to avoid partial processing if indexer fails somewhere. // This way we can revisit the same block if we failed while not having added the requests partially. - let mut queue = ctx.queue.write().await; for request in pending_requests { - queue.add(request); + tracing::info!( + request_id = ?near_primitives::hash::CryptoHash(request.request_id), + payload = hex::encode(request.request.payload.to_bytes()), + entropy = hex::encode(request.entropy), + "new sign request" + ); + if let Err(err) = ctx.sign_tx.send(request).await { + tracing::error!(?err, "failed to send the sign request into sign queue"); + } crate::metrics::NUM_SIGN_REQUESTS .with_label_values(&[ctx.node_account_id.as_str()]) .inc(); } - drop(queue); let log_indexing_interval = 1000; if block.block_height() % log_indexing_interval == 0 { @@ -292,7 +298,7 @@ pub fn run( options: &Options, mpc_contract_id: &AccountId, node_account_id: &AccountId, - queue: &Arc>, + sign_tx: mpsc::Sender, app_data_storage: AppDataStorage, rpc_client: near_fetch::Client, ) -> anyhow::Result<(JoinHandle>, Indexer)> { @@ -308,7 +314,7 @@ pub fn run( let context = Context { mpc_contract_id: mpc_contract_id.clone(), node_account_id: node_account_id.clone(), - queue: queue.clone(), + sign_tx, indexer: indexer.clone(), }; diff --git a/chain-signatures/node/src/protocol/mod.rs b/chain-signatures/node/src/protocol/mod.rs index 2327bf2c..70e8f553 100644 --- a/chain-signatures/node/src/protocol/mod.rs +++ b/chain-signatures/node/src/protocol/mod.rs @@ -155,7 +155,7 @@ impl MpcSignProtocol { rpc_client: near_fetch::Client, signer: InMemorySigner, receiver: mpsc::Receiver, - sign_queue: Arc>, + sign_queue: SignQueue, secret_storage: SecretNodeStorageBox, triple_storage: TripleStorage, presignature_storage: PresignatureStorage, @@ -179,7 +179,7 @@ impl MpcSignProtocol { mpc_contract_id, rpc_client, http_client: reqwest::Client::new(), - sign_queue, + sign_queue: Arc::new(RwLock::new(sign_queue)), signer, secret_storage, triple_storage, diff --git a/chain-signatures/node/src/protocol/signature.rs b/chain-signatures/node/src/protocol/signature.rs index a915a881..399e26d1 100644 --- a/chain-signatures/node/src/protocol/signature.rs +++ b/chain-signatures/node/src/protocol/signature.rs @@ -22,12 +22,17 @@ use rand::SeedableRng; use std::collections::hash_map::Entry; use std::collections::{HashMap, VecDeque}; use std::time::{Duration, Instant}; +use tokio::sync::mpsc; +use tokio::sync::mpsc::error::TryRecvError; use near_account_id::AccountId; use near_fetch::signer::SignerExt; pub type ReceiptId = near_primitives::hash::CryptoHash; +/// This is the maximum amount of sign requests that we can accept in the network. 
+const MAX_SIGN_REQUESTS: usize = 1024; + pub struct SignRequest { pub request_id: [u8; 32], pub request: ContractSignRequest, @@ -60,35 +65,31 @@ impl ParticipantRequests { } } -#[derive(Default)] pub struct SignQueue { - unorganized_requests: Vec, requests: HashMap, + sign_rx: mpsc::Receiver, } impl SignQueue { - pub fn new() -> Self { - Self::default() + pub fn new() -> (mpsc::Sender, Self) { + let (sign_tx, sign_rx) = mpsc::channel(MAX_SIGN_REQUESTS); + ( + sign_tx, + Self { + requests: HashMap::new(), + sign_rx, + }, + ) } pub fn len(&self) -> usize { - self.unorganized_requests.len() + self.requests.len() } pub fn is_empty(&self) -> bool { self.len() == 0 } - pub fn add(&mut self, request: SignRequest) { - tracing::info!( - request_id = ?CryptoHash(request.request_id), - payload = hex::encode(request.request.payload.to_bytes()), - entropy = hex::encode(request.entropy), - "new sign request" - ); - self.unorganized_requests.push(request); - } - pub fn organize( &mut self, threshold: usize, @@ -105,7 +106,16 @@ impl SignQueue { ); return; } - for request in self.unorganized_requests.drain(..) { + + while let Ok(request) = { + match self.sign_rx.try_recv() { + err @ Err(TryRecvError::Disconnected) => { + tracing::error!("sign queue channel disconnected"); + err + } + other => other, + } + } { let mut rng = StdRng::from_seed(request.entropy); let subset = stable.keys().choose_multiple(&mut rng, threshold); let proposer = **subset.choose(&mut rng).unwrap(); From 9b2abfefab8b596fbb3b8b17d729a8b0c647853b Mon Sep 17 00:00:00 2001 From: Phuong N Date: Tue, 7 Jan 2025 23:27:29 +0000 Subject: [PATCH 40/42] Made sign_rx be passed around instead of sign queue --- chain-signatures/node/src/cli.rs | 4 +- .../node/src/protocol/consensus.rs | 11 +- .../node/src/protocol/cryptography.rs | 5 +- chain-signatures/node/src/protocol/mod.rs | 10 +- .../node/src/protocol/signature.rs | 133 +++++++----------- chain-signatures/node/src/protocol/state.rs | 2 - 6 files changed, 68 insertions(+), 97 deletions(-) diff --git a/chain-signatures/node/src/cli.rs b/chain-signatures/node/src/cli.rs index 3ab31d10..4e93780b 100644 --- a/chain-signatures/node/src/cli.rs +++ b/chain-signatures/node/src/cli.rs @@ -191,7 +191,7 @@ pub fn run(cmd: Cli) -> anyhow::Result<()> { mesh_options, message_options, } => { - let (sign_tx, sign_queue) = SignQueue::new(); + let (sign_tx, sign_rx) = SignQueue::channel(); let rt = tokio::runtime::Builder::new_multi_thread() .enable_all() .build()?; @@ -257,7 +257,7 @@ pub fn run(cmd: Cli) -> anyhow::Result<()> { rpc_client.clone(), signer, receiver, - sign_queue, + sign_rx, key_storage, triple_storage, presignature_storage, diff --git a/chain-signatures/node/src/protocol/consensus.rs b/chain-signatures/node/src/protocol/consensus.rs index b9782373..8628ca02 100644 --- a/chain-signatures/node/src/protocol/consensus.rs +++ b/chain-signatures/node/src/protocol/consensus.rs @@ -3,7 +3,6 @@ use super::state::{ JoiningState, NodeState, PersistentNodeData, RunningState, StartedState, WaitingForConsensusState, }; -use super::SignQueue; use crate::config::Config; use crate::gcp::error::SecretStorageError; use crate::http_client::MessageQueue; @@ -12,6 +11,7 @@ use crate::protocol::presignature::PresignatureManager; use crate::protocol::signature::SignatureManager; use crate::protocol::state::{GeneratingState, ResharingState}; use crate::protocol::triple::TripleManager; +use crate::protocol::SignRequest; use crate::storage::presignature_storage::PresignatureStorage; use 
crate::storage::secret_storage::SecretNodeStorageBox; use crate::storage::triple_storage::TripleStorage; @@ -25,7 +25,7 @@ use std::sync::Arc; use async_trait::async_trait; use cait_sith::protocol::InitializationError; use serde_json::json; -use tokio::sync::RwLock; +use tokio::sync::{mpsc, RwLock}; use url::Url; use near_account_id::AccountId; @@ -38,7 +38,7 @@ pub trait ConsensusCtx { fn signer(&self) -> &InMemorySigner; fn mpc_contract_id(&self) -> &AccountId; fn my_address(&self) -> &Url; - fn sign_queue(&self) -> Arc>; + fn sign_rx(&self) -> Arc>>; fn secret_storage(&self) -> &SecretNodeStorageBox; fn triple_storage(&self) -> &TripleStorage; fn presignature_storage(&self) -> &PresignatureStorage; @@ -118,7 +118,6 @@ impl ConsensusProtocol for StartedState { } Ordering::Less => Err(ConsensusError::EpochRollback), Ordering::Equal => { - let sign_queue = ctx.sign_queue(); match contract_state .participants .find_participant(ctx.my_account_id()) @@ -150,6 +149,7 @@ impl ConsensusProtocol for StartedState { public_key, epoch, ctx.my_account_id(), + ctx.sign_rx(), ))); Ok(NodeState::Running(RunningState { @@ -158,7 +158,6 @@ impl ConsensusProtocol for StartedState { threshold: contract_state.threshold, private_share, public_key, - sign_queue, triple_manager, presignature_manager, signature_manager, @@ -391,6 +390,7 @@ impl ConsensusProtocol for WaitingForConsensusState { self.public_key, self.epoch, ctx.my_account_id(), + ctx.sign_rx(), ))); Ok(NodeState::Running(RunningState { @@ -399,7 +399,6 @@ impl ConsensusProtocol for WaitingForConsensusState { threshold: self.threshold, private_share: self.private_share, public_key: self.public_key, - sign_queue: ctx.sign_queue(), triple_manager, presignature_manager, signature_manager, diff --git a/chain-signatures/node/src/protocol/cryptography.rs b/chain-signatures/node/src/protocol/cryptography.rs index 3621b5cf..b9dfb2b3 100644 --- a/chain-signatures/node/src/protocol/cryptography.rs +++ b/chain-signatures/node/src/protocol/cryptography.rs @@ -379,10 +379,9 @@ impl CryptographicProtocol for RunningState { let presig_task = PresignatureManager::execute(&self, &active, &cfg.protocol); - let me = ctx.me().await; let stable = mesh_state.stable_participants; tracing::debug!(?stable, "stable participants"); - let sig_task = SignatureManager::execute(&self, &stable, me, &cfg.protocol, &ctx); + let sig_task = SignatureManager::execute(&self, &stable, &cfg.protocol, &ctx); match tokio::try_join!(triple_task, presig_task, sig_task) { Ok(_result) => (), @@ -398,7 +397,7 @@ impl CryptographicProtocol for RunningState { let failures = messages .send_encrypted( - me, + ctx.me().await, &cfg.local.network.sign_sk, ctx.http_client(), &active, diff --git a/chain-signatures/node/src/protocol/mod.rs b/chain-signatures/node/src/protocol/mod.rs index 70e8f553..4be7f956 100644 --- a/chain-signatures/node/src/protocol/mod.rs +++ b/chain-signatures/node/src/protocol/mod.rs @@ -49,7 +49,7 @@ struct Ctx { signer: InMemorySigner, rpc_client: near_fetch::Client, http_client: reqwest::Client, - sign_queue: Arc>, + sign_rx: Arc>>, secret_storage: SecretNodeStorageBox, triple_storage: TripleStorage, presignature_storage: PresignatureStorage, @@ -81,8 +81,8 @@ impl ConsensusCtx for &mut MpcSignProtocol { &self.ctx.my_address } - fn sign_queue(&self) -> Arc> { - self.ctx.sign_queue.clone() + fn sign_rx(&self) -> Arc>> { + self.ctx.sign_rx.clone() } fn secret_storage(&self) -> &SecretNodeStorageBox { @@ -155,7 +155,7 @@ impl MpcSignProtocol { rpc_client: near_fetch::Client, 
signer: InMemorySigner, receiver: mpsc::Receiver, - sign_queue: SignQueue, + sign_rx: mpsc::Receiver, secret_storage: SecretNodeStorageBox, triple_storage: TripleStorage, presignature_storage: PresignatureStorage, @@ -179,7 +179,7 @@ impl MpcSignProtocol { mpc_contract_id, rpc_client, http_client: reqwest::Client::new(), - sign_queue: Arc::new(RwLock::new(sign_queue)), + sign_rx: Arc::new(RwLock::new(sign_rx)), signer, secret_storage, triple_storage, diff --git a/chain-signatures/node/src/protocol/signature.rs b/chain-signatures/node/src/protocol/signature.rs index 399e26d1..bd9f9df0 100644 --- a/chain-signatures/node/src/protocol/signature.rs +++ b/chain-signatures/node/src/protocol/signature.rs @@ -21,9 +21,10 @@ use rand::seq::{IteratorRandom, SliceRandom}; use rand::SeedableRng; use std::collections::hash_map::Entry; use std::collections::{HashMap, VecDeque}; +use std::sync::Arc; use std::time::{Duration, Instant}; -use tokio::sync::mpsc; use tokio::sync::mpsc::error::TryRecvError; +use tokio::sync::{mpsc, RwLock}; use near_account_id::AccountId; use near_fetch::signer::SignerExt; @@ -41,45 +42,23 @@ pub struct SignRequest { pub time_added: Instant, } -/// Type that preserves the insertion order of requests. -#[derive(Default)] -pub struct ParticipantRequests { - requests: VecDeque, -} - -impl ParticipantRequests { - fn insert(&mut self, request: SignRequest) { - self.requests.push_back(request); - } - - pub fn len(&self) -> usize { - self.requests.len() - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn pop_front(&mut self) -> Option { - self.requests.pop_front() - } -} - pub struct SignQueue { - requests: HashMap, - sign_rx: mpsc::Receiver, + me: Participant, + sign_rx: Arc>>, + requests: HashMap>, } impl SignQueue { - pub fn new() -> (mpsc::Sender, Self) { - let (sign_tx, sign_rx) = mpsc::channel(MAX_SIGN_REQUESTS); - ( - sign_tx, - Self { - requests: HashMap::new(), - sign_rx, - }, - ) + pub fn channel() -> (mpsc::Sender, mpsc::Receiver) { + mpsc::channel(MAX_SIGN_REQUESTS) + } + + pub fn new(me: Participant, sign_rx: Arc>>) -> Self { + Self { + me, + sign_rx, + requests: HashMap::new(), + } } pub fn len(&self) -> usize { @@ -90,25 +69,15 @@ impl SignQueue { self.len() == 0 } - pub fn organize( + pub async fn organize( &mut self, threshold: usize, stable: &Participants, - me: Participant, my_account_id: &AccountId, ) { - if stable.len() < threshold { - tracing::warn!( - "Require at least {} stable participants to organize, got {}: {:?}", - threshold, - stable.len(), - stable.keys_vec() - ); - return; - } - + let mut sign_rx = self.sign_rx.write().await; while let Ok(request) = { - match self.sign_rx.try_recv() { + match sign_rx.try_recv() { err @ Err(TryRecvError::Disconnected) => { tracing::error!("sign queue channel disconnected"); err @@ -119,8 +88,8 @@ impl SignQueue { let mut rng = StdRng::from_seed(request.entropy); let subset = stable.keys().choose_multiple(&mut rng, threshold); let proposer = **subset.choose(&mut rng).unwrap(); - if subset.contains(&&me) { - let is_mine = proposer == me; + if subset.contains(&&self.me) { + let is_mine = proposer == self.me; tracing::info!( request_id = ?CryptoHash(request.request_id), ?is_mine, @@ -129,7 +98,7 @@ impl SignQueue { "saving sign request: node is in the signer subset" ); let proposer_requests = self.requests.entry(proposer).or_default(); - proposer_requests.insert(request); + proposer_requests.push_back(request); if is_mine { crate::metrics::NUM_SIGN_REQUESTS_MINE 
.with_label_values(&[my_account_id.as_str()]) @@ -138,7 +107,7 @@ impl SignQueue { } else { tracing::info!( rrequest_id = ?CryptoHash(request.request_id), - ?me, + me = ?self.me, ?subset, ?proposer, "skipping sign request: node is NOT in the signer subset" @@ -147,8 +116,12 @@ impl SignQueue { } } - pub fn my_requests(&mut self, me: Participant) -> &mut ParticipantRequests { - self.requests.entry(me).or_default() + pub fn take_my_requests(&mut self) -> VecDeque { + self.requests.remove(&self.me).unwrap_or_default() + } + + pub fn insert_mine(&mut self, requests: VecDeque) { + self.requests.insert(self.me, requests); } } @@ -258,6 +231,9 @@ pub struct SignatureManager { public_key: PublicKey, epoch: u64, my_account_id: AccountId, + + /// Sign queue that maintains all requests coming in from indexer. + sign_queue: SignQueue, } pub const MAX_RETRY: u8 = 10; @@ -292,6 +268,7 @@ impl SignatureManager { public_key: PublicKey, epoch: u64, my_account_id: &AccountId, + sign_rx: Arc>>, ) -> Self { Self { generators: HashMap::new(), @@ -302,6 +279,7 @@ impl SignatureManager { public_key, epoch, my_account_id: my_account_id.clone(), + sign_queue: SignQueue::new(me, sign_rx), } } @@ -626,7 +604,6 @@ impl SignatureManager { &mut self, threshold: usize, stable: &Participants, - my_requests: &mut ParticipantRequests, presignature_manager: &mut PresignatureManager, cfg: &ProtocolConfig, ) { @@ -639,6 +616,18 @@ impl SignatureManager { ); return; } + + self.sign_queue + .organize(threshold, stable, &self.my_account_id) + .await; + crate::metrics::SIGN_QUEUE_SIZE + .with_label_values(&[self.my_account_id.as_str()]) + .set(self.sign_queue.len() as i64); + let mut my_requests = self.sign_queue.take_my_requests(); + crate::metrics::SIGN_QUEUE_MINE_SIZE + .with_label_values(&[self.my_account_id.as_str()]) + .set(my_requests.len() as i64); + while let Some(mut presignature) = { if self.failed.is_empty() && my_requests.is_empty() { None @@ -686,7 +675,7 @@ impl SignatureManager { } let Some(my_request) = my_requests.pop_front() else { - tracing::warn!("Unexpected state, no more requests to handle"); + tracing::warn!("unexpected state, no more requests to handle"); continue; }; @@ -704,6 +693,12 @@ impl SignatureManager { continue; } } + + // We do not have enough presignature stockpile and the taken requests need to be fulfilled, + // so insert it back into the sign queue to be fulfilled in the next iteration. + if !my_requests.is_empty() { + self.sign_queue.insert_mine(my_requests); + } } pub async fn publish( @@ -814,18 +809,15 @@ impl SignatureManager { pub fn execute( state: &RunningState, stable: &Participants, - me: Participant, protocol_cfg: &ProtocolConfig, ctx: &impl super::cryptography::CryptographicCtx, ) -> tokio::task::JoinHandle<()> { let threshold = state.threshold; - let my_account_id = state.triple_manager.my_account_id.clone(); let presignature_manager = state.presignature_manager.clone(); let signature_manager = state.signature_manager.clone(); let messages = state.messages.clone(); let stable = stable.clone(); let protocol_cfg = protocol_cfg.clone(); - let sign_queue = state.sign_queue.clone(); let rpc_client = ctx.rpc_client().clone(); let signer = ctx.signer().clone(); let mpc_contract_id = ctx.mpc_contract_id().clone(); @@ -836,27 +828,10 @@ impl SignatureManager { // then they are considered unstable and should not be a part of signature generation this round. 
tokio::task::spawn(tokio::task::unconstrained(async move { - let mut sign_queue = sign_queue.write().await; - crate::metrics::SIGN_QUEUE_SIZE - .with_label_values(&[my_account_id.as_str()]) - .set(sign_queue.len() as i64); - sign_queue.organize(threshold, &stable, me, &my_account_id); - - let my_requests = sign_queue.my_requests(me); - crate::metrics::SIGN_QUEUE_MINE_SIZE - .with_label_values(&[my_account_id.as_str()]) - .set(my_requests.len() as i64); - - let mut presignature_manager = presignature_manager.write().await; let mut signature_manager = signature_manager.write().await; + let mut presignature_manager = presignature_manager.write().await; signature_manager - .handle_requests( - threshold, - &stable, - my_requests, - &mut presignature_manager, - &protocol_cfg, - ) + .handle_requests(threshold, &stable, &mut presignature_manager, &protocol_cfg) .await; drop(presignature_manager); diff --git a/chain-signatures/node/src/protocol/state.rs b/chain-signatures/node/src/protocol/state.rs index e1fe1b91..31a4bb6e 100644 --- a/chain-signatures/node/src/protocol/state.rs +++ b/chain-signatures/node/src/protocol/state.rs @@ -3,7 +3,6 @@ use super::cryptography::CryptographicError; use super::presignature::PresignatureManager; use super::signature::SignatureManager; use super::triple::TripleManager; -use super::SignQueue; use crate::http_client::MessageQueue; use crate::types::{KeygenProtocol, ReshareProtocol, SecretKeyShare}; @@ -91,7 +90,6 @@ pub struct RunningState { pub threshold: usize, pub private_share: SecretKeyShare, pub public_key: PublicKey, - pub sign_queue: Arc>, pub triple_manager: TripleManager, pub presignature_manager: Arc>, pub signature_manager: Arc>, From ce3b7aa85182182a37b87132c61bdb136d1a3213 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Tue, 7 Jan 2025 23:51:59 +0000 Subject: [PATCH 41/42] Made SignatureManager store threshold --- .../node/src/protocol/consensus.rs | 6 ++++-- .../node/src/protocol/signature.rs | 21 ++++++++++--------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/chain-signatures/node/src/protocol/consensus.rs b/chain-signatures/node/src/protocol/consensus.rs index 8628ca02..16c05d38 100644 --- a/chain-signatures/node/src/protocol/consensus.rs +++ b/chain-signatures/node/src/protocol/consensus.rs @@ -146,9 +146,10 @@ impl ConsensusProtocol for StartedState { let signature_manager = Arc::new(RwLock::new(SignatureManager::new( me, + ctx.my_account_id(), + contract_state.threshold, public_key, epoch, - ctx.my_account_id(), ctx.sign_rx(), ))); @@ -387,9 +388,10 @@ impl ConsensusProtocol for WaitingForConsensusState { let signature_manager = Arc::new(RwLock::new(SignatureManager::new( me, + ctx.my_account_id(), + self.threshold, self.public_key, self.epoch, - ctx.my_account_id(), ctx.sign_rx(), ))); diff --git a/chain-signatures/node/src/protocol/signature.rs b/chain-signatures/node/src/protocol/signature.rs index bd9f9df0..6e400db0 100644 --- a/chain-signatures/node/src/protocol/signature.rs +++ b/chain-signatures/node/src/protocol/signature.rs @@ -228,9 +228,10 @@ pub struct SignatureManager { /// Vec<(receipt_id, msg_hash, timestamp, output)> signatures: Vec, me: Participant, + my_account_id: AccountId, + threshold: usize, public_key: PublicKey, epoch: u64, - my_account_id: AccountId, /// Sign queue that maintains all requests coming in from indexer. 
sign_queue: SignQueue, @@ -265,9 +266,10 @@ impl ToPublish { impl SignatureManager { pub fn new( me: Participant, + my_account_id: &AccountId, + threshold: usize, public_key: PublicKey, epoch: u64, - my_account_id: &AccountId, sign_rx: Arc>>, ) -> Self { Self { @@ -276,9 +278,10 @@ impl SignatureManager { completed: HashMap::new(), signatures: Vec::new(), me, + my_account_id: my_account_id.clone(), + threshold, public_key, epoch, - my_account_id: my_account_id.clone(), sign_queue: SignQueue::new(me, sign_rx), } } @@ -602,15 +605,14 @@ impl SignatureManager { pub async fn handle_requests( &mut self, - threshold: usize, stable: &Participants, presignature_manager: &mut PresignatureManager, cfg: &ProtocolConfig, ) { - if stable.len() < threshold { + if stable.len() < self.threshold { tracing::warn!( "Require at least {} stable participants to handle_requests, got {}: {:?}", - threshold, + self.threshold, stable.len(), stable.keys_vec() ); @@ -618,7 +620,7 @@ impl SignatureManager { } self.sign_queue - .organize(threshold, stable, &self.my_account_id) + .organize(self.threshold, stable, &self.my_account_id) .await; crate::metrics::SIGN_QUEUE_SIZE .with_label_values(&[self.my_account_id.as_str()]) @@ -636,7 +638,7 @@ impl SignatureManager { } } { let sig_participants = stable.intersection(&[&presignature.participants]); - if sig_participants.len() < threshold { + if sig_participants.len() < self.threshold { tracing::warn!( participants = ?sig_participants.keys_vec(), "intersection of stable participants and presignature participants is less than threshold, trashing presignature" @@ -812,7 +814,6 @@ impl SignatureManager { protocol_cfg: &ProtocolConfig, ctx: &impl super::cryptography::CryptographicCtx, ) -> tokio::task::JoinHandle<()> { - let threshold = state.threshold; let presignature_manager = state.presignature_manager.clone(); let signature_manager = state.signature_manager.clone(); let messages = state.messages.clone(); @@ -831,7 +832,7 @@ impl SignatureManager { let mut signature_manager = signature_manager.write().await; let mut presignature_manager = presignature_manager.write().await; signature_manager - .handle_requests(threshold, &stable, &mut presignature_manager, &protocol_cfg) + .handle_requests(&stable, &mut presignature_manager, &protocol_cfg) .await; drop(presignature_manager); From 5a70c080d3318df002ef07cd33f01c5603153304 Mon Sep 17 00:00:00 2001 From: Phuong N Date: Wed, 8 Jan 2025 21:36:20 +0000 Subject: [PATCH 42/42] Fix sign_queue::len --- chain-signatures/node/src/protocol/signature.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chain-signatures/node/src/protocol/signature.rs b/chain-signatures/node/src/protocol/signature.rs index 6e400db0..99d3938e 100644 --- a/chain-signatures/node/src/protocol/signature.rs +++ b/chain-signatures/node/src/protocol/signature.rs @@ -62,7 +62,7 @@ impl SignQueue { } pub fn len(&self) -> usize { - self.requests.len() + self.requests.values().map(|v| v.len()).sum() } pub fn is_empty(&self) -> bool {
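
Editor's illustrative sketch (not part of the patch series above): the final commit changes SignQueue::len from counting map entries to summing the per-proposer queues. The standalone Rust example below, with simplified stand-in types (SimpleSignQueue, FakeRequest, integer Participant are assumptions for illustration only), shows why the two counts diverge once a proposer has more than one pending request, which is what the SIGN_QUEUE_SIZE metric relies on.

use std::collections::{HashMap, VecDeque};

type Participant = u32;

struct FakeRequest(u64);

#[derive(Default)]
struct SimpleSignQueue {
    // Pending requests grouped by the proposer chosen for each request.
    requests: HashMap<Participant, VecDeque<FakeRequest>>,
}

impl SimpleSignQueue {
    fn push(&mut self, proposer: Participant, request: FakeRequest) {
        self.requests.entry(proposer).or_default().push_back(request);
    }

    // Pre-fix behavior: counts proposers that have at least one request.
    fn len_by_entries(&self) -> usize {
        self.requests.len()
    }

    // Post-fix behavior: counts every pending request across all proposers.
    fn len_by_requests(&self) -> usize {
        self.requests.values().map(|queue| queue.len()).sum()
    }
}

fn main() {
    let mut queue = SimpleSignQueue::default();
    queue.push(0, FakeRequest(1));
    queue.push(0, FakeRequest(2));
    queue.push(1, FakeRequest(3));

    // Two proposers hold three requests in total, so the old count
    // under-reports the queue depth by one.
    assert_eq!(queue.len_by_entries(), 2);
    assert_eq!(queue.len_by_requests(), 3);
    println!(
        "entries = {}, requests = {}",
        queue.len_by_entries(),
        queue.len_by_requests()
    );
}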