diff --git a/.cargo/config.toml b/.cargo/config.toml index ace541bb4..ae425ea9c 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,7 +1,18 @@ [build] # https://github.com/rust-lang/rust/pull/124129 # https://github.com/dtolnay/linkme/pull/88 -rustflags = ["-Z", "linker-features=-lld"] + +[env] +RUST_BACKTRACE = "1" +RUST_TEST_NOCAPTURE = "1" + +[term] +verbose = true +color = 'auto' + +[target.x86_64-unknown-linux-gnu] +linker = "clang" +rustflags = ["-Z", "linker-features=-lld", "-C", "target-cpu=native"] #, "-C", "link-arg=-fuse-ld=/usr/bin/mold", "-C", "debuginfo=2"] [alias] xtask = ["run", "--package=xtask", "--"] diff --git a/Cargo.lock b/Cargo.lock index d64c3c1b7..14eca4d65 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1151,6 +1151,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "build-array" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67ef4e2687af237b2646687e19a0643bc369878216122e46c3f1a01c56baa9d5" +dependencies = [ + "arrayvec", +] + [[package]] name = "bumpalo" version = "3.16.0" @@ -2033,6 +2042,7 @@ dependencies = [ "anyhow", "bytes", "criterion", + "derive_more", "env_logger 0.11.5", "ethereum-types", "hashbrown", @@ -5220,6 +5230,7 @@ dependencies = [ "assert2", "bitflags 2.6.0", "bitvec", + "build-array", "bytes", "camino", "ciborium", @@ -5300,6 +5311,16 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-serde" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc6b213177105856957181934e4920de57730fc69bf42c37ee5bb664d406d9e1" +dependencies = [ + "serde", + "tracing-core", +] + [[package]] name = "tracing-subscriber" version = "0.3.18" @@ -5310,12 +5331,15 @@ dependencies = [ "nu-ansi-term", "once_cell", "regex", + "serde", + "serde_json", "sharded-slab", "smallvec", "thread_local", "tracing", "tracing-core", "tracing-log", + "tracing-serde", ] [[package]] @@ -5901,6 +5925,8 @@ version = "0.1.0" dependencies = [ 
"alloy", "alloy-compat", + "alloy-primitives", + "alloy-serde", "anyhow", "async-stream", "axum", diff --git a/Cargo.toml b/Cargo.toml index 117f124b1..094b53a12 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,8 @@ alloy = { version = '0.3.0', default-features = false, features = [ "transport-http", "rpc-types-debug", ] } +alloy-primitives = "0.8.0" +alloy-serde = "0.3.0" anyhow = "1.0.86" async-stream = "0.3.5" axum = "0.7.5" @@ -47,6 +49,7 @@ ciborium-io = "0.2.2" clap = { version = "4.5.7", features = ["derive", "env"] } compat = { path = "compat" } criterion = "0.5.1" +derive_more = "1.0.0" dotenvy = "0.15.7" either = "1.12.0" enum-as-inner = "0.6.0" @@ -86,6 +89,7 @@ ruint = "1.12.3" serde = "1.0.203" serde_json = "1.0.118" serde_path_to_error = "0.1.16" +serde_with = "3.8.1" serde-big-array = "0.5.1" sha2 = "0.10.8" static_assertions = "1.1.0" @@ -94,8 +98,8 @@ thiserror = "1.0.61" tiny-keccak = "2.0.2" tokio = { version = "1.38.0", features = ["full"] } tower = "0.4" -tracing = "0.1" -tracing-subscriber = { version = "0.3", features = ["env-filter"] } +tracing = { version = "0.1", features = ["attributes"] } +tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } trybuild = "1.0" u4 = "0.1.0" uint = "0.9.5" @@ -119,3 +123,27 @@ starky = { git = "https://github.com/0xPolygonZero/plonky2.git", rev = "2488cdac [workspace.lints.clippy] too_long_first_doc_paragraph = "allow" + +[profile.release] +opt-level = 3 +debug = true +incremental = true +debug-assertions = true +lto = false +overflow-checks = false + +[profile.test] +opt-level = 3 +debug = true +incremental = true +debug-assertions = true +lto = false +overflow-checks = false + +[profile.dev] +opt-level = 3 +debug = true +incremental = true +debug-assertions = true +lto = false +overflow-checks = false \ No newline at end of file diff --git a/evm_arithmetization/Cargo.toml b/evm_arithmetization/Cargo.toml index f5dfec2f2..9749eb084 100644 --- a/evm_arithmetization/Cargo.toml +++ 
b/evm_arithmetization/Cargo.toml @@ -17,6 +17,7 @@ keywords.workspace = true [dependencies] anyhow.workspace = true bytes.workspace = true +derive_more.workspace = true env_logger.workspace = true ethereum-types.workspace = true hashbrown.workspace = true diff --git a/evm_arithmetization/benches/fibonacci_25m_gas.rs b/evm_arithmetization/benches/fibonacci_25m_gas.rs index 2242b3049..38344629c 100644 --- a/evm_arithmetization/benches/fibonacci_25m_gas.rs +++ b/evm_arithmetization/benches/fibonacci_25m_gas.rs @@ -192,6 +192,7 @@ fn prepare_setup() -> anyhow::Result> { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: None, }) } diff --git a/evm_arithmetization/src/cpu/kernel/interpreter.rs b/evm_arithmetization/src/cpu/kernel/interpreter.rs index d9745504e..3acbe9835 100644 --- a/evm_arithmetization/src/cpu/kernel/interpreter.rs +++ b/evm_arithmetization/src/cpu/kernel/interpreter.rs @@ -5,11 +5,13 @@ //! the future execution and generate nondeterministically the corresponding //! jumpdest table, before the actual CPU carries on with contract execution. 
+use core::option::Option::None; use std::collections::{BTreeSet, HashMap}; use anyhow::anyhow; use ethereum_types::{BigEndianHash, U256}; -use log::Level; +use keccak_hash::H256; +use log::{trace, Level}; use mpt_trie::partial_trie::PartialTrie; use plonky2::hash::hash_types::RichField; use serde::{Deserialize, Serialize}; @@ -19,8 +21,10 @@ use crate::cpu::columns::CpuColumnsView; use crate::cpu::kernel::aggregator::KERNEL; use crate::cpu::kernel::constants::global_metadata::GlobalMetadata; use crate::generation::debug_inputs; +use crate::generation::jumpdest::{JumpDestTableProcessed, JumpDestTableWitness}; use crate::generation::linked_list::LinkedListsPtrs; use crate::generation::mpt::{load_linked_lists_and_txn_and_receipt_mpts, TrieRootPtrs}; +use crate::generation::prover_input::get_proofs_and_jumpdests; use crate::generation::rlp::all_rlp_prover_inputs_reversed; use crate::generation::state::{ all_ger_prover_inputs, all_withdrawals_prover_inputs_reversed, GenerationState, @@ -54,6 +58,7 @@ pub(crate) struct Interpreter { /// The interpreter will halt only if the current context matches /// halt_context pub(crate) halt_context: Option, + /// A table of call contexts and the JUMPDEST offsets that they jumped to. jumpdest_table: HashMap>, /// `true` if the we are currently carrying out a jumpdest analysis. 
pub(crate) is_jumpdest_analysis: bool, @@ -73,9 +78,9 @@ pub(crate) fn simulate_cpu_and_get_user_jumps( final_label: &str, state: &GenerationState, -) -> Option>> { +) -> Option<(JumpDestTableProcessed, JumpDestTableWitness)> { match state.jumpdest_table { - Some(_) => None, + Some(_) => Default::default(), None => { let halt_pc = KERNEL.global_labels[final_label]; let initial_context = state.registers.context; @@ -94,16 +99,22 @@ pub(crate) fn simulate_cpu_and_get_user_jumps( let clock = interpreter.get_clock(); - interpreter + let (jdtp, jdtw) = interpreter .generation_state - .set_jumpdest_analysis_inputs(interpreter.jumpdest_table); + .get_jumpdest_analysis_inputs(interpreter.jumpdest_table.clone()); log::debug!( "Simulated CPU for jumpdest analysis halted after {:?} cycles.", clock ); - interpreter.generation_state.jumpdest_table + // if let Some(cc) = interpreter.generation_state.jumpdest_table { + // interpreter.generation_state.jumpdest_table = + // Some(JumpDestTableProcessed::merge([&cc, &jdtp])); + // } else { + // interpreter.generation_state.jumpdest_table = Some(jdtp.clone()); + // } + Some((jdtp, jdtw)) } } } @@ -116,7 +127,7 @@ pub(crate) struct ExtraSegmentData { pub(crate) withdrawal_prover_inputs: Vec, pub(crate) ger_prover_inputs: Vec, pub(crate) trie_root_ptrs: TrieRootPtrs, - pub(crate) jumpdest_table: Option>>, + pub(crate) jumpdest_table: Option, pub(crate) access_lists_ptrs: LinkedListsPtrs, pub(crate) state_ptrs: LinkedListsPtrs, pub(crate) next_txn_index: usize, @@ -152,6 +163,49 @@ pub(crate) fn set_registers_and_run( interpreter.run() } +/// Computes the JUMPDEST proofs for each context. +/// +/// # Arguments +/// +/// - `jumpdest_table_rpc`: The raw table received from RPC. +/// - `generation_state`: The current generation state, providing the current +///   context and contract code. +/// +/// # Output +/// +/// Returns a [`JumpDestTableProcessed`]. 
+pub(crate) fn get_jumpdest_analysis_inputs_rpc_progressive( + jumpdest_table_rpc: &JumpDestTableWitness, + generation_state: &GenerationState, +) -> JumpDestTableProcessed { + let current_ctx = generation_state.registers.context; + let current_code = generation_state.get_current_code().unwrap(); + let current_code_hash = generation_state.get_current_code_hash().unwrap(); + let code_map: &HashMap> = &generation_state.inputs.contract_code; + + trace!( + "current_code: {:?}, current_code_hash: {:?}, {:?} <============", + ¤t_code, + ¤t_code_hash, + code_map.contains_key(¤t_code_hash), + ); + trace!("code_map: {:?}", &code_map); + dbg!(current_ctx, current_code_hash, jumpdest_table_rpc.clone()); + let mut ctx_proof = HashMap::>::new(); + if jumpdest_table_rpc.contains_key(¤t_code_hash) { + let cc = &(*jumpdest_table_rpc)[¤t_code_hash].0; + if cc.contains_key(¤t_ctx) { + let current_offsets = cc[¤t_ctx].clone(); + //let ctx_proof = prove_context_jumpdests(¤t_code, &offsets); + let largest_address = current_offsets.last().unwrap().clone(); + let offset_proofs = + get_proofs_and_jumpdests(¤t_code, largest_address, current_offsets); + ctx_proof.insert(current_ctx, offset_proofs); + } + } + + JumpDestTableProcessed::new(ctx_proof) +} + impl Interpreter { /// Returns an instance of `Interpreter` given `GenerationInputs`, and /// assuming we are initializing with the `KERNEL` code. 
diff --git a/evm_arithmetization/src/cpu/kernel/tests/add11.rs b/evm_arithmetization/src/cpu/kernel/tests/add11.rs index 683987244..71933ec3e 100644 --- a/evm_arithmetization/src/cpu/kernel/tests/add11.rs +++ b/evm_arithmetization/src/cpu/kernel/tests/add11.rs @@ -193,6 +193,7 @@ fn test_add11_yml() { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: None, }; let initial_stack = vec![]; @@ -370,6 +371,7 @@ fn test_add11_yml_with_exception() { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: None, }; let initial_stack = vec![]; diff --git a/evm_arithmetization/src/cpu/kernel/tests/core/jumpdest_analysis.rs b/evm_arithmetization/src/cpu/kernel/tests/core/jumpdest_analysis.rs index f2d00ede5..0bb07eaf5 100644 --- a/evm_arithmetization/src/cpu/kernel/tests/core/jumpdest_analysis.rs +++ b/evm_arithmetization/src/cpu/kernel/tests/core/jumpdest_analysis.rs @@ -10,13 +10,15 @@ use plonky2::hash::hash_types::RichField; use crate::cpu::kernel::aggregator::KERNEL; use crate::cpu::kernel::interpreter::Interpreter; use crate::cpu::kernel::opcodes::{get_opcode, get_push_opcode}; +use crate::generation::jumpdest::JumpDestTableProcessed; use crate::memory::segments::Segment; use crate::witness::memory::MemoryAddress; use crate::witness::operation::CONTEXT_SCALING_FACTOR; impl Interpreter { pub(crate) fn set_jumpdest_analysis_inputs(&mut self, jumps: HashMap>) { - self.generation_state.set_jumpdest_analysis_inputs(jumps); + let (jdtp, _jdtw) = self.generation_state.get_jumpdest_analysis_inputs(jumps); + self.generation_state.jumpdest_table = Some(jdtp); } pub(crate) fn get_jumpdest_bit(&self, offset: usize) -> U256 { @@ -106,7 +108,10 @@ fn test_jumpdest_analysis() -> Result<()> { interpreter.generation_state.jumpdest_table, // Context 3 has jumpdest 1, 5, 7. All have proof 0 and hence // the list [proof_0, jumpdest_0, ... 
] is [0, 1, 0, 5, 0, 7, 8, 40] - Some(HashMap::from([(3, vec![0, 1, 0, 5, 0, 7, 8, 40])])) + Some(JumpDestTableProcessed::new(HashMap::from([( + 3, + vec![0, 1, 0, 5, 0, 7, 8, 40] + )]))) ); // Run jumpdest analysis with context = 3 @@ -175,7 +180,9 @@ fn test_packed_verification() -> Result<()> { let mut interpreter: Interpreter = Interpreter::new(write_table_if_jumpdest, initial_stack.clone(), None); interpreter.set_code(CONTEXT, code.clone()); - interpreter.generation_state.jumpdest_table = Some(HashMap::from([(3, vec![1, 33])])); + interpreter.generation_state.jumpdest_table = Some(JumpDestTableProcessed::new(HashMap::from( + [(3, vec![1, 33])], + ))); interpreter.run()?; @@ -188,7 +195,9 @@ fn test_packed_verification() -> Result<()> { let mut interpreter: Interpreter = Interpreter::new(write_table_if_jumpdest, initial_stack.clone(), None); interpreter.set_code(CONTEXT, code.clone()); - interpreter.generation_state.jumpdest_table = Some(HashMap::from([(3, vec![1, 33])])); + interpreter.generation_state.jumpdest_table = Some(JumpDestTableProcessed::new( + HashMap::from([(3, vec![1, 33])]), + )); assert!(interpreter.run().is_err()); diff --git a/evm_arithmetization/src/cpu/kernel/tests/init_exc_stop.rs b/evm_arithmetization/src/cpu/kernel/tests/init_exc_stop.rs index 2dea58b55..27374888c 100644 --- a/evm_arithmetization/src/cpu/kernel/tests/init_exc_stop.rs +++ b/evm_arithmetization/src/cpu/kernel/tests/init_exc_stop.rs @@ -101,6 +101,7 @@ fn test_init_exc_stop() { cur_hash: H256::default(), }, ger_data: None, + jumpdest_table: None, }; let initial_stack = vec![]; let initial_offset = KERNEL.global_labels["init"]; diff --git a/evm_arithmetization/src/generation/jumpdest.rs b/evm_arithmetization/src/generation/jumpdest.rs new file mode 100644 index 000000000..bcdbfdddb --- /dev/null +++ b/evm_arithmetization/src/generation/jumpdest.rs @@ -0,0 +1,196 @@ +//! EVM opcode 0x5B or 91 is [`JUMPDEST`] which encodes a a valid offset, that +//! 
opcodes `JUMP` and `JUMPI` can jump to. Jumps to non-[`JUMPDEST`] +//! instructions are invalid. During an execution a [`JUMPDEST`] may be visited +//! zero or more times. Offsets are measured in bytes with respect to the +//! beginning of some contract code, which is uniquely identified by its +//! `CodeHash`. Every time control flow is switched to another contract through +//! a `CALL`-like instruction, a new call context, `Context`, is created. Thus, +//! the triple (`CodeHash`,`Context`, `Offset`) uniquely identifies a visited +//! [`JUMPDEST`] offset of an execution. +//! +//! Since an operation like e.g. `PUSH 0x5B` does not encode a valid +//! [`JUMPDEST`] in its second byte, and `PUSH32 +//! 5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B5B` does not +//! encode any valid [`JUMPDEST`] in bytes 1-32, some diligence must be +//! exercised when proving validity of jump operations. +//! +//! This module concerns itself with data structures for collecting these +//! offsets for [`JUMPDEST`]s that were visited during an execution and are not +//! recording duplicates. The proofs that each of these offsets is not rendered +//! invalid by `PUSH1`-`PUSH32` in any of the previous 32 bytes, are computed +//! later in `prove_context_jumpdests` on the basis of these collections. +//! +//! [`JUMPDEST`]: https://www.evm.codes/?fork=cancun#5b + +use std::cmp::max; +use std::{ + collections::{BTreeSet, HashMap}, + fmt::Display, +}; + +use derive_more::derive::{Deref, DerefMut}; +use itertools::{sorted, Itertools}; +use keccak_hash::H256; +use serde::{Deserialize, Serialize}; + +/// Each `CodeHash` can be called one or more times, +/// each time creating a new `Context`. +/// Each `Context` will contain one or more offsets of `JUMPDEST`. +#[derive(PartialEq, Eq, Debug, Clone, Serialize, Deserialize, Default, Deref, DerefMut)] +pub struct Context(pub HashMap>); + +/// The result after proving a [`JumpDestTableWitness`]. 
+#[derive(PartialEq, Eq, Debug, Clone, Serialize, Deserialize, Default, Deref, DerefMut)] +pub(crate) struct JumpDestTableProcessed(HashMap>); + +/// Map `CodeHash -> (Context -> [JumpDests])` +#[derive(PartialEq, Eq, Debug, Clone, Serialize, Deserialize, Default, Deref, DerefMut)] +pub struct JumpDestTableWitness(HashMap); + +impl Context { + pub fn insert(&mut self, ctx: usize, offset: usize) { + self.entry(ctx).or_default().insert(offset); + } + + pub fn get(&self, ctx: usize) -> Option<&BTreeSet> { + self.0.get(&ctx) + } +} + +impl JumpDestTableProcessed { + pub fn new(ctx_map: HashMap>) -> Self { + Self(ctx_map) + } + + pub fn merge<'a>(jdts: impl IntoIterator) -> Self { + jdts.into_iter().fold(Default::default(), |acc, next| { + let mut inner = acc.0.clone(); + let b = next.iter().map(|(a, b)| (a.clone(), b.clone())); + inner.extend(b); + JumpDestTableProcessed(inner) + }) + } +} + +impl JumpDestTableWitness { + pub fn get(&self, code_hash: &H256) -> Option<&Context> { + self.0.get(code_hash) + } + + /// Insert `offset` into `ctx` under the corresponding `code_hash`. + /// Creates the required `ctx` keys and `code_hash`. Idempotent. + pub fn insert(&mut self, code_hash: H256, ctx: usize, offset: usize) { + (*self).entry(code_hash).or_default().insert(ctx, offset); + } + + pub fn extend(mut self, other: &Self, prev_max_ctx: usize) -> (Self, usize) { + let mut curr_max_ctx = prev_max_ctx; + + for (code_hash, ctx_tbl) in (*other).iter() { + for (ctx, jumpdests) in ctx_tbl.0.iter() { + let batch_ctx = prev_max_ctx + ctx; + curr_max_ctx = max(curr_max_ctx, batch_ctx); + + for offset in jumpdests { + self.insert(*code_hash, batch_ctx, *offset); + } + } + } + + (self, curr_max_ctx) + } + + pub fn merge<'a>(jdts: impl IntoIterator) -> (Self, usize) { + jdts.into_iter() + .fold((Default::default(), 0), |(acc, cnt), next| { + acc.extend(next, cnt) + }) + } +} + +// The following Display instances are added to make it easier to read diffs. 
+impl Display for JumpDestTableWitness { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "\n=== JumpDestTableWitness ===")?; + + for (code, ctxtbls) in &self.0 { + write!(f, "codehash: {:#x}\n{}", code, ctxtbls)?; + } + Ok(()) + } +} + +impl Display for Context { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let v: Vec<_> = self.0.iter().sorted().collect(); + for (ctx, offsets) in v.into_iter() { + write!(f, " ctx: {:>4}: [", ctx)?; + for offset in offsets { + write!(f, "{:#}, ", offset)?; + } + writeln!(f, "]")?; + } + Ok(()) + } +} + +impl Display for JumpDestTableProcessed { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "\n=== JumpDestTableProcessed ===")?; + + let v = sorted(self.0.clone()); + for (ctx, code) in v { + writeln!(f, "ctx: {:?} {:?}", ctx, code)?; + } + Ok(()) + } +} + +impl FromIterator<(H256, usize, usize)> for JumpDestTableWitness { + fn from_iter>(iter: T) -> Self { + let mut jdtw = JumpDestTableWitness::default(); + for (code_hash, ctx, offset) in iter.into_iter() { + jdtw.insert(code_hash, ctx, offset); + } + jdtw + } +} + +#[cfg(test)] +mod test { + use keccak_hash::H256; + + use super::JumpDestTableWitness; + + #[test] + fn test_extend_from_iter() { + let code_hash = H256::default(); + + let ctx_map = vec![ + (code_hash, 1, 1), + (code_hash, 2, 2), + (code_hash, 42, 3), + (code_hash, 43, 4), + ]; + let table1 = JumpDestTableWitness::from_iter(ctx_map); + let table2 = table1.clone(); + + let jdts = [&table1, &table2]; + let (actual, max_ctx) = JumpDestTableWitness::merge(jdts); + + let ctx_map_merged = vec![ + (code_hash, 1, 1), + (code_hash, 2, 2), + (code_hash, 42, 3), + (code_hash, 43, 4), + (code_hash, 44, 1), + (code_hash, 45, 2), + (code_hash, 85, 3), + (code_hash, 86, 4), + ]; + let expected = JumpDestTableWitness::from_iter(ctx_map_merged); + + assert_eq!(86, max_ctx); + assert_eq!(expected, actual) + } +} diff --git 
a/evm_arithmetization/src/generation/mod.rs b/evm_arithmetization/src/generation/mod.rs index 6ec02ea73..66253e4a4 100644 --- a/evm_arithmetization/src/generation/mod.rs +++ b/evm_arithmetization/src/generation/mod.rs @@ -3,6 +3,7 @@ use std::fmt::Display; use anyhow::anyhow; use ethereum_types::{Address, BigEndianHash, H256, U256}; +use jumpdest::JumpDestTableWitness; use keccak_hash::keccak; use log::error; use mpt_trie::partial_trie::{HashedPartialTrie, PartialTrie}; @@ -34,6 +35,7 @@ use crate::util::{h2u, u256_to_usize}; use crate::witness::memory::{MemoryAddress, MemoryChannel, MemoryState}; use crate::witness::state::RegistersState; +pub mod jumpdest; pub(crate) mod linked_list; pub mod mpt; pub(crate) mod prover_input; @@ -131,6 +133,10 @@ pub struct GenerationInputs { /// /// This is specific to `cdk-erigon`. pub ger_data: Option<(H256, H256)>, + + /// A table listing each JUMPDESTs reached in each call context under + /// associated code hash. + pub jumpdest_table: Option, } /// A lighter version of [`GenerationInputs`], which have been trimmed @@ -181,6 +187,10 @@ pub struct TrimmedGenerationInputs { /// The hash of the current block, and a list of the 256 previous block /// hashes. pub block_hashes: BlockHashes, + + /// A list of tables listing each JUMPDESTs reached in each call context + /// under associated code hash. 
+ pub jumpdest_table: Option, } #[derive(Clone, Debug, Deserialize, Serialize, Default)] @@ -255,6 +265,7 @@ impl GenerationInputs { burn_addr: self.burn_addr, block_metadata: self.block_metadata.clone(), block_hashes: self.block_hashes.clone(), + jumpdest_table: self.jumpdest_table.clone(), } } } diff --git a/evm_arithmetization/src/generation/prover_input.rs b/evm_arithmetization/src/generation/prover_input.rs index 704e2f4c6..69ff871f3 100644 --- a/evm_arithmetization/src/generation/prover_input.rs +++ b/evm_arithmetization/src/generation/prover_input.rs @@ -6,10 +6,13 @@ use std::str::FromStr; use anyhow::{bail, Error, Result}; use ethereum_types::{BigEndianHash, H256, U256, U512}; use itertools::Itertools; +use keccak_hash::keccak; +use log::{info, trace}; use num_bigint::BigUint; use plonky2::hash::hash_types::RichField; use serde::{Deserialize, Serialize}; +use super::jumpdest::{JumpDestTableProcessed, JumpDestTableWitness}; #[cfg(test)] use super::linked_list::testing::{LinkedList, ADDRESSES_ACCESS_LIST_LEN}; use super::linked_list::{ @@ -22,7 +25,9 @@ use crate::cpu::kernel::constants::cancun_constants::{ POINT_EVALUATION_PRECOMPILE_RETURN_VALUE, }; use crate::cpu::kernel::constants::context_metadata::ContextMetadata; -use crate::cpu::kernel::interpreter::simulate_cpu_and_get_user_jumps; +use crate::cpu::kernel::interpreter::{ + get_jumpdest_analysis_inputs_rpc_progressive, simulate_cpu_and_get_user_jumps, +}; use crate::curve_pairings::{bls381, CurveAff, CyclicGroup}; use crate::extension_tower::{FieldExt, Fp12, Fp2, BLS381, BLS_BASE, BLS_SCALAR, BN254, BN_BASE}; use crate::generation::prover_input::EvmField::{ @@ -40,6 +45,9 @@ use crate::witness::memory::MemoryAddress; use crate::witness::operation::CONTEXT_SCALING_FACTOR; use crate::witness::util::{current_context_peek, stack_peek}; +/// A set to hold contract code as a byte vectors. +pub type CodeDb = BTreeSet>; + /// Prover input function represented as a scoped function name. 
/// Example: `PROVER_INPUT(ff::bn254_base::inverse)` is represented as /// `ProverInputFn([ff, bn254_base, inverse])`. @@ -352,10 +360,26 @@ impl GenerationState { /// Returns the next used jump address. fn run_next_jumpdest_table_address(&mut self) -> Result { + // QUESTION: Is there a reason we do not use `self.registers.context` here? let context = u256_to_usize(stack_peek(self, 0)? >> CONTEXT_SCALING_FACTOR)?; + assert_eq!(context, self.registers.context); if self.jumpdest_table.is_none() { - self.generate_jumpdest_table()?; + self.jumpdest_table = Some(JumpDestTableProcessed::default()); + } + + if self.jumpdest_table.is_some() + && self + .jumpdest_table + .as_ref() + .unwrap() + .get(&context) + .is_none() + { + let ctx_table = self.generate_jumpdest_table()?; + self.jumpdest_table = Some(JumpDestTableProcessed::merge( + [self.jumpdest_table.clone().unwrap(), ctx_table].iter(), + )); } let Some(jumpdest_table) = &mut self.jumpdest_table else { @@ -783,24 +807,83 @@ impl GenerationState { impl GenerationState { /// Simulate the user's code and store all the jump addresses with their /// respective contexts. - fn generate_jumpdest_table(&mut self) -> Result<(), ProgramError> { - // Simulate the user's code and (unnecessarily) part of the kernel code, - // skipping the validate table call - self.jumpdest_table = simulate_cpu_and_get_user_jumps("terminate_common", self); + fn generate_jumpdest_table(&mut self) -> Result { + dbg!(&self.inputs.jumpdest_table); + // REVIEW: This will be rewritten to only run simulation when + // `self.inputs.jumpdest_table` is `None`. 
+ info!( + "Generating JUMPDEST tables for block: {}, tx: {:?}", + self.inputs.block_metadata.block_number, self.inputs.txn_hashes + ); + let rpcw = self.inputs.jumpdest_table.clone(); + let rpcp: Option = rpcw + .as_ref() + .map(|jdt| get_jumpdest_analysis_inputs_rpc_progressive(jdt, &self)); + if rpcp.is_some() { + return Ok(rpcp.unwrap()); + } + + info!("Generating JUMPDEST tables: Running SIM"); + + self.inputs.jumpdest_table = None; + let sims = simulate_cpu_and_get_user_jumps("terminate_common", self); + // .ok_or(ProgramError::ProverInputError(InvalidJumpdestSimulation))?; + + let (simp, ref simw): (Option, Option) = sims + .clone() + .map_or_else(|| (None, None), |(sim, simw)| (Some(sim), Some(simw))); + + info!("Generating JUMPDEST tables: finished"); + + if rpcw.is_some() && simw != &rpcw { + if let Some(s) = simw { + info!("SIMW {}", s); + } + if let Some(r) = rpcw.as_ref() { + info!("RPCW {}", r); + } + info!("SIMW == RPCW ? {}", simw == &rpcw); + info!("tx: {:?}", self.inputs.txn_hashes); + // let is_equal = simw == &rpcw; + // let block_num = self.inputs.block_metadata.block_number; + // tracing::error!( + // block_num, + // tables_match = is_equal, + // tx = self.inputs.txn_hashes + // ) + // panic!(); + // info!("SIMP {:?}", &simp); + // info!("RPCP {:?}", &rpcp); + // info!("SIMP == RPCP ? 
{}", &simp == &rpcp); + } else { + info!("JUMPDEST tables are equal."); + } + + // self.jumpdest_table = if rpcp.is_some() { rpcp } else { simp }; + + if sims.as_ref().is_none() { + return Ok(Default::default()); + } - Ok(()) + Ok(simp.unwrap()) } /// Given a HashMap containing the contexts and the jumpdest addresses, /// compute their respective proofs, by calling /// `get_proofs_and_jumpdests` - pub(crate) fn set_jumpdest_analysis_inputs( - &mut self, + pub(crate) fn get_jumpdest_analysis_inputs( + &self, jumpdest_table: HashMap>, - ) { - self.jumpdest_table = Some(HashMap::from_iter(jumpdest_table.into_iter().map( + ) -> (JumpDestTableProcessed, JumpDestTableWitness) { + let mut jdtw = JumpDestTableWitness::default(); + let jdtp = JumpDestTableProcessed::new(HashMap::from_iter(jumpdest_table.into_iter().map( |(ctx, jumpdest_table)| { let code = self.get_code(ctx).unwrap(); + let code_hash = keccak(code.clone()); + trace!("ctx: {ctx}, code_hash: {:?} code: {:?}", code_hash, code); + for offset in jumpdest_table.clone() { + jdtw.insert(code_hash, ctx, offset); + } if let Some(&largest_address) = jumpdest_table.last() { let proofs = get_proofs_and_jumpdests(&code, largest_address, jumpdest_table); (ctx, proofs) @@ -809,12 +892,17 @@ impl GenerationState { } }, ))); + (jdtp, jdtw) } pub(crate) fn get_current_code(&self) -> Result, ProgramError> { self.get_code(self.registers.context) } + pub(crate) fn get_current_code_hash(&self) -> Result { + Ok(keccak(self.get_code(self.registers.context)?)) + } + fn get_code(&self, context: usize) -> Result, ProgramError> { let code_len = self.get_code_len(context)?; let code = (0..code_len) @@ -855,7 +943,7 @@ impl GenerationState { /// for which none of the previous 32 bytes in the code (including opcodes /// and pushed bytes) is a PUSHXX and the address is in its range. It returns /// a vector of even size containing proofs followed by their addresses. 
-fn get_proofs_and_jumpdests( +pub(crate) fn get_proofs_and_jumpdests( code: &[u8], largest_address: usize, jumpdest_table: std::collections::BTreeSet, diff --git a/evm_arithmetization/src/generation/state.rs b/evm_arithmetization/src/generation/state.rs index abe4b4f1a..1ff7b3f7c 100644 --- a/evm_arithmetization/src/generation/state.rs +++ b/evm_arithmetization/src/generation/state.rs @@ -8,6 +8,7 @@ use keccak_hash::keccak; use log::Level; use plonky2::hash::hash_types::RichField; +use super::jumpdest::JumpDestTableProcessed; use super::linked_list::LinkedListsPtrs; use super::mpt::TrieRootPtrs; use super::segments::GenerationSegmentData; @@ -386,7 +387,7 @@ pub struct GenerationState { /// "proof" for a jump destination is either 0 or an address i > 32 in /// the code (not necessarily pointing to an opcode) such that for every /// j in [i, i+32] it holds that code[j] < 0x7f - j + i. - pub(crate) jumpdest_table: Option>>, + pub(crate) jumpdest_table: Option, /// Provides quick access to pointers that reference the location /// of either and account or a slot in the respective access list. @@ -494,12 +495,12 @@ impl GenerationState { // We cannot observe anything as the stack is empty. 
return Ok(()); } - if dst == KERNEL.global_labels["observe_new_address"] { + if dst == KERNEL.global_labels["observe_new_address"] && self.is_kernel() { let tip_u256 = stack_peek(self, 0)?; let tip_h256 = H256::from_uint(&tip_u256); let tip_h160 = H160::from(tip_h256); self.observe_address(tip_h160); - } else if dst == KERNEL.global_labels["observe_new_contract"] { + } else if dst == KERNEL.global_labels["observe_new_contract"] && self.is_kernel() { let tip_u256 = stack_peek(self, 0)?; let tip_h256 = H256::from_uint(&tip_u256); self.observe_contract(tip_h256)?; diff --git a/evm_arithmetization/src/lib.rs b/evm_arithmetization/src/lib.rs index 41b7f093a..49585d53d 100644 --- a/evm_arithmetization/src/lib.rs +++ b/evm_arithmetization/src/lib.rs @@ -280,6 +280,9 @@ pub mod verifier; pub mod generation; pub mod witness; +pub use generation::jumpdest; +pub use generation::prover_input::CodeDb; + // Utility modules pub mod curve_pairings; pub mod extension_tower; diff --git a/evm_arithmetization/src/witness/transition.rs b/evm_arithmetization/src/witness/transition.rs index fdcf9af65..8a6f1d39a 100644 --- a/evm_arithmetization/src/witness/transition.rs +++ b/evm_arithmetization/src/witness/transition.rs @@ -345,14 +345,6 @@ where where Self: Sized, { - self.perform_op(op, row)?; - self.incr_pc(match op { - Operation::Syscall(_, _, _) | Operation::ExitKernel => 0, - Operation::Push(n) => n as usize + 1, - Operation::Jump | Operation::Jumpi => 0, - _ => 1, - }); - self.incr_gas(gas_to_charge(op)); let registers = self.get_registers(); let gas_limit_address = MemoryAddress::new( @@ -373,6 +365,14 @@ where } } + self.perform_op(op, row)?; + self.incr_pc(match op { + Operation::Syscall(_, _, _) | Operation::ExitKernel => 0, + Operation::Push(n) => n as usize + 1, + Operation::Jump | Operation::Jumpi => 0, + _ => 1, + }); + Ok(op) } diff --git a/evm_arithmetization/tests/add11_yml.rs b/evm_arithmetization/tests/add11_yml.rs index dd9bfb1ce..87d959925 100644 --- 
a/evm_arithmetization/tests/add11_yml.rs +++ b/evm_arithmetization/tests/add11_yml.rs @@ -200,6 +200,7 @@ fn get_generation_inputs() -> GenerationInputs { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: None, } } /// The `add11_yml` test case from https://github.com/ethereum/tests diff --git a/evm_arithmetization/tests/erc20.rs b/evm_arithmetization/tests/erc20.rs index 2baf716e7..2cb9a538b 100644 --- a/evm_arithmetization/tests/erc20.rs +++ b/evm_arithmetization/tests/erc20.rs @@ -195,6 +195,7 @@ fn test_erc20() -> anyhow::Result<()> { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: None, }; let max_cpu_len_log = 20; diff --git a/evm_arithmetization/tests/erc721.rs b/evm_arithmetization/tests/erc721.rs index e416dc87a..f34d5c621 100644 --- a/evm_arithmetization/tests/erc721.rs +++ b/evm_arithmetization/tests/erc721.rs @@ -199,6 +199,7 @@ fn test_erc721() -> anyhow::Result<()> { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: None, }; let max_cpu_len_log = 20; diff --git a/evm_arithmetization/tests/global_exit_root.rs b/evm_arithmetization/tests/global_exit_root.rs index b7e82c7a0..481643517 100644 --- a/evm_arithmetization/tests/global_exit_root.rs +++ b/evm_arithmetization/tests/global_exit_root.rs @@ -113,6 +113,7 @@ fn test_global_exit_root() -> anyhow::Result<()> { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: None, }; let max_cpu_len_log = 20; diff --git a/evm_arithmetization/tests/log_opcode.rs b/evm_arithmetization/tests/log_opcode.rs index 81daf588c..871fa90f7 100644 --- a/evm_arithmetization/tests/log_opcode.rs +++ b/evm_arithmetization/tests/log_opcode.rs @@ -266,6 +266,7 @@ fn test_log_opcodes() -> anyhow::Result<()> { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: None, }; let max_cpu_len_log = 20; diff --git 
a/evm_arithmetization/tests/selfdestruct.rs b/evm_arithmetization/tests/selfdestruct.rs index f97dd41cd..eaf56dea6 100644 --- a/evm_arithmetization/tests/selfdestruct.rs +++ b/evm_arithmetization/tests/selfdestruct.rs @@ -170,6 +170,7 @@ fn test_selfdestruct() -> anyhow::Result<()> { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: None, }; let max_cpu_len_log = 20; diff --git a/evm_arithmetization/tests/simple_transfer.rs b/evm_arithmetization/tests/simple_transfer.rs index a1e7fb655..f40eadc67 100644 --- a/evm_arithmetization/tests/simple_transfer.rs +++ b/evm_arithmetization/tests/simple_transfer.rs @@ -162,6 +162,7 @@ fn test_simple_transfer() -> anyhow::Result<()> { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: None, }; let max_cpu_len_log = 20; diff --git a/evm_arithmetization/tests/withdrawals.rs b/evm_arithmetization/tests/withdrawals.rs index 01b48c0c6..4b3656cb6 100644 --- a/evm_arithmetization/tests/withdrawals.rs +++ b/evm_arithmetization/tests/withdrawals.rs @@ -105,6 +105,7 @@ fn test_withdrawals() -> anyhow::Result<()> { prev_hashes: vec![H256::default(); 256], cur_hash: H256::default(), }, + jumpdest_table: None, }; let max_cpu_len_log = 20; diff --git a/scripts/prove_stdio.sh b/scripts/prove_stdio.sh index b0d0788ca..4fd5d0718 100755 --- a/scripts/prove_stdio.sh +++ b/scripts/prove_stdio.sh @@ -1,6 +1,6 @@ #!/bin/bash # ------------------------------------------------------------------------------ -set -exo pipefail +set -x # Run prover with the parsed input from the standard terminal. # To generate the json input file, use the `rpc` tool, for example: @@ -17,27 +17,30 @@ else num_procs=$(nproc) fi -# Force the working directory to always be the `tools/` directory. +# Force the working directory to always be the `tools/` directory. 
REPO_ROOT=$(git rev-parse --show-toplevel) PROOF_OUTPUT_DIR="${REPO_ROOT}/proofs" -BLOCK_BATCH_SIZE="${BLOCK_BATCH_SIZE:-8}" +BLOCK_BATCH_SIZE="${BLOCK_BATCH_SIZE:-1}" echo "Block batch size: $BLOCK_BATCH_SIZE" +BATCH_SIZE=${BATCH_SIZE:-1} +echo "Batch size: $BATCH_SIZE" + OUTPUT_LOG="${REPO_ROOT}/output.log" PROOFS_FILE_LIST="${PROOF_OUTPUT_DIR}/proof_files.json" -TEST_OUT_PATH="${REPO_ROOT}/test.out" +TEST_OUT_PATH="${REPO_ROOT}/$3.test.out" # Configured Rayon and Tokio with rough defaults export RAYON_NUM_THREADS=$num_procs export TOKIO_WORKER_THREADS=$num_procs -export RUST_MIN_STACK=33554432 +#export RUST_MIN_STACK=33554432 export RUST_BACKTRACE=full -export RUST_LOG=info +export RUST_LOG=trace # Script users are running locally, and might benefit from extra perf. # See also .cargo/config.toml. -export RUSTFLAGS='-C target-cpu=native -Zlinker-features=-lld' +export RUSTFLAGS='-C target-cpu=native -Z linker-features=-lld' INPUT_FILE=$1 TEST_ONLY=$2 @@ -47,6 +50,11 @@ if [[ $INPUT_FILE == "" ]]; then exit 1 fi +if [[ ! -s $INPUT_FILE ]]; then + echo "Input file $INPUT_FILE does not exist or has length 0." + exit 6 +fi + # Circuit sizes only matter in non test_only mode. if ! [[ $TEST_ONLY == "test_only" ]]; then if [[ $INPUT_FILE == *"witness_b19807080"* ]]; then @@ -95,23 +103,34 @@ fi # proof. This is useful for quickly testing decoding and all of the # other non-proving code. if [[ $TEST_ONLY == "test_only" ]]; then - cargo run --quiet --release --package zero --bin leader -- \ + nice -19 cargo run --quiet --release --package zero --bin leader -- \ --test-only \ --runtime in-memory \ --load-strategy on-demand \ --block-batch-size "$BLOCK_BATCH_SIZE" \ --proof-output-dir "$PROOF_OUTPUT_DIR" \ - stdio < "$INPUT_FILE" &> "$TEST_OUT_PATH" + --batch-size "$BATCH_SIZE" \ + --save-inputs-on-error \ + stdio < "$INPUT_FILE" |& tee "$TEST_OUT_PATH" if grep -q 'All proof witnesses have been generated successfully.' 
"$TEST_OUT_PATH"; then echo -e "\n\nSuccess - Note this was just a test, not a proof" + #rm $TEST_OUT_PATH + exit 0 + elif grep -q 'Attempted to collapse an extension node' "$TEST_OUT_PATH"; then + echo "ERROR: Attempted to collapse an extension node. See "$TEST_OUT_PATH" for more details." rm "$TEST_OUT_PATH" - exit - else + exit 4 + elif grep -q 'SIMW == RPCW ? false' "$TEST_OUT_PATH"; then + echo "ERROR: SIMW == RPCW ? false. See "$TEST_OUT_PATH" for more details." + exit 5 + elif grep -q 'Proving task finished with error' "$TEST_OUT_PATH"; then # Some error occurred, display the logs and exit. - cat "$TEST_OUT_PATH" - echo "Failed to create proof witnesses. See $TEST_OUT_PATH for more details." + echo "ERROR: Proving task finished with error. See "$TEST_OUT_PATH" for more details." exit 1 + else + echo -e "\n\nUndecided. Proving process has stopped but verdict is undecided. See $TEST_OUT_PATH for more details." + exit 2 fi fi @@ -119,10 +138,10 @@ cargo build --release --jobs "$num_procs" start_time=$(date +%s%N) -"${REPO_ROOT}/target/release/leader" --runtime in-memory \ +nice -19 "${REPO_ROOT}/target/release/leader" --runtime in-memory \ --load-strategy on-demand -n 1 \ --block-batch-size "$BLOCK_BATCH_SIZE" \ - --proof-output-dir "$PROOF_OUTPUT_DIR" stdio < "$INPUT_FILE" &> "$OUTPUT_LOG" + --proof-output-dir "$PROOF_OUTPUT_DIR" stdio < "$INPUT_FILE" |& tee "$OUTPUT_LOG" end_time=$(date +%s%N) grep "Successfully wrote to disk proof file " "$OUTPUT_LOG" | awk '{print $NF}' | tee "$PROOFS_FILE_LIST" @@ -136,9 +155,9 @@ fi while read -r proof_file; do echo "Verifying proof file $proof_file" - verify_file=$PROOF_OUTPUT_DIR/verify_$(basename "$proof_file").out - "${REPO_ROOT}/target/release/verifier" -f "$proof_file" | tee "$verify_file" - if grep -q 'All proofs verified successfully!' 
"$verify_file"; then + verify_file=$PROOF_OUTPUT_DIR/verify_$(basename $proof_file).out + nice -19 "${REPO_ROOT}/target/release/verifier" -f $proof_file | tee "$verify_file" + if grep -q 'All proofs verified successfully!' $verify_file; then echo "Proof verification for file $proof_file successful"; rm "$verify_file" # we keep the generated proof for potential reuse else diff --git a/scripts/test_jerigon.sh b/scripts/test_jerigon.sh new file mode 100755 index 000000000..8a515f05e --- /dev/null +++ b/scripts/test_jerigon.sh @@ -0,0 +1,329 @@ +#!/usr/bin/env bash + +set -uo pipefail + +RPC=${RPC_JERIGON} +if [ -z $RPC ]; then + # You must set an RPC endpoint + exit 1 +fi +mkdir -p witnesses + +# Must match the values in prove_stdio.sh or build is dirty. +export RAYON_NUM_THREADS=1 +export TOKIO_WORKER_THREADS=1 +export RUST_BACKTRACE=full +#export RUST_LOG=info +#export RUSTFLAGS='-C target-cpu=native -Zlinker-features=-lld' +#export RUST_MIN_STACK=67108864 + +GITHASH=`git rev-parse --short HEAD` +echo "Testing against jergion, current revision: $GITHASH." 
+ +CIBLOCKS=" +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +" + + +STILLFAIL=" +37 +75 +15 +35 +43 +72 +77 +184 +460 +461 +462 +463 +464 +465 +467 +468 +474 +475 +476 +566 +662 +664 +665 +667 +670 +477 +478 +444 +" + +JUMPI=" +662 +664 +665 +667 +670 +" + +CONTAINSKEY=" +461 +462 +463 +464 +465 +467 +468 +474 +475 +476 +72 +" + +CREATE2=" +43 +566 +77 +" + +DECODING=" +477 +478 +" + +USEDTOFAIL=" +2 +15 +28 +35 +37 +43 +65 + +28 + +43 +460 +461 +462 +463 +464 +465 +467 +468 +474 +475 +476 +566 +662 +664 +665 +667 +670 +72 +77 +" + +ROUND2=" +664 +667 +670 +665 +" + +NOWSUCCESS=" +444 +4 +5 +28 +65 +566 +15 +35 +" + +ROUND3=" +125 +127 +131 +132 +136 +141 +142 +143 +145 +149 +150 +151 +153 +154 +186 +187 +188 +190 +193 +195 +197 +199 +201 +214 +220 +221 +222 +223 +226 +228 +229 +230 +231 +232 +234 +242 +256 +257 +258 +262 +264 +267 +268 +282 +284 +285 +287 +292 +294 +295 +301 +303 +304 +321 +325 +333 +460 +461 +462 +463 +464 +465 +466 +467 +468 +473 +474 +528 +529 +530 +531 +532 +533 +534 +566 +570 +664 +77 +548 +" + +ROUND4=" +136 +186 +268 +282 +301 +304 +321 +333 +460 +461 +462 +463 +464 +465 +466 +467 +468 +473 +474 +528 +529 +530 +531 +532 +533 +534 +570 +664 +" + +ROUND5=" +460 +461 +462 +463 +464 +465 +466 +467 +468 +473 +474 +664 +" + +ROUND6=" +664 +" + +# 470..663 from Robin +for i in {470..663} +do + ROBIN+=" $i" +done + +TIP=688 +NUMRANDOMBLOCKS=10 +RANDOMBLOCKS=`shuf --input-range=0-$TIP -n $NUMRANDOMBLOCKS | sort` + +#BLOCKS="72 185" #$ROUND5 $CREATE2 $DECODING $CONTAINSKEY $USEDTOFAIL $STILLFAIL $CIBLOCKS $JUMPI $ROUND2 $RANDOMBLOCKS $ROUND3 $ROUND5 $ROUND4" +BLOCKS="$CIBLOCKS" +BLOCKS=`echo $BLOCKS | tr ' ' '\n' | sort -nu | tr '\n' ' '` + +echo "Testing: $BLOCKS" +printf "\ngithash block verdict duration\n" | tee -a witnesses/jerigon_results.txt +echo "------------------------------------" | tee -a witnesses/jerigon_results.txt + +for BLOCK in $BLOCKS; do + GITHASH=`git rev-parse --short HEAD` + 
WITNESS="witnesses/$BLOCK.jerigon.$GITHASH.witness.json" + echo "Fetching block $BLOCK" + export RUST_LOG=rpc=trace + SECONDS=0 + cargo run --quiet --release --bin rpc -- --backoff 3000 --max-retries 100 --rpc-url $RPC --rpc-type jerigon --jumpdest-src client-fetched-structlogs fetch --start-block $BLOCK --end-block $BLOCK 1> $WITNESS + echo "Testing blocks: $BLOCKS." + echo "Now testing block $BLOCK .." + export RUST_LOG=info + timeout 10m ./prove_stdio.sh $WITNESS test_only $BLOCK + EXITCODE=$? + DURATION=`date -u -d @"$SECONDS" +'%-Hh%-Mm%-Ss'` + echo $DURATION + if [ $EXITCODE -eq 0 ] + then + VERDICT="success" + else + VERDICT="failure" + fi + printf "%s %10i %s %s\n" $GITHASH $BLOCK $VERDICT $DURATION | tee -a witnesses/jerigon_results.txt +done diff --git a/scripts/test_native.sh b/scripts/test_native.sh new file mode 100755 index 000000000..20506aafa --- /dev/null +++ b/scripts/test_native.sh @@ -0,0 +1,325 @@ +#!/usr/bin/env bash + +set -uxo pipefail + +if [ -z $RPC ]; then + # You must set an RPC endpoint + exit 1 +fi + +if [ git diff --quiet --exit-code HEAD ]; then + exit 1 +fi + + +mkdir -p witnesses + + + +RESULT_LEN=$(cat witnesses/native_results.txt | wc -l) + + +function statistics() +{ + PREFIX_LEN=1000 + wc -l witnesses/native_results.txt + cat witnesses/native_results.txt | tail -n $PREFIX_LEN + + SUMOK=$(cat witnesses/native_results.txt | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f8 | paste -s -d+ - | bc) + SUMFAIL=$(cat witnesses/native_results.txt | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f9 | paste -s -d+ - | bc) + SUMTOTAL=$(cat witnesses/native_results.txt | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f10 | paste -s -d+ - | bc) + echo $SUMTOTAL + echo $SUMFAIL + echo "Failure rate: " $([[ $SUMTOTAL -eq 0 ]] && echo "0" || echo "$(($SUMFAIL * 100 / $SUMTOTAL))%") + echo "Success rate: " $([[ $SUMTOTAL -eq 0 ]] && echo "0" || echo "$(($SUMOK * 100 / $SUMTOTAL))%") + + ZEROES=$(cat witnesses/native_results.txt | tail -n $PREFIX_LEN | 
tr -s ' ' | cut -d' ' -f4 | grep --count "0") + ONES=$( cat witnesses/native_results.txt | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f4 | grep --count "1") + TWOS=$( cat witnesses/native_results.txt | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f4 | grep --count "2") + THREES=$(cat witnesses/native_results.txt | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f4 | grep --count "3") + FOURS=$(cat witnesses/native_results.txt | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f4 | grep --count "4") + FIVES=$(cat witnesses/native_results.txt | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f4 | grep --count "5") + SIXES=$(cat witnesses/native_results.txt | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f4 | grep --count "6") + echo $ZEROES + echo $ONES + echo $TWOS + echo $THREES + echo $FOURS + echo $FIVES + echo $SIXES + echo "good bye" + exit 0 +} +trap statistics INT EXIT QUIT HUP TERM + +statistics + +exit 0 +# Must match the values in prove_stdio.sh or build is dirty. +#export RAYON_NUM_THREADS=1 +#export TOKIO_WORKER_THREADS=1 +export RUST_BACKTRACE=full +#export RUST_LOG=info +#export RUSTFLAGS='-C target-cpu=native -Zlinker-features=-lld' +#export RUST_MIN_STACK=33554432 + + + +CANCUNBLOCKS=" +20548415 +20240058 +19665756 +20634472 +19807080 +20634403 +" + +PRECANCUN=" +19096840 +19240700 +" + + +#It's visible with block 20727641 +ROUND1=`echo {20727640..20727650}` + +ROUND2=" +20727641 +20727643 +20727644 +20727645 +20727646 +20727647 +20727648 +20727649 +20727650 +" + +ROUND3=" +20727643 +20727644 +20727648 +20727649 +20727650 +" + +ROUND4=" +19457111 +19477724 +19501672 +19508907 +19511272 +19548904 +19550401 +19553425 +19563122 +19585193 +19600168 +19603017 +19607029 +19649976 +19654474 +19657021 +19670735 +19688239 +19737540 +19767306 +19792995 +19812505 +19829370 +19835094 +19862390 +19871215 +19877263 +19877279 +19893964 +19922838 +19938970 +19971875 +20011069 +20071977 +20131326 +20173673 +20182890 +20218660 +20225259 +20229861 +20259823 +20274215 +20288828 
+20291090 +20301243 +20346949 +20410573 +20462322 +20518465 +20521004 +20542632 +20543651 +20555951 +20634148 +20691605 +20714397 +20715309 +20715461 +20719386 +20720179 +20720275 +20741147 +20775888 +20804319 +20835783 +20859523 +20727643 +20727644 +20727648 +20727649 +20727650 +" + +ROUND5=" +19650385 +19542391 +19578175 +19511272 +" + +ROUND6=" +19426872 +19427018 +19427388 +19427472 +19429634 +19430273 +19430687 +19430855 +19431223 +19431344 +19432360 +19432641 +19435607 +19435804 +19436307 +19439155 +19439754 +19440665 +19441789 +19443628 +19443673 +19444327 +19444582 +19445175 +19445286 +19445799 +19446774 +19446911 +19447598 +19447814 +19448687 +19449229 +19449755 +19450491 +19451118 +19451955 +19452325 +19452532 +19452795 +19452869 +19454136 +19455621 +19456052 +19456615 +19460281 +19460945 +19462377 +19463186 +19464727 +19466034 +19466036 +19466108 +19466509 +" + +ROUND7=" +19430273 +19431344 +19451118 +19452869 +19460945 +19464727 +19466034 +" + +ROUND8=" +19657436 +19508991 +19500774 +19794433 +" + +CANCUN=19426587 +TIP=`cast block-number --rpc-url $RPC` +STATICTIP=20978815 +NUMRANDOMBLOCKS=1000 +RANDOMBLOCKS=`shuf --input-range=$CANCUN-$TIP -n $NUMRANDOMBLOCKS | sort` + +REPO_ROOT=$(git rev-parse --show-toplevel) + +GITHASH=`git rev-parse --short HEAD` +echo "Testing against mainnet, current revision: $GITHASH." + +#BLOCKS="$CANCUNBLOCKS $RANDOMBLOCKS $ROUND3" +BLOCKS="$RANDOMBLOCKS" +#BLOCKS="$ROUND8" +BLOCKS=`echo $BLOCKS | tr ' ' '\n' | sort -nu | tr '\n' ' '` +echo "Testing blocks: $BLOCKS" + +echo "Testing: $BLOCKS" + +printf "\n\nr\n" | tee -a witnesses/native_results.txt +echo "0 is success" | tee -a witnesses/native_results.txt +echo "5 [defect] is non-matching jumpdest tables" | tee -a witnesses/native_results.txt +echo "1 [unexpected] is other errors" | tee -a witnesses/native_results.txt +echo "4 [expected] is Attempted to collapse an extension node" | tee -a witnesses/native_results.txt +echo "6 [expected] is empty witness. 
Usually due to Error: Failed to get proof for account" | tee -a witnesses/native_results.txt +echo "Report started: $(date)" | tee -a witnesses/native_results.txt +printf "\ngithash block verdict r rpc-time test-time total-time tx-ok tx-none tx-total \n" | tee -a witnesses/native_results.txt +echo "---------------------------------------------------------------------------------------" | tee -a witnesses/native_results.txt + +for BLOCK in $BLOCKS; do + TOTALTIME=0 + GITHASH=`git rev-parse --short HEAD` + WITNESS="witnesses/$BLOCK.native.$GITHASH.witness.json" + echo "Fetching block $BLOCK" + export RUST_LOG=rpc=trace + SECONDS=0 + nice -19 cargo run --quiet --release --bin rpc -- --backoff 3000 --max-retries 100 --rpc-url $RPC --rpc-type native --jumpdest-src client-fetched-structlogs --timeout 600 fetch --start-block $BLOCK --end-block $BLOCK 1> $WITNESS + TOTALTIME=`echo -n $(($TOTALTIME + $SECONDS))` + DURATION_RPC=`date -u -d @"$SECONDS" +'%-Hh%-Mm%-Ss'` + TXALL=`grep '"jumpdest_table":' $WITNESS | wc -l` + TXNONE=`grep '"jumpdest_table": null' $WITNESS | wc -l` + TXOK=`echo -n $(($TXALL - $TXNONE))` + echo "Now testing block $BLOCK .." + export RUST_LOG=info + SECONDS=0 + timeout 10m nice -19 ./prove_stdio.sh $WITNESS test_only $BLOCK + EXITCODE=$? 
+ TOTALTIME=`echo -n $(($TOTALTIME + $SECONDS))` + DURATION_PRV=`date -u -d @"$SECONDS" +'%-Hh%-Mm%-Ss'` + TOTALTIME=`date -u -d @"$TOTALTIME" +'%-Hh%-Mm%-Ss'` + if [ $EXITCODE -eq 0 ] + then + VERDICT="success" + else + VERDICT="failure" + fi + printf "%s %10i %s %3i %8s %8s %8s %3i %3i %3i \n" $GITHASH $BLOCK $VERDICT $EXITCODE $DURATION_RPC $DURATION_PRV $TOTALTIME $TXOK $TXNONE $TXALL | tee -a witnesses/native_results.txt + + + ### Clean up + TEST_OUT_PATH="${REPO_ROOT}/$BLOCK.test.out" + rm $TEST_OUT_PATH + rm $WITNESS + +done + diff --git a/scripts/test_new_chain.sh b/scripts/test_new_chain.sh new file mode 100755 index 000000000..3ca997f07 --- /dev/null +++ b/scripts/test_new_chain.sh @@ -0,0 +1,203 @@ +#!/usr/bin/env bash + +set -uo pipefail + +if [ -z $RPC ]; then + echo You must set an RPC endpoint + exit 1 +fi + +git diff --quiet --exit-code HEAD +if [ $? -ne 0 ]; then + echo Uncommited changes, please commit to make githash consistent + exit 1 +fi + +REPO_ROOT=$(git rev-parse --show-toplevel) + +mkdir -p witnesses + +RESULTS="witnesses/jerigon_new_chain.txt" +RESULT_LEN=$(cat $RESULTS | wc -l) +BLOCKS_TESTED=0 + + +function statistics() +{ + PREFIX_LEN=$BLOCKS_TESTED + wc -l $RESULTS + cat $RESULTS | tail -n $PREFIX_LEN + + SUMOK=$(cat $RESULTS | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f8 | paste -s -d+ - | bc) + SUMFAIL=$(cat $RESULTS | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f9 | paste -s -d+ - | bc) + SUMTOTAL=$(cat $RESULTS | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f10 | paste -s -d+ - | bc) + + ZEROES=$(cat $RESULTS | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f4 | grep --count "0") + ONES=$(cat $RESULTS | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f4 | grep --count "1") + TWOS=$(cat $RESULTS | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f4 | grep --count "2") + THREES=$(cat $RESULTS | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f4 | grep --count "3") + FOURS=$(cat $RESULTS | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f4 | 
grep --count "4") + FIVES=$(cat $RESULTS | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f4 | grep --count "5") + SIXES=$(cat $RESULTS | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f4 | grep --count "6") + SEVENS=$(cat $RESULTS | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f4 | grep --count "7") + EIGHTS=$(cat $RESULTS | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f4 | grep --count "8") + NINES=$(cat $RESULTS | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f4 | grep --count "9") + TIMEOUTS=$(cat $RESULTS | tail -n $PREFIX_LEN | tr -s ' ' | cut -d' ' -f4 | grep --count "134") + + printf "\n\nStatistics\n" | tee -a $RESULTS + echo "---------------------------------------------------------------------------------------" | tee -a $RESULTS + echo "Total blocks: " $BLOCKS_TESTED | tee -a $RESULTS + echo "Total transactions: " $SUMTOTAL | tee -a $RESULTS + echo "Transactions without prefetched JUMPDEST table: "$SUMFAIL | tee -a $RESULTS + echo "Failure rate: " $([[ $SUMTOTAL -eq 0 ]] && echo "0" || echo "$(($SUMFAIL * 100 / $SUMTOTAL))%") | tee -a $RESULTS + echo "Success rate: " $([[ $SUMTOTAL -eq 0 ]] && echo "0" || echo "$(($SUMOK * 100 / $SUMTOTAL))%") | tee -a $RESULTS + echo "Zeroes: " $ZEROES | tee -a $RESULTS + echo "Ones: " $ONES | tee -a $RESULTS + echo "Twos: " $TWOS | tee -a $RESULTS + echo "Threes: " $THREES | tee -a $RESULTS + echo "Fours: " $FOURS | tee -a $RESULTS + echo "Fives: " $FIVES | tee -a $RESULTS + echo "Sixes: " $SIXES | tee -a $RESULTS + echo "Sevens: " $SEVENS | tee -a $RESULTS + echo "Eights: " $EIGHTS | tee -a $RESULTS + echo "Nines: " $NINES | tee -a $RESULTS + echo "Timeouts: " $TIMEOUTS | tee -a $RESULTS + echo "End of statistics" | tee -a $RESULTS + exit 0 +} +trap statistics EXIT # INT QUIT # HUP TERM + +# Must match the values in prove_stdio.sh or build is dirty. 
+#export RAYON_NUM_THREADS=1 +#export TOKIO_WORKER_THREADS=1 +export RUST_BACKTRACE=full +#export RUST_LOG=info +#export RUSTFLAGS='-C target-cpu=native -Zlinker-features=-lld' +#export RUST_MIN_STACK=33554432 + +TIP=`cast block-number --rpc-url $RPC` +STATICTIP=6555 + +REPO_ROOT=$(git rev-parse --show-toplevel) + +GITHASH=`git rev-parse --short HEAD` + + +nice -19 cargo build --release --bin rpc +nice -19 cargo build --release --bin leader + + +echo "Testing against jerigon testnet 2, current revision: $GITHASH." + +FAILING_BLOCKS1=" +678 +679 +680 +681 +690 +692 +697 +737 +1178 +1913 +3010 +3114 +3115 +3205 +3206 +3215 +3265 +3915 +4076 +4284 +4285 +4286 +5282 +5661 +6086 +6237 +6321 +6494 +6495 +" + +FAILING_BLOCKS2=" +678 +679 +680 +681 +690 +692 +697 +737 +3010 +" + + +#BLOCKS="$(seq $STATICTIP)" +BLOCKS="$(seq 106 107)" +#BLOCKS=$FAILING_BLOCKS1 +#BLOCKS=`echo $BLOCKS | tr ' ' '\n' | sort -nu | tr '\n' ' '` + +echo "Testing: $BLOCKS" + + + +printf "\n\nReport started: $(date)" | tee -a $RESULTS +printf "\n\nTable of exit codes\n" | tee -a $RESULTS +echo "---------------------------------------------------------------------------------------" | tee -a $RESULTS +echo "0 is success" | tee -a $RESULTS +echo "1 [unexpected] is other errors" | tee -a $RESULTS +echo "2 [unexpected] is undecided" | tee -a $RESULTS +echo "4 [expected] is Attempted to collapse an extension node" | tee -a $RESULTS +echo "5 [unexpected] is non-matching jumpdest tables" | tee -a $RESULTS +echo "6 [expected] is empty witness. Possibly due to Error: Failed to get proof for account" | tee -a $RESULTS +echo "7 [expected] is Found a Hash node during an insert in a PartialTrie" | tee -a $RESULTS +echo "8 [expected] is Attempted to delete a value that ended up inside a hash node" | tee -a $RESULTS +echo "9 [expected] is Memory allocation failed. Increase RAM" | tee -a $RESULTS +echo "134 [undecided] is timeout. Try increasing the proving timeout." 
| tee -a $RESULTS + +printf "\ngithash block verdict r rpc-time test-time total-time tx-ok tx-none tx-total \n" | tee -a $RESULTS +echo "---------------------------------------------------------------------------------------" | tee -a $RESULTS + +for BLOCK in $BLOCKS; do + TOTALTIME=0 + GITHASH=`git rev-parse --short HEAD` + WITNESS="witnesses/$BLOCK.jerigon2.$GITHASH.witness.json" + echo "Fetching block $BLOCK" + export RUST_LOG=rpc=trace + SECONDS=0 + nice -19 -- "${REPO_ROOT}/target/release/rpc" --backoff 3000 --max-retries 100 --rpc-url $RPC --rpc-type jerigon --jumpdest-src client-fetched-structlogs --timeout 120 fetch --start-block $BLOCK --end-block $BLOCK 1> $WITNESS + TOTALTIME=`echo -n $(($TOTALTIME + $SECONDS))` + DURATION_RPC=`date -u -d @"$SECONDS" +'%-Hh%-Mm%-Ss'` + TXALL=`grep '"jumpdest_table":' $WITNESS | wc -l` + TXNONE=`grep '"jumpdest_table": null' $WITNESS | wc -l` + TXOK=`echo -n $(($TXALL - $TXNONE))` + echo "Now testing block $BLOCK .." + export RUST_LOG=info + SECONDS=0 + #timeout 600s + nice -19 -- ./prove_stdio.sh $WITNESS test_only $BLOCK + EXITCODE=$? 
+ TOTALTIME=`echo -n $(($TOTALTIME + $SECONDS))` + DURATION_PRV=`date -u -d @"$SECONDS" +'%-Hh%-Mm%-Ss'` + TOTALTIME=`date -u -d @"$TOTALTIME" +'%-Hh%-Mm%-Ss'` + if [ $EXITCODE -eq 0 ] + then + VERDICT="success" + else + VERDICT="failure" + fi + printf "%s %10i %s %3i %8s %8s %8s %3i %3i %3i \n" $GITHASH $BLOCK $VERDICT $EXITCODE $DURATION_RPC $DURATION_PRV $TOTALTIME $TXOK $TXNONE $TXALL | tee -a $RESULTS + ((BLOCKS_TESTED+=1)) + + ### Clean up except when unknown error or undecided + TEST_OUT_PATH="${REPO_ROOT}/$BLOCK.test.out" + if [ $EXITCODE -ne 1 ] && [ $EXITCODE -ne 2 ]; then + #rm $TEST_OUT_PATH + #rm $WITNESS + echo + fi + +done + diff --git a/trace_decoder/Cargo.toml b/trace_decoder/Cargo.toml index 4b8d8e7bc..5bdd24f12 100644 --- a/trace_decoder/Cargo.toml +++ b/trace_decoder/Cargo.toml @@ -15,6 +15,7 @@ alloy-compat = "0.1.0" anyhow.workspace = true bitflags.workspace = true bitvec.workspace = true +build-array = "0.1.2" bytes.workspace = true ciborium.workspace = true ciborium-io.workspace = true diff --git a/trace_decoder/src/core.rs b/trace_decoder/src/core.rs index 8093098c1..f8e870760 100644 --- a/trace_decoder/src/core.rs +++ b/trace_decoder/src/core.rs @@ -1,19 +1,28 @@ +use core::{convert::Into as _, option::Option::None}; use std::{ cmp, collections::{BTreeMap, BTreeSet, HashMap}, mem, }; +use alloy::{ + consensus::{Transaction, TxEnvelope}, + primitives::{address, TxKind}, + rlp::Decodable as _, +}; +use alloy_compat::Compat as _; use anyhow::{anyhow, bail, ensure, Context as _}; use either::Either; -use ethereum_types::{Address, BigEndianHash as _, U256}; +use ethereum_types::{Address, BigEndianHash as _, H160, U256}; use evm_arithmetization::{ generation::TrieInputs, + jumpdest::JumpDestTableWitness, proof::{BlockMetadata, TrieRoots}, GenerationInputs, }; use itertools::Itertools as _; use keccak_hash::H256; +use log::debug; use mpt_trie::partial_trie::PartialTrie as _; use nunny::NonEmpty; use zk_evm_common::gwei_to_wei; @@ -30,6 +39,24 @@ 
use crate::{ TxnInfo, TxnMeta, TxnTrace, }; +/// Addresses of precompiled Ethereum contracts. +pub fn is_precompile(addr: H160) -> bool { + let precompiled_addresses = if cfg!(feature = "eth_mainnet") { + address!("0000000000000000000000000000000000000001") + ..address!("000000000000000000000000000000000000000a") + } else { + // Remove KZG Peval for non-Eth mainnet networks + address!("0000000000000000000000000000000000000001") + ..address!("0000000000000000000000000000000000000009") + }; + + precompiled_addresses.contains(&addr.compat()) + || (cfg!(feature = "polygon_pos") + // Include P256Verify for Polygon PoS + && addr.compat() + == address!("0000000000000000000000000000000000000100")) +} + /// Expected trie type when parsing from binary in a [`BlockTrace`]. /// /// See [`crate::wire`] and [`CombinedPreImages`] for more. @@ -63,7 +90,7 @@ pub fn entrypoint( }; let (world, mut code) = start(trie_pre_images, wire_disposition)?; - code.extend(code_db); + code.extend(code_db.clone()); let OtherBlockData { b_data: @@ -132,6 +159,7 @@ pub fn entrypoint( }, after, withdrawals, + jumpdest_tables, }| { let (state, storage) = world .expect_left("TODO(0xaatif): evm_arithemetization accepts an SMT") @@ -143,7 +171,7 @@ pub fn entrypoint( running_gas_used += gas_used; running_gas_used.into() }, - signed_txns: byte_code.into_iter().map(Into::into).collect(), + signed_txns: byte_code.clone().into_iter().map(Into::into).collect(), withdrawals, ger_data, tries: TrieInputs { @@ -155,13 +183,38 @@ pub fn entrypoint( trie_roots_after: after, checkpoint_state_trie_root, checkpoint_consolidated_hash, - contract_code: contract_code - .into_iter() - .map(|it| (keccak_hash::keccak(&it), it)) - .collect(), + contract_code: { + let init_codes = + byte_code + .iter() + .filter_map(|nonempty_txn_bytes| -> Option> { + let tx_envelope = + TxEnvelope::decode(&mut &nonempty_txn_bytes[..]).unwrap(); + match tx_envelope.to() { + TxKind::Create => Some(tx_envelope.input().to_vec()), + 
TxKind::Call(_address) => None, + } + }); + let mut result = Hash2Code::default(); + result.extend(init_codes); + result.extend(contract_code); + result.extend(code_db.clone()); + result.into_hashmap() + }, block_metadata: b_meta.clone(), block_hashes: b_hashes.clone(), burn_addr, + jumpdest_table: { + // TODO(einar-polygon): + // Note that this causes any batch containing just a single `None` to + // collapse into a `None`, which causing failover to + // simulating jumpdest analysis for the whole batch. + // There is an optimization opportunity here. + jumpdest_tables + .into_iter() + .collect::>>() + .map(|jdt| JumpDestTableWitness::merge(jdt.iter()).0) + }, } }, ) @@ -324,6 +377,8 @@ struct Batch { /// Empty for all but the final batch pub withdrawals: Vec<(Address, U256)>, + + pub jumpdest_tables: Vec>, } impl Batch { @@ -336,6 +391,7 @@ impl Batch { before, after, withdrawals, + jumpdest_tables, } = self; Batch { first_txn_ix, @@ -345,6 +401,7 @@ impl Batch { before: before.map(f), after, withdrawals, + jumpdest_tables, } } } @@ -437,6 +494,8 @@ where )?; } + let mut jumpdest_tables = vec![]; + for txn in batch { let do_increment_txn_ix = txn.is_some(); let TxnInfo { @@ -446,6 +505,7 @@ where byte_code, new_receipt_trie_node_byte, gas_used: txn_gas_used, + jumpdest_table, }, } = txn.unwrap_or_default(); @@ -565,6 +625,8 @@ where } } + jumpdest_tables.push(jumpdest_table); + if do_increment_txn_ix { txn_ix += 1; } @@ -598,6 +660,7 @@ where transactions_root: transaction_trie.root(), receipts_root: receipt_trie.root(), }, + jumpdest_tables, }); observer.collect_tries( @@ -787,6 +850,7 @@ fn map_receipt_bytes(bytes: Vec) -> anyhow::Result> { /// trace. /// If there are any txns that create contracts, then they will also /// get added here as we process the deltas. +#[derive(Default)] struct Hash2Code { /// Key must always be [`hash`](keccak_hash) of value. 
inner: HashMap>, @@ -801,11 +865,18 @@ impl Hash2Code { this } pub fn get(&mut self, hash: H256) -> Option> { - self.inner.get(&hash).cloned() + let res = self.inner.get(&hash).cloned(); + if res.is_none() { + debug!("no code for hash {:#x}", hash); + } + res } pub fn insert(&mut self, code: Vec) { self.inner.insert(keccak_hash::keccak(&code), code); } + pub fn into_hashmap(self) -> HashMap> { + self.inner + } } impl Extend> for Hash2Code { diff --git a/trace_decoder/src/interface.rs b/trace_decoder/src/interface.rs index abe3b0af0..35248f9a3 100644 --- a/trace_decoder/src/interface.rs +++ b/trace_decoder/src/interface.rs @@ -5,6 +5,7 @@ use std::collections::{BTreeMap, BTreeSet, HashMap}; use ethereum_types::{Address, U256}; +use evm_arithmetization::jumpdest::JumpDestTableWitness; use evm_arithmetization::proof::{BlockHashes, BlockMetadata}; use evm_arithmetization::ConsolidatedHash; use keccak_hash::H256; @@ -111,6 +112,9 @@ pub struct TxnMeta { /// Gas used by this txn (Note: not cumulative gas used). pub gas_used: u64, + + /// JumpDest table + pub jumpdest_table: Option, } /// A "trace" specific to an account for a txn. 
diff --git a/trace_decoder/src/lib.rs b/trace_decoder/src/lib.rs index 1f1c87888..aceefc76e 100644 --- a/trace_decoder/src/lib.rs +++ b/trace_decoder/src/lib.rs @@ -69,7 +69,7 @@ mod type2; mod wire; mod world; -pub use core::{entrypoint, WireDisposition}; +pub use core::{entrypoint, is_precompile, WireDisposition}; mod core; diff --git a/zero/Cargo.toml b/zero/Cargo.toml index 96d3bdeba..fd8cba2b0 100644 --- a/zero/Cargo.toml +++ b/zero/Cargo.toml @@ -12,6 +12,8 @@ categories.workspace = true __compat_primitive_types.workspace = true alloy.workspace = true alloy-compat = "0.1.0" +alloy-primitives.workspace = true +alloy-serde.workspace = true anyhow.workspace = true async-stream.workspace = true axum.workspace = true diff --git a/zero/src/bin/leader.rs b/zero/src/bin/leader.rs index 5d11845c6..88d9ee60a 100644 --- a/zero/src/bin/leader.rs +++ b/zero/src/bin/leader.rs @@ -103,6 +103,7 @@ async fn main() -> Result<()> { Command::Rpc { rpc_url, rpc_type, + jumpdest_src, checkpoint_block, previous_proof, block_time, @@ -110,6 +111,7 @@ async fn main() -> Result<()> { end_block, backoff, max_retries, + timeout, } => { // Construct the provider. 
let previous_proof = get_previous_proof(previous_proof)?; @@ -139,6 +141,8 @@ async fn main() -> Result<()> { previous_proof, prover_config, }, + jumpdest_src, + timeout, ) .await?; } diff --git a/zero/src/bin/leader/cli.rs b/zero/src/bin/leader/cli.rs index c085ae83f..bbefee9f4 100644 --- a/zero/src/bin/leader/cli.rs +++ b/zero/src/bin/leader/cli.rs @@ -1,11 +1,13 @@ use std::path::PathBuf; +use std::time::Duration; use alloy::eips::BlockId; use alloy::transports::http::reqwest::Url; use clap::{Parser, Subcommand, ValueEnum, ValueHint}; +use zero::parsing::parse_duration; use zero::prover::cli::CliProverConfig; use zero::prover_state::cli::CliProverStateConfig; -use zero::rpc::RpcType; +use zero::rpc::{JumpdestSrc, RpcType}; const WORKER_HELP_HEADING: &str = "Worker Config options"; @@ -65,6 +67,14 @@ pub(crate) enum Command { // The node RPC type (jerigon / native). #[arg(long, short = 't', default_value = "jerigon")] rpc_type: RpcType, + /// The source of jumpdest tables. + #[arg( + short = 'j', + long, + default_value = "client-fetched-structlogs", + required = false + )] + jumpdest_src: JumpdestSrc, /// The start of the block range to prove (inclusive). #[arg(long, short = 's')] start_block: BlockId, @@ -89,6 +99,9 @@ pub(crate) enum Command { /// The maximum number of retries #[arg(long, default_value_t = 0)] max_retries: u32, + /// Timeout for fetching structlog traces + #[arg(long, default_value = "60", value_parser = parse_duration)] + timeout: Duration, }, /// Reads input from HTTP and writes output to a directory. 
Http { diff --git a/zero/src/bin/leader/client.rs b/zero/src/bin/leader/client.rs index 6f2015833..619d1c35a 100644 --- a/zero/src/bin/leader/client.rs +++ b/zero/src/bin/leader/client.rs @@ -1,4 +1,5 @@ use std::sync::Arc; +use std::time::Duration; use alloy::providers::Provider; use alloy::rpc::types::{BlockId, BlockNumberOrTag}; @@ -11,7 +12,7 @@ use zero::pre_checks::check_previous_proof_and_checkpoint; use zero::proof_types::GeneratedBlockProof; use zero::prover::{self, BlockProverInput, ProverConfig}; use zero::provider::CachedProvider; -use zero::rpc; +use zero::rpc::{self, JumpdestSrc}; use crate::ProofRuntime; @@ -29,6 +30,8 @@ pub(crate) async fn client_main( block_time: u64, block_interval: BlockInterval, mut leader_config: LeaderConfig, + jumpdest_src: JumpdestSrc, + timeout: Duration, ) -> Result<()> where ProviderT: Provider + 'static, @@ -81,6 +84,8 @@ where cached_provider.clone(), block_id, leader_config.checkpoint_block_number, + jumpdest_src, + timeout, ) .await?; block_tx diff --git a/zero/src/bin/rpc.rs b/zero/src/bin/rpc.rs index a8a42a6d4..689d13d33 100644 --- a/zero/src/bin/rpc.rs +++ b/zero/src/bin/rpc.rs @@ -1,4 +1,5 @@ use std::sync::Arc; +use std::time::Duration; use alloy::primitives::B256; use alloy::providers::Provider; @@ -13,10 +14,12 @@ use tracing_subscriber::{prelude::*, EnvFilter}; use url::Url; use zero::block_interval::BlockInterval; use zero::block_interval::BlockIntervalStream; +use zero::parsing::parse_duration; use zero::prover::BlockProverInput; use zero::prover::WIRE_DISPOSITION; use zero::provider::CachedProvider; use zero::rpc; +use zero::rpc::JumpdestSrc; use self::rpc::{retry::build_http_retry_provider, RpcType}; @@ -25,6 +28,8 @@ struct FetchParams { pub start_block: u64, pub end_block: u64, pub checkpoint_block_number: Option, + pub jumpdest_src: JumpdestSrc, + pub timeout: Duration, } #[derive(Args, Clone, Debug)] @@ -35,12 +40,23 @@ struct RpcToolConfig { /// The RPC Tracer Type. 
#[arg(short = 't', long, default_value = "jerigon")] rpc_type: RpcType, + /// The source of jumpdest tables. + #[arg( + short = 'j', + long, + default_value = "client-fetched-structlogs", + required = false + )] + jumpdest_src: JumpdestSrc, /// Backoff in milliseconds for retry requests. #[arg(long, default_value_t = 0)] backoff: u64, /// The maximum number of retries. #[arg(long, default_value_t = 0)] max_retries: u32, + /// Timeout for fetching structlog traces + #[arg(long, default_value = "60", value_parser = parse_duration)] + timeout: Duration, } #[derive(Subcommand)] @@ -97,9 +113,14 @@ where let (block_num, _is_last_block) = block_interval_elem?; let block_id = BlockId::Number(BlockNumberOrTag::Number(block_num)); // Get the prover input for particular block. - let result = - rpc::block_prover_input(cached_provider.clone(), block_id, checkpoint_block_number) - .await?; + let result = rpc::block_prover_input( + cached_provider.clone(), + block_id, + checkpoint_block_number, + params.jumpdest_src, + params.timeout, + ) + .await?; block_prover_inputs.push(result); } @@ -126,6 +147,8 @@ impl Cli { start_block, end_block, checkpoint_block_number, + jumpdest_src: self.config.jumpdest_src, + timeout: self.config.timeout, }; let block_prover_inputs = @@ -151,6 +174,8 @@ impl Cli { start_block: block_number, end_block: block_number, checkpoint_block_number: None, + jumpdest_src: self.config.jumpdest_src, + timeout: self.config.timeout, }; let block_prover_inputs = @@ -204,8 +229,10 @@ async fn main() -> anyhow::Result<()> { tracing_subscriber::Registry::default() .with( tracing_subscriber::fmt::layer() + // With the default configuration trace information is written + // to stdout, but we already use stdout to write our payload (the witness). 
+ .with_writer(std::io::stderr) .with_ansi(false) - .compact() .with_filter(EnvFilter::from_default_env()), ) .init(); diff --git a/zero/src/parsing.rs b/zero/src/parsing.rs index 5643f82f5..19c49ba83 100644 --- a/zero/src/parsing.rs +++ b/zero/src/parsing.rs @@ -1,5 +1,11 @@ //! Parsing utilities. -use std::{fmt::Display, ops::Add, ops::Range, str::FromStr}; +use std::{ + fmt::Display, + num::ParseIntError, + ops::{Add, Range}, + str::FromStr, + time::Duration, +}; use thiserror::Error; @@ -66,6 +72,11 @@ where } } +pub fn parse_duration(arg: &str) -> Result { + let seconds = arg.parse()?; + Ok(Duration::from_secs(seconds)) +} + #[cfg(test)] mod test { use super::*; diff --git a/zero/src/prover/cli.rs b/zero/src/prover/cli.rs index 87e79bc65..09a64ba7d 100644 --- a/zero/src/prover/cli.rs +++ b/zero/src/prover/cli.rs @@ -23,6 +23,9 @@ pub struct CliProverConfig { /// If true, save the public inputs to disk on error. #[arg(short='i', long, help_heading = HELP_HEADING, default_value_t = false)] save_inputs_on_error: bool, + /// Keep going if a block proof fails. + #[arg(short='K', long, help_heading = HELP_HEADING, default_value_t = false)] + keep_going: bool, /// If true, only test the trace decoder and witness generation without /// generating a proof. 
#[arg(long, help_heading = HELP_HEADING, default_value_t = false)] diff --git a/zero/src/rpc/jerigon.rs b/zero/src/rpc/jerigon.rs index df00bc605..9156fb937 100644 --- a/zero/src/rpc/jerigon.rs +++ b/zero/src/rpc/jerigon.rs @@ -1,12 +1,25 @@ -use alloy::{providers::Provider, rpc::types::eth::BlockId, transports::Transport}; +use core::iter::Iterator; +use std::ops::Deref as _; +use std::time::Duration; + +use alloy::eips::BlockNumberOrTag; +use alloy::{ + providers::Provider, + rpc::types::{eth::BlockId, Block, BlockTransactionsKind}, + transports::Transport, +}; use anyhow::Context as _; +use compat::Compat; +use evm_arithmetization::jumpdest::JumpDestTableWitness; use serde::Deserialize; use serde_json::json; use trace_decoder::{BlockTrace, BlockTraceTriePreImages, CombinedPreImages, TxnInfo}; +use tracing::{debug, warn}; -use super::fetch_other_block_data; +use super::{fetch_other_block_data, JumpdestSrc}; use crate::prover::BlockProverInput; use crate::provider::CachedProvider; +use crate::rpc::jumpdest::{generate_jumpdest_table, get_block_normalized_structlogs}; /// Transaction traces retrieved from Erigon zeroTracer. #[derive(Debug, Deserialize)] @@ -20,6 +33,8 @@ pub async fn block_prover_input( cached_provider: std::sync::Arc>, target_block_id: BlockId, checkpoint_block_number: u64, + jumpdest_src: JumpdestSrc, + fetch_timeout: Duration, ) -> anyhow::Result where ProviderT: Provider, @@ -33,16 +48,53 @@ where "debug_traceBlockByNumber".into(), (target_block_id, json!({"tracer": "zeroTracer"})), ) - .await?; + .await? + .into_iter() + .map(|ztr| ztr.result) + .collect::>(); // Grab block witness info (packed as combined trie pre-images) - let block_witness = cached_provider .get_provider() .await? .raw_request::<_, String>("eth_getWitness".into(), vec![target_block_id]) .await?; + let block: Block = cached_provider + .get_block(target_block_id, BlockTransactionsKind::Full) + .await? 
+ .context("no block")?; + + let block_jumpdest_table_witnesses: Vec> = match jumpdest_src { + JumpdestSrc::ProverSimulation => vec![None; tx_results.len()], + JumpdestSrc::ClientFetchedStructlogs => { + // In case of the error with retrieving structlogs from the server, + // continue without interruption. Equivalent to `ProverSimulation` case. + process_transactions( + &block, + cached_provider.get_provider().await?.deref(), + &tx_results, + &fetch_timeout, + ) + .await + .unwrap_or_else(|e| { + warn!("failed to fetch server structlogs for block {target_block_id}: {e}"); + vec![None; tx_results.len()] + }) + } + JumpdestSrc::Serverside => todo!(), + }; + + // weave in the JDTs + let txn_info = tx_results + .into_iter() + .zip(block_jumpdest_table_witnesses) + .map(|(mut tx_info, jdtw)| { + tx_info.meta.jumpdest_table = jdtw; + tx_info + }) + .collect(); + let other_data = fetch_other_block_data(cached_provider, target_block_id, checkpoint_block_number).await?; @@ -53,9 +105,59 @@ where compact: hex::decode(block_witness.strip_prefix("0x").unwrap_or(&block_witness)) .context("invalid hex returned from call to eth_getWitness")?, }), - txn_info: tx_results.into_iter().map(|it| it.result).collect(), + txn_info, code_db: Default::default(), }, other_data, }) } + +/// Processes the transactions in the given block, generating jumpdest tables +/// and updates the code database +pub async fn process_transactions<'i, ProviderT, TransportT>( + block: &Block, + provider: &ProviderT, + tx_results: &[TxnInfo], + fetch_timeout: &Duration, +) -> anyhow::Result>> +where + ProviderT: Provider, + TransportT: Transport + Clone, +{ + let block_structlogs = get_block_normalized_structlogs( + provider, + &BlockNumberOrTag::from(block.header.number), + fetch_timeout, + ) + .await?; + + let tx_traces = tx_results + .iter() + .map(|tx| tx.traces.iter().map(|(h, t)| (h.compat(), t))); + + let block_jumpdest_tables = block + .transactions + .as_transactions() + .context("no transactions in 
block")? + .iter() + .zip(block_structlogs) + .zip(tx_traces) + .map(|((tx, structlog), tx_trace)| { + structlog.and_then(|it| { + generate_jumpdest_table(tx, &it.1, tx_trace).map_or_else( + |error| { + debug!( + "{}: JumpDestTable generation failed with reason: {:?}", + tx.hash.to_string(), + error + ); + None + }, + Some, + ) + }) + }) + .collect::>(); + + Ok(block_jumpdest_tables) +} diff --git a/zero/src/rpc/jumpdest.rs b/zero/src/rpc/jumpdest.rs new file mode 100644 index 000000000..3f7a90755 --- /dev/null +++ b/zero/src/rpc/jumpdest.rs @@ -0,0 +1,507 @@ +use core::default::Default; +use core::option::Option::None; +use std::collections::HashMap; +use std::ops::Not as _; +use std::time::Duration; + +use ::compat::Compat; +use alloy::eips::BlockNumberOrTag; +use alloy::primitives::Address; +use alloy::providers::ext::DebugApi; +use alloy::providers::Provider; +use alloy::rpc::types::eth::Transaction; +use alloy::rpc::types::trace::geth::{ + GethDebugTracingOptions, GethDefaultTracingOptions, GethTrace, StructLog, TraceResult, +}; +use alloy::transports::Transport; +use alloy_primitives::{TxHash, U256}; +use anyhow::bail; +use anyhow::ensure; +use evm_arithmetization::jumpdest::JumpDestTableWitness; +use keccak_hash::keccak; +use ruint::Uint; +use tokio::time::timeout; +use trace_decoder::is_precompile; +use trace_decoder::ContractCodeUsage; +use trace_decoder::TxnTrace; +use tracing::{trace, warn}; + +use crate::rpc::H256; + +#[derive(Debug, Clone)] +pub struct TxStructLogs(pub Option, pub Vec); + +/// Pass `true` for the components needed. 
+fn structlog_tracing_options(stack: bool, memory: bool, storage: bool) -> GethDebugTracingOptions { + GethDebugTracingOptions { + config: GethDefaultTracingOptions { + disable_stack: Some(!stack), + // needed for CREATE2 + disable_memory: Some(!memory), + disable_storage: Some(!storage), + ..GethDefaultTracingOptions::default() + }, + tracer: None, + ..GethDebugTracingOptions::default() + } +} + +/// Get code hash from a read or write operation of contract code. +fn get_code_hash(usage: &ContractCodeUsage) -> H256 { + match usage { + ContractCodeUsage::Read(hash) => *hash, + ContractCodeUsage::Write(bytes) => keccak(bytes), + } +} + +pub(crate) async fn get_block_normalized_structlogs( + provider: &ProviderT, + block: &BlockNumberOrTag, + fetch_timeout: &Duration, +) -> anyhow::Result>> +where + ProviderT: Provider, + TransportT: Transport + Clone, +{ + let block_stackonly_structlog_traces_fut = + provider.debug_trace_block_by_number(*block, structlog_tracing_options(true, false, false)); + + let block_stackonly_structlog_traces = + match timeout(*fetch_timeout, block_stackonly_structlog_traces_fut).await { + Ok(traces) => traces?, + Err(elapsed) => { + trace!(target: "fetching block structlogs timed out", ?elapsed); + bail!(elapsed); + } + }; + + let block_normalized_stackonly_structlog_traces = block_stackonly_structlog_traces + .into_iter() + .map(|tx_trace_result| match tx_trace_result { + TraceResult::Success { + result, tx_hash, .. + } => Ok(trace_to_tx_structlog(tx_hash, result)), + TraceResult::Error { error, tx_hash } => Err(anyhow::anyhow!( + "error fetching structlog for tx: {tx_hash:?}. Error: {error:?}" + )), + }) + .collect::>, anyhow::Error>>()?; + + Ok(block_normalized_stackonly_structlog_traces) +} + +/// Generate at JUMPDEST table by simulating the call stack in EVM, +/// using a Geth structlog as input. 
+pub(crate) fn generate_jumpdest_table<'a>( + tx: &Transaction, + structlog: &[StructLog], + tx_traces: impl Iterator, +) -> anyhow::Result { + trace!("Generating JUMPDEST table for tx: {}", tx.hash); + + let mut jumpdest_table = JumpDestTableWitness::default(); + + // This map does neither contain the `init` field of Contract Deployment + // transactions nor CREATE, CREATE2 payloads. + let callee_addr_to_code_hash: HashMap = tx_traces + .filter_map(|(callee_addr, trace)| { + trace + .code_usage + .as_ref() + .map(|code| (callee_addr, get_code_hash(code))) + }) + .collect(); + + // REVIEW: will be removed before merge + trace!( + "Transaction: {} is a {}.", + tx.hash, + if tx.to.is_some() { + "message call" + } else { + "contract creation" + } + ); + + let entrypoint_code_hash: H256 = match tx.to { + Some(to_address) if is_precompile(to_address.compat()) => return Ok(jumpdest_table), + Some(to_address) if callee_addr_to_code_hash.contains_key(&to_address).not() => { + return Ok(jumpdest_table) + } + Some(to_address) => callee_addr_to_code_hash[&to_address], + None => { + let init = &tx.input; + keccak(init) + } + }; + + // `None` encodes that previous `entry` was not a JUMP or JUMPI with true + // condition, `Some(jump_target)` encodes we came from a JUMP or JUMPI with + // true condition and target `jump_target`. + let mut prev_jump: Option = None; + + // The next available context. Starts at 1. Never decrements. 
+ let mut next_ctx_available = 1; + // Immediately use context 1; + let mut call_stack = vec![(entrypoint_code_hash, next_ctx_available)]; + next_ctx_available += 1; + + let mut stuctlog_iter = structlog.iter().enumerate().peekable(); + while let Some((step, entry)) = stuctlog_iter.next() { + let op = entry.op.as_str(); + let curr_depth: usize = entry.depth.try_into().unwrap(); + + ensure!(curr_depth <= next_ctx_available, "Structlog is malformed."); + + while curr_depth < call_stack.len() { + call_stack.pop(); + } + + ensure!( + call_stack.is_empty().not(), + "Call stack was unexpectedly empty." + ); + let (code_hash, ctx) = call_stack.last().unwrap(); + + // REVIEW: will be removed before merge + trace!( + step, + curr_depth, + tx_hash = ?tx.hash, + ?code_hash, + ctx, + next_ctx_available, + pc = entry.pc, + pc_hex = format!("{:08x?}", entry.pc), + gas = entry.gas, + gas_cost = entry.gas_cost, + op, + ?entry, + ); + + match op { + "CALL" | "CALLCODE" | "DELEGATECALL" | "STATICCALL" => { + prev_jump = None; + ensure!(entry.stack.as_ref().is_some(), "No evm stack found."); + // We reverse the stack, so the order matches our assembly code. + let evm_stack: Vec<_> = entry.stack.as_ref().unwrap().iter().rev().collect(); + // These opcodes expect 6 or 7 operands on the stack, but for jumpdest-table + // generation we only use 2, and failures will be handled in + // next iteration by popping the stack accordingly. + let operands_used = 2; + + if evm_stack.len() < operands_used { + trace!( "Opcode {op} expected {operands_used} operands at the EVM stack, but only {} were found.", evm_stack.len()); + // Note for future debugging: There may exist edge cases, where the call + // context has been incremented before the call op fails. This should be + // accounted for before this and the following `continue`. The details are + // defined in `sys_calls.asm`. + continue; + } + // This is the same stack index (i.e. 2nd) for all four opcodes. 
See https://ethervm.io/#F1 + let [_gas, address, ..] = evm_stack[..] else { + unreachable!() + }; + + let callee_address = stack_value_to_address(address); + if callee_addr_to_code_hash.contains_key(&callee_address) { + let next_code_hash = callee_addr_to_code_hash[&callee_address]; + call_stack.push((next_code_hash, next_ctx_available)); + }; + + if is_precompile(callee_address.compat()) { + trace!("Called precompile at address {}.", &callee_address); + }; + + if callee_addr_to_code_hash.contains_key(&callee_address).not() + && is_precompile(callee_address.compat()).not() + { + // This case happens if calling an EOA. This is described + // under opcode `STOP`: https://www.evm.codes/#00?fork=cancun + trace!( + "Callee address {} has no associated `code_hash`.", + &callee_address + ); + } + + if let Some((_next_step, next_entry)) = stuctlog_iter.peek() { + let next_depth: usize = next_entry.depth.try_into().unwrap(); + if next_depth < curr_depth { + // The call caused an exception. Skip over incrementing + // `next_ctx_available`. Note that calling an invalid + // contract address should still increment + // `next_ctx_available`, although we stay in the current + // context. + continue; + } + } + // `peek()` only returns `None` if we are at the last entry of + // the Structlog, whether we are on a `CALL` op that throws an + // exception or not. But this is of no consequence to the + // generated Jumpdest table, so we can ignore the case. + + next_ctx_available += 1; + } + "CREATE" | "CREATE2" => { + prev_jump = None; + ensure!(entry.stack.as_ref().is_some(), "No evm stack found."); + // We reverse the stack, so the order matches our assembly code. + let evm_stack: Vec<_> = entry.stack.as_ref().unwrap().iter().rev().collect(); + let operands_used = 3; + + if evm_stack.len() < operands_used { + trace!( "Opcode {op} expected {operands_used} operands at the EVM stack, but only {} were found.", evm_stack.len() ); + continue; + }; + + let [_value, offset, size, ..] 
= evm_stack[..] else { + unreachable!() + }; + if *offset > U256::from(usize::MAX) { + trace!( + "{op}: Offset {offset} was too large to fit in usize {}.", + usize::MAX + ); + continue; + }; + let offset: usize = offset.to(); + + if *size > U256::from(usize::MAX) { + trace!( + "{op}: Size {size} was too large to fit in usize {}.", + usize::MAX + ); + continue; + }; + let size: usize = size.to(); + + /// Structure of Etheruem memory + type Word = [u8; 32]; + const WORDSIZE: usize = std::mem::size_of::(); + + let memory_size = entry.memory.as_ref().unwrap().len() * WORDSIZE; + + if entry.memory.is_none() || offset + size > memory_size { + trace!("Insufficient memory available for {op}. Contract has size {size} and is supposed to be stored between offset {offset} and {}, but memory size is only {memory_size}.", offset+size); + continue; + } + let memory_raw: &[String] = entry.memory.as_ref().unwrap(); + let memory_parsed: Vec> = memory_raw + .iter() + .map(|mem_line| { + let mem_line_parsed = U256::from_str_radix(mem_line, 16)?; + Ok(mem_line_parsed.to_be_bytes()) + }) + .collect(); + let mem_res: anyhow::Result> = memory_parsed.into_iter().collect(); + if mem_res.is_err() { + trace!( + "{op}: Parsing memory failed with error: {}", + mem_res.unwrap_err() + ); + continue; + } + let memory: Vec = mem_res.unwrap().concat(); + + let init_code = &memory[offset..offset + size]; + //code_db.insert(init_code.to_vec()); + let init_code_hash = keccak(init_code); + // let mut init_code_hash; + + call_stack.push((init_code_hash, next_ctx_available)); + + if let Some((_next_step, next_entry)) = stuctlog_iter.peek() { + let next_depth: usize = next_entry.depth.try_into().unwrap(); + if next_depth < curr_depth { + // The call caused an exception. Skip over incrementing + // `next_ctx_available`. 
+ continue; + } else { + // init_code_hash = + // next_entry.code_hash.try_into().unwrap(); + } + } + + next_ctx_available += 1; + } + "JUMP" => { + prev_jump = None; + ensure!(entry.stack.as_ref().is_some(), "No evm stack found."); + // We reverse the stack, so the order matches our assembly code. + let evm_stack: Vec<_> = entry.stack.as_ref().unwrap().iter().rev().collect(); + let operands = 1; + if evm_stack.len() < operands { + trace!( "Opcode {op} expected {operands} operands at the EVM stack, but only {} were found.", evm_stack.len() ); + continue; + } + let [jump_target, ..] = evm_stack[..] else { + unreachable!() + }; + + prev_jump = Some(*jump_target); + } + "JUMPI" => { + prev_jump = None; + ensure!(entry.stack.as_ref().is_some(), "No evm stack found."); + // We reverse the stack, so the order matches our assembly code. + let evm_stack: Vec<_> = entry.stack.as_ref().unwrap().iter().rev().collect(); + let operands = 2; + if evm_stack.len() < operands { + trace!( "Opcode {op} expected {operands} operands at the EVM stack, but only {} were found.", evm_stack.len()); + continue; + }; + + let [jump_target, condition, ..] = evm_stack[..] 
else { + unreachable!() + }; + let jump_condition = condition.is_zero().not(); + + if jump_condition { + prev_jump = Some(*jump_target) + } + } + "JUMPDEST" => { + let mut jumped_here = false; + + if let Some(jmp_target) = prev_jump { + jumped_here = jmp_target == U256::from(entry.pc); + } + prev_jump = None; + + if jumped_here.not() { + trace!( + "{op}: JUMPDESTs at offset {} was reached through fall-through.", + entry.pc + ); + continue; + } + + let jumpdest_offset = TryInto::::try_into(entry.pc); + if jumpdest_offset.is_err() { + trace!( + "{op}: Could not cast offset {} to usize {}.", + entry.pc, + usize::MAX + ); + continue; + } + ensure!(jumpdest_offset.unwrap() < 24576); + jumpdest_table.insert(*code_hash, *ctx, jumpdest_offset.unwrap()); + } + "EXTCODECOPY" | "EXTCODESIZE" => { + prev_jump = None; + next_ctx_available += 1; + } + _ => { + prev_jump = None; + } + } + } + Ok(jumpdest_table) +} + +fn stack_value_to_address(operand: &Uint<256, 4>) -> Address { + let all_bytes: [u8; 32] = operand.compat().into(); + let mut lower_20_bytes = [0u8; 20]; + // Based on `__compat_primitive_types::H160::from(H256::from(all_bytes)). + // into()`. + lower_20_bytes[0..20].copy_from_slice(&all_bytes[32 - 20..32]); + Address::from(lower_20_bytes) +} + +fn trace_to_tx_structlog(tx_hash: Option, trace: GethTrace) -> Option { + match trace { + GethTrace::Default(structlog_frame) => { + Some(TxStructLogs(tx_hash, structlog_frame.struct_logs)) + } + GethTrace::JS(it) => { + let default_frame = compat::deserialize(it) + .inspect_err(|e| warn!("failed to deserialize js default frame {e:?}")) + .ok()?; + Some(TxStructLogs(tx_hash, default_frame.struct_logs)) + } + _ => None, + } +} + +/// This module exists as a workaround for parsing `StructLog`. The `error` +/// field is a string in Geth and Alloy but an object in Erigon. A PR[\^1] has +/// been merged to fix this upstream and should eventually render this +/// unnecessary. 
[\^1]: `https://github.com/erigontech/erigon/pull/12089` +mod compat { + use std::{collections::BTreeMap, fmt, iter}; + + use alloy::rpc::types::trace::geth::{DefaultFrame, StructLog}; + use alloy_primitives::{Bytes, B256, U256}; + use serde::{de::SeqAccess, Deserialize, Deserializer}; + + pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result { + _DefaultFrame::deserialize(d) + } + + /// The `error` field is a `string` in `geth` etc. but an `object` in + /// `erigon`. + fn error<'de, D: Deserializer<'de>>(d: D) -> Result, D::Error> { + #[derive(Deserialize)] + #[serde(untagged)] + enum Error { + String(String), + #[allow(dead_code)] + Object(serde_json::Map), + } + Ok(match Error::deserialize(d)? { + Error::String(it) => Some(it), + Error::Object(_) => None, + }) + } + + #[derive(Deserialize)] + #[serde(remote = "DefaultFrame", rename_all = "camelCase")] + struct _DefaultFrame { + failed: bool, + gas: u64, + return_value: Bytes, + #[serde(deserialize_with = "vec_structlog")] + struct_logs: Vec, + } + + fn vec_structlog<'de, D: Deserializer<'de>>(d: D) -> Result, D::Error> { + struct Visitor; + impl<'de> serde::de::Visitor<'de> for Visitor { + type Value = Vec; + fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("an array of `StructLog`") + } + fn visit_seq>(self, mut seq: A) -> Result { + #[derive(Deserialize)] + struct With(#[serde(with = "_StructLog")] StructLog); + let v = iter::from_fn(|| seq.next_element().transpose()) + .map(|it| it.map(|With(it)| it)) + .collect::>()?; + Ok(v) + } + } + + d.deserialize_seq(Visitor) + } + + #[derive(Deserialize)] + #[serde(remote = "StructLog", rename_all = "camelCase")] + struct _StructLog { + pc: u64, + op: String, + gas: u64, + gas_cost: u64, + depth: u64, + #[serde(default, deserialize_with = "error")] + error: Option, + stack: Option>, + return_data: Option, + memory: Option>, + #[serde(rename = "memSize")] + memory_size: Option, + storage: Option>, + #[serde(rename = "refund")] + 
refund_counter: Option, + } +} diff --git a/zero/src/rpc/mod.rs b/zero/src/rpc/mod.rs index 016c1d242..ebee7bb07 100644 --- a/zero/src/rpc/mod.rs +++ b/zero/src/rpc/mod.rs @@ -1,6 +1,6 @@ zk_evm_common::check_chain_features!(); -use std::sync::Arc; +use std::{sync::Arc, time::Duration}; use __compat_primitive_types::{H256, U256}; use alloy::{ @@ -24,6 +24,7 @@ use tracing::warn; use crate::prover::BlockProverInput; pub mod jerigon; +pub mod jumpdest; pub mod native; pub mod retry; @@ -40,11 +41,21 @@ pub enum RpcType { Native, } +/// The Jumpdest source type. +#[derive(ValueEnum, Clone, Debug, Copy)] +pub enum JumpdestSrc { + ProverSimulation, + ClientFetchedStructlogs, + Serverside, // later +} + /// Obtain the prover input for one block pub async fn block_prover_input( cached_provider: Arc>, block_id: BlockId, checkpoint_block_number: u64, + jumpdest_src: JumpdestSrc, + fetch_timeout: Duration, ) -> Result where ProviderT: Provider, @@ -52,10 +63,24 @@ where { match cached_provider.rpc_type { RpcType::Jerigon => { - jerigon::block_prover_input(cached_provider, block_id, checkpoint_block_number).await + jerigon::block_prover_input( + cached_provider, + block_id, + checkpoint_block_number, + jumpdest_src, + fetch_timeout, + ) + .await } RpcType::Native => { - native::block_prover_input(cached_provider, block_id, checkpoint_block_number).await + native::block_prover_input( + cached_provider, + block_id, + checkpoint_block_number, + jumpdest_src, + fetch_timeout, + ) + .await } } } diff --git a/zero/src/rpc/native/mod.rs b/zero/src/rpc/native/mod.rs index a4dc7e0c6..dda71a85e 100644 --- a/zero/src/rpc/native/mod.rs +++ b/zero/src/rpc/native/mod.rs @@ -1,6 +1,5 @@ -use std::collections::BTreeSet; -use std::ops::Deref; use std::sync::Arc; +use std::{ops::Deref, time::Duration}; use alloy::{ providers::Provider, @@ -16,20 +15,24 @@ use crate::provider::CachedProvider; mod state; mod txn; -type CodeDb = BTreeSet>; +pub use txn::{process_transaction, 
process_transactions}; + +use super::JumpdestSrc; /// Fetches the prover input for the given BlockId. pub async fn block_prover_input( provider: Arc>, block_number: BlockId, checkpoint_block_number: u64, + jumpdest_src: JumpdestSrc, + fetch_timeout: Duration, ) -> anyhow::Result where ProviderT: Provider, TransportT: Transport + Clone, { let (block_trace, other_data) = try_join!( - process_block_trace(provider.clone(), block_number), + process_block_trace(provider.clone(), block_number, jumpdest_src, &fetch_timeout), crate::rpc::fetch_other_block_data(provider.clone(), block_number, checkpoint_block_number) )?; @@ -40,9 +43,11 @@ where } /// Processes the block with the given block number and returns the block trace. -async fn process_block_trace( +pub(crate) async fn process_block_trace( cached_provider: Arc>, block_number: BlockId, + jumpdest_src: JumpdestSrc, + fetch_timeout: &Duration, ) -> anyhow::Result where ProviderT: Provider, @@ -53,8 +58,13 @@ where .await? .ok_or(anyhow::anyhow!("block not found {}", block_number))?; - let (code_db, txn_info) = - txn::process_transactions(&block, cached_provider.get_provider().await?.deref()).await?; + let (code_db, txn_info) = txn::process_transactions( + &block, + cached_provider.get_provider().await?.deref(), + jumpdest_src, + fetch_timeout, + ) + .await?; let trie_pre_images = state::process_state_witness(cached_provider, block, &txn_info).await?; Ok(BlockTrace { diff --git a/zero/src/rpc/native/txn.rs b/zero/src/rpc/native/txn.rs index 79de06d79..c9655f543 100644 --- a/zero/src/rpc/native/txn.rs +++ b/zero/src/rpc/native/txn.rs @@ -1,6 +1,10 @@ +use core::option::Option::None; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; +use std::time::Duration; use __compat_primitive_types::{H256, U256}; +use alloy::eips::BlockNumberOrTag; +use alloy::rpc::types::trace::geth::TraceResult; use alloy::{ primitives::{keccak256, Address, B256}, providers::{ @@ -9,38 +13,120 @@ use alloy::{ Provider, }, rpc::types::{ 
- eth::Transaction, - eth::{AccessList, Block}, + eth::{AccessList, Block, Transaction}, trace::geth::{ - AccountState, DiffMode, GethDebugBuiltInTracerType, GethTrace, PreStateConfig, - PreStateFrame, PreStateMode, + AccountState, DiffMode, GethDebugBuiltInTracerType, GethDebugTracerType, + GethDebugTracingOptions, GethTrace, PreStateConfig, PreStateFrame, PreStateMode, + StructLog, }, - trace::geth::{GethDebugTracerType, GethDebugTracingOptions}, }, transports::Transport, }; -use anyhow::Context as _; -use compat::Compat; +use anyhow::{bail, Context as _, Ok}; +use evm_arithmetization::{jumpdest::JumpDestTableWitness, CodeDb}; use futures::stream::{FuturesOrdered, TryStreamExt}; use trace_decoder::{ContractCodeUsage, TxnInfo, TxnMeta, TxnTrace}; +use tracing::{debug, warn}; -use super::CodeDb; +use crate::rpc::jumpdest::get_block_normalized_structlogs; +use crate::rpc::Compat; +use crate::rpc::{ + jumpdest::{self}, + JumpdestSrc, +}; +pub(crate) async fn get_block_prestate_traces( + provider: &ProviderT, + block: &BlockNumberOrTag, + tracing_options: GethDebugTracingOptions, +) -> anyhow::Result> +where + ProviderT: Provider, + TransportT: Transport + Clone, +{ + let block_prestate_traces = provider + .debug_trace_block_by_number(*block, tracing_options) + .await?; + + block_prestate_traces + .into_iter() + .map(|trace_result| match trace_result { + TraceResult::Success { result, .. } => Ok(result), + TraceResult::Error { error, .. } => { + bail!("error fetching block prestate traces: {:?}", error) + } + }) + .collect::, anyhow::Error>>() +} /// Processes the transactions in the given block and updates the code db. 
-pub(super) async fn process_transactions( +pub async fn process_transactions( block: &Block, provider: &ProviderT, + jumpdest_src: JumpdestSrc, + fetch_timeout: &Duration, ) -> anyhow::Result<(CodeDb, Vec)> where ProviderT: Provider, TransportT: Transport + Clone, { + // Get block prestate traces + let block_prestate_trace = get_block_prestate_traces( + provider, + &BlockNumberOrTag::from(block.header.number), + prestate_tracing_options(false), + ) + .await?; + + // Get block diff traces + let block_diff_trace = get_block_prestate_traces( + provider, + &BlockNumberOrTag::from(block.header.number), + prestate_tracing_options(true), + ) + .await?; + + let block_structlogs = match jumpdest_src { + JumpdestSrc::ProverSimulation => vec![None; block_prestate_trace.len()], + JumpdestSrc::ClientFetchedStructlogs => { + // In case of the error with retrieving structlogs from the server, + // continue without interruption. Equivalent to `ProverSimulation` case. + get_block_normalized_structlogs( + provider, + &BlockNumberOrTag::from(block.header.number), + fetch_timeout, + ) + .await + .unwrap_or_else(|e| { + warn!( + "failed to fetch server structlogs for block {}: {e}", + block.header.number + ); + vec![None; block_prestate_trace.len()] + }) + .into_iter() + .map(|tx_struct_log| tx_struct_log.map(|it| it.1)) + .collect() + } + JumpdestSrc::Serverside => todo!( + "Not implemented. See https://github.com/0xPolygonZero/erigon/issues/20 for details." + ), + }; + block .transactions .as_transactions() .context("No transactions in block")? 
.iter() - .map(|tx| process_transaction(provider, tx)) + .zip( + block_prestate_trace.into_iter().zip( + block_diff_trace + .into_iter() + .zip(block_structlogs.into_iter()), + ), + ) + .map(|(tx, (pre_trace, (diff_trace, structlog)))| { + process_transaction(provider, tx, pre_trace, diff_trace, structlog) + }) .collect::>() .try_fold( (BTreeSet::new(), Vec::new()), @@ -55,25 +141,22 @@ where /// Processes the transaction with the given transaction hash and updates the /// accounts state. -async fn process_transaction( +pub async fn process_transaction( provider: &ProviderT, tx: &Transaction, + pre_trace: GethTrace, + diff_trace: GethTrace, + structlog_opt: Option>, ) -> anyhow::Result<(CodeDb, TxnInfo)> where ProviderT: Provider, TransportT: Transport + Clone, { - let (tx_receipt, pre_trace, diff_trace) = fetch_tx_data(provider, &tx.hash).await?; + let tx_receipt = fetch_tx_receipt(provider, &tx.hash).await?; let tx_status = tx_receipt.status(); let tx_receipt = tx_receipt.map_inner(rlp::map_receipt_envelope); let access_list = parse_access_list(tx.access_list.as_ref()); - let tx_meta = TxnMeta { - byte_code: ::TxEnvelope::try_from(tx.clone())?.encoded_2718(), - new_receipt_trie_node_byte: alloy::rlp::encode(tx_receipt.inner), - gas_used: tx_receipt.gas_used as u64, - }; - let (code_db, mut tx_traces) = match (pre_trace, diff_trace) { ( GethTrace::PreStateTracer(PreStateFrame::Default(read)), @@ -85,7 +168,29 @@ where // Handle case when transaction failed and a contract creation was reverted if !tx_status && tx_receipt.contract_address.is_some() { tx_traces.insert(tx_receipt.contract_address.unwrap(), TxnTrace::default()); - } + }; + + let jumpdest_table: Option = structlog_opt.and_then(|struct_logs| { + jumpdest::generate_jumpdest_table(tx, &struct_logs, tx_traces.iter().map(|(a, t)| (*a, t))) + .map_or_else( + |error| { + debug!( + "{}: JumpDestTable generation failed with reason: {:?}", + tx.hash.to_string(), + error + ); + None + }, + Some, + ) + }); + + let 
tx_meta = TxnMeta { + byte_code: ::TxEnvelope::try_from(tx.clone())?.encoded_2718(), + new_receipt_trie_node_byte: alloy::rlp::encode(tx_receipt.inner), + gas_used: tx_receipt.gas_used as u64, + jumpdest_table, + }; Ok(( code_db, @@ -100,26 +205,16 @@ where } /// Fetches the transaction data for the given transaction hash. -async fn fetch_tx_data( +async fn fetch_tx_receipt( provider: &ProviderT, tx_hash: &B256, -) -> anyhow::Result<(::ReceiptResponse, GethTrace, GethTrace), anyhow::Error> +) -> anyhow::Result<::ReceiptResponse> where ProviderT: Provider, TransportT: Transport + Clone, { - let tx_receipt_fut = provider.get_transaction_receipt(*tx_hash); - let pre_trace_fut = provider.debug_trace_transaction(*tx_hash, prestate_tracing_options(false)); - let diff_trace_fut = provider.debug_trace_transaction(*tx_hash, prestate_tracing_options(true)); - - let (tx_receipt, pre_trace, diff_trace) = - futures::try_join!(tx_receipt_fut, pre_trace_fut, diff_trace_fut,)?; - - Ok(( - tx_receipt.context("Transaction receipt not found.")?, - pre_trace, - diff_trace, - )) + let tx_receipt = provider.get_transaction_receipt(*tx_hash).await?; + tx_receipt.context("Transaction receipt not found.") } /// Parse the access list data into a hashmap.