feat: stdio parallel proving (#109)

* feat: fearigon witness extractor initial commit * fix: update rpc and stdio test case * fix: error output * fix: script * fix: verifier and rename stdio_test.sh * fix: rename prove_blocks to prove_jerigon * fix: verifier * fix: script * fix: remove repeated output * fix: verification message * fix: update script and add ci multi block proving * fix: typo * fix: rebase on top of discard-intermediary-proofs * fix: invalid printout * fix: cleanup * fix: cleanup * fix: test_only stdio output * fix: jerigon mode output to stdout * fix: printout * fix: printout 2 * fix: printout 3 * fix: printout 4
0xPolygonZero · Jun 14, 2024 · ef943b4 · ef943b4
1 parent b6bc8c8
commit ef943b4
Show file tree

Hide file tree

Showing 12 changed files with 4,024 additions and 129 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -76,7 +76,7 @@ jobs:
       - name: Run the script
         run: |
           pushd tools
-          ./simple_test.sh
+          ./prove_stdio.sh artifacts/witness_b19240705.json
 
   simple_proof_witness_only:
     name: Execute bash script to generate the proof witness for a small block.
@@ -89,4 +89,17 @@ jobs:
       - name: Run the script
         run: |
           pushd tools
-          ./simple_test.sh test_only
+          ./prove_stdio.sh artifacts/witness_b19240705.json test_only
+
+  multi_blocks_proof_regular:
+    name: Execute bash script to generate and verify a proof for multiple blocks using parallel proving.
+    runs-on: zero-ci
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Run the script
+        run: |
+          pushd tools
+          ./prove_stdio.sh artifacts/witness_b2_b7.json
diff --git a/README.md b/README.md
@@ -364,16 +364,16 @@ For testing proof generation for blocks, the `testing` branch should be used.
 
 ### Proving Blocks
 
-If you want to generate a full block proof, you can use `tools/prove_blocks.sh`:
+If you want to generate a full block proof, you can use `tools/prove_jerigon.sh`:
 
 ```sh
-./prove_blocks.sh <BLOCK_START> <BLOCK_END> <FULL_NODE_ENDPOINT> <IGNORE_PREVIOUS_PROOFS>
+./prove_jerigon.sh <BLOCK_START> <BLOCK_END> <FULL_NODE_ENDPOINT> <IGNORE_PREVIOUS_PROOFS>
 ```
 
 Which may look like this:
 
 ```sh
-./prove_blocks.sh 17 18 http://127.0.0.1:8545 false
+./prove_jerigon.sh 17 18 http://127.0.0.1:8545 false
 ```
 
 Which will attempt to generate proofs for blocks `17` & `18` consecutively and incorporate the previous block proof during generation.
@@ -385,16 +385,16 @@ A few other notes:
 
 ### Generating Witnesses Only
 
-If you want to test a block without the high CPU & memory requirements that come with creating a full proof, you can instead generate only the witness using `tools/prove_blocks.sh` in the `test_only` mode:
+If you want to test a block without the high CPU & memory requirements that come with creating a full proof, you can instead generate only the witness using `tools/prove_jerigon.sh` in the `test_only` mode:
 
 ```sh
-./prove_blocks.sh <START_BLOCK> <END_BLOCK> <FULL_NODE_ENDPOINT> <IGNORE_PREVIOUS_PROOFS> test_only
+./prove_jerigon.sh <START_BLOCK> <END_BLOCK> <FULL_NODE_ENDPOINT> <IGNORE_PREVIOUS_PROOFS> test_only
 ```
 
 Filled in:
 
 ```sh
-./prove_blocks.sh 18299898 18299899 http://34.89.57.138:8545 true test_only
+./prove_jerigon.sh 18299898 18299899 http://34.89.57.138:8545 true test_only
 ```
 
 Finally, note that both of these testing scripts force proof generation to be sequential by allowing only one worker. As a result, the timings are not a realistic representation of performance, but the debugging logs are much easier to follow.

diff --git a/leader/src/jerigon.rs b/leader/src/jerigon.rs
@@ -1,3 +1,4 @@
+use std::io::Write;
 use std::path::PathBuf;
 
 use alloy::providers::RootProvider;
@@ -6,7 +7,7 @@ use common::block_interval::BlockInterval;
 use common::fs::generate_block_proof_file_name;
 use paladin::runtime::Runtime;
 use proof_gen::proof_types::GeneratedBlockProof;
-use tracing::{error, warn};
+use tracing::{error, info, warn};
 
 #[derive(Debug, Default)]
 pub struct ProofParams {
@@ -31,6 +32,12 @@ pub(crate) async fn jerigon_main(
     )
     .await?;
 
+    if cfg!(feature = "test_only") {
+        info!("All proof witnesses have been generated successfully.");
+    } else {
+        info!("All proofs have been generated successfully.");
+    }
+
     // If `keep_intermediate_proofs` is not set we only keep the last block
     // proof from the interval. It contains all the necessary information to
     // verify the whole sequence.
@@ -41,24 +48,46 @@ pub(crate) async fn jerigon_main(
             params.save_inputs_on_error,
             params.proof_output_dir.clone(),
         )
-        .await?;
+        .await;
     runtime.close().await?;
+    let proved_blocks = proved_blocks?;
 
     if params.keep_intermediate_proofs {
-        warn!("Skipping cleanup, intermediate proofs are kept");
+        if params.proof_output_dir.is_some() {
+            // All proof files (including intermediary) are written to disk and kept
+            warn!("Skipping cleanup, intermediate proof files are kept");
+        } else {
+            // Output all proofs to stdout
+            std::io::stdout().write_all(&serde_json::to_vec(
+                &proved_blocks
+                    .into_iter()
+                    .filter_map(|(_, block)| block)
+                    .collect::<Vec<_>>(),
+            )?)?;
+        }
     } else if let Some(proof_output_dir) = params.proof_output_dir.as_ref() {
+        // Remove intermediary proof files
         proved_blocks
             .into_iter()
             .rev()
             .skip(1)
-            .map(|b| generate_block_proof_file_name(&proof_output_dir.to_str(), b))
+            .map(|(block_number, _)| {
+                generate_block_proof_file_name(&proof_output_dir.to_str(), block_number)
+            })
             .for_each(|path| {
                 if let Err(e) = std::fs::remove_file(path) {
                     error!("Failed to remove intermediate proof file: {e}");
                 }
             });
     } else {
-        // Proofs are written to stdio, so no need to clean up
+        // Output only last proof to stdout
+        if let Some(last_block) = proved_blocks
+            .into_iter()
+            .filter_map(|(_, block)| block)
+            .last()
+        {
+            std::io::stdout().write_all(&serde_json::to_vec(&last_block)?)?;
+        }
     }
 
     Ok(())

diff --git a/leader/src/stdio.rs b/leader/src/stdio.rs
@@ -3,7 +3,8 @@ use std::io::{Read, Write};
 use anyhow::Result;
 use paladin::runtime::Runtime;
 use proof_gen::proof_types::GeneratedBlockProof;
-use prover::BlockProverInput;
+use prover::ProverInput;
+use tracing::info;
 
 /// The main function for the stdio mode.
 pub(crate) async fn stdio_main(
@@ -15,18 +16,27 @@ pub(crate) async fn stdio_main(
     std::io::stdin().read_to_string(&mut buffer)?;
 
     let des = &mut serde_json::Deserializer::from_str(&buffer);
-    let input: BlockProverInput = serde_path_to_error::deserialize(des)?;
-    let proof = input
-        .prove(
-            &runtime,
-            previous.map(futures::future::ok),
-            save_inputs_on_error,
-        )
+    let prover_input = ProverInput {
+        blocks: serde_path_to_error::deserialize(des)?,
+    };
+
+    let proved_blocks = prover_input
+        .prove(&runtime, previous, save_inputs_on_error, None)
         .await;
     runtime.close().await?;
-    let proof = proof?;
+    let proved_blocks = proved_blocks?;
+
+    if cfg!(feature = "test_only") {
+        info!("All proof witnesses have been generated successfully.");
+    } else {
+        info!("All proofs have been generated successfully.");
+    }
 
-    std::io::stdout().write_all(&serde_json::to_vec(&proof)?)?;
+    let proofs: Vec<GeneratedBlockProof> = proved_blocks
+        .into_iter()
+        .filter_map(|(_, proof)| proof)
+        .collect();
+    std::io::stdout().write_all(&serde_json::to_vec(&proofs)?)?;
 
     Ok(())
 }
diff --git a/prover/src/lib.rs b/prover/src/lib.rs
@@ -46,7 +46,6 @@ impl BlockProverInput {
         use anyhow::Context as _;
 
         let block_number = self.get_block_number();
-        info!("Proving block {block_number}");
 
         let other_data = self.other_data;
         let txs = self.block_trace.into_txn_proof_gen_ir(
@@ -113,8 +112,6 @@ impl BlockProverInput {
             .try_collect::<Vec<_>>()
             .await?;
 
-        info!("Successfully generated witness for block {block_number}.");
-
         // Dummy proof to match expected output type.
         Ok(GeneratedBlockProof {
             b_height: block_number
@@ -131,13 +128,16 @@ pub struct ProverInput {
 }
 
 impl ProverInput {
+    /// Prove all the blocks in the input.
+    /// Return the list of block numbers that are proved and if the proof data
+    /// is not saved to disk, return the generated block proofs as well.
     pub async fn prove(
         self,
         runtime: &Runtime,
         previous_proof: Option<GeneratedBlockProof>,
         save_inputs_on_error: bool,
         proof_output_dir: Option<PathBuf>,
-    ) -> Result<Vec<BlockNumber>> {
+    ) -> Result<Vec<(BlockNumber, Option<GeneratedBlockProof>)>> {
         let mut prev: Option<BoxFuture<Result<GeneratedBlockProof>>> =
             previous_proof.map(|proof| Box::pin(futures::future::ok(proof)) as BoxFuture<_>);
 
@@ -158,14 +158,20 @@ impl ProverInput {
                         let proof = proof?;
                         let block_number = proof.b_height;
 
-                        // Write latest generated proof to disk or stdout
-                        ProverInput::write_proof(proof_output_dir, &proof).await?;
+                        // Write latest generated proof to disk if proof_output_dir is provided
+                        let return_proof: Option<GeneratedBlockProof> =
+                            if proof_output_dir.is_some() {
+                                ProverInput::write_proof(proof_output_dir, &proof).await?;
+                                None
+                            } else {
+                                Some(proof.clone())
+                            };
 
                         if tx.send(proof).is_err() {
                             anyhow::bail!("Failed to send proof");
                         }
 
-                        Ok(block_number)
+                        Ok((block_number, return_proof))
                     })
                     .boxed();
 

diff --git a/rpc/src/main.rs b/rpc/src/main.rs
@@ -10,15 +10,19 @@ use url::Url;
 pub enum Args {
     /// Fetch and generate prover input from the RPC endpoint.
     Fetch {
+        // Starting block of interval to fetch
+        #[arg(short, long)]
+        start_block: u64,
+        // End block of interval to fetch
+        #[arg(short, long)]
+        end_block: u64,
         /// The RPC URL.
         #[arg(short = 'u', long, value_hint = ValueHint::Url)]
         rpc_url: Url,
-        /// The block number.
+        /// The checkpoint block number. If not provided,
+        /// block before the `start_block` is the checkpoint
         #[arg(short, long)]
-        block_number: BlockId,
-        /// The checkpoint block number.
-        #[arg(short, long, default_value = "0")]
-        checkpoint_block_number: BlockId,
+        checkpoint_block_number: Option<BlockId>,
     },
 }
 
@@ -34,18 +38,24 @@ async fn main() -> anyhow::Result<()> {
         .init();
 
     let Args::Fetch {
+        start_block,
+        end_block,
         rpc_url,
-        block_number,
         checkpoint_block_number,
     } = Args::parse();
+
+    let checkpoint_block_number = checkpoint_block_number.unwrap_or((start_block - 1).into());
+    let block_interval = BlockInterval::Range(start_block..end_block + 1);
+
+    // Retrieve prover input from the Erigon node
     let prover_input = rpc::prover_input(
         RootProvider::new_http(rpc_url),
-        BlockInterval::SingleBlockId(block_number),
+        block_interval,
         checkpoint_block_number,
     )
     .await?;
 
-    serde_json::to_writer_pretty(io::stdout(), &prover_input)?;
+    serde_json::to_writer_pretty(io::stdout(), &prover_input.blocks)?;
 
     Ok(())
 }
diff --git a/tools/artifacts/witness_b19240705.json b/tools/artifacts/witness_b19240705.json