Add: automated spell check to CI pipeline #551

Open
wants to merge 28 commits into base: main

Commits (28)
b53cddb
Create spellcheck.yml
dramarereg Jan 8, 2025
0603af0
Create .codespellrc
dramarereg Jan 8, 2025
bde5949
typo corr groth-16.md
dramarereg Jan 8, 2025
37387d5
typo corr proof-size-breakdown.md
dramarereg Jan 8, 2025
ac04b28
typo corr m-extension.md
dramarereg Jan 8, 2025
1e2ca8b
typo corr r1cs_constraints.md
dramarereg Jan 8, 2025
d481241
typo corr read_write_memory.md
dramarereg Jan 8, 2025
f7e878c
typo corr sparse-constraint-systems.md
dramarereg Jan 8, 2025
e19e8d5
typo corr multilinear-extensions.md
dramarereg Jan 8, 2025
7cbb470
typo corr sparse_interleaved_poly.rs
dramarereg Jan 8, 2025
e1eeccc
typo corr zeromorph.rs
dramarereg Jan 8, 2025
2343c1b
typo corr grand_product_quarks.rs
dramarereg Jan 8, 2025
50b629a
typo corr sparse_grand_product.rs
dramarereg Jan 8, 2025
876d99f
typo corr sumcheck.rs
dramarereg Jan 8, 2025
4759afa
typo corr sol_types.rs
dramarereg Jan 8, 2025
b66b308
typo corr thread.rs
dramarereg Jan 8, 2025
9dc34e8
typo corr inputs.rs
dramarereg Jan 8, 2025
d7a8ab7
typo corr spartan.rs
dramarereg Jan 8, 2025
4ec8fb8
typo corr special_polys.rs
dramarereg Jan 8, 2025
35cd2ac
typo corr remu.rs
dramarereg Jan 8, 2025
e204a22
typo corr memory_checking.rs
dramarereg Jan 8, 2025
6cab1e9
typo corr cpu.rs
dramarereg Jan 8, 2025
703480f
typo corr mmu.rs
dramarereg Jan 8, 2025
89d7a8b
typo corr mod.rs
dramarereg Jan 8, 2025
70346ab
typo corr virtio_block_disk.rs
dramarereg Jan 8, 2025
31f49cb
Update .codespellrc
dramarereg Jan 8, 2025
633dfe9
Merge pull request #1 from dramarereg/spell
dramarereg Jan 8, 2025
d40f2c9
Update spellcheck.yml
dramarereg Jan 8, 2025
3 changes: 3 additions & 0 deletions .codespellrc
@@ -0,0 +1,3 @@
[codespell]
ignore-words-list = Crate,crate,ans,sie,SIE,FLE,THRE,Bootle
skip = *.json,*.yml,*.toml,*.lock
22 changes: 22 additions & 0 deletions .github/workflows/spellcheck.yml
@@ -0,0 +1,22 @@
name: Codespell Check

on:
  pull_request:
  push:
    branches:
      - main
jobs:
  codespell:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Install codespell
        run: |
          pip install codespell

      - name: Run codespell
        run: |
          codespell --config .codespellrc
2 changes: 1 addition & 1 deletion book/src/background/multilinear-extensions.md
@@ -29,4 +29,4 @@ for i in 0..half {
```

### Multi Variable Binding
Another common algorithm is to take the MLE $\tilde{f}(x_1, ... x_v)$ and compute its evaluation at a single $v$-variate point outside the boolean hypercube $x \in \mathbb{F}^v$. This algorithm can be performed in $O(n)$ time by preforming the single variable binding algorithm $\log(n)$ times. The time spent on $i$'th variable binding is $O(n/2^i)$, so the total time across all $\log n$ bindings is proportional to $\sum_{i=1}^{\log n} n/2^i = O(n)$.
Another common algorithm is to take the MLE $\tilde{f}(x_1, ... x_v)$ and compute its evaluation at a single $v$-variate point outside the boolean hypercube $x \in \mathbb{F}^v$. This algorithm can be performed in $O(n)$ time by performing the single variable binding algorithm $\log(n)$ times. The time spent on $i$'th variable binding is $O(n/2^i)$, so the total time across all $\log n$ bindings is proportional to $\sum_{i=1}^{\log n} n/2^i = O(n)$.
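
For illustration only (not the book's or Jolt's actual code, and using plain `f64` in place of field elements), the binding procedure described above can be sketched as:

```rust
/// Bind the top variable of an MLE's evaluation table to the point `r`.
/// One call halves the table; repeating it for all log(n) variables costs
/// O(n) total, matching the analysis above.
fn bind_top_variable(evals: &mut Vec<f64>, r: f64) {
    let half = evals.len() / 2;
    for i in 0..half {
        // Interpolate between the two halves of the table at r.
        evals[i] = (1.0 - r) * evals[i] + r * evals[i + half];
    }
    evals.truncate(half);
}

/// Evaluate the MLE at an arbitrary point by binding one variable per round.
fn evaluate_mle(mut evals: Vec<f64>, point: &[f64]) -> f64 {
    for &r in point {
        bind_top_variable(&mut evals, r);
    }
    evals[0]
}
```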
2 changes: 1 addition & 1 deletion book/src/future/groth-16.md
@@ -8,7 +8,7 @@ We call directly representing the Jolt verifier (with HyperKZG polynomial commit
as constraints to then feeding those constraints into Groth16 "naive composition". Unfortunately, this naive procedure
will result
in over a hundred millions of constraints. Applying Groth16
to such a large constraint system will result in far more latency than we'd lik, (and may even be impossible over the BN254 scalar field
to such a large constraint system will result in far more latency than we'd like, (and may even be impossible over the BN254 scalar field
because that field only supports FFTs of length $2^{27}$.
Below, we describe alternate ways forward.

2 changes: 1 addition & 1 deletion book/src/future/proof-size-breakdown.md
@@ -47,7 +47,7 @@ and one attests to the validity of initialization of memory plus a final pass ov
The reason we do not run these grand products "together as one big grand product" is they are
each potentially of different sizes,
and it is annoying (though possible) to "batch prove" differently-sized grand products together.
However, a relatively easy way to get down to 3 grand prodcuts is to set the memory size
However, a relatively easy way to get down to 3 grand products is to set the memory size
in each of the three categories above to equal the number of reads/writes. This simply involves
padding the memory with zeros to make it equal in size to
the number of reads/writes into the memory (i.e., NUM_CYCLES). Doing this will not substantially increase
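
A minimal sketch of the padding idea described above (illustrative only, not code from this diff):

```rust
/// Pad a memory vector with zeros so its size equals the number of reads/writes
/// (NUM_CYCLES), letting the memory-related grand products all share one size.
fn pad_memory_to_num_cycles(mut memory: Vec<u64>, num_cycles: usize) -> Vec<u64> {
    assert!(memory.len() <= num_cycles, "sketch assumes memory fits within the trace length");
    memory.resize(num_cycles, 0);
    memory
}
```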
2 changes: 1 addition & 1 deletion book/src/how/m-extension.md
@@ -113,5 +113,5 @@ If the current instruction is virtual, we can constrain the next instruction in
next instruction in the bytecode.
We observe that the virtual sequences used in the M extension don't involve jumps or branches,
so this should always hold, *except* if we encounter a virtual instruction followed by a padding instruction.
But that should never happend because an execution trace should always end with some return handling,
But that should never happen because an execution trace should always end with some return handling,
which shouldn't involve a virtual sequence.
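
As an illustrative restatement of the property being constrained (not Jolt's actual R1CS encoding):

```rust
/// Sketch only: if step i executes a virtual instruction and is not the last step,
/// the next step in the trace must execute the next instruction in the bytecode.
fn virtual_sequencing_holds(is_virtual: &[bool], bytecode_row: &[usize]) -> bool {
    (0..is_virtual.len().saturating_sub(1))
        .all(|i| !is_virtual[i] || bytecode_row[i + 1] == bytecode_row[i] + 1)
}
```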
2 changes: 1 addition & 1 deletion book/src/how/r1cs_constraints.md
@@ -44,7 +44,7 @@ the preprocessed bytecode in Jolt.
1. `ConcatLookupQueryChunks`: Indicates whether the instruction performs a concat-type lookup.
1. `Virtual`: 1 if the instruction is "virtual", as defined in Section 6.1 of the Jolt paper.
1. `Assert`: 1 if the instruction is an assert, as defined in Section 6.1.1 of the Jolt paper.
1. `DoNotUpdatePC`: Used in virtual sequences; the program counter should be the same for the full seqeuence.
1. `DoNotUpdatePC`: Used in virtual sequences; the program counter should be the same for the full sequence.
* Instruction flags: these are the unary bits used to indicate instruction is executed at a given step.
There are as many per step as the number of unique instruction lookup tables in Jolt.

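
A hypothetical sketch of how the circuit flags named above might be represented; the variant names mirror the documentation, but Jolt's actual source may differ:

```rust
/// Hypothetical flag enum; not copied from Jolt's source.
#[derive(Clone, Copy)]
enum CircuitFlagSketch {
    ConcatLookupQueryChunks,
    Virtual,
    Assert,
    DoNotUpdatePC,
}

/// One boolean per flag per step, matching the "unary bits" description above.
fn flag_is_set(step_flags: &[bool; 4], flag: CircuitFlagSketch) -> bool {
    step_flags[flag as usize]
}
```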
2 changes: 1 addition & 1 deletion book/src/how/read_write_memory.md
@@ -24,7 +24,7 @@ Program inputs populate the designated input space upon initialization:

The verifier can efficiently compute the MLE of this initial memory state on its own (i.e. in time proportional to the IO size, not the total memory size).

### Ouputs and panic
### Outputs and panic

On the other hand, the verifier cannot compute the MLE of the final memory state on its own –– though the program I/O is known to the verifier, the final memory state contains values written to registers/RAM over the course of program execution, which are *not* known to the verifier.

4 changes: 2 additions & 2 deletions book/src/how/sparse-constraint-systems.md
@@ -50,7 +50,7 @@ we can "switch over" to the standard "dense" linear-time sum-check proving algor
so that $n/2^i \approx m$. In Jolt, we expect this "switchover" to happen by round $4$ or $5$.
In the end, the amount of extra field work done by the prover owing to the sparsity will only be a factor of $2$ or so.

Jolt uses this approach within Lasso as well. Across all of the primtive RISC-V instructions,
Jolt uses this approach within Lasso as well. Across all of the primitive RISC-V instructions,
there are about 80 "subtables" that get used. Any particular primitive instruction only needs
to access between 4 and 10 of these subtables. We "pretend" that every primitive instruction
actually accesses all 80 of the subtables, but use binary flags to "turn off" any subtable
@@ -63,7 +63,7 @@ commitment time, and that our grand product prover does not pay any field work f
There are alternative approaches we could take to achieve "a la carte" prover costs, e.g., [vRAM](https://web.eecs.umich.edu/~genkin/papers/vram.pdf)'s approach
of having the prover sort all cycles by which primitive operation or pre-compile was executed at that cycle
(see also the much more recent work [Ceno](https://eprint.iacr.org/2024/387)).
But the above approach is compatable with a streaming prover, avoids committing to the same data multiple times,
But the above approach is compatible with a streaming prover, avoids committing to the same data multiple times,
and has other benefits.

We call this technique (fast proving for) "sparse constraint systems". Note that the term sparse here
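
An illustrative sketch of the switch-over heuristic described in this file, where `n` is the dense table size and `m` the number of nonzero terms (the exact threshold is an assumption, not Jolt's rule):

```rust
/// After i sum-check rounds the dense table holds n / 2^i entries; the sparse prover
/// costs roughly m field ops per round, so switch to the dense algorithm once the
/// dense size has shrunk to about the sparsity.
fn first_dense_round(n: usize, m: usize) -> u32 {
    let mut round = 0u32;
    while round < usize::BITS && (n >> round) > m {
        round += 1;
    }
    round // for Jolt's parameters this is expected to be around round 4 or 5
}
```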
2 changes: 1 addition & 1 deletion jolt-core/src/jolt/instruction/remu.rs
@@ -9,7 +9,7 @@ use crate::jolt::instruction::{
JoltInstruction,
};

/// Perform unsigned divison and return remainder
/// Perform unsigned division and return remainder
pub struct REMUInstruction<const WORD_SIZE: usize>;

impl<const WORD_SIZE: usize> VirtualInstructionSequence for REMUInstruction<WORD_SIZE> {
2 changes: 1 addition & 1 deletion jolt-core/src/lasso/memory_checking.rs
@@ -68,7 +68,7 @@ where
}

/// This type, used within a `StructuredPolynomialData` struct, indicates that the
/// field has a corresponding opening but no corrresponding polynomial or commitment ––
/// field has a corresponding opening but no corresponding polynomial or commitment ––
/// the prover doesn't need to compute a witness polynomial or commitment because
/// the verifier can compute the opening on its own.
pub type VerifierComputedOpening<T> = Option<T>;
2 changes: 1 addition & 1 deletion jolt-core/src/poly/commitment/zeromorph.rs
@@ -645,7 +645,7 @@ mod test {
use ark_std::{test_rng, UniformRand};
use rand_core::SeedableRng;

// Evaluate Phi_k(x) = \sum_{i=0}^k x^i using the direct inefficent formula
// Evaluate Phi_k(x) = \sum_{i=0}^k x^i using the direct inefficient formula
fn phi<P: Pairing>(challenge: &P::ScalarField, subscript: usize) -> P::ScalarField {
let len = (1 << subscript) as u64;
(0..len).fold(P::ScalarField::zero(), |mut acc, i| {
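
For reference, the same Phi_k has a geometric-series closed form; a sketch over plain floats (the real helper works over a pairing's scalar field):

```rust
/// Sketch: Phi_k(x) = sum_{i=0}^{2^k - 1} x^i = (x^{2^k} - 1) / (x - 1) for x != 1,
/// matching the 0..(1 << subscript) loop in the test helper above.
fn phi_closed_form(x: f64, subscript: usize) -> f64 {
    let len = 1u64 << subscript; // fine for the small k used in tests
    if x == 1.0 {
        len as f64
    } else {
        (x.powi(len as i32) - 1.0) / (x - 1.0)
    }
}
```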
2 changes: 1 addition & 1 deletion jolt-core/src/poly/sparse_interleaved_poly.rs
@@ -227,7 +227,7 @@ impl<F: JoltField> SparseInterleavedPolynomial<F> {
continue;
}
if coeff.index % 2 == 0 {
// Left node; try to find correspoding right node
// Left node; try to find corresponding right node
let right = segment
.get(j + 1)
.cloned()
2 changes: 1 addition & 1 deletion jolt-core/src/r1cs/inputs.rs
@@ -238,7 +238,7 @@ impl<const C: usize, I: ConstraintInput, F: JoltField, ProofTranscript: Transcri
}
}

/// Jolt's R1CS constraint inputs are typically represneted as an enum.
/// Jolt's R1CS constraint inputs are typically represented as an enum.
/// This trait serves two main purposes:
/// - Defines a canonical ordering over inputs (and thus indices for each input).
/// This is needed for sumcheck.
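
A toy sketch of the pattern the doc comment describes, with hypothetical variant names: a fieldless enum gives each input a stable index via declaration order:

```rust
/// Toy enum with hypothetical variants; declaration order defines the canonical index.
#[derive(Clone, Copy)]
enum ToyConstraintInput {
    LeftOperand,
    RightOperand,
    Product,
}

fn canonical_index(input: ToyConstraintInput) -> usize {
    input as usize // 0, 1, 2 in declaration order
}
```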
4 changes: 2 additions & 2 deletions jolt-core/src/r1cs/spartan.rs
@@ -38,15 +38,15 @@ pub enum SpartanError {
#[error("InvalidSumcheckProof")]
InvalidSumcheckProof,

/// returned when the recusive sumcheck proof fails
/// returned when the recursive sumcheck proof fails
#[error("InvalidOuterSumcheckProof")]
InvalidOuterSumcheckProof,

/// returned when the final sumcheck opening proof fails
#[error("InvalidOuterSumcheckClaim")]
InvalidOuterSumcheckClaim,

/// returned when the recusive sumcheck proof fails
/// returned when the recursive sumcheck proof fails
#[error("InvalidInnerSumcheckProof")]
InvalidInnerSumcheckProof,

2 changes: 1 addition & 1 deletion jolt-core/src/r1cs/special_polys.rs
@@ -185,7 +185,7 @@ impl<F: JoltField> SparsePolynomial<F> {
write_index += 1;
}
} else if sparse_index > 0 && chunk[sparse_index - 1].1 == dense_index - 1 {
// (low, high) present, but handeled prior
// (low, high) present, but handled prior
continue;
} else {
// (_, high) present
2 changes: 1 addition & 1 deletion jolt-core/src/subprotocols/grand_product_quarks.rs
@@ -301,7 +301,7 @@ where
{
/// Computes a grand product proof using the Section 5 technique from Quarks Paper
/// First - Extends the evals of v to create an f poly, then commits to it and evals
/// Then - Constructs a g poly and preforms sumcheck proof that sum == 0
/// Then - Constructs a g poly and performs sumcheck proof that sum == 0
/// Finally - computes opening proofs for a random sampled during sumcheck proof and returns
/// Returns a random point and evaluation to be verified by the caller (which our hybrid prover does with GKR)
pub fn prove(
4 changes: 2 additions & 2 deletions jolt-core/src/subprotocols/sparse_grand_product.rs
@@ -38,11 +38,11 @@ struct BatchedGrandProductToggleLayer<F: JoltField> {
flag_values: Vec<Vec<F>>,
/// The Reed-Solomon fingerprints for each circuit in the batch.
fingerprints: Vec<Vec<F>>,
/// Once the sparse flag/fingerprint vectors cannnot be bound further
/// Once the sparse flag/fingerprint vectors cannot be bound further
/// (i.e. binding would require processing values in different vectors),
/// we switch to using `coalesced_flags` to represent the flag values.
coalesced_flags: Option<Vec<F>>,
/// Once the sparse flag/fingerprint vectors cannnot be bound further
/// Once the sparse flag/fingerprint vectors cannot be bound further
/// (i.e. binding would require processing values in different vectors),
/// we switch to using `coalesced_fingerprints` to represent the fingerprint values.
coalesced_fingerprints: Option<Vec<F>>,
2 changes: 1 addition & 1 deletion jolt-core/src/subprotocols/sumcheck.rs
@@ -441,7 +441,7 @@ impl<F: JoltField, ProofTranscript: Transcript> SumcheckInstanceProof<F, ProofTr
// A specialized sumcheck implementation with the 0th round unrolled from the rest of the
// `for` loop. This allows us to pass in `witness_polynomials` by reference instead of
// passing them in as a single `DensePolynomial`, which would require an expensive
// concatenation. We defer the actual instantation of a `DensePolynomial` to the end of the
// concatenation. We defer the actual instantiation of a `DensePolynomial` to the end of the
// 0th round.
pub fn prove_spartan_quadratic(
claim: &F,
2 changes: 1 addition & 1 deletion jolt-core/src/utils/sol_types.rs
@@ -69,7 +69,7 @@ impl Into<HyperKZGProofSol> for &HyperKZGProof<Bn254> {
let yneg_scalar = self.v[1].clone();
let y_scalar = self.v[2].clone();

// Horrible type conversion here, possibly theres an easier way
// Horrible type conversion here, possibly there's an easier way
let v_ypos = ypos_scalar
.iter()
.map(|i| U256::from_be_slice(i.into_bigint().to_bytes_be().as_slice()))
2 changes: 1 addition & 1 deletion jolt-core/src/utils/thread.rs
@@ -37,7 +37,7 @@ pub fn unsafe_allocate_zero_vec<F: JoltField + Sized>(size: usize) -> Vec<F> {
let ptr = std::alloc::alloc_zeroed(layout) as *mut F;

if ptr.is_null() {
panic!("Zero vec allocaiton failed");
panic!("Zero vec allocation failed");
}

result = Vec::from_raw_parts(ptr, size, size);
10 changes: 5 additions & 5 deletions tracer/src/emulator/cpu.rs
@@ -770,7 +770,7 @@ impl Cpu {
// SSTATUS, SIE, and SIP are subsets of MSTATUS, MIE, and MIP
fn read_csr_raw(&self, address: u16) -> u64 {
match address {
// @TODO: Mask shuld consider of 32-bit mode
// @TODO: Mask should consider of 32-bit mode
CSR_FFLAGS_ADDRESS => self.csr[CSR_FCSR_ADDRESS as usize] & 0x1f,
CSR_FRM_ADDRESS => (self.csr[CSR_FCSR_ADDRESS as usize] >> 5) & 0x7,
CSR_SSTATUS_ADDRESS => self.csr[CSR_MSTATUS_ADDRESS as usize] & 0x80000003000de162,
@@ -1286,7 +1286,7 @@ impl Cpu {
if rd != 0 {
return (offset << 20) | (2 << 15) | (3 << 12) | (rd << 7) | 0x7;
}
// rd == 0 is reseved instruction
// rd == 0 is reserved instruction
}
2 => {
// C.LWSP
@@ -1298,7 +1298,7 @@
if r != 0 {
return (offset << 20) | (2 << 15) | (2 << 12) | (r << 7) | 0x3;
}
// r == 0 is reseved instruction
// r == 0 is reserved instruction
}
3 => {
// @TODO: Support C.FLWSP in 32-bit mode
@@ -1311,7 +1311,7 @@
if rd != 0 {
return (offset << 20) | (2 << 15) | (3 << 12) | (rd << 7) | 0x3;
}
// rd == 0 is reseved instruction
// rd == 0 is reserved instruction
}
4 => {
let funct1 = (halfword >> 12) & 1; // [12]
@@ -1406,7 +1406,7 @@
_ => {} // Not happens
};
}
_ => {} // No happnes
_ => {} // No happens
};
0xffffffff // Return invalid value
}
10 changes: 5 additions & 5 deletions tracer/src/emulator/device/virtio_block_disk.rs
@@ -315,7 +315,7 @@ impl VirtioBlockDisk {
///
/// # Arguments
/// * `memory`
/// * `mem_addresss` Physical address. Must be eight-byte aligned.
/// * `mem_address` Physical address. Must be eight-byte aligned.
/// * `disk_address` Must be eight-byte aligned.
/// * `length` Must be eight-byte aligned.
fn transfer_from_disk(
@@ -350,7 +350,7 @@ impl VirtioBlockDisk {
///
/// # Arguments
/// * `memory`
/// * `mem_addresss` Physical address. Must be eight-byte aligned.
/// * `mem_address` Physical address. Must be eight-byte aligned.
/// * `disk_address` Must be eight-byte aligned.
/// * `length` Must be eight-byte aligned.
fn transfer_to_disk(
@@ -384,7 +384,7 @@ impl VirtioBlockDisk {
/// Reads a byte from disk.
///
/// # Arguments
/// * `addresss` Address in disk
/// * `address` Address in disk
fn read_from_disk(&mut self, address: u64) -> u8 {
let index = (address >> 3) as usize;
let pos = (address % 8) * 8;
@@ -394,7 +394,7 @@ impl VirtioBlockDisk {
/// Writes a byte to disk.
///
/// # Arguments
/// * `addresss` Address in disk
/// * `address` Address in disk
/// * `value` Data written to disk
fn write_to_disk(&mut self, address: u64, value: u8) {
let index = (address >> 3) as usize;
@@ -453,7 +453,7 @@ impl VirtioBlockDisk {
(self.get_base_avail_address() + 4 + queue_size * 2).div_ceil(align) * align
}

// @TODO: Follow the virtio block specification more propertly.
// @TODO: Follow the virtio block specification more properly.
fn handle_disk_access(&mut self, memory: &mut MemoryWrapper) {
let base_desc_address = self.get_base_desc_address();
let base_avail_address = self.get_base_avail_address();
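
A self-contained sketch of the byte-addressing scheme suggested by the read/write snippets above (illustrative; the emulator's actual code may differ):

```rust
/// Sketch: disk contents packed into u64 words; word index = address / 8,
/// bit offset within the word = (address % 8) * 8.
fn read_byte(contents: &[u64], address: u64) -> u8 {
    let index = (address >> 3) as usize;
    let pos = (address % 8) * 8;
    ((contents[index] >> pos) & 0xff) as u8
}

fn write_byte(contents: &mut [u64], address: u64, value: u8) {
    let index = (address >> 3) as usize;
    let pos = (address % 8) * 8;
    contents[index] = (contents[index] & !(0xffu64 << pos)) | ((value as u64) << pos);
}
```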
2 changes: 1 addition & 1 deletion tracer/src/emulator/mmu.rs
@@ -532,7 +532,7 @@ impl Mmu {
false => match effective_address {
// I don't know why but dtb data seems to be stored from 0x1020 on Linux.
// It might be from self.x[0xb] initialization?
// And DTB size is arbitray.
// And DTB size is arbitrary.
0x00001020..=0x00001fff => self.dtb[effective_address as usize - 0x1020],
0x02000000..=0x0200ffff => self.clint.load(effective_address),
0x0C000000..=0x0fffffff => self.plic.load(effective_address),
2 changes: 1 addition & 1 deletion tracer/src/emulator/mod.rs
@@ -99,7 +99,7 @@ impl Emulator {

// It seems in riscv-tests ends with end code
// written to a certain physical memory address
// (0x80001000 in mose test cases) so checking
// (0x80001000 in most test cases) so checking
// the data in the address and terminating the test
// if non-zero data is written.
// End code 1 seems to mean pass.