diff --git a/.codespellrc b/.codespellrc
new file mode 100644
index 000000000..1e7a5bd7c
--- /dev/null
+++ b/.codespellrc
@@ -0,0 +1,3 @@
+[codespell]
+ignore-words-list = Crate,crate,ans,sie,SIE,FLE,THRE,Bootle
+skip = *.json,*.yml,*.toml,*.lock
diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml
new file mode 100644
index 000000000..6903eba80
--- /dev/null
+++ b/.github/workflows/spellcheck.yml
@@ -0,0 +1,22 @@
+name: Codespell Check
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+jobs:
+  codespell:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Install codespell
+        run: |
+          pip install codespell
+
+      - name: Run codespell
+        run: |
+          codespell --config .codespellrc
diff --git a/book/src/background/multilinear-extensions.md b/book/src/background/multilinear-extensions.md
index eed3e37a8..8932173b1 100644
--- a/book/src/background/multilinear-extensions.md
+++ b/book/src/background/multilinear-extensions.md
@@ -29,4 +29,4 @@ for i in 0..half {
 ```
 
 ### Multi Variable Binding
-Another common algorithm is to take the MLE $\tilde{f}(x_1, ... x_v)$ and compute its evaluation at a single $v$-variate point outside the boolean hypercube $x \in \mathbb{F}^v$. This algorithm can be performed in $O(n)$ time by preforming the single variable binding algorithm $\log(n)$ times. The time spent on $i$'th variable binding is $O(n/2^i)$, so the total time across all $\log n$ bindings is proportional to $\sum_{i=1}^{\log n} n/2^i = O(n)$. 
+Another common algorithm is to take the MLE $\tilde{f}(x_1, \ldots, x_v)$ and compute its evaluation at a single point $x \in \mathbb{F}^v$ outside the boolean hypercube. This algorithm can be performed in $O(n)$ time by performing the single variable binding algorithm $\log(n)$ times. The time spent on the $i$'th variable binding is $O(n/2^i)$, so the total time across all $\log n$ bindings is proportional to $\sum_{i=1}^{\log n} n/2^i = O(n)$.
diff --git a/book/src/future/groth-16.md b/book/src/future/groth-16.md
index de55a8d29..6772ee3d3 100644
--- a/book/src/future/groth-16.md
+++ b/book/src/future/groth-16.md
@@ -8,7 +8,7 @@ We call directly representing the Jolt verifier (with HyperKZG polynomial commit
 as constraints to then feeding those constraints into Groth16 "naive composition". Unfortunately, this naive procedure
 will result 
 in over a hundred millions of constraints. Applying Groth16 
-to such a large constraint system will result in far more latency than we'd lik, (and may even be impossible over the BN254 scalar field
+to such a large constraint system will result in far more latency than we'd like, and may even be impossible over the BN254 scalar field
 because that field only supports FFTs of length $2^{27}$. 
 Below, we describe alternate ways forward. 
 
diff --git a/book/src/future/proof-size-breakdown.md b/book/src/future/proof-size-breakdown.md
index d1eb1edbb..d05188453 100644
--- a/book/src/future/proof-size-breakdown.md
+++ b/book/src/future/proof-size-breakdown.md
@@ -47,7 +47,7 @@ and one attests to the validity of initialization of memory plus a final pass ov
 The reason we do not run these grand products "together as one big grand product" is they are 
 each potentially of different sizes,
 and it is annoying (though possible) to "batch prove" differently-sized grand products together.
-However, a relatively easy way to get down to 3 grand prodcuts is to set the memory size
+However, a relatively easy way to get down to 3 grand products is to set the memory size
 in each of the three categories above to equal the number of reads/writes. This simply involves 
 padding the memory with zeros to make it equal in size to 
 the number of reads/writes into the memory (i.e., NUM_CYCLES). Doing this will not substantially increase
diff --git a/book/src/how/m-extension.md b/book/src/how/m-extension.md
index 4ffd59caf..04a5a5e26 100644
--- a/book/src/how/m-extension.md
+++ b/book/src/how/m-extension.md
@@ -113,5 +113,5 @@ If the current instruction is virtual, we can constrain the next instruction in
 next instruction in the bytecode.
 We observe that the virtual sequences used in the M extension don't involve jumps or branches,
 so this should always hold, *except* if we encounter a virtual instruction followed by a padding instruction.
-But that should never happend because an execution trace should always end with some return handling,
+But that should never happen because an execution trace should always end with some return handling,
 which shouldn't involve a virtual sequence.
diff --git a/book/src/how/r1cs_constraints.md b/book/src/how/r1cs_constraints.md
index 3c830f188..821ea8c81 100644
--- a/book/src/how/r1cs_constraints.md
+++ b/book/src/how/r1cs_constraints.md
@@ -44,7 +44,7 @@ the preprocessed bytecode in Jolt.
     1. `ConcatLookupQueryChunks`: Indicates whether the instruction performs a concat-type lookup.
     1. `Virtual`: 1 if the instruction is "virtual", as defined in Section 6.1 of the Jolt paper.
     1. `Assert`: 1 if the instruction is an assert, as defined in Section 6.1.1 of the Jolt paper.
-    1. `DoNotUpdatePC`: Used in virtual sequences; the program counter should be the same for the full seqeuence.
+    1. `DoNotUpdatePC`: Used in virtual sequences; the program counter should be the same for the full sequence.
 * Instruction flags: these are the unary bits used to indicate instruction is executed at a given step.
 There are as many per step as the number of unique instruction lookup tables in Jolt.
 
diff --git a/book/src/how/read_write_memory.md b/book/src/how/read_write_memory.md
index 83fd8b83e..c67b8e22f 100644
--- a/book/src/how/read_write_memory.md
+++ b/book/src/how/read_write_memory.md
@@ -24,7 +24,7 @@ Program inputs populate the designated input space upon initialization:
 
 The verifier can efficiently compute the MLE of this initial memory state on its own (i.e. in time proportional to the IO size, not the total memory size).
 
-### Ouputs and panic
+### Outputs and panic
 
 On the other hand, the verifier cannot compute the MLE of the final memory state on its own –– though the program I/O is known to the verifier, the final memory state contains values written to registers/RAM over the course of program execution, which are *not* known to the verifier.
 
diff --git a/book/src/how/sparse-constraint-systems.md b/book/src/how/sparse-constraint-systems.md
index 434658baf..a367891be 100644
--- a/book/src/how/sparse-constraint-systems.md
+++ b/book/src/how/sparse-constraint-systems.md
@@ -50,7 +50,7 @@ we can "switch over" to the standard "dense" linear-time sum-check proving algor
 so that $n/2^i \approx m$. In Jolt, we expect this "switchover" to happen by round $4$ or $5$. 
 In the end, the amount of extra field work done by the prover owing to the sparsity will only be a factor of $2$ or so.
 
-Jolt uses this approach within Lasso as well. Across all of the primtive RISC-V instructions,
+Jolt uses this approach within Lasso as well. Across all of the primitive RISC-V instructions,
 there are about 80 "subtables" that get used. Any particular primitive instruction only needs
 to access between 4 and 10 of these subtables. We "pretend" that every primitive instruction
 actually accesses all 80 of the subtables, but use binary flags to "turn off" any subtable
@@ -63,7 +63,7 @@ commitment time, and that our grand product prover does not pay any field work f
 There are alternative approaches we could take to achieve "a la carte" prover costs, e.g., [vRAM](https://web.eecs.umich.edu/~genkin/papers/vram.pdf)'s approach
 of having the prover sort all cycles by which primitive operation or pre-compile was executed at that cycle
 (see also the much more recent work [Ceno](https://eprint.iacr.org/2024/387)).
-But the above approach is compatable with a streaming prover, avoids committing to the same data multiple times,
+But the above approach is compatible with a streaming prover, avoids committing to the same data multiple times,
 and has other benefits.
 
 We call this technique (fast proving for) "sparse constraint systems". Note that the term sparse here
diff --git a/jolt-core/src/jolt/instruction/remu.rs b/jolt-core/src/jolt/instruction/remu.rs
index b95609d18..dd79f73f7 100644
--- a/jolt-core/src/jolt/instruction/remu.rs
+++ b/jolt-core/src/jolt/instruction/remu.rs
@@ -9,7 +9,7 @@ use crate::jolt::instruction::{
     JoltInstruction,
 };
 
-/// Perform unsigned divison and return remainder
+/// Perform unsigned division and return remainder
 pub struct REMUInstruction<const WORD_SIZE: usize>;
 
 impl<const WORD_SIZE: usize> VirtualInstructionSequence for REMUInstruction<WORD_SIZE> {
diff --git a/jolt-core/src/lasso/memory_checking.rs b/jolt-core/src/lasso/memory_checking.rs
index d3c672e95..15183aa56 100644
--- a/jolt-core/src/lasso/memory_checking.rs
+++ b/jolt-core/src/lasso/memory_checking.rs
@@ -68,7 +68,7 @@ where
 }
 
 /// This type, used within a `StructuredPolynomialData` struct, indicates that the
-/// field has a corresponding opening but no corrresponding polynomial or commitment ––
+/// field has a corresponding opening but no corresponding polynomial or commitment ––
 /// the prover doesn't need to compute a witness polynomial or commitment because
 /// the verifier can compute the opening on its own.
 pub type VerifierComputedOpening<T> = Option<T>;
diff --git a/jolt-core/src/poly/commitment/zeromorph.rs b/jolt-core/src/poly/commitment/zeromorph.rs
index fd0021a2c..e6e97b626 100644
--- a/jolt-core/src/poly/commitment/zeromorph.rs
+++ b/jolt-core/src/poly/commitment/zeromorph.rs
@@ -645,7 +645,7 @@ mod test {
     use ark_std::{test_rng, UniformRand};
     use rand_core::SeedableRng;
 
-    // Evaluate Phi_k(x) = \sum_{i=0}^k x^i using the direct inefficent formula
+    // Evaluate Phi_k(x) = \sum_{i=0}^{2^k - 1} x^i using the direct inefficient formula
     fn phi<P: Pairing>(challenge: &P::ScalarField, subscript: usize) -> P::ScalarField {
         let len = (1 << subscript) as u64;
         (0..len).fold(P::ScalarField::zero(), |mut acc, i| {
diff --git a/jolt-core/src/poly/sparse_interleaved_poly.rs b/jolt-core/src/poly/sparse_interleaved_poly.rs
index 2eb9a7908..20efd9fe5 100644
--- a/jolt-core/src/poly/sparse_interleaved_poly.rs
+++ b/jolt-core/src/poly/sparse_interleaved_poly.rs
@@ -227,7 +227,7 @@ impl<F: JoltField> SparseInterleavedPolynomial<F> {
                             continue;
                         }
                         if coeff.index % 2 == 0 {
-                            // Left node; try to find correspoding right node
+                            // Left node; try to find corresponding right node
                             let right = segment
                                 .get(j + 1)
                                 .cloned()
diff --git a/jolt-core/src/r1cs/inputs.rs b/jolt-core/src/r1cs/inputs.rs
index f73deab4d..99b9e6e6a 100644
--- a/jolt-core/src/r1cs/inputs.rs
+++ b/jolt-core/src/r1cs/inputs.rs
@@ -238,7 +238,7 @@ impl<const C: usize, I: ConstraintInput, F: JoltField, ProofTranscript: Transcri
     }
 }
 
-/// Jolt's R1CS constraint inputs are typically represneted as an enum.
+/// Jolt's R1CS constraint inputs are typically represented as an enum.
 /// This trait serves two main purposes:
 /// - Defines a canonical ordering over inputs (and thus indices for each input).
 ///   This is needed for sumcheck.
diff --git a/jolt-core/src/r1cs/spartan.rs b/jolt-core/src/r1cs/spartan.rs
index 1be9ee2ed..bcc6f4ad5 100644
--- a/jolt-core/src/r1cs/spartan.rs
+++ b/jolt-core/src/r1cs/spartan.rs
@@ -38,7 +38,7 @@ pub enum SpartanError {
     #[error("InvalidSumcheckProof")]
     InvalidSumcheckProof,
 
-    /// returned when the recusive sumcheck proof fails
+    /// returned when the recursive sumcheck proof fails
     #[error("InvalidOuterSumcheckProof")]
     InvalidOuterSumcheckProof,
 
@@ -46,7 +46,7 @@ pub enum SpartanError {
     #[error("InvalidOuterSumcheckClaim")]
     InvalidOuterSumcheckClaim,
 
-    /// returned when the recusive sumcheck proof fails
+    /// returned when the recursive sumcheck proof fails
     #[error("InvalidInnerSumcheckProof")]
     InvalidInnerSumcheckProof,
 
diff --git a/jolt-core/src/r1cs/special_polys.rs b/jolt-core/src/r1cs/special_polys.rs
index ecdf7e090..7c762b791 100644
--- a/jolt-core/src/r1cs/special_polys.rs
+++ b/jolt-core/src/r1cs/special_polys.rs
@@ -185,7 +185,7 @@ impl<F: JoltField> SparsePolynomial<F> {
                             write_index += 1;
                         }
                     } else if sparse_index > 0 && chunk[sparse_index - 1].1 == dense_index - 1 {
-                        // (low, high) present, but handeled prior
+                        // (low, high) present, but handled prior
                         continue;
                     } else {
                         // (_, high) present
diff --git a/jolt-core/src/subprotocols/grand_product_quarks.rs b/jolt-core/src/subprotocols/grand_product_quarks.rs
index 62057c917..74365d9e8 100644
--- a/jolt-core/src/subprotocols/grand_product_quarks.rs
+++ b/jolt-core/src/subprotocols/grand_product_quarks.rs
@@ -301,7 +301,7 @@ where
 {
     /// Computes a grand product proof using the Section 5 technique from Quarks Paper
     /// First - Extends the evals of v to create an f poly, then commits to it and evals
-    /// Then - Constructs a g poly and preforms sumcheck proof that sum == 0
+    /// Then - Constructs a g poly and performs sumcheck proof that sum == 0
     /// Finally - computes opening proofs for a random sampled during sumcheck proof and returns
     /// Returns a random point and evaluation to be verified by the caller (which our hybrid prover does with GKR)
     pub fn prove(
diff --git a/jolt-core/src/subprotocols/sparse_grand_product.rs b/jolt-core/src/subprotocols/sparse_grand_product.rs
index 1e192b51d..6e1bfcf2d 100644
--- a/jolt-core/src/subprotocols/sparse_grand_product.rs
+++ b/jolt-core/src/subprotocols/sparse_grand_product.rs
@@ -38,11 +38,11 @@ struct BatchedGrandProductToggleLayer<F: JoltField> {
     flag_values: Vec<Vec<F>>,
     /// The Reed-Solomon fingerprints for each circuit in the batch.
     fingerprints: Vec<Vec<F>>,
-    /// Once the sparse flag/fingerprint vectors cannnot be bound further
+    /// Once the sparse flag/fingerprint vectors cannot be bound further
     /// (i.e. binding would require processing values in different vectors),
     /// we switch to using `coalesced_flags` to represent the flag values.
     coalesced_flags: Option<Vec<F>>,
-    /// Once the sparse flag/fingerprint vectors cannnot be bound further
+    /// Once the sparse flag/fingerprint vectors cannot be bound further
     /// (i.e. binding would require processing values in different vectors),
     /// we switch to using `coalesced_fingerprints` to represent the fingerprint values.
     coalesced_fingerprints: Option<Vec<F>>,
diff --git a/jolt-core/src/subprotocols/sumcheck.rs b/jolt-core/src/subprotocols/sumcheck.rs
index 008f892be..a2fd74883 100644
--- a/jolt-core/src/subprotocols/sumcheck.rs
+++ b/jolt-core/src/subprotocols/sumcheck.rs
@@ -441,7 +441,7 @@ impl<F: JoltField, ProofTranscript: Transcript> SumcheckInstanceProof<F, ProofTr
     // A specialized sumcheck implementation with the 0th round unrolled from the rest of the
     // `for` loop. This allows us to pass in `witness_polynomials` by reference instead of
     // passing them in as a single `DensePolynomial`, which would require an expensive
-    // concatenation. We defer the actual instantation of a `DensePolynomial` to the end of the
+    // concatenation. We defer the actual instantiation of a `DensePolynomial` to the end of the
     // 0th round.
     pub fn prove_spartan_quadratic(
         claim: &F,
diff --git a/jolt-core/src/utils/sol_types.rs b/jolt-core/src/utils/sol_types.rs
index eb05a7f77..971e02181 100644
--- a/jolt-core/src/utils/sol_types.rs
+++ b/jolt-core/src/utils/sol_types.rs
@@ -69,7 +69,7 @@ impl Into<HyperKZGProofSol> for &HyperKZGProof<Bn254> {
         let yneg_scalar = self.v[1].clone();
         let y_scalar = self.v[2].clone();
 
-        // Horrible type conversion here, possibly theres an easier way
+        // Horrible type conversion here, possibly there's an easier way
         let v_ypos = ypos_scalar
             .iter()
             .map(|i| U256::from_be_slice(i.into_bigint().to_bytes_be().as_slice()))
diff --git a/jolt-core/src/utils/thread.rs b/jolt-core/src/utils/thread.rs
index 92754a095..1e3dd8ad5 100644
--- a/jolt-core/src/utils/thread.rs
+++ b/jolt-core/src/utils/thread.rs
@@ -37,7 +37,7 @@ pub fn unsafe_allocate_zero_vec<F: JoltField + Sized>(size: usize) -> Vec<F> {
         let ptr = std::alloc::alloc_zeroed(layout) as *mut F;
 
         if ptr.is_null() {
-            panic!("Zero vec allocaiton failed");
+            panic!("Zero vec allocation failed");
         }
 
         result = Vec::from_raw_parts(ptr, size, size);
diff --git a/tracer/src/emulator/cpu.rs b/tracer/src/emulator/cpu.rs
index ac2607636..4bc5717a3 100644
--- a/tracer/src/emulator/cpu.rs
+++ b/tracer/src/emulator/cpu.rs
@@ -770,7 +770,7 @@ impl Cpu {
     // SSTATUS, SIE, and SIP are subsets of MSTATUS, MIE, and MIP
     fn read_csr_raw(&self, address: u16) -> u64 {
         match address {
-            // @TODO: Mask shuld consider of 32-bit mode
+            // @TODO: Mask should take 32-bit mode into account
             CSR_FFLAGS_ADDRESS => self.csr[CSR_FCSR_ADDRESS as usize] & 0x1f,
             CSR_FRM_ADDRESS => (self.csr[CSR_FCSR_ADDRESS as usize] >> 5) & 0x7,
             CSR_SSTATUS_ADDRESS => self.csr[CSR_MSTATUS_ADDRESS as usize] & 0x80000003000de162,
@@ -1286,7 +1286,7 @@ impl Cpu {
                         if rd != 0 {
                             return (offset << 20) | (2 << 15) | (3 << 12) | (rd << 7) | 0x7;
                         }
-                        // rd == 0 is reseved instruction
+                        // rd == 0 is a reserved instruction
                     }
                     2 => {
                         // C.LWSP
@@ -1298,7 +1298,7 @@ impl Cpu {
                         if r != 0 {
                             return (offset << 20) | (2 << 15) | (2 << 12) | (r << 7) | 0x3;
                         }
-                        // r == 0 is reseved instruction
+                        // r == 0 is a reserved instruction
                     }
                     3 => {
                         // @TODO: Support C.FLWSP in 32-bit mode
@@ -1311,7 +1311,7 @@ impl Cpu {
                         if rd != 0 {
                             return (offset << 20) | (2 << 15) | (3 << 12) | (rd << 7) | 0x3;
                         }
-                        // rd == 0 is reseved instruction
+                        // rd == 0 is a reserved instruction
                     }
                     4 => {
                         let funct1 = (halfword >> 12) & 1; // [12]
@@ -1406,7 +1406,7 @@ impl Cpu {
                     _ => {} // Not happens
                 };
             }
-            _ => {} // No happnes
+            _ => {} // Never happens
         };
         0xffffffff // Return invalid value
     }
diff --git a/tracer/src/emulator/device/virtio_block_disk.rs b/tracer/src/emulator/device/virtio_block_disk.rs
index b06501db5..373cf3159 100644
--- a/tracer/src/emulator/device/virtio_block_disk.rs
+++ b/tracer/src/emulator/device/virtio_block_disk.rs
@@ -315,7 +315,7 @@ impl VirtioBlockDisk {
     ///
     /// # Arguments
     /// * `memory`
-    /// * `mem_addresss` Physical address. Must be eight-byte aligned.
+    /// * `mem_address` Physical address. Must be eight-byte aligned.
     /// * `disk_address` Must be eight-byte aligned.
     /// * `length` Must be eight-byte aligned.
     fn transfer_from_disk(
@@ -350,7 +350,7 @@ impl VirtioBlockDisk {
     ///
     /// # Arguments
     /// * `memory`
-    /// * `mem_addresss` Physical address. Must be eight-byte aligned.
+    /// * `mem_address` Physical address. Must be eight-byte aligned.
     /// * `disk_address` Must be eight-byte aligned.
     /// * `length` Must be eight-byte aligned.
     fn transfer_to_disk(
@@ -384,7 +384,7 @@ impl VirtioBlockDisk {
     /// Reads a byte from disk.
     ///
     /// # Arguments
-    /// * `addresss` Address in disk
+    /// * `address` Address in disk
     fn read_from_disk(&mut self, address: u64) -> u8 {
         let index = (address >> 3) as usize;
         let pos = (address % 8) * 8;
@@ -394,7 +394,7 @@ impl VirtioBlockDisk {
     /// Writes a byte to disk.
     ///
     /// # Arguments
-    /// * `addresss` Address in disk
+    /// * `address` Address in disk
     /// * `value` Data written to disk
     fn write_to_disk(&mut self, address: u64, value: u8) {
         let index = (address >> 3) as usize;
@@ -453,7 +453,7 @@ impl VirtioBlockDisk {
         (self.get_base_avail_address() + 4 + queue_size * 2).div_ceil(align) * align
     }
 
-    // @TODO: Follow the virtio block specification more propertly.
+    // @TODO: Follow the virtio block specification more properly.
     fn handle_disk_access(&mut self, memory: &mut MemoryWrapper) {
         let base_desc_address = self.get_base_desc_address();
         let base_avail_address = self.get_base_avail_address();
diff --git a/tracer/src/emulator/mmu.rs b/tracer/src/emulator/mmu.rs
index f8245b6cc..a2dd12fbc 100644
--- a/tracer/src/emulator/mmu.rs
+++ b/tracer/src/emulator/mmu.rs
@@ -532,7 +532,7 @@ impl Mmu {
             false => match effective_address {
                 // I don't know why but dtb data seems to be stored from 0x1020 on Linux.
                 // It might be from self.x[0xb] initialization?
-                // And DTB size is arbitray.
+                // And DTB size is arbitrary.
                 0x00001020..=0x00001fff => self.dtb[effective_address as usize - 0x1020],
                 0x02000000..=0x0200ffff => self.clint.load(effective_address),
                 0x0C000000..=0x0fffffff => self.plic.load(effective_address),
diff --git a/tracer/src/emulator/mod.rs b/tracer/src/emulator/mod.rs
index a6900d527..9f993a5a3 100644
--- a/tracer/src/emulator/mod.rs
+++ b/tracer/src/emulator/mod.rs
@@ -99,7 +99,7 @@ impl Emulator {
 
             // It seems in riscv-tests ends with end code
             // written to a certain physical memory address
-            // (0x80001000 in mose test cases) so checking
+            // (0x80001000 in most test cases) so checking
             // the data in the address and terminating the test
             // if non-zero data is written.
             // End code 1 seems to mean pass.