Skip to content

Commit

Permalink
[chore] use rayon work-stealing to improve evaluate_h (#28)
Browse files Browse the repository at this point in the history
* chore: use rayon par_iter for more work-stealing in evaluate_h

* chore: bump version to 0.4.1

* chore: turn off profiling

* chore: add CI to check wasm build

* feat: use Scroll's FFT instead of Taiko's on x86

empirically it has better performance (lower memory bandwidth?)

* chore: remove target wasm32-unknown-unknown

https://docs.rs/getrandom/latest/getrandom/#webassembly-support
  • Loading branch information
jonathanpwang authored Nov 23, 2023
1 parent e841084 commit f335ffc
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 23 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,27 @@ jobs:
command: test
args: --verbose --release --all --all-features

# CI job: run `cargo build` once for every target listed in the matrix below.
build:
name: Build target ${{ matrix.target }}
runs-on: ubuntu-latest
strategy:
matrix:
# Targets to build for; the job name above is derived from each entry.
target:
- wasm32-wasi

steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
# NOTE(review): no toolchain is named here, so this presumably relies on the
# repository's own toolchain configuration — confirm.
override: false
- name: Add target
# Make the matrix target available to rustup before building for it.
run: rustup target add ${{ matrix.target }}
- name: cargo build
uses: actions-rs/cargo@v1
with:
command: build
# Default features are turned off; only `batch` and `circuit-params` are enabled for this build.
args: --no-default-features --features batch,circuit-params --target ${{ matrix.target }}

example:
name: Examples on ubuntu
runs-on: ubuntu-latest
Expand Down
15 changes: 6 additions & 9 deletions halo2_proofs/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
[package]
name = "halo2-axiom"
version = "0.4.0"
version = "0.4.1"
authors = [
"Sean Bowe <sean@electriccoin.co>",
"Ying Tong Lai <yingtong@electriccoin.co>",
"Daira Hopwood <daira@electriccoin.co>",
"Jack Grigg <jack@electriccoin.co>",
"Privacy Scaling Explorations team", "Taiko Labs", "Intrinsic Technologies"
"Privacy Scaling Explorations team",
"Taiko Labs",
"Intrinsic Technologies",
]
edition = "2021"
rust-version = "1.73.0"
Expand Down Expand Up @@ -63,7 +65,7 @@ group = "0.13"
pairing = "0.23"
halo2curves = { package = "halo2curves-axiom", version = "0.4.2", default-features = false, features = ["bits", "bn256-table", "derive_serde"] }
rand = "0.8"
rand_core = { version = "0.6", default-features = false}
rand_core = { version = "0.6", default-features = false }
tracing = "0.1"
blake2b_simd = "1"
rustc-hash = "1.1"
Expand Down Expand Up @@ -94,12 +96,7 @@ getrandom = { version = "0.2", features = ["js"] }
default = ["batch", "multicore", "circuit-params"]
multicore = ["maybe-rayon/threads"]
dev-graph = ["plotters", "tabbycat"]
test-dev-graph = [
"dev-graph",
"plotters/bitmap_backend",
"plotters/bitmap_encoder",
"plotters/ttf",
]
test-dev-graph = ["dev-graph", "plotters/bitmap_backend", "plotters/bitmap_encoder", "plotters/ttf"]
gadget-traces = ["backtrace"]
# thread-safe-region = []
sanity-checks = []
Expand Down
17 changes: 17 additions & 0 deletions halo2_proofs/src/fft.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ pub fn fft<Scalar: Field, G: FftGroup<Scalar>>(
data: &FFTData<Scalar>,
inverse: bool,
) {
// Empirically, the parallel implementation requires less memory bandwidth, which makes it faster on x86_64.
#[cfg(target_arch = "x86_64")]
parallel::fft(a, omega, log_n, data, inverse);
#[cfg(not(target_arch = "x86_64"))]
recursive::fft(a, omega, log_n, data, inverse)
}

Expand Down Expand Up @@ -52,6 +56,18 @@ mod tests {
);
end_timer!(start);

let mut c = input.clone();
let l_c = c.len();
let start = start_timer!(|| format!("parallel fft {} ({})", a.len(), num_threads));
fft::parallel::fft(
&mut c,
domain.get_omega(),
k,
domain.get_fft_data(l_c),
false,
);
end_timer!(start);

let mut b = input;
let l_b = b.len();
let start = start_timer!(|| format!("recursive fft {} ({})", a.len(), num_threads));
Expand All @@ -67,6 +83,7 @@ mod tests {
for i in 0..n {
//log_info(format!("{}: {} {}", i, a[i], b[i]));
assert_eq!(a[i], b[i]);
assert_eq!(a[i], c[i]);
}
}

Expand Down
54 changes: 40 additions & 14 deletions halo2_proofs/src/plonk/evaluation.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
#![allow(clippy::too_many_arguments)]

use crate::multicore;
use crate::plonk::{lookup, permutation, Any, ProvingKey};
use crate::poly::Basis;
use crate::{
arithmetic::{parallelize, CurveAffine},
poly::{Coeff, ExtendedLagrangeCoeff, LagrangeCoeff, Polynomial, Rotation},
};
#[cfg(feature = "profile")]
use ark_std::{end_timer, start_timer};
use ff::{Field, PrimeField, WithSmallOrderMulGroup};
use multicore::{IntoParallelIterator, ParallelIterator};

use super::{ConstraintSystem, Expression};

Expand Down Expand Up @@ -286,9 +290,10 @@ impl<C: CurveAffine> Evaluator<C> {
let mut current_extended_omega = one;
let value_parts: Vec<Polynomial<C::ScalarExt, LagrangeCoeff>> = (0..num_parts)
.map(|_| {
let fixed: Vec<Polynomial<C::ScalarExt, LagrangeCoeff>> = pk
.fixed_polys
.iter()
#[cfg(feature = "profile")]
let fixed_timer = start_timer!(|| "Fixed coeff_to_extended_part");
let fixed: Vec<Polynomial<C::ScalarExt, LagrangeCoeff>> = (&pk.fixed_polys)
.into_par_iter()
.map(|p| domain.coeff_to_extended_part(p.clone(), current_extended_omega))
.collect();
let fixed = &fixed[..];
Expand All @@ -297,10 +302,14 @@ impl<C: CurveAffine> Evaluator<C> {
domain.coeff_to_extended_part(pk.l_last.clone(), current_extended_omega);
let l_active_row =
domain.coeff_to_extended_part(pk.l_active_row.clone(), current_extended_omega);
#[cfg(feature = "profile")]
end_timer!(fixed_timer);

#[cfg(feature = "profile")]
let advice_timer = start_timer!(|| "Advice coeff_to_extended_part");
// Calculate the advice and instance cosets
let advice: Vec<Vec<Polynomial<C::Scalar, LagrangeCoeff>>> = advice_polys
.iter()
.into_par_iter()
.map(|advice_polys| {
advice_polys
.iter()
Expand All @@ -310,8 +319,12 @@ impl<C: CurveAffine> Evaluator<C> {
.collect()
})
.collect();
#[cfg(feature = "profile")]
end_timer!(advice_timer);
#[cfg(feature = "profile")]
let instance_timer = start_timer!(|| "Instance coeff_to_extended_part");
let instance: Vec<Vec<Polynomial<C::Scalar, LagrangeCoeff>>> = instance_polys
.iter()
.into_par_iter()
.map(|instance_polys| {
instance_polys
.iter()
Expand All @@ -321,6 +334,8 @@ impl<C: CurveAffine> Evaluator<C> {
.collect()
})
.collect();
#[cfg(feature = "profile")]
end_timer!(instance_timer);

let mut values = domain.empty_lagrange();

Expand All @@ -332,6 +347,8 @@ impl<C: CurveAffine> Evaluator<C> {
.zip(lookups.iter())
.zip(permutations.iter())
{
#[cfg(feature = "profile")]
let timer = start_timer!(|| "Custom gates");
// Custom gates
multicore::scope(|scope| {
let chunk_size = (size + num_threads - 1) / num_threads;
Expand Down Expand Up @@ -360,7 +377,11 @@ impl<C: CurveAffine> Evaluator<C> {
});
}
});
#[cfg(feature = "profile")]
end_timer!(timer);

#[cfg(feature = "profile")]
let timer = start_timer!(|| "Permutations");
// Permutations
let sets = &permutation.sets;
if !sets.is_empty() {
Expand All @@ -372,22 +393,21 @@ impl<C: CurveAffine> Evaluator<C> {
let permutation_product_cosets: Vec<
Polynomial<C::ScalarExt, LagrangeCoeff>,
> = sets
.iter()
.into_par_iter()
.map(|set| {
domain.coeff_to_extended_part(
set.permutation_product_poly.clone(),
current_extended_omega,
)
})
.collect();
let permutation_cosets: Vec<Polynomial<C::ScalarExt, LagrangeCoeff>> = pk
.permutation
.polys
.iter()
.map(|p| {
domain.coeff_to_extended_part(p.clone(), current_extended_omega)
})
.collect();
let permutation_cosets: Vec<Polynomial<C::ScalarExt, LagrangeCoeff>> =
(&pk.permutation.polys)
.into_par_iter()
.map(|p| {
domain.coeff_to_extended_part(p.clone(), current_extended_omega)
})
.collect();

let first_set_permutation_product_coset =
permutation_product_cosets.first().unwrap();
Expand Down Expand Up @@ -473,7 +493,11 @@ impl<C: CurveAffine> Evaluator<C> {
}
});
}
#[cfg(feature = "profile")]
end_timer!(timer);

#[cfg(feature = "profile")]
let timer = start_timer!(|| "Lookups");
// Lookups
for (n, lookup) in lookups.iter().enumerate() {
// Polynomials required for this lookup.
Expand Down Expand Up @@ -554,6 +578,8 @@ impl<C: CurveAffine> Evaluator<C> {
}
});
}
#[cfg(feature = "profile")]
end_timer!(timer);
}
current_extended_omega *= extended_omega;
values
Expand Down

0 comments on commit f335ffc

Please sign in to comment.