Skip to content

Commit

Permalink
Vectorized sum
Browse files Browse the repository at this point in the history
  • Loading branch information
Tiemo Bang committed Apr 9, 2024
1 parent 7b61fbb commit 9b935d0
Show file tree
Hide file tree
Showing 6 changed files with 301 additions and 2 deletions.
3 changes: 2 additions & 1 deletion base/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub mod kmeans_baseline;
pub mod point;
pub mod matrix_vector_multiply;
pub mod matrix_vector_multiply;
pub mod vectorized_sum;
85 changes: 85 additions & 0 deletions base/src/vectorized_sum.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/// Adds up every element of `a` in one pass, in input order.
///
/// An empty vector sums to zero.
pub fn vectorized_sum_iterator(a: Vec<f32>) -> f32 {
    a.into_iter().sum::<f32>()
}

/// Sums `a` by first summing each run of up to `CHUNK_SIZE` consecutive
/// elements and then adding those partial sums together.
///
/// The last run may be shorter than `CHUNK_SIZE`; it is still included.
///
/// # Panics
///
/// Panics if `CHUNK_SIZE` is zero, because `slice::chunks` rejects a zero
/// chunk size.
pub fn vectorized_sum_iterator_chunks<const CHUNK_SIZE: usize>(a: Vec<f32>) -> f32 {
    let partial_sums = a
        .chunks(CHUNK_SIZE)
        .map(|run| run.iter().copied().sum::<f32>());
    partial_sums.sum()
}

/// Sums `a` by packing the element stream into fixed-size batches of
/// `CHUNK_SIZE` values on the fly, summing each batch, and adding the batch
/// sums together.
///
/// When `a.len()` is not a multiple of `CHUNK_SIZE`, the final batch is
/// emitted as well, with its unused slots set to zero, so every element
/// contributes to the result. An empty vector sums to zero.
///
/// # Panics
///
/// Panics on the first element if `CHUNK_SIZE` is zero (remainder by zero).
pub fn vectorized_sum_iterator_batching<const CHUNK_SIZE: usize>(a: Vec<f32>) -> f32 {
    // Read the length before `a` is consumed so the last element can be recognised.
    let num_elem = a.len();
    let mut buf = [0.0_f32; CHUNK_SIZE];

    a.into_iter()
        .enumerate()
        .filter_map(move |(i, f)| {
            let slot = i % CHUNK_SIZE;
            buf[slot] = f;
            if slot == CHUNK_SIZE - 1 {
                // The batch is full: hand out a copy and keep reusing the buffer.
                Some(buf)
            } else if i + 1 == num_elem {
                // Last element with a partially filled batch: the remaining
                // slots still hold values from the previous batch, so clear
                // them before emitting.
                buf[slot + 1..].fill(0.0);
                Some(buf)
            } else {
                None
            }
        })
        .map(|chunk| chunk.iter().sum::<f32>())
        .sum()
}

/// Sums input that is already grouped into fixed-size batches: each batch is
/// summed on its own and the per-batch sums are then added together.
///
/// An empty vector sums to zero.
pub fn vectorized_sum_iterator_batched<const CHUNK_SIZE: usize>(
    a: Vec<[f32; CHUNK_SIZE]>,
) -> f32 {
    a.iter()
        .map(|batch| batch.iter().copied().sum::<f32>())
        .sum()
}

/// Sums input that is already grouped into fixed-size batches by flattening
/// the batches back into one element stream and summing that stream.
///
/// An empty vector sums to zero.
pub fn vectorized_sum_iterator_batched_flatten<const CHUNK_SIZE: usize>(
    a: Vec<[f32; CHUNK_SIZE]>,
) -> f32 {
    a.iter().flatten().copied().sum()
}

/// Splits `a` into consecutive arrays of `CHUNK_SIZE` values.
///
/// If `a.len()` is not a multiple of `CHUNK_SIZE`, the last array is padded
/// with zeros. An empty slice produces an empty vector.
///
/// # Panics
///
/// Panics if `CHUNK_SIZE` is zero, because `slice::chunks` rejects a zero
/// chunk size.
pub fn to_chunks<const CHUNK_SIZE: usize>(a: &[f32]) -> Vec<[f32; CHUNK_SIZE]> {
    a.chunks(CHUNK_SIZE)
        .map(|run| {
            // Start from all zeros so a short final run is zero-padded.
            let mut padded = [0.0_f32; CHUNK_SIZE];
            padded[..run.len()].copy_from_slice(run);
            padded
        })
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The plain single-pass sum of 1..=5 is 15.
    #[test]
    fn test_vectorized_sum_iterator() {
        let input = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(vectorized_sum_iterator(input), 15.0);
    }

    /// Chunked summation includes the short final chunk (five values, chunks of two).
    #[test]
    fn test_vectorized_sum_iterator_chunks() {
        let input = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(vectorized_sum_iterator_chunks::<2>(input), 15.0);
    }

    /// Pre-batched input is summed batch by batch.
    #[test]
    fn test_vectorized_sum_iterator_batched() {
        let batches = vec![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        assert_eq!(vectorized_sum_iterator_batched::<2>(batches), 21.0);
    }

    /// Pre-batched input is flattened and summed as one stream.
    #[test]
    fn test_vectorized_sum_iterator_batched_flatten() {
        let batches = vec![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        assert_eq!(vectorized_sum_iterator_batched_flatten::<2>(batches), 21.0);
    }

    /// Splitting five values into pairs zero-pads the last pair.
    #[test]
    fn test_to_chunks() {
        let input = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let expected = vec![[1.0, 2.0], [3.0, 4.0], [5.0, 0.0]];
        assert_eq!(to_chunks::<2>(&input), expected);
    }
}
4 changes: 4 additions & 0 deletions hydro_local_benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,8 @@ harness = false

[[bench]]
name = "matrix_vector"
harness = false

[[bench]]
name = "vectorized_sum"
harness = false
91 changes: 91 additions & 0 deletions hydro_local_benchmarks/benches/vectorized_sum.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rand::SeedableRng;
use rand::rngs::StdRng;
use rand::distributions::{Distribution, Uniform};
use base::vectorized_sum::*;
use hydroflow_base::vectorized_sum as hf;

fn rand_uniform_vector(num_elem: usize, seed: Option<u64>) -> Vec<f32> {
// Create a random number generator with a fixed seed
let seed = seed.unwrap_or(42);
let rng: StdRng = SeedableRng::seed_from_u64(seed);

// Generate random points
let uniform = Uniform::new(-1.0, 1.0);
uniform.sample_iter(rng).take(num_elem).collect::<Vec<f32>>()
}

fn vectorized_sum_benchmark(c: &mut Criterion) {

let num_elem = 10000;
let a = rand_uniform_vector(num_elem, None);

c.bench_function("vectorized_sum_iterator", |b| {
b.iter_batched(|| a.clone(), |a| vectorized_sum_iterator(black_box(a)), criterion::BatchSize::SmallInput)
});

c.bench_function("vectorized_sum_iterator_chunks-4", |b| {
b.iter_batched(|| a.clone(), |a| vectorized_sum_iterator_chunks::<4>(black_box(a)), criterion::BatchSize::SmallInput)
});

c.bench_function("vectorized_sum_iterator_chunks-8", |b| {
b.iter_batched(|| a.clone(), |a| vectorized_sum_iterator_chunks::<8>(black_box(a)), criterion::BatchSize::SmallInput)
});

c.bench_function("vectorized_sum_iterator_batching-4", |b| {
b.iter_batched(|| a.clone(), |a| vectorized_sum_iterator_batching::<4>(black_box(a)), criterion::BatchSize::SmallInput)
});

c.bench_function("vectorized_sum_iterator_batching-8", |b| {
b.iter_batched(|| a.clone(), |a| vectorized_sum_iterator_batching::<8>(black_box(a)), criterion::BatchSize::SmallInput)
});

c.bench_function("vectorized_sum_iterator_batched-4", |b| {
b.iter_batched(|| to_chunks(&a), |a| vectorized_sum_iterator_batched::<4>(black_box(a)), criterion::BatchSize::SmallInput)
});

c.bench_function("vectorized_sum_iterator_batched-8", |b| {
b.iter_batched(|| to_chunks(&a), |a| vectorized_sum_iterator_batched::<8>(black_box(a)), criterion::BatchSize::SmallInput)
});

c.bench_function("vectorized_sum_iterator_batched_flatten-4", |b| {
b.iter_batched(|| to_chunks(&a), |a| vectorized_sum_iterator_batched_flatten::<4>(black_box(a)), criterion::BatchSize::SmallInput)
});

c.bench_function("vectorized_sum_iterator_batched_flatten-8", |b| {
b.iter_batched(|| to_chunks(&a), |a| vectorized_sum_iterator_batched_flatten::<8>(black_box(a)), criterion::BatchSize::SmallInput)
});

// Hydroflow
c.bench_function("hf::vectorized_sum_iterator", |b| {
b.iter_batched(|| a.clone(), |a| hf::vectorized_sum_iterator(black_box(a)), criterion::BatchSize::SmallInput)
});

c.bench_function("hf::vectorized_sum_iterator_chunks-4", |b| {
b.iter_batched(|| a.clone(), |a| hf::vectorized_sum_iterator_chunks::<4>(black_box(a)), criterion::BatchSize::SmallInput)
});

c.bench_function("hf::vectorized_sum_iterator_chunks-8", |b| {
b.iter_batched(|| a.clone(), |a| hf::vectorized_sum_iterator_chunks::<8>(black_box(a)), criterion::BatchSize::SmallInput)
});

c.bench_function("hf::vectorized_sum_iterator_batching-4", |b| {
b.iter_batched(|| a.clone(), |a| hf::vectorized_sum_iterator_batching::<4>(black_box(a)), criterion::BatchSize::SmallInput)
});

c.bench_function("hf::vectorized_sum_iterator_batching-8", |b| {
b.iter_batched(|| a.clone(), |a| hf::vectorized_sum_iterator_batching::<8>(black_box(a)), criterion::BatchSize::SmallInput)
});

c.bench_function("hf::vectorized_sum_iterator_batched-4", |b| {
b.iter_batched(|| to_chunks(&a), |a| hf::vectorized_sum_iterator_batched::<4>(black_box(a)), criterion::BatchSize::SmallInput)
});

c.bench_function("hf::vectorized_sum_iterator_batched-8", |b| {
b.iter_batched(|| to_chunks(&a), |a| hf::vectorized_sum_iterator_batched::<8>(black_box(a)), criterion::BatchSize::SmallInput)
});

}

// Collect the benchmark function above into the `benches` group and pass that
// group to Criterion's entry-point macro.
criterion_group!(benches, vectorized_sum_benchmark);
criterion_main!(benches);
3 changes: 2 additions & 1 deletion hydroflow_base/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pub mod kmeans_hf;
pub mod matrix_vector_multiply;
pub mod matrix_vector_multiply;
pub mod vectorized_sum;
117 changes: 117 additions & 0 deletions hydroflow_base/src/vectorized_sum.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
use hydroflow::hydroflow_syntax;

/// Sums every element of `a` by streaming the values through a Hydroflow
/// dataflow that ends in a `reduce` operator.
///
/// Returns `0.0` for an empty vector, matching the plain-iterator
/// implementation of the same name.
///
/// # Panics
///
/// Panics if the dataflow delivers more than one result, or if sending the
/// result on the output channel fails.
pub fn vectorized_sum_iterator(a: Vec<f32>) -> f32 {
    let (output_send, output_recv) = hydroflow::util::unbounded_channel::<f32>();

    let mut flow = hydroflow_syntax! {
        // Input: one item per vector element.
        a = source_iter(a);

        // Accumulate all elements into a single running total.
        result = a
            -> reduce(|acc: &mut f32, x: f32| {
                *acc += x;
            });

        // Hand the total back to the caller through the channel.
        result
            -> for_each(|x| output_send.send(x).unwrap());
    };

    flow.run_available();
    let res = hydroflow::util::collect_ready::<Vec<f32>, _>(output_recv);

    // `reduce` has no seed value, so an empty input produces no output at
    // all; treat that case as a sum of zero instead of panicking.
    assert!(
        res.len() <= 1,
        "expected at most one reduced sum, got {}",
        res.len()
    );
    res.first().copied().unwrap_or(0.0)
}

/// Sums `a` with a Hydroflow dataflow that receives the vector as slices of
/// up to `CHUNK_SIZE` elements, sums each slice, and reduces the per-slice
/// sums to one total.
///
/// Returns `0.0` for an empty vector, matching the plain-iterator
/// implementation of the same name.
///
/// # Panics
///
/// Panics if `CHUNK_SIZE` is zero (`slice::chunks` rejects a zero chunk
/// size), if the dataflow delivers more than one result, or if sending the
/// result on the output channel fails.
pub fn vectorized_sum_iterator_chunks<const CHUNK_SIZE: usize>(a: Vec<f32>) -> f32 {
    let (output_send, output_recv) = hydroflow::util::unbounded_channel::<f32>();

    let mut flow = hydroflow_syntax! {
        // Input: one item per slice of up to CHUNK_SIZE elements.
        a = source_iter(a.chunks(CHUNK_SIZE));

        // Sum each slice, then accumulate the per-slice sums.
        result = a
            -> map(|chunk| chunk.iter().sum::<f32>())
            -> reduce(|acc: &mut f32, x: f32| {
                *acc += x;
            });

        // Hand the total back to the caller through the channel.
        result
            -> for_each(|x| output_send.send(x).unwrap());
    };

    flow.run_available();
    let res = hydroflow::util::collect_ready::<Vec<f32>, _>(output_recv);

    // `reduce` has no seed value, so an empty input produces no output at
    // all; treat that case as a sum of zero instead of panicking.
    assert!(
        res.len() <= 1,
        "expected at most one reduced sum, got {}",
        res.len()
    );
    res.first().copied().unwrap_or(0.0)
}

/// Sums `a` with a Hydroflow dataflow that packs the element stream into
/// fixed-size batches of `CHUNK_SIZE` values on the fly, sums each batch, and
/// reduces the batch sums to one total.
///
/// When `a.len()` is not a multiple of `CHUNK_SIZE`, the final batch is
/// emitted as well, with its unused slots set to zero, so every element
/// contributes to the result. Returns `0.0` for an empty vector.
///
/// # Panics
///
/// Panics on the first element if `CHUNK_SIZE` is zero (remainder by zero),
/// if the dataflow delivers more than one result, or if sending the result on
/// the output channel fails.
pub fn vectorized_sum_iterator_batching<const CHUNK_SIZE: usize>(a: Vec<f32>) -> f32 {
    let (output_send, output_recv) = hydroflow::util::unbounded_channel::<f32>();
    let mut buf: [f32; CHUNK_SIZE] = [0.0; CHUNK_SIZE];
    // Read the length before `a` moves into the dataflow so the last element
    // can be recognised inside the batching closure.
    let num_elem = a.len();

    let mut flow = hydroflow_syntax! {
        // Input: one item per vector element.
        a = source_iter(a);

        // Pack elements into batches, sum each batch, accumulate the sums.
        result = a -> enumerate()
            -> filter_map(move |(i, f)| {
                let slot = i % CHUNK_SIZE;
                buf[slot] = f;
                if slot == CHUNK_SIZE - 1 {
                    // The batch is full: emit a copy and keep reusing the buffer.
                    Some(buf)
                } else if i + 1 == num_elem {
                    // Last element with a partially filled batch: the
                    // remaining slots still hold values from the previous
                    // batch, so clear them before emitting.
                    buf[slot + 1..].fill(0.0);
                    Some(buf)
                } else {
                    None
                }
            })
            -> map(|chunk| chunk.iter().sum::<f32>())
            -> reduce(|acc: &mut f32, x: f32| {
                *acc += x;
            });

        // Hand the total back to the caller through the channel.
        result
            -> for_each(|x| output_send.send(x).unwrap());
    };

    flow.run_available();
    let res = hydroflow::util::collect_ready::<Vec<f32>, _>(output_recv);

    // `reduce` has no seed value, so an empty input produces no output at
    // all; treat that case as a sum of zero instead of panicking.
    assert!(
        res.len() <= 1,
        "expected at most one reduced sum, got {}",
        res.len()
    );
    res.first().copied().unwrap_or(0.0)
}

/// Sums input that is already grouped into fixed-size batches with a
/// Hydroflow dataflow: each batch is summed on its own and the per-batch sums
/// are reduced to one total.
///
/// Returns `0.0` for an empty vector, matching the plain-iterator
/// implementation of the same name.
///
/// # Panics
///
/// Panics if the dataflow delivers more than one result, or if sending the
/// result on the output channel fails.
pub fn vectorized_sum_iterator_batched<const CHUNK_SIZE: usize>(
    a: Vec<[f32; CHUNK_SIZE]>,
) -> f32 {
    let (output_send, output_recv) = hydroflow::util::unbounded_channel::<f32>();

    let mut flow = hydroflow_syntax! {
        // Input: one item per pre-built batch.
        a = source_iter(a);

        // Sum each batch, then accumulate the per-batch sums.
        result = a
            -> map(|chunk| chunk.iter().sum::<f32>())
            -> reduce(|acc: &mut f32, x: f32| {
                *acc += x;
            });

        // Hand the total back to the caller through the channel.
        result
            -> for_each(|x| output_send.send(x).unwrap());
    };

    flow.run_available();
    let res = hydroflow::util::collect_ready::<Vec<f32>, _>(output_recv);

    // `reduce` has no seed value, so an empty input produces no output at
    // all; treat that case as a sum of zero instead of panicking.
    assert!(
        res.len() <= 1,
        "expected at most one reduced sum, got {}",
        res.len()
    );
    res.first().copied().unwrap_or(0.0)
}

0 comments on commit 9b935d0

Please sign in to comment.