-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Tiemo Bang
committed
Apr 9, 2024
1 parent
7b61fbb
commit 9b935d0
Showing
6 changed files
with
301 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
pub mod kmeans_baseline; | ||
pub mod point; | ||
pub mod matrix_vector_multiply; | ||
pub mod matrix_vector_multiply; | ||
pub mod vectorized_sum; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
/// Adds up all elements of `a` in one sequential pass over the vector.
///
/// This is the plain-iterator baseline the chunked and batched variants are compared against.
pub fn vectorized_sum_iterator(a: Vec<f32>) -> f32 {
    a.iter().copied().sum::<f32>()
}
|
||
/// Adds up `a` in two stages: each consecutive slice of at most `CHUNK_SIZE` elements is
/// reduced to a partial sum, and the partial sums are then added together.
///
/// The final slice may be shorter than `CHUNK_SIZE`; it is still included in the result.
///
/// # Panics
///
/// Panics if `CHUNK_SIZE` is zero, since `slice::chunks` does not accept a chunk size of 0.
pub fn vectorized_sum_iterator_chunks<const CHUNK_SIZE: usize>(a: Vec<f32>) -> f32 {
    let partial_sums = a
        .chunks(CHUNK_SIZE)
        .map(|part| part.iter().copied().sum::<f32>());
    partial_sums.sum()
}
|
||
/// Adds up `a` by streaming its elements into a fixed `CHUNK_SIZE` buffer and reducing the
/// buffer every time it fills up.
///
/// Elements that remain after the last full buffer (when `a.len()` is not a multiple of
/// `CHUNK_SIZE`) are summed separately and added to the result, so no input is dropped.
///
/// # Panics
///
/// Panics if `CHUNK_SIZE` is zero.
pub fn vectorized_sum_iterator_batching<const CHUNK_SIZE: usize>(a: Vec<f32>) -> f32 {
    assert!(CHUNK_SIZE > 0, "CHUNK_SIZE must be greater than zero");

    // The streaming pipeline below only emits completely filled buffers, so the trailing
    // partial chunk has to be reduced on its own before `a` is consumed.
    let tail_start = a.len() - a.len() % CHUNK_SIZE;
    let tail_sum: f32 = a[tail_start..].iter().sum();

    let mut buf = [0.0_f32; CHUNK_SIZE];
    let batched_sum: f32 = a
        .into_iter()
        .enumerate()
        .filter_map(move |(i, f)| {
            let slot = i % CHUNK_SIZE;
            buf[slot] = f;
            // Hand out a copy of the buffer only once its last slot has been written.
            (slot == CHUNK_SIZE - 1).then(|| buf)
        })
        .map(|chunk| chunk.iter().sum::<f32>())
        .sum();

    batched_sum + tail_sum
}
|
||
/// Adds up input that is already grouped into fixed-size arrays: every `CHUNK_SIZE`-element
/// array is reduced to a partial sum, and the partial sums are then added together.
pub fn vectorized_sum_iterator_batched<const CHUNK_SIZE: usize>(
    a: Vec<[f32; CHUNK_SIZE]>,
) -> f32 {
    a.iter()
        .map(|batch| batch.iter().copied().sum::<f32>())
        .sum()
}
|
||
/// Adds up input that is already grouped into fixed-size arrays by walking the arrays'
/// elements one after another, in order, and summing them as a single flat sequence.
pub fn vectorized_sum_iterator_batched_flatten<const CHUNK_SIZE: usize>(
    a: Vec<[f32; CHUNK_SIZE]>,
) -> f32 {
    a.iter().flat_map(|batch| batch.iter().copied()).sum()
}
|
||
/// Regroups a flat slice into owned arrays of exactly `CHUNK_SIZE` elements.
///
/// When `a.len()` is not a multiple of `CHUNK_SIZE`, the unused slots of the last array are
/// left at `0.0`.
///
/// # Panics
///
/// Panics if `CHUNK_SIZE` is zero, since `slice::chunks` does not accept a chunk size of 0.
pub fn to_chunks<const CHUNK_SIZE: usize>(a: &[f32]) -> Vec<[f32; CHUNK_SIZE]> {
    a.chunks(CHUNK_SIZE)
        .map(|part| {
            let mut padded = [0.0_f32; CHUNK_SIZE];
            // `part` holds at most CHUNK_SIZE elements, so the destination prefix always fits.
            padded[..part.len()].copy_from_slice(part);
            padded
        })
        .collect()
}
|
||
#[cfg(test)]
mod tests {
    use super::*;

    /// The plain iterator sum of 1..=5 is 15.
    #[test]
    fn test_vectorized_sum_iterator() {
        let input = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(vectorized_sum_iterator(input), 15.0);
    }

    /// Five elements in chunks of two leave a short final chunk that must still be counted.
    #[test]
    fn test_vectorized_sum_iterator_chunks() {
        let input = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        assert_eq!(vectorized_sum_iterator_chunks::<2>(input), 15.0);
    }

    /// Pre-batched input is summed array by array.
    #[test]
    fn test_vectorized_sum_iterator_batched() {
        let batches = vec![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        assert_eq!(vectorized_sum_iterator_batched::<2>(batches), 21.0);
    }

    /// Pre-batched input is summed as one flat sequence.
    #[test]
    fn test_vectorized_sum_iterator_batched_flatten() {
        let batches = vec![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        assert_eq!(vectorized_sum_iterator_batched_flatten::<2>(batches), 21.0);
    }

    /// The last chunk is padded with zeros when the input does not divide evenly.
    #[test]
    fn test_to_chunks() {
        let flat = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let expected = vec![[1.0, 2.0], [3.0, 4.0], [5.0, 0.0]];
        assert_eq!(to_chunks::<2>(&flat), expected);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
use criterion::{black_box, criterion_group, criterion_main, Criterion}; | ||
use rand::SeedableRng; | ||
use rand::rngs::StdRng; | ||
use rand::distributions::{Distribution, Uniform}; | ||
use base::vectorized_sum::*; | ||
use hydroflow_base::vectorized_sum as hf; | ||
|
||
fn rand_uniform_vector(num_elem: usize, seed: Option<u64>) -> Vec<f32> { | ||
// Create a random number generator with a fixed seed | ||
let seed = seed.unwrap_or(42); | ||
let rng: StdRng = SeedableRng::seed_from_u64(seed); | ||
|
||
// Generate random points | ||
let uniform = Uniform::new(-1.0, 1.0); | ||
uniform.sample_iter(rng).take(num_elem).collect::<Vec<f32>>() | ||
} | ||
|
||
fn vectorized_sum_benchmark(c: &mut Criterion) { | ||
|
||
let num_elem = 10000; | ||
let a = rand_uniform_vector(num_elem, None); | ||
|
||
c.bench_function("vectorized_sum_iterator", |b| { | ||
b.iter_batched(|| a.clone(), |a| vectorized_sum_iterator(black_box(a)), criterion::BatchSize::SmallInput) | ||
}); | ||
|
||
c.bench_function("vectorized_sum_iterator_chunks-4", |b| { | ||
b.iter_batched(|| a.clone(), |a| vectorized_sum_iterator_chunks::<4>(black_box(a)), criterion::BatchSize::SmallInput) | ||
}); | ||
|
||
c.bench_function("vectorized_sum_iterator_chunks-8", |b| { | ||
b.iter_batched(|| a.clone(), |a| vectorized_sum_iterator_chunks::<8>(black_box(a)), criterion::BatchSize::SmallInput) | ||
}); | ||
|
||
c.bench_function("vectorized_sum_iterator_batching-4", |b| { | ||
b.iter_batched(|| a.clone(), |a| vectorized_sum_iterator_batching::<4>(black_box(a)), criterion::BatchSize::SmallInput) | ||
}); | ||
|
||
c.bench_function("vectorized_sum_iterator_batching-8", |b| { | ||
b.iter_batched(|| a.clone(), |a| vectorized_sum_iterator_batching::<8>(black_box(a)), criterion::BatchSize::SmallInput) | ||
}); | ||
|
||
c.bench_function("vectorized_sum_iterator_batched-4", |b| { | ||
b.iter_batched(|| to_chunks(&a), |a| vectorized_sum_iterator_batched::<4>(black_box(a)), criterion::BatchSize::SmallInput) | ||
}); | ||
|
||
c.bench_function("vectorized_sum_iterator_batched-8", |b| { | ||
b.iter_batched(|| to_chunks(&a), |a| vectorized_sum_iterator_batched::<8>(black_box(a)), criterion::BatchSize::SmallInput) | ||
}); | ||
|
||
c.bench_function("vectorized_sum_iterator_batched_flatten-4", |b| { | ||
b.iter_batched(|| to_chunks(&a), |a| vectorized_sum_iterator_batched_flatten::<4>(black_box(a)), criterion::BatchSize::SmallInput) | ||
}); | ||
|
||
c.bench_function("vectorized_sum_iterator_batched_flatten-8", |b| { | ||
b.iter_batched(|| to_chunks(&a), |a| vectorized_sum_iterator_batched_flatten::<8>(black_box(a)), criterion::BatchSize::SmallInput) | ||
}); | ||
|
||
// Hydroflow | ||
c.bench_function("hf::vectorized_sum_iterator", |b| { | ||
b.iter_batched(|| a.clone(), |a| hf::vectorized_sum_iterator(black_box(a)), criterion::BatchSize::SmallInput) | ||
}); | ||
|
||
c.bench_function("hf::vectorized_sum_iterator_chunks-4", |b| { | ||
b.iter_batched(|| a.clone(), |a| hf::vectorized_sum_iterator_chunks::<4>(black_box(a)), criterion::BatchSize::SmallInput) | ||
}); | ||
|
||
c.bench_function("hf::vectorized_sum_iterator_chunks-8", |b| { | ||
b.iter_batched(|| a.clone(), |a| hf::vectorized_sum_iterator_chunks::<8>(black_box(a)), criterion::BatchSize::SmallInput) | ||
}); | ||
|
||
c.bench_function("hf::vectorized_sum_iterator_batching-4", |b| { | ||
b.iter_batched(|| a.clone(), |a| hf::vectorized_sum_iterator_batching::<4>(black_box(a)), criterion::BatchSize::SmallInput) | ||
}); | ||
|
||
c.bench_function("hf::vectorized_sum_iterator_batching-8", |b| { | ||
b.iter_batched(|| a.clone(), |a| hf::vectorized_sum_iterator_batching::<8>(black_box(a)), criterion::BatchSize::SmallInput) | ||
}); | ||
|
||
c.bench_function("hf::vectorized_sum_iterator_batched-4", |b| { | ||
b.iter_batched(|| to_chunks(&a), |a| hf::vectorized_sum_iterator_batched::<4>(black_box(a)), criterion::BatchSize::SmallInput) | ||
}); | ||
|
||
c.bench_function("hf::vectorized_sum_iterator_batched-8", |b| { | ||
b.iter_batched(|| to_chunks(&a), |a| hf::vectorized_sum_iterator_batched::<8>(black_box(a)), criterion::BatchSize::SmallInput) | ||
}); | ||
|
||
} | ||
|
||
criterion_group!(benches, vectorized_sum_benchmark); | ||
criterion_main!(benches); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
pub mod kmeans_hf; | ||
pub mod matrix_vector_multiply; | ||
pub mod matrix_vector_multiply; | ||
pub mod vectorized_sum; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
use hydroflow::hydroflow_syntax; | ||
|
||
pub fn vectorized_sum_iterator(a: Vec<f32>) -> f32 { | ||
|
||
let (output_send, output_recv) = hydroflow::util::unbounded_channel::<f32>(); | ||
|
||
let mut flow = hydroflow_syntax! { | ||
// Inputs | ||
a = source_iter(a); | ||
|
||
// Combine columns and vector elements | ||
result = a | ||
-> reduce(|acc: &mut f32, x: f32| { | ||
*acc += x; | ||
}); | ||
|
||
result | ||
-> for_each(|x| output_send.send(x).unwrap()); | ||
}; | ||
|
||
flow.run_available(); | ||
let res = hydroflow::util::collect_ready::<Vec<f32>,_>(output_recv); | ||
|
||
assert!(res.len() == 1); | ||
return res[0].clone(); | ||
} | ||
|
||
pub fn vectorized_sum_iterator_chunks<const CHUNK_SIZE: usize>(a: Vec<f32>) -> f32 { | ||
|
||
let (output_send, output_recv) = hydroflow::util::unbounded_channel::<f32>(); | ||
|
||
let mut flow = hydroflow_syntax! { | ||
// Inputs | ||
a = source_iter(a.chunks(CHUNK_SIZE)); | ||
|
||
// Combine columns and vector elements | ||
result = a | ||
-> map(|chunk| chunk.iter().sum::<f32>()) | ||
-> reduce(|acc: &mut f32, x: f32| { | ||
*acc += x; | ||
}); | ||
|
||
result | ||
-> for_each(|x| output_send.send(x).unwrap()); | ||
}; | ||
|
||
flow.run_available(); | ||
let res = hydroflow::util::collect_ready::<Vec<f32>,_>(output_recv); | ||
|
||
assert!(res.len() == 1); | ||
return res[0].clone(); | ||
} | ||
|
||
pub fn vectorized_sum_iterator_batching<const CHUNK_SIZE: usize>(a: Vec<f32>) -> f32 { | ||
|
||
let (output_send, output_recv) = hydroflow::util::unbounded_channel::<f32>(); | ||
let mut buf: [f32; CHUNK_SIZE] = [0.0; CHUNK_SIZE]; | ||
|
||
|
||
let mut flow = hydroflow_syntax! { | ||
// Inputs | ||
a = source_iter(a); | ||
|
||
// Combine columns and vector elements | ||
result = a -> enumerate() | ||
-> map(move |(i,f)|{ | ||
buf[i%CHUNK_SIZE] = f; | ||
if i%CHUNK_SIZE == CHUNK_SIZE-1 { | ||
Some(buf.clone()) | ||
} else { | ||
None | ||
} | ||
}) | ||
-> filter(|x| x.is_some()) | ||
-> map(|x| x.unwrap()) | ||
-> map(|chunk| chunk.iter().sum::<f32>()) | ||
-> reduce(|acc: &mut f32, x: f32| { | ||
*acc += x; | ||
}); | ||
|
||
|
||
result | ||
-> for_each(|x| output_send.send(x).unwrap()); | ||
}; | ||
|
||
flow.run_available(); | ||
let res = hydroflow::util::collect_ready::<Vec<f32>,_>(output_recv); | ||
|
||
assert!(res.len() == 1); | ||
return res[0].clone(); | ||
} | ||
|
||
pub fn vectorized_sum_iterator_batched<const CHUNK_SIZE: usize>(a: Vec<[f32; CHUNK_SIZE]>) -> f32 { | ||
|
||
let (output_send, output_recv) = hydroflow::util::unbounded_channel::<f32>(); | ||
|
||
let mut flow = hydroflow_syntax! { | ||
// Inputs | ||
a = source_iter(a); | ||
|
||
// Combine columns and vector elements | ||
result = a | ||
-> map(|chunk| chunk.iter().sum::<f32>()) | ||
-> reduce(|acc: &mut f32, x: f32| { | ||
*acc += x; | ||
}); | ||
|
||
result | ||
-> for_each(|x| output_send.send(x).unwrap()); | ||
}; | ||
|
||
flow.run_available(); | ||
let res = hydroflow::util::collect_ready::<Vec<f32>,_>(output_recv); | ||
|
||
assert!(res.len() == 1); | ||
return res[0].clone(); | ||
} |