Skip to content

Commit

Permalink
small unpacking & benchmark improvements (#190)
Browse files Browse the repository at this point in the history
```
~/g/vortex ❯❯❯ cargo bench --bench bitpacking                                                                                                             ✘ 130
   Compiling vortex-fastlanes v0.1.0 (/Users/will/git/vortex/vortex-fastlanes)
    Finished `bench` profile [optimized] target(s) in 3.29s
     Running benches/bitpacking.rs (target/release/deps/bitpacking-274b666c83950221)
Gnuplot not found, using plotters backend
bitpack_1M              time:   [55.836 µs 56.008 µs 56.193 µs]
                        change: [-1.3761% -0.8817% -0.3672%] (p = 0.00 < 0.05)
                        Change within noise threshold.
Found 2 outliers among 100 measurements (2.00%)
  2 (2.00%) high mild

unpack_1M               time:   [216.54 µs 232.54 µs 251.50 µs]
                        change: [-9.0508% -1.5376% +6.7838%] (p = 0.71 > 0.05)
                        No change in performance detected.
Found 13 outliers among 100 measurements (13.00%)
  3 (3.00%) high mild
  10 (10.00%) high severe

unpack_1M_singles       time:   [2.2917 ms 2.3220 ms 2.3539 ms]
                        change: [-5.6770% -3.6532% -1.6082%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 1 outliers among 100 measurements (1.00%)
  1 (1.00%) high mild

unpack_1024_alloc       time:   [218.64 µs 232.55 µs 248.50 µs]
                        change: [-8.6257% -1.5167% +5.9634%] (p = 0.69 > 0.05)
                        No change in performance detected.
Found 12 outliers among 100 measurements (12.00%)
  4 (4.00%) high mild
  8 (8.00%) high severe

unpack_1024_noalloc     time:   [43.505 ns 43.651 ns 43.806 ns]
                        change: [+0.5580% +1.0303% +1.4821%] (p = 0.00 < 0.05)
                        Change within noise threshold.

unpack_single           time:   [4.9364 ns 4.9507 ns 4.9647 ns]
                        change: [+0.3408% +0.7604% +1.1548%] (p = 0.00 < 0.05)
                        Change within noise threshold.
Found 1 outliers among 100 measurements (1.00%)
  1 (1.00%) high mild
```
  • Loading branch information
lwwmanning authored Apr 3, 2024
1 parent 1a5c2fe commit 70fbfef
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 18 deletions.
6 changes: 5 additions & 1 deletion vortex-fastlanes/benches/bitpacking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,12 @@ fn pack_unpack(c: &mut Criterion) {

// 1024 elements pack into `128 * bits` bytes
let packed_1024 = &packed[0..128 * bits];
c.bench_function("unpack_1024_alloc", |b| {
b.iter(|| black_box(unpack_primitive::<u32>(&packed, bits, values.len())));
});

let mut output: Vec<u32> = Vec::with_capacity(1024);
c.bench_function("unpack_1024", |b| {
c.bench_function("unpack_1024_noalloc", |b| {
b.iter(|| {
output.clear();
TryBitPack::try_unpack_into(packed_1024, bits, &mut output).unwrap();
Expand Down
36 changes: 19 additions & 17 deletions vortex-fastlanes/src/bitpacking/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,31 +212,33 @@ pub fn unpack_primitive<T: NativePType + TryBitPack>(
}

// How many fastlanes vectors we will process.
let num_chunks = length / 1024;
let num_chunks = (length + 1023) / 1024;
let bytes_per_chunk = 128 * bit_width;
assert_eq!(
packed.len(),
num_chunks * bytes_per_chunk,
"Invalid packed length: got {}, expected {}",
packed.len(),
num_chunks * bytes_per_chunk
);

// Allocate a result vector.
let mut output = Vec::with_capacity(length);

// Loop over all but the last chunk.
let bytes_per_chunk = 128 * bit_width;
let mut output = Vec::with_capacity(num_chunks * 1024);
// Loop over all the chunks.
(0..num_chunks).for_each(|i| {
let chunk: &[u8] = &packed[i * bytes_per_chunk..][0..bytes_per_chunk];
TryBitPack::try_unpack_into(chunk, bit_width, &mut output).unwrap();
});

// Handle the final chunk which may contain padding.
let last_chunk_size = length % 1024;
if last_chunk_size > 0 {
let mut last_output = Vec::with_capacity(1024);
TryBitPack::try_unpack_into(
&packed[num_chunks * bytes_per_chunk..],
bit_width,
&mut last_output,
)
.unwrap();
output.extend_from_slice(&last_output[..last_chunk_size]);
}
// The final chunk may have had padding
output.truncate(length);

// For small vectors, the overhead of rounding up is more noticeable.
// Shrink to fit may or may not reallocate depending on the implementation.
// But for very small vectors, the reallocation is cheap enough even if it does happen.
if output.len() < 1024 {
output.shrink_to_fit();
}
output
}

Expand Down

0 comments on commit 70fbfef

Please sign in to comment.