Skip to content

Commit

Permalink
U64 words (#114)
Browse files Browse the repository at this point in the history
fixes decompression on 32 bit architectures
  • Loading branch information
mwlon authored Oct 29, 2023
1 parent 852b501 commit 4ce1182
Show file tree
Hide file tree
Showing 11 changed files with 113 additions and 100 deletions.
60 changes: 32 additions & 28 deletions pco/src/bit_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::cmp::min;
use std::mem;

use crate::bits;
use crate::constants::{Bitlen, BYTES_PER_WORD, WORD_BITLEN};
use crate::constants::Bitlen;
use crate::errors::{PcoError, PcoResult};
use crate::read_write_uint::ReadWriteUint;

Expand All @@ -16,49 +16,53 @@ pub fn make_extension_for(slice: &[u8], padding: usize) -> Vec<u8> {
res
}

// Q: Why u64?
// A: It's the largest data type most instruction sets have support for (and
// can do few-cycle/SIMD ops on). e.g. even 32-bit wasm has 64-bit ints and
// opcodes.
#[inline]
pub fn word_at(src: &[u8], byte_idx: usize) -> usize {
let raw_bytes = unsafe { *(src.as_ptr().add(byte_idx) as *const [u8; BYTES_PER_WORD]) };
usize::from_le_bytes(raw_bytes)
pub fn u64_at(src: &[u8], byte_idx: usize) -> u64 {
let raw_bytes = unsafe { *(src.as_ptr().add(byte_idx) as *const [u8; 8]) };
u64::from_le_bytes(raw_bytes)
}

#[inline]
pub fn read_uint_at<U: ReadWriteUint, const MAX_EXTRA_WORDS: usize>(
pub fn read_uint_at<U: ReadWriteUint, const MAX_EXTRA_U64S: usize>(
src: &[u8],
mut byte_idx: usize,
bits_past_byte: Bitlen,
n: Bitlen,
) -> U {
// Q: Why is this fast?
// A: The 0..MAX_EXTRA_WORDS can be unrolled at compile time and interact
// A: The 0..MAX_EXTRA_U64S can be unrolled at compile time and interact
// freely with an outer loop, allowing really fast SIMD stuff.
//
// Q: Why does this work?
// A: We set MAX_EXTRA_WORDS so that e.g. on 64 bit architectures,
// 0 to 57 bit reads -> 0 extra words
// 58 to 113 bit reads -> 1 extra words
// 114 to 128 bit reads -> 2 extra words
// During the 1st word (prior to the loop), we read all bytes from the
// current word. Due to our bit packing, up to the first 7 of these may
// A: We set MAX_EXTRA_U64S so that e.g. on 64 bit architectures,
// 0 to 57 bit reads -> 0 extra u64's
// 58 to 113 bit reads -> 1 extra u64's
// 114 to 128 bit reads -> 2 extra u64's
// During the 1st u64 (prior to the loop), we read all bytes from the
// current u64. Due to our bit packing, up to the first 7 of these may
// be useless, so we can read up to (64 - 7) = 57 bits safely from a
// single word. We right shift by only up to 7 bits, which is safe.
// single u64. We right shift by only up to 7 bits, which is safe.
//
// For the 2nd word, we skip only 7 bytes forward. This will overlap with
// the 1st word by 1 byte, which seems useless, but allows us to avoid one
// For the 2nd u64, we skip only 7 bytes forward. This will overlap with
// the 1st u64 by 1 byte, which seems useless, but allows us to avoid one
// nasty case: left shifting by U::BITS (a panic). This could happen e.g.
// with 64-bit reads when we start out byte-aligned (bits_past_byte=0).
//
// For the 3rd word and onward, we skip 8 bytes forward. Due to how we
// handled the 2nd word, the most we'll ever need to shift by is
// For the 3rd u64 and onward, we skip 8 bytes forward. Due to how we
// handled the 2nd u64, the most we'll ever need to shift by is
// precision - 8, which is safe.
let mut res = U::from_word(word_at(src, byte_idx) >> bits_past_byte);
let mut processed = min(n, WORD_BITLEN - 8 - bits_past_byte);
byte_idx += BYTES_PER_WORD - 1;

for _ in 0..MAX_EXTRA_WORDS {
res |= U::from_word(word_at(src, byte_idx)) << processed;
processed += WORD_BITLEN;
byte_idx += BYTES_PER_WORD;
let mut res = U::from_u64(u64_at(src, byte_idx) >> bits_past_byte);
let mut processed = min(n, 56 - bits_past_byte);
byte_idx += 7;

for _ in 0..MAX_EXTRA_U64S {
res |= U::from_u64(u64_at(src, byte_idx)) << processed;
processed += 64;
byte_idx += 8;
}

bits::lowest_bits(res, n)
Expand Down Expand Up @@ -201,7 +205,7 @@ impl<'a> BitReader<'a> {

pub fn read_uint<U: ReadWriteUint>(&mut self, n: Bitlen) -> U {
self.refill();
let res = match U::MAX_EXTRA_WORDS {
let res = match U::MAX_EXTRA_U64S {
0 => read_uint_at::<U, 0>(
self.current_stream,
self.stale_byte_idx,
Expand All @@ -221,8 +225,8 @@ impl<'a> BitReader<'a> {
n,
),
_ => panic!(
"[BitReader] data type too large (extra words {} > 2)",
U::MAX_EXTRA_WORDS
"[BitReader] data type too large (extra u64's {} > 2)",
U::MAX_EXTRA_U64S
),
};
self.consume(n);
Expand Down
39 changes: 21 additions & 18 deletions pco/src/bit_writer.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use std::io::Write;

use crate::bit_reader::word_at;
use crate::bit_reader::u64_at;
use crate::bits;
use crate::constants::{Bitlen, BYTES_PER_WORD, WORD_BITLEN};
use crate::constants::Bitlen;
use crate::errors::{PcoError, PcoResult};
use crate::read_write_uint::ReadWriteUint;

Expand All @@ -13,15 +13,15 @@ use crate::read_write_uint::ReadWriteUint;
// BitWriter (wrapping BitBuffer, generic to W) to reduce binary size

#[inline]
pub fn write_word_to(word: usize, byte_idx: usize, dst: &mut [u8]) {
pub fn write_u64_to(x: u64, byte_idx: usize, dst: &mut [u8]) {
unsafe {
let target = dst.as_mut_ptr().add(byte_idx) as *mut [u8; BYTES_PER_WORD];
*target = word.to_le_bytes();
let target = dst.as_mut_ptr().add(byte_idx) as *mut [u8; 8];
*target = x.to_le_bytes();
};
}

#[inline]
pub fn write_uint_to<U: ReadWriteUint, const MAX_EXTRA_WORDS: Bitlen>(
pub fn write_uint_to<U: ReadWriteUint, const MAX_EXTRA_U64S: Bitlen>(
x: U,
mut byte_idx: usize,
bits_past_byte: Bitlen,
Expand All @@ -30,15 +30,18 @@ pub fn write_uint_to<U: ReadWriteUint, const MAX_EXTRA_WORDS: Bitlen>(
) {
// See bit_reader for an explanation of why this is fast and how it works.
let x = bits::lowest_bits(x, n);
let word = word_at(dst, byte_idx) | (x.to_usize() << bits_past_byte);
write_word_to(word, byte_idx, dst);
let mut processed = WORD_BITLEN - 8 - bits_past_byte;
byte_idx += BYTES_PER_WORD - 1;

for _ in 0..MAX_EXTRA_WORDS {
write_word_to((x >> processed).to_usize(), byte_idx, dst);
processed += WORD_BITLEN;
byte_idx += BYTES_PER_WORD;
write_u64_to(
u64_at(dst, byte_idx) | (x.to_u64() << bits_past_byte),
byte_idx,
dst,
);
let mut processed = 56 - bits_past_byte;
byte_idx += 7;

for _ in 0..MAX_EXTRA_U64S {
write_u64_to((x >> processed).to_u64(), byte_idx, dst);
processed += 64;
byte_idx += 8;
}
}

Expand Down Expand Up @@ -98,7 +101,7 @@ impl<W: Write> BitWriter<W> {

pub fn write_uint<U: ReadWriteUint>(&mut self, x: U, n: Bitlen) {
self.refill();
match U::MAX_EXTRA_WORDS {
match U::MAX_EXTRA_U64S {
0 => write_uint_to::<U, 0>(
x,
self.stale_byte_idx,
Expand All @@ -121,8 +124,8 @@ impl<W: Write> BitWriter<W> {
&mut self.buf,
),
_ => panic!(
"[BitWriter] data type too large (extra words {} > 2)",
U::MAX_EXTRA_WORDS
"[BitWriter] data type too large (extra u64's {} > 2)",
U::MAX_EXTRA_U64S
),
}
self.consume(n);
Expand Down
6 changes: 3 additions & 3 deletions pco/src/bits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ use crate::data_types::UnsignedLike;
use crate::read_write_uint::ReadWriteUint;

#[inline]
pub fn lowest_bits<U: ReadWriteUint>(word: U, n: Bitlen) -> U {
pub fn lowest_bits<U: ReadWriteUint>(x: U, n: Bitlen) -> U {
if n >= U::BITS {
word
x
} else {
word & ((U::ONE << n) - U::ONE)
x & ((U::ONE << n) - U::ONE)
}
}

Expand Down
8 changes: 2 additions & 6 deletions pco/src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ pub const BITS_TO_ENCODE_N_BINS: Bitlen = 15;

// padding
pub const HEADER_PADDING: usize = 1;
pub const OVERSHOOT_PADDING: usize = MAX_SUPPORTED_PRECISION_BYTES + BYTES_PER_WORD + 1;
// + 9 because we might read an extra u64 (8 bytes), plus 1 for good measure
pub const OVERSHOOT_PADDING: usize = MAX_SUPPORTED_PRECISION_BYTES + 9;
// Chunk meta padding is enough for one full batch of bins; this should also
// generously cover the data needed to read the other parts of chunk meta.
pub const CHUNK_META_PADDING: usize =
Expand All @@ -31,11 +32,6 @@ pub const PAGE_LATENT_META_PADDING: usize =
pub const PAGE_PADDING: usize =
FULL_BATCH_SIZE * (MAX_SUPPORTED_PRECISION_BYTES + MAX_ANS_BYTES) + OVERSHOOT_PADDING;

// native architecture info
pub const WORD_SIZE: usize = usize::BITS as usize;
pub const WORD_BITLEN: Bitlen = usize::BITS as Bitlen;
pub const BYTES_PER_WORD: usize = WORD_SIZE / 8;

// cutoffs and legal parameter values
pub const AUTO_DELTA_LIMIT: usize = 1100;
pub const MAX_ANS_BITS: Bitlen = 14;
Expand Down
1 change: 0 additions & 1 deletion pco/src/data_types/floats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ macro_rules! impl_float_number {

impl NumberLike for $t {
const DTYPE_BYTE: u8 = $header_byte;
const PHYSICAL_BITS: usize = $bits;
const IS_FLOAT: bool = true;

type Unsigned = $unsigned;
Expand Down
5 changes: 2 additions & 3 deletions pco/src/data_types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,12 @@ pub trait UnsignedLike:

/// Converts a `u64` into this type. Panics if the conversion is
/// impossible.
fn from_word(word: usize) -> Self;
fn from_u64(x: u64) -> Self;

fn leading_zeros(self) -> Bitlen;

/// Converts the unsigned integer to a `u64`, truncating higher bits if necessary.
fn to_usize(self) -> usize;
fn to_u64(self) -> u64;

fn wrapping_add(self, other: Self) -> Self;
fn wrapping_sub(self, other: Self) -> Self;
Expand Down Expand Up @@ -142,7 +142,6 @@ pub trait NumberLike: Copy + Debug + Display + Default + PartialEq + 'static {
/// This must match the number of bytes in the `to_bytes` and `from_bytes`
/// implementations.
/// Note that booleans have 8 physical bits (not 1).
const PHYSICAL_BITS: usize;
const IS_FLOAT: bool = false;

/// The unsigned integer this type can convert between to do
Expand Down
1 change: 0 additions & 1 deletion pco/src/data_types/signeds.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ macro_rules! impl_signed {
($t: ty, $unsigned: ty, $header_byte: expr) => {
impl NumberLike for $t {
const DTYPE_BYTE: u8 = $header_byte;
const PHYSICAL_BITS: usize = Self::BITS as usize;

type Unsigned = $unsigned;

Expand Down
9 changes: 4 additions & 5 deletions pco/src/data_types/unsigneds.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ macro_rules! impl_unsigned {
type Float = $float;

#[inline]
fn from_word(word: usize) -> Self {
word as Self
fn from_u64(x: u64) -> Self {
x as Self
}

#[inline]
Expand All @@ -23,8 +23,8 @@ macro_rules! impl_unsigned {
}

#[inline]
fn to_usize(self) -> usize {
self as usize
fn to_u64(self) -> u64 {
self as u64
}

#[inline]
Expand Down Expand Up @@ -94,7 +94,6 @@ macro_rules! impl_unsigned_number {
($t: ty, $signed: ty, $float: ty, $header_byte: expr) => {
impl NumberLike for $t {
const DTYPE_BYTE: u8 = $header_byte;
const PHYSICAL_BITS: usize = Self::BITS as usize;

type Unsigned = Self;

Expand Down
Loading

0 comments on commit 4ce1182

Please sign in to comment.