Skip to content

Commit

Permalink
documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
mwlon committed Nov 9, 2024
1 parent f7d4344 commit 51cf3af
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 23 deletions.
20 changes: 10 additions & 10 deletions dtype_dispatch/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ macro_rules! build_dtype_macros {

impl $name {
#[inline]
pub fn new<S: $constraint>() -> Option<Self> {
let type_id = std::any::TypeId::of::<S>();
pub fn new<T: $constraint>() -> Option<Self> {
let type_id = std::any::TypeId::of::<T>();
$(
if type_id == std::any::TypeId::of::<$t>() {
return Some($name::$variant);
Expand All @@ -43,8 +43,8 @@ macro_rules! build_dtype_macros {

impl $name {
#[inline]
pub fn new<S: $constraint>() -> Option<Self> {
let type_id = std::any::TypeId::of::<S>();
pub fn new<T: $constraint>() -> Option<Self> {
let type_id = std::any::TypeId::of::<T>();
$(
if type_id == std::any::TypeId::of::<$t>() {
return Some($name::$variant);
Expand Down Expand Up @@ -125,26 +125,26 @@ macro_rules! build_dtype_macros {
None
}

pub fn downcast<S: $constraint>(self) -> Option<$container<S>> {
pub fn downcast<T: $constraint>(self) -> Option<$container<T>> {
match self {
$(
Self::$variant(inner) => inner.downcast::<S>(),
Self::$variant(inner) => inner.downcast::<T>(),
)+
}
}

pub fn downcast_ref<S: $constraint>(&self) -> Option<&$container<S>> {
pub fn downcast_ref<T: $constraint>(&self) -> Option<&$container<T>> {
match self {
$(
Self::$variant(inner) => inner.downcast_ref::<S>(),
Self::$variant(inner) => inner.downcast_ref::<T>(),
)+
}
}

pub fn downcast_mut<S: $constraint>(&mut self) -> Option<&mut $container<S>> {
pub fn downcast_mut<T: $constraint>(&mut self) -> Option<&mut $container<T>> {
match self {
$(
Self::$variant(inner) => inner.downcast_mut::<S>(),
Self::$variant(inner) => inner.downcast_mut::<T>(),
)+
}
}
Expand Down
12 changes: 5 additions & 7 deletions pco/src/data_types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,15 +134,16 @@ pub trait Latent:

/// *unstable API* Trait for data types supported for compression/decompression.
///
/// If you have a new data type you would like to add to the library or
/// implement as custom in your own, these are the questions you need to
/// answer:
/// If you have a new data type you would like to add to the library,
/// these are the questions you need to answer:
/// * What is the corresponding latent type? This is probably the
/// smallest unsigned integer with enough bits to represent the number.
/// * How can I convert to this latent representation and back
/// in *a way that preserves ordering*? For instance, transmuting `f32` to `u32`
/// wouldn't preserve ordering and would cause pco to fail. In this example,
/// one needs to flip the sign bit and, if negative, the rest of the bits.
///
/// Custom data types (defined outside of pco) are not currently supported.
pub trait Number: Copy + Debug + Display + Default + PartialEq + Send + Sync + 'static {
/// A number from 1-255 that corresponds to the number's data type.
///
Expand All @@ -157,12 +158,9 @@ pub trait Number: Copy + Debug + Display + Default + PartialEq + Send + Sync + '
/// `pco` data type implementation.
const NUMBER_TYPE_BYTE: u8;

/// The latent this type can convert between to do
/// bitwise logic and such.
/// The latent type this type converts to and from for bitwise logic and such.
type L: Latent;

/// Returns a `LatentDescriber` for each latent variable in the chunk
/// metadata.
fn get_latent_describers(meta: &ChunkMeta) -> PerLatentVar<LatentDescriber>;

fn mode_is_valid(mode: Mode) -> bool;
Expand Down
5 changes: 1 addition & 4 deletions pco/src/metadata/chunk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,7 @@ use crate::metadata::Mode;
pub struct ChunkMeta {
/// The formula `pco` used to compress each number at a low level.
pub mode: Mode,
/// How many times delta encoding was applied during compression.
/// This is between 0 and 7, inclusive.
///
/// See [`ChunkConfig`][crate::ChunkConfig] for more details.
/// How delta encoding was applied.
pub delta_encoding: DeltaEncoding,
/// Metadata about the interleaved streams needed by `pco` to
/// compress/decompress the inputs
Expand Down
11 changes: 9 additions & 2 deletions pco/src/metadata/delta_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,14 @@ impl DeltaLz77Config {
}
}

/// How Pco does
/// How Pco did
/// [delta encoding](https://en.wikipedia.org/wiki/Delta_encoding) on this
/// chunk.
///
/// Delta encoding optionally takes differences between nearby numbers,
/// greatly reducing the entropy of the data distribution in some cases.
/// This stage of processing happens after applying the
/// [`Mode`][crate::metadata::Mode].
/// [`Mode`][crate::metadata::Mode] during compression.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DeltaEncoding {
/// No delta encoding; the values are encoded as-is.
Expand All @@ -51,11 +51,18 @@ pub enum DeltaEncoding {
/// Encodes the differences between consecutive values (or differences
/// between those, etc.).
///
/// This is best if your numbers have high variance overall, but adjacent
/// numbers are close in value, e.g. an arithmetic sequence.
///
/// This order is always positive, between 1 and 7.
/// For instance, 2nd order delta encoding is delta-of-deltas.
Consecutive(DeltaConsecutiveConfig),
/// Encodes an extra "lookback" latent variable and the differences
/// `x[i] - x[i - lookback[i]]` between values.
///
/// This is best if your numbers have complex repeating patterns
/// beyond just adjacent elements.
///
/// The `window_n_log` parameter specifies how large the maximum lookback
/// can be.
Lz77(DeltaLz77Config),
Expand Down
19 changes: 19 additions & 0 deletions pco/src/metadata/per_latent_var.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,25 @@
use std::fmt::Debug;
use std::iter::Sum;

/// The possible kinds of latent variables present in a chunk.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LatentVarKey {
/// Used by certain types of
/// [delta encodings][crate::metadata::DeltaEncoding]. For example, Lz77
/// delta uses this to store lookbacks.
Delta,
/// The only required latent variable, used by
/// [modes][crate::metadata::Mode] to represent number values.
///
/// Always has the same precision as the encoded numbers.
Primary,
/// An optional additional latent variable, used by certain
/// [modes][crate::metadata::Mode] to represent number values.
Secondary,
}

/// A generic container holding a value for each applicable latent variable.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct PerLatentVar<T> {
pub delta: Option<T>,
Expand Down Expand Up @@ -63,6 +75,8 @@ impl<T> PerLatentVar<T> {
}
}

/// Returns a new `PerLatentVar` where each entry has been wrapped in a
/// reference.
pub fn as_ref(&self) -> PerLatentVar<&T> {
PerLatentVar {
delta: self.delta.as_ref(),
Expand All @@ -87,6 +101,9 @@ impl<T> PerLatentVar<T> {
}
}

/// Zips each element of this `PerLatentVar` with the corresponding element of
/// the other.
///
/// Will panic if either one has a latent variable that the other does not.
pub fn zip_exact<S>(self, other: PerLatentVar<S>) -> PerLatentVar<(T, S)> {
let zip_option = |a: Option<T>, b: Option<S>| match (a, b) {
(Some(a), Some(b)) => Some((a, b)),
Expand All @@ -101,6 +118,8 @@ impl<T> PerLatentVar<T> {
}
}

/// Returns a vector of the defined `LatentVarKey`s and values, in order
/// of appearance in the file.
pub fn enumerated(self) -> Vec<(LatentVarKey, T)> {
let mut res = Vec::with_capacity(3);
if let Some(value) = self.delta {
Expand Down

0 comments on commit 51cf3af

Please sign in to comment.