From 29a9fcfecfe27f78c969a263f43150f7cc4f660e Mon Sep 17 00:00:00 2001 From: Martin Date: Sat, 9 Nov 2024 16:13:18 -0500 Subject: [PATCH] Delta lookback (#248) --- docs/benchmark_results/mbp_m3_max.csv | 6 +- dtype_dispatch/src/lib.rs | 41 +- images/real_world_compression_ratio.svg | 34 +- images/real_world_compression_speed.svg | 74 +-- images/real_world_decompression_speed.svg | 67 ++- pco/assets/v0_4_0_lookback_delta.pco | Bin 0 -> 120 bytes pco/src/bit_reader.rs | 5 + pco/src/bit_writer.rs | 9 + pco/src/chunk_config.rs | 5 + pco/src/compression_intermediates.rs | 31 +- pco/src/compression_table.rs | 2 +- pco/src/constants.rs | 8 +- pco/src/data_types/dynamic.rs | 9 +- pco/src/data_types/floats.rs | 11 +- pco/src/data_types/mod.rs | 42 +- pco/src/data_types/signeds.rs | 11 +- pco/src/data_types/unsigneds.rs | 16 +- pco/src/delta.rs | 375 +++++++++++- pco/src/describers.rs | 175 ++++-- pco/src/float_mult_utils.rs | 21 +- pco/src/float_quant_utils.rs | 58 +- pco/src/int_mult_utils.rs | 29 +- pco/src/latent_batch_dissector.rs | 8 +- pco/src/latent_chunk_compressor.rs | 85 +-- ...pressor.rs => latent_page_decompressor.rs} | 103 +++- pco/src/lib.rs | 12 +- pco/src/macros.rs | 10 +- pco/src/metadata/bin.rs | 1 - pco/src/metadata/chunk.rs | 249 ++++---- pco/src/metadata/chunk_latent_var.rs | 48 +- pco/src/metadata/delta_encoding.rs | 240 +++++++- pco/src/metadata/dyn_latents.rs | 7 + pco/src/metadata/format_version.rs | 4 + pco/src/metadata/mod.rs | 5 +- pco/src/metadata/mode.rs | 151 ++++- pco/src/metadata/page.rs | 48 +- pco/src/metadata/page_latent_var.rs | 27 +- pco/src/metadata/per_latent_var.rs | 149 +++++ pco/src/split_latents.rs | 7 + pco/src/standalone/compressor.rs | 10 +- pco/src/tests/compatibility.rs | 237 ++++---- pco/src/tests/recovery.rs | 26 +- pco/src/tests/stability.rs | 24 +- pco/src/wrapped/chunk_compressor.rs | 537 +++++++++++------- pco/src/wrapped/file_compressor.rs | 2 +- pco/src/wrapped/file_decompressor.rs | 12 +- pco/src/wrapped/guarantee.rs | 27 +- pco/src/wrapped/page_decompressor.rs | 314 +++++----- pco_cli/src/dtypes.rs | 4 + pco_cli/src/input/mod.rs | 6 + pco_cli/src/inspect/handler.rs | 53 +- pco_python/README.md | 2 +- pco_python/src/config.rs | 6 + pco_python/src/wrapped/compressor.rs | 44 +- pco_python/test/test_standalone.py | 38 +- 55 files changed, 2391 insertions(+), 1134 deletions(-) create mode 100644 pco/assets/v0_4_0_lookback_delta.pco rename pco/src/{latent_batch_decompressor.rs => latent_page_decompressor.rs} (74%) create mode 100644 pco/src/metadata/per_latent_var.rs create mode 100644 pco/src/split_latents.rs diff --git a/docs/benchmark_results/mbp_m3_max.csv b/docs/benchmark_results/mbp_m3_max.csv index f586941b..b7ac8f22 100644 --- a/docs/benchmark_results/mbp_m3_max.csv +++ b/docs/benchmark_results/mbp_m3_max.csv @@ -1,16 +1,16 @@ input,codec,compress_dt,decompress_dt,compressed_size air_quality,blosc:cname=zstd:level=3,0.104805835,0.028497249,8429280 air_quality,parquet:compression=zstd1,0.2296,0.02594,11156819 -air_quality,pco,0.10299417,0.021796916,4283105 +air_quality,pco,0.11510511,0.022027887,4283153 air_quality,spdp,0.09585233,0.10599508,22560633 air_quality,tpfor,0.029559456,0.002875309,19114630 r_place,blosc:cname=zstd:level=3,11.372147,3.3396413,976973046 r_place,parquet:compression=zstd1,15.389868,1.9258637,961718183 -r_place,pco,10.476611,1.5533803,661664577 +r_place,pco,11.368066,1.5747843,661665164 r_place,spdp,11.464923,12.225844,3306514546 r_place,tpfor,2.5569496,0.5739353,2023272462 
taxi,blosc:cname=zstd:level=3,6.3025703,1.8824589,841110760 taxi,parquet:compression=zstd1,5.600355,0.9174722,464867099 -taxi,pco,5.3043575,0.8830483,333004373 +taxi,pco,5.6141233,0.8759089,333004631 taxi,spdp,4.5603795,5.0574136,1636214754 taxi,tpfor,1.0123023,0.24857067,1452549995 \ No newline at end of file diff --git a/dtype_dispatch/src/lib.rs b/dtype_dispatch/src/lib.rs index 77a185e1..a403ec41 100644 --- a/dtype_dispatch/src/lib.rs +++ b/dtype_dispatch/src/lib.rs @@ -1,4 +1,5 @@ #![doc = include_str!("../README.md")] +#![allow(unreachable_patterns)] /// Produces two macros: an enum definer and an enum matcher. /// @@ -15,17 +16,38 @@ macro_rules! build_dtype_macros { ) => { $(#[$definer_attrs])* macro_rules! $definer { + (#[$enum_attrs: meta] $vis: vis $name: ident) => { + #[$enum_attrs] + #[non_exhaustive] + $vis enum $name { + $($variant,)+ + } + + impl $name { + #[inline] + pub fn new() -> Option { + let type_id = std::any::TypeId::of::(); + $( + if type_id == std::any::TypeId::of::<$t>() { + return Some($name::$variant); + } + )+ + None + } + } + }; (#[$enum_attrs: meta] #[repr($desc_t: ty)] $vis: vis $name: ident = $desc_val: ident) => { #[$enum_attrs] #[repr($desc_t)] + #[non_exhaustive] $vis enum $name { $($variant = <$t>::$desc_val,)+ } impl $name { #[inline] - pub fn new() -> Option { - let type_id = std::any::TypeId::of::(); + pub fn new() -> Option { + let type_id = std::any::TypeId::of::(); $( if type_id == std::any::TypeId::of::<$t>() { return Some($name::$variant); @@ -50,6 +72,7 @@ macro_rules! build_dtype_macros { } #[$enum_attrs] + #[non_exhaustive] $vis enum $name { $($variant($container<$t>),)+ } @@ -106,26 +129,26 @@ macro_rules! build_dtype_macros { None } - pub fn downcast(self) -> Option<$container> { + pub fn downcast(self) -> Option<$container> { match self { $( - Self::$variant(inner) => inner.downcast::(), + Self::$variant(inner) => inner.downcast::(), )+ } } - pub fn downcast_ref(&self) -> Option<&$container> { + pub fn downcast_ref(&self) -> Option<&$container> { match self { $( - Self::$variant(inner) => inner.downcast_ref::(), + Self::$variant(inner) => inner.downcast_ref::(), )+ } } - pub fn downcast_mut(&mut self) -> Option<&mut $container> { + pub fn downcast_mut(&mut self) -> Option<&mut $container> { match self { $( - Self::$variant(inner) => inner.downcast_mut::(), + Self::$variant(inner) => inner.downcast_mut::(), )+ } } @@ -141,6 +164,7 @@ macro_rules! build_dtype_macros { type $generic = $t; $block })+ + _ => unreachable!() } }; ($value: expr, $enum_: ident<$generic: ident>($inner: ident) => $block: block) => { @@ -149,6 +173,7 @@ macro_rules! 
build_dtype_macros { type $generic = $t; $block })+ + _ => unreachable!() } }; } diff --git a/images/real_world_compression_ratio.svg b/images/real_world_compression_ratio.svg index 286e00b8..df793534 100644 --- a/images/real_world_compression_ratio.svg +++ b/images/real_world_compression_ratio.svg @@ -8,9 +8,9 @@ - - - + + + @@ -132,7 +132,7 @@ - + @@ -185,11 +185,11 @@ - - - + + + - + @@ -204,7 +204,7 @@ - + @@ -229,11 +229,11 @@ - - - + + + - + @@ -245,7 +245,7 @@ - + @@ -273,13 +273,13 @@ - + - + - + diff --git a/images/real_world_compression_speed.svg b/images/real_world_compression_speed.svg index faab9f86..ac2caf9e 100644 --- a/images/real_world_compression_speed.svg +++ b/images/real_world_compression_speed.svg @@ -8,9 +8,9 @@ - - - + + + @@ -59,17 +59,17 @@ - + - - + + - - + + - + @@ -79,7 +79,10 @@ - + + + + @@ -87,26 +90,28 @@ - - - + + + - + - - - - - + + + - + - + + + + + @@ -114,24 +119,21 @@ - - - + + + - - - - - - - + + + + - + - + @@ -139,13 +141,13 @@ - + - + - + diff --git a/images/real_world_decompression_speed.svg b/images/real_world_decompression_speed.svg index 858dd879..d8c30c3a 100644 --- a/images/real_world_decompression_speed.svg +++ b/images/real_world_decompression_speed.svg @@ -8,9 +8,9 @@ - - - + + + @@ -61,30 +61,30 @@ - + - - + + - - + + - + - - + - + + @@ -95,27 +95,26 @@ - - - + + + - - - - - + - - + + - + + + + - + @@ -127,23 +126,23 @@ - - - + + + - - + + - + - + @@ -152,13 +151,13 @@ - + - + - + diff --git a/pco/assets/v0_4_0_lookback_delta.pco b/pco/assets/v0_4_0_lookback_delta.pco new file mode 100644 index 0000000000000000000000000000000000000000..de749ba1f4894cded5d05261f8f02c6c96423267 GIT binary patch literal 120 zcmXR&&R1mO{Kd@poS8v^a}EQ800RSq!w*p)$sohP5VE}Lcg0^IyMf_=!rJ=_*g>)r zZ|kx!Fch$X)N~xb|DK(pq3Muu!{*J#`s>%~e$d|M+ZvDpRKehIev-?))@3V|4J&_2 S*xxK)v{sr4DEyC+fdK$5@+IB? literal 0 HcmV?d00001 diff --git a/pco/src/bit_reader.rs b/pco/src/bit_reader.rs index ca8eb5b1..1e2756e4 100644 --- a/pco/src/bit_reader.rs +++ b/pco/src/bit_reader.rs @@ -149,6 +149,7 @@ impl<'a> BitReader<'a> { self.consume(n); res } + pub unsafe fn read_usize(&mut self, n: Bitlen) -> usize { self.read_uint(n) } @@ -157,6 +158,10 @@ impl<'a> BitReader<'a> { self.read_uint(n) } + pub unsafe fn read_bool(&mut self) -> bool { + self.read_uint::(1) > 0 + } + // checks in bounds and returns bit idx #[inline] fn bit_idx_safe(&self) -> PcoResult { diff --git a/pco/src/bit_writer.rs b/pco/src/bit_writer.rs index 9fdee2a6..6e0df0eb 100644 --- a/pco/src/bit_writer.rs +++ b/pco/src/bit_writer.rs @@ -129,6 +129,10 @@ impl BitWriter { self.write_uint(x, n) } + pub unsafe fn write_bool(&mut self, b: bool) { + self.write_uint(b as u32, 1) + } + pub fn finish_byte(&mut self) { self.stale_byte_idx += self.bits_past_byte.div_ceil(8) as usize; self.bits_past_byte = 0; @@ -153,6 +157,11 @@ impl BitWriter { pub fn into_inner(self) -> W { self.dst } + + #[cfg(test)] + pub fn bit_idx(&self) -> usize { + self.stale_byte_idx * 8 + self.bits_past_byte as usize + } } #[cfg(test)] diff --git a/pco/src/chunk_config.rs b/pco/src/chunk_config.rs index 8c755f15..0865715a 100644 --- a/pco/src/chunk_config.rs +++ b/pco/src/chunk_config.rs @@ -63,6 +63,11 @@ pub enum DeltaSpec { /// deltas-of-deltas, etc. /// It is legal to use 0th order, but it is identical to `None`. TryConsecutive(usize), + /// Tries delta encoding according to an extra latent variable of "lookback". 
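  /// A minimal sketch of opting in (assuming `ChunkConfig` exposes its
  /// `delta_spec` field publicly):
  /// `ChunkConfig { delta_spec: DeltaSpec::TryLookback, ..Default::default() }`.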
+ /// + /// This can improve compression ratio when there are nontrivial patterns in + /// your numbers, but reduces compression speed substantially. + TryLookback, } // TODO consider adding a "lossiness" spec that allows dropping secondary latent diff --git a/pco/src/compression_intermediates.rs b/pco/src/compression_intermediates.rs index 89acb558..fde097f4 100644 --- a/pco/src/compression_intermediates.rs +++ b/pco/src/compression_intermediates.rs @@ -1,30 +1,45 @@ use crate::ans::{AnsState, Symbol}; use crate::constants::{Bitlen, Weight, ANS_INTERLEAVING}; use crate::data_types::{Latent, Number}; -use crate::metadata::Mode; +use crate::delta::DeltaState; +use crate::metadata::per_latent_var::{LatentVarKey, PerLatentVar}; +use crate::metadata::{DynLatents, Mode}; +use crate::split_latents::SplitLatents; +use std::ops::Range; + +#[derive(Clone, Debug)] +pub struct PageInfoVar { + pub delta_state: DeltaState, + pub range: Range, +} #[derive(Clone, Debug)] pub struct PageInfo { pub page_n: usize, - pub start_idx: usize, - pub end_idx_per_var: Vec, + pub per_latent_var: PerLatentVar, +} + +impl PageInfo { + pub fn range_for_latent_var(&self, key: LatentVarKey) -> Range { + self.per_latent_var.get(key).unwrap().range.clone() + } } #[derive(Clone, Debug)] -pub struct DissectedPageVar { +pub struct DissectedPageVar { // These vecs should have the same length. pub ans_vals: Vec, pub ans_bits: Vec, - pub offsets: Vec, + pub offsets: DynLatents, pub offset_bits: Vec, pub ans_final_states: [AnsState; ANS_INTERLEAVING], } #[derive(Clone, Debug)] -pub struct DissectedPage { +pub struct DissectedPage { pub page_n: usize, - pub per_latent_var: Vec>, // one per latent variable + pub per_latent_var: PerLatentVar, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -58,5 +73,5 @@ pub(crate) struct Bid { // information (inv_base) not captured entirely in the mode. This extra // information is an implementation detail of the compressor, not part of the // format itself, and is not / does not need to be known to the decompressor. 
- pub split_fn: Box Vec>>, + pub split_fn: Box SplitLatents>, } diff --git a/pco/src/compression_table.rs b/pco/src/compression_table.rs index acb9d0da..0b1b9c7e 100644 --- a/pco/src/compression_table.rs +++ b/pco/src/compression_table.rs @@ -1,7 +1,7 @@ use crate::compression_intermediates::BinCompressionInfo; use crate::data_types::Latent; -#[derive(Debug, Clone)] +#[derive(Clone, Debug)] pub struct CompressionTable { pub search_size_log: usize, pub search_lowers: Vec, diff --git a/pco/src/constants.rs b/pco/src/constants.rs index 853a952f..d010b363 100644 --- a/pco/src/constants.rs +++ b/pco/src/constants.rs @@ -6,14 +6,18 @@ pub(crate) type Bitlen = u32; // must be u32 or larger // exposed in public API pub(crate) type Weight = u32; +pub(crate) type DeltaLookback = u32; // compatibility -pub const CURRENT_FORMAT_VERSION: u8 = 2; +pub const CURRENT_FORMAT_VERSION: u8 = 3; // bit lengths pub const BITS_TO_ENCODE_ANS_SIZE_LOG: Bitlen = 4; +pub const BITS_TO_ENCODE_MODE_VARIANT: Bitlen = 4; +pub const BITS_TO_ENCODE_DELTA_ENCODING_VARIANT: Bitlen = 4; pub const BITS_TO_ENCODE_DELTA_ENCODING_ORDER: Bitlen = 3; -pub const BITS_TO_ENCODE_MODE: Bitlen = 4; +pub const BITS_TO_ENCODE_LZ_DELTA_WINDOW_N_LOG: Bitlen = 5; +pub const BITS_TO_ENCODE_LZ_DELTA_STATE_N_LOG: Bitlen = 4; pub const BITS_TO_ENCODE_N_BINS: Bitlen = 15; // conservative: wide enough to support quantizing float datasets with 255 unused bits of precision pub const BITS_TO_ENCODE_QUANTIZE_K: Bitlen = 8; diff --git a/pco/src/data_types/dynamic.rs b/pco/src/data_types/dynamic.rs index f6d8d2c5..348d98b5 100644 --- a/pco/src/data_types/dynamic.rs +++ b/pco/src/data_types/dynamic.rs @@ -1,8 +1,13 @@ -use crate::data_types::Number; -use crate::macros::define_number_enum; +use crate::data_types::{Latent, Number}; +use crate::macros::{define_latent_enum, define_number_enum}; define_number_enum!( #[derive(Clone, Copy, Debug, PartialEq, Eq)] #[repr(u8)] pub NumberType = NUMBER_TYPE_BYTE ); + +define_latent_enum!( + #[derive(Clone, Copy, Debug, PartialEq, Eq)] + pub LatentType +); diff --git a/pco/src/data_types/floats.rs b/pco/src/data_types/floats.rs index 707e4fed..d27536ac 100644 --- a/pco/src/data_types/floats.rs +++ b/pco/src/data_types/floats.rs @@ -10,7 +10,8 @@ use crate::data_types::{split_latents_classic, Float, Latent, Number}; use crate::describers::LatentDescriber; use crate::errors::{PcoError, PcoResult}; use crate::float_mult_utils::FloatMultConfig; -use crate::metadata::{ChunkMeta, Mode}; +use crate::metadata::per_latent_var::PerLatentVar; +use crate::metadata::{ChunkMeta, DynLatents, Mode}; use crate::{describers, float_mult_utils, float_quant_utils, sampling, ChunkConfig}; fn filter_sample(num: &F) -> Option { @@ -28,7 +29,7 @@ fn filter_sample(num: &F) -> Option { fn choose_mode_and_split_latents( nums: &[F], chunk_config: &ChunkConfig, -) -> PcoResult> { +) -> PcoResult { match chunk_config.mode_spec { ModeSpec::Auto => { // up to 3 bids: classic, float mult, float quant modes @@ -318,7 +319,7 @@ macro_rules! impl_float_number { type L = $latent; - fn get_latent_describers(meta: &ChunkMeta) -> Vec> { + fn get_latent_describers(meta: &ChunkMeta) -> PerLatentVar { describers::match_classic_mode::(meta, " ULPs") .or_else(|| describers::match_float_modes::(meta)) .expect("invalid mode for float type") @@ -338,7 +339,7 @@ macro_rules! 
impl_float_number { fn choose_mode_and_split_latents( nums: &[Self], config: &ChunkConfig, - ) -> PcoResult> { + ) -> PcoResult { choose_mode_and_split_latents(nums, config) } @@ -363,7 +364,7 @@ macro_rules! impl_float_number { mem_layout ^ $sign_bit_mask } } - fn join_latents(mode: Mode, primary: &mut [Self::L], secondary: &[Self::L]) { + fn join_latents(mode: Mode, primary: &mut [Self::L], secondary: Option<&DynLatents>) { match mode { Mode::Classic => (), Mode::FloatMult(dyn_latent) => { diff --git a/pco/src/data_types/mod.rs b/pco/src/data_types/mod.rs index 1b564d1c..0e58eff0 100644 --- a/pco/src/data_types/mod.rs +++ b/pco/src/data_types/mod.rs @@ -5,12 +5,15 @@ use std::ops::{ Rem, RemAssign, Shl, Shr, Sub, SubAssign, }; -pub use dynamic::NumberType; +pub use dynamic::{LatentType, NumberType}; use crate::constants::Bitlen; use crate::describers::LatentDescriber; use crate::errors::PcoResult; +use crate::metadata::dyn_latents::DynLatents; +use crate::metadata::per_latent_var::PerLatentVar; use crate::metadata::{ChunkMeta, Mode}; +use crate::split_latents::SplitLatents; use crate::ChunkConfig; mod dynamic; @@ -18,7 +21,7 @@ mod floats; mod signeds; mod unsigneds; -pub(crate) type ModeAndLatents = (Mode, Vec>); +pub(crate) type ModeAndLatents = (Mode, SplitLatents); /// This is used internally for compressing and decompressing with /// float modes. @@ -74,7 +77,7 @@ pub(crate) trait Float: fn from_latent_numerical(l: Self::L) -> Self; } -/// *unstable API* Trait for data types that behave like unsigned integers. +/// **unstable API** Trait for data types that behave like unsigned integers. /// /// This is used extensively in `pco` to guarantee that bitwise /// operations like `>>` and `|=` are available and that certain properties @@ -129,17 +132,18 @@ pub trait Latent: } } -/// *unstable API* Trait for data types supported for compression/decompression. +/// **unstable API** Trait for data types supported for compression/decompression. /// -/// If you have a new data type you would like to add to the library or -/// implement as custom in your own, these are the questions you need to -/// answer: +/// If you have a new data type you would like to add to the library or, +/// these are the questions you need to answer: /// * What is the corresponding latent type? This is probably the /// smallest unsigned integer with enough bits to represent the number. /// * How can I convert to this latent representation and back /// in *a way that preserves ordering*? For instance, transmuting `f32` to `u32` /// wouldn't preserve ordering and would cause pco to fail. In this example, /// one needs to flip the sign bit and, if negative, the rest of the bits. +/// +/// Custom data types (defined outside of pco) are not currently supported. pub trait Number: Copy + Debug + Display + Default + PartialEq + Send + Sync + 'static { /// A number from 1-255 that corresponds to the number's data type. /// @@ -154,13 +158,10 @@ pub trait Number: Copy + Debug + Display + Default + PartialEq + Send + Sync + ' /// `pco` data type implementation. const NUMBER_TYPE_BYTE: u8; - /// The latent this type can convert between to do - /// bitwise logic and such. + /// The latent this type can convert between to do bitwise logic and such. type L: Latent; - /// Returns a `LatentDescriber` for each latent variable in the chunk - /// metadata. 
- fn get_latent_describers(meta: &ChunkMeta) -> Vec>; + fn get_latent_describers(meta: &ChunkMeta) -> PerLatentVar; fn mode_is_valid(mode: Mode) -> bool; /// Breaks the numbers into latent variables for better compression. @@ -168,21 +169,26 @@ pub trait Number: Copy + Debug + Display + Default + PartialEq + Send + Sync + ' /// Returns /// * mode: the [`Mode`] that will be stored alongside the data /// for decompression - /// * latents: a list of latent variables, each of which contains a latent per - /// num in `nums` + /// * latents: a primary and optionally secondary latent variable, each of + /// which contains a latent per num in `nums`. Primary must be of the same + /// latent type as T. fn choose_mode_and_split_latents( nums: &[Self], config: &ChunkConfig, - ) -> PcoResult>; + ) -> PcoResult; fn from_latent_ordered(l: Self::L) -> Self; fn to_latent_ordered(self) -> Self::L; - fn join_latents(mode: Mode, primary: &mut [Self::L], secondary: &[Self::L]); + fn join_latents(mode: Mode, primary: &mut [Self::L], secondary: Option<&DynLatents>); fn transmute_to_latents(slice: &mut [Self]) -> &mut [Self::L]; fn transmute_to_latent(self) -> Self::L; } -pub(crate) fn split_latents_classic(nums: &[T]) -> Vec> { - vec![nums.iter().map(|&x| x.to_latent_ordered()).collect()] +pub(crate) fn split_latents_classic(nums: &[T]) -> SplitLatents { + let primary = DynLatents::new(nums.iter().map(|&x| x.to_latent_ordered()).collect()).unwrap(); + SplitLatents { + primary, + secondary: None, + } } diff --git a/pco/src/data_types/signeds.rs b/pco/src/data_types/signeds.rs index 692ac8b6..211d7c1d 100644 --- a/pco/src/data_types/signeds.rs +++ b/pco/src/data_types/signeds.rs @@ -3,7 +3,8 @@ use std::mem; use crate::data_types::{unsigneds, ModeAndLatents, Number}; use crate::describers::LatentDescriber; use crate::errors::PcoResult; -use crate::metadata::{ChunkMeta, Mode}; +use crate::metadata::per_latent_var::PerLatentVar; +use crate::metadata::{ChunkMeta, DynLatents, Mode}; use crate::{describers, int_mult_utils, ChunkConfig}; macro_rules! impl_signed { @@ -13,9 +14,9 @@ macro_rules! impl_signed { type L = $latent; - fn get_latent_describers(meta: &ChunkMeta) -> Vec> { + fn get_latent_describers(meta: &ChunkMeta) -> PerLatentVar { describers::match_classic_mode::(meta, "") - .or_else(|| describers::match_int_modes(meta, true)) + .or_else(|| describers::match_int_modes::(meta, true)) .expect("invalid mode for signed type") } @@ -29,7 +30,7 @@ macro_rules! impl_signed { fn choose_mode_and_split_latents( nums: &[Self], config: &ChunkConfig, - ) -> PcoResult> { + ) -> PcoResult { unsigneds::choose_mode_and_split_latents(&nums, config) } @@ -41,7 +42,7 @@ macro_rules! 
impl_signed { fn to_latent_ordered(self) -> Self::L { self.wrapping_sub(Self::MIN) as $latent } - fn join_latents(mode: Mode, primary: &mut [Self::L], secondary: &[Self::L]) { + fn join_latents(mode: Mode, primary: &mut [Self::L], secondary: Option<&DynLatents>) { match mode { Mode::Classic => (), Mode::IntMult(dyn_latent) => { diff --git a/pco/src/data_types/unsigneds.rs b/pco/src/data_types/unsigneds.rs index 04d351fb..ccac4929 100644 --- a/pco/src/data_types/unsigneds.rs +++ b/pco/src/data_types/unsigneds.rs @@ -1,16 +1,16 @@ +use super::ModeAndLatents; use crate::constants::Bitlen; use crate::data_types::{split_latents_classic, Latent, Number}; use crate::describers::LatentDescriber; use crate::errors::{PcoError, PcoResult}; -use crate::metadata::{ChunkMeta, DynLatent, Mode}; +use crate::metadata::per_latent_var::PerLatentVar; +use crate::metadata::{ChunkMeta, DynLatent, DynLatents, Mode}; use crate::{describers, int_mult_utils, ChunkConfig, ModeSpec}; -use super::ModeAndLatents; - pub fn choose_mode_and_split_latents( nums: &[T], config: &ChunkConfig, -) -> PcoResult> { +) -> PcoResult { match config.mode_spec { ModeSpec::Auto => { if let Some(base) = int_mult_utils::choose_base(nums) { @@ -83,9 +83,9 @@ macro_rules! impl_unsigned_number { type L = Self; - fn get_latent_describers(meta: &ChunkMeta) -> Vec> { + fn get_latent_describers(meta: &ChunkMeta) -> PerLatentVar { describers::match_classic_mode::(meta, "") - .or_else(|| describers::match_int_modes(meta, false)) + .or_else(|| describers::match_int_modes::(meta, false)) .expect("invalid mode for unsigned type") } @@ -99,7 +99,7 @@ macro_rules! impl_unsigned_number { fn choose_mode_and_split_latents( nums: &[Self], config: &ChunkConfig, - ) -> PcoResult> { + ) -> PcoResult { choose_mode_and_split_latents(nums, config) } @@ -111,7 +111,7 @@ macro_rules! impl_unsigned_number { fn to_latent_ordered(self) -> Self::L { self } - fn join_latents(mode: Mode, primary: &mut [Self::L], secondary: &[Self::L]) { + fn join_latents(mode: Mode, primary: &mut [Self::L], secondary: Option<&DynLatents>) { match mode { Mode::Classic => (), Mode::IntMult(dyn_latent) => { diff --git a/pco/src/delta.rs b/pco/src/delta.rs index 6a87e44d..6c05f1b6 100644 --- a/pco/src/delta.rs +++ b/pco/src/delta.rs @@ -1,13 +1,15 @@ +use crate::constants::{Bitlen, DeltaLookback}; use crate::data_types::Latent; +use crate::macros::match_latent_enum; +use crate::metadata::delta_encoding::DeltaLookbackConfig; +use crate::metadata::dyn_latents::DynLatents; +use crate::metadata::DeltaEncoding; +use crate::FULL_BATCH_N; +use std::mem::MaybeUninit; +use std::ops::Range; +use std::{array, cmp, mem}; -#[derive(Clone, Debug, Default)] -pub(crate) struct DeltaMoments(pub(crate) Vec); - -impl DeltaMoments { - pub fn order(&self) -> usize { - self.0.len() - } -} +pub type DeltaState = DynLatents; // Without this, deltas in, say, [-5, 5] would be split out of order into // [U::MAX - 4, U::MAX] and [0, 5]. @@ -21,35 +23,39 @@ pub fn toggle_center_in_place(latents: &mut [L]) { } } -fn first_order_encode_in_place(latents: &mut [L]) { +fn first_order_encode_consecutive_in_place(latents: &mut [L]) { if latents.is_empty() { return; } - for i in 0..latents.len() - 1 { - latents[i] = latents[i + 1].wrapping_sub(latents[i]); + for i in (1..latents.len()).rev() { + latents[i] = latents[i].wrapping_sub(latents[i - 1]); } } -// used for a single page, so we return the delta moments +// Used for a single page, so we return the delta moments. 
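// For example, order-1 encoding of [3, 5, 4, 9] returns the page moment [3]
// and, before centering, leaves [3, 2, -1, 5] (wrapping arithmetic) in place,
// with the leading slot now junk.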
+// All encode in place functions leave junk data (`order` +// latents in this case) at the front of the latents. +// Using the front instead of the back is preferable because it makes the lookback +// encode function simpler and faster. #[inline(never)] -pub(crate) fn encode_in_place(mut latents: &mut [L], order: usize) -> DeltaMoments { +fn encode_consecutive_in_place(order: usize, mut latents: &mut [L]) -> Vec { // TODO this function could be made faster by doing all steps on mini batches // of ~512 at a time let mut page_moments = Vec::with_capacity(order); for _ in 0..order { page_moments.push(latents.first().copied().unwrap_or(L::ZERO)); - first_order_encode_in_place(latents); - let truncated_len = latents.len().saturating_sub(1); - latents = &mut latents[0..truncated_len]; + first_order_encode_consecutive_in_place(latents); + let truncated_start = cmp::min(latents.len(), 1); + latents = &mut latents[truncated_start..]; } toggle_center_in_place(latents); - DeltaMoments(page_moments) + page_moments } -fn first_order_decode_in_place(moment: &mut L, latents: &mut [L]) { +fn first_order_decode_consecutive_in_place(moment: &mut L, latents: &mut [L]) { for delta in latents.iter_mut() { let tmp = *delta; *delta = *moment; @@ -59,34 +65,345 @@ fn first_order_decode_in_place(moment: &mut L, latents: &mut [L]) { // used for a single batch, so we mutate the delta moments #[inline(never)] -pub(crate) fn decode_in_place(delta_moments: &mut DeltaMoments, latents: &mut [L]) { +pub(crate) fn decode_consecutive_in_place(delta_moments: &mut [L], latents: &mut [L]) { toggle_center_in_place(latents); - for moment in delta_moments.0.iter_mut().rev() { - first_order_decode_in_place(moment, latents); + for moment in delta_moments.iter_mut().rev() { + first_order_decode_consecutive_in_place(moment, latents); } } +// there are 3 types of proposed lookbacks: +// * brute force: just try the most recent few latents +// * repeating: try the most recent lookbacks we actually used +// * hash: look up similar values by hash +const PROPOSED_LOOKBACKS: usize = 16; +const BRUTE_LOOKBACKS: usize = 6; +const REPEATING_LOOKBACKS: usize = 4; +// To help locate similar latents for lookback encoding, we hash each latent at +// different "coarsenesses" and write them into a vector. e.g. a coarseness +// of 8 means that (l >> 8) gets hashed, so we can lookup recent values by +// quotient by 256. 
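// For example, with coarseness 8 the latents 0x0001_2345 and 0x0001_23ff fall
// in the same bucket (l >> 8 == 0x0001_23), so a recent occurrence of one can
// be proposed as a lookback for the other even though their low bytes differ.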
+const COARSENESSES: [Bitlen; 2] = [0, 8]; + +fn lookback_hash_lookup( + l: u64, + i: usize, + hash_table_n: usize, + window_n: usize, + idx_hash_table: &mut [usize], + proposed_lookbacks: &mut [usize; PROPOSED_LOOKBACKS], +) { + let hash_mask = hash_table_n - 1; + // might be possible to improve this hash fn + let hash_fn = |mut x: u64| { + // constant is roughly 2**64 / phi + x = (x ^ (x >> 32)).wrapping_mul(11400714819323197441); + x = x ^ (x >> 32); + x as usize & hash_mask + }; + + let mut proposal_idx = BRUTE_LOOKBACKS + REPEATING_LOOKBACKS; + let mut offset = 0; + for coarseness in COARSENESSES { + let bucket = l >> coarseness; + let buckets = [bucket.wrapping_sub(1), bucket, bucket.wrapping_add(1)]; + let hashes = buckets.map(hash_fn); + for h in hashes { + let lookback_to_last_instance = unsafe { i - *idx_hash_table.get_unchecked(offset + h) }; + proposed_lookbacks[proposal_idx] = if lookback_to_last_instance <= window_n { + lookback_to_last_instance + } else { + cmp::min(proposal_idx, i) + }; + proposal_idx += 1; + } + let h = hashes[1]; + unsafe { + *idx_hash_table.get_unchecked_mut(offset + h) = i; + } + offset += hash_table_n; + } +} + +fn lookback_compute_goodness( + l: L, + i: usize, + latents: &[L], + proposed_lookbacks: &[usize; PROPOSED_LOOKBACKS], + lookback_counts: &mut [u32], + goodnesses: &mut [Bitlen; PROPOSED_LOOKBACKS], +) { + for lookback_idx in 0..PROPOSED_LOOKBACKS { + let lookback = proposed_lookbacks[lookback_idx]; + let lookback_count = lookback_counts[lookback - 1]; + let other = unsafe { *latents.get_unchecked(i - lookback) }; + let lookback_goodness = Bitlen::BITS - lookback_count.leading_zeros(); + let delta = L::min(l.wrapping_sub(other), other.wrapping_sub(l)); + let delta_goodness = delta.leading_zeros(); + goodnesses[lookback_idx] = lookback_goodness + delta_goodness; + } +} + +fn lookback_goodness_argmax(goodnesses: &[Bitlen; PROPOSED_LOOKBACKS]) -> usize { + let mut best_goodness = goodnesses[0]; + let mut best_idx = 0; + + for (i, &goodness) in goodnesses.iter().enumerate().skip(1) { + if goodness > best_goodness { + best_goodness = goodness; + best_idx = i; + } + } + + best_idx +} + +#[inline(never)] +fn choose_lookbacks(config: DeltaLookbackConfig, latents: &[L]) -> Vec { + let state_n = config.state_n(); + + if latents.len() <= state_n { + return vec![]; + } + + let hash_table_n_log = config.window_n_log + 1; + let hash_table_n = 1 << hash_table_n_log; + let window_n = config.window_n(); + assert!( + window_n >= PROPOSED_LOOKBACKS, + "we do not support tiny windows during compression" + ); + + let mut lookback_counts = vec![1_u32; cmp::min(window_n, latents.len())]; + let mut lookbacks = vec![MaybeUninit::uninit(); latents.len() - state_n]; + let mut idx_hash_table = vec![0_usize; COARSENESSES.len() * hash_table_n]; + let mut proposed_lookbacks = array::from_fn::<_, PROPOSED_LOOKBACKS, _>(|i| (i + 1).min(state_n)); + let mut goodnesses = [0; PROPOSED_LOOKBACKS]; + let mut best_lookback = 1; + let mut repeating_lookback_idx: usize = 0; + for i in state_n..latents.len() { + let l = latents[i]; + + let new_brute_lookback = i.min(PROPOSED_LOOKBACKS); + proposed_lookbacks[new_brute_lookback - 1] = new_brute_lookback; + + lookback_hash_lookup( + l.to_u64(), + i, + hash_table_n, + window_n, + &mut idx_hash_table, + &mut proposed_lookbacks, + ); + lookback_compute_goodness( + l, + i, + latents, + &proposed_lookbacks, + &mut lookback_counts, + &mut goodnesses, + ); + let best_lookback_idx = lookback_goodness_argmax(&goodnesses); + let new_best_lookback = 
proposed_lookbacks[best_lookback_idx]; + if new_best_lookback != best_lookback { + repeating_lookback_idx += 1; + } + proposed_lookbacks[BRUTE_LOOKBACKS + (repeating_lookback_idx) % REPEATING_LOOKBACKS] = + new_best_lookback; + best_lookback = new_best_lookback; + lookbacks[i - state_n] = MaybeUninit::new(best_lookback as DeltaLookback); + lookback_counts[best_lookback - 1] += 1; + } + + unsafe { mem::transmute::>, Vec>(lookbacks) } +} + +// All encode in place functions leave junk data (`state_n` latents in this +// case) at the front of the latents. +// Using the front instead of the back is preferable because it means we don't +// need an extra copy of the latents in this case. +#[inline(never)] +fn encode_with_lookbacks_in_place( + config: DeltaLookbackConfig, + lookbacks: &[DeltaLookback], + latents: &mut [L], +) -> Vec { + let state_n = config.state_n(); + let real_state_n = cmp::min(latents.len(), state_n); + // TODO make this fast + for i in (real_state_n..latents.len()).rev() { + let lookback = lookbacks[i - state_n] as usize; + latents[i] = latents[i].wrapping_sub(latents[i - lookback]) + } + + let mut state = vec![L::ZERO; state_n]; + state[state_n - real_state_n..].copy_from_slice(&latents[..real_state_n]); + + toggle_center_in_place(latents); + + state +} + +pub fn new_lookback_window_buffer_and_pos( + config: DeltaLookbackConfig, + state: &[L], +) -> (Vec, usize) { + let window_n = config.window_n(); + let buffer_n = cmp::max(window_n, FULL_BATCH_N) * 2; + // TODO better default window + let mut res = vec![L::ZERO; buffer_n]; + res[window_n - state.len()..window_n].copy_from_slice(state); + (res, window_n) +} + +// returns the new position +pub fn decode_with_lookbacks_in_place( + config: DeltaLookbackConfig, + lookbacks: &[DeltaLookback], + window_buffer_pos: &mut usize, + window_buffer: &mut [L], + latents: &mut [L], +) { + toggle_center_in_place(latents); + + let (window_n, state_n) = (config.window_n(), config.state_n()); + let mut pos = *window_buffer_pos; + let batch_n = latents.len(); + if pos + batch_n > window_buffer.len() { + // we need to cycle the buffer + for i in 0..window_n { + window_buffer[i] = window_buffer[i + pos - window_n]; + } + pos = window_n; + } + + for (i, (&latent, &lookback)) in latents.iter().zip(lookbacks).enumerate() { + window_buffer[pos + i] = latent.wrapping_add(window_buffer[pos + i - lookback as usize]); + } + + let new_pos = pos + batch_n; + latents.copy_from_slice(&window_buffer[pos - state_n..new_pos - state_n]); + *window_buffer_pos = new_pos; +} + +pub fn compute_delta_latent_var( + delta_encoding: DeltaEncoding, + primary_latents: &mut DynLatents, + range: Range, +) -> Option { + match delta_encoding { + DeltaEncoding::None | DeltaEncoding::Consecutive(_) => None, + DeltaEncoding::Lookback(config) => { + let res = match_latent_enum!( + primary_latents, + DynLatents(inner) => { + let latents = &mut inner[range]; + DynLatents::new(choose_lookbacks(config, latents)).unwrap() + } + ); + Some(res) + } + } +} + +pub fn encode_in_place( + delta_encoding: DeltaEncoding, + delta_latents: Option<&DynLatents>, + range: Range, + latents: &mut DynLatents, +) -> DeltaState { + match_latent_enum!( + latents, + DynLatents(inner) => { + let delta_state = match delta_encoding { + DeltaEncoding::None => Vec::::new(), + DeltaEncoding::Consecutive(config) => { + encode_consecutive_in_place(config.order, &mut inner[range]) + } + DeltaEncoding::Lookback(config) => { + let lookbacks = delta_latents.unwrap().downcast_ref::().unwrap(); + 
encode_with_lookbacks_in_place(config, lookbacks, &mut inner[range]) + } + }; + DynLatents::new(delta_state).unwrap() + } + ) +} + #[cfg(test)] mod tests { use super::*; #[test] - fn test_delta_encode_decode() { + fn test_consecutive_encode_decode() { let orig_latents: Vec = vec![2, 2, 1, u32::MAX, 0]; - let mut deltas = orig_latents.to_vec(); + let mut deltas = orig_latents.clone(); let order = 2; - let zero_delta = u32::MID; - let mut moments = encode_in_place(&mut deltas, order); + let mut moments = encode_consecutive_in_place(order, &mut deltas); - // add back some padding we lose during compression + // Encoding left junk deltas at the front, + // but for decoding we need junk deltas at the end. + let mut deltas_to_decode = Vec::new(); + deltas_to_decode.extend(&deltas[order..]); for _ in 0..order { - deltas.push(zero_delta); + deltas_to_decode.push(1337); } + let mut deltas = deltas_to_decode; - decode_in_place::(&mut moments, &mut deltas[..3]); + // decode in two parts to show we keep state properly + decode_consecutive_in_place::(&mut moments, &mut deltas[..3]); assert_eq!(&deltas[..3], &orig_latents[..3]); - decode_in_place::(&mut moments, &mut deltas[3..]); + decode_consecutive_in_place::(&mut moments, &mut deltas[3..]); assert_eq!(&deltas[3..5], &orig_latents[3..5]); } + + #[test] + fn test_lookback_encode_decode() { + let original_latents = { + let mut res = vec![100_u32; 100]; + res[1] = 200; + res[2] = 201; + res[3] = 202; + res[5] = 203; + res[15] = 204; + res[50] = 205; + res + }; + let config = DeltaLookbackConfig { + window_n_log: 4, + state_n_log: 1, + secondary_uses_delta: false, + }; + + let mut deltas = original_latents.clone(); + let lookbacks = choose_lookbacks(config, &original_latents); + assert_eq!(lookbacks[0], 1); // 201 -> 200 + assert_eq!(lookbacks[2], 4); // 0 -> 0 + assert_eq!(lookbacks[13], 10); // 204 -> 203 + assert_eq!(lookbacks[48], 1); // 205 -> 0; 204 was outside window + + let state = encode_with_lookbacks_in_place(config, &lookbacks, &mut deltas); + assert_eq!(state, vec![100, 200]); + + // Encoding left junk deltas at the front, + // but for decoding we need junk deltas at the end. + let mut deltas_to_decode = Vec::::new(); + deltas_to_decode.extend(&deltas[2..]); + for _ in 0..2 { + deltas_to_decode.push(1337); + } + + let (mut window_buffer, mut pos) = new_lookback_window_buffer_and_pos(config, &state); + assert_eq!(pos, 16); + decode_with_lookbacks_in_place( + config, + &lookbacks, + &mut pos, + &mut window_buffer, + &mut deltas_to_decode, + ); + assert_eq!(deltas_to_decode, original_latents); + assert_eq!(pos, 16 + original_latents.len()); + } } diff --git a/pco/src/describers.rs b/pco/src/describers.rs index b8b3b739..05a03618 100644 --- a/pco/src/describers.rs +++ b/pco/src/describers.rs @@ -1,12 +1,13 @@ -use crate::constants::Bitlen; +use crate::constants::{Bitlen, DeltaLookback}; use crate::data_types::{Float, Latent, Number}; -use crate::metadata::{ChunkMeta, DeltaEncoding, Mode}; +use crate::metadata::per_latent_var::PerLatentVar; +use crate::metadata::{ChunkMeta, DeltaEncoding, DynLatent, LatentVarKey, Mode}; use std::marker::PhantomData; /// Interprets the meaning of latent variables and values from [`ChunkMeta`]. /// /// Obtainable via [`crate::data_types::Number::get_latent_describers`]. -pub trait DescribeLatent { +pub trait DescribeLatent { /// Returns a description for this latent variable. 
fn latent_var(&self) -> String; /// Returns a description for this latent variable's units, when formatted @@ -16,32 +17,49 @@ pub trait DescribeLatent { /// numbers. fn latent_units(&self) -> String; /// Returns a more easily interpretable description for the latent. - fn latent(&self, latent: L) -> String; + fn latent(&self, latent: DynLatent) -> String; } -pub type LatentDescriber = Box>; +pub type LatentDescriber = Box; + +fn delta_latent_describer(delta_encoding: DeltaEncoding) -> Option { + match delta_encoding { + DeltaEncoding::None | DeltaEncoding::Consecutive(_) => None, + DeltaEncoding::Lookback(_) => { + let describer = IntDescriber { + description: "lookback".to_string(), + units: "".to_string(), + center: 0 as DeltaLookback, + is_signed: false, + }; + Some(Box::new(describer)) + } + } +} pub(crate) fn match_classic_mode( meta: &ChunkMeta, delta_units: &'static str, -) -> Option>> { - match (meta.mode, meta.delta_encoding) { - (Mode::Classic, DeltaEncoding::None) => { - let describer = Box::new(ClassicDescriber::::default()); - Some(vec![describer]) - } +) -> Option> { + let primary: LatentDescriber = match (meta.mode, meta.delta_encoding) { + (Mode::Classic, DeltaEncoding::None) => Box::new(ClassicDescriber::::default()), (Mode::Classic, _) => { - let describer = centered_delta_describer("delta".to_string(), delta_units.to_string()); - Some(vec![describer]) + centered_delta_describer::("delta".to_string(), delta_units.to_string()) } - _ => None, - } + _ => return None, + }; + + Some(PerLatentVar { + delta: delta_latent_describer(meta.delta_encoding), + primary, + secondary: None, + }) } pub(crate) fn match_int_modes( meta: &ChunkMeta, is_signed: bool, -) -> Option>> { +) -> Option> { match meta.mode { Mode::IntMult(dyn_latent) => { let base = *dyn_latent.downcast_ref::().unwrap(); @@ -56,29 +74,47 @@ pub(crate) fn match_int_modes( is_signed, }) } else { - centered_delta_describer( + centered_delta_describer::( format!("multiplier delta [x{}]", base), "x".to_string(), ) }; - let secondary = Box::new(IntDescriber { - description: "adjustment".to_string(), - units: "".to_string(), - center: adj_center, - is_signed: false, - }); - Some(vec![primary, secondary]) + + let secondary: LatentDescriber = if meta + .delta_encoding + .applies_to_latent_var(LatentVarKey::Secondary) + { + centered_delta_describer::( + "adjustment delta".to_string(), + "".to_string(), + ) + } else { + Box::new(IntDescriber { + description: "adjustment".to_string(), + units: "".to_string(), + center: adj_center, + is_signed: false, + }) + }; + + Some(PerLatentVar { + delta: delta_latent_describer(meta.delta_encoding), + primary, + secondary: Some(secondary), + }) } _ => None, } } -pub(crate) fn match_float_modes(meta: &ChunkMeta) -> Option>> { +pub(crate) fn match_float_modes( + meta: &ChunkMeta, +) -> Option> { match meta.mode { Mode::FloatMult(dyn_latent) => { let base_latent = *dyn_latent.downcast_ref::().unwrap(); let base_string = F::from_latent_ordered(base_latent).to_string(); - let primary: LatentDescriber = if matches!(meta.delta_encoding, DeltaEncoding::None) { + let primary: LatentDescriber = if matches!(meta.delta_encoding, DeltaEncoding::None) { Box::new(FloatMultDescriber { base_string, phantom: PhantomData::, @@ -91,13 +127,29 @@ pub(crate) fn match_float_modes(meta: &ChunkMeta) -> Option( + "adjustment delta".to_string(), + "".to_string(), + ) + } else { + Box::new(IntDescriber { + description: "adjustment".to_string(), + units: " ULPs".to_string(), + center: F::L::MID, + is_signed: true, + }) 
+ }; + + Some(PerLatentVar { + delta: delta_latent_describer(meta.delta_encoding), + primary, + secondary: Some(secondary), + }) } Mode::FloatQuant(k) => { let primary = if matches!(meta.delta_encoding, DeltaEncoding::None) { @@ -106,19 +158,34 @@ pub(crate) fn match_float_modes(meta: &ChunkMeta) -> Option, }) } else { - centered_delta_describer( + centered_delta_describer::( format!("quantums delta [<<{}]", k), "q".to_string(), ) }; - let secondary = Box::new(IntDescriber { - description: "magnitude adjustment".to_string(), - units: " ULPs".to_string(), - center: F::L::ZERO, - is_signed: false, - }); - Some(vec![primary, secondary]) + let secondary: LatentDescriber = if meta + .delta_encoding + .applies_to_latent_var(LatentVarKey::Secondary) + { + centered_delta_describer::( + "magnitude adjustment delta".to_string(), + "".to_string(), + ) + } else { + Box::new(IntDescriber { + description: "magnitude adjustment".to_string(), + units: " ULPs".to_string(), + center: F::L::ZERO, + is_signed: false, + }) + }; + + Some(PerLatentVar { + delta: delta_latent_describer(meta.delta_encoding), + primary, + secondary: Some(secondary), + }) } _ => None, } @@ -127,7 +194,7 @@ pub(crate) fn match_float_modes(meta: &ChunkMeta) -> Option(PhantomData); -impl DescribeLatent for ClassicDescriber { +impl DescribeLatent for ClassicDescriber { fn latent_var(&self) -> String { "primary".to_string() } @@ -136,8 +203,8 @@ impl DescribeLatent for ClassicDescriber { "".to_string() } - fn latent(&self, latent: T::L) -> String { - T::from_latent_ordered(latent).to_string() + fn latent(&self, latent: DynLatent) -> String { + T::from_latent_ordered(latent.downcast::().unwrap()).to_string() } } @@ -148,7 +215,7 @@ struct IntDescriber { is_signed: bool, } -impl DescribeLatent for IntDescriber { +impl DescribeLatent for IntDescriber { fn latent_var(&self) -> String { self.description.to_string() } @@ -157,8 +224,8 @@ impl DescribeLatent for IntDescriber { self.units.to_string() } - fn latent(&self, latent: L) -> String { - let centered = latent.wrapping_sub(self.center); + fn latent(&self, latent: DynLatent) -> String { + let centered = latent.downcast::().unwrap().wrapping_sub(self.center); if centered < L::MID || !self.is_signed { centered.to_string() } else { @@ -167,7 +234,7 @@ impl DescribeLatent for IntDescriber { } } -fn centered_delta_describer(description: String, units: String) -> LatentDescriber { +fn centered_delta_describer(description: String, units: String) -> LatentDescriber { Box::new(IntDescriber { description, units, @@ -181,7 +248,7 @@ struct FloatMultDescriber { phantom: PhantomData, } -impl DescribeLatent for FloatMultDescriber { +impl DescribeLatent for FloatMultDescriber { fn latent_var(&self) -> String { format!("multiplier [x{}]", self.base_string) } @@ -190,8 +257,8 @@ impl DescribeLatent for FloatMultDescriber { "x".to_string() } - fn latent(&self, latent: F::L) -> String { - F::int_float_from_latent(latent).to_string() + fn latent(&self, latent: DynLatent) -> String { + F::int_float_from_latent(latent.downcast::().unwrap()).to_string() } } @@ -200,7 +267,7 @@ struct FloatQuantDescriber { phantom: PhantomData, } -impl DescribeLatent for FloatQuantDescriber { +impl DescribeLatent for FloatQuantDescriber { fn latent_var(&self) -> String { "quantized".to_string() } @@ -209,8 +276,8 @@ impl DescribeLatent for FloatQuantDescriber { "".to_string() } - fn latent(&self, latent: F::L) -> String { - let shifted = latent << self.k; + fn latent(&self, latent: DynLatent) -> String { + let shifted = 
latent.downcast::().unwrap() << self.k; if shifted >= F::L::MID { F::from_latent_ordered(shifted).to_string() } else { diff --git a/pco/src/float_mult_utils.rs b/pco/src/float_mult_utils.rs index b65db320..3e0338d7 100644 --- a/pco/src/float_mult_utils.rs +++ b/pco/src/float_mult_utils.rs @@ -4,12 +4,18 @@ use std::mem; use crate::compression_intermediates::Bid; use crate::constants::{Bitlen, MULT_REQUIRED_BITS_SAVED_PER_NUM}; use crate::data_types::{Float, Latent}; -use crate::metadata::Mode; +use crate::metadata::{DynLatents, Mode}; use crate::sampling::PrimaryLatentAndSavings; +use crate::split_latents::SplitLatents; use crate::{int_mult_utils, sampling}; #[inline(never)] -pub(crate) fn join_latents(base: F, primary: &mut [F::L], secondary: &[F::L]) { +pub(crate) fn join_latents( + base: F, + primary: &mut [F::L], + secondary: Option<&DynLatents>, +) { + let secondary = secondary.unwrap().downcast_ref::().unwrap(); for (mult_and_dst, &adj) in primary.iter_mut().zip(secondary.iter()) { let unadjusted = F::int_float_from_latent(*mult_and_dst) * base; *mult_and_dst = unadjusted @@ -19,10 +25,7 @@ pub(crate) fn join_latents(base: F, primary: &mut [F::L], secondary: & } } -pub(crate) fn split_latents( - page_nums: &[F], - config: FloatMultConfig, -) -> Vec> { +pub(crate) fn split_latents(page_nums: &[F], config: FloatMultConfig) -> SplitLatents { let FloatMultConfig { base, inv_base } = config; let n = page_nums.len(); let uninit_vec = || unsafe { @@ -45,7 +48,11 @@ pub(crate) fn split_latents( // that 0 is in the middle of the range .toggle_center(); } - vec![primary, adjustments] + + SplitLatents { + primary: DynLatents::new(primary).unwrap(), + secondary: Some(DynLatents::new(adjustments).unwrap()), + } } // The rest of this file concerns automatically detecting the float `base` diff --git a/pco/src/float_quant_utils.rs b/pco/src/float_quant_utils.rs index 0d06330c..80b2b6b3 100644 --- a/pco/src/float_quant_utils.rs +++ b/pco/src/float_quant_utils.rs @@ -1,14 +1,20 @@ use crate::compression_intermediates::Bid; use crate::constants::{Bitlen, QUANT_REQUIRED_BITS_SAVED_PER_NUM}; use crate::data_types::{Float, Latent}; -use crate::metadata::Mode; +use crate::metadata::{DynLatents, Mode}; use crate::sampling::{self, PrimaryLatentAndSavings}; +use crate::split_latents::SplitLatents; use std::cmp; const REQUIRED_QUANTIZED_PROPORTION: f64 = 0.95; #[inline(never)] -pub(crate) fn join_latents(k: Bitlen, primary: &mut [F::L], secondary: &[F::L]) { +pub(crate) fn join_latents( + k: Bitlen, + primary: &mut [F::L], + secondary: Option<&DynLatents>, +) { + let secondary = secondary.unwrap().downcast_ref::().unwrap(); // For any float `num` such that `split_latents([num], k) == [[y], [m]]`, we have // num.is_sign_positive() == (y >= sign_cutoff) let sign_cutoff = F::L::MID >> k; @@ -28,7 +34,7 @@ pub(crate) fn join_latents(k: Bitlen, primary: &mut [F::L], secondary: } } -pub(crate) fn split_latents(page_nums: &[F], k: Bitlen) -> Vec> { +pub(crate) fn split_latents(page_nums: &[F], k: Bitlen) -> SplitLatents { let n = page_nums.len(); let uninit_vec = || unsafe { let mut res = Vec::::with_capacity(n); @@ -55,7 +61,11 @@ pub(crate) fn split_latents(page_nums: &[F], k: Bitlen) -> Vec(sample: &[F]) -> Option> { @@ -179,16 +189,15 @@ mod test { let (nums, (_expected_ys, _expected_ms)): (Vec<_>, (Vec<_>, Vec<_>)) = expected.iter().cloned().unzip(); let k: Bitlen = 5; - if let [ref mut ys, ms] = &mut split_latents(&nums, k)[..] 
{ - let actual: Vec<_> = nums - .iter() - .cloned() - .zip(ys.iter().cloned().zip(ms.iter().cloned())) - .collect(); - assert_eq!(expected, actual); - } else { - panic!("Bug: `split_latents` returned data in an unexpected format"); - } + let SplitLatents { primary, secondary } = split_latents(&nums, k); + let primary = primary.downcast::().unwrap(); + let secondary = secondary.unwrap().downcast::().unwrap(); + let actual: Vec<_> = nums + .iter() + .cloned() + .zip(primary.iter().cloned().zip(secondary.iter().cloned())) + .collect(); + assert_eq!(expected, actual); } #[test] @@ -198,11 +207,12 @@ mod test { .iter() .map(|&num| num as f64) .collect(); - if let [_, ms] = &split_latents(&nums, k)[..] { - assert!(ms.iter().all(|&m| m == 0u64)); - } else { - panic!("Bug: `split_latents` returned data in an unexpected format"); - } + let SplitLatents { + primary: _primary, + secondary, + } = split_latents(&nums, k); + let secondary = secondary.unwrap().downcast::().unwrap(); + assert!(secondary.iter().all(|&m| m == 0u64)); } #[test] @@ -214,12 +224,10 @@ mod test { .collect::>(); let k: Bitlen = 5; - if let [ref mut ys, ms] = &mut split_latents(&nums, k)[..] { - join_latents::(k, ys, &ms); - assert_eq!(uints, *ys); - } else { - panic!("Bug: `split_latents` returned data in an unexpected format"); - } + let SplitLatents { primary, secondary } = split_latents(&nums, k); + let mut primary = primary.downcast::().unwrap(); + join_latents::(k, &mut primary, secondary.as_ref()); + assert_eq!(uints, primary); } #[test] diff --git a/pco/src/int_mult_utils.rs b/pco/src/int_mult_utils.rs index 2b6f81a6..24e308f5 100644 --- a/pco/src/int_mult_utils.rs +++ b/pco/src/int_mult_utils.rs @@ -5,14 +5,16 @@ use std::mem; use crate::constants::MULT_REQUIRED_BITS_SAVED_PER_NUM; use crate::data_types::{Latent, Number}; +use crate::metadata::DynLatents; use crate::sampling::{self, PrimaryLatentAndSavings}; +use crate::split_latents::SplitLatents; // riemann zeta function const ZETA_OF_2: f64 = PI * PI / 6.0; const LCB_RATIO: f64 = 1.0; #[inline(never)] -pub fn split_latents(nums: &[T], base: T::L) -> Vec> { +pub fn split_latents(nums: &[T], base: T::L) -> SplitLatents { let n = nums.len(); let mut mults = Vec::with_capacity(n); let mut adjs = Vec::with_capacity(n); @@ -26,11 +28,16 @@ pub fn split_latents(nums: &[T], base: T::L) -> Vec> { *mult_dst = u / base; *adj_dst = u % base; } - vec![mults, adjs] + + SplitLatents { + primary: DynLatents::new(mults).unwrap(), + secondary: Some(DynLatents::new(adjs).unwrap()), + } } #[inline(never)] -pub(crate) fn join_latents(base: L, primary: &mut [L], secondary: &[L]) { +pub(crate) fn join_latents(base: L, primary: &mut [L], secondary: Option<&DynLatents>) { + let secondary = secondary.unwrap().downcast_ref::().unwrap(); for (mult_and_dst, &adj) in primary.iter_mut().zip(secondary.iter()) { *mult_and_dst = (*mult_and_dst * base).wrapping_add(adj); } @@ -249,15 +256,19 @@ mod tests { let nums = vec![8_u32, 1, 5]; let base = 4_u32; let latents = split_latents(&nums, base); - assert_eq!(latents.len(), 2); - assert_eq!(latents[0], vec![2_u32, 0, 1]); - assert_eq!(latents[1], vec![0_u32, 1, 1]); + let mut primary = latents.primary.downcast::().unwrap(); + let secondary = latents.secondary.unwrap().downcast::().unwrap(); + assert_eq!(&primary, &vec![2_u32, 0, 1]); + assert_eq!(&secondary, &vec![0_u32, 1, 1]); // JOIN - let mut primary_and_dst = latents[0].to_vec(); - join_latents(base, &mut primary_and_dst, &latents[1]); + join_latents( + base, + &mut primary, + 
DynLatents::new(secondary).as_ref(), + ); - assert_eq!(primary_and_dst, nums); + assert_eq!(primary, nums); } #[test] diff --git a/pco/src/latent_batch_dissector.rs b/pco/src/latent_batch_dissector.rs index f0db662e..a5f2a636 100644 --- a/pco/src/latent_batch_dissector.rs +++ b/pco/src/latent_batch_dissector.rs @@ -104,12 +104,7 @@ impl<'a, L: Latent> LatentBatchDissector<'a, L> { } } - pub fn dissect_latent_batch( - &mut self, - latents: &[L], - base_i: usize, - dst: &mut DissectedPageVar, - ) { + pub fn dissect_latent_batch(&mut self, latents: &[L], base_i: usize, dst: &mut DissectedPageVar) { let DissectedPageVar { ans_vals, ans_bits, @@ -127,6 +122,7 @@ impl<'a, L: Latent> LatentBatchDissector<'a, L> { &mut offset_bits[base_i..end_i], ); + let offsets = offsets.downcast_mut::().unwrap(); self.set_offsets(latents, &mut offsets[base_i..end_i]); self.encode_ans_in_reverse( diff --git a/pco/src/latent_chunk_compressor.rs b/pco/src/latent_chunk_compressor.rs index 3e5af990..b10679d0 100644 --- a/pco/src/latent_chunk_compressor.rs +++ b/pco/src/latent_chunk_compressor.rs @@ -6,10 +6,13 @@ use crate::constants::{Bitlen, Weight, ANS_INTERLEAVING, PAGE_PADDING}; use crate::data_types::Latent; use crate::errors::PcoResult; use crate::latent_batch_dissector::LatentBatchDissector; +use crate::macros::{define_latent_enum, match_latent_enum}; +use crate::metadata::dyn_latents::DynLatents; use crate::metadata::{bins, Bin}; use crate::read_write_uint::ReadWriteUint; use crate::{ans, bit_reader, bit_writer, read_write_uint, FULL_BATCH_N}; use std::io::Write; +use std::ops::Range; // This would be very hard to combine with write_uints because it makes use of // an optimization that only works easily for single-u64 writes of 56 bits or @@ -79,14 +82,15 @@ pub(crate) struct TrainedBins { pub struct LatentChunkCompressor { table: CompressionTable, pub encoder: ans::Encoder, - pub avg_bits_per_delta: f64, + pub avg_bits_per_latent: f64, is_trivial: bool, needs_ans: bool, max_u64s_per_offset: usize, + latents: Vec, } impl LatentChunkCompressor { - pub(crate) fn new(trained: TrainedBins, bins: &[Bin]) -> PcoResult { + pub(crate) fn new(trained: TrainedBins, bins: &[Bin], latents: Vec) -> PcoResult { let needs_ans = bins.len() != 1; let table = CompressionTable::from(trained.infos); @@ -100,20 +104,21 @@ impl LatentChunkCompressor { Ok(LatentChunkCompressor { table, encoder, - avg_bits_per_delta: bins::avg_bits_per_latent(bins, trained.ans_size_log), + avg_bits_per_latent: bins::avg_bits_per_latent(bins, trained.ans_size_log), is_trivial: bins::are_trivial(bins), needs_ans, max_u64s_per_offset, + latents, }) } - pub fn dissect_page(&self, page_latents: &[L]) -> DissectedPageVar { + pub fn dissect_page(&self, page_range: Range) -> DissectedPageVar { let uninit_dissected_page_var = |n, ans_default_state| { let ans_final_states = [ans_default_state; ANS_INTERLEAVING]; DissectedPageVar { ans_vals: uninit_vec(n), ans_bits: uninit_vec(n), - offsets: uninit_vec(n), + offsets: DynLatents::new(uninit_vec::(n)).unwrap(), offset_bits: uninit_vec(n), ans_final_states, } @@ -124,13 +129,17 @@ impl LatentChunkCompressor { } let mut dissected_page_var = uninit_dissected_page_var( - page_latents.len(), + page_range.len(), self.encoder.default_state(), ); // we go through in reverse for ANS! 
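    // (tANS coding is stack-like: symbols are encoded last-to-first so that the
    // decoder can read the stream front-to-back and emit them in forward order.)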
let mut lbd = LatentBatchDissector::new(&self.table, &self.encoder); - for (batch_idx, batch) in page_latents.chunks(FULL_BATCH_N).enumerate().rev() { + for (batch_idx, batch) in self.latents[page_range] + .chunks(FULL_BATCH_N) + .enumerate() + .rev() + { let base_i = batch_idx * FULL_BATCH_N; lbd.dissect_latent_batch(batch, base_i, &mut dissected_page_var) } @@ -139,7 +148,7 @@ impl LatentChunkCompressor { pub fn write_dissected_batch( &self, - dissected_page_var: &DissectedPageVar, + dissected_page_var: &DissectedPageVar, batch_start: usize, writer: &mut BitWriter, ) -> PcoResult<()> { @@ -165,33 +174,43 @@ impl LatentChunkCompressor { // write offsets (writer.stale_byte_idx, writer.bits_past_byte) = unsafe { - match self.max_u64s_per_offset { - 0 => (writer.stale_byte_idx, writer.bits_past_byte), - 1 => write_short_uints::( - &dissected_page_var.offsets[batch_start..], - &dissected_page_var.offset_bits[batch_start..], - writer.stale_byte_idx, - writer.bits_past_byte, - &mut writer.buf, - ), - 2 => write_uints::( - &dissected_page_var.offsets[batch_start..], - &dissected_page_var.offset_bits[batch_start..], - writer.stale_byte_idx, - writer.bits_past_byte, - &mut writer.buf, - ), - 3 => write_uints::( - &dissected_page_var.offsets[batch_start..], - &dissected_page_var.offset_bits[batch_start..], - writer.stale_byte_idx, - writer.bits_past_byte, - &mut writer.buf, - ), - _ => panic!("[ChunkCompressor] data type is too large"), - } + match_latent_enum!( + &dissected_page_var.offsets, + DynLatents(offsets) => { + match self.max_u64s_per_offset { + 0 => (writer.stale_byte_idx, writer.bits_past_byte), + 1 => write_short_uints::( + &offsets[batch_start..], + &dissected_page_var.offset_bits[batch_start..], + writer.stale_byte_idx, + writer.bits_past_byte, + &mut writer.buf, + ), + 2 => write_uints::( + &offsets[batch_start..], + &dissected_page_var.offset_bits[batch_start..], + writer.stale_byte_idx, + writer.bits_past_byte, + &mut writer.buf, + ), + 3 => write_uints::( + &offsets[batch_start..], + &dissected_page_var.offset_bits[batch_start..], + writer.stale_byte_idx, + writer.bits_past_byte, + &mut writer.buf, + ), + _ => panic!("[ChunkCompressor] data type is too large"), + } + } + ) }; Ok(()) } } + +define_latent_enum!( + #[derive(Clone, Debug)] + pub DynLatentChunkCompressor(LatentChunkCompressor) +); diff --git a/pco/src/latent_batch_decompressor.rs b/pco/src/latent_page_decompressor.rs similarity index 74% rename from pco/src/latent_batch_decompressor.rs rename to pco/src/latent_page_decompressor.rs index 53027f13..951ea20d 100644 --- a/pco/src/latent_batch_decompressor.rs +++ b/pco/src/latent_page_decompressor.rs @@ -2,11 +2,11 @@ use std::fmt::Debug; use crate::ans::{AnsState, Spec}; use crate::bit_reader::BitReader; -use crate::constants::{Bitlen, ANS_INTERLEAVING, FULL_BATCH_N}; +use crate::constants::{Bitlen, DeltaLookback, ANS_INTERLEAVING, FULL_BATCH_N}; use crate::data_types::Latent; use crate::errors::PcoResult; -use crate::metadata::{bins, Bin}; -use crate::{ans, bit_reader, read_write_uint}; +use crate::metadata::{bins, Bin, DeltaEncoding, DynLatents}; +use crate::{ans, bit_reader, delta, read_write_uint}; // Default here is meaningless and should only be used to fill in empty // vectors. 
@@ -16,8 +16,8 @@ pub struct BinDecompressionInfo { pub offset_bits: Bitlen, } -impl From<&Bin> for BinDecompressionInfo { - fn from(bin: &Bin) -> Self { +impl BinDecompressionInfo { + fn new(bin: &Bin) -> Self { Self { lower: bin.lower, offset_bits: bin.offset_bits, @@ -31,7 +31,10 @@ struct State { offset_bits_csum_scratch: [Bitlen; FULL_BATCH_N], offset_bits_scratch: [Bitlen; FULL_BATCH_N], lowers_scratch: [L; FULL_BATCH_N], - state_idxs: [AnsState; ANS_INTERLEAVING], + + ans_state_idxs: [AnsState; ANS_INTERLEAVING], + delta_state: Vec, + delta_state_pos: usize, } impl State { @@ -47,38 +50,50 @@ impl State { // LatentBatchDecompressor does the main work of decoding bytes into Latents #[derive(Clone, Debug)] -pub struct LatentBatchDecompressor { +pub struct LatentPageDecompressor { // known information about this latent variable u64s_per_offset: usize, infos: Vec>, needs_ans: bool, decoder: ans::Decoder, + delta_encoding: DeltaEncoding, pub maybe_constant_value: Option, // mutable state state: State, } -impl LatentBatchDecompressor { +impl LatentPageDecompressor { pub fn new( ans_size_log: Bitlen, bins: &[Bin], + delta_encoding: DeltaEncoding, ans_final_state_idxs: [AnsState; ANS_INTERLEAVING], + stored_delta_state: Vec, ) -> PcoResult { let u64s_per_offset = read_write_uint::calc_max_u64s(bins::max_offset_bits(bins)); let infos = bins .iter() - .map(BinDecompressionInfo::from) + .map(BinDecompressionInfo::new) .collect::>(); let weights = bins::weights(bins); let ans_spec = Spec::from_weights(ans_size_log, weights)?; let decoder = ans::Decoder::new(&ans_spec); + let (working_delta_state, delta_state_pos) = match delta_encoding { + DeltaEncoding::None | DeltaEncoding::Consecutive(_) => (stored_delta_state, 0), + DeltaEncoding::Lookback(config) => { + delta::new_lookback_window_buffer_and_pos(config, &stored_delta_state) + } + }; + let mut state = State { offset_bits_csum_scratch: [0; FULL_BATCH_N], offset_bits_scratch: [0; FULL_BATCH_N], lowers_scratch: [L::ZERO; FULL_BATCH_N], - state_idxs: ans_final_state_idxs, + ans_state_idxs: ans_final_state_idxs, + delta_state: working_delta_state, + delta_state_pos, }; let needs_ans = bins.len() != 1; @@ -94,17 +109,19 @@ impl LatentBatchDecompressor { } } - let maybe_constant_value = if bins::are_trivial(bins) { - bins.first().map(|bin| bin.lower) - } else { - None - }; + let maybe_constant_value = + if bins::are_trivial(bins) && matches!(delta_encoding, DeltaEncoding::None) { + bins.first().map(|bin| bin.lower) + } else { + None + }; Ok(Self { u64s_per_offset, infos, needs_ans, decoder, + delta_encoding, maybe_constant_value, state, }) @@ -123,7 +140,7 @@ impl LatentBatchDecompressor { let mut bits_past_byte = reader.bits_past_byte; let mut offset_bit_idx = 0; let [mut state_idx_0, mut state_idx_1, mut state_idx_2, mut state_idx_3] = - self.state.state_idxs; + self.state.ans_state_idxs; let infos = self.infos.as_slice(); let ans_nodes = self.decoder.nodes.as_slice(); for base_i in (0..FULL_BATCH_N).step_by(ANS_INTERLEAVING) { @@ -154,7 +171,7 @@ impl LatentBatchDecompressor { reader.stale_byte_idx = stale_byte_idx; reader.bits_past_byte = bits_past_byte; - self.state.state_idxs = [state_idx_0, state_idx_1, state_idx_2, state_idx_3]; + self.state.ans_state_idxs = [state_idx_0, state_idx_1, state_idx_2, state_idx_3]; } // This implementation handles arbitrary batch size and looks simpler, but is @@ -165,7 +182,7 @@ impl LatentBatchDecompressor { let mut stale_byte_idx = reader.stale_byte_idx; let mut bits_past_byte = reader.bits_past_byte; let 
mut offset_bit_idx = 0; - let mut state_idxs = self.state.state_idxs; + let mut state_idxs = self.state.ans_state_idxs; for i in 0..batch_n { let j = i % 4; stale_byte_idx += bits_past_byte as usize / 8; @@ -182,7 +199,7 @@ impl LatentBatchDecompressor { reader.stale_byte_idx = stale_byte_idx; reader.bits_past_byte = bits_past_byte; - self.state.state_idxs = state_idxs; + self.state.ans_state_idxs = state_idxs; } #[inline(never)] @@ -224,13 +241,9 @@ impl LatentBatchDecompressor { // If hits a corruption, it returns an error and leaves reader and self unchanged. // May contaminate dst. - pub unsafe fn decompress_latent_batch( - &mut self, - reader: &mut BitReader, - dst: &mut [L], - ) -> PcoResult<()> { + pub unsafe fn decompress_batch_pre_delta(&mut self, reader: &mut BitReader, dst: &mut [L]) { if dst.is_empty() { - return Ok(()); + return; } if self.needs_ans { @@ -258,7 +271,45 @@ impl LatentBatchDecompressor { } self.add_lowers(dst); + } - Ok(()) + pub unsafe fn decompress_batch( + &mut self, + delta_latents: Option<&DynLatents>, + n_remaining_in_page: usize, + reader: &mut BitReader, + dst: &mut [L], + ) { + let n_remaining_pre_delta = + n_remaining_in_page.saturating_sub(self.delta_encoding.n_latents_per_state()); + let pre_delta_len = if dst.len() <= n_remaining_pre_delta { + dst.len() + } else { + // If we're at the end, this won't initialize the last + // few elements before delta decoding them, so we do that manually here to + // satisfy MIRI. This step isn't really necessary. + dst[n_remaining_pre_delta..].fill(L::default()); + n_remaining_pre_delta + }; + self.decompress_batch_pre_delta(reader, &mut dst[..pre_delta_len]); + + match self.delta_encoding { + DeltaEncoding::None => (), + DeltaEncoding::Consecutive(_) => { + delta::decode_consecutive_in_place(&mut self.state.delta_state, dst) + } + DeltaEncoding::Lookback(config) => { + delta::decode_with_lookbacks_in_place( + config, + delta_latents + .unwrap() + .downcast_ref::() + .unwrap(), + &mut self.state.delta_state_pos, + &mut self.state.delta_state, + dst, + ); + } + } } } diff --git a/pco/src/lib.rs b/pco/src/lib.rs index 8204a428..d1fab610 100644 --- a/pco/src/lib.rs +++ b/pco/src/lib.rs @@ -12,14 +12,15 @@ #![deny(clippy::unused_unit)] #![deny(dead_code)] -pub use chunk_config::{ChunkConfig, DeltaSpec, ModeSpec, PagingSpec}; -pub use constants::{DEFAULT_COMPRESSION_LEVEL, DEFAULT_MAX_PAGE_N, FULL_BATCH_N}; -pub use progress::Progress; - #[doc = include_str!("../README.md")] #[cfg(doctest)] struct ReadmeDoctest; +pub use chunk_config::{ChunkConfig, DeltaSpec, ModeSpec, PagingSpec}; +pub use constants::{DEFAULT_COMPRESSION_LEVEL, DEFAULT_MAX_PAGE_N, FULL_BATCH_N}; +pub use progress::Progress; +pub use split_latents::SplitLatents; + pub mod data_types; /// for inspecting certain types of Pco metadata pub mod describers; @@ -45,14 +46,15 @@ mod float_mult_utils; mod float_quant_utils; mod histograms; mod int_mult_utils; -mod latent_batch_decompressor; mod latent_batch_dissector; mod latent_chunk_compressor; +mod latent_page_decompressor; mod macros; mod progress; mod read_write_uint; mod sampling; mod sort_utils; +mod split_latents; #[cfg(test)] mod tests; diff --git a/pco/src/macros.rs b/pco/src/macros.rs index 19fb58e1..27c351a1 100644 --- a/pco/src/macros.rs +++ b/pco/src/macros.rs @@ -2,7 +2,7 @@ dtype_dispatch::build_dtype_macros!( #[doc = "\ - Defines enums holding a container generic to `Number`. + **unstable API** Defines enums holding a container generic to `Number`. 
"] #[doc = "\ You'll only want to use this if you're using pco's low level APIs. @@ -37,14 +37,6 @@ dtype_dispatch::build_dtype_macros!( ); dtype_dispatch::build_dtype_macros!( - #[doc = "\ - Defines enums holding a container generic to `Latent`. - "] - #[doc = "\ - You'll only want to use this if you're using pco's low level APIs. - See the dtype_dispatch crate for more details. - "] - #[macro_export] define_latent_enum, #[doc = "\ diff --git a/pco/src/metadata/bin.rs b/pco/src/metadata/bin.rs index 431b52e2..f0d2e9cc 100644 --- a/pco/src/metadata/bin.rs +++ b/pco/src/metadata/bin.rs @@ -6,7 +6,6 @@ use crate::data_types::Latent; /// Part of [`ChunkLatentVarMeta`][`crate::metadata::ChunkLatentVarMeta`] representing /// a numerical range. #[derive(Clone, Copy, Debug, PartialEq, Eq)] -#[non_exhaustive] pub struct Bin { /// The number of occurrences of this bin in the asymmetric numeral system /// table. diff --git a/pco/src/metadata/chunk.rs b/pco/src/metadata/chunk.rs index 318861a7..eea7e8a8 100644 --- a/pco/src/metadata/chunk.rs +++ b/pco/src/metadata/chunk.rs @@ -4,129 +4,92 @@ use better_io::BetterBufRead; use crate::bit_reader::BitReaderBuilder; use crate::bit_writer::BitWriter; -use crate::constants::*; -use crate::data_types::Latent; -use crate::errors::{PcoError, PcoResult}; +use crate::data_types::LatentType; +use crate::errors::PcoResult; use crate::metadata::chunk_latent_var::ChunkLatentVarMeta; use crate::metadata::delta_encoding::DeltaEncoding; -use crate::metadata::dyn_latent::DynLatent; use crate::metadata::format_version::FormatVersion; +use crate::metadata::per_latent_var::PerLatentVar; use crate::metadata::Mode; /// The metadata of a pco chunk. #[derive(Clone, Debug, PartialEq, Eq)] -#[non_exhaustive] pub struct ChunkMeta { /// The formula `pco` used to compress each number at a low level. pub mode: Mode, - /// How many times delta encoding was applied during compression. - /// This is between 0 and 7, inclusive. - /// - /// See [`ChunkConfig`][crate::ChunkConfig] for more details. + /// How delta encoding was applied. pub delta_encoding: DeltaEncoding, /// Metadata about the interleaved streams needed by `pco` to /// compress/decompress the inputs /// according to the formula used by `mode`. 
- pub per_latent_var: Vec, + pub per_latent_var: PerLatentVar, } impl ChunkMeta { - pub(crate) fn new( - mode: Mode, - delta_encoding: DeltaEncoding, - per_latent_var: Vec, - ) -> Self { - ChunkMeta { - mode, - delta_encoding, - per_latent_var, - } - } - pub(crate) fn exact_size(&self) -> usize { - let extra_bits_for_mode = match self.mode { - Mode::Classic => 0, - Mode::IntMult(inner) => inner.bits(), - Mode::FloatMult(inner) => inner.bits(), - Mode::FloatQuant(_) => BITS_TO_ENCODE_QUANTIZE_K, - }; - let bits_for_latent_vars: usize = self + let bits_for_latent_vars = self .per_latent_var - .iter() - .map(ChunkLatentVarMeta::exact_bit_size) + .as_ref() + .map(|_, var_meta| var_meta.exact_bit_size()) .sum(); - let n_bits = BITS_TO_ENCODE_MODE as usize - + extra_bits_for_mode as usize - + BITS_TO_ENCODE_DELTA_ENCODING_ORDER as usize + let n_bits = self.mode.exact_bit_size() as usize + + self.delta_encoding.exact_bit_size() as usize + bits_for_latent_vars; n_bits.div_ceil(8) } pub(crate) fn exact_page_meta_size(&self) -> usize { - let bit_size: usize = self + let bit_size = self .per_latent_var - .iter() - .enumerate() - .map(|(latent_var_idx, latent_var)| { - let delta_encoding = self - .mode - .delta_encoding_for_latent_var(latent_var_idx, self.delta_encoding); - latent_var.exact_page_meta_bit_size(delta_encoding) + .as_ref() + .map(|key, var_meta| { + let delta_encoding = self.delta_encoding.for_latent_var(key); + var_meta.exact_page_meta_bit_size(delta_encoding) }) .sum(); bit_size.div_ceil(8) } - pub(crate) unsafe fn read_from( + pub(crate) unsafe fn read_from( reader_builder: &mut BitReaderBuilder, version: &FormatVersion, + latent_type: LatentType, ) -> PcoResult { let (mode, delta_encoding) = reader_builder.with_reader(|reader| { - let mode = match reader.read_usize(BITS_TO_ENCODE_MODE) { - 0 => Ok(Mode::Classic), - 1 => { - if version.used_old_gcds() { - return Err(PcoError::compatibility( - "unable to decompress data from v0.0.0 of pco with different GCD encoding", - )); - } - - let base = DynLatent::read_uncompressed_from::(reader); - Ok(Mode::IntMult(base)) - } - 2 => { - let base_latent = DynLatent::read_uncompressed_from::(reader); - Ok(Mode::FloatMult(base_latent)) - } - 3 => { - let k = reader.read_bitlen(BITS_TO_ENCODE_QUANTIZE_K); - Ok(Mode::FloatQuant(k)) - } - value => Err(PcoError::corruption(format!( - "unknown mode value {}", - value - ))), - }?; - - let delta_encoding_order = reader.read_usize(BITS_TO_ENCODE_DELTA_ENCODING_ORDER); - let delta_encoding = if delta_encoding_order == 0 { - DeltaEncoding::None - } else { - DeltaEncoding::Consecutive(delta_encoding_order) - }; + let mode = Mode::read_from(reader, version, latent_type)?; + let delta_encoding = DeltaEncoding::read_from(version, reader)?; Ok((mode, delta_encoding)) })?; - let n_latent_vars = mode.n_latent_vars(); + let delta = if let Some(delta_latent_type) = delta_encoding.latent_type() { + Some(ChunkLatentVarMeta::read_from::( + reader_builder, + delta_latent_type, + )?) + } else { + None + }; - let mut per_latent_var = Vec::with_capacity(n_latent_vars); + let primary = ChunkLatentVarMeta::read_from::( + reader_builder, + mode.primary_latent_type(latent_type), + )?; - for _ in 0..n_latent_vars { - per_latent_var.push(ChunkLatentVarMeta::read_from::( + let secondary = if let Some(secondary_latent_type) = mode.secondary_latent_type(latent_type) { + Some(ChunkLatentVarMeta::read_from::( reader_builder, + secondary_latent_type, )?) 
- } + } else { + None + }; + + let per_latent_var = PerLatentVar { + delta, + primary, + secondary, + }; reader_builder.with_reader(|reader| { reader.drain_empty_byte("nonzero bits in end of final byte of chunk metadata") @@ -140,36 +103,12 @@ impl ChunkMeta { } pub(crate) unsafe fn write_to(&self, writer: &mut BitWriter) -> PcoResult<()> { - let mode_value = match self.mode { - Mode::Classic => 0, - Mode::IntMult(_) => 1, - Mode::FloatMult { .. } => 2, - Mode::FloatQuant { .. } => 3, - }; - writer.write_usize(mode_value, BITS_TO_ENCODE_MODE); - match self.mode { - Mode::Classic => (), - Mode::IntMult(base) => { - base.write_uncompressed_to(writer); - } - Mode::FloatMult(base_latent) => { - base_latent.write_uncompressed_to(writer); - } - Mode::FloatQuant(k) => { - writer.write_uint(k, BITS_TO_ENCODE_QUANTIZE_K); - } - }; - - match self.delta_encoding { - DeltaEncoding::None => writer.write_usize(0, BITS_TO_ENCODE_DELTA_ENCODING_ORDER), - DeltaEncoding::Consecutive(order) => { - writer.write_usize(order, BITS_TO_ENCODE_DELTA_ENCODING_ORDER) - } - } + self.mode.write_to(writer); + self.delta_encoding.write_to(writer); writer.flush()?; - for latents in &self.per_latent_var { + for (_, latents) in self.per_latent_var.as_ref().enumerated() { latents.write_to(writer)?; } @@ -177,23 +116,20 @@ impl ChunkMeta { writer.flush()?; Ok(()) } - - pub(crate) fn delta_encoding_for_latent_var(&self, latent_idx: usize) -> DeltaEncoding { - self - .mode - .delta_encoding_for_latent_var(latent_idx, self.delta_encoding) - } } #[cfg(test)] mod tests { use super::*; + use crate::constants::ANS_INTERLEAVING; + use crate::data_types::Latent; use crate::macros::match_latent_enum; + use crate::metadata::delta_encoding::DeltaConsecutiveConfig; use crate::metadata::dyn_bins::DynBins; use crate::metadata::dyn_latents::DynLatents; use crate::metadata::page::PageMeta; use crate::metadata::page_latent_var::PageLatentVarMeta; - use crate::metadata::Bin; + use crate::metadata::{Bin, DynLatent}; fn check_exact_sizes(meta: &ChunkMeta) -> PcoResult<()> { let buffer_size = 8192; @@ -207,30 +143,26 @@ mod tests { let mut dst = Vec::new(); let mut writer = BitWriter::new(&mut dst, buffer_size); let page_meta = PageMeta { - per_latent_var: (0..meta.per_latent_var.len()) - .map(|latent_var_idx| { - let delta_encoding = meta - .mode - .delta_encoding_for_latent_var(latent_var_idx, meta.delta_encoding); - let delta_moments = match_latent_enum!( - &meta.per_latent_var[latent_var_idx].bins, - DynBins(_bins) => { - DynLatents::new(vec![L::ZERO; delta_encoding.n_latents_per_state()]).unwrap() - } - ); - PageLatentVarMeta { - delta_moments, - ans_final_state_idxs: [0; ANS_INTERLEAVING], + per_latent_var: meta.per_latent_var.as_ref().map(|key, latent_var_meta| { + let delta_encoding = meta.delta_encoding.for_latent_var(key); + let delta_moments = match_latent_enum!( + &latent_var_meta.bins, + DynBins(_bins) => { + DynLatents::new(vec![L::ZERO; delta_encoding.n_latents_per_state()]).unwrap() } - }) - .collect(), + ); + PageLatentVarMeta { + delta_state: delta_moments, + ans_final_state_idxs: [0; ANS_INTERLEAVING], + } + }), }; unsafe { page_meta.write_to( meta .per_latent_var - .iter() - .map(|var_meta| var_meta.ans_size_log), + .as_ref() + .map(|_, var_meta| var_meta.ans_size_log), &mut writer, ) }; @@ -243,11 +175,18 @@ mod tests { fn exact_size_binless() -> PcoResult<()> { let meta = ChunkMeta { mode: Mode::Classic, - delta_encoding: DeltaEncoding::Consecutive(5), - per_latent_var: vec![ChunkLatentVarMeta { - ans_size_log: 0, - bins: 
DynBins::U32(vec![]), - }], + delta_encoding: DeltaEncoding::Consecutive(DeltaConsecutiveConfig { + order: 5, + secondary_uses_delta: false, + }), + per_latent_var: PerLatentVar { + delta: None, + primary: ChunkLatentVarMeta { + ans_size_log: 0, + bins: DynBins::U32(vec![]), + }, + secondary: None, + }, }; check_exact_sizes(&meta) @@ -258,14 +197,18 @@ mod tests { let meta = ChunkMeta { mode: Mode::Classic, delta_encoding: DeltaEncoding::None, - per_latent_var: vec![ChunkLatentVarMeta { - ans_size_log: 0, - bins: DynBins::U64(vec![Bin { - weight: 1, - lower: 77_u64, - offset_bits: 0, - }]), - }], + per_latent_var: PerLatentVar { + delta: None, + primary: ChunkLatentVarMeta { + ans_size_log: 0, + bins: DynBins::U64(vec![Bin { + weight: 1, + lower: 77_u64, + offset_bits: 0, + }]), + }, + secondary: None, + }, }; check_exact_sizes(&meta) @@ -275,9 +218,13 @@ mod tests { fn exact_size_float_mult() -> PcoResult<()> { let meta = ChunkMeta { mode: Mode::FloatMult(DynLatent::U32(777_u32)), - delta_encoding: DeltaEncoding::Consecutive(3), - per_latent_var: vec![ - ChunkLatentVarMeta { + delta_encoding: DeltaEncoding::Consecutive(DeltaConsecutiveConfig { + order: 3, + secondary_uses_delta: false, + }), + per_latent_var: PerLatentVar { + delta: None, + primary: ChunkLatentVarMeta { ans_size_log: 7, bins: DynBins::U32(vec![ Bin { @@ -292,7 +239,7 @@ mod tests { }, ]), }, - ChunkLatentVarMeta { + secondary: Some(ChunkLatentVarMeta { ans_size_log: 3, bins: DynBins::U32(vec![ Bin { @@ -306,8 +253,8 @@ mod tests { offset_bits: 0, }, ]), - }, - ], + }), + }, }; check_exact_sizes(&meta) diff --git a/pco/src/metadata/chunk_latent_var.rs b/pco/src/metadata/chunk_latent_var.rs index 6bf4a487..febc44d4 100644 --- a/pco/src/metadata/chunk_latent_var.rs +++ b/pco/src/metadata/chunk_latent_var.rs @@ -5,7 +5,7 @@ use crate::constants::{ Bitlen, Weight, ANS_INTERLEAVING, BITS_TO_ENCODE_ANS_SIZE_LOG, BITS_TO_ENCODE_N_BINS, FULL_BIN_BATCH_SIZE, MAX_ANS_BITS, }; -use crate::data_types::Latent; +use crate::data_types::{Latent, LatentType}; use crate::errors::{PcoError, PcoResult}; use crate::macros::match_latent_enum; use crate::metadata::dyn_bins::DynBins; @@ -75,7 +75,6 @@ unsafe fn write_bins( /// /// This is mainly useful for inspecting how compression was done. #[derive(Clone, Debug, PartialEq, Eq)] -#[non_exhaustive] pub struct ChunkLatentVarMeta { /// The log2 of the number of the number of states in this chunk's tANS /// table. 
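  /// For example, an `ans_size_log` of 10 corresponds to a table of
  /// `1 << 10 == 1024` states.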
@@ -88,8 +87,16 @@ pub struct ChunkLatentVarMeta { } impl ChunkLatentVarMeta { - pub(crate) unsafe fn read_from( + pub(crate) fn latent_type(&self) -> LatentType { + match_latent_enum!( + &self.bins, + DynBins(_inner) => { LatentType::new::().unwrap() } + ) + } + + pub(crate) unsafe fn read_from( reader_builder: &mut BitReaderBuilder, + latent_type: LatentType, ) -> PcoResult { let (ans_size_log, n_bins) = reader_builder.with_reader(|reader| { let ans_size_log = reader.read_bitlen(BITS_TO_ENCODE_ANS_SIZE_LOG); @@ -116,18 +123,23 @@ impl ChunkLatentVarMeta { ))); } - let mut bins = Vec::with_capacity(n_bins); - while bins.len() < n_bins { - let batch_size = min(n_bins - bins.len(), FULL_BIN_BATCH_SIZE); - read_bin_batch::( - reader_builder, - ans_size_log, - batch_size, - &mut bins, - )?; - } + let bins = match_latent_enum!( + latent_type, + LatentType => { + let mut bins = Vec::with_capacity(n_bins); + while bins.len() < n_bins { + let batch_size = min(n_bins - bins.len(), FULL_BIN_BATCH_SIZE); + read_bin_batch::( + reader_builder, + ans_size_log, + batch_size, + &mut bins, + )?; + } - let bins = DynBins::new(bins).unwrap(); + DynBins::new(bins).unwrap() + } + ); Ok(Self { bins, ans_size_log }) } @@ -155,13 +167,11 @@ impl ChunkLatentVarMeta { } pub(crate) fn exact_page_meta_bit_size(&self, delta_encoding: DeltaEncoding) -> usize { - let bits_per_delta = match_latent_enum!( + let bits_per_latent = match_latent_enum!( &self.bins, - DynBins(_bins) => { - L::BITS - } + DynBins(_bins) => { L::BITS } ); self.ans_size_log as usize * ANS_INTERLEAVING - + bits_per_delta as usize * delta_encoding.n_latents_per_state() + + bits_per_latent as usize * delta_encoding.n_latents_per_state() } } diff --git a/pco/src/metadata/delta_encoding.rs b/pco/src/metadata/delta_encoding.rs index 7e839ea6..2308ed85 100644 --- a/pco/src/metadata/delta_encoding.rs +++ b/pco/src/metadata/delta_encoding.rs @@ -1,29 +1,253 @@ -/// How Pco does +use crate::bit_reader::BitReader; +use crate::bit_writer::BitWriter; +use crate::constants::{ + Bitlen, BITS_TO_ENCODE_DELTA_ENCODING_ORDER, BITS_TO_ENCODE_DELTA_ENCODING_VARIANT, + BITS_TO_ENCODE_LZ_DELTA_STATE_N_LOG, BITS_TO_ENCODE_LZ_DELTA_WINDOW_N_LOG, +}; +use crate::data_types::LatentType; +use crate::errors::{PcoError, PcoResult}; +use crate::metadata::delta_encoding::DeltaEncoding::*; +use crate::metadata::format_version::FormatVersion; +use crate::metadata::per_latent_var::LatentVarKey; +use std::io::Write; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct DeltaConsecutiveConfig { + /// The number of times consecutive deltas were taken. + /// For instance, 2nd order delta encoding is delta-of-deltas. + /// + /// This is always positive, between 1 and 7. + pub order: usize, + pub secondary_uses_delta: bool, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct DeltaLookbackConfig { + /// The log2 of the number of latents explicitly stored in page metadata + /// to prepopulate the lookback window. + pub state_n_log: Bitlen, + /// The log2 of the maximum possible lookback. + pub window_n_log: Bitlen, + pub secondary_uses_delta: bool, +} + +impl DeltaLookbackConfig { + pub(crate) fn state_n(&self) -> usize { + 1 << self.state_n_log + } + + pub(crate) fn window_n(&self) -> usize { + 1 << self.window_n_log + } +} + +/// How Pco did /// [delta encoding](https://en.wikipedia.org/wiki/Delta_encoding) on this /// chunk. 
/// /// Delta encoding optionally takes differences between nearby numbers, /// greatly reducing the entropy of the data distribution in some cases. /// This stage of processing happens after applying the -/// [`Mode`][crate::metadata::Mode]. +/// [`Mode`][crate::metadata::Mode] during compression. #[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[non_exhaustive] pub enum DeltaEncoding { /// No delta encoding; the values are encoded as-is. /// /// This is best if your data is in random order. None, - /// Encodes the differences between values (or differences between those, - /// etc.). + /// Encodes the differences between consecutive values (or differences + /// between those, etc.). + /// + /// This is best if your numbers have high variance overall, but adjacent + /// numbers are close in value, e.g. an arithmetic sequence. + Consecutive(DeltaConsecutiveConfig), + /// Encodes an extra "lookback" latent variable and the differences + /// `x[i] - x[i - lookback[i]]` between values. /// - /// This order is always positive, between 1 and 7. - Consecutive(usize), + /// This is best if your numbers have complex repeating patterns + /// beyond just adjacent elements. + /// It is in spirit similar to LZ77 compression, but only stores lookbacks + /// (AKA match offsets) and no match lengths. + Lookback(DeltaLookbackConfig), } impl DeltaEncoding { + unsafe fn read_from_pre_v3(reader: &mut BitReader) -> Self { + let order = reader.read_usize(BITS_TO_ENCODE_DELTA_ENCODING_ORDER); + match order { + 0 => None, + _ => Consecutive(DeltaConsecutiveConfig { + order, + secondary_uses_delta: false, + }), + } + } + + pub(crate) unsafe fn read_from( + version: &FormatVersion, + reader: &mut BitReader, + ) -> PcoResult { + if !version.supports_delta_variants() { + return Ok(Self::read_from_pre_v3(reader)); + } + + let delta_encoding_variant = reader.read_bitlen(BITS_TO_ENCODE_DELTA_ENCODING_VARIANT); + + let res = match delta_encoding_variant { + 0 => None, + 1 => { + let order = reader.read_usize(BITS_TO_ENCODE_DELTA_ENCODING_ORDER); + if order == 0 { + return Err(PcoError::corruption( + "Consecutive delta encoding order must not be 0", + )); + } else { + Consecutive(DeltaConsecutiveConfig { + order, + secondary_uses_delta: reader.read_bool(), + }) + } + } + 2 => { + let window_n_log = 1 + reader.read_bitlen(BITS_TO_ENCODE_LZ_DELTA_WINDOW_N_LOG); + let state_n_log = reader.read_bitlen(BITS_TO_ENCODE_LZ_DELTA_STATE_N_LOG); + if state_n_log > window_n_log { + return Err(PcoError::corruption(format!( + "LZ delta encoding state size log exceeded window size log: {} vs {}", + state_n_log, window_n_log + ))); + } + Lookback(DeltaLookbackConfig { + window_n_log, + state_n_log, + secondary_uses_delta: reader.read_bool(), + }) + } + value => { + return Err(PcoError::corruption(format!( + "unknown delta encoding value: {}", + value + ))) + } + }; + Ok(res) + } + + pub(crate) unsafe fn write_to(&self, writer: &mut BitWriter) { + let variant = match self { + None => 0, + Consecutive(_) => 1, + Lookback(_) => 2, + }; + writer.write_bitlen( + variant, + BITS_TO_ENCODE_DELTA_ENCODING_VARIANT, + ); + + match self { + None => (), + Consecutive(config) => { + writer.write_usize( + config.order, + BITS_TO_ENCODE_DELTA_ENCODING_ORDER, + ); + writer.write_bool(config.secondary_uses_delta); + } + Lookback(config) => { + writer.write_bitlen( + config.window_n_log - 1, + BITS_TO_ENCODE_LZ_DELTA_WINDOW_N_LOG, + ); + writer.write_bitlen( + config.state_n_log, + BITS_TO_ENCODE_LZ_DELTA_STATE_N_LOG, + ); + 
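        // (The window size log is written minus one and the read side adds
        // the one back, since it is always at least 1. The final bit below
        // records whether the secondary latent variable is delta-encoded too.)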
writer.write_bool(config.secondary_uses_delta); + } + } + } + + pub(crate) fn latent_type(&self) -> Option { + match self { + None | Consecutive(_) => Option::None, + Lookback(_) => Some(LatentType::U32), + } + } + + pub(crate) fn applies_to_latent_var(&self, key: LatentVarKey) -> bool { + match (self, key) { + // We never recursively delta encode. + (_, LatentVarKey::Delta) => false, + // We always apply the DeltaEncoding to the primary latents. + (_, LatentVarKey::Primary) => true, + (None, LatentVarKey::Secondary) => false, + (Consecutive(config), LatentVarKey::Secondary) => config.secondary_uses_delta, + (Lookback(config), LatentVarKey::Secondary) => config.secondary_uses_delta, + } + } + + pub(crate) fn for_latent_var(self, key: LatentVarKey) -> DeltaEncoding { + if self.applies_to_latent_var(key) { + self + } else { + None + } + } + pub(crate) fn n_latents_per_state(&self) -> usize { match self { - Self::None => 0, - Self::Consecutive(order) => *order, + None => 0, + Consecutive(config) => config.order, + Lookback(config) => 1 << config.state_n_log, } } + + pub(crate) fn exact_bit_size(&self) -> Bitlen { + let payload_bits = match self { + None => 0, + // For nontrivial encodings, we have a +1 bit for whether the + // secondary latent is delta-encoded or not. + Consecutive(_) => BITS_TO_ENCODE_DELTA_ENCODING_ORDER + 1, + Lookback(_) => BITS_TO_ENCODE_LZ_DELTA_WINDOW_N_LOG + BITS_TO_ENCODE_LZ_DELTA_STATE_N_LOG + 1, + }; + BITS_TO_ENCODE_DELTA_ENCODING_VARIANT + payload_bits + } +} + +#[cfg(test)] +mod tests { + use crate::bit_writer::BitWriter; + use crate::metadata::delta_encoding::{DeltaConsecutiveConfig, DeltaLookbackConfig}; + use crate::metadata::DeltaEncoding; + + fn check_bit_size(encoding: DeltaEncoding) { + let mut bytes = Vec::new(); + let mut writer = BitWriter::new(&mut bytes, 100); + unsafe { + encoding.write_to(&mut writer); + } + assert_eq!( + encoding.exact_bit_size() as usize, + writer.bit_idx(), + ); + } + + #[test] + fn test_bit_size() { + check_bit_size(DeltaEncoding::None); + check_bit_size(DeltaEncoding::Consecutive( + DeltaConsecutiveConfig { + order: 3, + secondary_uses_delta: false, + }, + )); + check_bit_size(DeltaEncoding::Lookback( + DeltaLookbackConfig { + window_n_log: 8, + state_n_log: 1, + secondary_uses_delta: true, + }, + )); + } } diff --git a/pco/src/metadata/dyn_latents.rs b/pco/src/metadata/dyn_latents.rs index 783bb3cc..67ecd608 100644 --- a/pco/src/metadata/dyn_latents.rs +++ b/pco/src/metadata/dyn_latents.rs @@ -10,6 +10,13 @@ define_latent_enum!( ); impl DynLatents { + pub(crate) fn len(&self) -> usize { + match_latent_enum!( + self, + DynLatents(inner) => { inner.len() } + ) + } + pub(crate) unsafe fn read_uncompressed_from( reader: &mut BitReader, len: usize, diff --git a/pco/src/metadata/format_version.rs b/pco/src/metadata/format_version.rs index 10c425af..43258f60 100644 --- a/pco/src/metadata/format_version.rs +++ b/pco/src/metadata/format_version.rs @@ -46,4 +46,8 @@ impl FormatVersion { pub(crate) fn used_old_gcds(&self) -> bool { self.0 == 0 } + + pub(crate) fn supports_delta_variants(&self) -> bool { + self.0 >= 3 + } } diff --git a/pco/src/metadata/mod.rs b/pco/src/metadata/mod.rs index 87e87072..74323a04 100644 --- a/pco/src/metadata/mod.rs +++ b/pco/src/metadata/mod.rs @@ -1,10 +1,12 @@ pub use bin::Bin; pub use chunk::ChunkMeta; pub use chunk_latent_var::ChunkLatentVarMeta; -pub use delta_encoding::DeltaEncoding; +pub use delta_encoding::{DeltaConsecutiveConfig, DeltaEncoding, DeltaLookbackConfig}; pub use dyn_bins::DynBins; pub 
use dyn_latent::DynLatent; +pub use dyn_latents::DynLatents; pub use mode::Mode; +pub use per_latent_var::{LatentVarKey, PerLatentVar}; pub(crate) mod bin; pub(crate) mod bins; @@ -18,3 +20,4 @@ pub(crate) mod format_version; pub(crate) mod mode; pub(crate) mod page; pub(crate) mod page_latent_var; +pub(crate) mod per_latent_var; diff --git a/pco/src/metadata/mode.rs b/pco/src/metadata/mode.rs index 7590c3d4..74ed51ce 100644 --- a/pco/src/metadata/mode.rs +++ b/pco/src/metadata/mode.rs @@ -1,9 +1,14 @@ -use std::fmt::Debug; - -use crate::constants::Bitlen; -use crate::data_types::Float; +use crate::bit_reader::BitReader; +use crate::bit_writer::BitWriter; +use crate::constants::{Bitlen, BITS_TO_ENCODE_MODE_VARIANT, BITS_TO_ENCODE_QUANTIZE_K}; +use crate::data_types::{Float, LatentType}; +use crate::errors::{PcoError, PcoResult}; +use crate::macros::match_latent_enum; use crate::metadata::dyn_latent::DynLatent; -use crate::metadata::DeltaEncoding; +use crate::metadata::format_version::FormatVersion; +use crate::metadata::Mode::*; +use std::fmt::Debug; +use std::io::Write; // Internally, here's how we should model each mode: // @@ -74,39 +79,125 @@ pub enum Mode { } impl Mode { - pub(crate) fn n_latent_vars(&self) -> usize { - use Mode::*; + pub(crate) unsafe fn read_from( + reader: &mut BitReader, + version: &FormatVersion, + latent_type: LatentType, + ) -> PcoResult { + let read_latent = |reader| { + match_latent_enum!( + latent_type, + LatentType => { + DynLatent::read_uncompressed_from::(reader) + } + ) + }; + + let mode = match reader.read_bitlen(BITS_TO_ENCODE_MODE_VARIANT) { + 0 => Classic, + 1 => { + if version.used_old_gcds() { + return Err(PcoError::compatibility( + "unable to decompress data from v0.0.0 of pco with different GCD encoding", + )); + } + + let base = read_latent(reader); + IntMult(base) + } + 2 => { + let base_latent = read_latent(reader); + FloatMult(base_latent) + } + 3 => { + let k = reader.read_bitlen(BITS_TO_ENCODE_QUANTIZE_K); + FloatQuant(k) + } + value => { + return Err(PcoError::corruption(format!( + "unknown mode value {}", + value + ))) + } + }; + Ok(mode) + } + pub(crate) unsafe fn write_to(&self, writer: &mut BitWriter) { + let mode_value = match self { + Classic => 0, + IntMult(_) => 1, + FloatMult { .. } => 2, + FloatQuant { .. } => 3, + }; + writer.write_bitlen(mode_value, BITS_TO_ENCODE_MODE_VARIANT); match self { - Classic => 1, - FloatMult(_) | IntMult(_) => 2, // multiplier, adjustment - FloatQuant(_) => 2, // quantums, adjustment - } + Classic => (), + IntMult(base) => { + base.write_uncompressed_to(writer); + } + FloatMult(base_latent) => { + base_latent.write_uncompressed_to(writer); + } + &FloatQuant(k) => { + writer.write_uint(k, BITS_TO_ENCODE_QUANTIZE_K); + } + }; } - pub(crate) fn delta_encoding_for_latent_var( - &self, - latent_var_idx: usize, - delta_encoding: DeltaEncoding, - ) -> DeltaEncoding { - use Mode::*; + pub(crate) fn primary_latent_type(&self, number_latent_type: LatentType) -> LatentType { + match self { + Classic | FloatMult(_) | FloatQuant(_) | IntMult(_) => number_latent_type, + } + } - match (self, latent_var_idx) { - // In all currently-available modes, the overall `delta_order` is really the delta-order of - // the first latent. 
- (Classic, 0) | (FloatMult(_), 0) | (FloatQuant(_), 0) | (IntMult(_), 0) => delta_encoding, - // In FloatMult, IntMult, and FloatQuant, the second latent is essentially a remainder or - // adjustment; there isn't any a priori reason that deltas should be useful for that kind of - // term and we do not attempt them. - (FloatMult(_), 1) | (IntMult(_), 1) | (FloatQuant(_), 1) => DeltaEncoding::None, - _ => unreachable!( - "unknown latent {:?}/{}", - self, latent_var_idx - ), + pub(crate) fn secondary_latent_type(&self, number_latent_type: LatentType) -> Option { + match self { + Classic => None, + FloatMult(_) | FloatQuant(_) | IntMult(_) => Some(number_latent_type), } } pub(crate) fn float_mult(base: F) -> Self { - Self::FloatMult(DynLatent::new(base.to_latent_ordered()).unwrap()) + FloatMult(DynLatent::new(base.to_latent_ordered()).unwrap()) + } + + pub(crate) fn exact_bit_size(&self) -> Bitlen { + let payload_bits = match self { + Classic => 0, + IntMult(base) | FloatMult(base) => base.bits(), + FloatQuant(_) => BITS_TO_ENCODE_QUANTIZE_K, + }; + BITS_TO_ENCODE_MODE_VARIANT + payload_bits + } +} + +#[cfg(test)] +mod tests { + use crate::bit_writer::BitWriter; + use crate::metadata::{DynLatent, Mode}; + + fn check_bit_size(mode: Mode) { + let mut bytes = Vec::new(); + let mut writer = BitWriter::new(&mut bytes, 100); + unsafe { + mode.write_to(&mut writer); + } + assert_eq!( + mode.exact_bit_size() as usize, + writer.bit_idx() + ); + } + + #[test] + fn test_bit_size() { + check_bit_size(Mode::Classic); + check_bit_size(Mode::IntMult( + DynLatent::new(77_u32).unwrap(), + )); + check_bit_size(Mode::FloatMult( + DynLatent::new(77_u32).unwrap(), + )); + check_bit_size(Mode::FloatQuant(7)); } } diff --git a/pco/src/metadata/page.rs b/pco/src/metadata/page.rs index eeff5776..9f7b39f0 100644 --- a/pco/src/metadata/page.rs +++ b/pco/src/metadata/page.rs @@ -3,9 +3,9 @@ use std::io::Write; use crate::bit_reader::BitReader; use crate::bit_writer::BitWriter; use crate::constants::Bitlen; -use crate::data_types::Latent; use crate::errors::PcoResult; use crate::metadata::page_latent_var::PageLatentVarMeta; +use crate::metadata::per_latent_var::{PerLatentVar, PerLatentVarBuilder}; use crate::metadata::ChunkMeta; // Data page metadata is slightly semantically different from chunk metadata, @@ -15,37 +15,45 @@ use crate::metadata::ChunkMeta; // (wrapped mode). 
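// Concretely, each page stores, for every latent variable present, the delta
// state needed to seed decoding plus the ANS_INTERLEAVING final ANS states
// (see PageLatentVarMeta).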
#[derive(Clone, Debug)] pub struct PageMeta { - pub per_latent_var: Vec, + pub per_latent_var: PerLatentVar, } impl PageMeta { - pub unsafe fn write_to, W: Write>( + pub unsafe fn write_to( &self, - ans_size_logs: I, + ans_size_logs: PerLatentVar, writer: &mut BitWriter, ) { - for (latent_idx, ans_size_log) in ans_size_logs.enumerate() { - self.per_latent_var[latent_idx].write_to(ans_size_log, writer); + for (_, (ans_size_log, latent_var_meta)) in ans_size_logs + .zip_exact(self.per_latent_var.as_ref()) + .enumerated() + { + latent_var_meta.write_to(ans_size_log, writer); } writer.finish_byte(); } - pub unsafe fn read_from( - reader: &mut BitReader, - chunk_meta: &ChunkMeta, - ) -> PcoResult { - let mut per_latent_var = Vec::with_capacity(chunk_meta.per_latent_var.len()); - for (latent_idx, chunk_latent_var_meta) in chunk_meta.per_latent_var.iter().enumerate() { - per_latent_var.push(PageLatentVarMeta::read_from::( - reader, - chunk_meta - .delta_encoding_for_latent_var(latent_idx) - .n_latents_per_state(), - chunk_latent_var_meta.ans_size_log, - )?); + pub unsafe fn read_from(reader: &mut BitReader, chunk_meta: &ChunkMeta) -> PcoResult { + let mut per_latent_var_builder = PerLatentVarBuilder::default(); + for (key, chunk_latent_var_meta) in chunk_meta.per_latent_var.as_ref().enumerated() { + let n_latents_per_state = chunk_meta + .delta_encoding + .for_latent_var(key) + .n_latents_per_state(); + per_latent_var_builder.set( + key, + PageLatentVarMeta::read_from( + reader, + chunk_latent_var_meta.latent_type(), + n_latents_per_state, + chunk_latent_var_meta.ans_size_log, + ), + ) } reader.drain_empty_byte("non-zero bits at end of data page metadata")?; - Ok(Self { per_latent_var }) + Ok(Self { + per_latent_var: per_latent_var_builder.into(), + }) } } diff --git a/pco/src/metadata/page_latent_var.rs b/pco/src/metadata/page_latent_var.rs index dfb3dfa7..2813b1c7 100644 --- a/pco/src/metadata/page_latent_var.rs +++ b/pco/src/metadata/page_latent_var.rs @@ -2,20 +2,21 @@ use crate::ans::AnsState; use crate::bit_reader::BitReader; use crate::bit_writer::BitWriter; use crate::constants::{Bitlen, ANS_INTERLEAVING}; -use crate::data_types::Latent; -use crate::errors::PcoResult; +use crate::data_types::LatentType; +use crate::delta::DeltaState; +use crate::macros::match_latent_enum; use crate::metadata::dyn_latents::DynLatents; use std::io::Write; #[derive(Clone, Debug)] pub struct PageLatentVarMeta { - pub delta_moments: DynLatents, + pub delta_state: DeltaState, pub ans_final_state_idxs: [AnsState; ANS_INTERLEAVING], } impl PageLatentVarMeta { pub unsafe fn write_to(&self, ans_size_log: Bitlen, writer: &mut BitWriter) { - self.delta_moments.write_uncompressed_to(writer); + self.delta_state.write_uncompressed_to(writer); // write the final ANS state, moving it down the range [0, table_size) for state_idx in self.ans_final_state_idxs { @@ -23,19 +24,25 @@ impl PageLatentVarMeta { } } - pub unsafe fn read_from( + pub unsafe fn read_from( reader: &mut BitReader, + latent_type: LatentType, n_latents_per_delta_state: usize, ans_size_log: Bitlen, - ) -> PcoResult { - let delta_moments = DynLatents::read_uncompressed_from::(reader, n_latents_per_delta_state); + ) -> Self { + let delta_state = match_latent_enum!( + latent_type, + LatentType => { + DynLatents::read_uncompressed_from::(reader, n_latents_per_delta_state) + } + ); let mut ans_final_state_idxs = [0; ANS_INTERLEAVING]; for state in &mut ans_final_state_idxs { *state = reader.read_uint::(ans_size_log); } - Ok(Self { - delta_moments, + Self { + 
delta_state, ans_final_state_idxs, - }) + } } } diff --git a/pco/src/metadata/per_latent_var.rs b/pco/src/metadata/per_latent_var.rs new file mode 100644 index 00000000..e15b0f30 --- /dev/null +++ b/pco/src/metadata/per_latent_var.rs @@ -0,0 +1,149 @@ +use std::fmt::Debug; +use std::iter::Sum; + +/// The possible kinds of latent variables present in a chunk. +/// +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum LatentVarKey { + /// Used by certain types of + /// [delta encodings][crate::metadata::DeltaEncoding]. E.g. lookback delta + /// encoding uses this to store lookbacks. + Delta, + /// The only required latent variable, used by + /// [modes][crate::metadata::Mode] to represent number values. + /// + /// Always has the same precision as the encoded numbers. + Primary, + /// An optional additional latent variable, used by certain + /// [modes][crate::metadata::Mode] to represent number values. + Secondary, +} + +/// A generic container holding a value for each applicable latent variable. +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub struct PerLatentVar { + pub delta: Option, + pub primary: T, + pub secondary: Option, +} + +#[derive(Clone, Debug)] +pub(crate) struct PerLatentVarBuilder { + pub delta: Option, + pub primary: Option, + pub secondary: Option, +} + +impl Default for PerLatentVarBuilder { + fn default() -> Self { + Self { + delta: None, + primary: None, + secondary: None, + } + } +} + +impl PerLatentVarBuilder { + pub fn set(&mut self, key: LatentVarKey, value: T) { + match key { + LatentVarKey::Delta => self.delta = Some(value), + LatentVarKey::Primary => self.primary = Some(value), + LatentVarKey::Secondary => self.secondary = Some(value), + } + } +} + +impl From> for PerLatentVar { + fn from(value: PerLatentVarBuilder) -> Self { + PerLatentVar { + delta: value.delta, + primary: value.primary.unwrap(), + secondary: value.secondary, + } + } +} + +impl PerLatentVar { + pub(crate) fn map S>(self, f: F) -> PerLatentVar { + PerLatentVar { + delta: self.delta.map(|delta| f(LatentVarKey::Delta, delta)), + primary: f(LatentVarKey::Primary, self.primary), + secondary: self + .secondary + .map(|secondary| f(LatentVarKey::Secondary, secondary)), + } + } + + /// Returns a new `PerLatentVar` where each entry has been wrapped in a + /// reference. + pub fn as_ref(&self) -> PerLatentVar<&T> { + PerLatentVar { + delta: self.delta.as_ref(), + primary: &self.primary, + secondary: self.secondary.as_ref(), + } + } + + pub(crate) fn as_mut(&mut self) -> PerLatentVar<&mut T> { + PerLatentVar { + delta: self.delta.as_mut(), + primary: &mut self.primary, + secondary: self.secondary.as_mut(), + } + } + + pub(crate) fn get(&self, key: LatentVarKey) -> Option<&T> { + match key { + LatentVarKey::Delta => self.delta.as_ref(), + LatentVarKey::Primary => Some(&self.primary), + LatentVarKey::Secondary => self.secondary.as_ref(), + } + } + + /// Zips each element of this `PerLatentVar` with each element of the other. + /// + /// Will panic if either one has a latent variable that the other does not. 
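  /// For example, zipping `{delta: None, primary: 1, secondary: Some(2)}`
  /// with `{delta: None, primary: 10, secondary: Some(20)}` yields
  /// `{delta: None, primary: (1, 10), secondary: Some((2, 20))}`.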
+ pub fn zip_exact(self, other: PerLatentVar) -> PerLatentVar<(T, S)> { + let zip_option = |a: Option, b: Option| match (a, b) { + (Some(a), Some(b)) => Some((a, b)), + (None, None) => None, + _ => panic!("expected values of left and right sides to match"), + }; + + PerLatentVar { + delta: zip_option(self.delta, other.delta), + primary: (self.primary, other.primary), + secondary: zip_option(self.secondary, other.secondary), + } + } + + /// Returns a vector of the defined `LatentVarKey`s and values, in order + /// of appearance in the file. + pub fn enumerated(self) -> Vec<(LatentVarKey, T)> { + let mut res = Vec::with_capacity(3); + if let Some(value) = self.delta { + res.push((LatentVarKey::Delta, value)); + } + res.push((LatentVarKey::Primary, self.primary)); + if let Some(value) = self.secondary { + res.push((LatentVarKey::Secondary, value)); + } + res + } + + pub(crate) fn sum(self) -> T + where + T: Sum, + { + let mut values = Vec::with_capacity(3); + if let Some(value) = self.delta { + values.push(value); + } + values.push(self.primary); + if let Some(value) = self.secondary { + values.push(value); + } + T::sum(values.into_iter()) + } +} diff --git a/pco/src/split_latents.rs b/pco/src/split_latents.rs new file mode 100644 index 00000000..a3352e4b --- /dev/null +++ b/pco/src/split_latents.rs @@ -0,0 +1,7 @@ +use crate::metadata::DynLatents; + +#[derive(Clone, Debug)] +pub struct SplitLatents { + pub primary: DynLatents, + pub secondary: Option, +} diff --git a/pco/src/standalone/compressor.rs b/pco/src/standalone/compressor.rs index c0bcfbec..4f68edfc 100644 --- a/pco/src/standalone/compressor.rs +++ b/pco/src/standalone/compressor.rs @@ -2,7 +2,7 @@ use std::io::Write; use crate::bit_writer::BitWriter; use crate::chunk_config::PagingSpec; -use crate::data_types::{Latent, Number}; +use crate::data_types::Number; use crate::errors::PcoResult; use crate::metadata::ChunkMeta; use crate::standalone::constants::*; @@ -83,7 +83,7 @@ impl FileCompressor { &self, nums: &[T], config: &ChunkConfig, - ) -> PcoResult> { + ) -> PcoResult { let mut config = config.clone(); config.paging_spec = PagingSpec::Exact(vec![nums.len()]); @@ -106,12 +106,12 @@ impl FileCompressor { /// Holds metadata about a chunk and supports compression. #[derive(Clone, Debug)] -pub struct ChunkCompressor { - inner: wrapped::ChunkCompressor, +pub struct ChunkCompressor { + inner: wrapped::ChunkCompressor, number_type_byte: u8, } -impl ChunkCompressor { +impl ChunkCompressor { /// Returns pre-computed information about the chunk. 
pub fn meta(&self) -> &ChunkMeta { self.inner.meta() diff --git a/pco/src/tests/compatibility.rs b/pco/src/tests/compatibility.rs index 078f9f00..f2b51f56 100644 --- a/pco/src/tests/compatibility.rs +++ b/pco/src/tests/compatibility.rs @@ -1,11 +1,11 @@ +use crate::data_types::Number; +use crate::errors::PcoResult; +use crate::{standalone, ChunkConfig, DeltaSpec, ModeSpec}; +use half::f16; use std::fs; use std::path::PathBuf; use std::str::FromStr; -use crate::data_types::Number; -use crate::errors::PcoResult; -use crate::{standalone, ChunkConfig}; - fn get_asset_dir() -> PathBuf { PathBuf::from_str(env!("CARGO_MANIFEST_DIR")) .unwrap() @@ -66,122 +66,131 @@ fn simple_write_if_version_matches( Ok(()) } -#[cfg(test)] -mod tests { - use crate::chunk_config::DeltaSpec; - use crate::errors::PcoResult; - use crate::tests::compatibility::{assert_compatible, simple_write_if_version_matches}; - use crate::{ChunkConfig, ModeSpec}; - use half::f16; - - #[test] - fn v0_0_0_classic() -> PcoResult<()> { - let version = "0.0.0"; - let name = "classic"; - let nums = (0_i32..1000).chain(2000..3000).collect::>(); - let config = ChunkConfig { - delta_spec: DeltaSpec::None, - ..Default::default() - }; - simple_write_if_version_matches(version, name, &nums, &config)?; - assert_compatible(version, name, &nums)?; - Ok(()) - } +#[test] +fn v0_0_0_classic() -> PcoResult<()> { + let version = "0.0.0"; + let name = "classic"; + let nums = (0_i32..1000).chain(2000..3000).collect::>(); + let config = ChunkConfig { + delta_spec: DeltaSpec::None, + ..Default::default() + }; + simple_write_if_version_matches(version, name, &nums, &config)?; + assert_compatible(version, name, &nums)?; + Ok(()) +} - #[test] - fn v0_0_0_delta_float_mult() -> PcoResult<()> { - let version = "0.0.0"; - let name = "delta_float_mult"; - let mut nums = (0..2000).map(|i| i as f32).collect::>(); - nums[1337] += 1.001; - let config = ChunkConfig { - delta_spec: DeltaSpec::TryConsecutive(1), - ..Default::default() - }; - simple_write_if_version_matches(version, name, &nums, &config)?; - assert_compatible(version, name, &nums)?; - Ok(()) - } +#[test] +fn v0_0_0_delta_float_mult() -> PcoResult<()> { + let version = "0.0.0"; + let name = "delta_float_mult"; + let mut nums = (0..2000).map(|i| i as f32).collect::>(); + nums[1337] += 1.001; + let config = ChunkConfig { + delta_spec: DeltaSpec::TryConsecutive(1), + ..Default::default() + }; + simple_write_if_version_matches(version, name, &nums, &config)?; + assert_compatible(version, name, &nums)?; + Ok(()) +} - #[test] - fn v0_1_0_delta_int_mult() -> PcoResult<()> { - // starting at 0.1.0 because 0.0.0 had GCD mode (no longer supported) - // instead of int mult - let version = "0.1.0"; - let name = "delta_int_mult"; - let mut nums = (0..2000).map(|i| i * 1000).collect::>(); - nums[1337] -= 1; - let config = ChunkConfig { - delta_spec: DeltaSpec::TryConsecutive(1), - ..Default::default() - }; - simple_write_if_version_matches(version, name, &nums, &config)?; - assert_compatible(version, name, &nums)?; - Ok(()) - } +#[test] +fn v0_1_0_delta_int_mult() -> PcoResult<()> { + // starting at 0.1.0 because 0.0.0 had GCD mode (no longer supported) + // instead of int mult + let version = "0.1.0"; + let name = "delta_int_mult"; + let mut nums = (0..2000).map(|i| i * 1000).collect::>(); + nums[1337] -= 1; + let config = ChunkConfig { + delta_spec: DeltaSpec::TryConsecutive(1), + ..Default::default() + }; + simple_write_if_version_matches(version, name, &nums, &config)?; + assert_compatible(version, name, &nums)?; 
+ Ok(()) +} - #[test] - fn v0_1_1_classic() -> PcoResult<()> { - // v0.1.1 introduced standalone versioning, separate from wrapped versioning - let version = "0.1.1"; - let name = "standalone_versioned"; - let nums = vec![]; - let config = ChunkConfig::default(); - simple_write_if_version_matches::(version, name, &nums, &config)?; - assert_compatible(version, name, &nums)?; - Ok(()) - } +#[test] +fn v0_1_1_classic() -> PcoResult<()> { + // v0.1.1 introduced standalone versioning, separate from wrapped versioning + let version = "0.1.1"; + let name = "standalone_versioned"; + let nums = vec![]; + let config = ChunkConfig::default(); + simple_write_if_version_matches::(version, name, &nums, &config)?; + assert_compatible(version, name, &nums)?; + Ok(()) +} - fn generate_pseudorandom_f16s() -> Vec { - // makes a variety of floats approximately uniformly distributed - // between (-2.0, -1.0] U [1.0, 2.0) - let mut num = 0.1_f32; - let mut nums = vec![]; - for _ in 0..2000 { - num = ((num * 77.7) + 0.1) % 2.0; - if num < 1.0 { - nums.push(f16::from_f32(-1.0 - num)); - } else { - nums.push(f16::from_f32(num)); - } +fn generate_pseudorandom_f16s() -> Vec { + // makes a variety of floats approximately uniformly distributed + // between (-2.0, -1.0] U [1.0, 2.0) + let mut num = 0.1_f32; + let mut nums = vec![]; + for _ in 0..2000 { + num = ((num * 77.7) + 0.1) % 2.0; + if num < 1.0 { + nums.push(f16::from_f32(-1.0 - num)); + } else { + nums.push(f16::from_f32(num)); } - nums } + nums +} - #[test] - fn v0_3_0_f16() -> PcoResult<()> { - // v0.3.0 introduced 16-bit data types, including f16, which requires the - // half crate - let version = "0.3.0"; - let name = "f16"; - let config = ChunkConfig::default(); - let nums = generate_pseudorandom_f16s(); - simple_write_if_version_matches::(version, name, &nums, &config)?; - assert_compatible(version, name, &nums)?; - Ok(()) - } +#[test] +fn v0_3_0_f16() -> PcoResult<()> { + // v0.3.0 introduced 16-bit data types, including f16, which requires the + // half crate + let version = "0.3.0"; + let name = "f16"; + let config = ChunkConfig::default(); + let nums = generate_pseudorandom_f16s(); + simple_write_if_version_matches::(version, name, &nums, &config)?; + assert_compatible(version, name, &nums)?; + Ok(()) +} - #[test] - fn v0_3_0_float_quant() -> PcoResult<()> { - // v0.3.0 introduced float quantization mode - let version = "0.3.0"; - let name = "float_quant"; - let nums = generate_pseudorandom_f16s() - .into_iter() - .map(|x| { - let x = x.to_f32(); - if x.abs() < 1.1 { - f32::from_bits(x.to_bits() + 1) - } else { - x - } - }) - .collect::>(); - let config = ChunkConfig::default().with_mode_spec(ModeSpec::TryFloatQuant( - f32::MANTISSA_DIGITS - f16::MANTISSA_DIGITS, - )); - simple_write_if_version_matches::(version, name, &nums, &config)?; - assert_compatible(version, name, &nums)?; - Ok(()) - } +#[test] +fn v0_3_0_float_quant() -> PcoResult<()> { + // v0.3.0 introduced float quantization mode + let version = "0.3.0"; + let name = "float_quant"; + let nums = generate_pseudorandom_f16s() + .into_iter() + .map(|x| { + let x = x.to_f32(); + if x.abs() < 1.1 { + f32::from_bits(x.to_bits() + 1) + } else { + x + } + }) + .collect::>(); + let config = ChunkConfig::default().with_mode_spec(ModeSpec::TryFloatQuant( + f32::MANTISSA_DIGITS - f16::MANTISSA_DIGITS, + )); + simple_write_if_version_matches::(version, name, &nums, &config)?; + assert_compatible(version, name, &nums)?; + Ok(()) +} + +#[test] +fn v0_4_0_lookback_delta() -> PcoResult<()> { + // v0.4.0 
introduced lookback delta encoding + let version = "0.4.0"; + let name = "lookback_delta"; + + // randomly generated ahead of time + let nums: Vec = vec![ + 1121827092, 729032807, 3968137854, 2875434067, 3775328080, 431649926, 1048116090, 1906978350, + 14752788, 1180462487, + ] + .repeat(100); + let config = ChunkConfig::default().with_delta_spec(DeltaSpec::TryLookback); + simple_write_if_version_matches(version, name, &nums, &config)?; + assert_compatible(version, name, &nums)?; + Ok(()) } diff --git a/pco/src/tests/recovery.rs b/pco/src/tests/recovery.rs index 24ef80ff..f6f73d61 100644 --- a/pco/src/tests/recovery.rs +++ b/pco/src/tests/recovery.rs @@ -6,7 +6,7 @@ use crate::chunk_config::{ChunkConfig, DeltaSpec}; use crate::constants::Bitlen; use crate::data_types::Number; use crate::errors::PcoResult; -use crate::metadata::{ChunkMeta, DynLatent, Mode}; +use crate::metadata::{ChunkMeta, DeltaEncoding, DynLatent, Mode}; use crate::standalone::{simple_compress, simple_decompress, FileCompressor}; use crate::ModeSpec; @@ -241,8 +241,9 @@ fn recover_with_alternating_nums(offset_bits: Bitlen, name: &str) -> PcoResult<( ..Default::default() }, )?; - assert_eq!(meta.per_latent_var.len(), 1); - let latent_var = &meta.per_latent_var[0]; + assert!(meta.per_latent_var.delta.is_none()); + assert!(meta.per_latent_var.secondary.is_none()); + let latent_var = &meta.per_latent_var.primary; let bins = latent_var.bins.downcast_ref::().unwrap(); assert_eq!(bins.len(), 1); assert_eq!(bins[0].offset_bits, offset_bits); @@ -356,3 +357,22 @@ fn test_trivial_first_latent_var() -> PcoResult<()> { assert_nums_eq(&decompressed, &nums, "trivial_first_latent")?; Ok(()) } + +#[test] +fn test_lookback_delta_encoding() -> PcoResult<()> { + let mut nums = Vec::new(); + for i in 0..100 { + nums.push(i % 9); + } + let (compressed, meta) = compress_w_meta( + &nums, + &ChunkConfig::default().with_delta_spec(DeltaSpec::TryLookback), + )?; + assert!(matches!( + meta.delta_encoding, + DeltaEncoding::Lookback(_) + )); + let decompressed = simple_decompress(&compressed)?; + assert_nums_eq(&decompressed, &nums, "trivial_first_latent")?; + Ok(()) +} diff --git a/pco/src/tests/stability.rs b/pco/src/tests/stability.rs index 3bd19a40..a0feba12 100644 --- a/pco/src/tests/stability.rs +++ b/pco/src/tests/stability.rs @@ -44,9 +44,12 @@ fn test_insufficient_data_short_bins() -> PcoResult<()> { } let meta = assert_panic_safe(nums)?; - assert_eq!(meta.per_latent_var.len(), 1); + assert!(meta.per_latent_var.delta.is_none()); + assert!(meta.per_latent_var.secondary.is_none()); assert_eq!( - meta.per_latent_var[0] + meta + .per_latent_var + .primary .bins .downcast_ref::() .unwrap() @@ -64,9 +67,12 @@ fn test_insufficient_data_sparse() -> PcoResult<()> { } let meta = assert_panic_safe(nums)?; - assert_eq!(meta.per_latent_var.len(), 1); + assert!(meta.per_latent_var.delta.is_none()); + assert!(meta.per_latent_var.secondary.is_none()); assert_eq!( - meta.per_latent_var[0] + meta + .per_latent_var + .primary .bins .downcast_ref::() .unwrap() @@ -85,8 +91,14 @@ fn test_insufficient_data_long_offsets() -> PcoResult<()> { } let meta = assert_panic_safe(nums)?; - let bins = meta.per_latent_var[0].bins.downcast_ref::().unwrap(); - assert_eq!(meta.per_latent_var.len(), 1); + let bins = meta + .per_latent_var + .primary + .bins + .downcast_ref::() + .unwrap(); + assert!(meta.per_latent_var.delta.is_none()); + assert!(meta.per_latent_var.secondary.is_none()); assert_eq!(bins.len(), 1); assert_eq!(bins[0].offset_bits, 64); Ok(()) diff --git 
a/pco/src/wrapped/chunk_compressor.rs b/pco/src/wrapped/chunk_compressor.rs index f3939bae..9ce5557d 100644 --- a/pco/src/wrapped/chunk_compressor.rs +++ b/pco/src/wrapped/chunk_compressor.rs @@ -1,34 +1,57 @@ -use std::cmp::min; -use std::io::Write; - use crate::bit_writer::BitWriter; use crate::chunk_config::DeltaSpec; -use crate::compression_intermediates::BinCompressionInfo; +use crate::compression_intermediates::{BinCompressionInfo, PageInfoVar}; use crate::compression_intermediates::{DissectedPage, PageInfo}; use crate::constants::{ - Bitlen, Weight, ANS_INTERLEAVING, LIMITED_UNOPTIMIZED_BINS_LOG, MAX_COMPRESSION_LEVEL, - MAX_DELTA_ENCODING_ORDER, MAX_ENTRIES, OVERSHOOT_PADDING, PAGE_PADDING, + Bitlen, Weight, LIMITED_UNOPTIMIZED_BINS_LOG, MAX_COMPRESSION_LEVEL, MAX_DELTA_ENCODING_ORDER, + MAX_ENTRIES, OVERSHOOT_PADDING, PAGE_PADDING, }; -use crate::data_types::{Latent, Number}; -use crate::delta::DeltaMoments; +use crate::data_types::{Latent, LatentType, Number}; +use crate::delta::DeltaState; use crate::errors::{PcoError, PcoResult}; use crate::histograms::histogram; -use crate::latent_chunk_compressor::{LatentChunkCompressor, TrainedBins}; +use crate::latent_chunk_compressor::{ + DynLatentChunkCompressor, LatentChunkCompressor, TrainedBins, +}; use crate::macros::match_latent_enum; use crate::metadata::chunk_latent_var::ChunkLatentVarMeta; +use crate::metadata::delta_encoding::{DeltaConsecutiveConfig, DeltaLookbackConfig}; use crate::metadata::dyn_bins::DynBins; use crate::metadata::dyn_latents::DynLatents; use crate::metadata::page::PageMeta; use crate::metadata::page_latent_var::PageLatentVarMeta; +use crate::metadata::per_latent_var::{LatentVarKey, PerLatentVar, PerLatentVarBuilder}; use crate::metadata::{Bin, ChunkMeta, DeltaEncoding, Mode}; +use crate::split_latents::SplitLatents; use crate::wrapped::guarantee; -use crate::{ans, bin_optimization, data_types, delta, ChunkConfig, PagingSpec, FULL_BATCH_N}; +use crate::{ + ans, bin_optimization, bits, data_types, delta, ChunkConfig, PagingSpec, FULL_BATCH_N, +}; +use std::cmp::min; +use std::io::Write; // if it looks like the average page of size n will use k bits, hint that it // will be PAGE_SIZE_OVERESTIMATION * k bits. const PAGE_SIZE_OVERESTIMATION: f64 = 1.2; const N_PER_EXTRA_DELTA_GROUP: usize = 10000; const DELTA_GROUP_SIZE: usize = 200; +const LOOKBACK_MAX_WINDOW_N_LOG: Bitlen = 15; +const LOOKBACK_MIN_WINDOW_N_LOG: Bitlen = 4; +const LOOKBACK_REQUIRED_BYTE_SAVINGS_PER_N: f64 = 0.25; + +// TODO taking deltas of secondary latents has been proven to help slightly +// in some cases, so we should consider it in the future + +fn new_lz_delta_encoding(n: usize) -> DeltaEncoding { + DeltaEncoding::Lookback(DeltaLookbackConfig { + window_n_log: bits::bits_to_encode_offset(n as u32 - 1).clamp( + LOOKBACK_MIN_WINDOW_N_LOG, + LOOKBACK_MAX_WINDOW_N_LOG, + ), + state_n_log: 0, + secondary_uses_delta: false, + }) +} // returns table size log fn quantize_weights( @@ -96,14 +119,10 @@ fn train_infos( /// Holds metadata about a chunk and supports compression. 
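 /// Internally, it holds one trained latent chunk compressor per latent
 /// variable (delta, primary, and secondary where present), along with
 /// per-page bookkeeping.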
#[derive(Clone, Debug)] -pub struct ChunkCompressor { +pub struct ChunkCompressor { meta: ChunkMeta, - latent_chunk_compressors: Vec>, + latent_chunk_compressors: PerLatentVar, page_infos: Vec, - // n_latent_vars x n_deltas - deltas: Vec>, - // n_pages x n_latent_vars - delta_moments: Vec>>, } fn bins_from_compression_infos(infos: &[BinCompressionInfo]) -> Vec> { @@ -147,153 +166,207 @@ fn validate_chunk_size(n: usize) -> PcoResult<()> { Ok(()) } -#[inline(never)] -fn collect_contiguous_deltas( - deltas: &[L], +fn collect_contiguous_latents( + latents: &[L], page_infos: &[PageInfo], - latent_idx: usize, + latent_var_key: LatentVarKey, ) -> Vec { - let mut res = Vec::with_capacity(deltas.len()); + let mut res = Vec::with_capacity(latents.len()); for page in page_infos { - res.extend(&deltas[page.start_idx..page.end_idx_per_var[latent_idx]]); + let range = page.range_for_latent_var(latent_var_key); + res.extend(&latents[range]); } res } -fn build_page_infos_and_delta_moments( - mode: Mode, +fn delta_encode_and_build_page_infos( delta_encoding: DeltaEncoding, n_per_page: &[usize], - latents: &mut [Vec], - // TODO put delta state into page info -) -> (Vec, Vec>>) { + latents: SplitLatents, +) -> (PerLatentVar, Vec) { + let n = latents.primary.len(); + let mut latents = PerLatentVar { + delta: None, + primary: latents.primary, + secondary: latents.secondary, + }; let n_pages = n_per_page.len(); let mut page_infos = Vec::with_capacity(n_pages); - let mut delta_moments = vec![Vec::new(); n_pages]; // delta encoding let mut start_idx = 0; - for (&page_n, delta_moments) in n_per_page.iter().zip(delta_moments.iter_mut()) { - let mut end_idx_per_var = Vec::new(); - for (latent_var_idx, latents) in latents.iter_mut().enumerate() { - let var_delta_encoding = mode.delta_encoding_for_latent_var(latent_var_idx, delta_encoding); - - let moments = match var_delta_encoding { - DeltaEncoding::None => DeltaMoments::default(), - DeltaEncoding::Consecutive(order) => delta::encode_in_place( - &mut latents[start_idx..start_idx + page_n], - order, - ), - }; - delta_moments.push(moments); - end_idx_per_var - .push(start_idx + page_n.saturating_sub(var_delta_encoding.n_latents_per_state())); + let mut delta_latents = delta_encoding.latent_type().map(|ltype| { + match_latent_enum!( + ltype, + LatentType => { DynLatents::new(Vec::::with_capacity(n)).unwrap() } + ) + }); + for &page_n in n_per_page { + let end_idx = start_idx + page_n; + + let page_delta_latents = delta::compute_delta_latent_var( + delta_encoding, + &mut latents.primary, + start_idx..end_idx, + ); + + let mut per_latent_var = latents.as_mut().map(|key, var_latents| { + let encoding_for_var = delta_encoding.for_latent_var(key); + let delta_state = delta::encode_in_place( + encoding_for_var, + page_delta_latents.as_ref(), + start_idx..end_idx, + var_latents, + ); + // delta encoding in place leaves junk in the first n_latents_per_state + let stored_start_idx = min( + start_idx + encoding_for_var.n_latents_per_state(), + end_idx, + ); + let range = stored_start_idx..end_idx; + PageInfoVar { delta_state, range } + }); + + if let Some(delta_latents) = delta_latents.as_mut() { + match_latent_enum!( + delta_latents, + DynLatents(delta_latents) => { + let page_delta_latents = page_delta_latents.unwrap().downcast::().unwrap(); + let delta_state = DeltaState::new(Vec::::new()).unwrap(); + let range = delta_latents.len()..delta_latents.len() + page_delta_latents.len(); + per_latent_var.delta = Some(PageInfoVar { delta_state, range }); + 
delta_latents.extend(&page_delta_latents); + } + ) } + page_infos.push(PageInfo { page_n, - start_idx, - end_idx_per_var, + per_latent_var, }); - start_idx += page_n; + start_idx = end_idx; } + latents.delta = delta_latents; - (page_infos, delta_moments) + (latents, page_infos) } -fn new_candidate_w_split_and_delta_encoding( - mut latents: Vec>, // start out plain, gets delta encoded in place +fn new_candidate_w_split_and_delta_encoding( + latents: SplitLatents, // start out plain, gets delta encoded in place paging_spec: &PagingSpec, mode: Mode, delta_encoding: DeltaEncoding, unoptimized_bins_log: Bitlen, -) -> PcoResult<(ChunkCompressor, Vec>)> { - let chunk_n = latents[0].len(); +) -> PcoResult<(ChunkCompressor, PerLatentVar>)> { + let chunk_n = latents.primary.len(); let n_per_page = paging_spec.n_per_page(chunk_n)?; - let n_latent_vars = mode.n_latent_vars(); - let (page_infos, delta_moments) = build_page_infos_and_delta_moments( - mode, - delta_encoding, - &n_per_page, - &mut latents, - ); - let deltas = latents; + // delta encoding + let (latents, page_infos) = + delta_encode_and_build_page_infos(delta_encoding, &n_per_page, latents); // training bins - let mut var_metas = Vec::with_capacity(n_latent_vars); - let mut latent_chunk_compressors = Vec::with_capacity(n_latent_vars); - let mut bin_counts = Vec::with_capacity(n_latent_vars); - for (latent_idx, deltas) in deltas.iter().enumerate() { - // secondary latents should be compressed faster - let unoptimized_bins_log = if latent_idx == 0 { - unoptimized_bins_log - } else { - min( + let mut var_metas = PerLatentVarBuilder::default(); + let mut latent_chunk_compressors = PerLatentVarBuilder::default(); + let mut bin_countss = PerLatentVarBuilder::default(); + for (key, latents) in latents.enumerated() { + let unoptimized_bins_log = match key { + // primary latents are generally the most important to compress, and + // delta latents typically have a small number of discrete values, so + // aren't slow to optimize anyway + LatentVarKey::Delta | LatentVarKey::Primary => unoptimized_bins_log, + // secondary latents should be compressed faster + LatentVarKey::Secondary => min( unoptimized_bins_log, LIMITED_UNOPTIMIZED_BINS_LOG, - ) + ), }; - let contiguous_deltas = collect_contiguous_deltas(deltas, &page_infos, latent_idx); - let trained = train_infos(contiguous_deltas, unoptimized_bins_log)?; - let bins = bins_from_compression_infos(&trained.infos); - - let ans_size_log = trained.ans_size_log; - bin_counts.push(trained.counts.to_vec()); - latent_chunk_compressors.push(LatentChunkCompressor::new(trained, &bins)?); - let latent_meta = ChunkLatentVarMeta { - bins: DynBins::new(bins).unwrap(), - ans_size_log, - }; - var_metas.push(latent_meta); + match_latent_enum!( + latents, + DynLatents(latents) => { + let contiguous_deltas = collect_contiguous_latents(&latents, &page_infos, key); + let trained = train_infos(contiguous_deltas, unoptimized_bins_log)?; + + let bins = bins_from_compression_infos(&trained.infos); + + let ans_size_log = trained.ans_size_log; + let bin_counts = trained.counts.to_vec(); + let lcc = DynLatentChunkCompressor::new( + LatentChunkCompressor::new(trained, &bins, latents)? 
+ ).unwrap(); + let var_meta = ChunkLatentVarMeta { + bins: DynBins::new(bins).unwrap(), + ans_size_log, + }; + var_metas.set(key, var_meta); + latent_chunk_compressors.set(key, lcc); + bin_countss.set(key, bin_counts); + } + ) } - let meta = ChunkMeta::new(mode, delta_encoding, var_metas); + let var_metas = var_metas.into(); + let latent_chunk_compressors = latent_chunk_compressors.into(); + let bin_countss = bin_countss.into(); + + let meta = ChunkMeta { + mode, + delta_encoding, + per_latent_var: var_metas, + }; let chunk_compressor = ChunkCompressor { meta, latent_chunk_compressors, page_infos, - deltas, - delta_moments, }; - Ok((chunk_compressor, bin_counts)) + Ok((chunk_compressor, bin_countss)) } -fn choose_delta_sample( - primary_latents: &[L], +fn choose_delta_sample( + primary_latents: &DynLatents, group_size: usize, n_extra_groups: usize, -) -> Vec { +) -> DynLatents { let n = primary_latents.len(); let nominal_sample_size = (n_extra_groups + 1) * group_size; - let mut sample = Vec::with_capacity(nominal_sample_size); let group_padding = if n_extra_groups == 0 { 0 } else { n.saturating_sub(nominal_sample_size) / n_extra_groups }; - sample.extend(primary_latents.iter().take(group_size)); let mut i = group_size; - for _ in 0..n_extra_groups { - i += group_padding; - sample.extend(primary_latents.iter().skip(i).take(group_size)); - i += group_size; - } - sample + match_latent_enum!( + primary_latents, + DynLatents(primary_latents) => { + let mut sample = Vec::::with_capacity(nominal_sample_size); + sample.extend(primary_latents.iter().take(group_size)); + for _ in 0..n_extra_groups { + i += group_padding; + sample.extend(primary_latents.iter().skip(i).take(group_size)); + i += group_size; + } + DynLatents::new(sample).unwrap() + } + ) } -fn calculate_compressed_sample_size( - sample: &[L], +fn calculate_compressed_sample_size( + sample: &DynLatents, unoptimized_bins_log: Bitlen, delta_encoding: DeltaEncoding, ) -> PcoResult { + let sample_n = sample.len(); let (sample_cc, _) = new_candidate_w_split_and_delta_encoding( - vec![sample.to_vec()], - &PagingSpec::Exact(vec![sample.len()]), + SplitLatents { + primary: sample.clone(), + secondary: None, + }, + &PagingSpec::Exact(vec![sample_n]), Mode::Classic, delta_encoding, unoptimized_bins_log, @@ -301,19 +374,18 @@ fn calculate_compressed_sample_size( Ok(sample_cc.chunk_meta_size_hint() + sample_cc.page_size_hint_inner(0, 1.0)) } -// Right now this is entirely based on the primary latents since no existing -// modes apply deltas to secondary latents. Might want to change this -// eventually? #[inline(never)] -fn choose_delta_encoding( - primary_latents: &[L], +fn choose_delta_encoding( + primary_latents: &DynLatents, unoptimized_bins_log: Bitlen, ) -> PcoResult { + let n = primary_latents.len(); let sample = choose_delta_sample( primary_latents, DELTA_GROUP_SIZE, - 1 + primary_latents.len() / N_PER_EXTRA_DELTA_GROUP, + 1 + n / N_PER_EXTRA_DELTA_GROUP, ); + let sample_n = sample.len(); let mut best_encoding = DeltaEncoding::None; let mut best_size = calculate_compressed_sample_size( @@ -322,8 +394,22 @@ fn choose_delta_encoding( DeltaEncoding::None, )?; + let lz_penalty = (LOOKBACK_REQUIRED_BYTE_SAVINGS_PER_N * sample_n as f64) as usize; + if best_size > lz_penalty { + let lz_encoding = new_lz_delta_encoding(sample_n); + let lz_penalized_size_estimate = + calculate_compressed_sample_size(&sample, unoptimized_bins_log, lz_encoding)? 
+ lz_penalty; + if lz_penalized_size_estimate < best_size { + best_encoding = new_lz_delta_encoding(primary_latents.len()); + best_size = lz_penalized_size_estimate; + } + } + for delta_encoding_order in 1..MAX_DELTA_ENCODING_ORDER + 1 { - let encoding = DeltaEncoding::Consecutive(delta_encoding_order); + let encoding = DeltaEncoding::Consecutive(DeltaConsecutiveConfig { + order: delta_encoding_order, + secondary_uses_delta: false, + }); let size_estimate = calculate_compressed_sample_size(&sample, unoptimized_bins_log, encoding)?; if size_estimate < best_size { best_encoding = encoding; @@ -352,17 +438,21 @@ fn choose_unoptimized_bins_log(compression_level: usize, n: usize) -> Bitlen { // and we don't need a specialization for each full number type. // Returns a chunk compressor and the counts (per latent var) of numbers in // each bin. -fn new_candidate_w_split( +fn new_candidate_w_split( mode: Mode, - latents: Vec>, + latents: SplitLatents, config: &ChunkConfig, -) -> PcoResult<(ChunkCompressor, Vec>)> { - let unoptimized_bins_log = - choose_unoptimized_bins_log(config.compression_level, latents[0].len()); +) -> PcoResult<(ChunkCompressor, PerLatentVar>)> { + let n = latents.primary.len(); + let unoptimized_bins_log = choose_unoptimized_bins_log(config.compression_level, n); let delta_encoding = match config.delta_spec { - DeltaSpec::Auto => choose_delta_encoding(&latents[0], unoptimized_bins_log)?, + DeltaSpec::Auto => choose_delta_encoding(&latents.primary, unoptimized_bins_log)?, DeltaSpec::None | DeltaSpec::TryConsecutive(0) => DeltaEncoding::None, - DeltaSpec::TryConsecutive(order) => DeltaEncoding::Consecutive(order), + DeltaSpec::TryConsecutive(order) => DeltaEncoding::Consecutive(DeltaConsecutiveConfig { + order, + secondary_uses_delta: false, + }), + DeltaSpec::TryLookback => new_lz_delta_encoding(n), }; new_candidate_w_split_and_delta_encoding( @@ -374,45 +464,52 @@ fn new_candidate_w_split( ) } -fn fallback_chunk_compressor( - mut latents: Vec>, +fn fallback_chunk_compressor( + latents: SplitLatents, config: &ChunkConfig, -) -> PcoResult> { - let n = latents[0].len(); +) -> PcoResult { + let n = latents.primary.len(); let n_per_page = config.paging_spec.n_per_page(n)?; - let (page_infos, delta_moments) = build_page_infos_and_delta_moments( - Mode::Classic, - DeltaEncoding::None, - &n_per_page, - &mut latents, + let (latents, page_infos) = + delta_encode_and_build_page_infos(DeltaEncoding::None, &n_per_page, latents); + + let (meta, lcc) = match_latent_enum!( + latents.primary, + DynLatents(latents) => { + let infos = vec![BinCompressionInfo:: { + weight: 1, + symbol: 0, + ..Default::default() + }]; + let meta = guarantee::baseline_chunk_meta::(); + let latent_var_meta = &meta.per_latent_var.primary; + + let lcc = LatentChunkCompressor::new( + TrainedBins { + infos, + ans_size_log: 0, + counts: vec![n as Weight], + }, + latent_var_meta.bins.downcast_ref::().unwrap(), + latents, + )?; + (meta, DynLatentChunkCompressor::new(lcc).unwrap()) + } ); - let infos = vec![BinCompressionInfo:: { - weight: 1, - symbol: 0, - ..Default::default() - }]; - let meta = guarantee::baseline_chunk_meta::(); - let latent_var_meta = &meta.per_latent_var[0]; - - let lcc = LatentChunkCompressor::new( - TrainedBins { - infos, - ans_size_log: 0, - counts: vec![n as Weight], - }, - latent_var_meta.bins.downcast_ref::().unwrap(), - )?; + Ok(ChunkCompressor { meta, - latent_chunk_compressors: vec![lcc], + latent_chunk_compressors: PerLatentVar { + delta: None, + primary: lcc, + secondary: None, + }, 
page_infos, - deltas: latents, - delta_moments, }) } // Should this take nums as a slice of slices instead of having a config.paging_spec? -pub(crate) fn new(nums: &[T], config: &ChunkConfig) -> PcoResult> { +pub(crate) fn new(nums: &[T], config: &ChunkConfig) -> PcoResult { validate_config(config)?; let n = nums.len(); validate_chunk_size(n)?; @@ -420,16 +517,25 @@ pub(crate) fn new(nums: &[T], config: &ChunkConfig) -> PcoResult().unwrap(), + n, + bin_counts, + ) { + let split_latents = data_types::split_latents_classic(nums); + return fallback_chunk_compressor(split_latents, config); } Ok(candidate) } -impl ChunkCompressor { - fn should_fallback(&self, n: usize, bin_counts_per_latent_var: Vec>) -> bool { +impl ChunkCompressor { + fn should_fallback( + &self, + latent_type: LatentType, + n: usize, + bin_counts_per_latent_var: PerLatentVar>, + ) -> bool { let meta = &self.meta; if meta.delta_encoding == DeltaEncoding::None && meta.mode == Mode::Classic { // we already have a size guarantee in this case @@ -440,10 +546,11 @@ impl ChunkCompressor { // worst case trailing bytes after bit packing let mut worst_case_body_bit_size = 7 * n_pages; - for (latent_var_meta, bin_counts) in meta + for (_, (latent_var_meta, bin_counts)) in meta .per_latent_var - .iter() - .zip(bin_counts_per_latent_var.iter()) + .as_ref() + .zip_exact(bin_counts_per_latent_var.as_ref()) + .enumerated() { match_latent_enum!(&latent_var_meta.bins, DynBins(bins) => { for (bin, &count) in bins.iter().zip(bin_counts) { @@ -456,12 +563,12 @@ impl ChunkCompressor { let worst_case_size = meta.exact_size() + n_pages * meta.exact_page_meta_size() + worst_case_body_bit_size.div_ceil(8); - let baseline_size = guarantee::chunk_size::(n); - worst_case_size > baseline_size - } - fn page_moments(&self, page_idx: usize, latent_var_idx: usize) -> &DeltaMoments { - &self.delta_moments[page_idx][latent_var_idx] + let baseline_size = match_latent_enum!( + latent_type, + LatentType => { guarantee::chunk_size::(n) } + ); + worst_case_size > baseline_size } /// Returns the count of numbers this chunk will contain in each page. @@ -494,25 +601,24 @@ impl ChunkCompressor { Ok(writer.into_inner()) } - fn dissect_page(&self, page_idx: usize) -> PcoResult> { + fn dissect_page(&self, page_idx: usize) -> PcoResult { let Self { latent_chunk_compressors, - deltas, page_infos, .. 
} = self; let page_info = &page_infos[page_idx]; - let mut per_latent_var = Vec::new(); - for ((lcc, &delta_end), var_deltas) in latent_chunk_compressors - .iter() - .zip(page_info.end_idx_per_var.iter()) - .zip(deltas) - { - let page_deltas = &var_deltas[page_info.start_idx..delta_end]; - per_latent_var.push(lcc.dissect_page(page_deltas)); - } + let per_latent_var = latent_chunk_compressors.as_ref().map(|key, lcc| { + let range = page_info.range_for_latent_var(key); + match_latent_enum!( + lcc, + DynLatentChunkCompressor(inner) => { + inner.dissect_page(range) + } + ) + }); Ok(DissectedPage { page_n: page_info.page_n, @@ -531,13 +637,18 @@ impl ChunkCompressor { fn page_size_hint_inner(&self, page_idx: usize, page_size_overestimation: f64) -> usize { let page_info = &self.page_infos[page_idx]; let mut body_bit_size = 0; - for (lcc, &end_idx) in self + for (_, (lcc, page_info_var)) in self .latent_chunk_compressors - .iter() - .zip(&page_info.end_idx_per_var) + .as_ref() + .zip_exact(page_info.per_latent_var.as_ref()) + .enumerated() { - let page_n_deltas = end_idx - page_info.start_idx; - let nums_bit_size = page_n_deltas as f64 * lcc.avg_bits_per_delta; + let n_stored_latents = page_info_var.range.len(); + let avg_bits_per_latent = match_latent_enum!( + lcc, + DynLatentChunkCompressor(inner) => { inner.avg_bits_per_latent } + ); + let nums_bit_size = n_stored_latents as f64 * avg_bits_per_latent; body_bit_size += (nums_bit_size * page_size_overestimation).ceil() as usize; } self.meta.exact_page_meta_size() + body_bit_size.div_ceil(8) @@ -546,7 +657,7 @@ impl ChunkCompressor { #[inline(never)] fn write_dissected_page( &self, - dissected_page: DissectedPage, + dissected_page: DissectedPage, writer: &mut BitWriter, ) -> PcoResult<()> { let mut batch_start = 0; @@ -555,12 +666,18 @@ impl ChunkCompressor { batch_start + FULL_BATCH_N, dissected_page.page_n, ); - for (dissected_page_var, lcc) in dissected_page + for (_, (dissected_page_var, lcc)) in dissected_page .per_latent_var - .iter() - .zip(&self.latent_chunk_compressors) + .as_ref() + .zip_exact(self.latent_chunk_compressors.as_ref()) + .enumerated() { - lcc.write_dissected_batch(dissected_page_var, batch_start, writer)?; + match_latent_enum!( + lcc, + DynLatentChunkCompressor(inner) => { + inner.write_dissected_batch(dissected_page_var, batch_start, writer)?; + } + ); } batch_start = batch_end; } @@ -582,31 +699,33 @@ impl ChunkCompressor { let mut writer = BitWriter::new(dst, PAGE_PADDING); let dissected_page = self.dissect_page(page_idx)?; + let page_info = &self.page_infos[page_idx]; - let n_latents = self.meta.mode.n_latent_vars(); - let mut per_latent_var = Vec::with_capacity(n_latents); - for latent_idx in 0..n_latents { - let delta_moments = self.page_moments(page_idx, latent_idx).clone(); - let base_state = self.latent_chunk_compressors[latent_idx] - .encoder - .default_state(); + let ans_default_state_and_size_log = self.latent_chunk_compressors.as_ref().map(|_, lcc| { + match_latent_enum!( + lcc, + DynLatentChunkCompressor(inner) => { (inner.encoder.default_state(), inner.encoder.size_log()) } + ) + }); - let ans_final_state_idxs = dissected_page - .per_latent_var - .get(latent_idx) - .map(|dissected| dissected.ans_final_states.map(|state| state - base_state)) - .unwrap_or([0; ANS_INTERLEAVING]); - per_latent_var.push(PageLatentVarMeta { - delta_moments: DynLatents::new(delta_moments.0).unwrap(), - ans_final_state_idxs, + let per_latent_var = page_info + .per_latent_var + .as_ref() + 
.zip_exact(ans_default_state_and_size_log.as_ref()) + .zip_exact(dissected_page.per_latent_var.as_ref()) + .map(|_, tuple| { + let ((page_info_var, (ans_default_state, _)), dissected) = tuple; + let ans_final_state_idxs = dissected + .ans_final_states + .map(|state| state - ans_default_state); + PageLatentVarMeta { + delta_state: page_info_var.delta_state.clone(), + ans_final_state_idxs, + } }); - } - let page_meta = PageMeta { per_latent_var }; - let ans_size_logs = self - .latent_chunk_compressors - .iter() - .map(|config| config.encoder.size_log()); + let page_meta = PageMeta { per_latent_var }; + let ans_size_logs = ans_default_state_and_size_log.map(|_, (_, size_log)| size_log); unsafe { page_meta.write_to(ans_size_logs, &mut writer) }; self.write_dissected_page(dissected_page, &mut writer)?; @@ -623,25 +742,33 @@ mod tests { #[test] fn test_choose_delta_sample() { - let latents = vec![0_u32, 1]; + let latents = DynLatents::new(vec![0_u32, 1]).unwrap(); assert_eq!( - choose_delta_sample(&latents, 100, 0), + choose_delta_sample(&latents, 100, 0) + .downcast::() + .unwrap(), vec![0, 1] ); assert_eq!( - choose_delta_sample(&latents, 100, 1), + choose_delta_sample(&latents, 100, 1) + .downcast::() + .unwrap(), vec![0, 1] ); - let latents = (0..300).collect::>(); - let sample = choose_delta_sample(&latents, 100, 1); + let latents = DynLatents::new((0..300).collect::>()).unwrap(); + let sample = choose_delta_sample(&latents, 100, 1) + .downcast::() + .unwrap(); assert_eq!(sample.len(), 200); assert_eq!(&sample[..3], &[0, 1, 2]); assert_eq!(&sample[197..], &[297, 298, 299]); - let latents = (0..8).collect::>(); + let latents = DynLatents::new((0..8).collect::>()).unwrap(); assert_eq!( - choose_delta_sample(&latents, 2, 2), + choose_delta_sample(&latents, 2, 2) + .downcast::() + .unwrap(), vec![0, 1, 3, 4, 6, 7] ); } diff --git a/pco/src/wrapped/file_compressor.rs b/pco/src/wrapped/file_compressor.rs index 3e0cf047..b9820838 100644 --- a/pco/src/wrapped/file_compressor.rs +++ b/pco/src/wrapped/file_compressor.rs @@ -70,7 +70,7 @@ impl FileCompressor { &self, nums: &[T], config: &ChunkConfig, - ) -> PcoResult> { + ) -> PcoResult { chunk_compressor::new(nums, config) } } diff --git a/pco/src/wrapped/file_decompressor.rs b/pco/src/wrapped/file_decompressor.rs index 90719bb7..6be9926d 100644 --- a/pco/src/wrapped/file_decompressor.rs +++ b/pco/src/wrapped/file_decompressor.rs @@ -5,7 +5,7 @@ use better_io::BetterBufRead; use crate::bit_reader; use crate::bit_reader::BitReaderBuilder; use crate::constants::{CHUNK_META_PADDING, HEADER_PADDING}; -use crate::data_types::Number; +use crate::data_types::{LatentType, Number}; use crate::errors::PcoResult; use crate::metadata::chunk::ChunkMeta; use crate::metadata::format_version::FormatVersion; @@ -49,8 +49,14 @@ impl FileDecompressor { ) -> PcoResult<(ChunkDecompressor, R)> { bit_reader::ensure_buf_read_capacity(&mut src, CHUNK_META_PADDING); let mut reader_builder = BitReaderBuilder::new(src, CHUNK_META_PADDING, 0); - let chunk_meta = - unsafe { ChunkMeta::read_from::(&mut reader_builder, &self.format_version)? }; + let latent_type = LatentType::new::().unwrap(); + let chunk_meta = unsafe { + ChunkMeta::read_from::( + &mut reader_builder, + &self.format_version, + latent_type, + )? 
+ }; let cd = ChunkDecompressor::new(chunk_meta)?; Ok((cd, reader_builder.into_inner())) } diff --git a/pco/src/wrapped/guarantee.rs b/pco/src/wrapped/guarantee.rs index 06d6e775..f19bc5a4 100644 --- a/pco/src/wrapped/guarantee.rs +++ b/pco/src/wrapped/guarantee.rs @@ -1,5 +1,6 @@ use crate::data_types::Latent; use crate::metadata::chunk_latent_var::ChunkLatentVarMeta; +use crate::metadata::per_latent_var::PerLatentVar; use crate::metadata::{Bin, ChunkMeta, DeltaEncoding, DynBins, Mode}; /// Returns the maximum possible byte size of a wrapped header. @@ -8,26 +9,30 @@ pub fn header_size() -> usize { } pub(crate) fn baseline_chunk_meta() -> ChunkMeta { + let primary = ChunkLatentVarMeta { + ans_size_log: 0, + bins: DynBins::new(vec![Bin { + weight: 1, + lower: L::ZERO, + offset_bits: L::BITS, + }]) + .unwrap(), + }; + ChunkMeta { mode: Mode::Classic, delta_encoding: DeltaEncoding::None, - per_latent_var: vec![ChunkLatentVarMeta { - ans_size_log: 0, - bins: DynBins::new(vec![Bin { - weight: 1, - lower: L::ZERO, - offset_bits: L::BITS, - }]) - .unwrap(), - }], + per_latent_var: PerLatentVar { + delta: None, + primary, + secondary: None, + }, } } /// Returns the maximum possible byte size of a wrapped chunk for a given /// latent type (e.g. u32 or u64) and count of numbers. pub fn chunk_size(n: usize) -> usize { - // TODO if we ever add Numbers that are smaller than their Latents, we - // may want to make this more generic baseline_chunk_meta::().exact_size() + n * L::BITS.div_ceil(8) as usize } diff --git a/pco/src/wrapped/page_decompressor.rs b/pco/src/wrapped/page_decompressor.rs index 8b8e53b9..902ee0f4 100644 --- a/pco/src/wrapped/page_decompressor.rs +++ b/pco/src/wrapped/page_decompressor.rs @@ -5,65 +5,49 @@ use std::marker::PhantomData; use better_io::BetterBufRead; use crate::bit_reader; -use crate::bit_reader::{BitReader, BitReaderBuilder}; +use crate::bit_reader::BitReaderBuilder; use crate::constants::{FULL_BATCH_N, PAGE_PADDING}; use crate::data_types::{Latent, Number}; -use crate::delta; -use crate::delta::DeltaMoments; use crate::errors::{PcoError, PcoResult}; -use crate::latent_batch_decompressor::LatentBatchDecompressor; +use crate::latent_page_decompressor::LatentPageDecompressor; +use crate::macros::{define_latent_enum, match_latent_enum}; use crate::metadata::page::PageMeta; -use crate::metadata::{ChunkMeta, DeltaEncoding, Mode}; +use crate::metadata::per_latent_var::{PerLatentVar, PerLatentVarBuilder}; +use crate::metadata::{ChunkMeta, DeltaEncoding, DynBins, DynLatents, Mode}; use crate::progress::Progress; const PERFORMANT_BUF_READ_CAPACITY: usize = 8192; -#[derive(Clone, Debug)] -pub struct State { - n_processed: usize, - latent_batch_decompressors: Vec>, - delta_momentss: Vec>, // one per latent variable - secondary_latents: [L; FULL_BATCH_N], +#[derive(Debug)] +struct LatentScratch { + is_constant: bool, + dst: DynLatents, } -/// Holds metadata about a page and supports decompression. 
-pub struct PageDecompressor { +define_latent_enum!( + #[derive()] + DynLatentPageDecompressor(LatentPageDecompressor) +); + +struct PageDecompressorInner { // immutable n: usize, mode: Mode, delta_encoding: DeltaEncoding, - maybe_constant_latents: Vec>, // 1 per latent var - phantom: PhantomData, // mutable reader_builder: BitReaderBuilder, - state: State, + n_processed: usize, + // TODO make these heap allocated + latent_decompressors: PerLatentVar, + delta_scratch: Option, + secondary_scratch: Option, } -unsafe fn decompress_latents_w_delta( - reader: &mut BitReader, - delta_encoding: DeltaEncoding, - n_remaining: usize, - delta_state: &mut DeltaMoments, - lbd: &mut LatentBatchDecompressor, - dst: &mut [L], -) -> PcoResult<()> { - let n_remaining_pre_delta = n_remaining.saturating_sub(delta_state.order()); - let pre_delta_len = if dst.len() <= n_remaining_pre_delta { - dst.len() - } else { - // If we're at the end, LatentBatchdDecompressor won't initialize the last - // few elements before delta decoding them, so we do that manually here to - // satisfy MIRI. This step isn't really necessary. - dst[n_remaining_pre_delta..].fill(L::default()); - n_remaining_pre_delta - }; - lbd.decompress_latent_batch(reader, &mut dst[..pre_delta_len])?; - match delta_encoding { - DeltaEncoding::None => (), - DeltaEncoding::Consecutive(_) => delta::decode_in_place(delta_state, dst), - } - Ok(()) +/// Holds metadata about a page and supports decompression. +pub struct PageDecompressor { + inner: PageDecompressorInner, + phantom: PhantomData, } fn convert_from_latents_to_numbers(dst: &mut [T]) { @@ -73,127 +57,190 @@ fn convert_from_latents_to_numbers(dst: &mut [T]) { } } -impl PageDecompressor { +impl PageDecompressorInner { pub(crate) fn new(mut src: R, chunk_meta: &ChunkMeta, n: usize) -> PcoResult { bit_reader::ensure_buf_read_capacity(&mut src, PERFORMANT_BUF_READ_CAPACITY); let mut reader_builder = BitReaderBuilder::new(src, PAGE_PADDING, 0); - let page_meta = reader_builder - .with_reader(|reader| unsafe { PageMeta::read_from::(reader, chunk_meta) })?; + let page_meta = + reader_builder.with_reader(|reader| unsafe { PageMeta::read_from(reader, chunk_meta) })?; let mode = chunk_meta.mode; - let delta_momentss = page_meta + + let mut states = PerLatentVarBuilder::default(); + for (key, (chunk_latent_var_meta, page_latent_var_meta)) in chunk_meta .per_latent_var - .iter() - .map(|latent_var_meta| { - let moments = latent_var_meta - .delta_moments - .downcast_ref::() - .unwrap() - .clone(); - DeltaMoments(moments) - }) - .collect::>(); - - let mut latent_batch_decompressors = Vec::new(); - for latent_idx in 0..mode.n_latent_vars() { - let chunk_latent_meta = &chunk_meta.per_latent_var[latent_idx]; - - // this will change to dynamically typed soon - let bins = chunk_latent_meta.bins.downcast_ref::().unwrap(); - let n_in_body = n.saturating_sub(chunk_meta.delta_encoding.n_latents_per_state()); - if bins.is_empty() && n_in_body > 0 { - return Err(PcoError::corruption(format!( - "unable to decompress chunk with no bins and {} latents", - n_in_body - ))); - } - - latent_batch_decompressors.push(LatentBatchDecompressor::new( - chunk_latent_meta.ans_size_log, - bins, - page_meta.per_latent_var[latent_idx].ans_final_state_idxs, - )?); + .as_ref() + .zip_exact(page_meta.per_latent_var.as_ref()) + .enumerated() + { + let var_delta_encoding = chunk_meta.delta_encoding.for_latent_var(key); + let n_in_body = n.saturating_sub(var_delta_encoding.n_latents_per_state()); + let state = match_latent_enum!( + 
&chunk_latent_var_meta.bins, + DynBins(bins) => { + let delta_state = page_latent_var_meta + .delta_state + .downcast_ref::() + .unwrap() + .clone(); + + if bins.is_empty() && n_in_body > 0 { + return Err(PcoError::corruption(format!( + "unable to decompress chunk with no bins and {} latents", + n_in_body + ))); + } + + let lpd = LatentPageDecompressor::new( + chunk_latent_var_meta.ans_size_log, + bins, + var_delta_encoding, + page_latent_var_meta.ans_final_state_idxs, + delta_state, + )?; + + DynLatentPageDecompressor::new(lpd).unwrap() + } + ); + + states.set(key, state); } + let latent_decompressors: PerLatentVar = states.into(); - let maybe_constant_secondary = - if latent_batch_decompressors.len() >= 2 && delta_momentss[1].order() == 0 { - latent_batch_decompressors[1].maybe_constant_value - } else { - None - }; - let maybe_constant_latents = vec![None, maybe_constant_secondary]; + fn make_latent_scratch(lpd: Option<&DynLatentPageDecompressor>) -> Option { + let lpd = lpd?; + + match_latent_enum!( + lpd, + DynLatentPageDecompressor(inner) => { + let maybe_constant_value = inner.maybe_constant_value; + Some(LatentScratch { + is_constant: maybe_constant_value.is_some(), + dst: DynLatents::new(vec![maybe_constant_value.unwrap_or_default(); FULL_BATCH_N]).unwrap(), + }) + } + ) + } + let delta_scratch = make_latent_scratch(latent_decompressors.delta.as_ref()); + let secondary_scratch = make_latent_scratch(latent_decompressors.secondary.as_ref()); // we don't store the whole ChunkMeta because it can get large due to bins - let secondary_default = maybe_constant_secondary.unwrap_or(T::L::default()); Ok(Self { n, mode, delta_encoding: chunk_meta.delta_encoding, - maybe_constant_latents, - phantom: PhantomData, reader_builder, - state: State { - n_processed: 0, - latent_batch_decompressors, - delta_momentss, - secondary_latents: [secondary_default; FULL_BATCH_N], - }, + n_processed: 0, + latent_decompressors, + delta_scratch, + secondary_scratch, + }) + } + + fn n_remaining(&self) -> usize { + self.n - self.n_processed + } +} + +impl PageDecompressor { + #[inline(never)] + pub(crate) fn new(src: R, chunk_meta: &ChunkMeta, n: usize) -> PcoResult { + Ok(Self { + inner: PageDecompressorInner::new(src, chunk_meta, n)?, + phantom: PhantomData::, }) } fn decompress_batch(&mut self, dst: &mut [T]) -> PcoResult<()> { let batch_n = dst.len(); - let n = self.n; - let mode = self.mode; - let State { - latent_batch_decompressors, - delta_momentss, - secondary_latents, - n_processed, - .. - } = &mut self.state; + let inner = &mut self.inner; + let n = inner.n; + let n_remaining = inner.n_remaining(); + let mode = inner.mode; - let secondary_latents = &mut secondary_latents[..batch_n]; - let n_latents = latent_batch_decompressors.len(); + // DELTA LATENTS + if let Some(LatentScratch { + is_constant: false, + dst, + }) = &mut inner.delta_scratch + { + let dyn_lpd = inner.latent_decompressors.delta.as_mut().unwrap(); + let limit = min( + n_remaining.saturating_sub(inner.delta_encoding.n_latents_per_state()), + batch_n, + ); + inner.reader_builder.with_reader(|reader| unsafe { + match_latent_enum!( + dyn_lpd, + DynLatentPageDecompressor(lpd) => { + // Delta latents only line up with pre-delta length of the other + // latents. 
+ // We never apply delta encoding to delta latents, so we just + // skip straight to the inner LatentBatchDecompressor + lpd.decompress_batch_pre_delta( + reader, + &mut dst.downcast_mut::().unwrap()[..limit] + ) + } + ); + Ok(()) + })?; + } + let delta_latents = inner.delta_scratch.as_ref().map(|scratch| &scratch.dst); - self.reader_builder.with_reader(|reader| { + // PRIMARY LATENTS + inner.reader_builder.with_reader(|reader| unsafe { let primary_dst = T::transmute_to_latents(dst); - unsafe { - decompress_latents_w_delta( - reader, - mode.delta_encoding_for_latent_var(0, self.delta_encoding), - n - *n_processed, - &mut delta_momentss[0], - &mut latent_batch_decompressors[0], - primary_dst, - ) - } + let dyn_lpd = inner + .latent_decompressors + .primary + .downcast_mut::() + .unwrap(); + dyn_lpd.decompress_batch( + delta_latents, + n_remaining, + reader, + primary_dst, + ); + Ok(()) })?; - if n_latents >= 2 && self.maybe_constant_latents[1].is_none() { - self.reader_builder.with_reader(|reader| unsafe { - decompress_latents_w_delta( - reader, - mode.delta_encoding_for_latent_var(1, self.delta_encoding), - n - *n_processed, - &mut delta_momentss[1], - &mut latent_batch_decompressors[1], - secondary_latents, - ) + // SECONDARY LATENTS + if let Some(LatentScratch { + is_constant: false, + dst, + }) = &mut inner.secondary_scratch + { + let dyn_lpd = inner.latent_decompressors.secondary.as_mut().unwrap(); + inner.reader_builder.with_reader(|reader| unsafe { + match_latent_enum!( + dyn_lpd, + DynLatentPageDecompressor(lpd) => { + // We never apply delta encoding to delta latents, so we just + // skip straight to the inner LatentBatchDecompressor + lpd.decompress_batch( + delta_latents, + n_remaining, + reader, + &mut dst.downcast_mut::().unwrap()[..batch_n] + ) + } + ); + Ok(()) })?; } T::join_latents( mode, T::transmute_to_latents(dst), - secondary_latents, + inner.secondary_scratch.as_ref().map(|scratch| &scratch.dst), ); convert_from_latents_to_numbers(dst); - *n_processed += batch_n; - if *n_processed == n { - self.reader_builder.with_reader(|reader| { + inner.n_processed += batch_n; + if inner.n_processed == n { + inner.reader_builder.with_reader(|reader| { reader.drain_empty_byte("expected trailing bits at end of page to be empty") })?; } @@ -209,17 +256,18 @@ impl PageDecompressor { /// `dst` must have length either a multiple of 256 or be at least the count /// of numbers remaining in the page. pub fn decompress(&mut self, num_dst: &mut [T]) -> PcoResult { - if num_dst.len() % FULL_BATCH_N != 0 && num_dst.len() < self.n_remaining() { + let n_remaining = self.inner.n_remaining(); + if num_dst.len() % FULL_BATCH_N != 0 && num_dst.len() < n_remaining { return Err(PcoError::invalid_argument(format!( "num_dst's length must either be a multiple of {} or be \ at least the count of numbers remaining ({} < {})", FULL_BATCH_N, num_dst.len(), - self.n_remaining(), + n_remaining, ))); } - let n_to_process = min(num_dst.len(), self.n_remaining()); + let n_to_process = min(num_dst.len(), n_remaining); let mut n_processed = 0; while n_processed < n_to_process { @@ -230,16 +278,12 @@ impl PageDecompressor { Ok(Progress { n_processed, - finished: self.n_remaining() == 0, + finished: self.inner.n_remaining() == 0, }) } - fn n_remaining(&self) -> usize { - self.n - self.state.n_processed - } - /// Returns the rest of the compressed data source. 
pub fn into_src(self) -> R { - self.reader_builder.into_inner() + self.inner.reader_builder.into_inner() } } diff --git a/pco_cli/src/dtypes.rs b/pco_cli/src/dtypes.rs index 5ac74578..1c26885c 100644 --- a/pco_cli/src/dtypes.rs +++ b/pco_cli/src/dtypes.rs @@ -299,5 +299,9 @@ pub fn to_arrow(dtype: NumberType) -> ArrowDataType { NumberType::U16 => ArrowDataType::UInt16, NumberType::U32 => ArrowDataType::UInt32, NumberType::U64 => ArrowDataType::UInt64, + other => panic!( + "number type {:?} not yet supported in pco_cli", + other + ), } } diff --git a/pco_cli/src/input/mod.rs b/pco_cli/src/input/mod.rs index be73a544..3a2beae0 100644 --- a/pco_cli/src/input/mod.rs +++ b/pco_cli/src/input/mod.rs @@ -441,6 +441,12 @@ impl PcoColumnReader { U64 => Arc::new(UInt64Array::from(simple_decompress::( &compressed, )?)), + other => { + return Err(anyhow!( + "number type {:?} not yet supported in pco_cli", + other + )) + } }; Ok(array) } diff --git a/pco_cli/src/inspect/handler.rs b/pco_cli/src/inspect/handler.rs index bc2ff3c2..3a2640e3 100644 --- a/pco_cli/src/inspect/handler.rs +++ b/pco_cli/src/inspect/handler.rs @@ -7,7 +7,8 @@ use tabled::settings::{Alignment, Modify, Style}; use tabled::{Table, Tabled}; use pco::data_types::{Latent, Number}; -use pco::metadata::ChunkMeta; +use pco::match_latent_enum; +use pco::metadata::{ChunkMeta, DynBins, DynLatent, LatentVarKey}; use pco::standalone::{FileDecompressor, MaybeChunkDecompressor}; use crate::core_handlers::CoreHandlerImpl; @@ -39,8 +40,10 @@ pub struct BinSummary { #[derive(Serialize)] pub struct LatentVarSummary { + name: String, n_bins: usize, ans_size_log: u32, + approx_avg_bits: f64, bins: String, } @@ -75,31 +78,53 @@ fn measure_bytes_read(src: &[u8], prev_src_len: &mut usize) -> usize { fn build_latent_var_summaries(meta: &ChunkMeta) -> BTreeMap { let describers = T::get_latent_describers(meta); let mut summaries = BTreeMap::new(); - for (latent_var_idx, latent_var_meta) in meta.per_latent_var.iter().enumerate() { - let describer = &describers[latent_var_idx]; + for (key, (latent_var_meta, describer)) in meta + .per_latent_var + .as_ref() + .zip_exact(describers) + .enumerated() + { let unit = describer.latent_units(); - let bins = latent_var_meta.bins.downcast_ref::().unwrap(); - let mut bin_summaries = Vec::new(); - for bin in bins { - bin_summaries.push(BinSummary { - weight: bin.weight, - lower: format!("{}{}", describer.latent(bin.lower), unit), - offset_bits: bin.offset_bits, - }); - } + let mut approx_total_bits = 0.0; + let bin_summaries = match_latent_enum!( + &latent_var_meta.bins, + DynBins(bins) => { + let mut bin_summaries = Vec::new(); + for bin in bins { + bin_summaries.push(BinSummary { + weight: bin.weight, + lower: format!("{}{}", describer.latent(DynLatent::new(bin.lower).unwrap()), unit), + offset_bits: bin.offset_bits, + }); + let weight = bin.weight as f64; + approx_total_bits += weight * (bin.offset_bits as f64 + latent_var_meta.ans_size_log as f64 - weight.log2()); + } + bin_summaries + } + ); + let n_bins = bin_summaries.len(); let bins_table = Table::new(bin_summaries) .with(Style::rounded()) .with(Modify::new(Columns::new(0..3)).with(Alignment::right())) .to_string(); + let total_weight = (1 << latent_var_meta.ans_size_log) as f64; let summary = LatentVarSummary { - n_bins: bins.len(), + name: describer.latent_var(), + n_bins, ans_size_log: latent_var_meta.ans_size_log, + approx_avg_bits: approx_total_bits / total_weight, bins: bins_table.to_string(), }; - summaries.insert(describer.latent_var(), summary); + let 
key_name = match key { + LatentVarKey::Delta => "delta", + LatentVarKey::Primary => "primary", + LatentVarKey::Secondary => "secondary", + }; + + summaries.insert(key_name.to_string(), summary); } summaries diff --git a/pco_python/README.md b/pco_python/README.md index 00b2fa1f..38cb4c75 100644 --- a/pco_python/README.md +++ b/pco_python/README.md @@ -22,7 +22,7 @@ Pcodec is a codec for numerical sequences. Example usage: >>> # compress >>> compressed = standalone.simple_compress(nums, ChunkConfig()) >>> print(f'compressed to {len(compressed)} bytes') -compressed to 6946257 bytes +compressed to 6946258 bytes >>> >>> # decompress >>> recovered = standalone.simple_decompress(compressed) diff --git a/pco_python/src/config.rs b/pco_python/src/config.rs index a658fe4b..166ea652 100644 --- a/pco_python/src/config.rs +++ b/pco_python/src/config.rs @@ -67,6 +67,12 @@ impl PyDeltaSpec { fn try_consecutive(order: usize) -> Self { Self(DeltaSpec::TryConsecutive(order)) } + + /// :returns: a DeltaSpec that tries to use delta lookbacks, if possible. + #[staticmethod] + fn try_lookback() -> Self { + Self(DeltaSpec::TryLookback) + } } #[pyclass(name = "PagingSpec")] diff --git a/pco_python/src/wrapped/compressor.rs b/pco_python/src/wrapped/compressor.rs index 8155519b..1933efcb 100644 --- a/pco_python/src/wrapped/compressor.rs +++ b/pco_python/src/wrapped/compressor.rs @@ -5,9 +5,9 @@ use pyo3::prelude::*; use pyo3::types::{PyBytes, PyModule}; use pyo3::{pyclass, pymethods, Bound, PyResult, Python}; -use pco::data_types::{Latent, Number, NumberType}; +use pco::data_types::{Number, NumberType}; use pco::wrapped::{ChunkCompressor, FileCompressor}; -use pco::{match_latent_enum, match_number_enum, ChunkConfig}; +use pco::{match_number_enum, ChunkConfig}; use crate::utils::pco_err_to_py; use crate::{utils, PyChunkConfig}; @@ -18,15 +18,10 @@ struct PyFc { inner: FileCompressor, } -pco::define_latent_enum!( - #[derive()] - DynCc(ChunkCompressor) -); - // can't pass inner directly since pyo3 only supports unit variant enums /// Holds metadata about a chunk and supports compressing one page at a time. #[pyclass(name = "ChunkCompressor")] -struct PyCc(DynCc); +struct PyCc(ChunkCompressor); impl PyFc { fn chunk_compressor_generic( @@ -34,7 +29,7 @@ impl PyFc { py: Python, arr: &Bound>, config: &ChunkConfig, - ) -> PyResult> { + ) -> PyResult { let arr_ro = arr.readonly(); let src = arr_ro.as_slice()?; py.allow_threads(|| self.inner.chunk_compressor(src, config)) @@ -87,7 +82,7 @@ impl PyFc { number_type, NumberType => { let cc = self.chunk_compressor_generic::(py, nums.downcast::>()?, &config)?; - Ok(PyCc(DynCc::new(cc).unwrap())) + Ok(PyCc(cc)) } ) } @@ -99,22 +94,14 @@ impl PyCc { /// /// :raises: TypeError, RuntimeError fn write_chunk_meta<'py>(&self, py: Python<'py>) -> PyResult> { - match_latent_enum!( - &self.0, - DynCc(cc) => { - let mut res = Vec::new(); - cc.write_chunk_meta(&mut res).map_err(pco_err_to_py)?; - Ok(PyBytes::new_bound(py, &res)) - } - ) + let mut res = Vec::new(); + self.0.write_chunk_meta(&mut res).map_err(pco_err_to_py)?; + Ok(PyBytes::new_bound(py, &res)) } /// :returns: a list containing the count of numbers in each page. fn n_per_page(&self) -> Vec { - match_latent_enum!( - &self.0, - DynCc(cc) => { cc.n_per_page() } - ) + self.0.n_per_page() } /// :param page_idx: an int for which page you want to write. 
@@ -123,15 +110,10 @@ impl PyCc { /// /// :raises: TypeError, RuntimeError fn write_page<'py>(&self, py: Python<'py>, page_idx: usize) -> PyResult> { - match_latent_enum!( - &self.0, - DynCc(cc) => { - let mut res = Vec::new(); - py.allow_threads(|| cc.write_page(page_idx, &mut res)) - .map_err(pco_err_to_py)?; - Ok(PyBytes::new_bound(py, &res)) - } - ) + let mut res = Vec::new(); + py.allow_threads(|| self.0.write_page(page_idx, &mut res)) + .map_err(pco_err_to_py)?; + Ok(PyBytes::new_bound(py, &res)) } } diff --git a/pco_python/test/test_standalone.py b/pco_python/test/test_standalone.py index 3b5085a6..d5561686 100644 --- a/pco_python/test/test_standalone.py +++ b/pco_python/test/test_standalone.py @@ -15,6 +15,7 @@ ) all_dtypes = ("f2", "f4", "f8", "i2", "i4", "i8", "u2", "u4", "u8") + @pytest.mark.parametrize("length", all_lengths) @pytest.mark.parametrize("dtype", all_dtypes) def test_round_trip_decompress_into(length, dtype): @@ -96,23 +97,22 @@ def test_compression_options(): # this is mostly just to check that there is no error, but these settings # should give worse compression than the defaults - assert ( - len( - standalone.simple_compress( - data, - ChunkConfig( - compression_level=0, - delta_spec=DeltaSpec.try_consecutive(1), - mode_spec=ModeSpec.classic(), - paging_spec=PagingSpec.equal_pages_up_to(77), - ), - ) + for delta_spec in [DeltaSpec.try_consecutive(1), DeltaSpec.try_lookback()]: + compressed = standalone.simple_compress( + data, + ChunkConfig( + compression_level=0, + delta_spec=delta_spec, + mode_spec=ModeSpec.classic(), + paging_spec=PagingSpec.equal_pages_up_to(77), + ), ) - > default_size - ) + assert len(compressed) > default_size -@pytest.mark.parametrize("mode_spec", [ModeSpec.auto(), ModeSpec.classic(), ModeSpec.try_int_mult(10)]) +@pytest.mark.parametrize( + "mode_spec", [ModeSpec.auto(), ModeSpec.classic(), ModeSpec.try_int_mult(10)] +) def test_compression_int_mode_spec_options(mode_spec): data = (np.random.normal(size=100) * 1000).astype(np.int32) @@ -128,7 +128,15 @@ def test_compression_int_mode_spec_options(mode_spec): np.testing.assert_array_equal(data, out) -@pytest.mark.parametrize("mode_spec", [ModeSpec.auto(), ModeSpec.classic(), ModeSpec.try_float_mult(10.0), ModeSpec.try_float_quant(4)]) +@pytest.mark.parametrize( + "mode_spec", + [ + ModeSpec.auto(), + ModeSpec.classic(), + ModeSpec.try_float_mult(10.0), + ModeSpec.try_float_quant(4), + ], +) def test_compression_float_mode_spec_options(mode_spec): data = (np.random.normal(size=100) * 1000).astype(np.int32) * np.pi