Skip to content

Commit

Permalink
page sizes work, added todo
Browse files Browse the repository at this point in the history
  • Loading branch information
mwlon committed Oct 31, 2023
1 parent 72774fe commit 497d753
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 20 deletions.
8 changes: 8 additions & 0 deletions bench/src/codecs/pco.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::codecs::CodecInternal;
use crate::dtypes::Dtype;
use anyhow::{anyhow, Result};
use pco::PagingSpec;

#[derive(Clone, Debug, Default)]
pub struct PcoConfig {
Expand All @@ -22,6 +23,10 @@ impl CodecInternal for PcoConfig {
.unwrap_or("auto".to_string()),
"use_gcds" => self.compressor_config.use_gcds.to_string(),
"use_float_mult" => self.compressor_config.use_float_mult.to_string(),
"page_size" => match self.compressor_config.paging_spec {
PagingSpec::EqualPagesUpTo(page_size) => page_size.to_string(),
_ => panic!("unexpected paging spec"),
},
_ => panic!("bad conf"),
}
}
Expand All @@ -43,6 +48,9 @@ impl CodecInternal for PcoConfig {
}
"use_gcds" => self.compressor_config.use_gcds = value.parse::<bool>().unwrap(),
"use_float_mult" => self.compressor_config.use_float_mult = value.parse::<bool>().unwrap(),
"page_size" => {
self.compressor_config.paging_spec = PagingSpec::EqualPagesUpTo(value.parse().unwrap())
}
_ => return Err(anyhow!("unknown conf: {}", key)),
}
Ok(())
Expand Down
14 changes: 6 additions & 8 deletions bench/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,14 +136,12 @@ fn print_stats(mut stats: Vec<PrintStat>, opt: &Opt) {
.or_default()
.add_assign(stat.clone());
}
stats.extend(
opt.codecs.iter().map(|codec| {
let codec = codec.to_string();
let mut stat = aggregate_by_codec.get(&codec).cloned().unwrap();
stat.codec = codec;
stat
})
);
stats.extend(opt.codecs.iter().map(|codec| {
let codec = codec.to_string();
let mut stat = aggregate_by_codec.get(&codec).cloned().unwrap();
stat.codec = codec;
stat
}));
stats.push(aggregate);
let table = Table::new(stats)
.with(Style::rounded())
Expand Down
6 changes: 6 additions & 0 deletions pco/src/chunk_meta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,12 @@ impl<U: UnsignedLike> ChunkLatentMeta<U> {
ans_size_log,
)));
}
if ans_size_log > MAX_ANS_BITS {
return Err(PcoError::corruption(format!(
"ANS size log ({}) should not be greater than {}",
ans_size_log, MAX_ANS_BITS,
)));
}

let mut bins = Vec::with_capacity(n_bins);
while bins.len() < n_bins {
Expand Down
28 changes: 17 additions & 11 deletions pco/src/standalone/simple.rs
Original file line number Diff line number Diff line change
@@ -1,29 +1,35 @@
use crate::bits;
use crate::chunk_config::ChunkConfig;
use crate::data_types::NumberLike;
use crate::errors::PcoResult;
use crate::standalone::compressor::FileCompressor;
use crate::standalone::decompressor::FileDecompressor;

const DEFAULT_CHUNK_SIZE: usize = 1_000_000;
use crate::PagingSpec;

/// Takes in a slice of numbers and an exact configuration and returns
/// compressed bytes.
///
/// Will return an error if the compressor config is invalid.
/// This will use the `PagingSpec` in `ChunkConfig` to decide where to split
/// chunks.
/// For standalone, the concepts of chunk and page are conflated since each
/// chunk has exactly one page.
pub fn simple_compress<T: NumberLike>(nums: &[T], config: &ChunkConfig) -> PcoResult<Vec<u8>> {
let mut dst = Vec::new();
let file_compressor = FileCompressor::default();
file_compressor.write_header(&mut dst)?;

let n_chunks = bits::ceil_div(nums.len(), DEFAULT_CHUNK_SIZE);
if n_chunks > 0 {
let n_per_chunk = bits::ceil_div(nums.len(), n_chunks);
for chunk in nums.chunks(n_per_chunk) {
let chunk_compressor = file_compressor.chunk_compressor(chunk, config)?;
dst.reserve(chunk_compressor.chunk_size_hint());
chunk_compressor.write_chunk(&mut dst)?;
}
// here we use the paging spec to determine chunks; each chunk has 1 page
let page_sizes = config.paging_spec.page_sizes(nums.len())?;
let mut start = 0;
let mut this_chunk_config = config.clone();
for &page_size in &page_sizes {
let end = start + page_size;
this_chunk_config.paging_spec = PagingSpec::ExactPageSizes(vec![page_size]);
let chunk_compressor =
file_compressor.chunk_compressor(&nums[start..end], &this_chunk_config)?;
dst.reserve(chunk_compressor.chunk_size_hint());
chunk_compressor.write_chunk(&mut dst)?;
start = end;
}

file_compressor.write_footer(&mut dst)?;
Expand Down
3 changes: 2 additions & 1 deletion pco/src/wrapped/chunk_compressor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,6 @@ pub(crate) fn new<T: NumberLike>(

let table = CompressionTable::from(trained.infos);
let encoder = ans::Encoder::from_bins(trained.ans_size_log, &bins)?;
println!("encoder size log {}", encoder.size_log());

latent_metas.push(ChunkLatentMeta {
bins,
Expand Down Expand Up @@ -554,6 +553,8 @@ impl<U: UnsignedLike> ChunkCompressor<U> {

let mut writer = BitWriter::new(dst, PAGE_PADDING);

// TODO: why doesn't `dissect_unsigneds` take `page_idx`? Is this doing
// repeated work (or worse)?
let dissected_src = self.dissect_unsigneds()?;

let mut latent_metas = Vec::with_capacity(self.n_latents);
Expand Down

0 comments on commit 497d753

Please sign in to comment.