From 7e92b627f6be12aee9be9bd0c865d0a6fe78a45e Mon Sep 17 00:00:00 2001 From: Mingzhuo Yin Date: Tue, 7 Jan 2025 16:20:17 +0800 Subject: [PATCH] fix: invalid ScoreWindow in VariableBlockPartition Signed-off-by: Mingzhuo Yin --- src/algorithm/block_encode/delta_bitpack.rs | 8 +-- src/algorithm/block_encode/elias_fano.rs | 6 +- .../variable_block_partition.rs | 58 ++++++++++++++++++- 3 files changed, 63 insertions(+), 9 deletions(-) diff --git a/src/algorithm/block_encode/delta_bitpack.rs b/src/algorithm/block_encode/delta_bitpack.rs index 401e1f1..60eccac 100644 --- a/src/algorithm/block_encode/delta_bitpack.rs +++ b/src/algorithm/block_encode/delta_bitpack.rs @@ -262,7 +262,7 @@ mod tests { use super::*; #[test] - fn test_delta_bitpack_next() { + fn test_next() { let mut encoder = DeltaBitpackEncode::new(); let mut decoder = DeltaBitpackDecode::new(); @@ -294,7 +294,7 @@ mod tests { } #[test] - fn test_delta_bitpack_seek() { + fn test_seek() { let mut encoder = DeltaBitpackEncode::new(); let mut decoder = DeltaBitpackDecode::new(); @@ -326,7 +326,7 @@ mod tests { } #[test] - fn test_delta_bitpack_seek2() { + fn test_seek_long() { let mut encoder = DeltaBitpackEncode::new(); let mut decoder = DeltaBitpackDecode::new(); @@ -354,7 +354,7 @@ mod tests { } #[test] - fn test_delta_bitpack_zero_bit_width() { + fn test_zero_bit_width() { let mut encoder = DeltaBitpackEncode::new(); let mut decoder = DeltaBitpackDecode::new(); diff --git a/src/algorithm/block_encode/elias_fano.rs b/src/algorithm/block_encode/elias_fano.rs index e7afacf..d9d5373 100644 --- a/src/algorithm/block_encode/elias_fano.rs +++ b/src/algorithm/block_encode/elias_fano.rs @@ -372,7 +372,7 @@ mod tests { use super::*; #[test] - fn test_elias_fano_next() { + fn test_next() { let mut encoder = EliasFanoEncode::new(); let mut decoder = EliasFanoDecode::new(); @@ -404,7 +404,7 @@ mod tests { } #[test] - fn test_elias_fano_seek() { + fn test_seek() { let mut encoder = EliasFanoEncode::new(); let mut decoder = EliasFanoDecode::new(); @@ -436,7 +436,7 @@ mod tests { } #[test] - fn test_elias_fano_seek2() { + fn test_seek_long() { let mut encoder = EliasFanoEncode::new(); let mut decoder = EliasFanoDecode::new(); diff --git a/src/algorithm/block_partition/variable_block_partition.rs b/src/algorithm/block_partition/variable_block_partition.rs index 387ccd0..4477eb9 100644 --- a/src/algorithm/block_partition/variable_block_partition.rs +++ b/src/algorithm/block_partition/variable_block_partition.rs @@ -67,7 +67,7 @@ impl BlockPartitionTrait for VariableBlockPartition { let mut cost_bound = self.lambda; while self.eps1 == 0. || cost_bound < self.lambda / self.eps1 { - self.score_window.push(Default::default()); + self.score_window.push(ScoreWindow::new(cost_bound)); if cost_bound >= max_block_cost { break; } @@ -129,7 +129,6 @@ impl BlockPartitionTrait for VariableBlockPartition { } } -#[derive(Default)] struct ScoreWindow { start: u32, end: u32, @@ -139,6 +138,16 @@ struct ScoreWindow { } impl ScoreWindow { + fn new(cost_upper_bound: f32) -> Self { + Self { + start: 0, + end: 0, + cost_upper_bound, + sum: 0., + max_queue: VecDeque::new(), + } + } + fn advance_start(&mut self, scores: &[f32]) { let score = scores[self.start as usize]; self.sum -= score; @@ -162,3 +171,48 @@ impl ScoreWindow { (self.end - self.start) as f32 * self.max_queue.front().unwrap() - self.sum + fixed_cost } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_single() { + let mut partition = VariableBlockPartition::new(1., 0.01, 0.4); + partition.add_doc(1.); + partition.make_partitions(); + assert_eq!(partition.partitions(), &[0]); + assert_eq!(partition.max_doc(), &[0]); + } + + #[test] + fn test_split_block() { + let mut partition = VariableBlockPartition::new(0.1, 0.01, 0.4); + partition.add_doc(1.); + partition.add_doc(2.); + partition.make_partitions(); + assert_eq!(partition.partitions(), &[0, 1]); + assert_eq!(partition.max_doc(), &[0, 1]); + } + + #[test] + fn test_merge_block() { + let mut partition = VariableBlockPartition::new(f32::MAX, 0.01, 0.4); + partition.add_doc(1.); + partition.add_doc(2.); + partition.make_partitions(); + assert_eq!(partition.partitions(), &[1]); + assert_eq!(partition.max_doc(), &[1]); + } + + #[test] + fn test_optimal_block() { + let mut partition = VariableBlockPartition::new(1., 0.01, 0.4); + partition.add_doc(1.); + partition.add_doc(1.); + partition.add_doc(10.); + partition.make_partitions(); + assert_eq!(partition.partitions(), &[1, 2]); + assert_eq!(partition.max_doc(), &[1, 2]); + } +}