Skip to content

Commit

Permalink
fix: wrong behavior in PostingCursor
Browse files Browse the repository at this point in the history
Signed-off-by: Mingzhuo Yin <[email protected]>
  • Loading branch information
silver-ymz committed Jan 7, 2025
1 parent f86f841 commit ed9d571
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 24 deletions.
16 changes: 16 additions & 0 deletions src/algorithm/block_wand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,9 @@ fn restore_ordering(term_scorers: &mut [SealedScorer], ord: usize) {
}
term_scorers.swap(i, i - 1);
}
debug_assert!(is_sorted(
term_scorers.iter().map(|scorer| scorer.posting.docid())
));
}

fn align_scorers(
Expand Down Expand Up @@ -179,3 +182,16 @@ fn advance_all_scorers_on_pivot(term_scorers: &mut Vec<SealedScorer>, pivot_len:
term_scorers.retain(|scorer| !scorer.posting.completed());
term_scorers.sort_unstable_by_key(|scorer| scorer.posting.docid());
}

fn is_sorted(mut it: impl Iterator<Item = u32>) -> bool {
if let Some(first) = it.next() {
let mut prev = first;
for doc in it {
if doc < prev {
return false;
}
prev = doc;
}
}
true
}
62 changes: 38 additions & 24 deletions src/segment/posting/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ pub struct PostingCursor {
// block reader
block_page_reader: VirtualPageReader,
block_page_id: u32,
page_offset: usize,
page_offset: u32,
// skip info reader
skip_info_page_id: u32,
skip_info_offset: usize,
skip_info_offset: u32,
decode_offset: u32,
cur_skip_info: SkipBlock,
// helper state
Expand All @@ -60,6 +60,7 @@ pub struct PostingCursor {
// unfulled block
unfulled_docid: Box<[u32]>,
unfulled_freq: Box<[u32]>,
unfulled_offset: u32,
}

impl PostingCursor {
Expand Down Expand Up @@ -92,6 +93,7 @@ impl PostingCursor {
remain_block_cnt,
unfulled_docid,
unfulled_freq,
unfulled_offset: u32::MAX,
};

this.update_skip_info();
Expand All @@ -106,31 +108,35 @@ impl PostingCursor {
return false;
}

let skip = &self.cur_skip_info;
self.decode_offset = skip.last_doc;
self.page_offset += skip.size as usize;
if skip.flag.contains(SkipBlockFlags::PAGE_CHANGED) || self.is_in_unfulled_block() {
self.block_page_id += 1;
self.page_offset = 0;
}
self.decode_offset = self.cur_skip_info.last_doc;
self.page_offset += self.cur_skip_info.size as u32;

self.skip_info_offset += std::mem::size_of::<SkipBlock>();
if self.skip_info_offset == bm25_page_size() {
self.skip_info_offset += std::mem::size_of::<SkipBlock>() as u32;
if self.skip_info_offset == bm25_page_size() as u32 {
let page = page_read(self.index, self.skip_info_page_id);
self.skip_info_page_id = page.opaque.next_blkno;
self.skip_info_offset = 0;
}
self.update_skip_info();

if self
.cur_skip_info
.flag
.contains(SkipBlockFlags::PAGE_CHANGED)
{
self.block_page_id += 1;
self.page_offset = 0;
}

true
}

pub fn next_doc(&mut self) -> bool {
debug_assert!(self.block_decoded);
if self.is_in_unfulled_block() {
self.page_offset += 1;
debug_assert!(self.page_offset <= self.unfulled_doc_cnt() as usize);
if self.page_offset == self.unfulled_doc_cnt() as usize {
self.unfulled_offset += 1;
debug_assert!(self.unfulled_offset <= self.unfulled_doc_cnt());
if self.unfulled_offset == self.unfulled_doc_cnt() {
return false;
}
true
Expand Down Expand Up @@ -158,16 +164,20 @@ impl PostingCursor {
if self.completed() {
return false;
}
let prev_docid = self.docid();
while self.last_doc_in_block() < docid {
if !self.next_block() {
debug_assert!(prev_docid == self.docid());
return false;
}
}
debug_assert!(prev_docid == self.docid());
true
}

pub fn seek(&mut self, docid: u32) -> u32 {
if self.completed() {
self.unfulled_offset = self.unfulled_doc_cnt();
return TERMINATED_DOC;
}
if !self.shallow_seek(docid) {
Expand All @@ -178,8 +188,12 @@ impl PostingCursor {
}

if self.is_in_unfulled_block() {
self.page_offset = self.unfulled_docid.partition_point(|&d| d < docid);
debug_assert!(self.page_offset < self.unfulled_doc_cnt() as usize);
self.unfulled_offset = self
.unfulled_docid
.partition_point(|&d| d < docid)
.try_into()
.unwrap();
debug_assert!(self.unfulled_offset < self.unfulled_doc_cnt());
} else {
let incomplete = self.block_decode.seek(docid);
debug_assert!(incomplete);
Expand All @@ -195,6 +209,7 @@ impl PostingCursor {
}
self.block_decoded = true;
if self.is_in_unfulled_block() {
self.unfulled_offset = 0;
return;
}

Expand All @@ -204,19 +219,18 @@ impl PostingCursor {
self.block_page_reader.get_block_id(self.block_page_id),
);
self.block_decode.decode(
&page.data()[self.page_offset..][..skip.size as usize],
&page.data()[self.page_offset as usize..][..skip.size as usize],
NonZeroU32::new(self.decode_offset),
skip.doc_cnt,
);
}

pub fn docid(&self) -> u32 {
if self.completed() {
if self.completed() && self.unfulled_offset == self.unfulled_doc_cnt() {
return TERMINATED_DOC;
}
debug_assert!(self.block_decoded);
if self.is_in_unfulled_block() {
return self.unfulled_docid[self.page_offset];
if self.is_in_unfulled_block() && self.unfulled_offset != u32::MAX {
return self.unfulled_docid[self.unfulled_offset as usize];
}
debug_assert!(self.block_decode.docid() <= self.last_doc_in_block());
self.block_decode.docid()
Expand All @@ -226,7 +240,7 @@ impl PostingCursor {
debug_assert!(!self.completed());
debug_assert!(self.block_decoded);
if self.is_in_unfulled_block() {
return self.unfulled_freq[self.page_offset];
return self.unfulled_freq[self.unfulled_offset as usize];
}
self.block_decode.freq()
}
Expand All @@ -253,7 +267,7 @@ impl PostingCursor {
fn update_skip_info(&mut self) {
let page = page_read(self.index, self.skip_info_page_id);
let skip_info = *bytemuck::from_bytes(
&page.data()[self.skip_info_offset..][..std::mem::size_of::<SkipBlock>()],
&page.data()[self.skip_info_offset as usize..][..std::mem::size_of::<SkipBlock>()],
);
self.cur_skip_info = skip_info;
}
Expand All @@ -263,6 +277,6 @@ impl PostingCursor {
}

fn is_in_unfulled_block(&self) -> bool {
!self.unfulled_docid.is_empty() && self.remain_block_cnt == 1
!self.unfulled_docid.is_empty() && self.remain_block_cnt <= 1
}
}

0 comments on commit ed9d571

Please sign in to comment.