Skip to content

Commit

Permalink
more comments
Browse files Browse the repository at this point in the history
Signed-off-by: BubbleCal <[email protected]>
  • Loading branch information
BubbleCal committed Feb 11, 2025
1 parent c6aa615 commit f573c26
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
6 changes: 5 additions & 1 deletion rust/lance/src/dataset/scanner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ pub const LEGACY_DEFAULT_FRAGMENT_READAHEAD: usize = 4;
lazy_static::lazy_static! {
    /// Optional fragment readahead override, read from the
    /// `LANCE_DEFAULT_FRAGMENT_READAHEAD` environment variable on first access.
    ///
    /// `None` when the variable is unset or is not valid Unicode.
    ///
    /// # Panics
    ///
    /// Panics on first access if the variable is set but cannot be parsed as a `usize`.
    /// A malformed value is treated as a configuration error rather than being ignored.
    pub static ref DEFAULT_FRAGMENT_READAHEAD: Option<usize> =
        std::env::var("LANCE_DEFAULT_FRAGMENT_READAHEAD")
            .map(|val| {
                Some(val.parse::<usize>().unwrap_or_else(|err| {
                    panic!(
                        "LANCE_DEFAULT_FRAGMENT_READAHEAD must be a valid usize, got {:?}: {}",
                        val, err
                    )
                }))
            })
            .unwrap_or(None);

    /// Over-fetch factor read from the `LANCE_XTR_OVERFETCH` environment variable on
    /// first access; defaults to `10` when the variable is unset or is not valid Unicode.
    ///
    /// NOTE(review): the scanner appears to use this as `over_fetch_factor` when
    /// collecting candidates for multivector search — confirm against the caller.
    ///
    /// # Panics
    ///
    /// Panics on first access if the variable is set but cannot be parsed as a `u32`.
    pub static ref DEFAULT_XTR_OVERFETCH: u32 = std::env::var("LANCE_XTR_OVERFETCH")
        .map(|val| {
            val.parse::<u32>().unwrap_or_else(|err| {
                panic!(
                    "LANCE_XTR_OVERFETCH must be a valid u32, got {:?}: {}",
                    val, err
                )
            })
        })
        .unwrap_or(10);
}

// We want to support ~256 concurrent reads to maximize throughput on cloud storage systems
Expand Down Expand Up @@ -2128,7 +2131,7 @@ impl Scanner {
// 1. collect the candidates by vector searching on each query vector
// 2. scoring the candidates

let over_fetch_factor = q.refine_factor.unwrap_or(10);
let over_fetch_factor = *DEFAULT_XTR_OVERFETCH;

let prefilter_source = self.prefilter_source(filter_plan).await?;
let dim = get_vector_dim(self.dataset.schema(), &q.column)?;
Expand All @@ -2147,6 +2150,7 @@ impl Scanner {
});
let mut ann_nodes = Vec::with_capacity(new_queries.len());
for query in new_queries {
// this produces `nprobes * k * over_fetch_factor * num_indices` candidates
let ann_node = new_knn_exec(
self.dataset.clone(),
index,
Expand Down
6 changes: 5 additions & 1 deletion rust/lance/src/io/exec/knn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -741,7 +741,11 @@ impl ExecutionPlan for MultivectorScoringExec {
debug_assert_eq!(dists.null_count(), 0);

// max-reduce for the same row id
let min_sim = 1.0 - dists.values().last().copied().unwrap_or(2.0);
let min_sim = dists
.values()
.last()
.map(|dist| 1.0 - *dist)
.unwrap_or_default();
let mut new_row_ids = Vec::with_capacity(row_ids.len());
let mut new_sims = Vec::with_capacity(row_ids.len());
let mut visited_row_ids = HashSet::with_capacity(row_ids.len());
Expand Down

0 comments on commit f573c26

Please sign in to comment.