forked from URI-ABD/clam
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
wip: adding dataset extension with search hints
- Loading branch information
Showing
6 changed files
with
253 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
//! A wrapper around any `Dataset` type to provide a dataset for storing | ||
//! search hints. | ||
use std::collections::HashMap; | ||
|
||
use distances::Number; | ||
use serde::{Deserialize, Serialize}; | ||
|
||
use crate::{ | ||
dataset::{AssociatesMetadata, AssociatesMetadataMut, ParDataset, Permutable}, | ||
Dataset, | ||
}; | ||
|
||
use super::SearchHints; | ||
|
||
/// A dataset which stores search hints for each item. | ||
#[derive(Clone, Serialize, Deserialize)] | ||
#[cfg_attr(feature = "disk-io", derive(bitcode::Encode, bitcode::Decode))] | ||
pub struct HintedFlatVec<I, U, D: Dataset<I>> { | ||
/// The underlying dataset. | ||
data: D, | ||
/// The search hints. | ||
hints: Vec<HashMap<usize, U>>, | ||
/// The name of the dataset. | ||
name: String, | ||
/// Ghosts in the machine. | ||
i_phantom: std::marker::PhantomData<I>, | ||
} | ||
|
||
impl<I, U: Number, D: Dataset<I>> From<D> for HintedFlatVec<I, U, D> { | ||
fn from(data: D) -> Self { | ||
let hints = (0..data.cardinality()).map(|_| HashMap::new()).collect(); | ||
let name = format!("Hinted({})", data.name()); | ||
Self { | ||
data, | ||
hints, | ||
name, | ||
i_phantom: std::marker::PhantomData, | ||
} | ||
} | ||
} | ||
|
||
impl<I, U: Number, D: Dataset<I>> Dataset<I> for HintedFlatVec<I, U, D> { | ||
fn name(&self) -> &str { | ||
&self.name | ||
} | ||
|
||
fn with_name(mut self, name: &str) -> Self { | ||
self.name = name.to_string(); | ||
self | ||
} | ||
|
||
fn cardinality(&self) -> usize { | ||
self.data.cardinality() | ||
} | ||
|
||
fn dimensionality_hint(&self) -> (usize, Option<usize>) { | ||
self.data.dimensionality_hint() | ||
} | ||
|
||
fn get(&self, index: usize) -> &I { | ||
self.data.get(index) | ||
} | ||
} | ||
|
||
impl<I: Send + Sync, U: Number, D: ParDataset<I>> ParDataset<I> for HintedFlatVec<I, U, D> {} | ||
|
||
impl<I, U: Number, D: Dataset<I>> SearchHints<I, U> for HintedFlatVec<I, U, D> { | ||
fn hints(&self) -> &[HashMap<usize, U>] { | ||
&self.hints | ||
} | ||
|
||
fn hints_mut(&mut self) -> &mut [HashMap<usize, U>] { | ||
&mut self.hints | ||
} | ||
|
||
fn hints_for(&self, i: usize) -> &HashMap<usize, U> { | ||
&self.hints[i] | ||
} | ||
|
||
fn hints_for_mut(&mut self, i: usize) -> &mut HashMap<usize, U> { | ||
&mut self.hints[i] | ||
} | ||
} | ||
|
||
impl<I, U: Number, Me, D: AssociatesMetadata<I, Me>> AssociatesMetadata<I, Me> for HintedFlatVec<I, U, D> { | ||
fn metadata(&self) -> &[Me] { | ||
self.data.metadata() | ||
} | ||
|
||
fn metadata_at(&self, index: usize) -> &Me { | ||
self.data.metadata_at(index) | ||
} | ||
} | ||
|
||
impl<I, U: Number, Me, Met: Clone, Det: AssociatesMetadata<I, Met>, D: AssociatesMetadataMut<I, Me, Met, Det>> | ||
AssociatesMetadataMut<I, Me, Met, Det> for HintedFlatVec<I, U, D> | ||
{ | ||
fn metadata_mut(&mut self) -> &mut [Me] { | ||
self.data.metadata_mut() | ||
} | ||
|
||
fn metadata_at_mut(&mut self, index: usize) -> &mut Me { | ||
self.data.metadata_at_mut(index) | ||
} | ||
|
||
fn with_metadata(self, metadata: &[Met]) -> Result<Det, String> { | ||
self.data.with_metadata(metadata) | ||
} | ||
|
||
fn transform_metadata<F: Fn(&Me) -> Met>(self, f: F) -> Det { | ||
self.data.transform_metadata(f) | ||
} | ||
} | ||
|
||
impl<I, U: Number, D: Dataset<I> + Permutable> Permutable for HintedFlatVec<I, U, D> { | ||
fn permutation(&self) -> Vec<usize> { | ||
self.data.permutation() | ||
} | ||
|
||
fn set_permutation(&mut self, permutation: &[usize]) { | ||
self.data.set_permutation(permutation); | ||
} | ||
|
||
fn swap_two(&mut self, i: usize, j: usize) { | ||
self.data.swap_two(i, j); | ||
self.hints.swap(i, j); | ||
} | ||
} | ||
|
||
/// Disk IO support; every method comes from the trait's defaults.
#[cfg(feature = "disk-io")]
impl<I, U, D> crate::dataset::DatasetIO<I> for HintedFlatVec<I, U, D>
where
    U: Number,
    D: crate::dataset::DatasetIO<I>,
{
}
|
||
/// Parallel disk IO support; every method comes from the trait's defaults.
#[cfg(feature = "disk-io")]
impl<I, U, D> crate::dataset::ParDatasetIO<I> for HintedFlatVec<I, U, D>
where
    I: Send + Sync,
    U: Number,
    D: crate::dataset::ParDatasetIO<I>,
{
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
//! `Dataset`s which store extra information to improve search performance. | ||
use std::collections::HashMap; | ||
|
||
use distances::Number; | ||
|
||
use crate::{Cluster, Dataset, Metric}; | ||
|
||
mod data; | ||
|
||
pub use data::HintedFlatVec; | ||
|
||
use super::{Algorithm, Searchable}; | ||
|
||
/// An extension of the `Dataset` trait which provides hints for search | ||
/// algorithms. | ||
/// | ||
/// Each hint is a mapping from the number of known neighbors to the | ||
/// distance to the farthest known neighbor. | ||
pub trait SearchHints<I, U: Number>: Dataset<I> { | ||
/// Get the search hints for the dataset. | ||
fn hints(&self) -> &[HashMap<usize, U>]; | ||
|
||
/// Get the search hints for the dataset as mutable. | ||
fn hints_mut(&mut self) -> &mut [HashMap<usize, U>]; | ||
|
||
/// Get the search hints for a specific item by index. | ||
fn hints_for(&self, i: usize) -> &HashMap<usize, U>; | ||
|
||
/// Get the search hints for a specific item by index as mutable. | ||
fn hints_for_mut(&mut self, i: usize) -> &mut HashMap<usize, U>; | ||
|
||
/// Add a hint for a specific item. | ||
/// | ||
/// # Arguments | ||
/// | ||
/// * `i` - The index of the item. | ||
/// * `k` - The number of known neighbors. | ||
/// * `d` - The distance to the farthest known neighbor. | ||
fn add_hint(&mut self, i: usize, k: usize, d: U) { | ||
self.hints_for_mut(i).insert(k, d); | ||
} | ||
|
||
/// Add hints from a tree. | ||
/// | ||
/// For each cluster in the tree, this will add hints for cluster centers | ||
/// using the cluster radius and cardinality. | ||
fn add_from_tree<C: Cluster<U>>(&mut self, root: &C) { | ||
self.add_hint(root.arg_center(), root.cardinality(), root.radius()); | ||
if !root.is_leaf() { | ||
root.child_clusters().for_each(|c| self.add_from_tree(c)); | ||
} | ||
} | ||
|
||
/// Add hints using a search algorithm. | ||
/// | ||
/// # Arguments | ||
/// | ||
/// * `metric` - The metric to use for the search. | ||
/// * `root` - The root of the search tree. | ||
/// * `alg` - The search algorithm to use. | ||
/// * `q` - The index of the query item. | ||
fn add_by_search<M: Metric<I, U>, C: Searchable<I, U, Self, M>>( | ||
&mut self, | ||
metric: &M, | ||
root: &C, | ||
alg: &Algorithm<U>, | ||
i: usize, | ||
) where | ||
Self: Sized, | ||
{ | ||
let (k, r) = alg.params(); | ||
let (car, d) = { | ||
let mut hits = alg.search(self, metric, root, self.get(i)); | ||
hits.sort_unstable_by(|(_, a), (_, b)| a.total_cmp(b)); | ||
hits.last().map_or((0, U::ZERO), |&(j, d)| (j, d)) | ||
}; | ||
if let Some(k) = k { | ||
self.add_hint(i, k, d); | ||
} else if let Some(r) = r { | ||
self.add_hint(i, car, r); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters