Showing 18 changed files with 101 additions and 106 deletions.
@@ -9,7 +9,7 @@ This means that the API is not yet stable and breaking changes may occur frequen
 
 CLAM is a library crate so you can add it to your crate using `cargo add [email protected]`.
 
-### Cakes: Nearest Neighbor Search
+### `Cakes`: Nearest Neighbor Search
 
 ```rust
 use abd_clam::{
@@ -44,20 +44,18 @@ let labels: Vec<bool> = rows.iter().map(|v| v[0] > 0.0).collect();
 // We use the `Euclidean` metric for this example.
 let metric = abd_clam::metric::Euclidean;
 
-// We can create a `Dataset` object. We make it mutable here so we can reorder it after building the tree.
-let data = FlatVec::new(rows).unwrap();
+// We can create a `Dataset` object and assign metadata.
+let data = FlatVec::new(rows).unwrap().with_metadata(&labels).unwrap();
 
-// We can assign the labels as metadata to the dataset.
-let data = data.with_metadata(&labels).unwrap();
-
-// We define the criteria for building the tree to partition the `Cluster`s until each contains a single point.
+// We define the criteria for building the tree to partition the `Cluster`s
+// until each contains a single point.
 let criteria = |c: &Ball<_>| c.cardinality() > 1;
 
 // Now we create a tree.
 let root = Ball::new_tree(&data, &metric, &criteria, Some(seed));
 
 // We will use the origin as our query.
-let query: Vec<f32> = vec![0.0; dimensionality];
+let query = vec![0_f32; dimensionality];
 
 // We can now perform Ranged Nearest Neighbors search on the tree.
 let radius = 0.05;
@@ -67,7 +65,8 @@ let rnn_results: Vec<(usize, f32)> = root.search(&data, &metric, &query, alg);
 // KNN search is also supported.
 let k = 10;
 
-// The `KnnRepeatedRnn` algorithm starts RNN search with a small radius and increases it until it finds `k` neighbors.
+// The `KnnRepeatedRnn` algorithm starts RNN search with a small radius and
+// increases it until it finds `k` neighbors.
 let alg = Algorithm::KnnRepeatedRnn(k, 2.0);
 let knn_results: Vec<(usize, f32)> = root.search(&data, &metric, &query, alg);
 
@@ -79,9 +78,6 @@ let knn_results: Vec<(usize, f32)> = root.search(&data, &metric, &query, alg);
 let alg = Algorithm::KnnDepthFirst(k);
 let knn_results: Vec<(usize, f32)> = root.search(&data, &metric, &query, alg);
 
-// We can borrow the reordered labels from the model.
-let labels: &[bool] = data.metadata();
-
 // We can use the results to get the labels of the points that are within the
 // radius of the query point.
 let rnn_labels: Vec<bool> = rnn_results.iter().map(|&(i, _)| labels[i]).collect();
@@ -91,7 +87,7 @@ let rnn_labels: Vec<bool> = rnn_results.iter().map(|&(i, _)| labels[i]).collect(
 let knn_labels: Vec<bool> = knn_results.iter().map(|&(i, _)| labels[i]).collect();
 ```
 
-### Compression and Compressive Search
+### `PanCakes`: Compression and Compressive Search
 
 We also support compression of certain datasets and trees to reduce memory usage.
 We can then perform compressed search on the compressed dataset without having to decompress the whole dataset.
@@ -128,7 +124,8 @@ let (metadata, data) = symagen::random_edits::generate_clumped_data(
 .into_iter()
 .map(|(m, d)| (m, Sequence::from(d)))
 .unzip::<_, _, Vec<_>, Vec<_>>();
-// Create a `FlatVec` dataset from the sequence data.
+
+// We create a `FlatVec` dataset from the sequence data and assign metadata.
 let data = FlatVec::new(data).unwrap().with_metadata(&metadata).unwrap();
 
 // The dataset will use the `levenshtein` distance metric.
@@ -162,14 +159,15 @@ codec_data.write_to(&codec_path).unwrap();
 let squishy_ball_path = temp_dir.path().join("strings.squishy_ball");
 squishy_ball.write_to(&squishy_ball_path).unwrap();
 
-// We can perform compressed search on the compressed dataset.
+// We can perform compressive search on the compressed dataset.
 let query = &Sequence::from(seed_string);
 let radius = 2;
-let k = 10;
 
 let alg = Algorithm::RnnClustered(radius);
 let results: Vec<(usize, u16)> = squishy_ball.par_search(&codec_data, &metric, query, alg);
+assert!(!results.is_empty());
 
+let k = 10;
 let alg = Algorithm::KnnRepeatedRnn(k, 2);
 let results: Vec<(usize, u16)> = squishy_ball.par_search(&codec_data, &metric, query, alg);
 assert_eq!(results.len(), k);
@@ -183,7 +181,7 @@ let results: Vec<(usize, u16)> = squishy_ball.par_search(&codec_data, &metric, q
 assert_eq!(results.len(), k);
 
 // The dataset can be deserialized from disk.
-let mut flat_data: FlatVec<Sequence, String> = FlatVec::read_from(&flat_path).unwrap();
+let flat_data: FlatVec<Sequence, String> = FlatVec::read_from(&flat_path).unwrap();
 
 // The tree can be deserialized from disk.
 let ball: Ball<u16> = Ball::read_from(&ball_path).unwrap();
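For readers who want to see the `Cakes` example as one continuous snippet after these edits, here is a minimal sketch assembled from the post-change lines in the hunks above. The `use` paths, the `seed` and `dimensionality` values, and the hand-rolled `rows` are assumptions, since they fall outside the hunks shown in this commit; everything else mirrors the updated README lines.

```rust
// A minimal sketch assembled from the post-change lines above.
// ASSUMPTIONS: the import paths, `seed`, `dimensionality`, and the hand-rolled
// `rows` are not shown in this commit and are illustrative only.
use abd_clam::{cakes::Algorithm, metric::Euclidean, Ball, Cluster, Dataset, FlatVec};

fn main() {
    let seed = 42;
    let dimensionality = 10_usize;

    // A tiny stand-in for the randomly generated rows used by the README.
    let rows: Vec<Vec<f32>> = (0..100_usize)
        .map(|i| (0..dimensionality).map(|j| ((i * j) as f32).sin()).collect())
        .collect();
    let labels: Vec<bool> = rows.iter().map(|v| v[0] > 0.0).collect();

    // We use the `Euclidean` metric for this example.
    let metric = Euclidean;

    // We can create a `Dataset` object and assign metadata.
    let data = FlatVec::new(rows).unwrap().with_metadata(&labels).unwrap();

    // We define the criteria for building the tree to partition the `Cluster`s
    // until each contains a single point.
    let criteria = |c: &Ball<_>| c.cardinality() > 1;

    // Now we create a tree.
    let root = Ball::new_tree(&data, &metric, &criteria, Some(seed));

    // We will use the origin as our query.
    let query = vec![0_f32; dimensionality];

    // Ranged search with a fixed radius; `RnnClustered` is the ranged-search
    // algorithm shown in the `PanCakes` section, assumed to apply here as well.
    let alg = Algorithm::RnnClustered(0.05);
    let rnn_results: Vec<(usize, f32)> = root.search(&data, &metric, &query, alg);
    let rnn_labels: Vec<bool> = rnn_results.iter().map(|&(i, _)| labels[i]).collect();

    // KNN search, as in the updated README.
    let alg = Algorithm::KnnRepeatedRnn(10, 2.0);
    let knn_results: Vec<(usize, f32)> = root.search(&data, &metric, &query, alg);
    let knn_labels: Vec<bool> = knn_results.iter().map(|&(i, _)| labels[i]).collect();

    println!("{} within radius, {} nearest neighbors", rnn_labels.len(), knn_labels.len());
}
```

The sketch keeps the README's structure (dataset, metric, criteria, tree, then search) so the diff's changes, such as chaining `with_metadata` onto `FlatVec::new`, can be read in context.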