forked from jean-pierreBoth/hnswlib-rs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlevensthein.rs
63 lines (60 loc) · 1.75 KB
/
levensthein.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
use anndists::dist::*;
use hnsw_rs::prelude::*;
use rand::Rng;
use std::iter;
fn generate(len: usize) -> String {
const CHARSET: &[u8] = b"abcdefghij";
let mut rng = rand::thread_rng();
let one_char = || CHARSET[rng.gen_range(0..CHARSET.len())] as char;
iter::repeat_with(one_char).take(len).collect()
}
/// Example driver: indexes random 5-letter words in an `Hnsw` structure using
/// `DistLevenshtein`, then queries it for the neighbours of "abcde" twice,
/// once through `search_filter` with a list of ids and once through `search`.
fn main() {
    // Index construction parameters.
    let nb_elem = 500000; // number of possible words in the dictionary
    let max_nb_connection = 15;
    let ef_c = 200;
    // Layer count: truncated natural log of `nb_elem`, capped at 16.
    let nb_layer = 16.min((nb_elem as f32).ln().trunc() as usize);
    let nb_words = 1000;
    let hns = Hnsw::<u16, DistLevenshtein>::new(
        max_nb_connection,
        nb_elem,
        nb_layer,
        ef_c,
        DistLevenshtein {},
    );

    // Words are handed to the index as sequences of u16 code units.
    let encode = |word: &str| -> Vec<u16> { word.chars().map(|c| c as u16).collect() };

    // `nb_words - 1` random words followed by one fixed, known word.
    let mut words: Vec<String> = (1..nb_words).map(|_| generate(5)).collect();
    words.push(String::from("abcdj"));

    // Each word is inserted under its position in `words`, so a returned id
    // can be used directly to look the word back up.
    for (i, w) in words.iter().enumerate() {
        let vec = encode(w.as_str());
        hns.insert((&vec, i));
    }

    // create a filter: the ids 1 through 99, in ascending order
    let filter: Vec<usize> = (1..100).collect();

    let ef_search: usize = 30;
    let tosearch = encode("abcde");

    println!("========== search with filter ");
    let res = hns.search_filter(&tosearch, 10, ef_search, Some(&filter));
    res.iter().for_each(|r| {
        println!(
            "Word: {:?} Id: {:?} Distance: {:?}",
            words[r.d_id], r.d_id, r.distance
        )
    });

    println!("========== search without filter ");
    let res3 = hns.search(&tosearch, 10, ef_search);
    res3.iter().for_each(|r| {
        println!(
            "Word: {:?} Id: {:?} Distance: {:?}",
            words[r.d_id], r.d_id, r.distance
        )
    });
}