Skip to content

Commit

Permalink
[TEMP] testing
Browse files Browse the repository at this point in the history
  • Loading branch information
khb7840 committed Mar 5, 2024
1 parent 65e5865 commit edd8977
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 35 deletions.
90 changes: 60 additions & 30 deletions src/cli/workflows/build_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,36 +73,66 @@ pub fn build_index(env: AppArgs) {
);

// Main workflow
// 2. Collect hash
measure_time!(fold_disco.collect_hash_pairs());
if verbose {
print_log_msg(INFO,
&format!("Total {} hashes collected (Allocated {}MB)", fold_disco.hash_id_pairs.len(), PEAK_ALLOC.current_usage_as_mb())
);
}
measure_time!(fold_disco.sort_hash_pairs());
if verbose { print_log_msg(INFO, &format!("Hash sorted (Allocated {}MB)", PEAK_ALLOC.current_usage_as_mb())); }
measure_time!(fold_disco.fill_numeric_id_vec());

let (offset_table, value_vec) =
// measure_time!(fold_disco.index_builder.convert_sorted_pairs_to_offset_and_values(fold_disco.hash_id_pairs));
measure_time!(convert_sorted_pairs_to_offset_and_values_vec(fold_disco.hash_id_pairs));
if verbose { print_log_msg(INFO, &format!("Converted to offsets (Allocated {}MB)", PEAK_ALLOC.current_usage_as_mb())); }
// Save offset table
let offset_path = format!("{}.offset", index_path);
// measure_time!(save_offset_map(&offset_path, &offset_table).expect(
measure_time!(save_offset_vec(&offset_path, &offset_table).expect(
&log_msg(FAIL, "Failed to save offset table")
));
drop(offset_table);
let USE_DASHMAP = true;
if USE_DASHMAP {
measure_time!(fold_disco.collect_hash());
if verbose {
print_log_msg(INFO,
&format!("Hash collected (Allocated {}MB)", PEAK_ALLOC.current_usage_as_mb())
);
}
// 3. Setting
measure_time!(fold_disco.set_index_table());
if verbose { print_log_msg(INFO, &format!("Setting done (Allocated {}MB)", PEAK_ALLOC.current_usage_as_mb())); }
// 4. Fill index table
let mut index_table = measure_time!(fold_disco.index_builder.fill_and_return_dashmap());
index_table.remove(&GeometricHash::from_u64(0, hash_type));
if verbose { print_log_msg(INFO, &format!("Filling done (Allocated {}MB)", PEAK_ALLOC.current_usage_as_mb())); }
// Convert to offset table
let (offset_table, value_vec) =
measure_time!(fold_disco.index_builder.convert_hashmap_to_offset_and_values(index_table));
if verbose { print_log_msg(INFO, &format!("Offset & values acquired (Allocated {}MB)", PEAK_ALLOC.current_usage_as_mb())); }
// Save offset table
let offset_path = format!("{}.offset", index_path);
measure_time!(save_offset_map(&offset_path, &offset_table).expect(
&log_msg(FAIL, "Failed to save offset table")
));
// Save value vector
let value_path = format!("{}.value", index_path);
measure_time!(write_usize_vector(&value_path, &value_vec).expect(
&log_msg(FAIL, "Failed to save values")
));
} else {
// 2. Collect hash
measure_time!(fold_disco.collect_hash_pairs());
if verbose {
print_log_msg(INFO,
&format!("Total {} hashes collected (Allocated {}MB)", fold_disco.hash_id_pairs.len(), PEAK_ALLOC.current_usage_as_mb())
);
}
measure_time!(fold_disco.sort_hash_pairs());
if verbose { print_log_msg(INFO, &format!("Hash sorted (Allocated {}MB)", PEAK_ALLOC.current_usage_as_mb())); }
measure_time!(fold_disco.fill_numeric_id_vec());

// Save value vector
let value_path = format!("{}.value", index_path);
measure_time!(write_usize_vector(&value_path, &value_vec).expect(
&log_msg(FAIL, "Failed to save values")
));
drop(value_vec);
let (offset_table, value_vec) =
// measure_time!(fold_disco.index_builder.convert_sorted_pairs_to_offset_and_values(fold_disco.hash_id_pairs));
measure_time!(convert_sorted_pairs_to_offset_and_values_vec(fold_disco.hash_id_pairs));
if verbose { print_log_msg(INFO, &format!("Converted to offsets (Allocated {}MB)", PEAK_ALLOC.current_usage_as_mb())); }
// Save offset table
let offset_path = format!("{}.offset", index_path);
// measure_time!(save_offset_map(&offset_path, &offset_table).expect(
measure_time!(save_offset_vec(&offset_path, &offset_table).expect(
&log_msg(FAIL, "Failed to save offset table")
));
drop(offset_table);

// Save value vector
let value_path = format!("{}.value", index_path);
measure_time!(write_usize_vector(&value_path, &value_vec).expect(
&log_msg(FAIL, "Failed to save values")
));
drop(value_vec);
}
// Save lookup. The path to lookup table is the same as the index table with .lookup extension
let lookup_path = format!("{}.lookup", index_path);
measure_time!(save_lookup_to_file(
Expand Down Expand Up @@ -132,8 +162,8 @@ mod tests {
fn test_build_index() {
let pdb_dir = "data/serine_peptidases_filtered";
let pdb_path_vec = load_path(pdb_dir);
let hash_type = "default32";
let index_path = "data/serine_peptidases_default32";
let hash_type = "pdb";
let index_path = "data/serine_peptidases_pdb";
let num_threads = 4;
let verbose = true;
let help = false;
Expand Down
10 changes: 8 additions & 2 deletions src/controller/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,12 @@ impl FoldDisco {
}
}

/// Moves the collected hashes out of `self.hash_collection` and returns them.
///
/// Ownership is transferred with `std::mem::take`, so nothing is cloned and
/// the field is left holding an empty vector afterwards. Calling this again
/// before the collection is refilled therefore returns an empty vector.
pub fn return_hash_collection(&mut self) -> Vec<Vec<GeometricHash>> {
    // Return the taken value directly; binding it to a local first adds nothing.
    std::mem::take(&mut self.hash_collection)
}

pub fn set_index_table(&mut self) {
// Deprecated
// Check if hash_collection is filled
Expand All @@ -268,8 +274,8 @@ impl FoldDisco {
return;
}
let index_builder = IndexBuilder::new(
&self.numeric_id_vec, &self.hash_collection,
self.num_threads, self.get_allocation_size(),
self.numeric_id_vec.clone(), self.return_hash_collection(),
self.num_threads, 1,
format!("{}.offset", self.output_path), // offset file path
format!("{}.index", self.output_path), // data file path
);
Expand Down
2 changes: 1 addition & 1 deletion src/geometry/default_32bit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ const NBIN_DIST: f32 = 8.0;
// 2. NEW IDEA for encoding angles; represent as sin and cos
const MIN_SIN_COS: f32 = -1.0;
const MAX_SIN_COS: f32 = 1.0;
const NBIN_TORSION_SIN_COS: f32 = 2.0;
const NBIN_TORSION_SIN_COS: f32 = 3.0;
const NBIN_PLANE_SIN_COS: f32 = 3.0;
// Bitmasks
const BITMASK32_2BIT: u32 = 0x00000003;
Expand Down
4 changes: 2 additions & 2 deletions src/index/alloc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ impl<K: HashableSync, V: HashableSync> IndexBuilder<K, V> {

// Constructor
pub fn new(
ids: &Vec<K>, data: &Vec<Vec<V>>,
ids: Vec<K>, data: Vec<Vec<V>>,
num_threads: usize, allocation_size: usize,
offset_path: String, data_path: String,
) -> IndexBuilder<K, V> {
Expand All @@ -85,7 +85,7 @@ impl<K: HashableSync, V: HashableSync> IndexBuilder<K, V> {
IndexBuilder {
offset: DashMap::new(),
allocation: Arc::new(HugeAllocation::new(allocation_size)),
ids: Arc::new(ids.to_owned()),
ids: Arc::new(ids),
data: Arc::new(data.to_owned()),
data_dashmap: Arc::new(DashMap::new()),
num_threads,
Expand Down

0 comments on commit edd8977

Please sign in to comment.