From 6e6f8abda601a192de50f924298d981116849bf2 Mon Sep 17 00:00:00 2001 From: Marijon Pierre Date: Fri, 22 Feb 2019 15:26:17 +0100 Subject: [PATCH] V0.5.1 correct bug in split module --- Cargo.lock | 2 +- Cargo.toml | 2 +- Readme.md | 2 +- src/chimera.rs | 4 +-- src/main.rs | 55 +++++++++++++++++++++++++------------- src/overlap.rs | 22 +++++++-------- src/postdetection/fasta.rs | 36 ++++++++++++++++--------- src/postdetection/fastq.rs | 20 +++++++++++--- src/postdetection/mod.rs | 2 +- tests/not_run.rs | 4 +-- 10 files changed, 96 insertions(+), 53 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 86f32cb..97d2582 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -565,7 +565,7 @@ dependencies = [ [[package]] name = "yacrd" -version = "0.5.1-alpha" +version = "0.5.1" dependencies = [ "bio 0.21.0 (registry+https://github.com/rust-lang/crates.io-index)", "bzip2 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/Cargo.toml b/Cargo.toml index fea880f..53eab0e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "yacrd" -version = "0.5.1-alpha" +version = "0.5.1" authors = ["Pierre Marijon "] exclude = ["image/*", "validation/*", "tests/*"] diff --git a/Readme.md b/Readme.md index 96077ed..66c9c6f 100644 --- a/Readme.md +++ b/Readme.md @@ -58,7 +58,7 @@ conda install yacrd ``` git clone https://github.com/natir/yacrd.git cd yacrd -git checkout v0.5.0 +git checkout v0.5.1 cargo build cargo test diff --git a/src/chimera.rs b/src/chimera.rs index d695a9b..f9e0dac 100644 --- a/src/chimera.rs +++ b/src/chimera.rs @@ -23,9 +23,9 @@ SOFTWARE. /* local use */ /* crates use */ -use serde_json; use serde::ser::SerializeStruct; - +use serde_json; + /* standard use */ use std; use std::cmp::Ordering; diff --git a/src/main.rs b/src/main.rs index b9c4bf1..be5680a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -61,7 +61,7 @@ use std::collections::HashMap; fn main() { let matches = App::new("yacrd") - .version("0.5 Omanyte") + .version("0.5.1 Omanyte") .author("Pierre Marijon ") .about("Yet Another Chimeric Read Detector") .subcommand(SubCommand::with_name("chimeric") @@ -252,8 +252,10 @@ fn main() { chimeric_matches.is_present("compression-out"), chimeric_matches.value_of("compression-out").unwrap_or("no"), ); - let mut output: Box = - file::get_output(chimeric_matches.value_of("output").unwrap(), out_compression); + let mut output: Box = file::get_output( + chimeric_matches.value_of("output").unwrap(), + out_compression, + ); let filters: Vec<_> = match chimeric_matches.is_present("filter") { true => chimeric_matches.values_of("filter").unwrap().collect(), @@ -288,7 +290,14 @@ fn main() { .parse::() .unwrap(); - overlap::find(inputs, formats, chim_thres, ncov_thres, &mut remove_reads, false); + overlap::find( + inputs, + formats, + chim_thres, + ncov_thres, + &mut remove_reads, + false, + ); chimera::write( &mut output, @@ -307,24 +316,28 @@ fn main() { for filename in splits { split::run(&remove_reads, filename, split_suffix); } - } - else if let Some(scrubbing_matches) = matches.subcommand_matches("scrubbing") { - println!("{:?}", scrubbing_matches); - let (mapping, map_compression) = file::get_input(scrubbing_matches.value_of("mapping").unwrap()); - let mut format = utils::get_mapping_format(scrubbing_matches.value_of("mapping").unwrap()).unwrap(); + } else if let Some(scrubbing_matches) = matches.subcommand_matches("scrubbing") { + let (mapping, map_compression) = + file::get_input(scrubbing_matches.value_of("mapping").unwrap()); + let mut format = + utils::get_mapping_format(scrubbing_matches.value_of("mapping").unwrap()).unwrap(); let mut raw_path = scrubbing_matches.value_of("sequence").unwrap(); let mut scrubbed_path = scrubbing_matches.value_of("scrubbed").unwrap(); - + let out_compression = file::choose_compression( map_compression, scrubbing_matches.is_present("compression-out"), - scrubbing_matches.value_of("compression-out").unwrap_or("no"), + scrubbing_matches + .value_of("compression-out") + .unwrap_or("no"), + ); + + let mut report: Box = file::get_output( + scrubbing_matches.value_of("report").unwrap(), + out_compression, ); - let mut report: Box = - file::get_output(scrubbing_matches.value_of("report").unwrap(), out_compression); - let chim_thres = scrubbing_matches .value_of("chimeric-threshold") .unwrap() @@ -337,8 +350,15 @@ fn main() { .unwrap(); let mut remove_reads: chimera::BadReadMap = HashMap::new(); - - overlap::find(vec![mapping], vec![format], chim_thres, ncov_thres, &mut remove_reads, true); + + overlap::find( + vec![mapping], + vec![format], + chim_thres, + ncov_thres, + &mut remove_reads, + true, + ); split::run(&remove_reads, raw_path, "_splited"); chimera::write( @@ -347,9 +367,8 @@ fn main() { scrubbing_matches.is_present("json-output"), ); - let tmp_name = postdetection::generate_out_name(raw_path, "_splited"); - + std::fs::rename(tmp_name, scrubbed_path).unwrap(); } } diff --git a/src/overlap.rs b/src/overlap.rs index 8c8dee0..029dae4 100644 --- a/src/overlap.rs +++ b/src/overlap.rs @@ -62,7 +62,7 @@ pub fn find( chim_thres: u64, ncov_thres: f64, remove_reads: &mut chimera::BadReadMap, - report_all: bool + report_all: bool, ) { let mut read2mapping: HashMap> = HashMap::new(); @@ -287,7 +287,7 @@ mod test { 0, 0.8, &mut remove_reads, - false + false, ); chimera::write(&mut writer, &remove_reads, false); @@ -302,7 +302,7 @@ mod test { 0, 0.8, &mut remove_reads, - false + false, ); chimera::write(&mut writer, &remove_reads, false); @@ -323,7 +323,7 @@ mod test { 0, 0.8, &mut remove_reads, - false + false, ); chimera::write(&mut writer, &remove_reads, false); @@ -344,7 +344,7 @@ mod test { 1, 0.8, &mut remove_reads, - false + false, ); chimera::write(&mut writer, &remove_reads, false); @@ -370,7 +370,7 @@ mod test { 0, 0.8, &mut remove_reads, - false + false, ); chimera::write(&mut writer, &remove_reads, true); @@ -397,7 +397,7 @@ mod test { 0, 0.8, &mut remove_reads, - false + false, ); chimera::write(&mut writer, &remove_reads, false); @@ -423,7 +423,7 @@ mod test { 0, 0.8, &mut remove_reads, - false + false, ); chimera::write(&mut writer, &remove_reads, false); @@ -447,7 +447,7 @@ mod test { 0, 0.8, &mut remove_reads, - false + false, ); chimera::write(&mut writer, &remove_reads, false); @@ -518,7 +518,7 @@ mod test { #[test] fn find_chimera_report_all() { let result = "NotBad\t3\t10000\t7500,2500,10000\nChimeric\t4\t6000\t1000,2500,3500\nChimeric\t1\t10000\t2000,0,2000;1000,4500,5500;2000,8000,10000\nNotBad\t2\t10000\t7500,0,7500\n".to_string(); - + let good: HashSet<&str> = result.split("\n").collect(); let mut remove_reads: chimera::BadReadMap = HashMap::new(); let mut writer: Vec = Vec::new(); @@ -529,7 +529,7 @@ mod test { 1, 0.8, &mut remove_reads, - true + true, ); chimera::write(&mut writer, &remove_reads, false); diff --git a/src/postdetection/fasta.rs b/src/postdetection/fasta.rs index 76703f4..cc52f75 100644 --- a/src/postdetection/fasta.rs +++ b/src/postdetection/fasta.rs @@ -125,16 +125,26 @@ impl PostDetectionOperationFasta for Split { } let mut position = vec![0]; + let mut passed_pos = std::collections::HashSet::new(); for inter in gaps.iter() { - position.push(inter.begin); - position.push(inter.end); + if !passed_pos.contains(&inter.begin) { + position.push(inter.begin); + passed_pos.insert(inter.begin); + } + if !passed_pos.contains(&inter.end) { + position.push(inter.end); + passed_pos.insert(inter.end); + } } - position.push(record.seq().len() as u64); + if position.len() % 2 == 1 { + position.push(record.seq().len() as u64); + } + if position.len() == 2 && position[0] == 0 && position[1] as usize == record.seq().len() { return vec![record.clone()]; } - + for (a, b) in position.chunks(2).map(|x| (x[0], x[1])) { if a == b { continue; // empty interval @@ -208,7 +218,8 @@ mod test { int_type: chimera::IntervalType::Sure, }, ], - )); + ), + ); m.insert( "2".to_string(), ( @@ -226,19 +237,16 @@ mod test { int_type: chimera::IntervalType::Sure, }, ], - )); + ), + ); m.insert( "3".to_string(), - ( - chimera::BadReadType::NotBad, - 6000, - vec![], - ), + (chimera::BadReadType::NotBad, 6000, vec![]), ); m }; } - + const FASTA_FILE: &'static [u8] = b">1 ACTG >2 @@ -363,7 +371,6 @@ ACTG assert_eq!(out, FASTA_FILE_SPLITED_ALL); } - const SHORT_FASTA_FILE: &'static [u8] = b">1 ACTGGGGGGACTG >2 @@ -395,6 +402,9 @@ ACTG } } } + + println!("{}", String::from_utf8_lossy(&out)); + println!("{}", String::from_utf8_lossy(SHORT_FASTA_FILE_SPLIT)); assert_eq!(out, SHORT_FASTA_FILE_SPLIT); } } diff --git a/src/postdetection/fastq.rs b/src/postdetection/fastq.rs index 1aab8d9..aa145e2 100644 --- a/src/postdetection/fastq.rs +++ b/src/postdetection/fastq.rs @@ -125,12 +125,26 @@ impl PostDetectionOperationFastq for Split { } let mut position = vec![0]; + let mut passed_pos = std::collections::HashSet::new(); for inter in gaps.iter() { - position.push(inter.begin); - position.push(inter.end); + if !passed_pos.contains(&inter.begin) { + position.push(inter.begin); + passed_pos.insert(inter.begin); + } + if !passed_pos.contains(&inter.end) { + position.push(inter.end); + passed_pos.insert(inter.end); + } } - position.push(record.seq().len() as u64); + if position.len() % 2 == 1 { + position.push(record.seq().len() as u64); + } + + if position.len() == 2 && position[0] == 0 && position[1] as usize == record.seq().len() { + return vec![record.clone()]; + } + for (a, b) in position.chunks(2).map(|x| (x[0], x[1])) { if a == b { continue; // empty interval diff --git a/src/postdetection/mod.rs b/src/postdetection/mod.rs index 60a4a34..f1fbc11 100644 --- a/src/postdetection/mod.rs +++ b/src/postdetection/mod.rs @@ -29,7 +29,7 @@ pub mod paf; use std::path::Path; pub fn in_read(begin: usize, end: usize, length: usize) -> bool { - return begin < length || end < length; + return begin <= length && end <= length; } pub fn generate_out_name(filename: &str, suffix: &str) -> String { diff --git a/tests/not_run.rs b/tests/not_run.rs index 0b44837..6d242d5 100644 --- a/tests/not_run.rs +++ b/tests/not_run.rs @@ -22,7 +22,7 @@ SOFTWARE. use std::process::Command; -static HELP_MESSAGE: &'static str = "yacrd 0.5 Omanyte +static HELP_MESSAGE: &'static str = "yacrd 0.5.1 Omanyte Pierre Marijon Yet Another Chimeric Read Detector @@ -51,7 +51,7 @@ mod not_run { .output() .expect("Could not run yacrd"); - assert_eq!(output.stdout, b"yacrd 0.5 Omanyte\n"); + assert_eq!(output.stdout, b"yacrd 0.5.1 Omanyte\n"); println!("{:?}", output); }