Skip to content

Commit

Permalink
with --debug, store time information regarding time taken to genotype…
Browse files Browse the repository at this point in the history
… repeat

this requires repeat instances to be mutable
this is done to be able to identify repeats that are slow to genotype
may revert later
  • Loading branch information
wdecoster committed Jul 23, 2024
1 parent d369185 commit 53ad0b7
Show file tree
Hide file tree
Showing 8 changed files with 77 additions and 19 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "STRdust"
version = "0.8.6"
version = "0.8.7"
edition = "2021"


Expand Down Expand Up @@ -28,6 +28,7 @@ petgraph = "0.6.4"
hts-sys = "2.1.1"
reqwest = { version = "0.12.5", features = ["blocking", "json"] }
indicatif = { version = "0.17.1", features = ["rayon"] }
chrono = "0.4.38"

[dev-dependencies]
ctor = "*"
12 changes: 6 additions & 6 deletions src/call.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ pub fn genotype_repeats(args: Cli) {
// The indexedreader is created once and passed on to the function
let num_intervals = repeats.num_intervals;
let mut bam = parse_bam::create_bam_reader(&args.bam, &args.fasta);
for repeat in repeats.progress_count(num_intervals as u64) {
if let Ok(output) = genotype::genotype_repeat_singlethreaded(&repeat, &args, &mut bam) {
for mut repeat in repeats.progress_count(num_intervals as u64) {
if let Ok(output) = genotype::genotype_repeat_singlethreaded(&mut repeat, &args, &mut bam) {
writeln!(handle, "{output}").expect("Failed writing the result.");
}
}
Expand All @@ -39,8 +39,8 @@ pub fn genotype_repeats(args: Cli) {
repeats
.par_bridge()
.progress_count(num_intervals as u64)
.for_each(|repeat| {
if let Ok(output) = genotype::genotype_repeat_multithreaded(&repeat, &args) {
.for_each(|mut repeat| {
if let Ok(output) = genotype::genotype_repeat_multithreaded(&mut repeat, &args) {
let mut geno = genotypes.lock().expect("Unable to lock genotypes mutex");
geno.push(output);
} else {
Expand All @@ -66,8 +66,8 @@ pub fn genotype_repeats(args: Cli) {
repeats
.par_bridge()
.progress_count(num_intervals as u64)
.for_each(|repeat| {
if let Ok(output) = genotype::genotype_repeat_multithreaded(&repeat, &args) {
.for_each(|mut repeat| {
if let Ok(output) = genotype::genotype_repeat_multithreaded(&mut repeat, &args) {
println!("{output}");
}
});
Expand Down
1 change: 1 addition & 0 deletions src/consensus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ mod tests {
chrom: "chr1".to_string(),
start: 1,
end: 100,
created: None
},
);
println!("Consensus: {}", cons.seq.unwrap());
Expand Down
36 changes: 27 additions & 9 deletions src/genotype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,25 @@ use rust_htslib::bam;
// when running multithreaded, the indexedreader has to be created every time again
// this is probably expensive
pub fn genotype_repeat_multithreaded(
repeat: &crate::repeats::RepeatInterval,
repeat: &mut crate::repeats::RepeatInterval,
args: &Cli,
) -> Result<crate::vcf::VCFRecord, String> {
if args.debug {
repeat.set_time_stamp()
}
let mut bam = parse_bam::create_bam_reader(&args.bam, &args.fasta);
genotype_repeat(repeat, args, &mut bam)
}

// when running singlethreaded, the indexedreader is created once and simply passed on
pub fn genotype_repeat_singlethreaded(
repeat: &crate::repeats::RepeatInterval,
repeat: &mut crate::repeats::RepeatInterval,
args: &Cli,
bam: &mut bam::IndexedReader,
) -> Result<crate::vcf::VCFRecord, String> {
if args.debug {
repeat.set_time_stamp()
}
genotype_repeat(repeat, args, bam)
}

Expand All @@ -42,18 +48,19 @@ fn genotype_repeat(
repeat,
"N",
".".to_string(),
args
))
}
};

let repeat_compressed_reference = repeat.make_repeat_compressed_sequence(&args.fasta, flanking);
if args.debug {
// write the repeat compressed reference to a file
use std::fs;
let header = format!(">{chrom}:{start}-{end}\n", chrom = repeat.chrom, start = repeat.start, end = repeat.end);
let fas = String::from_utf8(repeat_compressed_reference.clone()).expect("Unable to convert repeat compressed reference to string");
fs::write("repeat_compressed.fa", header + &fas).expect("Unable to write repeat compressed reference to file");
}
// if args.debug {
// // write the repeat compressed reference to a file
// use std::fs;
// let header = format!(">{chrom}:{start}-{end}\n", chrom = repeat.chrom, start = repeat.start, end = repeat.end);
// let fas = String::from_utf8(repeat_compressed_reference.clone()).expect("Unable to convert repeat compressed reference to string");
// fs::write("repeat_compressed.fa", header + &fas).expect("Unable to write repeat compressed reference to file");
// }

// alignments can be extracted in an unphased manner, if the chromosome is --haploid or the --unphased is set
// this means that --haploid overrides the phases which could be present in the bam file
Expand All @@ -69,6 +76,7 @@ fn genotype_repeat(
repeat,
&repeat_ref_seq,
0.to_string(),
args
));
}
};
Expand Down Expand Up @@ -119,6 +127,7 @@ fn genotype_repeat(
repeat,
&repeat_ref_seq,
insertions.len().to_string(),
args
));
}
// there is only one haplotype, haploid, so this gets duplicated for reporting in the VCF module
Expand All @@ -144,6 +153,7 @@ fn genotype_repeat(
repeat,
&repeat_ref_seq,
insertions.len().to_string(),
args
));
}
// handle the special case where there is only 1 read for the repeat (but minimal support is 1 so it passes)
Expand All @@ -156,6 +166,7 @@ fn genotype_repeat(
all_insertions,
reads.ps,
flags,
args
));
}

Expand Down Expand Up @@ -231,6 +242,7 @@ fn genotype_repeat(
repeat,
reads.ps,
flags,
args
))
}

Expand Down Expand Up @@ -344,6 +356,7 @@ mod tests {
chrom: String::from("chr7"),
start: 154654404,
end: 154654432,
created: None
};
let flanking = 2000;
let minlen = 5;
Expand Down Expand Up @@ -383,6 +396,7 @@ mod tests {
chrom: String::from("chr7"),
start: 154654404,
end: 154654432,
created: None
};
let args = Cli {
bam: String::from("test_data/small-test-phased.bam"),
Expand Down Expand Up @@ -412,6 +426,7 @@ mod tests {
chrom: String::from("chr7"),
start: 154654404,
end: 154654432,
created: None
};
let args = Cli {
bam: String::from("test_data/small-test-phased.bam"),
Expand Down Expand Up @@ -458,6 +473,7 @@ mod tests {
chrom: String::from("chr7"),
start: 154654404,
end: 154654432,
created: None
};
let mut bam = parse_bam::create_bam_reader(&args.bam, &args.fasta);
let genotype = genotype_repeat(&repeat, &args, &mut bam);
Expand Down Expand Up @@ -487,6 +503,7 @@ mod tests {
chrom: String::from("chr7"),
start: 154654404,
end: 154654432,
created: None
};
let mut bam = parse_bam::create_bam_reader(&args.bam, &args.fasta);
let genotype = genotype_repeat(&repeat, &args, &mut bam);
Expand Down Expand Up @@ -517,6 +534,7 @@ mod tests {
chrom: String::from("chr7"),
start: 154654404,
end: 154654432,
created: None
};
let mut bam = parse_bam::create_bam_reader(&args.bam, &args.fasta);
let genotype = genotype_repeat(&repeat, &args, &mut bam);
Expand Down
2 changes: 2 additions & 0 deletions src/parse_bam.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ fn test_get_overlapping_reads() {
chrom: String::from("chr7"),
start: 154654404,
end: 154654432,
created: None,
};
let unphased = false;
let mut bam = create_bam_reader(&bam, &fasta);
Expand All @@ -138,6 +139,7 @@ fn test_get_overlapping_reads_url() {
chrom: String::from("chr20"),
start: 154654404,
end: 154654432,
created: None
};
let unphased = false;
let mut bam = create_bam_reader(&bam, &fasta);
Expand Down
6 changes: 6 additions & 0 deletions src/phase_insertions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ mod tests {
chrom: "chr7".to_string(),
start: 154654404,
end: 154654432,
created: None
},
false,
);
Expand Down Expand Up @@ -372,6 +373,7 @@ mod tests {
chrom: "chr7".to_string(),
start: 154654404,
end: 154654432,
created: None
},
false,
);
Expand Down Expand Up @@ -411,6 +413,7 @@ mod tests {
chrom: "chr7".to_string(),
start: 154654404,
end: 154654432,
created: None
},
false,
);
Expand Down Expand Up @@ -453,6 +456,7 @@ mod tests {
chrom: "chr7".to_string(),
start: 154654404,
end: 154654432,
created: None
},
false,
);
Expand Down Expand Up @@ -493,6 +497,7 @@ mod tests {
chrom: "chr7".to_string(),
start: 154654404,
end: 154654432,
created: None
},
false,
);
Expand Down Expand Up @@ -564,6 +569,7 @@ mod tests {
chrom: "chr7".to_string(),
start: 154654404,
end: 154654432,
created: None
},
false,
);
Expand Down
9 changes: 8 additions & 1 deletion src/repeats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ impl Clone for RepeatInterval {
chrom: self.chrom.clone(),
start: self.start,
end: self.end,
created: self.created,
}
}
}
Expand All @@ -105,6 +106,7 @@ pub struct RepeatInterval {
pub chrom: String,
pub start: u32,
pub end: u32,
pub created: Option<chrono::DateTime<chrono::Utc>>,
}

impl fmt::Display for RepeatInterval {
Expand Down Expand Up @@ -144,7 +146,7 @@ impl RepeatInterval {
.expect("Failed parsing chromosome length from fai file")
> end
{
return Some(Self { chrom, start, end });
return Some(Self { chrom, start, end, created: None });
}
}
// if the chromosome is not in the fai file or the end does not fit the interval, return None
Expand All @@ -157,6 +159,7 @@ impl RepeatInterval {
chrom: chrom.to_string(),
start,
end,
created: None
}
}

Expand Down Expand Up @@ -201,6 +204,10 @@ impl RepeatInterval {
}
Some(repeat_ref_sequence)
}

pub fn set_time_stamp(& mut self) {
self.created = Some(chrono::Utc::now());
}
}

#[cfg(test)]
Expand Down
Loading

0 comments on commit 53ad0b7

Please sign in to comment.