From 99f62576d139f9c54115b21fbddfd7d4f794adb6 Mon Sep 17 00:00:00 2001 From: wdecoster Date: Mon, 5 Aug 2024 09:57:43 +0200 Subject: [PATCH] write cli arguments to vcf header, pass Cli instead --- src/call.rs | 2 +- src/vcf.rs | 74 ++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 57 insertions(+), 19 deletions(-) diff --git a/src/call.rs b/src/call.rs index 90a607b..bf8bf62 100644 --- a/src/call.rs +++ b/src/call.rs @@ -12,7 +12,7 @@ pub fn genotype_repeats(args: Cli) { debug!("Genotyping STRs in {}", args.bam); check_files_exist(&args); let repeats = get_targets(&args); - crate::vcf::write_vcf_header(&args.fasta, &args.bam, &args.sample); + crate::vcf::write_vcf_header(&args); let stdout = io::stdout(); // get the global stdout entity let mut handle = io::BufWriter::new(stdout); // wrap that handle in a buffer if args.threads == 1 { diff --git a/src/vcf.rs b/src/vcf.rs index 9e9d3b6..92a4de3 100644 --- a/src/vcf.rs +++ b/src/vcf.rs @@ -6,6 +6,8 @@ use rust_htslib::faidx; use std::cmp::Ordering; use std::fmt; use std::io::Read; +use crate::Cli; + pub struct Allele { pub length: String, // length of the consensus sequence minus the length of the repeat sequence @@ -67,7 +69,7 @@ impl VCFRecord { repeat: &crate::repeats::RepeatInterval, ps: Option, flag: Vec, - args: &crate::Cli, + args: &Cli, ) -> VCFRecord { // since I use .pop() to format the two consensus sequences, the order is reversed let allele2 = Allele::from_consensus( @@ -381,10 +383,10 @@ impl PartialEq for VCFRecord { impl Eq for VCFRecord {} -pub fn write_vcf_header(fasta: &str, bam: &str, sample: &Option) { +pub fn write_vcf_header(args: &Cli) { println!(r#"##fileformat=VCFv4.2"#); // get absolute path to fasta file - let path = std::fs::canonicalize(fasta) + let path = std::fs::canonicalize(&args.fasta) .unwrap_or_else(|err| panic!("Failed getting absolute path to fasta: {err}")); println!( r#"##reference={}"#, @@ -393,11 +395,16 @@ pub fn write_vcf_header(fasta: &str, bam: &str, sample: &Option) { // get the version of this crate let version = env!("CARGO_PKG_VERSION"); println!(r#"##source=STRdust v{}"#, version); + // write all arguments to the header + println!( + r#"##command=STRdust {}"#, + std::env::args().collect::>().join(" ") + ); // call faidx to make sure the fasta index exists, we'll need this anyway when genotyping let _ = - faidx::Reader::from_path(fasta).unwrap_or_else(|err| panic!("Failed opening fasta: {err}")); - - let mut fai_file = std::fs::File::open(format!("{fasta}.fai")).expect("Can't open .fai file"); + faidx::Reader::from_path(&args.fasta).unwrap_or_else(|err| panic!("Failed opening fasta: {err}")); + + let mut fai_file = std::fs::File::open(format!("{0}.fai", args.fasta)).expect("Can't open .fai file"); // parse the fasta index file let mut buf = String::new(); fai_file @@ -424,6 +431,9 @@ pub fn write_vcf_header(fasta: &str, bam: &str, sample: &Option) { println!( r#"##INFO="# ); + if args.debug { + println!(r#"##INFO="#); + } println!(r#"##FORMAT="#); println!( r#"##FORMAT="# @@ -434,11 +444,11 @@ pub fn write_vcf_header(fasta: &str, bam: &str, sample: &Option) { println!(r#"##FORMAT="#); println!(r#"##FORMAT="#); println!(r#"##FORMAT="#); - let name = match sample { + let name = match &args.sample { Some(name) => name, None => { // use basename of bam and remove file extension - let name = std::path::Path::new(&bam) + let name = std::path::Path::new(&args.bam) .file_stem() .unwrap() .to_str() @@ -452,20 +462,48 @@ pub fn write_vcf_header(fasta: &str, bam: &str, sample: &Option) { #[cfg(test)] #[test] fn test_write_vcf_header_from_bam() { - write_vcf_header( - "test_data/chr7.fa.gz", - "test_data/small-test-phased.bam", - &None, - ); + let args = Cli { + bam: String::from("test_data/small-test-phased.bam"), + fasta: String::from("test_data/chr7.fa.gz"), + region: None, + region_file: None, + pathogenic: false, + minlen: 5, + support: 1, + somatic: false, + unphased: true, + find_outliers: false, + threads: 1, + sample: None, + haploid: Some(String::from("chr7")), + debug: false, + sorted: false, + consensus_reads: 20, + }; + write_vcf_header(&args); } #[test] fn test_write_vcf_header_from_name() { - write_vcf_header( - "test_data/chr7.fa.gz", - "test_data/small-test-phased.bam", - &Some("test_sample".to_string()), - ); + let args = Cli { + bam: String::from("test_data/small-test-phased.bam"), + fasta: String::from("test_data/chr7.fa.gz"), + region: None, + region_file: None, + pathogenic: false, + minlen: 5, + support: 1, + somatic: false, + unphased: true, + find_outliers: false, + threads: 1, + sample: Some("test_sample".to_string()), + haploid: Some(String::from("chr7")), + debug: false, + sorted: false, + consensus_reads: 20, + }; + write_vcf_header(&args); } #[test]