diff --git a/src/utils/formats/bam.rs b/src/utils/formats/bam.rs index 7e64334..3fcae7c 100644 --- a/src/utils/formats/bam.rs +++ b/src/utils/formats/bam.rs @@ -105,8 +105,7 @@ where // (3) Parse the header and reference sequences. debug!("parsing the header and reference sequences"); let raw_header = reader.read_header().with_context(|| "reading BAM header")?; - let parsed_header = - super::sam::parse_header(raw_header.clone()).with_context(|| "parsing BAM header")?; + let parsed_header = raw_header.parse().with_context(|| "parsing BAM header")?; let reference_sequences = reader .read_reference_sequences() .with_context(|| "reading BAM reference sequences")?; @@ -209,8 +208,7 @@ where .read_header() .await .with_context(|| "reading BAM header")?; - let parsed_header = - super::sam::parse_header(raw_header.clone()).with_context(|| "parsing BAM header")?; + let parsed_header = raw_header.parse().with_context(|| "parsing BAM header")?; let reference_sequences = reader .read_reference_sequences() .await diff --git a/src/utils/formats/cram.rs b/src/utils/formats/cram.rs index 69a7853..78bcf48 100644 --- a/src/utils/formats/cram.rs +++ b/src/utils/formats/cram.rs @@ -102,8 +102,7 @@ where let raw_header = reader .read_file_header() .with_context(|| "reading CRAM header")?; - let parsed_header = - super::sam::parse_header(raw_header.clone()).with_context(|| "parsing CRAM header")?; + let parsed_header = raw_header.parse().with_context(|| "parsing CRAM header")?; // (4) Return the result. Ok(ParsedCRAMFile { @@ -207,8 +206,7 @@ where .read_file_header() .await .with_context(|| "reading CRAM header")?; - let parsed_header = - super::sam::parse_header(raw_header.clone()).with_context(|| "parsing CRAM header")?; + let parsed_header = raw_header.parse().with_context(|| "parsing CRAM header")?; // (4) Return the result. Ok(ParsedAsyncCRAMFile { diff --git a/src/utils/formats/sam.rs b/src/utils/formats/sam.rs index ea51bdb..ebde96d 100644 --- a/src/utils/formats/sam.rs +++ b/src/utils/formats/sam.rs @@ -6,44 +6,12 @@ use std::path::Path; use anyhow::bail; use anyhow::Context; use noodles::sam; -use regex::Captures; -use regex::Regex; use tracing::debug; use crate::utils::formats::utils::RawAndParsedHeaders; use super::BioinformaticsFileFormat; -//=================// -// Utility Methods // -//=================// - -/// Corrects common header mistakes. See the inline comments for the things that -/// are automatically corrected. -pub fn correct_common_header_mistakes(header: String) -> String { - // (1) Corrects any lowercase platform units in the read group to be all - // uppercase. This is especially important for data that contains 'illumina' - // instead of the correct 'ILLUMINA'. - let pattern = Regex::new("(\tPL:)(.+)").unwrap(); - let replaced = pattern.replace_all(&header, |c: &Captures<'_>| { - format!("{}{}", &c[1], c[2].to_uppercase()) - }); - - replaced.to_string() -} - -/// Parses a SAM/BAM/CRAM header from a string while also correcting common -/// header mistakes. -pub fn parse_header(header: String) -> anyhow::Result { - let header_raw_corrected = correct_common_header_mistakes(header); - - let header = header_raw_corrected - .parse() - .with_context(|| "could not parse SAM/BAM/CRAM header")?; - - Ok(header) -} - //====================================// // Sequence Alignment Map (SAM) files // //====================================// @@ -102,7 +70,7 @@ where // (2) Parse the header. debug!("parsing the header"); let raw_header = reader.read_header()?; - let parsed_header = parse_header(raw_header.clone()).with_context(|| "parsing SAM header")?; + let parsed_header = raw_header.parse().with_context(|| "parsing SAM header")?; // (3) Return the result. Ok(ParsedSAMFile { @@ -176,7 +144,7 @@ where // (2) Parse the header. debug!("parsing the header"); let raw_header = reader.read_header().await?; - let parsed_header = parse_header(raw_header.clone()).with_context(|| "parsing SAM header")?; + let parsed_header = raw_header.parse().with_context(|| "parsing SAM header")?; // (3) Return the result. Ok(ParsedAsyncSAMFile { @@ -187,21 +155,3 @@ where }, }) } - -//=======// -// Tests // -//=======// - -#[cfg(test)] -mod tests { - - use super::*; - - #[test] - pub fn test_illumina_lowercase_fix() { - let data = "@RG\tID:rg0\tPL:illumina\n"; - let expected = "@RG\tID:rg0\tPL:ILLUMINA\n"; - - assert_eq!(correct_common_header_mistakes(data.to_string()), expected); - } -} diff --git a/src/view/cram.rs b/src/view/cram.rs index 0b878a2..13ed785 100644 --- a/src/view/cram.rs +++ b/src/view/cram.rs @@ -14,7 +14,6 @@ use tokio::io; use tokio::io::AsyncWriteExt; use tracing::debug; -use crate::utils::formats::sam::parse_header; use crate::utils::pathbuf::AppendExtension; use crate::view::command::Mode; @@ -82,7 +81,7 @@ pub async fn view( } // (7) Parses the header text. - let header = parse_header(ht).with_context(|| "parsing CRAM header")?; + let header = ht.parse().with_context(|| "parsing CRAM header")?; // (8) Writes the records to the output stream. let mut writer = sam::AsyncWriter::new(handle);