diff --git a/wdl-format/CHANGELOG.md b/wdl-format/CHANGELOG.md index ec38904da..cb63781ad 100644 --- a/wdl-format/CHANGELOG.md +++ b/wdl-format/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added * Leading whitespace in command text is now normalized ([#240](https://github.com/stjude-rust-labs/wdl/pull/240)). +* Line breaks are now added in order to keep lines under the max line width (default 90 characters) ([#242](https://github.com/stjude-rust-labs/wdl/pull/242)). ### Fixed diff --git a/wdl-format/src/config.rs b/wdl-format/src/config.rs index e933bacce..7bbacb9f6 100644 --- a/wdl-format/src/config.rs +++ b/wdl-format/src/config.rs @@ -2,20 +2,29 @@ mod builder; mod indent; +mod max_line_length; pub use builder::Builder; pub use indent::Indent; +pub use max_line_length::MaxLineLength; /// Configuration for formatting. #[derive(Clone, Copy, Debug, Default)] pub struct Config { - /// The number of characters to indent. + /// The indentation configuration. indent: Indent, + /// The maximum line length. + max_line_length: MaxLineLength, } impl Config { - /// Gets the indent level of the configuration. + /// Gets the indentation configuration. pub fn indent(&self) -> Indent { self.indent } + + /// Gets the maximum line length of the configuration. + pub fn max_line_length(&self) -> Option { + self.max_line_length.get() + } } diff --git a/wdl-format/src/config/builder.rs b/wdl-format/src/config/builder.rs index daf5a91e9..78dee50bd 100644 --- a/wdl-format/src/config/builder.rs +++ b/wdl-format/src/config/builder.rs @@ -2,46 +2,20 @@ use crate::Config; use crate::config::Indent; - -/// An error related to a [`Builder`]. -#[derive(Debug)] -pub enum Error { - /// A required value was missing for a builder field. - Missing(&'static str), -} - -impl std::fmt::Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Error::Missing(field) => write!( - f, - "missing required value for '{field}' in a formatter configuration builder" - ), - } - } -} - -impl std::error::Error for Error {} - -/// A [`Result`](std::result::Result) with an [`Error`]. -pub type Result = std::result::Result; +use crate::config::MaxLineLength; /// A builder for a [`Config`]. +#[derive(Default)] pub struct Builder { /// The number of characters to indent. indent: Option, + /// The maximum line length. + max_line_length: Option, } impl Builder { - /// Creates a new builder with default values. - pub fn new() -> Self { - Default::default() - } - /// Sets the indentation level. /// - /// # Notes - /// /// This silently overwrites any previously provided value for the /// indentation level. pub fn indent(mut self, indent: Indent) -> Self { @@ -49,18 +23,22 @@ impl Builder { self } - /// Consumes `self` and attempts to build a [`Config`]. - pub fn try_build(self) -> Result { - let indent = self.indent.ok_or(Error::Missing("indent"))?; - - Ok(Config { indent }) + /// Sets the maximum line length. + /// + /// This silently overwrites any previously provided value for the maximum + /// line length. + pub fn max_line_length(mut self, max_line_length: MaxLineLength) -> Self { + self.max_line_length = Some(max_line_length); + self } -} -impl Default for Builder { - fn default() -> Self { - Self { - indent: Some(Default::default()), + /// Consumes `self` to build a [`Config`]. + pub fn build(self) -> Config { + let indent = self.indent.unwrap_or_default(); + let max_line_length = self.max_line_length.unwrap_or_default(); + Config { + indent, + max_line_length, } } } diff --git a/wdl-format/src/config/indent.rs b/wdl-format/src/config/indent.rs index 549f96f7f..8e613f076 100644 --- a/wdl-format/src/config/indent.rs +++ b/wdl-format/src/config/indent.rs @@ -1,18 +1,22 @@ //! Indentation within formatting configuration. -use std::num::NonZeroUsize; +use crate::SPACE; +use crate::TAB; +/// The default number of spaces to represent one indentation level. +const DEFAULT_SPACE_INDENT: usize = 4; /// The default indentation. -pub const DEFAULT_INDENT: Indent = Indent::Spaces(unsafe { NonZeroUsize::new_unchecked(4) }); +pub const DEFAULT_INDENT: Indent = Indent::Spaces(DEFAULT_SPACE_INDENT); +/// The maximum number of spaces to represent one indentation level. +pub const MAX_SPACE_INDENT: usize = 16; /// An indentation level. #[derive(Clone, Copy, Debug)] pub enum Indent { /// Tabs. - Tabs(NonZeroUsize), - + Tabs, /// Spaces. - Spaces(NonZeroUsize), + Spaces(usize), } impl Default for Indent { @@ -20,3 +24,50 @@ impl Default for Indent { DEFAULT_INDENT } } + +impl Indent { + /// Attempts to create a new indentation level configuration. + pub fn try_new(tab: bool, num_spaces: Option) -> Result { + match (tab, num_spaces) { + (true, None) => Ok(Indent::Tabs), + (true, Some(_)) => { + Err("Indentation with tabs cannot have a number of spaces".to_string()) + } + (false, Some(n)) => { + if n > MAX_SPACE_INDENT { + Err(format!( + "Indentation with spaces cannot have more than {} characters", + MAX_SPACE_INDENT + )) + } else { + Ok(Indent::Spaces(n)) + } + } + (false, None) => Ok(Indent::Spaces(DEFAULT_SPACE_INDENT)), + } + } + + /// Gets the number of characters to indent. + pub fn num(&self) -> usize { + match self { + Indent::Tabs => 1, + Indent::Spaces(n) => *n, + } + } + + /// Gets the character used for indentation. + pub fn character(&self) -> &str { + match self { + Indent::Tabs => TAB, + Indent::Spaces(_) => SPACE, + } + } + + /// Gets the string representation of the indentation. + pub fn string(&self) -> String { + match self { + Indent::Tabs => self.character().to_string(), + Indent::Spaces(n) => self.character().repeat(*n), + } + } +} diff --git a/wdl-format/src/config/max_line_length.rs b/wdl-format/src/config/max_line_length.rs new file mode 100644 index 000000000..f39248d15 --- /dev/null +++ b/wdl-format/src/config/max_line_length.rs @@ -0,0 +1,42 @@ +//! Configuration for max line length formatting. + +/// The default maximum line length. +pub const DEFAULT_MAX_LINE_LENGTH: usize = 90; +/// The minimum maximum line length. +pub const MIN_MAX_LINE_LENGTH: usize = 60; +/// The maximum maximum line length. +pub const MAX_MAX_LINE_LENGTH: usize = 240; + +/// The maximum line length. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct MaxLineLength(Option); + +impl MaxLineLength { + /// Attempts to create a new `MaxLineLength` with the provided value. + /// + /// A value of `0` indicates no maximum. + pub fn try_new(value: usize) -> Result { + let val = match value { + 0 => Self(None), + MIN_MAX_LINE_LENGTH..=MAX_MAX_LINE_LENGTH => Self(Some(value)), + _ => { + return Err(format!( + "The maximum line length must be between {} and {} or 0", + MIN_MAX_LINE_LENGTH, MAX_MAX_LINE_LENGTH + )); + } + }; + Ok(val) + } + + /// Gets the maximum line length. A value of `None` indicates no maximum. + pub fn get(&self) -> Option { + self.0 + } +} + +impl Default for MaxLineLength { + fn default() -> Self { + Self(Some(DEFAULT_MAX_LINE_LENGTH)) + } +} diff --git a/wdl-format/src/lib.rs b/wdl-format/src/lib.rs index 545d5b018..7e711d3d8 100644 --- a/wdl-format/src/lib.rs +++ b/wdl-format/src/lib.rs @@ -24,6 +24,9 @@ pub const NEWLINE: &str = "\n"; /// A space. pub const SPACE: &str = " "; +/// A tab. +pub const TAB: &str = "\t"; + /// Returns exactly one entity from an enumerable list of entities (usually a /// [`Vec`]). #[macro_export] @@ -208,16 +211,12 @@ impl Formatter { } /// Gets the [`PostToken`] stream. - /// - /// # Notes - /// - /// * This shouldn't be exposed publicly. fn to_stream(&self, element: W) -> TokenStream { let mut stream = TokenStream::default(); element.write(&mut stream); let mut postprocessor = Postprocessor::default(); - postprocessor.run(stream) + postprocessor.run(stream, self.config()) } } diff --git a/wdl-format/src/token.rs b/wdl-format/src/token.rs index 21e3d8df6..2171139a1 100644 --- a/wdl-format/src/token.rs +++ b/wdl-format/src/token.rs @@ -4,6 +4,7 @@ mod post; mod pre; use std::fmt::Display; +use std::rc::Rc; pub use post::*; pub use pre::*; @@ -19,8 +20,7 @@ pub trait Token: Eq + PartialEq { /// A stream of tokens. Tokens in this case are either [`PreToken`]s or /// [`PostToken`]s. Note that, unless you are working on formatting /// specifically, you should never need to work with [`PostToken`]s. -#[derive(Debug)] - +#[derive(Debug, Clone)] pub struct TokenStream(Vec); impl Default for TokenStream { @@ -52,6 +52,26 @@ impl TokenStream { let _ = self.0.pop(); } } + + /// Returns whether the stream is empty. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Returns an iterator over the tokens in the stream. + pub fn iter(&self) -> std::slice::Iter<'_, T> { + self.0.iter() + } + + /// Clears the stream. + pub fn clear(&mut self) { + self.0.clear(); + } + + /// Extends the stream with the tokens from another stream. + pub fn extend(&mut self, other: Self) { + self.0.extend(other.0); + } } impl IntoIterator for TokenStream { @@ -64,16 +84,16 @@ impl IntoIterator for TokenStream { } /// The kind of comment. -#[derive(Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq)] pub enum Comment { /// A comment on its own line. - Preceding(String), + Preceding(Rc), /// A comment on the same line as the code preceding it. - Inline(String), + Inline(Rc), } /// Trivia. -#[derive(Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq)] pub enum Trivia { /// A blank line. This may be ignored by the postprocessor. BlankLine, @@ -81,11 +101,16 @@ pub enum Trivia { Comment(Comment), } -/// Whether optional blank lines are allowed in the current context. +/// The policy for [`Trivia::BlankLine`] line spacing. +/// +/// Blank lines before comments and between comments are always permitted. #[derive(Eq, PartialEq, Default, Debug, Clone, Copy)] -pub enum LineSpacingPolicy { - /// Blank lines are allowed before comments. - BeforeComments, +pub enum TriviaBlankLineSpacingPolicy { + /// Blank lines are allowed before and between comments, but not after. + /// + /// i.e. a comment, then a blank line, then code, would have the blank + /// removed. + RemoveTrailingBlanks, /// Blank lines are always allowed. #[default] Always, diff --git a/wdl-format/src/token/post.rs b/wdl-format/src/token/post.rs index b3df4bfc5..5b3bde275 100644 --- a/wdl-format/src/token/post.rs +++ b/wdl-format/src/token/post.rs @@ -3,22 +3,27 @@ //! Generally speaking, unless you are working with the internals of code //! formatting, you're not going to be working with these. +use std::collections::HashSet; use std::fmt::Display; +use std::rc::Rc; use wdl_ast::SyntaxKind; use crate::Comment; -use crate::LineSpacingPolicy; +use crate::Config; use crate::NEWLINE; use crate::PreToken; use crate::SPACE; use crate::Token; use crate::TokenStream; use crate::Trivia; -use crate::config::Indent; +use crate::TriviaBlankLineSpacingPolicy; + +/// [`PostToken`]s that precede an inline comment. +const INLINE_COMMENT_PRECEDING_TOKENS: [PostToken; 2] = [PostToken::Space, PostToken::Space]; /// A postprocessed token. -#[derive(Eq, PartialEq)] +#[derive(Clone, Eq, PartialEq)] pub enum PostToken { /// A space. Space, @@ -29,8 +34,14 @@ pub enum PostToken { /// One indentation. Indent, + /// A temporary indent. + /// + /// This is added after a [`PostToken::Indent`] during the formatting of + /// command sections. + TempIndent(Rc), + /// A string literal. - Literal(String), + Literal(Rc), } impl std::fmt::Debug for PostToken { @@ -39,20 +50,21 @@ impl std::fmt::Debug for PostToken { Self::Space => write!(f, ""), Self::Newline => write!(f, ""), Self::Indent => write!(f, ""), - Self::Literal(value) => write!(f, " {value}"), + Self::TempIndent(value) => write!(f, ""), + Self::Literal(value) => write!(f, ""), } } } impl Token for PostToken { /// Returns a displayable version of the token. - fn display<'a>(&'a self, config: &'a crate::Config) -> impl Display + 'a { + fn display<'a>(&'a self, config: &'a Config) -> impl Display + 'a { /// A displayable version of a [`PostToken`]. struct Display<'a> { /// The token to display. token: &'a PostToken, /// The configuration to use. - config: &'a crate::Config, + config: &'a Config, } impl std::fmt::Display for Display<'_> { @@ -61,17 +73,9 @@ impl Token for PostToken { PostToken::Space => write!(f, "{SPACE}"), PostToken::Newline => write!(f, "{NEWLINE}"), PostToken::Indent => { - let (c, n) = match self.config.indent() { - Indent::Spaces(n) => (' ', n), - Indent::Tabs(n) => ('\t', n), - }; - - for _ in 0..n.get() { - write!(f, "{c}")?; - } - - Ok(()) + write!(f, "{indent}", indent = self.config.indent().string()) } + PostToken::TempIndent(value) => write!(f, "{value}"), PostToken::Literal(value) => write!(f, "{value}"), } } @@ -84,6 +88,95 @@ impl Token for PostToken { } } +impl PostToken { + /// Gets the width of the [`PostToken`]. + fn width(&self, config: &crate::Config) -> usize { + match self { + Self::Space => SPACE.len(), + Self::Newline => 0, + Self::Indent => config.indent().num(), + Self::TempIndent(value) => value.len(), + Self::Literal(value) => value.len(), + } + } +} + +impl TokenStream { + /// Gets the maximum width of the [`TokenStream`]. + /// + /// This is suitable to call if the stream represents multiple lines. + fn max_width(&self, config: &Config) -> usize { + let mut max: usize = 0; + let mut cur_width: usize = 0; + for token in self.iter() { + cur_width += token.width(config); + if token == &PostToken::Newline { + max = max.max(cur_width); + cur_width = 0; + } + } + max.max(cur_width) + } + + /// Gets the width of the last line of the [`TokenStream`]. + fn last_line_width(&self, config: &Config) -> usize { + let mut width = 0; + for token in self.iter().rev() { + if token == &PostToken::Newline { + break; + } + width += token.width(config); + } + width + } +} + +/// A line break. +enum LineBreak { + /// A line break that can be inserted before a token. + Before, + /// A line break that can be inserted after a token. + After, +} + +/// Returns whether a token can be line broken. +fn can_be_line_broken(kind: SyntaxKind) -> Option { + match kind { + SyntaxKind::CloseBrace + | SyntaxKind::CloseBracket + | SyntaxKind::CloseParen + | SyntaxKind::CloseHeredoc + | SyntaxKind::Assignment + | SyntaxKind::Plus + | SyntaxKind::Minus + | SyntaxKind::Asterisk + | SyntaxKind::Slash + | SyntaxKind::Percent + | SyntaxKind::Exponentiation + | SyntaxKind::Equal + | SyntaxKind::NotEqual + | SyntaxKind::Less + | SyntaxKind::LessEqual + | SyntaxKind::Greater + | SyntaxKind::GreaterEqual + | SyntaxKind::LogicalAnd + | SyntaxKind::LogicalOr + | SyntaxKind::AfterKeyword + | SyntaxKind::AsKeyword + | SyntaxKind::IfKeyword + | SyntaxKind::ElseKeyword + | SyntaxKind::ThenKeyword => Some(LineBreak::Before), + SyntaxKind::OpenBrace + | SyntaxKind::OpenBracket + | SyntaxKind::OpenParen + | SyntaxKind::OpenHeredoc + | SyntaxKind::Colon + | SyntaxKind::PlaceholderOpen + | SyntaxKind::Comma => Some(LineBreak::After), + _ => None, + } +} + /// Current position in a line. #[derive(Default, Eq, PartialEq)] enum LinePosition { @@ -107,29 +200,39 @@ pub struct Postprocessor { /// Whether the current line has been interrupted by trivia. interrupted: bool, - /// Whether blank lines are allowed in the current context. - line_spacing_policy: LineSpacingPolicy, + /// The current trivial blank line spacing policy. + line_spacing_policy: TriviaBlankLineSpacingPolicy, /// Whether temporary indentation is needed. temp_indent_needed: bool, - /// Temporary indentation to add while formatting command blocks. - temp_indent: String, + /// Temporary indentation to add. + temp_indent: Rc, } impl Postprocessor { /// Runs the postprocessor. - pub fn run(&mut self, input: TokenStream) -> TokenStream { + pub fn run(&mut self, input: TokenStream, config: &Config) -> TokenStream { let mut output = TokenStream::::default(); - - let mut stream = input.into_iter().peekable(); - while let Some(token) = stream.next() { - self.step(token, stream.peek(), &mut output); + let mut buffer = TokenStream::::default(); + + for token in input { + match token { + PreToken::LineEnd => { + self.flush(&buffer, &mut output, config); + self.trim_whitespace(&mut output); + output.push(PostToken::Newline); + + buffer.clear(); + self.interrupted = false; + self.position = LinePosition::StartOfLine; + } + _ => { + buffer.push(token); + } + } } - self.trim_whitespace(&mut output); - output.push(PostToken::Newline); - output } @@ -141,6 +244,11 @@ impl Postprocessor { next: Option<&PreToken>, stream: &mut TokenStream, ) { + if stream.is_empty() { + self.interrupted = false; + self.position = LinePosition::StartOfLine; + self.indent(stream); + } match token { PreToken::BlankLine => { self.blank_line(stream); @@ -171,7 +279,7 @@ impl Postprocessor { self.line_spacing_policy = policy; } PreToken::Literal(value, kind) => { - assert!(kind != SyntaxKind::Comment && kind != SyntaxKind::Whitespace); + assert!(!kind.is_trivia()); // This is special handling for inserting the empty string. // We remove any indentation or spaces from the end of the @@ -194,16 +302,21 @@ impl Postprocessor { | SyntaxKind::OpenParen | SyntaxKind::OpenHeredoc ) - && stream.0.last() == Some(&PostToken::Indent) + && matches!( + stream.0.last(), + Some(&PostToken::Indent) | Some(&PostToken::TempIndent(_)) + ) { stream.0.pop(); } if kind == SyntaxKind::LiteralCommandText { - self.temp_indent = value - .chars() - .take_while(|c| matches!(c, ' ' | '\t')) - .collect(); + self.temp_indent = Rc::new( + value + .chars() + .take_while(|c| matches!(c.to_string().as_str(), SPACE | crate::TAB)) + .collect(), + ); } stream.push(PostToken::Literal(value)); @@ -211,10 +324,10 @@ impl Postprocessor { } PreToken::Trivia(trivia) => match trivia { Trivia::BlankLine => match self.line_spacing_policy { - LineSpacingPolicy::Always => { + TriviaBlankLineSpacingPolicy::Always => { self.blank_line(stream); } - LineSpacingPolicy::BeforeComments => { + TriviaBlankLineSpacingPolicy::RemoveTrailingBlanks => { if matches!(next, Some(&PreToken::Trivia(Trivia::Comment(_)))) { self.blank_line(stream); } @@ -225,13 +338,15 @@ impl Postprocessor { Comment::Preceding(value) => { if !matches!( stream.0.last(), - Some(&PostToken::Newline) | Some(&PostToken::Indent) | None + Some(&PostToken::Newline) + | Some(&PostToken::Indent) + | Some(&PostToken::TempIndent(_)) + | None ) { self.interrupted = true; } self.end_line(stream); stream.push(PostToken::Literal(value)); - self.position = LinePosition::MiddleOfLine; } Comment::Inline(value) => { assert!(self.position == LinePosition::MiddleOfLine); @@ -241,11 +356,13 @@ impl Postprocessor { } } self.trim_last_line(stream); - stream.push(PostToken::Space); - stream.push(PostToken::Space); + for token in INLINE_COMMENT_PRECEDING_TOKENS.iter() { + stream.push(token.clone()); + } stream.push(PostToken::Literal(value)); } } + self.position = LinePosition::MiddleOfLine; self.end_line(stream); } }, @@ -258,12 +375,116 @@ impl Postprocessor { } } + /// Flushes the `in_stream` buffer to the `out_stream`. + fn flush( + &mut self, + in_stream: &TokenStream, + out_stream: &mut TokenStream, + config: &Config, + ) { + assert!(!self.interrupted); + assert!(self.position == LinePosition::StartOfLine); + let mut post_buffer = TokenStream::::default(); + let mut pre_buffer = in_stream.iter().peekable(); + let starting_indent = self.indent_level; + while let Some(token) = pre_buffer.next() { + let next = pre_buffer.peek().copied(); + self.step(token.clone(), next, &mut post_buffer); + } + + // If all lines are short enough, we can just add the post_buffer to the + // out_stream and be done. + if config.max_line_length().is_none() + || post_buffer.max_width(config) <= config.max_line_length().unwrap() + { + out_stream.extend(post_buffer); + return; + } + + // At least one line in the post_buffer is too long. + // We iterate through the in_stream to find potential line breaks, + // and then we iterate through the in_stream again to actually insert + // them in the proper places. + + let max_length = config.max_line_length().unwrap(); + + let mut potential_line_breaks: HashSet = HashSet::new(); + for (i, token) in in_stream.iter().enumerate() { + if let PreToken::Literal(_, kind) = token { + match can_be_line_broken(*kind) { + Some(LineBreak::Before) => { + potential_line_breaks.insert(i); + } + Some(LineBreak::After) => { + potential_line_breaks.insert(i + 1); + } + None => {} + } + } + } + + if potential_line_breaks.is_empty() { + // There are no potential line breaks, so we can't do anything. + out_stream.extend(post_buffer); + return; + } + + // Set up the buffers for the second pass. + post_buffer.clear(); + let mut pre_buffer = in_stream.iter().enumerate().peekable(); + + // Reset the indent level. + self.indent_level = starting_indent; + + while let Some((i, token)) = pre_buffer.next() { + let mut cache = None; + if potential_line_breaks.contains(&i) { + if post_buffer.last_line_width(config) > max_length { + // The line is already too long, and taking the next step + // can only make it worse. Insert a line break here. + self.interrupted = true; + self.end_line(&mut post_buffer); + } else { + // The line is not too long yet, but it might be after the + // next step. Cache the current state so we can revert to it + // if necessary. + cache = Some(post_buffer.clone()); + } + } + self.step( + token.clone(), + pre_buffer.peek().map(|(_, v)| &**v), + &mut post_buffer, + ); + + if let Some(cache) = cache { + if post_buffer.last_line_width(config) > max_length { + // The line is too long after the next step. Revert to the + // cached state and insert a line break. + post_buffer = cache; + self.interrupted = true; + self.end_line(&mut post_buffer); + self.step( + token.clone(), + pre_buffer.peek().map(|(_, v)| &**v), + &mut post_buffer, + ); + } + } + } + + out_stream.extend(post_buffer); + } + /// Trims any and all whitespace from the end of the stream. - fn trim_whitespace(&mut self, stream: &mut TokenStream) { + fn trim_whitespace(&self, stream: &mut TokenStream) { stream.trim_while(|token| { matches!( token, - PostToken::Space | PostToken::Newline | PostToken::Indent + PostToken::Space + | PostToken::Newline + | PostToken::Indent + | PostToken::TempIndent(_) ) }); } @@ -271,8 +492,10 @@ impl Postprocessor { /// Trims spaces and indents (and not newlines) from the end of the stream. fn trim_last_line(&mut self, stream: &mut TokenStream) { stream.trim_while(|token| { - matches!(token, PostToken::Space | PostToken::Indent) - || token == &PostToken::Literal(self.temp_indent.clone()) + matches!( + token, + PostToken::Space | PostToken::Indent | PostToken::TempIndent(_) + ) }); } @@ -292,7 +515,7 @@ impl Postprocessor { /// Pushes the current indentation level to the stream. /// This should only be called when the state is - /// [`LinePosition::StartOfLine`]. + /// [`LinePosition::StartOfLine`]. This does not change the state. fn indent(&self, stream: &mut TokenStream) { assert!(self.position == LinePosition::StartOfLine); @@ -307,14 +530,16 @@ impl Postprocessor { } if self.temp_indent_needed { - stream.push(PostToken::Literal(self.temp_indent.clone())); + stream.push(PostToken::TempIndent(self.temp_indent.clone())); } } /// Creates a blank line and then indents. fn blank_line(&mut self, stream: &mut TokenStream) { self.trim_whitespace(stream); - stream.push(PostToken::Newline); + if !stream.is_empty() { + stream.push(PostToken::Newline); + } stream.push(PostToken::Newline); self.position = LinePosition::StartOfLine; self.indent(stream); diff --git a/wdl-format/src/token/pre.rs b/wdl-format/src/token/pre.rs index e8f8f5ea7..2c0382654 100644 --- a/wdl-format/src/token/pre.rs +++ b/wdl-format/src/token/pre.rs @@ -1,13 +1,15 @@ //! Tokens emitted during the formatting of particular elements. +use std::rc::Rc; + use wdl_ast::SyntaxKind; use wdl_ast::SyntaxTokenExt; use crate::Comment; -use crate::LineSpacingPolicy; use crate::Token; use crate::TokenStream; use crate::Trivia; +use crate::TriviaBlankLineSpacingPolicy; /// A token that can be written by elements. /// @@ -17,7 +19,7 @@ use crate::Trivia; /// [`PostToken`](super::PostToken)s by a /// [`Postprocessor`](super::Postprocessor) (authors of elements are never /// expected to write [`PostToken`](super::PostToken)s directly). -#[derive(Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq)] pub enum PreToken { /// A blank line. BlankLine, @@ -34,11 +36,11 @@ pub enum PreToken { /// The end of an indented block. IndentEnd, - /// How to handle blank lines from this point onwards. - LineSpacingPolicy(LineSpacingPolicy), + /// How to handle trivial blank lines from this point onwards. + LineSpacingPolicy(TriviaBlankLineSpacingPolicy), /// Literal text. - Literal(String, SyntaxKind), + Literal(Rc, SyntaxKind), /// Trivia. Trivia(Trivia), @@ -50,14 +52,11 @@ pub enum PreToken { TempIndentEnd, } -/// The line length to use when displaying pretokens. -const DISPLAY_LINE_LENGTH: usize = 90; - impl std::fmt::Display for PreToken { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - PreToken::BlankLine => write!(f, "{}", " ".repeat(DISPLAY_LINE_LENGTH)), - PreToken::LineEnd => write!(f, ""), + PreToken::BlankLine => write!(f, ""), + PreToken::LineEnd => write!(f, ""), PreToken::WordEnd => write!(f, ""), PreToken::IndentStart => write!(f, ""), PreToken::IndentEnd => write!(f, ""), @@ -65,34 +64,18 @@ impl std::fmt::Display for PreToken { write!(f, "", policy) } PreToken::Literal(value, kind) => { - write!( - f, - "{:width$}", - value, - kind, - width = DISPLAY_LINE_LENGTH - ) + write!(f, "", kind, value,) } PreToken::Trivia(trivia) => match trivia { Trivia::BlankLine => { - write!(f, "{}", " ".repeat(DISPLAY_LINE_LENGTH)) + write!(f, "") } Trivia::Comment(comment) => match comment { Comment::Preceding(value) => { - write!( - f, - "{:width$}", - value, - width = DISPLAY_LINE_LENGTH - ) + write!(f, "", value,) } Comment::Inline(value) => { - write!( - f, - "{:width$}", - value, - width = DISPLAY_LINE_LENGTH - ) + write!(f, "", value,) } }, }, @@ -148,16 +131,18 @@ impl TokenStream { self.0.push(PreToken::IndentEnd); } - /// Inserts a blank lines allowed context change. + /// Inserts a trivial blank lines "always allowed" context change. pub fn blank_lines_allowed(&mut self) { - self.0 - .push(PreToken::LineSpacingPolicy(LineSpacingPolicy::Always)); + self.0.push(PreToken::LineSpacingPolicy( + TriviaBlankLineSpacingPolicy::Always, + )); } - /// Inserts a blank lines allowed between comments context change. + /// Inserts a trivial blank lines "not allowed after comments" context + /// change. pub fn blank_lines_allowed_between_comments(&mut self) { self.0.push(PreToken::LineSpacingPolicy( - LineSpacingPolicy::BeforeComments, + TriviaBlankLineSpacingPolicy::RemoveTrailingBlanks, )); } @@ -175,9 +160,9 @@ impl TokenStream { } } SyntaxKind::Comment => { - let comment = PreToken::Trivia(Trivia::Comment(Comment::Preceding( + let comment = PreToken::Trivia(Trivia::Comment(Comment::Preceding(Rc::new( token.text().trim_end().to_owned(), - ))); + )))); self.0.push(comment); } _ => unreachable!("unexpected trivia: {:?}", token), @@ -189,9 +174,9 @@ impl TokenStream { fn push_inline_trivia(&mut self, token: &wdl_ast::Token) { assert!(!token.syntax().kind().is_trivia()); if let Some(token) = token.syntax().inline_comment() { - let inline_comment = PreToken::Trivia(Trivia::Comment(Comment::Inline( + let inline_comment = PreToken::Trivia(Trivia::Comment(Comment::Inline(Rc::new( token.text().trim_end().to_owned(), - ))); + )))); self.0.push(inline_comment); } } @@ -204,7 +189,7 @@ impl TokenStream { pub fn push_ast_token(&mut self, token: &wdl_ast::Token) { self.push_preceding_trivia(token); self.0.push(PreToken::Literal( - token.syntax().text().to_owned(), + Rc::new(token.syntax().text().to_owned()), token.syntax().kind(), )); self.push_inline_trivia(token); @@ -215,15 +200,17 @@ impl TokenStream { /// token. pub fn push_literal_in_place_of_token(&mut self, token: &wdl_ast::Token, replacement: String) { self.push_preceding_trivia(token); - self.0 - .push(PreToken::Literal(replacement, token.syntax().kind())); + self.0.push(PreToken::Literal( + Rc::new(replacement), + token.syntax().kind(), + )); self.push_inline_trivia(token); } /// Pushes a literal string into the stream. /// This will not insert any trivia. pub fn push_literal(&mut self, value: String, kind: SyntaxKind) { - self.0.push(PreToken::Literal(value, kind)); + self.0.push(PreToken::Literal(Rc::new(value), kind)); } /// Returns the kind of the last literal token in the stream. diff --git a/wdl-format/src/v1/expr.rs b/wdl-format/src/v1/expr.rs index bee31c15a..756afe530 100644 --- a/wdl-format/src/v1/expr.rs +++ b/wdl-format/src/v1/expr.rs @@ -672,44 +672,9 @@ pub fn format_parenthesized_expr(element: &FormatElement, stream: &mut TokenStre /// Formats an [`IfExpr`](wdl_ast::v1::IfExpr). pub fn format_if_expr(element: &FormatElement, stream: &mut TokenStream) { - let mut children = element.children().expect("if expr children"); - - let last = stream.last_literal_kind(); - - // Nested `if` expressions are a special case where we don't want to add - // parentheses or increment the indent level. - // Otherwise, we need to add parentheses and increment the indent if the last - // token is not an open parenthesis. - let nested_else_if = matches!(last, Some(SyntaxKind::ElseKeyword)); - let paren_needed = !matches!(last, Some(SyntaxKind::OpenParen)) && !nested_else_if; - - if paren_needed { - stream.push_literal("(".to_string(), SyntaxKind::OpenParen); - } - if !nested_else_if { - stream.increment_indent(); - } - - let if_keyword = children.next().expect("if keyword"); - assert!(if_keyword.element().kind() == SyntaxKind::IfKeyword); - (&if_keyword).write(stream); - stream.end_word(); - - for child in children { - let kind = child.element().kind(); - if matches!(kind, SyntaxKind::ElseKeyword | SyntaxKind::ThenKeyword) { - stream.end_line(); - } + for child in element.children().expect("if expr children") { (&child).write(stream); - if matches!(kind, SyntaxKind::ElseKeyword | SyntaxKind::ThenKeyword) { - stream.end_word(); - } - } - - if !nested_else_if { - stream.decrement_indent(); - } - if paren_needed { - stream.push_literal(")".to_string(), SyntaxKind::CloseParen); + stream.end_word(); } + stream.trim_end(&PreToken::WordEnd); } diff --git a/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl index 6429ae3c3..7e938e9c5 100644 --- a/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl +++ b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl @@ -1047,11 +1047,7 @@ workflow chip { ref_fa, read_genome_tsv.ref_fa, ]) - File? bwa_idx_tar_ = ( - if defined(bwa_idx_tar) - then bwa_idx_tar - else read_genome_tsv.bwa_idx_tar - ) + File? bwa_idx_tar_ = if defined(bwa_idx_tar) then bwa_idx_tar else read_genome_tsv.bwa_idx_tar File bowtie2_idx_tar_ = select_first([ bowtie2_idx_tar, read_genome_tsv.bowtie2_idx_tar, @@ -1064,16 +1060,8 @@ workflow chip { gensz, read_genome_tsv.gensz, ]) - File? blacklist1_ = ( - if defined(blacklist) - then blacklist - else read_genome_tsv.blacklist - ) - File? blacklist2_ = ( - if defined(blacklist2) - then blacklist2 - else read_genome_tsv.blacklist2 - ) + File? blacklist1_ = if defined(blacklist) then blacklist else read_genome_tsv.blacklist + File? blacklist2_ = if defined(blacklist2) then blacklist2 else read_genome_tsv.blacklist2 # merge multiple blacklists # two blacklists can have different number of columns (3 vs 6) # so we limit merged blacklist's columns to 3 @@ -1088,13 +1076,8 @@ workflow chip { runtime_environment = runtime_environment, } } - File? blacklist_ = ( - if length(blacklists) > 1 - then pool_blacklist.ta_pooled - else if length(blacklists) > 0 - then blacklists[0] - else blacklist2_ - ) + File? blacklist_ = if length(blacklists) > 1 then pool_blacklist.ta_pooled else if length( + blacklists) > 0 then blacklists[0] else blacklist2_ String mito_chr_name_ = select_first([ mito_chr_name, read_genome_tsv.mito_chr_name, @@ -1110,185 +1093,109 @@ workflow chip { ]) ### temp vars (do not define these) - String aligner_ = ( - if defined(custom_align_py) - then "custom" - else aligner - ) - String peak_caller_ = ( - if pipeline_type == "tf" - then select_first([ - peak_caller, - "spp", - ]) - else select_first([ - peak_caller, - "macs2", - ]) - ) - String peak_type_ = ( - if peak_caller_ == "spp" - then "regionPeak" - else "narrowPeak" - ) + String aligner_ = if defined(custom_align_py) then "custom" else aligner + String peak_caller_ = if pipeline_type == "tf" then select_first([ + peak_caller, + "spp", + ]) else select_first([ + peak_caller, + "macs2", + ]) + String peak_type_ = if peak_caller_ == "spp" then "regionPeak" else "narrowPeak" Boolean enable_idr = pipeline_type == "tf" # enable_idr for TF chipseq only - String idr_rank_ = ( - if peak_caller_ == "spp" - then "signal.value" - else if peak_caller_ == "macs2" - then "p.value" - else "p.value" - ) + String idr_rank_ = if peak_caller_ == "spp" then "signal.value" else if peak_caller_ + == "macs2" then "p.value" else "p.value" Int cap_num_peak_spp = 300000 Int cap_num_peak_macs2 = 500000 - Int cap_num_peak_ = ( - if peak_caller_ == "spp" - then select_first([ - cap_num_peak, - cap_num_peak_spp, - ]) - else select_first([ - cap_num_peak, - cap_num_peak_macs2, - ]) - ) + Int cap_num_peak_ = if peak_caller_ == "spp" then select_first([ + cap_num_peak, + cap_num_peak_spp, + ]) else select_first([ + cap_num_peak, + cap_num_peak_macs2, + ]) Int mapq_thresh_ = mapq_thresh - Boolean enable_xcor_ = ( - if pipeline_type == "control" - then false - else true - ) - Boolean enable_count_signal_track_ = ( - if pipeline_type == "control" - then false - else enable_count_signal_track - ) - Boolean enable_jsd_ = ( - if pipeline_type == "control" - then false - else enable_jsd - ) - Boolean enable_gc_bias_ = ( - if pipeline_type == "control" - then false - else enable_gc_bias - ) - Boolean align_only_ = ( - if pipeline_type == "control" - then true - else align_only - ) - - Float align_mem_factor_ = ( - if aligner_ == "bowtie2" - then align_bowtie2_mem_factor - else align_bwa_mem_factor - ) - Float align_disk_factor_ = ( - if aligner_ == "bowtie2" - then align_bowtie2_disk_factor + Boolean enable_xcor_ = if pipeline_type == "control" then false else true + Boolean enable_count_signal_track_ = if pipeline_type == "control" then false else enable_count_signal_track + Boolean enable_jsd_ = if pipeline_type == "control" then false else enable_jsd + Boolean enable_gc_bias_ = if pipeline_type == "control" then false else enable_gc_bias + Boolean align_only_ = if pipeline_type == "control" then true else align_only + + Float align_mem_factor_ = if aligner_ == "bowtie2" then align_bowtie2_mem_factor else align_bwa_mem_factor + Float align_disk_factor_ = if aligner_ == "bowtie2" then align_bowtie2_disk_factor else align_bwa_disk_factor - ) - Float call_peak_mem_factor_ = ( - if peak_caller_ == "spp" - then call_peak_spp_mem_factor + Float call_peak_mem_factor_ = if peak_caller_ == "spp" then call_peak_spp_mem_factor else call_peak_macs2_mem_factor - ) - Float call_peak_disk_factor_ = ( - if peak_caller_ == "spp" - then call_peak_spp_disk_factor + Float call_peak_disk_factor_ = if peak_caller_ == "spp" then call_peak_spp_disk_factor else call_peak_macs2_disk_factor - ) # temporary 2-dim fastqs array [rep_id][merge_id] - Array[Array[File]] fastqs_R1 = ( - if length(fastqs_rep10_R1) > 0 - then [ - fastqs_rep1_R1, - fastqs_rep2_R1, - fastqs_rep3_R1, - fastqs_rep4_R1, - fastqs_rep5_R1, - fastqs_rep6_R1, - fastqs_rep7_R1, - fastqs_rep8_R1, - fastqs_rep9_R1, - fastqs_rep10_R1, - ] - else if length(fastqs_rep9_R1) > 0 - then [ - fastqs_rep1_R1, - fastqs_rep2_R1, - fastqs_rep3_R1, - fastqs_rep4_R1, - fastqs_rep5_R1, - fastqs_rep6_R1, - fastqs_rep7_R1, - fastqs_rep8_R1, - fastqs_rep9_R1, - ] - else if length(fastqs_rep8_R1) > 0 - then [ - fastqs_rep1_R1, - fastqs_rep2_R1, - fastqs_rep3_R1, - fastqs_rep4_R1, - fastqs_rep5_R1, - fastqs_rep6_R1, - fastqs_rep7_R1, - fastqs_rep8_R1, - ] - else if length(fastqs_rep7_R1) > 0 - then [ - fastqs_rep1_R1, - fastqs_rep2_R1, - fastqs_rep3_R1, - fastqs_rep4_R1, - fastqs_rep5_R1, - fastqs_rep6_R1, - fastqs_rep7_R1, - ] - else if length(fastqs_rep6_R1) > 0 - then [ - fastqs_rep1_R1, - fastqs_rep2_R1, - fastqs_rep3_R1, - fastqs_rep4_R1, - fastqs_rep5_R1, - fastqs_rep6_R1, - ] - else if length(fastqs_rep5_R1) > 0 - then [ - fastqs_rep1_R1, - fastqs_rep2_R1, - fastqs_rep3_R1, - fastqs_rep4_R1, - fastqs_rep5_R1, - ] - else if length(fastqs_rep4_R1) > 0 - then [ - fastqs_rep1_R1, - fastqs_rep2_R1, - fastqs_rep3_R1, - fastqs_rep4_R1, - ] - else if length(fastqs_rep3_R1) > 0 - then [ - fastqs_rep1_R1, - fastqs_rep2_R1, - fastqs_rep3_R1, - ] - else if length(fastqs_rep2_R1) > 0 - then [ - fastqs_rep1_R1, - fastqs_rep2_R1, - ] - else if length(fastqs_rep1_R1) > 0 - then [ - fastqs_rep1_R1, - ] - else [] - ) + Array[Array[File]] fastqs_R1 = if length(fastqs_rep10_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + fastqs_rep6_R1, + fastqs_rep7_R1, + fastqs_rep8_R1, + fastqs_rep9_R1, + fastqs_rep10_R1, + ] else if length(fastqs_rep9_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + fastqs_rep6_R1, + fastqs_rep7_R1, + fastqs_rep8_R1, + fastqs_rep9_R1, + ] else if length(fastqs_rep8_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + fastqs_rep6_R1, + fastqs_rep7_R1, + fastqs_rep8_R1, + ] else if length(fastqs_rep7_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + fastqs_rep6_R1, + fastqs_rep7_R1, + ] else if length(fastqs_rep6_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + fastqs_rep6_R1, + ] else if length(fastqs_rep5_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + ] else if length(fastqs_rep4_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + ] else if length(fastqs_rep3_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + ] else if length(fastqs_rep2_R1) > 0 then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + ] else if length(fastqs_rep1_R1) > 0 then [ + fastqs_rep1_R1, + ] else [] # no need to do that for R2 (R1 array will be used to determine presense of fastq for each rep) Array[Array[File]] fastqs_R2 = [ fastqs_rep1_R2, @@ -1304,94 +1211,72 @@ workflow chip { ] # temporary 2-dim ctl fastqs array [rep_id][merge_id] - Array[Array[File]] ctl_fastqs_R1 = ( - if length(ctl_fastqs_rep10_R1) > 0 - then [ - ctl_fastqs_rep1_R1, - ctl_fastqs_rep2_R1, - ctl_fastqs_rep3_R1, - ctl_fastqs_rep4_R1, - ctl_fastqs_rep5_R1, - ctl_fastqs_rep6_R1, - ctl_fastqs_rep7_R1, - ctl_fastqs_rep8_R1, - ctl_fastqs_rep9_R1, - ctl_fastqs_rep10_R1, - ] - else if length(ctl_fastqs_rep9_R1) > 0 - then [ - ctl_fastqs_rep1_R1, - ctl_fastqs_rep2_R1, - ctl_fastqs_rep3_R1, - ctl_fastqs_rep4_R1, - ctl_fastqs_rep5_R1, - ctl_fastqs_rep6_R1, - ctl_fastqs_rep7_R1, - ctl_fastqs_rep8_R1, - ctl_fastqs_rep9_R1, - ] - else if length(ctl_fastqs_rep8_R1) > 0 - then [ - ctl_fastqs_rep1_R1, - ctl_fastqs_rep2_R1, - ctl_fastqs_rep3_R1, - ctl_fastqs_rep4_R1, - ctl_fastqs_rep5_R1, - ctl_fastqs_rep6_R1, - ctl_fastqs_rep7_R1, - ctl_fastqs_rep8_R1, - ] - else if length(ctl_fastqs_rep7_R1) > 0 - then [ - ctl_fastqs_rep1_R1, - ctl_fastqs_rep2_R1, - ctl_fastqs_rep3_R1, - ctl_fastqs_rep4_R1, - ctl_fastqs_rep5_R1, - ctl_fastqs_rep6_R1, - ctl_fastqs_rep7_R1, - ] - else if length(ctl_fastqs_rep6_R1) > 0 - then [ - ctl_fastqs_rep1_R1, - ctl_fastqs_rep2_R1, - ctl_fastqs_rep3_R1, - ctl_fastqs_rep4_R1, - ctl_fastqs_rep5_R1, - ctl_fastqs_rep6_R1, - ] - else if length(ctl_fastqs_rep5_R1) > 0 - then [ - ctl_fastqs_rep1_R1, - ctl_fastqs_rep2_R1, - ctl_fastqs_rep3_R1, - ctl_fastqs_rep4_R1, - ctl_fastqs_rep5_R1, - ] - else if length(ctl_fastqs_rep4_R1) > 0 - then [ - ctl_fastqs_rep1_R1, - ctl_fastqs_rep2_R1, - ctl_fastqs_rep3_R1, - ctl_fastqs_rep4_R1, - ] - else if length(ctl_fastqs_rep3_R1) > 0 - then [ - ctl_fastqs_rep1_R1, - ctl_fastqs_rep2_R1, - ctl_fastqs_rep3_R1, - ] - else if length(ctl_fastqs_rep2_R1) > 0 - then [ - ctl_fastqs_rep1_R1, - ctl_fastqs_rep2_R1, - ] - else if length(ctl_fastqs_rep1_R1) > 0 - then [ - ctl_fastqs_rep1_R1, - ] - else [] - ) + Array[Array[File]] ctl_fastqs_R1 = if length(ctl_fastqs_rep10_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ctl_fastqs_rep5_R1, + ctl_fastqs_rep6_R1, + ctl_fastqs_rep7_R1, + ctl_fastqs_rep8_R1, + ctl_fastqs_rep9_R1, + ctl_fastqs_rep10_R1, + ] else if length(ctl_fastqs_rep9_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ctl_fastqs_rep5_R1, + ctl_fastqs_rep6_R1, + ctl_fastqs_rep7_R1, + ctl_fastqs_rep8_R1, + ctl_fastqs_rep9_R1, + ] else if length(ctl_fastqs_rep8_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ctl_fastqs_rep5_R1, + ctl_fastqs_rep6_R1, + ctl_fastqs_rep7_R1, + ctl_fastqs_rep8_R1, + ] else if length(ctl_fastqs_rep7_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ctl_fastqs_rep5_R1, + ctl_fastqs_rep6_R1, + ctl_fastqs_rep7_R1, + ] else if length(ctl_fastqs_rep6_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ctl_fastqs_rep5_R1, + ctl_fastqs_rep6_R1, + ] else if length(ctl_fastqs_rep5_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ctl_fastqs_rep5_R1, + ] else if length(ctl_fastqs_rep4_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ctl_fastqs_rep4_R1, + ] else if length(ctl_fastqs_rep3_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ctl_fastqs_rep3_R1, + ] else if length(ctl_fastqs_rep2_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ctl_fastqs_rep2_R1, + ] else if length(ctl_fastqs_rep1_R1) > 0 then [ + ctl_fastqs_rep1_R1, + ] else [] # no need to do that for R2 (R1 array will be used to determine presense of fastq for each rep) Array[Array[File]] ctl_fastqs_R2 = [ ctl_fastqs_rep1_R2, @@ -1409,45 +1294,22 @@ workflow chip { # temporary variables to get number of replicates # WDLic implementation of max(A,B,C,...) Int num_rep_fastq = length(fastqs_R1) - Int num_rep_bam = ( - if length(bams) < num_rep_fastq - then num_rep_fastq - else length(bams) - ) - Int num_rep_nodup_bam = ( - if length(nodup_bams) < num_rep_bam - then num_rep_bam - else length(nodup_bams) - ) - Int num_rep_ta = ( - if length(tas) < num_rep_nodup_bam - then num_rep_nodup_bam - else length(tas) - ) - Int num_rep_peak = ( - if length(peaks) < num_rep_ta - then num_rep_ta - else length(peaks) - ) + Int num_rep_bam = if length(bams) < num_rep_fastq then num_rep_fastq else length(bams) + Int num_rep_nodup_bam = if length(nodup_bams) < num_rep_bam then num_rep_bam else length( + nodup_bams) + Int num_rep_ta = if length(tas) < num_rep_nodup_bam then num_rep_nodup_bam else length( + tas) + Int num_rep_peak = if length(peaks) < num_rep_ta then num_rep_ta else length(peaks) Int num_rep = num_rep_peak # temporary variables to get number of controls Int num_ctl_fastq = length(ctl_fastqs_R1) - Int num_ctl_bam = ( - if length(ctl_bams) < num_ctl_fastq - then num_ctl_fastq - else length(ctl_bams) - ) - Int num_ctl_nodup_bam = ( - if length(ctl_nodup_bams) < num_ctl_bam - then num_ctl_bam - else length(ctl_nodup_bams) - ) - Int num_ctl_ta = ( - if length(ctl_tas) < num_ctl_nodup_bam - then num_ctl_nodup_bam - else length(ctl_tas) - ) + Int num_ctl_bam = if length(ctl_bams) < num_ctl_fastq then num_ctl_fastq else length( + ctl_bams) + Int num_ctl_nodup_bam = if length(ctl_nodup_bams) < num_ctl_bam then num_ctl_bam else length( + ctl_nodup_bams) + Int num_ctl_ta = if length(ctl_tas) < num_ctl_nodup_bam then num_ctl_nodup_bam else length( + ctl_tas) Int num_ctl = num_ctl_ta # sanity check for inputs @@ -1463,7 +1325,8 @@ workflow chip { runtime_environment = runtime_environment, } } - if ((num_rep_fastq > 0 || num_ctl_fastq > 0) && aligner_ != "bwa" && aligner_ != "bowtie2" && aligner_ != "custom") { + if ((num_rep_fastq > 0 || num_ctl_fastq > 0) && aligner_ != "bwa" && aligner_ != "bowtie2" + && aligner_ != "custom") { call raise_exception as error_wrong_aligner { input: msg = "Choose chip.aligner to align your fastqs. Choices: bwa, bowtie2, custom.", runtime_environment = runtime_environment, @@ -1481,16 +1344,22 @@ workflow chip { runtime_environment = runtime_environment, } } - if (aligner_ == "custom" && (!defined(custom_align_py) || !defined(custom_aligner_idx_tar))) { + if (aligner_ == "custom" && (!defined(custom_align_py) || !defined( + custom_aligner_idx_tar))) { call raise_exception as error_custom_aligner { input: msg = "To use a custom aligner, define chip.custom_align_py and chip.custom_aligner_idx_tar.", runtime_environment = runtime_environment, } } - if ((ctl_depth_limit > 0 || exp_ctl_depth_ratio_limit > 0) && num_ctl > 1 && length(ctl_paired_ends) > 1) { + if ((ctl_depth_limit > 0 || exp_ctl_depth_ratio_limit > 0) && num_ctl > 1 && length( + ctl_paired_ends) > 1) { call raise_exception as error_subsample_pooled_control_with_mixed_endedness { input: - msg = "Cannot use automatic control subsampling (\"chip.ctl_depth_limit\">0 and \"chip.exp_ctl_depth_limit\">0) for " + "multiple controls with mixed endedness (e.g. SE ctl-rep1 and PE ctl-rep2). " + "Automatic control subsampling is enabled by default. " + "Disable automatic control subsampling by explicitly defining the above two parameters as 0 in your input JSON file. " + "You can still use manual control subsamping (\"chip.ctl_subsample_reads\">0) since it is done " + "for individual control's TAG-ALIGN output according to each control's endedness. ", + msg = "Cannot use automatic control subsampling (\"chip.ctl_depth_limit\">0 and \"chip.exp_ctl_depth_limit\">0) for " + + "multiple controls with mixed endedness (e.g. SE ctl-rep1 and PE ctl-rep2). " + + "Automatic control subsampling is enabled by default. " + "Disable automatic control subsampling by explicitly defining the above two parameters as 0 in your input JSON file. " + + "You can still use manual control subsamping (\"chip.ctl_subsample_reads\">0) since it is done " + + "for individual control's TAG-ALIGN output according to each control's endedness. ", runtime_environment = runtime_environment, } } @@ -1511,24 +1380,17 @@ workflow chip { scatter (i in range(num_rep)) { # to override endedness definition for individual replicate # paired_end will override paired_ends[i] - Boolean paired_end_ = ( - if !defined(paired_end) && i < length(paired_ends) - then paired_ends[i] - else select_first([ - paired_end, - ]) - ) + Boolean paired_end_ = if !defined(paired_end) && i < length(paired_ends) then paired_ends[ + i] else select_first([ + paired_end, + ]) Boolean has_input_of_align = i < length(fastqs_R1) && length(fastqs_R1[i]) > 0 Boolean has_output_of_align = i < length(bams) if (has_input_of_align && !has_output_of_align) { call align { input: fastqs_R1 = fastqs_R1[i], - fastqs_R2 = ( - if paired_end_ - then fastqs_R2[i] - else [] - ), + fastqs_R2 = if paired_end_ then fastqs_R2[i] else [], crop_length = crop_length, crop_length_tol = crop_length_tol, trimmomatic_phred_score_format = trimmomatic_phred_score_format, @@ -1536,13 +1398,8 @@ workflow chip { aligner = aligner_, mito_chr_name = mito_chr_name_, custom_align_py = custom_align_py, - idx_tar = ( - if aligner == "bwa" - then bwa_idx_tar_ - else if aligner == "bowtie2" - then bowtie2_idx_tar_ - else custom_aligner_idx_tar - ), + idx_tar = if aligner == "bwa" then bwa_idx_tar_ else if aligner == "bowtie2" + then bowtie2_idx_tar_ else custom_aligner_idx_tar, paired_end = paired_end_, use_bwa_mem_for_pe = use_bwa_mem_for_pe, bwa_mem_read_len_limit = bwa_mem_read_len_limit, @@ -1557,11 +1414,7 @@ workflow chip { runtime_environment = runtime_environment, } } - File? bam_ = ( - if has_output_of_align - then bams[i] - else align.bam - ) + File? bam_ = if has_output_of_align then bams[i] else align.bam Boolean has_input_of_filter = has_output_of_align || defined(align.bam) Boolean has_output_of_filter = i < length(nodup_bams) @@ -1587,11 +1440,7 @@ workflow chip { runtime_environment = runtime_environment, } } - File? nodup_bam_ = ( - if has_output_of_filter - then nodup_bams[i] - else filter.nodup_bam - ) + File? nodup_bam_ = if has_output_of_filter then nodup_bams[i] else filter.nodup_bam Boolean has_input_of_bam2ta = has_output_of_filter || defined(filter.nodup_bam) Boolean has_output_of_bam2ta = i < length(tas) @@ -1609,11 +1458,7 @@ workflow chip { runtime_environment = runtime_environment, } } - File? ta_ = ( - if has_output_of_bam2ta - then tas[i] - else bam2ta.ta - ) + File? ta_ = if has_output_of_bam2ta then tas[i] else bam2ta.ta Boolean has_input_of_spr = has_output_of_bam2ta || defined(bam2ta.ta) if (has_input_of_spr && !align_only_ && !true_rep_only) { @@ -1627,7 +1472,8 @@ workflow chip { } } - Boolean has_input_of_count_signal_track = has_output_of_bam2ta || defined(bam2ta.ta) + Boolean has_input_of_count_signal_track = has_output_of_bam2ta || defined(bam2ta.ta + ) if (has_input_of_count_signal_track && enable_count_signal_track_) { # generate count signal track call count_signal_track { input: @@ -1659,13 +1505,8 @@ workflow chip { aligner = aligner_, mito_chr_name = mito_chr_name_, custom_align_py = custom_align_py, - idx_tar = ( - if aligner == "bwa" - then bwa_idx_tar_ - else if aligner == "bowtie2" - then bowtie2_idx_tar_ - else custom_aligner_idx_tar - ), + idx_tar = if aligner == "bwa" then bwa_idx_tar_ else if aligner == "bowtie2" + then bowtie2_idx_tar_ else custom_aligner_idx_tar, paired_end = false, use_bwa_mem_for_pe = false, bwa_mem_read_len_limit = 0, @@ -1712,7 +1553,8 @@ workflow chip { } # special trimming/mapping for xcor (when starting from BAMs) - Boolean has_input_of_bam2ta_no_dedup = (has_output_of_align || defined(align.bam)) && !defined(bam2ta_no_dedup_R1.ta) + Boolean has_input_of_bam2ta_no_dedup = (has_output_of_align || defined(align.bam)) + && !defined(bam2ta_no_dedup_R1.ta) if (has_input_of_bam2ta_no_dedup) { call filter as filter_no_dedup { input: bam = bam_, @@ -1750,18 +1592,9 @@ workflow chip { # if not starting from fastqs, keep using old method # (mapping with both ends for tag-aligns to be used for xcor) # subsample tagalign (non-mito) and cross-correlation analysis - File? ta_xcor = ( - if defined(bam2ta_no_dedup_R1.ta) - then bam2ta_no_dedup_R1.ta - else if defined(bam2ta_no_dedup.ta) - then bam2ta_no_dedup.ta - else ta_ - ) - Boolean paired_end_xcor = ( - if defined(bam2ta_no_dedup_R1.ta) - then false - else paired_end_ - ) + File? ta_xcor = if defined(bam2ta_no_dedup_R1.ta) then bam2ta_no_dedup_R1.ta else + if defined(bam2ta_no_dedup.ta) then bam2ta_no_dedup.ta else ta_ + Boolean paired_end_xcor = if defined(bam2ta_no_dedup_R1.ta) then false else paired_end_ Boolean has_input_of_xcor = defined(ta_xcor) if (has_input_of_xcor && enable_xcor_) { @@ -1783,36 +1616,26 @@ workflow chip { # before peak calling, get fragment length from xcor analysis or given input # if fraglen [] is defined in the input JSON, fraglen from xcor will be ignored - Int? fraglen_ = ( - if i < length(fraglen) - then fraglen[i] - else xcor.fraglen - ) + Int? fraglen_ = if i < length(fraglen) then fraglen[i] else xcor.fraglen } # align each control scatter (i in range(num_ctl)) { # to override endedness definition for individual control # ctl_paired_end will override ctl_paired_ends[i] - Boolean ctl_paired_end_ = ( - if !defined(ctl_paired_end) && i < length(ctl_paired_ends) - then ctl_paired_ends[i] - else select_first([ - ctl_paired_end, - paired_end, - ]) - ) - - Boolean has_input_of_align_ctl = i < length(ctl_fastqs_R1) && length(ctl_fastqs_R1[i]) > 0 + Boolean ctl_paired_end_ = if !defined(ctl_paired_end) && i < length( + ctl_paired_ends) then ctl_paired_ends[i] else select_first([ + ctl_paired_end, + paired_end, + ]) + + Boolean has_input_of_align_ctl = i < length(ctl_fastqs_R1) && length(ctl_fastqs_R1[ + i]) > 0 Boolean has_output_of_align_ctl = i < length(ctl_bams) if (has_input_of_align_ctl && !has_output_of_align_ctl) { call align as align_ctl { input: fastqs_R1 = ctl_fastqs_R1[i], - fastqs_R2 = ( - if ctl_paired_end_ - then ctl_fastqs_R2[i] - else [] - ), + fastqs_R2 = if ctl_paired_end_ then ctl_fastqs_R2[i] else [], crop_length = crop_length, crop_length_tol = crop_length_tol, trimmomatic_phred_score_format = trimmomatic_phred_score_format, @@ -1820,13 +1643,8 @@ workflow chip { aligner = aligner_, mito_chr_name = mito_chr_name_, custom_align_py = custom_align_py, - idx_tar = ( - if aligner == "bwa" - then bwa_idx_tar_ - else if aligner == "bowtie2" - then bowtie2_idx_tar_ - else custom_aligner_idx_tar - ), + idx_tar = if aligner == "bwa" then bwa_idx_tar_ else if aligner == "bowtie2" + then bowtie2_idx_tar_ else custom_aligner_idx_tar, paired_end = ctl_paired_end_, use_bwa_mem_for_pe = use_bwa_mem_for_pe, bwa_mem_read_len_limit = bwa_mem_read_len_limit, @@ -1841,13 +1659,10 @@ workflow chip { runtime_environment = runtime_environment, } } - File? ctl_bam_ = ( - if has_output_of_align_ctl - then ctl_bams[i] - else align_ctl.bam - ) + File? ctl_bam_ = if has_output_of_align_ctl then ctl_bams[i] else align_ctl.bam - Boolean has_input_of_filter_ctl = has_output_of_align_ctl || defined(align_ctl.bam) + Boolean has_input_of_filter_ctl = has_output_of_align_ctl || defined(align_ctl.bam + ) Boolean has_output_of_filter_ctl = i < length(ctl_nodup_bams) # skip if we already have output of this step if (has_input_of_filter_ctl && !has_output_of_filter_ctl) { @@ -1871,13 +1686,10 @@ workflow chip { runtime_environment = runtime_environment, } } - File? ctl_nodup_bam_ = ( - if has_output_of_filter_ctl - then ctl_nodup_bams[i] - else filter_ctl.nodup_bam - ) + File? ctl_nodup_bam_ = if has_output_of_filter_ctl then ctl_nodup_bams[i] else filter_ctl.nodup_bam - Boolean has_input_of_bam2ta_ctl = has_output_of_filter_ctl || defined(filter_ctl.nodup_bam) + Boolean has_input_of_bam2ta_ctl = has_output_of_filter_ctl || defined(filter_ctl.nodup_bam + ) Boolean has_output_of_bam2ta_ctl = i < length(ctl_tas) if (has_input_of_bam2ta_ctl && !has_output_of_bam2ta_ctl) { call bam2ta as bam2ta_ctl { input: @@ -1893,11 +1705,7 @@ workflow chip { runtime_environment = runtime_environment, } } - File? ctl_ta_ = ( - if has_output_of_bam2ta_ctl - then ctl_tas[i] - else bam2ta_ctl.ta - ) + File? ctl_ta_ = if has_output_of_bam2ta_ctl then ctl_tas[i] else bam2ta_ctl.ta } # if there are TAs for ALL replicates then pool them @@ -1945,7 +1753,8 @@ workflow chip { } Boolean has_input_of_count_signal_track_pooled = defined(pool_ta.ta_pooled) - if (has_input_of_count_signal_track_pooled && enable_count_signal_track_ && num_rep > 1) { + if (has_input_of_count_signal_track_pooled && enable_count_signal_track_ && num_rep > 1 + ) { call count_signal_track as count_signal_track_pooled { input: ta = pool_ta.ta_pooled, chrsz = chrsz_, @@ -1970,7 +1779,8 @@ workflow chip { } } - Boolean has_all_input_of_choose_ctl = length(select_all(ta_)) == num_rep && length(select_all(ctl_ta_)) == num_ctl && num_ctl > 0 + Boolean has_all_input_of_choose_ctl = length(select_all(ta_)) == num_rep && length( + select_all(ctl_ta_)) == num_ctl && num_ctl > 0 if (has_all_input_of_choose_ctl && !align_only_) { # choose appropriate control for each exp IP replicate # outputs: @@ -1995,35 +1805,21 @@ workflow chip { # >=0: control TA index (this means that control TA with this index exists) # -1: use pooled control # -2: there is no control - Int chosen_ctl_ta_id = ( - if has_all_input_of_choose_ctl && !align_only_ - then select_first([ - choose_ctl.chosen_ctl_ta_ids, - ])[i] - else -2 - ) - Int chosen_ctl_ta_subsample = ( - if has_all_input_of_choose_ctl && !align_only_ - then select_first([ - choose_ctl.chosen_ctl_ta_subsample, - ])[i] - else 0 - ) - Boolean chosen_ctl_paired_end = ( - if chosen_ctl_ta_id == -2 - then false - else if chosen_ctl_ta_id == -1 - then ctl_paired_end_[0] - else ctl_paired_end_[chosen_ctl_ta_id] - ) + Int chosen_ctl_ta_id = if has_all_input_of_choose_ctl && !align_only_ then select_first( + [ + choose_ctl.chosen_ctl_ta_ids, + ])[i] else -2 + Int chosen_ctl_ta_subsample = if has_all_input_of_choose_ctl && !align_only_ then select_first( + [ + choose_ctl.chosen_ctl_ta_subsample, + ])[i] else 0 + Boolean chosen_ctl_paired_end = if chosen_ctl_ta_id == -2 then false else if chosen_ctl_ta_id + == -1 then ctl_paired_end_[0] else ctl_paired_end_[chosen_ctl_ta_id] if (chosen_ctl_ta_id > -2 && chosen_ctl_ta_subsample > 0) { call subsample_ctl { input: - ta = ( - if chosen_ctl_ta_id == -1 - then pool_ta_ctl.ta_pooled - else ctl_ta_[chosen_ctl_ta_id] - ), + ta = if chosen_ctl_ta_id == -1 then pool_ta_ctl.ta_pooled else ctl_ta_[ + chosen_ctl_ta_id], subsample = chosen_ctl_ta_subsample, paired_end = chosen_ctl_paired_end, mem_factor = subsample_ctl_mem_factor, @@ -2031,35 +1827,25 @@ workflow chip { runtime_environment = runtime_environment, } } - Array[File] chosen_ctl_tas = ( - if chosen_ctl_ta_id <= -2 - then [] - else if chosen_ctl_ta_subsample > 0 - then [ - select_first([ - subsample_ctl.ta_subsampled, - ]), - ] - else if chosen_ctl_ta_id == -1 - then [ - select_first([ - pool_ta_ctl.ta_pooled, - ]), - ] - else [ - select_first([ - ctl_ta_[chosen_ctl_ta_id], - ]), - ] - ) + Array[File] chosen_ctl_tas = if chosen_ctl_ta_id <= -2 then [] else if chosen_ctl_ta_subsample + > 0 then [ + select_first([ + subsample_ctl.ta_subsampled, + ]), + ] else if chosen_ctl_ta_id == -1 then [ + select_first([ + pool_ta_ctl.ta_pooled, + ]), + ] else [ + select_first([ + ctl_ta_[chosen_ctl_ta_id], + ]), + ] } - Int chosen_ctl_ta_pooled_subsample = ( - if has_all_input_of_choose_ctl && !align_only_ + Int chosen_ctl_ta_pooled_subsample = if has_all_input_of_choose_ctl && !align_only_ then select_first([ - choose_ctl.chosen_ctl_ta_subsample_pooled, - ]) - else 0 - ) + choose_ctl.chosen_ctl_ta_subsample_pooled, + ]) else 0 # workaround for dx error (Unsupported combination: womType: Int womValue: ([225], Array[Int])) Array[Int] fraglen_tmp = select_all(fraglen_) @@ -2091,20 +1877,11 @@ workflow chip { mem_factor = call_peak_mem_factor_, disk_factor = call_peak_disk_factor_, time_hr = call_peak_time_hr, - runtime_environment = ( - if peak_caller_ == "spp" - then runtime_environment_spp - else if peak_caller_ == "macs2" - then runtime_environment_macs2 - else runtime_environment - ), + runtime_environment = if peak_caller_ == "spp" then runtime_environment_spp + else if peak_caller_ == "macs2" then runtime_environment_macs2 else runtime_environment, } } - File? peak_ = ( - if has_output_of_call_peak - then peaks[i] - else call_peak.peak - ) + File? peak_ = if has_output_of_call_peak then peaks[i] else call_peak.peak # signal track if (has_input_of_call_peak && !align_only_) { @@ -2153,20 +1930,11 @@ workflow chip { mem_factor = call_peak_mem_factor_, disk_factor = call_peak_disk_factor_, time_hr = call_peak_time_hr, - runtime_environment = ( - if peak_caller_ == "spp" - then runtime_environment_spp - else if peak_caller_ == "macs2" - then runtime_environment_macs2 - else runtime_environment - ), + runtime_environment = if peak_caller_ == "spp" then runtime_environment_spp + else if peak_caller_ == "macs2" then runtime_environment_macs2 else runtime_environment, } } - File? peak_pr1_ = ( - if has_output_of_call_peak_pr1 - then peaks_pr1[i] - else call_peak_pr1.peak - ) + File? peak_pr1_ = if has_output_of_call_peak_pr1 then peaks_pr1[i] else call_peak_pr1.peak # call peaks on 2nd pseudo replicated tagalign Boolean has_input_of_call_peak_pr2 = defined(spr.ta_pr2[i]) @@ -2194,20 +1962,11 @@ workflow chip { mem_factor = call_peak_mem_factor_, disk_factor = call_peak_disk_factor_, time_hr = call_peak_time_hr, - runtime_environment = ( - if peak_caller_ == "spp" - then runtime_environment_spp - else if peak_caller_ == "macs2" - then runtime_environment_macs2 - else runtime_environment - ), + runtime_environment = if peak_caller_ == "spp" then runtime_environment_spp + else if peak_caller_ == "macs2" then runtime_environment_macs2 else runtime_environment, } } - File? peak_pr2_ = ( - if has_output_of_call_peak_pr2 - then peaks_pr2[i] - else call_peak_pr2.peak - ) + File? peak_pr2_ = if has_output_of_call_peak_pr2 then peaks_pr2[i] else call_peak_pr2.peak } # if ( !align_only_ && num_rep > 1 ) { @@ -2222,11 +1981,7 @@ workflow chip { if (has_all_input_of_choose_ctl && !align_only_ && chosen_ctl_ta_pooled_subsample > 0) { call subsample_ctl as subsample_ctl_pooled { input: - ta = ( - if num_ctl < 2 - then ctl_ta_[0] - else pool_ta_ctl.ta_pooled - ), + ta = if num_ctl < 2 then ctl_ta_[0] else pool_ta_ctl.ta_pooled, subsample = chosen_ctl_ta_pooled_subsample, paired_end = ctl_paired_end_[0], mem_factor = subsample_ctl_mem_factor, @@ -2235,25 +1990,19 @@ workflow chip { } } # actually not an array - Array[File?] chosen_ctl_ta_pooled = ( - if !has_all_input_of_choose_ctl || align_only_ - then [] - else if chosen_ctl_ta_pooled_subsample > 0 - then [ - subsample_ctl_pooled.ta_subsampled, - ] - else if num_ctl < 2 - then [ - ctl_ta_[0], - ] - else [ - pool_ta_ctl.ta_pooled, - ] - ) + Array[File?] chosen_ctl_ta_pooled = if !has_all_input_of_choose_ctl || align_only_ + then [] else if chosen_ctl_ta_pooled_subsample > 0 then [ + subsample_ctl_pooled.ta_subsampled, + ] else if num_ctl < 2 then [ + ctl_ta_[0], + ] else [ + pool_ta_ctl.ta_pooled, + ] Boolean has_input_of_call_peak_pooled = defined(pool_ta.ta_pooled) Boolean has_output_of_call_peak_pooled = defined(peak_pooled) - if (has_input_of_call_peak_pooled && !has_output_of_call_peak_pooled && !align_only_ && num_rep > 1) { + if (has_input_of_call_peak_pooled && !has_output_of_call_peak_pooled && !align_only_ + && num_rep > 1) { # call peaks on pooled replicate # always call peaks for pooled replicate to get signal tracks call call_peak as call_peak_pooled { input: @@ -2278,20 +2027,11 @@ workflow chip { mem_factor = call_peak_mem_factor_, disk_factor = call_peak_disk_factor_, time_hr = call_peak_time_hr, - runtime_environment = ( - if peak_caller_ == "spp" - then runtime_environment_spp - else if peak_caller_ == "macs2" - then runtime_environment_macs2 - else runtime_environment - ), + runtime_environment = if peak_caller_ == "spp" then runtime_environment_spp + else if peak_caller_ == "macs2" then runtime_environment_macs2 else runtime_environment, } } - File? peak_pooled_ = ( - if has_output_of_call_peak_pooled - then peak_pooled - else call_peak_pooled.peak - ) + File? peak_pooled_ = if has_output_of_call_peak_pooled then peak_pooled else call_peak_pooled.peak # macs2 signal track for pooled rep if (has_input_of_call_peak_pooled && !align_only_ && num_rep > 1) { @@ -2316,7 +2056,8 @@ workflow chip { Boolean has_input_of_call_peak_ppr1 = defined(pool_ta_pr1.ta_pooled) Boolean has_output_of_call_peak_ppr1 = defined(peak_ppr1) - if (has_input_of_call_peak_ppr1 && !has_output_of_call_peak_ppr1 && !align_only_ && !true_rep_only && num_rep > 1) { + if (has_input_of_call_peak_ppr1 && !has_output_of_call_peak_ppr1 && !align_only_ && !true_rep_only + && num_rep > 1) { # call peaks on 1st pooled pseudo replicates call call_peak as call_peak_ppr1 { input: peak_caller = peak_caller_, @@ -2340,24 +2081,16 @@ workflow chip { mem_factor = call_peak_mem_factor_, disk_factor = call_peak_disk_factor_, time_hr = call_peak_time_hr, - runtime_environment = ( - if peak_caller_ == "spp" - then runtime_environment_spp - else if peak_caller_ == "macs2" - then runtime_environment_macs2 - else runtime_environment - ), + runtime_environment = if peak_caller_ == "spp" then runtime_environment_spp + else if peak_caller_ == "macs2" then runtime_environment_macs2 else runtime_environment, } } - File? peak_ppr1_ = ( - if has_output_of_call_peak_ppr1 - then peak_ppr1 - else call_peak_ppr1.peak - ) + File? peak_ppr1_ = if has_output_of_call_peak_ppr1 then peak_ppr1 else call_peak_ppr1.peak Boolean has_input_of_call_peak_ppr2 = defined(pool_ta_pr2.ta_pooled) Boolean has_output_of_call_peak_ppr2 = defined(peak_ppr2) - if (has_input_of_call_peak_ppr2 && !has_output_of_call_peak_ppr2 && !align_only_ && !true_rep_only && num_rep > 1) { + if (has_input_of_call_peak_ppr2 && !has_output_of_call_peak_ppr2 && !align_only_ && !true_rep_only + && num_rep > 1) { # call peaks on 2nd pooled pseudo replicates call call_peak as call_peak_ppr2 { input: peak_caller = peak_caller_, @@ -2381,20 +2114,11 @@ workflow chip { mem_factor = call_peak_mem_factor_, disk_factor = call_peak_disk_factor_, time_hr = call_peak_time_hr, - runtime_environment = ( - if peak_caller_ == "spp" - then runtime_environment_spp - else if peak_caller_ == "macs2" - then runtime_environment_macs2 - else runtime_environment - ), + runtime_environment = if peak_caller_ == "spp" then runtime_environment_spp + else if peak_caller_ == "macs2" then runtime_environment_macs2 else runtime_environment, } } - File? peak_ppr2_ = ( - if has_output_of_call_peak_ppr2 - then peak_ppr2 - else call_peak_ppr2.peak - ) + File? peak_ppr2_ = if has_output_of_call_peak_ppr2 then peak_ppr2 else call_peak_ppr2.peak # do IDR/overlap on all pairs of two replicates (i,j) # where i and j are zero-based indices and 0 <= i < j < num_rep @@ -2521,13 +2245,9 @@ workflow chip { call reproducibility as reproducibility_overlap { input: prefix = "overlap", peaks = select_all(overlap.bfilt_overlap_peak), - peaks_pr = ( - if defined(overlap_pr.bfilt_overlap_peak) - then select_first([ - overlap_pr.bfilt_overlap_peak, - ]) - else [] - ), + peaks_pr = if defined(overlap_pr.bfilt_overlap_peak) then select_first([ + overlap_pr.bfilt_overlap_peak, + ]) else [], peak_ppr = overlap_ppr.bfilt_overlap_peak, peak_type = peak_type_, chrsz = chrsz_, @@ -2540,13 +2260,9 @@ workflow chip { call reproducibility as reproducibility_idr { input: prefix = "idr", peaks = select_all(idr.bfilt_idr_peak), - peaks_pr = ( - if defined(idr_pr.bfilt_idr_peak) - then select_first([ - idr_pr.bfilt_idr_peak, - ]) - else [] - ), + peaks_pr = if defined(idr_pr.bfilt_idr_peak) then select_first([ + idr_pr.bfilt_idr_peak, + ]) else [], peak_ppr = idr_ppr.bfilt_idr_peak, peak_type = peak_type_, chrsz = chrsz_, @@ -2585,13 +2301,9 @@ workflow chip { ctl_lib_complexity_qcs = select_all(filter_ctl.lib_complexity_qc), jsd_plot = jsd.plot, - jsd_qcs = ( - if defined(jsd.jsd_qcs) - then select_first([ - jsd.jsd_qcs, - ]) - else [] - ), + jsd_qcs = if defined(jsd.jsd_qcs) then select_first([ + jsd.jsd_qcs, + ]) else [], frip_qcs = select_all(call_peak.frip_qc), frip_qcs_pr1 = select_all(call_peak_pr1.frip_qc), @@ -2601,31 +2313,19 @@ workflow chip { frip_qc_ppr2 = call_peak_ppr2.frip_qc, idr_plots = select_all(idr.idr_plot), - idr_plots_pr = ( - if defined(idr_pr.idr_plot) - then select_first([ - idr_pr.idr_plot, - ]) - else [] - ), + idr_plots_pr = if defined(idr_pr.idr_plot) then select_first([ + idr_pr.idr_plot, + ]) else [], idr_plot_ppr = idr_ppr.idr_plot, frip_idr_qcs = select_all(idr.frip_qc), - frip_idr_qcs_pr = ( - if defined(idr_pr.frip_qc) - then select_first([ - idr_pr.frip_qc, - ]) - else [] - ), + frip_idr_qcs_pr = if defined(idr_pr.frip_qc) then select_first([ + idr_pr.frip_qc, + ]) else [], frip_idr_qc_ppr = idr_ppr.frip_qc, frip_overlap_qcs = select_all(overlap.frip_qc), - frip_overlap_qcs_pr = ( - if defined(overlap_pr.frip_qc) - then select_first([ - overlap_pr.frip_qc, - ]) - else [] - ), + frip_overlap_qcs_pr = if defined(overlap_pr.frip_qc) then select_first([ + overlap_pr.frip_qc, + ]) else [], frip_overlap_qc_ppr = overlap_ppr.frip_qc, idr_reproducibility_qc = reproducibility_idr.reproducibility_qc, overlap_reproducibility_qc = reproducibility_overlap.reproducibility_qc, @@ -2686,16 +2386,12 @@ task align { Int disk_gb = round(40.0 + disk_factor * input_file_size_gb) Float trimmomatic_java_heap_factor = 0.9 - Array[Array[File]] tmp_fastqs = ( - if paired_end - then transpose([ - fastqs_R1, - fastqs_R2, - ]) - else transpose([ - fastqs_R1, - ]) - ) + Array[Array[File]] tmp_fastqs = if paired_end then transpose([ + fastqs_R1, + fastqs_R2, + ]) else transpose([ + fastqs_R1, + ]) command <<< set -e @@ -2708,11 +2404,7 @@ task align { fi python3 $(which encode_task_merge_fastq.py) \ ~{write_tsv(tmp_fastqs)} \ - ~{( - if paired_end - then "--paired-end" - else "" - )} \ + ~{if paired_end then "--paired-end" else ""} \ ~{"--nth " + cpu} if [ -z '~{trim_bp}' ]; then @@ -2736,30 +2428,15 @@ task align { NEW_SUFFIX="$SUFFIX"_cropped python3 $(which encode_task_trimmomatic.py) \ --fastq1 R1$SUFFIX/*.fastq.gz \ - ~{( - if paired_end - then "--fastq2 R2$SUFFIX/*.fastq.gz" - else "" - )} \ - ~{( - if paired_end - then "--paired-end" - else "" - )} \ + ~{if paired_end then "--fastq2 R2$SUFFIX/*.fastq.gz" else ""} \ + ~{if paired_end then "--paired-end" else ""} \ --crop-length ~{crop_length} \ --crop-length-tol "~{crop_length_tol}" \ ~{"--phred-score-format " + trimmomatic_phred_score_format} \ --out-dir-R1 R1$NEW_SUFFIX \ - ~{( - if paired_end - then "--out-dir-R2 R2$NEW_SUFFIX" - else "" - )} \ - ~{"--trimmomatic-java-heap " + ( - if defined(trimmomatic_java_heap) - then trimmomatic_java_heap - else (round(mem_gb * trimmomatic_java_heap_factor) + "G") - )} \ + ~{if paired_end then "--out-dir-R2 R2$NEW_SUFFIX" else ""} \ + ~{"--trimmomatic-java-heap " + if defined(trimmomatic_java_heap) then trimmomatic_java_heap + else (round(mem_gb * trimmomatic_java_heap_factor) + "G")} \ ~{"--nth " + cpu} SUFFIX=$NEW_SUFFIX fi @@ -2768,21 +2445,9 @@ task align { python3 $(which encode_task_bwa.py) \ ~{idx_tar} \ R1$SUFFIX/*.fastq.gz \ - ~{( - if paired_end - then "R2$SUFFIX/*.fastq.gz" - else "" - )} \ - ~{( - if paired_end - then "--paired-end" - else "" - )} \ - ~{( - if use_bwa_mem_for_pe - then "--use-bwa-mem-for-pe" - else "" - )} \ + ~{if paired_end then "R2$SUFFIX/*.fastq.gz" else ""} \ + ~{if paired_end then "--paired-end" else ""} \ + ~{if use_bwa_mem_for_pe then "--use-bwa-mem-for-pe" else ""} \ ~{"--bwa-mem-read-len-limit " + bwa_mem_read_len_limit} \ ~{"--mem-gb " + samtools_mem_gb} \ ~{"--nth " + cpu} @@ -2791,38 +2456,18 @@ task align { python3 $(which encode_task_bowtie2.py) \ ~{idx_tar} \ R1$SUFFIX/*.fastq.gz \ - ~{( - if paired_end - then "R2$SUFFIX/*.fastq.gz" - else "" - )} \ + ~{if paired_end then "R2$SUFFIX/*.fastq.gz" else ""} \ ~{"--multimapping " + multimapping} \ - ~{( - if paired_end - then "--paired-end" - else "" - )} \ - ~{( - if use_bowtie2_local_mode - then "--local" - else "" - )} \ + ~{if paired_end then "--paired-end" else ""} \ + ~{if use_bowtie2_local_mode then "--local" else ""} \ ~{"--mem-gb " + samtools_mem_gb} \ ~{"--nth " + cpu} else python3 ~{custom_align_py} \ ~{idx_tar} \ R1$SUFFIX/*.fastq.gz \ - ~{( - if paired_end - then "R2$SUFFIX/*.fastq.gz" - else "" - )} \ - ~{( - if paired_end - then "--paired-end" - else "" - )} \ + ~{if paired_end then "R2$SUFFIX/*.fastq.gz" else ""} \ + ~{if paired_end then "--paired-end" else ""} \ ~{"--mem-gb " + samtools_mem_gb} \ ~{"--nth " + cpu} fi @@ -2885,29 +2530,18 @@ task filter { set -e python3 $(which encode_task_filter.py) \ ~{bam} \ - ~{( - if paired_end - then "--paired-end" - else "" - )} \ + ~{if paired_end then "--paired-end" else ""} \ --multimapping 0 \ ~{"--dup-marker " + dup_marker} \ ~{"--mapq-thresh " + mapq_thresh} \ --filter-chrs ~{sep=" " filter_chrs} \ ~{"--chrsz " + chrsz} \ - ~{( - if no_dup_removal - then "--no-dup-removal" - else "" - )} \ + ~{if no_dup_removal then "--no-dup-removal" else ""} \ ~{"--mito-chr-name " + mito_chr_name} \ ~{"--mem-gb " + samtools_mem_gb} \ ~{"--nth " + cpu} \ - ~{"--picard-java-heap " + ( - if defined(picard_java_heap) - then picard_java_heap - else (round(mem_gb * picard_java_heap_factor) + "G") - )} + ~{"--picard-java-heap " + if defined(picard_java_heap) then picard_java_heap + else (round(mem_gb * picard_java_heap_factor) + "G")} if [ '~{redact_nodup_bam}' == 'true' ]; then python3 $(which encode_task_bam_to_pbam.py) \ @@ -2960,11 +2594,7 @@ task bam2ta { python3 $(which encode_task_bam2ta.py) \ ~{bam} \ --disable-tn5-shift \ - ~{( - if paired_end - then "--paired-end" - else "" - )} \ + ~{if paired_end then "--paired-end" else ""} \ ~{"--mito-chr-name " + mito_chr_name} \ ~{"--subsample " + subsample} \ ~{"--mem-gb " + samtools_mem_gb} \ @@ -3005,11 +2635,7 @@ task spr { python3 $(which encode_task_spr.py) \ ~{ta} \ ~{"--pseudoreplication-random-seed " + pseudoreplication_random_seed} \ - ~{( - if paired_end - then "--paired-end" - else "" - )} + ~{if paired_end then "--paired-end" else ""} >>> output { @@ -3085,11 +2711,7 @@ task xcor { set -e python3 $(which encode_task_xcor.py) \ ~{ta} \ - ~{( - if paired_end - then "--paired-end" - else "" - )} \ + ~{if paired_end then "--paired-end" else ""} \ ~{"--mito-chr-name " + mito_chr_name} \ ~{"--subsample " + subsample} \ ~{"--chip-seq-type " + chip_seq_type} \ @@ -3139,11 +2761,7 @@ task jsd { set -e python3 $(which encode_task_jsd.py) \ ~{sep=" " select_all(nodup_bams)} \ - ~{( - if length(ctl_bams) > 0 - then "--ctl-bam " + select_first(ctl_bams) - else "" - )} \ + ~{if length(ctl_bams) > 0 then "--ctl-bam " + select_first(ctl_bams) else ""} \ ~{"--mapq-thresh " + mapq_thresh} \ ~{"--blacklist " + blacklist} \ ~{"--nth " + cpu} @@ -3186,11 +2804,7 @@ task choose_ctl { --ctl-tas ~{sep=" " select_all(ctl_tas)} \ ~{"--ta-pooled " + ta_pooled} \ ~{"--ctl-ta-pooled " + ctl_ta_pooled} \ - ~{( - if always_use_pooled_ctl - then "--always-use-pooled-ctl" - else "" - )} \ + ~{if always_use_pooled_ctl then "--always-use-pooled-ctl" else ""} \ ~{"--ctl-depth-ratio " + ctl_depth_ratio} \ ~{"--ctl-depth-limit " + ctl_depth_limit} \ ~{"--exp-ctl-depth-ratio-limit " + exp_ctl_depth_ratio_limit} @@ -3267,11 +2881,7 @@ task subsample_ctl { python3 $(which encode_task_subsample_ctl.py) \ ~{ta} \ ~{"--subsample " + subsample} \ - ~{( - if paired_end - then "--paired-end" - else "" - )} \ + ~{if paired_end then "--paired-end" else ""} \ >>> output { @@ -3344,7 +2954,7 @@ task call_peak { ~{"--chrsz " + chrsz} \ ~{"--fraglen " + fraglen} \ ~{"--peak-type " + peak_type} \ - ~{"--blacklist " + blacklist} + ~{"--blacklist " + blacklist} >>> output { @@ -3362,11 +2972,7 @@ task call_peak { } runtime { - cpu: ( - if peak_caller == "macs2" - then 2 - else cpu - ) + cpu: if peak_caller == "macs2" then 2 else cpu memory: "~{mem_gb} GB" time: time_hr disks: "local-disk ~{disk_gb} SSD" @@ -3443,11 +3049,7 @@ task idr { command <<< set -e - ~{( - if defined(ta) - then "" - else "touch null.frip.qc" - )} + ~{if defined(ta) then "" else "touch null.frip.qc"} touch null python3 $(which encode_task_idr.py) \ ~{peak1} ~{peak2} ~{peak_pooled} \ @@ -3472,11 +3074,7 @@ task idr { File idr_plot = glob("*.txt.png")[0] File idr_unthresholded_peak = glob("*.txt.gz")[0] File idr_log = glob("*.idr*.log")[0] - File frip_qc = ( - if defined(ta) - then glob("*.frip.qc")[0] - else glob("null")[0] - ) + File frip_qc = if defined(ta) then glob("*.frip.qc")[0] else glob("null")[0] } runtime { @@ -3508,11 +3106,7 @@ task overlap { command <<< set -e - ~{( - if defined(ta) - then "" - else "touch null.frip.qc" - )} + ~{if defined(ta) then "" else "touch null.frip.qc"} touch null python3 $(which encode_task_overlap.py) \ ~{peak1} ~{peak2} ~{peak_pooled} \ @@ -3532,12 +3126,9 @@ task overlap { File bfilt_overlap_peak_bb = glob("*.bfilt." + peak_type + ".bb")[0] File bfilt_overlap_peak_starch = glob("*.bfilt." + peak_type + ".starch")[0] File bfilt_overlap_peak_hammock = glob("*.bfilt." + peak_type + ".hammock.gz*")[0] - File bfilt_overlap_peak_hammock_tbi = glob("*.bfilt." + peak_type + ".hammock.gz*")[1] - File frip_qc = ( - if defined(ta) - then glob("*.frip.qc")[0] - else glob("null")[0] - ) + File bfilt_overlap_peak_hammock_tbi = glob("*.bfilt." + peak_type + ".hammock.gz*" + )[1] + File frip_qc = if defined(ta) then glob("*.frip.qc")[0] else glob("null")[0] } runtime { @@ -3623,11 +3214,8 @@ task gc_bias { python3 $(which encode_task_gc_bias.py) \ ~{"--nodup-bam " + nodup_bam} \ ~{"--ref-fa " + ref_fa} \ - ~{"--picard-java-heap " + ( - if defined(picard_java_heap) - then picard_java_heap - else (round(mem_gb * picard_java_heap_factor) + "G") - )} + ~{"--picard-java-heap " + if defined(picard_java_heap) then picard_java_heap + else (round(mem_gb * picard_java_heap_factor) + "G")} >>> output { @@ -3723,11 +3311,7 @@ task qc_report { --ctl-paired-ends ~{sep=" " ctl_paired_ends} \ --pipeline-type ~{pipeline_type} \ --aligner ~{aligner} \ - ~{( - if (no_dup_removal) - then "--no-dup-removal " - else "" - )} \ + ~{if (no_dup_removal) then "--no-dup-removal " else ""} \ --peak-caller ~{peak_caller} \ ~{"--cap-num-peak " + cap_num_peak} \ --idr-thresh ~{idr_thresh} \ @@ -3824,51 +3408,21 @@ task read_genome_tsv { output { String? genome_name = read_string("genome_name") - String? ref_fa = ( - if size("ref_fa") == 0 - then null_s - else read_string("ref_fa") - ) - String? bwa_idx_tar = ( - if size("bwa_idx_tar") == 0 - then null_s - else read_string("bwa_idx_tar") - ) - String? bowtie2_idx_tar = ( - if size("bowtie2_idx_tar") == 0 - then null_s - else read_string("bowtie2_idx_tar") - ) - String? chrsz = ( - if size("chrsz") == 0 - then null_s - else read_string("chrsz") - ) - String? gensz = ( - if size("gensz") == 0 - then null_s - else read_string("gensz") - ) - String? blacklist = ( - if size("blacklist") == 0 - then null_s - else read_string("blacklist") - ) - String? blacklist2 = ( - if size("blacklist2") == 0 - then null_s - else read_string("blacklist2") - ) - String? mito_chr_name = ( - if size("mito_chr_name") == 0 - then null_s - else read_string("mito_chr_name") - ) - String? regex_bfilt_peak_chr_name = ( - if size("regex_bfilt_peak_chr_name") == 0 - then "chr[\\dXY]+" + String? ref_fa = if size("ref_fa") == 0 then null_s else read_string("ref_fa") + String? bwa_idx_tar = if size("bwa_idx_tar") == 0 then null_s else read_string("bwa_idx_tar" + ) + String? bowtie2_idx_tar = if size("bowtie2_idx_tar") == 0 then null_s else read_string( + "bowtie2_idx_tar") + String? chrsz = if size("chrsz") == 0 then null_s else read_string("chrsz") + String? gensz = if size("gensz") == 0 then null_s else read_string("gensz") + String? blacklist = if size("blacklist") == 0 then null_s else read_string("blacklist" + ) + String? blacklist2 = if size("blacklist2") == 0 then null_s else read_string("blacklist2" + ) + String? mito_chr_name = if size("mito_chr_name") == 0 then null_s else read_string( + "mito_chr_name") + String? regex_bfilt_peak_chr_name = if size("regex_bfilt_peak_chr_name") == 0 then "chr[\\dXY]+" else read_string("regex_bfilt_peak_chr_name") - ) } runtime { diff --git a/wdl-format/tests/format/if_then_else_exprs/source.formatted.wdl b/wdl-format/tests/format/if_then_else_exprs/source.formatted.wdl index 2042a3f76..c1dd2a1d1 100644 --- a/wdl-format/tests/format/if_then_else_exprs/source.formatted.wdl +++ b/wdl-format/tests/format/if_then_else_exprs/source.formatted.wdl @@ -7,17 +7,9 @@ workflow if_then_else_exprs { Int b } - Int c = ( - if (a < b) - then a - else b - ) + Int c = (if (a < b) then a else b) - Int d = ( - if (a < b) - then a - else b - ) + Int d = if (a < b) then a else b output { Int result = c diff --git a/wdl-format/tests/format/interrupt_example/source.formatted.wdl b/wdl-format/tests/format/interrupt_example/source.formatted.wdl index 4797ab7c7..492123837 100644 --- a/wdl-format/tests/format/interrupt_example/source.formatted.wdl +++ b/wdl-format/tests/format/interrupt_example/source.formatted.wdl @@ -9,5 +9,4 @@ workflow meta # interrupt { # how far should this bracket be indented? } - } diff --git a/wdl-format/tests/format/seaseq-case/source.formatted.wdl b/wdl-format/tests/format/seaseq-case/source.formatted.wdl index 812ee0761..aa1ccd2b5 100644 --- a/wdl-format/tests/format/seaseq-case/source.formatted.wdl +++ b/wdl-format/tests/format/seaseq-case/source.formatted.wdl @@ -280,44 +280,30 @@ workflow seaseq { ### ------------------------------------------------- ### # if multiple fastqfiles are provided - Boolean multi_fastq = ( - if length(original_fastqfiles) > 1 - then true - else false - ) - Boolean one_fastq = ( - if length(original_fastqfiles) == 1 - then true - else false - ) + Boolean multi_fastq = if length(original_fastqfiles) > 1 then true else false + Boolean one_fastq = if length(original_fastqfiles) == 1 then true else false if (defined(spikein_bowtie_index) || defined(spikein_reference)) { scatter (eachfastq in original_fastqfiles) { call fastqc.fastqc as spikein_indv_fastqc { input: inputfile = eachfastq, - default_location = ( - if (one_fastq) - then sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/FastQC" - else "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/FastQC" - ), + default_location = if (one_fastq) then sub(basename(eachfastq), ".fastq.gz|.fq.gz", + "") + "/SpikeIn/FastQC" else "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", + "") + "/SpikeIn/FastQC", } call util.basicfastqstats as spikein_indv_bfs { input: fastqfile = eachfastq, - default_location = ( - if (one_fastq) - then sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats" - else "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats" - ), + default_location = if (one_fastq) then sub(basename(eachfastq), ".fastq.gz|.fq.gz", + "") + "/SpikeIn/SummaryStats" else "SAMPLE/" + sub(basename(eachfastq), + ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats", } call bowtie.spikein_SE as spikein_indv_map { input: fastqfile = eachfastq, index_files = actual_spikein_bowtie_index, metricsfile = spikein_indv_bfs.metrics_out, - default_location = ( - if (one_fastq) - then sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats" - else "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats" - ), + default_location = if (one_fastq) then sub(basename(eachfastq), ".fastq.gz|.fq.gz", + "") + "/SpikeIn/SummaryStats" else "SAMPLE/" + sub(basename(eachfastq), + ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats", } } @@ -349,12 +335,14 @@ workflow seaseq { call fastqc.fastqc as indv_fastqc { input: inputfile = eachfastq, - default_location = "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/QC/FastQC", + default_location = "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", + "") + "/QC/FastQC", } call util.basicfastqstats as indv_bfs { input: fastqfile = eachfastq, - default_location = "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/QC/SummaryStats", + default_location = "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", + "") + "/QC/SummaryStats", } call mapping.mapping as indv_mapping { input: @@ -362,12 +350,14 @@ workflow seaseq { index_files = actual_bowtie_index, metricsfile = indv_bfs.metrics_out, blacklist = blacklist, - default_location = "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/BAM_files", + default_location = "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", + "") + "/BAM_files", } call fastqc.fastqc as indv_bamfqc { input: inputfile = indv_mapping.sorted_bam, - default_location = "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/QC/FastQC", + default_location = "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", + "") + "/QC/FastQC", } call runspp.runspp as indv_runspp { input: bamfile = select_first([ @@ -389,7 +379,8 @@ workflow seaseq { rmdupflag = indv_mapping.mkdup_stats, bkflag = indv_mapping.bklist_stats, fastqmetrics = indv_bfs.metrics_out, - default_location = "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/QC/SummaryStats", + default_location = "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", + "") + "/QC/SummaryStats", } } # end scatter (for each sample fastq) @@ -410,16 +401,10 @@ workflow seaseq { call samtools.mergebam { input: bamfiles = indv_mapping.sorted_bam, metricsfiles = indv_bfs.metrics_out, - default_location = ( - if defined(results_name) - then results_name + "/BAM_files" - else "AllMerge_" + length(indv_mapping.sorted_bam) + "_mapped" + "/BAM_files" - ), - outputfile = ( - if defined(results_name) - then results_name + ".sorted.bam" - else "AllMerge_" + length(fastqfiles) + "_mapped.sorted.bam" - ), + default_location = if defined(results_name) then results_name + "/BAM_files" + else "AllMerge_" + length(indv_mapping.sorted_bam) + "_mapped" + "/BAM_files", + outputfile = if defined(results_name) then results_name + ".sorted.bam" else "AllMerge_" + + length(fastqfiles) + "_mapped.sorted.bam", } call fastqc.fastqc as mergebamfqc { input: @@ -545,27 +530,34 @@ workflow seaseq { pvalue = "1e-9", keep_dup = "auto", egs = egs.genomesize, - default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS/NARROW_peaks" + "/" + basename(sample_bam, ".bam") + "-p9_kd-auto", - coverage_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + "/" + basename(sample_bam, ".bam") + "_p9_kd-auto", + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS/NARROW_peaks" + + "/" + basename(sample_bam, ".bam") + "-p9_kd-auto", + coverage_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + + "/" + basename(sample_bam, ".bam") + "_p9_kd-auto", } - call util.addreadme { input: default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS" } + call util.addreadme { input: default_location = sub(basename(sample_bam), ".sorted.b.*$", + "") + "/PEAKS" } call macs.macs as all { input: bamfile = sample_bam, pvalue = "1e-9", keep_dup = "all", egs = egs.genomesize, - default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS/NARROW_peaks" + "/" + basename(sample_bam, ".bam") + "-p9_kd-all", - coverage_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + "/" + basename(sample_bam, ".bam") + "_p9_kd-all", + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS/NARROW_peaks" + + "/" + basename(sample_bam, ".bam") + "-p9_kd-all", + coverage_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + + "/" + basename(sample_bam, ".bam") + "_p9_kd-all", } call macs.macs as nomodel { input: bamfile = sample_bam, nomodel = true, egs = egs.genomesize, - default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS/NARROW_peaks" + "/" + basename(sample_bam, ".bam") + "-nm", - coverage_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + "/" + basename(sample_bam, ".bam") + "_nm", + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS/NARROW_peaks" + + "/" + basename(sample_bam, ".bam") + "-nm", + coverage_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + + "/" + basename(sample_bam, ".bam") + "_nm", } call bamtogff.bamtogff { input: @@ -621,7 +613,8 @@ workflow seaseq { bedfile = macs.peakbedfile, chromsizes = samtools_faidx.chromsizes, summitfile = macs.summitsfile, - default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS_Annotation/NARROW_peaks" + "/" + sub(basename(macs.peakbedfile), "_peaks.bed", ""), + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS_Annotation/NARROW_peaks" + + "/" + sub(basename(macs.peakbedfile), "_peaks.bed", ""), } call util.peaksanno as all_peaksanno { input: @@ -629,7 +622,8 @@ workflow seaseq { bedfile = all.peakbedfile, chromsizes = samtools_faidx.chromsizes, summitfile = all.summitsfile, - default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS_Annotation/NARROW_peaks" + "/" + sub(basename(all.peakbedfile), "_peaks.bed", ""), + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS_Annotation/NARROW_peaks" + + "/" + sub(basename(all.peakbedfile), "_peaks.bed", ""), } call util.peaksanno as nomodel_peaksanno { input: @@ -637,7 +631,8 @@ workflow seaseq { bedfile = nomodel.peakbedfile, chromsizes = samtools_faidx.chromsizes, summitfile = nomodel.summitsfile, - default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS_Annotation/NARROW_peaks" + "/" + sub(basename(nomodel.peakbedfile), "_peaks.bed", ""), + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS_Annotation/NARROW_peaks" + + "/" + sub(basename(nomodel.peakbedfile), "_peaks.bed", ""), } call util.peaksanno as sicer_peaksanno { input: @@ -675,21 +670,24 @@ workflow seaseq { wigfile = macs.wigfile, chromsizes = samtools_faidx.chromsizes, xlsfile = macs.peakxlsfile, - default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + "/" + sub(basename(macs.peakbedfile), "_peaks.bed", ""), + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + + "/" + sub(basename(macs.peakbedfile), "_peaks.bed", ""), } call viz.visualization as vizall { input: wigfile = all.wigfile, chromsizes = samtools_faidx.chromsizes, xlsfile = all.peakxlsfile, - default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + "/" + sub(basename(all.peakbedfile), "_peaks.bed", ""), + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + + "/" + sub(basename(all.peakbedfile), "_peaks.bed", ""), } call viz.visualization as viznomodel { input: wigfile = nomodel.wigfile, chromsizes = samtools_faidx.chromsizes, xlsfile = nomodel.peakxlsfile, - default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + "/" + sub(basename(nomodel.peakbedfile), "_peaks.bed", ""), + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + + "/" + sub(basename(nomodel.peakbedfile), "_peaks.bed", ""), } call viz.visualization as vizsicer { input: