From 95fd4448b5635af4ec243b5d626c82af504fcab3 Mon Sep 17 00:00:00 2001 From: Andrew Frantz Date: Wed, 15 Jan 2025 12:55:53 -0500 Subject: [PATCH] format: maximum/None (240 or zero) --- data_structures/flag_filter.wdl | 12 +- data_structures/read_group.wdl | 129 +++++-------- tools/arriba.wdl | 39 ++-- tools/bwa.wdl | 45 ++--- tools/estimate.wdl | 3 +- tools/fq.wdl | 24 +-- tools/gatk4.wdl | 55 ++---- tools/htseq.wdl | 24 +-- tools/kraken2.wdl | 27 +-- tools/librarian.wdl | 9 +- tools/mosdepth.wdl | 3 +- tools/multiqc.wdl | 3 +- tools/ngsderive.wdl | 12 +- tools/picard.wdl | 50 ++---- tools/qualimap.wdl | 4 +- tools/sambamba.wdl | 9 +- tools/samtools.wdl | 94 ++++------ tools/star.wdl | 169 ++++++------------ tools/util.wdl | 33 ++-- workflows/chipseq/chipseq-standard.wdl | 36 ++-- workflows/dnaseq/dnaseq-core.wdl | 27 +-- workflows/dnaseq/dnaseq-standard-fastq.wdl | 12 +- workflows/dnaseq/dnaseq-standard.wdl | 9 +- workflows/general/alignment-post.wdl | 9 +- workflows/general/bam-to-fastqs.wdl | 6 +- workflows/general/samtools-merge.wdl | 6 +- workflows/methylation/methylation-cohort.wdl | 33 ++-- .../methylation/methylation-standard.wdl | 7 +- workflows/qc/markdups-post.wdl | 3 +- workflows/qc/quality-check-standard.wdl | 64 +++---- workflows/reference/bwa-db-build.wdl | 3 +- workflows/reference/gatk-reference.wdl | 12 +- workflows/rnaseq/ESTIMATE.wdl | 3 +- workflows/rnaseq/rnaseq-core.wdl | 12 +- workflows/rnaseq/rnaseq-standard-fastq.wdl | 18 +- workflows/rnaseq/rnaseq-standard.wdl | 6 +- workflows/rnaseq/rnaseq-variant-calling.wdl | 3 +- 37 files changed, 331 insertions(+), 682 deletions(-) diff --git a/data_structures/flag_filter.wdl b/data_structures/flag_filter.wdl index 84489824..4a06e8a9 100644 --- a/data_structures/flag_filter.wdl +++ b/data_structures/flag_filter.wdl @@ -140,14 +140,10 @@ workflow validate_flag_filter { FlagFilter flags } - call validate_string_is_12bit_oct_dec_or_hex as validate_include_if_any { - input: number = flags.include_if_any } - call validate_string_is_12bit_oct_dec_or_hex as validate_include_if_all { - input: number = flags.include_if_all } - call validate_string_is_12bit_oct_dec_or_hex as validate_exclude_if_any { - input: number = flags.exclude_if_any } - call validate_string_is_12bit_oct_dec_or_hex as validate_exclude_if_all { - input: number = flags.exclude_if_all } + call validate_string_is_12bit_oct_dec_or_hex as validate_include_if_any { input: number = flags.include_if_any } + call validate_string_is_12bit_oct_dec_or_hex as validate_include_if_all { input: number = flags.include_if_all } + call validate_string_is_12bit_oct_dec_or_hex as validate_exclude_if_any { input: number = flags.exclude_if_any } + call validate_string_is_12bit_oct_dec_or_hex as validate_exclude_if_all { input: number = flags.exclude_if_all } output { String check = "passed" diff --git a/data_structures/read_group.wdl b/data_structures/read_group.wdl index 481ee3f9..b99d98b4 100644 --- a/data_structures/read_group.wdl +++ b/data_structures/read_group.wdl @@ -81,38 +81,24 @@ task read_group_to_string { command <<< { echo -n "~{"ID:~{read_group.ID}"}" # required field. 
All others optional - echo -n "~{if defined(read_group.BC) then " BC:~{ - read_group.BC}" else ""}" - echo -n "~{if defined(read_group.CN) then " CN:~{ - read_group.CN}" else ""}" - echo -n "~{if defined(read_group.DS) then " DS:~{ - read_group.DS}" else ""}" - echo -n "~{if defined(read_group.DT) then " DT:~{ - read_group.DT}" else ""}" - echo -n "~{if defined(read_group.FO) then " FO:~{ - read_group.FO}" else ""}" - echo -n "~{if defined(read_group.KS) then " KS:~{ - read_group.KS}" else ""}" - echo -n "~{if defined(read_group.LB) then " LB:~{ - read_group.LB}" else ""}" - echo -n "~{if defined(read_group.PG) then " PG:~{ - read_group.PG}" else ""}" - echo -n "~{if defined(read_group.PI) then " PI:~{ - read_group.PI}" else ""}" - echo -n "~{if defined(read_group.PL) then " PL:~{ - read_group.PL}" else ""}" - echo -n "~{if defined(read_group.PM) then " PM:~{ - read_group.PM}" else ""}" - echo -n "~{if defined(read_group.PU) then " PU:~{ - read_group.PU}" else ""}" - echo "~{if defined(read_group.SM) then " SM:~{ - read_group.SM}" else ""}" + echo -n "~{if defined(read_group.BC) then " BC:~{read_group.BC}" else ""}" + echo -n "~{if defined(read_group.CN) then " CN:~{read_group.CN}" else ""}" + echo -n "~{if defined(read_group.DS) then " DS:~{read_group.DS}" else ""}" + echo -n "~{if defined(read_group.DT) then " DT:~{read_group.DT}" else ""}" + echo -n "~{if defined(read_group.FO) then " FO:~{read_group.FO}" else ""}" + echo -n "~{if defined(read_group.KS) then " KS:~{read_group.KS}" else ""}" + echo -n "~{if defined(read_group.LB) then " LB:~{read_group.LB}" else ""}" + echo -n "~{if defined(read_group.PG) then " PG:~{read_group.PG}" else ""}" + echo -n "~{if defined(read_group.PI) then " PI:~{read_group.PI}" else ""}" + echo -n "~{if defined(read_group.PL) then " PL:~{read_group.PL}" else ""}" + echo -n "~{if defined(read_group.PM) then " PM:~{read_group.PM}" else ""}" + echo -n "~{if defined(read_group.PU) then " PU:~{read_group.PU}" else ""}" + echo "~{if defined(read_group.SM) then " SM:~{read_group.SM}" else ""}" } > out.txt >>> output { - String stringified_read_group = read_string("out.txt" - ) + String stringified_read_group = read_string("out.txt") } runtime { @@ -164,8 +150,7 @@ task get_read_groups { >>> output { - Array[ReadGroup] read_groups = read_json("read_groups.json" - ) + Array[ReadGroup] read_groups = read_json("read_groups.json") } runtime { @@ -225,15 +210,13 @@ task validate_read_group { || [[ ~{read_group.ID} =~ ~{restrictive_pattern} ]] then >&2 echo "ID (~{read_group.ID}) must not match patterns:" - >&2 echo "'~{id_pattern}' or '~{ - restrictive_pattern}'" + >&2 echo "'~{id_pattern}' or '~{restrictive_pattern}'" error=1 fi fi if [[ ! "~{read_group.ID}" =~ ^~{sam_spec_pattern}$ ]] then - >&2 echo "ID must match pattern ~{ - sam_spec_pattern}" + >&2 echo "ID must match pattern ~{sam_spec_pattern}" error=1 fi if [ $(echo "~{sep(" ", required_fields)}" | grep -Ewc "SM") -eq 1 ] @@ -248,22 +231,17 @@ task validate_read_group { then if [[ ~{restrictive} == "true" ]] then - if [[ "~{read_group.SM}" =~ ^~{ - sample_pattern}$ ]] \ - || [[ "~{read_group.SM}" =~ ~{ - restrictive_pattern} ]] + if [[ "~{read_group.SM}" =~ ^~{sample_pattern}$ ]] \ + || [[ "~{read_group.SM}" =~ ~{restrictive_pattern} ]] then >&2 echo "SM must not match patterns:" - >&2 echo "'~{sample_pattern}' or '~{ - restrictive_pattern}'" + >&2 echo "'~{sample_pattern}' or '~{restrictive_pattern}'" error=1 fi fi - if [[ ! "~{read_group.SM}" =~ ^~{ - sam_spec_pattern}$ ]] + if [[ ! 
"~{read_group.SM}" =~ ^~{sam_spec_pattern}$ ]] then - >&2 echo "SM must match pattern ~{ - sam_spec_pattern}" + >&2 echo "SM must match pattern ~{sam_spec_pattern}" error=1 fi fi @@ -277,11 +255,9 @@ task validate_read_group { fi if [ "~{defined(read_group.BC)}" == "true" ] then - if [[ ! "~{read_group.BC}" =~ ^~{ - sam_spec_pattern}$ ]] + if [[ ! "~{read_group.BC}" =~ ^~{sam_spec_pattern}$ ]] then - >&2 echo "BC must match pattern ~{ - sam_spec_pattern}" + >&2 echo "BC must match pattern ~{sam_spec_pattern}" error=1 fi fi @@ -295,11 +271,9 @@ task validate_read_group { fi if [ "~{defined(read_group.CN)}" == "true" ] then - if [[ ! "~{read_group.CN}" =~ ^~{ - sam_spec_pattern}$ ]] + if [[ ! "~{read_group.CN}" =~ ^~{sam_spec_pattern}$ ]] then - >&2 echo "CN must match pattern ~{ - sam_spec_pattern}" + >&2 echo "CN must match pattern ~{sam_spec_pattern}" error=1 fi fi @@ -313,11 +287,9 @@ task validate_read_group { fi if [ "~{defined(read_group.DS)}" == "true" ] then - if [[ ! "~{read_group.DS}" =~ ^~{ - sam_spec_pattern}$ ]] + if [[ ! "~{read_group.DS}" =~ ^~{sam_spec_pattern}$ ]] then - >&2 echo "DS must match pattern ~{ - sam_spec_pattern}" + >&2 echo "DS must match pattern ~{sam_spec_pattern}" error=1 fi fi @@ -331,11 +303,9 @@ task validate_read_group { fi if [ "~{defined(read_group.DT)}" == "true" ] then - if [[ ! "~{read_group.DT}" =~ ^~{ - sam_spec_pattern}$ ]] + if [[ ! "~{read_group.DT}" =~ ^~{sam_spec_pattern}$ ]] then - >&2 echo "DT must match pattern ~{ - sam_spec_pattern}" + >&2 echo "DT must match pattern ~{sam_spec_pattern}" error=1 fi fi @@ -357,8 +327,7 @@ task validate_read_group { fi if [ $(echo "~{sep(" ", required_fields)}" | grep -Ewc "KS") -eq 1 ] then - if [ -z "~{if defined(read_group.KS) then read_group.KS - else ""}" ] + if [ -z "~{if defined(read_group.KS) then read_group.KS else ""}" ] then >&2 echo "KS is required" error=1 @@ -366,11 +335,9 @@ task validate_read_group { fi if [ "~{defined(read_group.KS)}" == "true" ] then - if [[ ! "~{read_group.KS}" =~ ^~{ - sam_spec_pattern}$ ]] + if [[ ! "~{read_group.KS}" =~ ^~{sam_spec_pattern}$ ]] then - >&2 echo "KS must match pattern ~{ - sam_spec_pattern}" + >&2 echo "KS must match pattern ~{sam_spec_pattern}" error=1 fi fi @@ -384,11 +351,9 @@ task validate_read_group { fi if [ "~{defined(read_group.LB)}" == "true" ] then - if [[ ! "~{read_group.LB}" =~ ^~{ - sam_spec_pattern}$ ]] + if [[ ! "~{read_group.LB}" =~ ^~{sam_spec_pattern}$ ]] then - >&2 echo "LB must match pattern ~{ - sam_spec_pattern}" + >&2 echo "LB must match pattern ~{sam_spec_pattern}" error=1 fi fi @@ -402,11 +367,9 @@ task validate_read_group { fi if [ "~{defined(read_group.PG)}" == "true" ] then - if [[ ! "~{read_group.PG}" =~ ^~{ - sam_spec_pattern}$ ]] + if [[ ! "~{read_group.PG}" =~ ^~{sam_spec_pattern}$ ]] then - >&2 echo "PG must match pattern ~{ - sam_spec_pattern}" + >&2 echo "PG must match pattern ~{sam_spec_pattern}" error=1 fi fi @@ -436,11 +399,9 @@ task validate_read_group { fi if [ "~{defined(read_group.PL)}" == "true" ] then - if [[ ! "~{read_group.PL}" =~ ^~{sep("|", platforms - )}$ ]] + if [[ ! "~{read_group.PL}" =~ ^~{sep("|", platforms)}$ ]] then - >&2 echo "PL must match pattern ~{sep("|", platforms - )}" + >&2 echo "PL must match pattern ~{sep("|", platforms)}" error=1 fi fi @@ -454,11 +415,9 @@ task validate_read_group { fi if [ "~{defined(read_group.PM)}" == "true" ] then - if [[ ! "~{read_group.PM}" =~ ^~{ - sam_spec_pattern}$ ]] + if [[ ! 
"~{read_group.PM}" =~ ^~{sam_spec_pattern}$ ]] then - >&2 echo "PM must match pattern ~{ - sam_spec_pattern}" + >&2 echo "PM must match pattern ~{sam_spec_pattern}" error=1 fi fi @@ -472,11 +431,9 @@ task validate_read_group { fi if [ "~{defined(read_group.PU)}" == "true" ] then - if [[ ! "~{read_group.PU}" =~ ^~{ - sam_spec_pattern}$ ]] + if [[ ! "~{read_group.PU}" =~ ^~{sam_spec_pattern}$ ]] then - >&2 echo "PU must match pattern ~{ - sam_spec_pattern}" + >&2 echo "PU must match pattern ~{sam_spec_pattern}" error=1 fi fi diff --git a/tools/arriba.wdl b/tools/arriba.wdl index 1689a24c..ad5d1a02 100644 --- a/tools/arriba.wdl +++ b/tools/arriba.wdl @@ -202,38 +202,28 @@ task arriba { } Int bam_size_gb = ceil(size(bam, "GiB")) - Int disk_size_gb = bam_size_gb + ceil(size(gtf, "GiB")) - + ceil(size(reference_fasta_gz, "GiB")) + modify_disk_size_gb + Int disk_size_gb = bam_size_gb + ceil(size(gtf, "GiB")) + ceil(size(reference_fasta_gz, "GiB")) + modify_disk_size_gb Int memory_gb = bam_size_gb + modify_memory_gb command <<< arriba \ -x ~{bam} \ - ~{if defined(chimeric_sam) then "-c " + chimeric_sam - else ""} \ + ~{if defined(chimeric_sam) then "-c " + chimeric_sam else ""} \ -o ~{prefix}.tsv \ -O ~{prefix}.discarded.tsv \ -a ~{reference_fasta_gz} \ -g ~{gtf} \ -G "~{feature_name}" \ - ~{if defined(exclude_list) then "-b " + exclude_list - else ""} \ - ~{if defined(known_fusions) then "-k " + known_fusions - else ""} \ - ~{if defined(annotate_fusions) then "-t " + annotate_fusions - else ""} \ - ~{if defined(protein_domains) then "-p " + protein_domains - else ""} \ - ~{if defined(wgs_svs) then "-d " + wgs_svs else "" - } \ + ~{if defined(exclude_list) then "-b " + exclude_list else ""} \ + ~{if defined(known_fusions) then "-k " + known_fusions else ""} \ + ~{if defined(annotate_fusions) then "-t " + annotate_fusions else ""} \ + ~{if defined(protein_domains) then "-p " + protein_domains else ""} \ + ~{if defined(wgs_svs) then "-d " + wgs_svs else ""} \ -D ~{max_genomic_breakpoint_distance} \ -s ~{strandedness} \ - ~{(if length(interesting_contigs) > 0 then "-i " - + sep(",", interesting_contigs) else "")} \ - ~{if length(viral_contigs) > 0 then "-v " + sep( - ",", viral_contigs) else ""} \ - ~{(if length(disable_filters) > 0 then "-f " + sep( - ",", disable_filters) else "")} \ + ~{(if length(interesting_contigs) > 0 then "-i " + sep(",", interesting_contigs) else "")} \ + ~{if length(viral_contigs) > 0 then "-v " + sep(",", viral_contigs) else ""} \ + ~{(if length(disable_filters) > 0 then "-f " + sep(",", disable_filters) else "")} \ -E ~{max_e_value} \ -S ~{min_supporting_reads} \ -m ~{max_mismappers} \ @@ -254,8 +244,7 @@ task arriba { -z ~{min_itd_allele_fraction} \ -Z ~{min_itd_supporting_reads} \ ~{if mark_duplicates then "" else "-u"} \ - ~{if report_additional_columns then "-X" else "" - } \ + ~{if report_additional_columns then "-X" else ""} \ ~{if fill_gaps then "-I" else ""} >>> @@ -296,8 +285,7 @@ task arriba_tsv_to_vcf { } Int input_size_gb = ceil(size(fusions, "GiB")) - Int disk_size_gb = ceil(input_size_gb) + (ceil(size( - reference_fasta, "GiB")) * 3) + modify_disk_size_gb + Int disk_size_gb = ceil(input_size_gb) + (ceil(size(reference_fasta, "GiB")) * 3) + modify_disk_size_gb command <<< set -euo pipefail @@ -362,8 +350,7 @@ task arriba_extract_fusion_supporting_alignments { output { Array[File] fusion_bams = glob("~{prefix}_*.bam") - Array[File] fusion_bam_indexes = glob("~{prefix}_*.bam.bai" - ) + Array[File] fusion_bam_indexes = glob("~{prefix}_*.bam.bai") } runtime { 
diff --git a/tools/bwa.wdl b/tools/bwa.wdl index b5b7a157..5c516e32 100644 --- a/tools/bwa.wdl +++ b/tools/bwa.wdl @@ -32,8 +32,7 @@ task bwa_aln { input { File fastq File bwa_db_tar_gz - String prefix = sub(basename(fastq), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "") + String prefix = sub(basename(fastq), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", "") String read_group = "" Boolean use_all_cores = false Int ncpu = 2 @@ -44,8 +43,7 @@ task bwa_aln { Float input_fastq_size = size(fastq, "GiB") Float reference_size = size(bwa_db_tar_gz, "GiB") - Int disk_size_gb = (ceil((input_fastq_size + reference_size - ) * 2) + 10 + modify_disk_size_gb) + Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb) command <<< set -euo pipefail @@ -64,8 +62,7 @@ task bwa_aln { bwa aln -t "$n_cores" bwa_db/"$PREFIX" ~{fastq} > sai bwa samse \ - ~{if read_group != "" then "-r '" + read_group + "'" - else ""} \ + ~{if read_group != "" then "-r '" + read_group + "'" else ""} \ bwa_db/"$PREFIX" \ sai \ ~{fastq} \ @@ -126,8 +123,7 @@ task bwa_aln_pe { File read_one_fastq_gz File read_two_fastq_gz File bwa_db_tar_gz - String prefix = sub(basename(read_one_fastq_gz), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "") + String prefix = sub(basename(read_one_fastq_gz), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", "") String read_group = "" Boolean use_all_cores = false Int ncpu = 4 @@ -136,11 +132,9 @@ task bwa_aln_pe { String output_bam = prefix + ".bam" - Float input_fastq_size = (size(read_one_fastq_gz, "GiB") - + size(read_two_fastq_gz, "GiB")) + Float input_fastq_size = (size(read_one_fastq_gz, "GiB") + size(read_two_fastq_gz, "GiB")) Float reference_size = size(bwa_db_tar_gz, "GiB") - Int disk_size_gb = (ceil((input_fastq_size + reference_size - ) * 2) + 10 + modify_disk_size_gb) + Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb) command <<< set -euo pipefail @@ -160,15 +154,11 @@ task bwa_aln_pe { ln -s ~{read_two_fastq_gz} bwa sampe \ - ~{if read_group != "" then "-r '" + read_group + "'" - else ""} \ + ~{if read_group != "" then "-r '" + read_group + "'" else ""} \ bwa_db/"$PREFIX" \ - <(bwa aln -t "$n_cores" bwa_db/"$PREFIX" ~{ - basename(read_one_fastq_gz)}) \ - <(bwa aln -t "$n_cores" bwa_db/"$PREFIX" ~{ - basename(read_two_fastq_gz)}) \ - ~{basename(read_one_fastq_gz)} ~{basename( - read_two_fastq_gz)} \ + <(bwa aln -t "$n_cores" bwa_db/"$PREFIX" ~{basename(read_one_fastq_gz)}) \ + <(bwa aln -t "$n_cores" bwa_db/"$PREFIX" ~{basename(read_two_fastq_gz)}) \ + ~{basename(read_one_fastq_gz)} ~{basename(read_two_fastq_gz)} \ | samtools view --no-PG --threads "$samtools_cores" -hb - \ > ~{output_bam} @@ -220,8 +210,7 @@ task bwa_mem { File read_one_fastq_gz File bwa_db_tar_gz File? 
read_two_fastq_gz - String prefix = sub(basename(read_one_fastq_gz), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "") + String prefix = sub(basename(read_one_fastq_gz), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", "") String read_group = "" Boolean use_all_cores = false Int ncpu = 4 @@ -230,11 +219,9 @@ task bwa_mem { String output_bam = prefix + ".bam" - Float input_fastq_size = size(read_one_fastq_gz, "GiB") - + size(read_two_fastq_gz, "GiB") + Float input_fastq_size = size(read_one_fastq_gz, "GiB") + size(read_two_fastq_gz, "GiB") Float reference_size = size(bwa_db_tar_gz, "GiB") - Int disk_size_gb = (ceil((input_fastq_size + reference_size - ) * 2) + 10 + modify_disk_size_gb) + Int disk_size_gb = (ceil((input_fastq_size + reference_size) * 2) + 10 + modify_disk_size_gb) File read_two_file = select_first([ read_two_fastq_gz, @@ -256,13 +243,11 @@ task bwa_mem { PREFIX=$(basename bwa_db/*.ann ".ann") ln -sf ~{read_one_fastq_gz} - ~{if defined(read_two_fastq_gz) then "ln -sf " + read_two_fastq_gz - + "" else ""} + ~{if defined(read_two_fastq_gz) then "ln -sf " + read_two_fastq_gz + "" else ""} bwa mem \ -t "$n_cores" \ - ~{if read_group != "" then "-R '" + read_group + "'" - else ""} \ + ~{if read_group != "" then "-R '" + read_group + "'" else ""} \ bwa_db/"$PREFIX" \ ~{basename(read_one_fastq_gz)} \ ~{basename(read_two_file)} \ diff --git a/tools/estimate.wdl b/tools/estimate.wdl index 5d69fc74..7694f44d 100644 --- a/tools/estimate.wdl +++ b/tools/estimate.wdl @@ -21,8 +21,7 @@ task run_estimate { input { File gene_expression_file - String outfile_name = (basename(gene_expression_file, - ".TPM.txt") + ".ESTIMATE.gct") + String outfile_name = (basename(gene_expression_file, ".TPM.txt") + ".ESTIMATE.gct") Int memory_gb = 4 Int disk_size_gb = 10 Int max_retries = 1 diff --git a/tools/fq.wdl b/tools/fq.wdl index c8704022..eefbc5ff 100755 --- a/tools/fq.wdl +++ b/tools/fq.wdl @@ -70,19 +70,15 @@ task fqlint { Float read1_size = size(read_one_fastq, "GiB") Float read2_size = size(read_two_fastq, "GiB") - Int memory_gb = (ceil((read1_size + read2_size) * 0.25) - + 4 + modify_memory_gb) + Int memory_gb = (ceil((read1_size + read2_size) * 0.25) + 4 + modify_memory_gb) Int disk_size_gb = ceil((read1_size + read2_size) * 2) + modify_disk_size_gb command <<< fq lint \ - ~{sep(" ", prefix("--disable-validator ", disable_validator_codes - ))} \ - --single-read-validation-level ~{ - single_read_validation_level} \ - --paired-read-validation-level ~{ - paired_read_validation_level} \ + ~{sep(" ", prefix("--disable-validator ", disable_validator_codes))} \ + --single-read-validation-level ~{single_read_validation_level} \ + --paired-read-validation-level ~{paired_read_validation_level} \ --lint-mode ~{if panic then "panic" else "log"} \ ~{read_one_fastq} \ ~{read_two_fastq} @@ -127,8 +123,7 @@ task subsample { input { File read_one_fastq File? 
read_two_fastq - String prefix = sub(basename(read_one_fastq), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "") + String prefix = sub(basename(read_one_fastq), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", "") Float probability = 1.0 Int record_count = -1 Int modify_disk_size_gb = 0 @@ -139,10 +134,8 @@ task subsample { Int disk_size_gb = ceil((read1_size + read2_size) * 2) + modify_disk_size_gb - String probability_arg = (if (probability < 1.0 && probability - > 0) then "-p ~{probability}" else "") - String record_count_arg = if (record_count > 0) then "-n ~{ - record_count}" else "" + String probability_arg = (if (probability < 1.0 && probability > 0) then "-p ~{probability}" else "") + String record_count_arg = if (record_count > 0) then "-n ~{record_count}" else "" String r1_dst = prefix + ".R1.subsampled.fastq.gz" String r2_dst = prefix + ".R2.subsampled.fastq.gz" @@ -152,8 +145,7 @@ task subsample { ~{probability_arg} \ ~{record_count_arg} \ --r1-dst ~{r1_dst} \ - ~{(if defined(read_two_fastq) then "--r2-dst " + r2_dst - else "")} \ + ~{(if defined(read_two_fastq) then "--r2-dst " + r2_dst else "")} \ ~{read_one_fastq} \ ~{read_two_fastq} >>> diff --git a/tools/gatk4.wdl b/tools/gatk4.wdl index 7fef0ed5..a612b891 100644 --- a/tools/gatk4.wdl +++ b/tools/gatk4.wdl @@ -37,8 +37,7 @@ task split_n_cigar_reads { Int ncpu = 8 } - Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 + ceil( - size(fasta, "GB")) + modify_disk_size_gb + Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 + ceil(size(fasta, "GB")) + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) command <<< @@ -115,24 +114,20 @@ task base_recalibrator { Int ncpu = 4 } - Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 + ceil( - size(fasta, "GB")) + modify_disk_size_gb + Int disk_size_gb = ceil(size(bam, "GB") + 1) * 3 + ceil(size(fasta, "GB")) + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) #@ except: LineWidth command <<< gatk \ - --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms4000m -Xmx~{ - java_heap_size}g" \ + --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms4000m -Xmx~{java_heap_size}g" \ BaseRecalibratorSpark \ -R ~{fasta} \ -I ~{bam} \ - ~{(if use_original_quality_scores then "--use-original-qualities" - else "")} \ + ~{(if use_original_quality_scores then "--use-original-qualities" else "")} \ -O ~{outfile_name} \ --known-sites ~{dbSNP_vcf} \ - ~{sep(" ", prefix("--known-sites ", known_indels_sites_vcfs - ))} \ + ~{sep(" ", prefix("--known-sites ", known_indels_sites_vcfs))} \ --spark-master local[~{ncpu}] >>> @@ -189,13 +184,11 @@ task apply_bqsr { set -euo pipefail gatk \ - --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms3000m -Xmx~{ - java_heap_size}g" \ + --java-options "-XX:GCTimeLimit=50 -XX:GCHeapFreeLimit=10 -Xms3000m -Xmx~{java_heap_size}g" \ ApplyBQSRSpark \ --spark-master local[~{ncpu}] \ -I ~{bam} \ - ~{if use_original_quality_scores then "--use-original-qualities" - else ""} \ + ~{if use_original_quality_scores then "--use-original-qualities" else ""} \ -O ~{prefix}.bqsr.bam \ --bqsr-recal-file ~{recalibration_report} >>> @@ -266,8 +259,7 @@ task haplotype_caller { Int ncpu = 4 } - Int disk_size_gb = ceil(size(bam, "GB") * 2) + 30 + ceil( - size(fasta, "GB")) + modify_disk_size_gb + Int disk_size_gb = ceil(size(bam, "GB") * 2) + 30 + ceil(size(fasta, "GB")) + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) #@ except: LineWidth @@ -279,10 +271,8 @@ task haplotype_caller { -I ~{bam} \ -L 
~{interval_list} \ -O ~{prefix}.vcf.gz \ - ~{if use_soft_clipped_bases then "" else "--dont-use-soft-clipped-bases" - } \ - --standard-min-confidence-threshold-for-calling ~{ - stand_call_conf} \ + ~{if use_soft_clipped_bases then "" else "--dont-use-soft-clipped-bases"} \ + --standard-min-confidence-threshold-for-calling ~{stand_call_conf} \ --dbsnp ~{dbSNP_vcf} >>> @@ -360,10 +350,8 @@ task variant_filtration { --V ~{vcf} \ --window ~{window} \ --cluster ~{cluster} \ - ~{sep(" ", prefix("--filter-name ", filter_names - ))} \ - ~{sep(" ", prefix("--filter-expression ", squote( - filter_expressions)))} \ + ~{sep(" ", prefix("--filter-name ", filter_names))} \ + ~{sep(" ", prefix("--filter-expression ", squote(filter_expressions)))} \ -O ~{prefix}.filtered.vcf.gz >>> @@ -450,9 +438,7 @@ task mark_duplicates_spark { Float bam_size = size(bam, "GiB") Int memory_gb = min(ceil(bam_size + 15), 50) + modify_memory_gb - Int disk_size_gb = ((if create_bam then ceil((bam_size * 2 - ) + 10) else ceil(bam_size + 10)) + modify_disk_size_gb - ) + Int disk_size_gb = ((if create_bam then ceil((bam_size * 2) + 10) else ceil(bam_size + 10)) + modify_disk_size_gb) Int java_heap_size = ceil(memory_gb * 0.9) @@ -463,18 +449,13 @@ task mark_duplicates_spark { --java-options "-Xmx~{java_heap_size}g" \ -I ~{bam} \ -M ~{prefix}.metrics.txt \ - -O ~{if create_bam then prefix + ".bam" else "/dev/null" - } \ + -O ~{if create_bam then prefix + ".bam" else "/dev/null"} \ --create-output-bam-index ~{create_bam} \ - --read-validation-stringency ~{ - validation_stringency} \ - --duplicate-scoring-strategy ~{ - duplicate_scoring_strategy} \ - --read-name-regex '~{if (optical_distance > 0) - then read_name_regex else "null"}' \ + --read-validation-stringency ~{validation_stringency} \ + --duplicate-scoring-strategy ~{duplicate_scoring_strategy} \ + --read-name-regex '~{if (optical_distance > 0) then read_name_regex else "null"}' \ --duplicate-tagging-policy ~{tagging_policy} \ - --optical-duplicate-pixel-distance ~{ - optical_distance} \ + --optical-duplicate-pixel-distance ~{optical_distance} \ --spark-master local[~{ncpu}] >>> diff --git a/tools/htseq.wdl b/tools/htseq.wdl index 0f511e9b..e0c61361 100755 --- a/tools/htseq.wdl +++ b/tools/htseq.wdl @@ -91,11 +91,9 @@ task count { Float bam_size = size(bam, "GiB") Float gtf_size = size(gtf, "GiB") - Int memory_gb = (if pos_sorted then ceil(bam_size) + 4 - else 4) + modify_memory_gb + Int memory_gb = (if pos_sorted then ceil(bam_size) + 4 else 4) + modify_memory_gb - Int disk_size_gb = ceil((bam_size + gtf_size) * if pos_sorted - then 4 else 1) + 10 + modify_disk_size_gb + Int disk_size_gb = ceil((bam_size + gtf_size) * if pos_sorted then 4 else 1) + 10 + modify_disk_size_gb command <<< set -euo pipefail @@ -115,12 +113,9 @@ task count { -t ~{feature_type} \ -m ~{mode} \ -i ~{idattr} \ - --nonunique ~{if nonunique then "all" else "none" - } \ - --secondary-alignments ~{if secondary_alignments - then "score" else "ignore"} \ - --supplementary-alignments ~{(if supplementary_alignments - then "score" else "ignore")} \ + --nonunique ~{if nonunique then "all" else "none"} \ + --secondary-alignments ~{if secondary_alignments then "score" else "ignore"} \ + --supplementary-alignments ~{(if supplementary_alignments then "score" else "ignore")} \ ~{bam} \ ~{gtf} \ >> ~{outfile_name} @@ -156,8 +151,7 @@ task calc_tpm { input { File counts File gene_lengths - String prefix = basename(counts, ".feature-counts.txt" - ) + String prefix = basename(counts, ".feature-counts.txt") Boolean 
has_header = true } @@ -165,8 +159,7 @@ task calc_tpm { #@ except: LineWidth command <<< - COUNTS="~{counts}" GENE_LENGTHS="~{gene_lengths}" OUTFILE="~{ - outfile_name}" python3 - < 0) then ceil( - max_db_size_gb * 1.2) else ceil(tarballs_size * 2)) - + modify_memory_gb) + Int memory_gb = ((if (max_db_size_gb > 0) then ceil(max_db_size_gb * 1.2) else ceil(tarballs_size * 2)) + modify_memory_gb) String max_db_size_bytes = "~{max_db_size_gb}000000000" @@ -249,14 +245,12 @@ task build_db { --kmer-len ~{kmer_len} \ --minimizer-len ~{minimizer_len} \ --minimizer-spaces ~{minimizer_spaces} \ - ~{(if (max_db_size_gb > 0) then "--max-db-size " - + max_db_size_bytes else "")} \ + ~{(if (max_db_size_gb > 0) then "--max-db-size " + max_db_size_bytes else "")} \ --threads "$n_cores" \ --db ~{db_name} >&2 echo "*** start DB clean ***" - kraken2-build --clean --threads "$n_cores" --db ~{ - db_name} + kraken2-build --clean --threads "$n_cores" --db ~{db_name} >&2 echo "*** done ***" >&2 echo "*** tarballing DB ***" @@ -321,8 +315,7 @@ task kraken { File read_two_fastq_gz #@ except: DisallowedInputName File db - String prefix = sub(basename(read_one_fastq_gz), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "") + String prefix = sub(basename(read_one_fastq_gz), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", "") Boolean store_sequences = false Boolean use_names = true Boolean use_all_cores = false @@ -335,11 +328,8 @@ task kraken { Float db_size = size(db, "GiB") Float read1_size = size(read_one_fastq_gz, "GiB") Float read2_size = size(read_two_fastq_gz, "GiB") - Int disk_size_gb_calculation = (ceil((db_size * 2) + read1_size - + read2_size) + 10 + modify_disk_size_gb) - Int disk_size_gb = (if store_sequences then disk_size_gb_calculation - + ceil(read1_size + read2_size) else disk_size_gb_calculation - ) + Int disk_size_gb_calculation = (ceil((db_size * 2) + read1_size + read2_size) + 10 + modify_disk_size_gb) + Int disk_size_gb = (if store_sequences then disk_size_gb_calculation + ceil(read1_size + read2_size) else disk_size_gb_calculation) Int memory_gb = ceil(db_size * 2) + modify_memory_gb @@ -359,8 +349,7 @@ task kraken { kraken2 --db kraken2_db/ \ --paired \ - --output ~{if store_sequences then out_sequences - else "-"} \ + --output ~{if store_sequences then out_sequences else "-"} \ --threads "$n_cores" \ --minimum-base-quality ~{min_base_quality} \ --report ~{out_report} \ diff --git a/tools/librarian.wdl b/tools/librarian.wdl index ae79a044..29d46728 100644 --- a/tools/librarian.wdl +++ b/tools/librarian.wdl @@ -20,21 +20,18 @@ task librarian { input { File read_one_fastq - String prefix = sub(basename(read_one_fastq), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - "") + ".librarian" + String prefix = sub(basename(read_one_fastq), "([_\\.][rR][12])?(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", "") + ".librarian" Int modify_disk_size_gb = 0 } Float read1_size = size(read_one_fastq, "GiB") - Int disk_size_gb = (ceil(read1_size) + 10 + modify_disk_size_gb - ) + Int disk_size_gb = (ceil(read1_size) + 10 + modify_disk_size_gb) command <<< set -euo pipefail mkdir ~{prefix} - /app/librarian --local --raw -o ~{prefix} ~{ - read_one_fastq} + /app/librarian --local --raw -o ~{prefix} ~{read_one_fastq} tar -czf ~{prefix}.tar.gz ~{prefix} >>> diff --git a/tools/mosdepth.wdl b/tools/mosdepth.wdl index 3286284a..6ad3c08e 100644 --- a/tools/mosdepth.wdl +++ b/tools/mosdepth.wdl @@ -48,8 +48,7 @@ task coverage { mosdepth \ -n \ - ~{if defined(coverage_bed) then "-b" else ""} ~{ 
- coverage_bed} \ + ~{if defined(coverage_bed) then "-b" else ""} ~{coverage_bed} \ -Q ~{min_mapping_quality} \ ~{if (use_fast_mode) then "-x" else ""} \ ~{prefix} \ diff --git a/tools/multiqc.wdl b/tools/multiqc.wdl index 590678cd..b6899e26 100755 --- a/tools/multiqc.wdl +++ b/tools/multiqc.wdl @@ -46,8 +46,7 @@ task multiqc { # run after multiqc is finished. multiqc -v \ --no-ansi \ - ~{if defined(config) then "-c " + config else "" - } \ + ~{if defined(config) then "-c " + config else ""} \ --file-list file_list.txt \ -o ~{prefix} diff --git a/tools/ngsderive.wdl b/tools/ngsderive.wdl index 005a601d..efac7be5 100644 --- a/tools/ngsderive.wdl +++ b/tools/ngsderive.wdl @@ -78,8 +78,7 @@ task strandedness { output { File strandedness_file = outfile_name - String strandedness_string = read_string("strandedness.txt" - ) + String strandedness_string = read_string("strandedness.txt") } runtime { @@ -132,8 +131,7 @@ task instrument { output { File instrument_file = outfile_name - String instrument_string = read_string("instrument.txt" - ) + String instrument_string = read_string("instrument.txt") } runtime { @@ -273,8 +271,7 @@ task encoding { >>> output { - String inferred_encoding = read_string("encoding.txt" - ) + String inferred_encoding = read_string("encoding.txt") File encoding_file = outfile_name } @@ -427,8 +424,7 @@ task endedness { } Float bam_size = size(bam, "GiB") - Int memory_gb = (if calc_rpt then (ceil(bam_size * 2.5) - + 4 + modify_memory_gb) else 4) + Int memory_gb = (if calc_rpt then (ceil(bam_size * 2.5) + 4 + modify_memory_gb) else 4) Int disk_size_gb = ceil(bam_size) + 10 + modify_disk_size_gb command <<< diff --git a/tools/picard.wdl b/tools/picard.wdl index 5bb4750f..6ad19583 100755 --- a/tools/picard.wdl +++ b/tools/picard.wdl @@ -77,9 +77,7 @@ task mark_duplicates { Float bam_size = size(bam, "GiB") Int memory_gb = min(ceil(bam_size + 6), 50) + modify_memory_gb - Int disk_size_gb = ((if create_bam then ceil((bam_size * 2 - ) + 10) else ceil(bam_size + 10)) + modify_disk_size_gb - ) + Int disk_size_gb = ((if create_bam then ceil((bam_size * 2) + 10) else ceil(bam_size + 10)) + modify_disk_size_gb) Int java_heap_size = ceil(memory_gb * 0.9) @@ -89,22 +87,17 @@ task mark_duplicates { picard -Xmx~{java_heap_size}g MarkDuplicates \ -I ~{bam} \ --METRICS_FILE ~{prefix}.metrics.txt \ - -O ~{if create_bam then prefix + ".bam" else "/dev/null" - } \ + -O ~{if create_bam then prefix + ".bam" else "/dev/null"} \ --CREATE_INDEX ~{create_bam} \ --CREATE_MD5_FILE ~{create_bam} \ --VALIDATION_STRINGENCY ~{validation_stringency} \ - --DUPLICATE_SCORING_STRATEGY ~{ - duplicate_scoring_strategy} \ - --READ_NAME_REGEX '~{if (optical_distance > 0) - then read_name_regex else "null"}' \ + --DUPLICATE_SCORING_STRATEGY ~{duplicate_scoring_strategy} \ + --READ_NAME_REGEX '~{if (optical_distance > 0) then read_name_regex else "null"}' \ --TAGGING_POLICY ~{tagging_policy} \ --CLEAR_DT ~{clear_dt} \ --REMOVE_DUPLICATES ~{remove_duplicates} \ - --REMOVE_SEQUENCING_DUPLICATES ~{ - remove_sequencing_duplicates} \ - --OPTICAL_DUPLICATE_PIXEL_DISTANCE ~{ - optical_distance} + --REMOVE_SEQUENCING_DUPLICATES ~{remove_sequencing_duplicates} \ + --OPTICAL_DUPLICATE_PIXEL_DISTANCE ~{optical_distance} if ~{create_bam}; then mv ~{prefix}.bai ~{prefix}.bam.bai @@ -180,20 +173,15 @@ task validate_bam { Boolean succeed_on_errors = false Boolean succeed_on_warnings = true Boolean summary_mode = false - Boolean index_validation_stringency_less_exhaustive - = false + Boolean 
index_validation_stringency_less_exhaustive = false Int max_errors = 2147483647 # max 32-bit INT Int memory_gb = 16 Int modify_disk_size_gb = 0 } - String reference_arg = (if defined(reference_fasta) then "-R ~{ - reference_fasta}" else "") - String mode_arg = if (summary_mode) then "--MODE SUMMARY" - else "" - String stringency_arg = (if ( - index_validation_stringency_less_exhaustive) then "--INDEX_VALIDATION_STRINGENCY LESS_EXHAUSTIVE" - else "") + String reference_arg = (if defined(reference_fasta) then "-R ~{reference_fasta}" else "") + String mode_arg = if (summary_mode) then "--MODE SUMMARY" else "" + String stringency_arg = (if (index_validation_stringency_less_exhaustive) then "--INDEX_VALIDATION_STRINGENCY LESS_EXHAUSTIVE" else "") Float bam_size = size(bam, "GiB") Int disk_size_gb = ceil(bam_size * 2) + 10 + modify_disk_size_gb Int java_heap_size = ceil(memory_gb * 0.9) @@ -836,11 +824,9 @@ task bam_to_fastq { command <<< set -euo pipefail - picard -Xmx~{java_heap_size}g SamToFastq INPUT=~{bam - } \ + picard -Xmx~{java_heap_size}g SamToFastq INPUT=~{bam} \ FASTQ=~{prefix}.R1.fastq \ - ~{(if paired then "SECOND_END_FASTQ=" + prefix + ".R2.fastq" - else "")} \ + ~{(if paired then "SECOND_END_FASTQ=" + prefix + ".R2.fastq" else "")} \ RE_REVERSE=true \ VALIDATION_STRINGENCY=SILENT @@ -967,8 +953,7 @@ task scatter_interval_list { >>> output { - Array[File] interval_lists_scatter = glob("out/*/*.interval_list" - ) + Array[File] interval_lists_scatter = glob("out/*/*.interval_list") Int interval_count = read_int("interval_count.txt") } @@ -1018,12 +1003,9 @@ task create_sequence_dictionary { picard -Xmx~{java_heap_size}g CreateSequenceDictionary \ -R ~{fasta} \ - ~{(if defined(assembly_name) then "--GENOME_ASSEMBLY " - + assembly_name else "")} \ - ~{if defined(fasta_url) then "--URI " + fasta_url - else ""} \ - ~{if defined(species) then "--SPECIES " + species - else ""} \ + ~{(if defined(assembly_name) then "--GENOME_ASSEMBLY " + assembly_name else "")} \ + ~{if defined(fasta_url) then "--URI " + fasta_url else ""} \ + ~{if defined(species) then "--SPECIES " + species else ""} \ > ~{outfile_name} \ >>> diff --git a/tools/qualimap.wdl b/tools/qualimap.wdl index b9d945af..bf37acdb 100755 --- a/tools/qualimap.wdl +++ b/tools/qualimap.wdl @@ -48,9 +48,7 @@ task rnaseq { # Qualimap has an inefficient name sorting algorithm and will # use an excessive amount of storage. 
- Int disk_size_gb = ((if name_sorted then ceil(bam_size + gtf_size - + 15) else ceil(((bam_size + gtf_size) * 12) + 10)) - + modify_disk_size_gb) + Int disk_size_gb = ((if name_sorted then ceil(bam_size + gtf_size + 15) else ceil(((bam_size + gtf_size) * 12) + 10)) + modify_disk_size_gb) command <<< set -euo pipefail diff --git a/tools/sambamba.wdl b/tools/sambamba.wdl index 81c2c6fa..e233c443 100644 --- a/tools/sambamba.wdl +++ b/tools/sambamba.wdl @@ -43,8 +43,7 @@ task index { n_cores=$(nproc) fi - sambamba index --nthreads "$n_cores" ~{bam} ~{ - outfile_name} + sambamba index --nthreads "$n_cores" ~{bam} ~{outfile_name} >>> output { @@ -210,8 +209,7 @@ task markdup { command <<< sambamba markdup \ --nthreads ~{ncpu} \ - ~{if remove_duplicates then "--remove-duplicates" - else ""} \ + ~{if remove_duplicates then "--remove-duplicates" else ""} \ ~{bam} \ ~{prefix}.markdup.bam \ > ~{prefix}.markdup_log.txt @@ -273,8 +271,7 @@ task flagstat { n_cores=$(nproc) fi - sambamba flagstat --nthreads "$n_cores" ~{bam} > ~{ - outfile_name} + sambamba flagstat --nthreads "$n_cores" ~{bam} > ~{outfile_name} >>> output { diff --git a/tools/samtools.wdl b/tools/samtools.wdl index cf4b9a19..733d6532 100755 --- a/tools/samtools.wdl +++ b/tools/samtools.wdl @@ -198,8 +198,7 @@ task flagstat { # -1 because samtools uses one more core than `--threads` specifies let "n_cores -= 1" - samtools flagstat --threads "$n_cores" ~{bam} > ~{ - outfile_name} + samtools flagstat --threads "$n_cores" ~{bam} > ~{outfile_name} >>> output { @@ -257,8 +256,7 @@ task index { # -1 because samtools uses one more core than `--threads` specifies let "n_cores -= 1" - samtools index --threads "$n_cores" ~{bam} ~{ - outfile_name} + samtools index --threads "$n_cores" ~{bam} ~{outfile_name} >>> output { @@ -370,8 +368,7 @@ task subsample { echo -e "~{prefix}\t$read_count" } > ~{suffixed}.orig_read_count.tsv else - # the BAM has less than or equal to ~{ - desired_reads} reads, + # the BAM has less than or equal to ~{desired_reads} reads, # meaning we should just use it directly without subsampling. # Do not report an original read count, @@ -407,8 +404,7 @@ task subsample { >>> output { - File orig_read_count = glob("*.orig_read_count.tsv")[ - 0] + File orig_read_count = glob("*.orig_read_count.tsv")[0] File? 
sampled_bam = suffixed + ".bam" } @@ -564,8 +560,7 @@ task merge { Float bams_size = size(bams, "GiB") Float header_size = size(new_header, "GiB") - Int disk_size_gb = ceil(bams_size * 2 + header_size) + 10 - + modify_disk_size_gb + Int disk_size_gb = ceil(bams_size * 2 + header_size) + 10 + modify_disk_size_gb command <<< set -euo pipefail @@ -588,8 +583,7 @@ task merge { samtools merge \ --threads "$n_cores" \ - ~{if defined(new_header) then "-h " + new_header - else ""} \ + ~{if defined(new_header) then "-h " + new_header else ""} \ ~{if name_sorted then "-n" else ""} \ ~{if (region != "") then "-R " + region else ""} \ ~{if attach_rg then "-r" else ""} \ @@ -676,12 +670,9 @@ task addreplacerg { samtools addreplacerg \ --threads "$n_cores" \ - ~{sep(" ", prefix("-r ", squote(read_group_line) - ))} \ - ~{if defined(read_group_id) then "-R " + read_group_id - else ""} \ - -m ~{if orphan_only then "orphan_only" else "overwrite_all" - } \ + ~{sep(" ", prefix("-r ", squote(read_group_line)))} \ + ~{if defined(read_group_id) then "-R " + read_group_id else ""} \ + -m ~{if orphan_only then "orphan_only" else "overwrite_all"} \ ~{if overwrite_header_record then "-w" else ""} \ -o ~{outfile_name} \ ~{bam} @@ -857,11 +848,8 @@ task bam_to_fastq { } Float bam_size = size(bam, "GiB") - Int memory_gb = (if (collated || !paired_end) then 4 - else (ceil(bam_size * 0.4) + 4)) + modify_memory_gb - Int disk_size_gb = ceil(bam_size * (if ( - retain_collated_bam && !collated && paired_end) then 5 - else 2)) + 10 + modify_disk_size_gb + Int memory_gb = (if (collated || !paired_end) then 4 else (ceil(bam_size * 0.4) + 4)) + modify_memory_gb + Int disk_size_gb = ceil(bam_size * (if (retain_collated_bam && !collated && paired_end) then 5 else 2)) + 10 + modify_disk_size_gb command <<< set -euo pipefail @@ -881,8 +869,7 @@ task bam_to_fastq { ~{if fast_mode then "-f" else ""} \ -O \ ~{bam} \ - | tee ~{if retain_collated_bam then prefix + ".collated.bam" - else ""} \ + | tee ~{if retain_collated_bam then prefix + ".collated.bam" else ""} \ > bam_pipe \ & else @@ -896,17 +883,10 @@ task bam_to_fastq { --rf ~{bitwise_filter.include_if_any} \ -G ~{bitwise_filter.exclude_if_all} \ ~{(if append_read_number then "-N" else "-n")} \ - -1 ~{(if paired_end then (if interleaved then prefix - + ".fastq.gz" else prefix + ".R1.fastq.gz") - else prefix + ".fastq.gz")} \ - -2 ~{(if paired_end then (if interleaved then prefix - + ".fastq.gz" else prefix + ".R2.fastq.gz") - else prefix + ".fastq.gz")} \ - ~{(if paired_end then (if output_singletons then "-s " - + prefix + ".singleton.fastq.gz" else "-s junk.singleton.fastq.gz" - ) else "")} \ - -0 ~{(if paired_end then "junk.unknown_bit_setting.fastq.gz" - else prefix + ".fastq.gz")} \ + -1 ~{(if paired_end then (if interleaved then prefix + ".fastq.gz" else prefix + ".R1.fastq.gz") else prefix + ".fastq.gz")} \ + -2 ~{(if paired_end then (if interleaved then prefix + ".fastq.gz" else prefix + ".R2.fastq.gz") else prefix + ".fastq.gz")} \ + ~{(if paired_end then (if output_singletons then "-s " + prefix + ".singleton.fastq.gz" else "-s junk.singleton.fastq.gz") else "")} \ + -0 ~{(if paired_end then "junk.unknown_bit_setting.fastq.gz" else prefix + ".fastq.gz")} \ bam_pipe rm bam_pipe @@ -1021,14 +1001,11 @@ task fixmate { samtools fixmate \ --threads "$n_cores" \ - ~{if remove_unaligned_and_secondary then "-r" - else ""} \ - ~{if disable_proper_pair_check then "-p" else "" - } \ + ~{if remove_unaligned_and_secondary then "-r" else ""} \ + ~{if disable_proper_pair_check then "-p" 
else ""} \ ~{if add_cigar then "-c" else ""} \ ~{if add_mate_score then "-m" else ""} \ - ~{if disable_flag_sanitization then "-z off" - else ""} \ + ~{if disable_flag_sanitization then "-z off" else ""} \ ~{bam} \ ~{prefix}~{extension} >>> @@ -1124,14 +1101,11 @@ task position_sorted_fixmate { | samtools fixmate \ --threads "$n_cores" \ -u \ - ~{if remove_unaligned_and_secondary then "-r" - else ""} \ - ~{if disable_proper_pair_check then "-p" - else ""} \ + ~{if remove_unaligned_and_secondary then "-r" else ""} \ + ~{if disable_proper_pair_check then "-p" else ""} \ ~{if add_cigar then "-c" else ""} \ ~{if add_mate_score then "-m" else ""} \ - ~{if disable_flag_sanitization then "-z off" - else ""} \ + ~{if disable_flag_sanitization then "-z off" else ""} \ - \ - \ | samtools sort \ @@ -1246,29 +1220,21 @@ task markdup { --read-coords '~{read_coords_regex}' \ --coords-order ~{coordinates_order} \ ~{if remove_duplicates then "-r" else ""} \ - ~{if mark_supp_or_sec_or_unmapped_as_duplicates - then "-S" else ""} \ - ~{if mark_duplicates_with_do_tag then "-t" else "" - } \ - ~{if duplicate_count then "--duplicate-count" - else ""} \ - ~{if include_qc_fails then "--include-fails" - else ""} \ - ~{if duplicates_of_duplicates_check then "" else "--no-multi-dup" - } \ - ~{if use_read_groups then "--use-read-groups" - else ""} \ + ~{if mark_supp_or_sec_or_unmapped_as_duplicates then "-S" else ""} \ + ~{if mark_duplicates_with_do_tag then "-t" else ""} \ + ~{if duplicate_count then "--duplicate-count" else ""} \ + ~{if include_qc_fails then "--include-fails" else ""} \ + ~{if duplicates_of_duplicates_check then "" else "--no-multi-dup"} \ + ~{if use_read_groups then "--use-read-groups" else ""} \ -l ~{max_readlen} \ -d ~{optical_distance} \ -c \ ~{bam} \ - "~{if create_bam then prefix + ".bam" else "/dev/null" - }" + "~{if create_bam then prefix + ".bam" else "/dev/null"}" >>> output { - File markdup_report = prefix + if json then ".json" - else ".txt" + File markdup_report = prefix + if json then ".json" else ".txt" File? 
markdup_bam = prefix + ".bam" } diff --git a/tools/star.wdl b/tools/star.wdl index 0d0db4f0..aa7b2b78 100755 --- a/tools/star.wdl +++ b/tools/star.wdl @@ -71,11 +71,9 @@ task build_star_db { String star_db_tar_gz = db_name + ".tar.gz" - Float reference_fasta_size = size(reference_fasta, "GiB" - ) + Float reference_fasta_size = size(reference_fasta, "GiB") Float gtf_size = size(gtf, "GiB") - Int disk_size_gb = (ceil((reference_fasta_size + gtf_size - ) * 3) + 10 + modify_disk_size_gb) + Int disk_size_gb = (ceil((reference_fasta_size + gtf_size) * 3) + 10 + modify_disk_size_gb) # Leave 2GB as system overhead String memory_limit_bytes = "~{memory_gb - 2}000000000" @@ -104,19 +102,14 @@ task build_star_db { --sjdbGTFfile "$gtf_name" \ --sjdbGTFchrPrefix ~{sjdb_gtf_chr_prefix} \ --sjdbGTFfeatureExon ~{sjdb_gtf_feature_exon} \ - --sjdbGTFtagExonParentTranscript ~{ - sjdb_gtf_tag_exon_parant_transcript} \ - --sjdbGTFtagExonParentGene ~{ - sjdb_gtf_tag_exon_parent_gene} \ - --sjdbGTFtagExonParentGeneName ~{ - sjdb_gtf_tag_exon_parent_gene_name} \ - --sjdbGTFtagExonParentGeneType ~{ - sjdb_gtf_tag_exon_parent_gene_type} \ + --sjdbGTFtagExonParentTranscript ~{sjdb_gtf_tag_exon_parant_transcript} \ + --sjdbGTFtagExonParentGene ~{sjdb_gtf_tag_exon_parent_gene} \ + --sjdbGTFtagExonParentGeneName ~{sjdb_gtf_tag_exon_parent_gene_name} \ + --sjdbGTFtagExonParentGeneType ~{sjdb_gtf_tag_exon_parent_gene_type} \ --genomeChrBinNbits ~{genome_chr_bin_n_bits} \ --genomeSAindexNbases ~{genome_SA_index_n_bases} \ --genomeSAsparseD ~{genome_SA_sparse_d} \ - --genomeSuffixLengthMax ~{ - genome_suffix_length_max} \ + --genomeSuffixLengthMax ~{genome_suffix_length_max} \ --sjdbOverhang ~{sjdb_overhang} rm "$gtf_name" "$ref_fasta" @@ -495,8 +488,7 @@ task alignment { GC_AG_and_CT_GC_motif: 12, AT_AC_and_GT_AT_motif: 12, } - SpliceJunctionMotifs out_sj_filter_count_unique_min - = SpliceJunctionMotifs { + SpliceJunctionMotifs out_sj_filter_count_unique_min = SpliceJunctionMotifs { noncanonical_motifs: 3, GT_AG_and_CT_AC_motif: 1, GC_AG_and_CT_GC_motif: 1, @@ -508,28 +500,23 @@ task alignment { GC_AG_and_CT_GC_motif: 1, AT_AC_and_GT_AT_motif: 1, } - SpliceJunctionMotifs out_sj_filter_dist_to_other_sj_min - = SpliceJunctionMotifs { + SpliceJunctionMotifs out_sj_filter_dist_to_other_sj_min = SpliceJunctionMotifs { noncanonical_motifs: 10, GT_AG_and_CT_AC_motif: 0, GC_AG_and_CT_GC_motif: 5, AT_AC_and_GT_AT_motif: 10, } - SpliceJunctionMotifs align_sj_stitch_mismatch_n_max - = SpliceJunctionMotifs { + SpliceJunctionMotifs align_sj_stitch_mismatch_n_max = SpliceJunctionMotifs { noncanonical_motifs: 0, GT_AG_and_CT_AC_motif: -1, GC_AG_and_CT_GC_motif: 0, AT_AC_and_GT_AT_motif: 0, } - Pair[String, String] clip_3p_adapter_seq = ("None", "None" - ) + Pair[String, String] clip_3p_adapter_seq = ("None", "None") Pair[Float, Float] clip_3p_adapter_mmp = (0.1, 0.1) - Pair[Int, String] align_ends_protrude = (0, "ConcordantPair" - ) + Pair[Int, String] align_ends_protrude = (0, "ConcordantPair") Pair[Int, Int] clip_3p_n_bases = (0, 0) - Pair[Int, Int] clip_3p_after_adapter_n_bases = (0, 0 - ) + Pair[Int, Int] clip_3p_after_adapter_n_bases = (0, 0) Pair[Int, Int] clip_5p_n_bases = (0, 0) String read_name_separator = "/" String clip_adapter_type = "Hamming" @@ -630,14 +617,10 @@ task alignment { Int modify_disk_size_gb = 0 } - Float read_one_fastqs_size = size(read_one_fastqs_gz, "GiB" - ) - Float read_two_fastqs_size = size(read_two_fastqs_gz, "GiB" - ) + Float read_one_fastqs_size = size(read_one_fastqs_gz, "GiB") + Float 
read_two_fastqs_size = size(read_two_fastqs_gz, "GiB") Float star_db_tar_gz_size = size(star_db_tar_gz, "GiB") - Int disk_size_gb = ((ceil(read_one_fastqs_size + read_two_fastqs_size - + star_db_tar_gz_size) * 3) + 10 + modify_disk_size_gb - ) + Int disk_size_gb = ((ceil(read_one_fastqs_size + read_two_fastqs_size + star_db_tar_gz_size) * 3) + 10 + modify_disk_size_gb) #@ except: LineWidth command <<< @@ -654,13 +637,9 @@ task alignment { # odd constructions a combination of needing white space properly parsed # and limitations of the WDL v1.1 spec python3 /home/sort_star_input.py \ - --read-one-fastqs "~{sep(",", read_one_fastqs_gz - )}" \ - ~{if (length(read_two_fastqs_gz) != 0) then "--read-two-fastqs" - else ""} "~{sep(",", (read_two_fastqs_gz))}" \ - ~{if defined(read_groups) then "--read-groups" - else ""} "~{(if defined(read_groups) then read_groups - else "")}" + --read-one-fastqs "~{sep(",", read_one_fastqs_gz)}" \ + ~{if (length(read_two_fastqs_gz) != 0) then "--read-two-fastqs" else ""} "~{sep(",", (read_two_fastqs_gz))}" \ + ~{if defined(read_groups) then "--read-groups" else ""} "~{(if defined(read_groups) then read_groups else "")}" read -ra read_one_args < read_one_fastqs_sorted.txt read -ra read_two_args < read_two_fastqs_sorted.txt @@ -674,8 +653,7 @@ task alignment { --outFileNamePrefix ~{prefix + "."} \ --twopassMode ~{twopass_mode} \ --outSAMattrRGline "${read_group_args[@]}" \ - --outSJfilterIntronMaxVsReadN ~{sep(" ", quote( - out_sj_filter_intron_max_vs_read_n))} \ + --outSJfilterIntronMaxVsReadN ~{sep(" ", quote(out_sj_filter_intron_max_vs_read_n))} \ --outSJfilterOverhangMin ~{sep(" ", quote([ out_sj_filter_overhang_min.noncanonical_motifs, out_sj_filter_overhang_min.GT_AG_and_CT_AC_motif, @@ -706,19 +684,12 @@ task alignment { align_sj_stitch_mismatch_n_max.GC_AG_and_CT_GC_motif, align_sj_stitch_mismatch_n_max.AT_AC_and_GT_AT_motif, ]))} \ - --clip3pAdapterSeq ~{clip_3p_adapter_seq.left + " " - + clip_3p_adapter_seq.right} \ - --clip3pAdapterMMp ~{"~{clip_3p_adapter_mmp.left - } ~{clip_3p_adapter_mmp.right}"} \ - --alignEndsProtrude ~{"~{align_ends_protrude.left - } ~{align_ends_protrude.right}"} \ - --clip3pNbases ~{"~{clip_3p_n_bases.left} ~{ - clip_3p_n_bases.right}"} \ - --clip3pAfterAdapterNbases ~{"~{ - clip_3p_after_adapter_n_bases.left} ~{ - clip_3p_after_adapter_n_bases.right}"} \ - --clip5pNbases ~{"~{clip_5p_n_bases.left} ~{ - clip_5p_n_bases.right}"} \ + --clip3pAdapterSeq ~{clip_3p_adapter_seq.left + " " + clip_3p_adapter_seq.right} \ + --clip3pAdapterMMp ~{"~{clip_3p_adapter_mmp.left} ~{clip_3p_adapter_mmp.right}"} \ + --alignEndsProtrude ~{"~{align_ends_protrude.left} ~{align_ends_protrude.right}"} \ + --clip3pNbases ~{"~{clip_3p_n_bases.left} ~{clip_3p_n_bases.right}"} \ + --clip3pAfterAdapterNbases ~{"~{clip_3p_after_adapter_n_bases.left} ~{clip_3p_after_adapter_n_bases.right}"} \ + --clip5pNbases ~{"~{clip_5p_n_bases.left} ~{clip_5p_n_bases.right}"} \ --readNameSeparator ~{read_name_separator} \ --clipAdapterType ~{clip_adapter_type} \ --outSAMstrandField ~{out_sam_strand_field} \ @@ -726,43 +697,29 @@ task alignment { --outSAMunmapped ~{out_sam_unmapped} \ --outSAMorder ~{out_sam_order} \ --outSAMreadID ~{out_sam_read_id} \ - --outSAMtlen ~{(if (out_sam_tlen == "left_plus") - then "1" else (if (out_sam_tlen == "left_any" - ) then "2" else "error"))} \ + --outSAMtlen ~{(if (out_sam_tlen == "left_plus") then "1" else (if (out_sam_tlen == "left_any") then "2" else "error"))} \ --outFilterType ~{out_filter_type} \ - --outFilterIntronMotifs ~{ - 
out_filter_intron_motifs} \ - --outFilterIntronStrands ~{ - out_filter_intron_strands} \ + --outFilterIntronMotifs ~{out_filter_intron_motifs} \ + --outFilterIntronStrands ~{out_filter_intron_strands} \ --outSJfilterReads ~{out_sj_filter_reads} \ --alignEndsType ~{align_ends_type} \ - --alignSoftClipAtReferenceEnds ~{ - align_soft_clip_at_reference_ends} \ + --alignSoftClipAtReferenceEnds ~{align_soft_clip_at_reference_ends} \ --alignInsertionFlush ~{align_insertion_flush} \ --chimOutType ~{chim_out_type} \ --chimFilter ~{chim_filter} \ - --chimOutJunctionFormat ~{ - chim_out_junction_format} \ - --outFilterMismatchNoverLmax ~{ - out_filter_mismatch_n_over_l_max} \ - --outFilterMismatchNoverReadLmax ~{ - out_filter_mismatch_n_over_read_l_max} \ - --outFilterScoreMinOverLread ~{ - out_filter_score_min_over_l_read} \ - --outFilterMatchNminOverLread ~{ - out_filter_match_n_min_over_l_read} \ - --scoreGenomicLengthLog2scale ~{ - score_genomic_length_log2_scale} \ - --seedSearchStartLmaxOverLread ~{ - seed_search_start_l_max_over_l_read} \ - --alignSplicedMateMapLminOverLmate ~{ - align_spliced_mate_map_l_min_over_l_mate} \ + --chimOutJunctionFormat ~{chim_out_junction_format} \ + --outFilterMismatchNoverLmax ~{out_filter_mismatch_n_over_l_max} \ + --outFilterMismatchNoverReadLmax ~{out_filter_mismatch_n_over_read_l_max} \ + --outFilterScoreMinOverLread ~{out_filter_score_min_over_l_read} \ + --outFilterMatchNminOverLread ~{out_filter_match_n_min_over_l_read} \ + --scoreGenomicLengthLog2scale ~{score_genomic_length_log2_scale} \ + --seedSearchStartLmaxOverLread ~{seed_search_start_l_max_over_l_read} \ + --alignSplicedMateMapLminOverLmate ~{align_spliced_mate_map_l_min_over_l_mate} \ --peOverlapMMp ~{pe_overlap_mmp} \ --runRNGseed ~{run_rng_seed} \ --sjdbScore ~{sjdb_score} \ --readMapNumber ~{read_map_number} \ - --readQualityScoreBase ~{read_quality_score_base - } \ + --readQualityScoreBase ~{read_quality_score_base} \ --limitOutSJoneRead ~{limit_out_sj_one_read} \ --limitOutSJcollapsed ~{limit_out_sj_collapsed} \ --limitSjdbInsertNsj ~{limit_sjdb_insert_n_sj} \ @@ -771,12 +728,9 @@ task alignment { --outSAMmapqUnique ~{out_sam_mapq_unique} \ --outSAMflagOR ~{out_sam_flag_OR} \ --outSAMflagAND ~{out_sam_flag_AND} \ - --outFilterMultimapScoreRange ~{ - out_filter_multimap_score_range} \ - --outFilterMultimapNmax ~{ - out_filter_multimap_n_max} \ - --outFilterMismatchNmax ~{ - out_filter_mismatch_n_max} \ + --outFilterMultimapScoreRange ~{out_filter_multimap_score_range} \ + --outFilterMultimapNmax ~{out_filter_multimap_n_max} \ + --outFilterMismatchNmax ~{out_filter_mismatch_n_max} \ --outFilterScoreMin ~{out_filter_score_min} \ --outFilterMatchNmin ~{out_filter_match_n_min} \ --scoreGap ~{score_gap} \ @@ -793,27 +747,20 @@ task alignment { --seedMultimapNmax ~{seed_multimap_n_max} \ --seedPerReadNmax ~{seed_per_read_n_max} \ --seedPerWindowNmax ~{seed_per_window_n_max} \ - --seedNoneLociPerWindow ~{ - seed_none_loci_per_window} \ + --seedNoneLociPerWindow ~{seed_none_loci_per_window} \ --seedSplitMin ~{seed_split_min} \ --seedMapMin ~{seed_map_min} \ --alignIntronMin ~{align_intron_min} \ --alignIntronMax ~{align_intron_max} \ --alignMatesGapMax ~{align_mates_gap_max} \ --alignSJoverhangMin ~{align_sj_overhang_min} \ - --alignSJDBoverhangMin ~{align_sjdb_overhang_min - } \ - --alignSplicedMateMapLmin ~{ - align_spliced_mate_map_l_min} \ - --alignWindowsPerReadNmax ~{ - align_windows_per_read_n_max} \ - --alignTranscriptsPerWindowNmax ~{ - align_transcripts_per_window_n_max} \ - 
--alignTranscriptsPerReadNmax ~{ - align_transcripts_per_read_n_max} \ + --alignSJDBoverhangMin ~{align_sjdb_overhang_min} \ + --alignSplicedMateMapLmin ~{align_spliced_mate_map_l_min} \ + --alignWindowsPerReadNmax ~{align_windows_per_read_n_max} \ + --alignTranscriptsPerWindowNmax ~{align_transcripts_per_window_n_max} \ + --alignTranscriptsPerReadNmax ~{align_transcripts_per_read_n_max} \ --peOverlapNbasesMin ~{pe_overlap_n_bases_min} \ - --winAnchorMultimapNmax ~{ - win_anchor_multimap_n_max} \ + --winAnchorMultimapNmax ~{win_anchor_multimap_n_max} \ --winBinNbits ~{win_bin_n_bits} \ --winAnchorDistNbins ~{win_anchor_dist_n_bins} \ --winFlankNbins ~{win_flank_n_bins} \ @@ -821,19 +768,13 @@ task alignment { --chimScoreMin ~{chim_score_min} \ --chimScoreDropMax ~{chim_score_drop_max} \ --chimScoreSeparation ~{chim_score_separation} \ - --chimScoreJunctionNonGTAG ~{ - chim_score_junction_nonGTAG} \ - --chimJunctionOverhangMin ~{ - chim_junction_overhang_min} \ - --chimSegmentReadGapMax ~{ - chim_segment_read_gap_max} \ - --chimMainSegmentMultNmax ~{ - chim_main_segment_multi_n_max} \ + --chimScoreJunctionNonGTAG ~{chim_score_junction_nonGTAG} \ + --chimJunctionOverhangMin ~{chim_junction_overhang_min} \ + --chimSegmentReadGapMax ~{chim_segment_read_gap_max} \ + --chimMainSegmentMultNmax ~{chim_main_segment_multi_n_max} \ --chimMultimapNmax ~{chim_multimap_n_max} \ - --chimMultimapScoreRange ~{ - chim_multimap_score_range} \ - --chimNonchimScoreDropMin ~{ - chim_nonchim_score_drop_min} \ + --chimMultimapScoreRange ~{chim_multimap_score_range} \ + --chimNonchimScoreDropMin ~{chim_nonchim_score_drop_min} \ --twopass1readsN ~{twopass1_reads_n} >>> diff --git a/tools/util.wdl b/tools/util.wdl index 1b1ba227..b4e2fbae 100644 --- a/tools/util.wdl +++ b/tools/util.wdl @@ -30,8 +30,7 @@ task download { wget ~{url} -O ~{outfile_name} if [ -n "~{md5sum}" ]; then - echo "~{md5sum} ~{outfile_name}" > ~{ - outfile_name}.md5 + echo "~{md5sum} ~{outfile_name}" > ~{outfile_name}.md5 md5sum -c ~{outfile_name}.md5 fi >>> @@ -93,8 +92,7 @@ task get_read_groups { >>> output { - Array[String] read_groups = read_lines("read_groups.txt" - ) + Array[String] read_groups = read_lines("read_groups.txt") } runtime { @@ -135,8 +133,7 @@ task split_string { >>> output { - Array[String] split_strings = read_lines("split_strings.txt" - ) + Array[String] split_strings = read_lines("split_strings.txt") } runtime { @@ -179,8 +176,7 @@ task calc_gene_lengths { command <<< set -euo pipefail - GTF="~{gtf}" OUTFILE="~{outfile_name}" IDATTR="~{ - idattr}" python - < header.sam echo "~{additional_header}" >> header.sam - samtools reheader -P header.sam ~{bam} > ~{ - outfile_name} + samtools reheader -P header.sam ~{bam} > ~{outfile_name} >>> output { @@ -362,8 +357,7 @@ task unpack_tarball { >>> output { - Array[File] tarball_contents = read_lines("file_list.txt" - ) + Array[File] tarball_contents = read_lines("file_list.txt") } runtime { @@ -405,8 +399,7 @@ task make_coverage_regions_bed { input { File gtf String feature_type - String outfile_name = basename(gtf, "gtf.gz") + feature_type - + ".bed" + String outfile_name = basename(gtf, "gtf.gz") + feature_type + ".bed" Int modify_disk_size_gb = 0 } @@ -465,8 +458,7 @@ task global_phred_scores { command <<< set -euo pipefail - BAM="~{bam}" PREFIX="~{prefix}" FAST_MODE=~{ - fast_mode} python3 - < 0) { @@ -79,8 +75,7 @@ workflow chipseq_standard { use_all_cores, } - call samtools.index as samtools_index_input { input: bam - = selected_bam, } + call samtools.index as 
samtools_index_input { input: bam = selected_bam, } #@ except: UnusedCall call ngsderive.read_length { input: @@ -88,12 +83,9 @@ workflow chipseq_standard { bam_index = samtools_index_input.bam_index, } - scatter (pair in zip(bam_to_fastqs.read1s, get_read_groups.read_groups - )) { - call seaseq_util.basicfastqstats as basic_stats { input: - fastqfile = pair.left } - call seaseq_map.mapping as bowtie_single_end_mapping { - input: + scatter (pair in zip(bam_to_fastqs.read1s, get_read_groups.read_groups)) { + call seaseq_util.basicfastqstats as basic_stats { input: fastqfile = pair.left } + call seaseq_map.mapping as bowtie_single_end_mapping { input: fastqfile = pair.left, index_files = bowtie_indexes, metricsfile = basic_stats.metrics_out, @@ -108,8 +100,7 @@ workflow chipseq_standard { bam = chosen_bam, additional_header = pair.right, } - String rg_id_field = sub(sub(pair.right, ".*ID:", "ID:" - ), "\t.*", "") + String rg_id_field = sub(sub(pair.right, ".*ID:", "ID:"), "\t.*", "") String rg_id = sub(rg_id_field, "ID:", "") call samtools.addreplacerg as single_end { input: bam = add_to_bam_header.reheadered_bam, @@ -119,8 +110,7 @@ workflow chipseq_standard { Array[File] aligned_bams = single_end.tagged_bam scatter (aligned_bam in aligned_bams) { - call picard.clean_sam as picard_clean { input: bam = aligned_bam, - } + call picard.clean_sam as picard_clean { input: bam = aligned_bam, } } call picard.merge_sam_files as picard_merge { input: @@ -137,11 +127,9 @@ workflow chipseq_standard { use_all_cores, } #@ except: UnusedCall - call picard.validate_bam { input: bam = markdup.mkdupbam - } + call picard.validate_bam { input: bam = markdup.mkdupbam } - call md5sum.compute_checksum { input: file = markdup.mkdupbam, - } + call md5sum.compute_checksum { input: file = markdup.mkdupbam, } call deeptools.bam_coverage as deeptools_bam_coverage { input: bam = markdup.mkdupbam, diff --git a/workflows/dnaseq/dnaseq-core.wdl b/workflows/dnaseq/dnaseq-core.wdl index c71c27ba..de0a3817 100644 --- a/workflows/dnaseq/dnaseq-core.wdl +++ b/workflows/dnaseq/dnaseq-core.wdl @@ -50,8 +50,7 @@ workflow dnaseq_core_experimental { Int reads_per_file = 10000000 } - scatter (tuple in zip(zip(read_one_fastqs_gz, read_two_fastqs_gz - ), read_groups)) { + scatter (tuple in zip(zip(read_one_fastqs_gz, read_two_fastqs_gz), read_groups)) { if (defined(sample_override)) { # override the SM field of every read group ReadGroup rg = ReadGroup { @@ -72,8 +71,7 @@ workflow dnaseq_core_experimental { } } - call read_group.read_group_to_string { input: read_group - = select_first([ + call read_group.read_group_to_string { input: read_group = select_first([ rg, tuple.right, ]) } @@ -90,20 +88,17 @@ workflow dnaseq_core_experimental { reads_per_file, } - scatter (t in zip(read_ones.fastqs, read_twos.fastqs - )) { + scatter (t in zip(read_ones.fastqs, read_twos.fastqs)) { if (aligner == "mem") { call bwa.bwa_mem { input: read_one_fastq_gz = t.left, read_two_fastq_gz = t.right, bwa_db_tar_gz = bwa_db, - prefix = sub(sub(basename(t.left), "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - ""), "\\.([rR][12])\\.", "."), + prefix = sub(sub(basename(t.left), "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", ""), "\\.([rR][12])\\.", "."), # find spaces, replace with '\\t' # (which must be written as '\\\\t') # '\\t' is subbed into command blocks as '\t' - read_group = sub(rg_string, " ", "\\\\t" - ), + read_group = sub(rg_string, " ", "\\\\t"), use_all_cores, } } @@ -112,18 +107,15 @@ workflow dnaseq_core_experimental { read_one_fastq_gz = t.left, 
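The chipseq hunks above pull a read-group ID out of a stringified @RG line with nested sub() calls. A toy workflow tracing what each call strips; the input read-group string is assumed for illustration.

    version 1.1

    # The inner sub() drops everything before "ID:", the outer sub() drops the
    # first tab onward, and the final sub() removes the tag prefix itself.
    workflow extract_rg_id {
        input {
            String read_group = "ID:rg1\tLB:lib1\tSM:sample1"
        }

        String id_field = sub(sub(read_group, ".*ID:", "ID:"), "\t.*", "")  # "ID:rg1"

        output {
            String rg_id = sub(id_field, "ID:", "")  # "rg1"
        }
    }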
read_two_fastq_gz = t.right, bwa_db_tar_gz = bwa_db, - prefix = sub(sub(basename(t.left), "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", - ""), "\\.([rR][12])\\.", "."), + prefix = sub(sub(basename(t.left), "(\\.subsampled)?\\.(fastq|fq)(\\.gz)?$", ""), "\\.([rR][12])\\.", "."), # find spaces, replace with '\\t' # (which must be written as '\\\\t') # '\\t' is subbed into command blocks as '\t' - read_group = sub(rg_string, " ", "\\\\t" - ), + read_group = sub(rg_string, " ", "\\\\t"), use_all_cores, } } - call picard.sort as sort { input: bam = select_first( - [ + call picard.sort as sort { input: bam = select_first([ bwa_mem.bam, bwa_aln_pe.bam, ]) } @@ -135,8 +127,7 @@ workflow dnaseq_core_experimental { use_all_cores, } - call samtools.index { input: bam = rg_merge.merged_bam, - } + call samtools.index { input: bam = rg_merge.merged_bam, } output { File harmonized_bam = rg_merge.merged_bam diff --git a/workflows/dnaseq/dnaseq-standard-fastq.wdl b/workflows/dnaseq/dnaseq-standard-fastq.wdl index 23c2daa4..8c16ab93 100644 --- a/workflows/dnaseq/dnaseq-standard-fastq.wdl +++ b/workflows/dnaseq/dnaseq-standard-fastq.wdl @@ -64,8 +64,7 @@ workflow dnaseq_standard_fastq_experimental { } if (validate_input) { - scatter (reads in zip(read_one_fastqs_gz, read_two_fastqs_gz - )) { + scatter (reads in zip(read_one_fastqs_gz, read_two_fastqs_gz)) { call fq.fqlint { input: read_one_fastq = reads.left, read_two_fastq = reads.right, @@ -74,10 +73,8 @@ workflow dnaseq_standard_fastq_experimental { } if (subsample_n_reads > 0) { - Int reads_per_pair = ceil(subsample_n_reads / length( - read_one_fastqs_gz)) - scatter (reads in zip(read_one_fastqs_gz, read_two_fastqs_gz - )) { + Int reads_per_pair = ceil(subsample_n_reads / length(read_one_fastqs_gz)) + scatter (reads in zip(read_one_fastqs_gz, read_two_fastqs_gz)) { call fq.subsample { input: read_one_fastq = reads.left, read_two_fastq = reads.right, @@ -89,8 +86,7 @@ workflow dnaseq_standard_fastq_experimental { subsample.subsampled_read1, read_one_fastqs_gz, ]) - Array[File] selected_read_two_fastqs = select_all( - select_first([ + Array[File] selected_read_two_fastqs = select_all(select_first([ subsample.subsampled_read2, read_two_fastqs_gz, ])) diff --git a/workflows/dnaseq/dnaseq-standard.wdl b/workflows/dnaseq/dnaseq-standard.wdl index 8beda49d..83c4a194 100644 --- a/workflows/dnaseq/dnaseq-standard.wdl +++ b/workflows/dnaseq/dnaseq-standard.wdl @@ -53,8 +53,7 @@ workflow dnaseq_standard_experimental { call parse_input { input: aligner } if (validate_input) { - call picard.validate_bam as validate_input_bam { input: - bam, } + call picard.validate_bam as validate_input_bam { input: bam, } } if (subsample_n_reads > 0) { @@ -69,8 +68,7 @@ workflow dnaseq_standard_experimental { bam, ]) - call read_group.get_read_groups { input: bam = selected_bam, - } + call read_group.get_read_groups { input: bam = selected_bam, } call bam_to_fastqs_wf.bam_to_fastqs { input: bam = selected_bam, @@ -80,8 +78,7 @@ workflow dnaseq_standard_experimental { call dnaseq_core_wf.dnaseq_core_experimental { input: read_one_fastqs_gz = bam_to_fastqs.read1s, - read_two_fastqs_gz = select_all(bam_to_fastqs.read2s - ), + read_two_fastqs_gz = select_all(bam_to_fastqs.read2s), bwa_db, reads_per_file, read_groups = get_read_groups.read_groups, diff --git a/workflows/general/alignment-post.wdl b/workflows/general/alignment-post.wdl index 4a92e6cb..ad01b5c9 100644 --- a/workflows/general/alignment-post.wdl +++ b/workflows/general/alignment-post.wdl @@ -4,8 +4,7 @@ import 
"../../tools/md5sum.wdl" import "../../tools/picard.wdl" import "../../tools/samtools.wdl" #@ except: LineWidth -import "https://raw.githubusercontent.com/stjude/XenoCP/4.0.0-alpha/wdl/workflows/xenocp.wdl" - as xenocp_wf +import "https://raw.githubusercontent.com/stjude/XenoCP/4.0.0-alpha/wdl/workflows/xenocp.wdl" as xenocp_wf workflow alignment_post { meta { @@ -64,8 +63,7 @@ workflow alignment_post { } } if (mark_duplicates) { - call picard.mark_duplicates as picard_markdup { input: - bam = select_first([ + call picard.mark_duplicates as picard_markdup { input: bam = select_first([ xenocp.bam, picard_sort.sorted_bam, ]), } @@ -84,8 +82,7 @@ workflow alignment_post { File aligned_bam_index = samtools_index.bam_index call picard.validate_bam { input: bam = aligned_bam } - call md5sum.compute_checksum { input: file = aligned_bam - } + call md5sum.compute_checksum { input: file = aligned_bam } output { File processed_bam = aligned_bam diff --git a/workflows/general/bam-to-fastqs.wdl b/workflows/general/bam-to-fastqs.wdl index e244c59e..3150068c 100644 --- a/workflows/general/bam-to-fastqs.wdl +++ b/workflows/general/bam-to-fastqs.wdl @@ -42,8 +42,7 @@ workflow bam_to_fastqs { } } - scatter (reads in zip(bam_to_fastq.read_one_fastq_gz, bam_to_fastq.read_two_fastq_gz - )) { + scatter (reads in zip(bam_to_fastq.read_one_fastq_gz, bam_to_fastq.read_two_fastq_gz)) { call fq.fqlint { input: read_one_fastq = select_first([ reads.left, @@ -54,8 +53,7 @@ workflow bam_to_fastqs { } output { - Array[File] read1s = select_all(bam_to_fastq.read_one_fastq_gz - ) + Array[File] read1s = select_all(bam_to_fastq.read_one_fastq_gz) Array[File?] read2s = bam_to_fastq.read_two_fastq_gz } } diff --git a/workflows/general/samtools-merge.wdl b/workflows/general/samtools-merge.wdl index 05bb6667..a633a747 100644 --- a/workflows/general/samtools-merge.wdl +++ b/workflows/general/samtools-merge.wdl @@ -32,12 +32,10 @@ workflow samtools_merge { if (bam_length > max_length) { # Find the number of merges required - scatter (merge_num in range((bam_length / max_length - ) + 1)) { + scatter (merge_num in range((bam_length / max_length) + 1)) { # Get the sublist of bams scatter (bam_num in range(max_length)) { - Int num = (if merge_num > 0 then bam_num + ( - merge_num * max_length) else bam_num) + Int num = (if merge_num > 0 then bam_num + (merge_num * max_length) else bam_num) if (num < bam_length) { File bam_list = bams[num] } diff --git a/workflows/methylation/methylation-cohort.wdl b/workflows/methylation/methylation-cohort.wdl index 5945d06a..e02fe7b3 100644 --- a/workflows/methylation/methylation-cohort.wdl +++ b/workflows/methylation/methylation-cohort.wdl @@ -26,34 +26,28 @@ workflow methylation_cohort { Int beta_length = length(unfiltered_normalized_beta) if (beta_length > max_length) { - scatter (merge_num in range((beta_length / max_length - ) + 1)) { + scatter (merge_num in range((beta_length / max_length) + 1)) { # Get the sublist of beta files scatter (beta_num in range(max_length)) { - Int num = (if merge_num > 0 then beta_num + ( - merge_num * max_length) else beta_num) + Int num = (if merge_num > 0 then beta_num + (merge_num * max_length) else beta_num) if (num < beta_length) { - File bam_list = unfiltered_normalized_beta[ - num] + File bam_list = unfiltered_normalized_beta[num] } } } scatter (iter_index in range(length(bam_list))) { call combine_data as inner_merge { input: - unfiltered_normalized_beta = select_all( - bam_list[iter_index]), + unfiltered_normalized_beta = select_all(bam_list[iter_index]), 
combined_file_name = "~{iter_index}.combined.csv", modify_memory_gb = 25, } } - call combine_data as final_merge { input: unfiltered_normalized_beta - = inner_merge.combined_beta, } + call combine_data as final_merge { input: unfiltered_normalized_beta = inner_merge.combined_beta, } } if (beta_length <= max_length) { - call combine_data as simple_merge { input: unfiltered_normalized_beta, - } + call combine_data as simple_merge { input: unfiltered_normalized_beta, } } call filter_probes { input: @@ -64,8 +58,7 @@ workflow methylation_cohort { num_probes, } - call generate_umap { input: filtered_beta_values = filter_probes.filtered_beta_values, - } + call generate_umap { input: filtered_beta_values = filter_probes.filtered_beta_values, } call plot_umap { input: umap = generate_umap.umap, } @@ -101,10 +94,8 @@ task combine_data { Int modify_memory_gb = 0 } - Int memory_gb = ceil(size(unfiltered_normalized_beta, "GiB" - )) + modify_memory_gb + 2 - Int disk_size_gb = ceil(size(unfiltered_normalized_beta, - "GiB") * 2) + 2 + Int memory_gb = ceil(size(unfiltered_normalized_beta, "GiB")) + modify_memory_gb + 2 + Int disk_size_gb = ceil(size(unfiltered_normalized_beta, "GiB") * 2) + 2 command <<< python $(which combine.py) \ @@ -189,8 +180,7 @@ task generate_umap { String prefix = "umap" } - Int disk_size_gb = ceil(size(filtered_beta_values, "GiB" - ) * 2) + 2 + Int disk_size_gb = ceil(size(filtered_beta_values, "GiB") * 2) + 2 command <<< python $(which generate_umap.py) \ @@ -230,8 +220,7 @@ task plot_umap { } command <<< - python $(which plot_umap.py) --umap ~{umap} --output-name ~{ - plot_file} + python $(which plot_umap.py) --umap ~{umap} --output-name ~{plot_file} >>> output { diff --git a/workflows/methylation/methylation-standard.wdl b/workflows/methylation/methylation-standard.wdl index 7e1beee6..9b974ce2 100644 --- a/workflows/methylation/methylation-standard.wdl +++ b/workflows/methylation/methylation-standard.wdl @@ -29,13 +29,10 @@ workflow methylation { } scatter (pair in zip(green_idats, red_idats)) { - call preprocess.process_raw_idats { input: idats = pair - } + call preprocess.process_raw_idats { input: idats = pair } } - call cohort.methylation_cohort { input: unfiltered_normalized_beta - = process_raw_idats.beta_swan_norm_unfiltered_genomic, - } + call cohort.methylation_cohort { input: unfiltered_normalized_beta = process_raw_idats.beta_swan_norm_unfiltered_genomic, } output { Array[File] beta_swan_norm_unfiltered_genomic = process_raw_idats.beta_swan_norm_unfiltered_genomic diff --git a/workflows/qc/markdups-post.wdl b/workflows/qc/markdups-post.wdl index 567a08e3..390ed11f 100644 --- a/workflows/qc/markdups-post.wdl +++ b/workflows/qc/markdups-post.wdl @@ -58,8 +58,7 @@ workflow markdups_post { bam_index = markdups_bam_index, prefix = prefix + "." 
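The samtools_merge and methylation_cohort hunks above share the same chunking arithmetic: split N inputs into groups of at most max_length via nested scatters (in methylation_cohort the sublist variable keeps the name bam_list from the BAM-merging version even though it holds beta files). A standalone sketch with assumed inputs:

    version 1.1

    # Integer division means (7 / 3) + 1 = 3 groups: [a,b,c], [d,e,f], [g].
    # When N is an exact multiple of max_length, the final group is empty and
    # select_all() returns []. The original's `if merge_num > 0` branch is
    # equivalent to the unconditional `group * max_length + slot` used here.
    workflow chunk_list {
        input {
            Array[String] items = ["a", "b", "c", "d", "e", "f", "g"]
            Int max_length = 3
        }

        Int n = length(items)

        scatter (group in range((n / max_length) + 1)) {
            scatter (slot in range(max_length)) {
                Int idx = group * max_length + slot
                if (idx < n) {
                    String member = items[idx]
                }
            }
            Array[String] chunk = select_all(member)
        }

        output {
            Array[Array[String]] chunks = chunk
        }
    }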
+ "whole_genome", } - scatter (coverage_pair in zip(coverage_beds, coverage_labels - )) { + scatter (coverage_pair in zip(coverage_beds, coverage_labels)) { call mosdepth.coverage as regions_coverage { input: bam = markdups_bam, bam_index = markdups_bam_index, diff --git a/workflows/qc/quality-check-standard.wdl b/workflows/qc/quality-check-standard.wdl index 4ad2a72e..cfcd9d16 100644 --- a/workflows/qc/quality-check-standard.wdl +++ b/workflows/qc/quality-check-standard.wdl @@ -144,21 +144,16 @@ workflow quality_check_standard { coverage_beds_len = length(coverage_beds), coverage_labels, } - call flag_filter.validate_flag_filter as kraken_filter_validator { - input: flags = standard_filter } + call flag_filter.validate_flag_filter as kraken_filter_validator { input: flags = standard_filter } if (run_comparative_kraken) { - call flag_filter.validate_flag_filter as comparative_kraken_filter_validator { - input: flags = comparative_filter } + call flag_filter.validate_flag_filter as comparative_kraken_filter_validator { input: flags = comparative_filter } } - call md5sum.compute_checksum after parse_input { input: file - = bam } + call md5sum.compute_checksum after parse_input { input: file = bam } - call samtools.quickcheck after parse_input { input: bam - } + call samtools.quickcheck after parse_input { input: bam } #@ except: UnusedCall - call util.compression_integrity after parse_input { input: - bgzipped_file = bam } + call util.compression_integrity after parse_input { input: bgzipped_file = bam } if (subsample_n_reads > 0) { call samtools.subsample after quickcheck { input: @@ -187,8 +182,7 @@ workflow quality_check_standard { subsample_index.bam_index, bam_index, ]) - String post_subsample_prefix = (if (defined(subsample.sampled_bam - )) then prefix + ".subsampled" else prefix) + String post_subsample_prefix = (if (defined(subsample.sampled_bam)) then prefix + ".subsampled" else prefix) call picard.validate_bam after quickcheck { input: bam = post_subsample_bam, @@ -198,13 +192,11 @@ workflow quality_check_standard { summary_mode = true, } - call picard.collect_alignment_summary_metrics after quickcheck { - input: + call picard.collect_alignment_summary_metrics after quickcheck { input: bam = post_subsample_bam, prefix = post_subsample_prefix + ".CollectAlignmentSummaryMetrics", } - call picard.quality_score_distribution after quickcheck { - input: + call picard.quality_score_distribution after quickcheck { input: bam = post_subsample_bam, prefix = post_subsample_prefix + ".QualityScoreDistribution", } @@ -239,8 +231,7 @@ workflow quality_check_standard { prefix = post_subsample_prefix, } - call samtools.bam_to_fastq after quickcheck after kraken_filter_validator { - input: + call samtools.bam_to_fastq after quickcheck after kraken_filter_validator { input: bam = post_subsample_bam, bitwise_filter = standard_filter, prefix = post_subsample_prefix, @@ -281,17 +272,14 @@ workflow quality_check_standard { use_all_cores = use_all_cores, } if (run_librarian) { - call libraran_tasks.librarian after fqlint { input: read_one_fastq - = select_first([ + call libraran_tasks.librarian after fqlint { input: read_one_fastq = select_first([ bam_to_fastq.read_one_fastq_gz, "undefined", ]), } } if (run_comparative_kraken) { - call samtools.bam_to_fastq as alt_filtered_fastq - after quickcheck after comparative_kraken_filter_validator { - input: + call samtools.bam_to_fastq as alt_filtered_fastq after quickcheck after comparative_kraken_filter_validator { input: bam = post_subsample_bam, 
bitwise_filter = comparative_filter, prefix = post_subsample_prefix + ".alt_filtered", @@ -318,8 +306,7 @@ workflow quality_check_standard { "undefined", ]), } - call kraken2.kraken as comparative_kraken after alt_filtered_fqlint { - input: + call kraken2.kraken as comparative_kraken after alt_filtered_fqlint { input: read_one_fastq_gz = select_first([ alt_filtered_fastq.read_one_fastq_gz, "undefined", @@ -335,16 +322,13 @@ workflow quality_check_standard { } } - call mosdepth.coverage as wg_coverage after quickcheck { - input: + call mosdepth.coverage as wg_coverage after quickcheck { input: bam = post_subsample_bam, bam_index = post_subsample_bam_index, prefix = post_subsample_prefix + ".whole_genome", } - scatter (coverage_pair in zip(coverage_beds, parse_input.labels - )) { - call mosdepth.coverage as regions_coverage after quickcheck { - input: + scatter (coverage_pair in zip(coverage_beds, parse_input.labels)) { + call mosdepth.coverage as regions_coverage after quickcheck { input: bam = post_subsample_bam, bam_index = post_subsample_bam_index, coverage_bed = coverage_pair.left, @@ -353,8 +337,7 @@ workflow quality_check_standard { } if (rna) { - call ngsderive.junction_annotation after quickcheck { - input: + call ngsderive.junction_annotation after quickcheck { input: bam = post_subsample_bam, bam_index = post_subsample_bam_index, gene_model = select_first([ @@ -387,8 +370,7 @@ workflow quality_check_standard { } } if (mark_duplicates) { - call picard.mark_duplicates as markdups after quickcheck { - input: + call picard.mark_duplicates as markdups after quickcheck { input: bam = post_subsample_bam, create_bam = true, prefix = post_subsample_prefix + ".MarkDuplicates", @@ -414,8 +396,7 @@ workflow quality_check_standard { if (!mark_duplicates) { # These analyses are called in the markdups_post workflow. # They should still be run if duplicates were not marked. - call picard.collect_insert_size_metrics after quickcheck { - input: + call picard.collect_insert_size_metrics after quickcheck { input: bam = post_subsample_bam, prefix = post_subsample_prefix + ".CollectInsertSizeMetrics", } @@ -464,8 +445,7 @@ workflow quality_check_standard { markdups_post.mosdepth_region_dist, [], ]), - (if (mark_duplicates && optical_distance > 0) - then [ + (if (mark_duplicates && optical_distance > 0) then [ markdups.mark_duplicates_metrics, ] else []), extra_multiqc_inputs, @@ -519,8 +499,7 @@ workflow quality_check_standard { File kraken_report = kraken.report File mosdepth_global_dist = wg_coverage.global_dist File mosdepth_global_summary = wg_coverage.summary - Array[File] mosdepth_region_dist = select_all( - regions_coverage.region_dist) + Array[File] mosdepth_region_dist = select_all(regions_coverage.region_dist) Array[File] mosdepth_region_summary = regions_coverage.summary File multiqc_report = multiqc.multiqc_report File? orig_read_count = subsample.orig_read_count @@ -577,8 +556,7 @@ task parse_input { for (( i=1; i<=~{coverage_beds_len}; i++ )); do echo regions$i >> labels.txt done - elif [ "~{coverage_labels_len}" != "~{ - coverage_beds_len}" ]; then + elif [ "~{coverage_labels_len}" != "~{coverage_beds_len}" ]; then >&2 echo "Unequal amount of coverage BEDs and coverage labels." >&2 echo "If no labels are provided, generic labels will be created." >&2 echo "Otherwise the exact same amount must be supplied." 
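The MultiQC input list above is assembled from optional files, conditionally produced files, and a plain extra array. A toy version of that select_all / if-then-else-empty / flatten combination; all names here are assumed.

    version 1.1

    workflow gather_qc_inputs {
        input {
            File? insert_size_metrics
            File markdups_metrics
            Boolean mark_duplicates = true
            Array[File] extra_inputs = []
        }

        output {
            Array[File] multiqc_inputs = flatten([
                # Optional file: kept only if defined.
                select_all([insert_size_metrics]),
                # Conditionally produced file: one-element array or [].
                (if mark_duplicates then [markdups_metrics] else []),
                extra_inputs,
            ])
        }
    }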
diff --git a/workflows/reference/bwa-db-build.wdl b/workflows/reference/bwa-db-build.wdl index c953f45a..22962933 100644 --- a/workflows/reference/bwa-db-build.wdl +++ b/workflows/reference/bwa-db-build.wdl @@ -35,8 +35,7 @@ workflow bwa_db_build { disk_size_gb = reference_fa_disk_size_gb, md5sum = reference_fa_md5, } - call bwa.build_bwa_db { input: reference_fasta = reference_download.downloaded_file, - } + call bwa.build_bwa_db { input: reference_fasta = reference_download.downloaded_file, } output { File reference_fa = reference_download.downloaded_file diff --git a/workflows/reference/gatk-reference.wdl b/workflows/reference/gatk-reference.wdl index 14546ba2..83e4bbd6 100644 --- a/workflows/reference/gatk-reference.wdl +++ b/workflows/reference/gatk-reference.wdl @@ -72,11 +72,9 @@ workflow gatk_reference { disk_size_gb = reference_fa_disk_size_gb, } - call samtools.faidx { input: fasta = fasta_download.downloaded_file, - } + call samtools.faidx { input: fasta = fasta_download.downloaded_file, } - call picard.create_sequence_dictionary { input: fasta = fasta_download.downloaded_file, - } + call picard.create_sequence_dictionary { input: fasta = fasta_download.downloaded_file, } call util.download as dbsnp { input: url = dbSNP_vcf_url, @@ -84,8 +82,7 @@ workflow gatk_reference { disk_size_gb = dbSNP_vcf_disk_size_gb, } - if (defined(dbSNP_vcf_index_url) && defined( - dbSNP_vcf_index_name)) { + if (defined(dbSNP_vcf_index_url) && defined(dbSNP_vcf_index_name)) { call util.download as dbsnp_index { input: url = select_first([ dbSNP_vcf_index_url, @@ -99,8 +96,7 @@ workflow gatk_reference { } } - if (defined(interval_list_url) && defined( - interval_list_name)) { + if (defined(interval_list_url) && defined(interval_list_name)) { call util.download as intervals { input: url = select_first([ interval_list_url, diff --git a/workflows/rnaseq/ESTIMATE.wdl b/workflows/rnaseq/ESTIMATE.wdl index dfb570c8..af1ff91b 100644 --- a/workflows/rnaseq/ESTIMATE.wdl +++ b/workflows/rnaseq/ESTIMATE.wdl @@ -29,8 +29,7 @@ workflow estimate { counts = counts_file, gene_lengths = gene_lengths_file, } - call estimate.run_estimate { input: gene_expression_file - = calc_tpm.tpm_file, } + call estimate.run_estimate { input: gene_expression_file = calc_tpm.tpm_file, } output { File tpm = calc_tpm.tpm_file diff --git a/workflows/rnaseq/rnaseq-core.wdl b/workflows/rnaseq/rnaseq-core.wdl index e4273b21..a2558855 100644 --- a/workflows/rnaseq/rnaseq-core.wdl +++ b/workflows/rnaseq/rnaseq-core.wdl @@ -121,8 +121,7 @@ workflow rnaseq_core { String read_groups String prefix File? 
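The gatk-reference hunks above guard the optional index and interval-list downloads behind defined(url) && defined(name). A sketch with illustrative names of why that paired guard makes the select_first() calls inside the block safe by construction:

    version 1.1

    workflow guarded_fetch {
        input {
            String? index_url
            String? index_name
        }

        # Both halves of the pair must be supplied before this block is
        # reachable, so select_first() cannot fail inside it.
        if (defined(index_url) && defined(index_name)) {
            String url = select_first([index_url])
            String name = select_first([index_name])
            String resolved = name + " <- " + url
        }

        output {
            # Optional outside the block: undefined when the guard was false.
            String? fetch_plan = resolved
        }
    }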
contaminant_db - SpliceJunctionMotifs align_sj_stitch_mismatch_n_max - = SpliceJunctionMotifs { + SpliceJunctionMotifs align_sj_stitch_mismatch_n_max = SpliceJunctionMotifs { noncanonical_motifs: 5, GT_AG_and_CT_AC_motif: -1, GC_AG_and_CT_GC_motif: 5, @@ -195,18 +194,13 @@ workflow rnaseq_core { gene_model = gtf, } - String htseq_strandedness = (if (provided_strandedness - != "") then htseq_strandedness_map[ - provided_strandedness] else htseq_strandedness_map[ - ngsderive_strandedness.strandedness_string]) + String htseq_strandedness = (if (provided_strandedness != "") then htseq_strandedness_map[provided_strandedness] else htseq_strandedness_map[ngsderive_strandedness.strandedness_string]) call htseq.count as htseq_count { input: bam = alignment_post.processed_bam, gtf, strandedness = htseq_strandedness, - prefix = basename(alignment_post.processed_bam, "bam" - ) + (if provided_strandedness == "" then ngsderive_strandedness.strandedness_string - else provided_strandedness), + prefix = basename(alignment_post.processed_bam, "bam") + (if provided_strandedness == "" then ngsderive_strandedness.strandedness_string else provided_strandedness), pos_sorted = true, } diff --git a/workflows/rnaseq/rnaseq-standard-fastq.wdl b/workflows/rnaseq/rnaseq-standard-fastq.wdl index 6a1e6002..79a1a2b3 100644 --- a/workflows/rnaseq/rnaseq-standard-fastq.wdl +++ b/workflows/rnaseq/rnaseq-standard-fastq.wdl @@ -115,15 +115,12 @@ workflow rnaseq_standard_fastq { } scatter (rg in read_groups) { - call read_group.read_group_to_string after parse_input { - input: read_group = rg } + call read_group.read_group_to_string after parse_input { input: read_group = rg } } - String stringified_read_groups = sep(" , ", read_group_to_string.stringified_read_group - ) + String stringified_read_groups = sep(" , ", read_group_to_string.stringified_read_group) if (validate_input) { - scatter (reads in zip(read_one_fastqs_gz, read_two_fastqs_gz - )) { + scatter (reads in zip(read_one_fastqs_gz, read_two_fastqs_gz)) { call fq.fqlint { input: read_one_fastq = reads.left, read_two_fastq = reads.right, @@ -132,10 +129,8 @@ workflow rnaseq_standard_fastq { } if (subsample_n_reads > 0) { - Int reads_per_pair = ceil(subsample_n_reads / length( - read_one_fastqs_gz)) - scatter (reads in zip(read_one_fastqs_gz, read_two_fastqs_gz - )) { + Int reads_per_pair = ceil(subsample_n_reads / length(read_one_fastqs_gz)) + scatter (reads in zip(read_one_fastqs_gz, read_two_fastqs_gz)) { call fq.subsample after parse_input { input: read_one_fastq = reads.left, read_two_fastq = reads.right, @@ -147,8 +142,7 @@ workflow rnaseq_standard_fastq { subsample.subsampled_read1, read_one_fastqs_gz, ]) - Array[File] selected_read_two_fastqs = select_all( - select_first([ + Array[File] selected_read_two_fastqs = select_all(select_first([ subsample.subsampled_read2, read_two_fastqs_gz, ])) diff --git a/workflows/rnaseq/rnaseq-standard.wdl b/workflows/rnaseq/rnaseq-standard.wdl index 5af49a49..06abcc4c 100755 --- a/workflows/rnaseq/rnaseq-standard.wdl +++ b/workflows/rnaseq/rnaseq-standard.wdl @@ -76,8 +76,7 @@ workflow rnaseq_standard { } if (validate_input) { - call picard.validate_bam as validate_input_bam after parse_input { - input: bam, } + call picard.validate_bam as validate_input_bam after parse_input { input: bam, } } if (subsample_n_reads > 0) { @@ -104,8 +103,7 @@ workflow rnaseq_standard { call rnaseq_core_wf.rnaseq_core { input: read_one_fastqs_gz = bam_to_fastqs.read1s, - read_two_fastqs_gz = select_all(bam_to_fastqs.read2s - ), + 
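The rnaseq-core hunk above maps a strandedness string onto htseq-count's vocabulary, preferring a non-empty user override to the ngsderive-inferred value. A worked toy of that selection; the map keys and values here are assumed for illustration.

    version 1.1

    workflow pick_strandedness {
        input {
            String provided_strandedness = ""
            String inferred_strandedness = "Stranded-Reverse"
        }

        Map[String, String] htseq_strandedness_map = {
            "Stranded-Reverse": "reverse",
            "Stranded-Forward": "yes",
            "Unstranded": "no",
        }

        output {
            # Override wins when non-empty; otherwise the inferred value
            # keys the same map.
            String htseq_strandedness = (
                if provided_strandedness != ""
                then htseq_strandedness_map[provided_strandedness]
                else htseq_strandedness_map[inferred_strandedness]
            )
        }
    }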
read_two_fastqs_gz = select_all(bam_to_fastqs.read2s), # format_for_star=true in get_read_groups puts # all found RG info in read_groups[0] read_groups = get_read_groups.read_groups[0], diff --git a/workflows/rnaseq/rnaseq-variant-calling.wdl b/workflows/rnaseq/rnaseq-variant-calling.wdl index 91b9cbec..aa6e6a86 100644 --- a/workflows/rnaseq/rnaseq-variant-calling.wdl +++ b/workflows/rnaseq/rnaseq-variant-calling.wdl @@ -95,8 +95,7 @@ workflow rnaseq_variant_calling { scatter_count, } - scatter (list in scatter_interval_list.interval_lists_scatter - ) { + scatter (list in scatter_interval_list.interval_lists_scatter) { call gatk.haplotype_caller { input: bam = apply_bqsr.recalibrated_bam, bam_index = apply_bqsr.recalibrated_bam_index,