From 912ff434fe55008d502d2b9c6c571e65b5c16fb8 Mon Sep 17 00:00:00 2001
From: Raquel Manzano
Date: Thu, 24 Aug 2023 10:32:35 +0100
Subject: [PATCH] Structured config files into sub-folders for easy access.
 Removed redundant code and moved subworkflows to their corresponding folders.
 Fixed the rnadnavar.nf workflow and input check. Removed the lane dependency
 in schema_input.json. Added modules and subworkflows the proper way.

---
 assets/schema_input.json | 1 -
 .../alignment/alignment_to_fastq.config | 87 +++
 .../{bam_align => alignment}/bam_align.config | 114 ++-
 conf/modules/annotate/annotate.config | 69 ++
 conf/modules/consensus/normalise.config | 58 ++
 conf/modules/consensus/vcf_consensus.config | 54 ++
 conf/modules/filtering/maf_filtering.config | 81 +++
 .../gatk4_preprocessing/markduplicates.config | 124 ++++
 .../prepare_recalibration.config | 38 +
 .../gatk4_preprocessing/recalibrate.config | 61 ++
 .../splitncigarreads.config | 68 ++
 .../prepare_resources/prepare_cache.config | 27 +
 .../prepare_genome.config | 47 +-
 .../prepare_intervals.config | 21 +-
 .../quality_control/quality_control.config | 122 ++++
 conf/modules/quality_control/trimming.config | 42 ++
 conf/modules/variant_calling/freebayes.config | 79 ++
 conf/modules/variant_calling/manta.config | 28 +
 conf/modules/variant_calling/mutect2.config | 128 ++++
 conf/modules/variant_calling/strelka.config | 56 ++
 lib/WorkflowRnadnavar.groovy | 39 +-
 modules.json | 5 -
 modules/nf-core/dragmap/align/main.nf | 46 ++
 modules/nf-core/dragmap/align/meta.yml | 47 ++
 modules/nf-core/dragmap/hashtable/main.nf | 36 +
 modules/nf-core/dragmap/hashtable/meta.yml | 40 +
 modules/nf-core/ensemblvep/download/main.nf | 45 ++
 modules/nf-core/ensemblvep/download/meta.yml | 43 ++
 nextflow.config | 171 +++--
 nextflow_schema.json | 76 +-
 subworkflows/local/bam_align/main.nf | 18 +-
 .../main.nf} | 2 +-
 .../main.nf | 7 +-
 subworkflows/local/core_workflow_pass.nf | 157 ----
 subworkflows/local/prepare_intervals/main.nf | 5 +
 .../local/prepare_reference_and_intervals.nf | 14 -
 .../nf-core/bam_markduplicates_picard/main.nf | 52 ++
 .../bam_markduplicates_picard/meta.yml | 62 ++
 subworkflows/nf-core/bam_qc_picard/main.nf | 45 ++
 subworkflows/nf-core/bam_qc_picard/meta.yml | 84 +++
 .../nf-core/bam_sort_stats_samtools/main.nf | 50 ++
 .../nf-core/bam_sort_stats_samtools/meta.yml | 67 ++
 .../nf-core/bam_stats_samtools/main.nf | 32 +
 .../nf-core/bam_stats_samtools/meta.yml | 41 ++
 .../main.nf | 139 ++++
 .../meta.yml | 116 +++
 subworkflows/nf-core/fastq_align_bwa/main.nf | 43 ++
 subworkflows/nf-core/fastq_align_bwa/meta.yml | 72 ++
 .../nf-core/fastq_align_hisat2/main.nf | 44 ++
 .../nf-core/fastq_align_hisat2/meta.yml | 89 +++
 subworkflows/nf-core/fastq_align_star/main.nf | 49 ++
 .../nf-core/fastq_align_star/meta.yml | 108 +++
 workflows/rnadnavar.nf | 681 ++++++------
 53 files changed, 3060 insertions(+), 770 deletions(-)
 create mode 100644 conf/modules/alignment/alignment_to_fastq.config
 rename conf/modules/{bam_align => alignment}/bam_align.config (56%)
 create mode 100644 conf/modules/annotate/annotate.config
 create mode 100644 conf/modules/consensus/normalise.config
 create mode 100644 conf/modules/consensus/vcf_consensus.config
 create mode 100644 conf/modules/filtering/maf_filtering.config
 create mode 100644 conf/modules/gatk4_preprocessing/markduplicates.config
 create mode 100644 conf/modules/gatk4_preprocessing/prepare_recalibration.config
 create mode 100644 conf/modules/gatk4_preprocessing/recalibrate.config
 create mode 100644
conf/modules/gatk4_preprocessing/splitncigarreads.config create mode 100644 conf/modules/prepare_resources/prepare_cache.config rename conf/modules/{prepare_genome_and_intervals => prepare_resources}/prepare_genome.config (77%) rename conf/modules/{prepare_genome_and_intervals => prepare_resources}/prepare_intervals.config (72%) create mode 100644 conf/modules/quality_control/quality_control.config create mode 100644 conf/modules/quality_control/trimming.config create mode 100644 conf/modules/variant_calling/freebayes.config create mode 100644 conf/modules/variant_calling/manta.config create mode 100644 conf/modules/variant_calling/mutect2.config create mode 100644 conf/modules/variant_calling/strelka.config create mode 100644 modules/nf-core/dragmap/align/main.nf create mode 100644 modules/nf-core/dragmap/align/meta.yml create mode 100644 modules/nf-core/dragmap/hashtable/main.nf create mode 100644 modules/nf-core/dragmap/hashtable/meta.yml create mode 100644 modules/nf-core/ensemblvep/download/main.nf create mode 100644 modules/nf-core/ensemblvep/download/meta.yml rename subworkflows/local/{gatk_preprocessing.nf => bam_gatk_preprocessing/main.nf} (99%) delete mode 100644 subworkflows/local/core_workflow_pass.nf create mode 100644 subworkflows/nf-core/bam_markduplicates_picard/main.nf create mode 100644 subworkflows/nf-core/bam_markduplicates_picard/meta.yml create mode 100644 subworkflows/nf-core/bam_qc_picard/main.nf create mode 100644 subworkflows/nf-core/bam_qc_picard/meta.yml create mode 100644 subworkflows/nf-core/bam_sort_stats_samtools/main.nf create mode 100644 subworkflows/nf-core/bam_sort_stats_samtools/meta.yml create mode 100644 subworkflows/nf-core/bam_stats_samtools/main.nf create mode 100644 subworkflows/nf-core/bam_stats_samtools/meta.yml create mode 100644 subworkflows/nf-core/bam_tumor_normal_somatic_variant_calling_gatk/main.nf create mode 100644 subworkflows/nf-core/bam_tumor_normal_somatic_variant_calling_gatk/meta.yml create mode 100644 subworkflows/nf-core/fastq_align_bwa/main.nf create mode 100644 subworkflows/nf-core/fastq_align_bwa/meta.yml create mode 100644 subworkflows/nf-core/fastq_align_hisat2/main.nf create mode 100644 subworkflows/nf-core/fastq_align_hisat2/meta.yml create mode 100644 subworkflows/nf-core/fastq_align_star/main.nf create mode 100644 subworkflows/nf-core/fastq_align_star/meta.yml diff --git a/assets/schema_input.json b/assets/schema_input.json index 2775191..6b8708f 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -31,7 +31,6 @@ "type": "string", "pattern": "^\\S+$", "unique": ["patient", "sample"], - "dependentRequired": ["fastq_1"], "meta": ["lane"] }, "fastq_1": { diff --git a/conf/modules/alignment/alignment_to_fastq.config b/conf/modules/alignment/alignment_to_fastq.config new file mode 100644 index 0000000..30afa67 --- /dev/null +++ b/conf/modules/alignment/alignment_to_fastq.config @@ -0,0 +1,87 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+ ext.when = When to run the module.
+----------------------------------------------------------------------------------------
+*/
+
+// BAM TO FASTQ
+
+process { // alignment_to_fastq
+
+ withName: 'COLLATE_FASTQ_MAP' {
+ ext.args2 = '-N'
+ ext.prefix = {"${meta.id}.mapped"}
+ publishDir = [
+ //specify to avoid publishing, overwritten otherwise
+ enabled: false
+ ]
+ }
+
+ withName: 'COLLATE_FASTQ_UNMAP' {
+ ext.args2 = '-N'
+ ext.prefix = {"${meta.id}.unmapped"}
+ publishDir = [
+ //specify to avoid publishing, overwritten otherwise
+ enabled: false
+ ]
+ }
+
+ // SAM flag filters below: -f keeps and -F drops reads matching the bitmask
+ // (1 = paired, 4 = read unmapped, 8 = mate unmapped, 256 = secondary alignment),
+ // splitting each pair into its mapped/unmapped combinations before FASTQ conversion
+ withName: 'SAMTOOLS_VIEW_MAP_MAP' {
+ ext.args = '-b -f1 -F12'
+ ext.prefix = {"${meta.id}.map_map"}
+ publishDir = [
+ //specify to avoid publishing, overwritten otherwise
+ enabled: false
+ ]
+ }
+
+ withName: 'SAMTOOLS_VIEW_MAP_UNMAP' {
+ ext.args = '-b -f8 -F260'
+ ext.prefix = {"${meta.id}.map_unmap"}
+ publishDir = [
+ //specify to avoid publishing, overwritten otherwise
+ enabled: false
+ ]
+ }
+
+ withName: 'SAMTOOLS_VIEW_UNMAP_MAP' {
+ ext.args = '-b -f4 -F264'
+ ext.prefix = {"${meta.id}.unmap_map"}
+ publishDir = [
+ //specify to avoid publishing, overwritten otherwise
+ enabled: false
+ ]
+ }
+
+ withName: 'SAMTOOLS_VIEW_UNMAP_UNMAP' {
+ ext.args = '-b -f12 -F256'
+ ext.prefix = {"${meta.id}.unmap_unmap"}
+ publishDir = [
+ //specify to avoid publishing, overwritten otherwise
+ enabled: false
+ ]
+ }
+
+ withName: 'SAMTOOLS_MERGE_UNMAP' {
+ ext.prefix = {"${meta.id}.merged_unmap"}
+ publishDir = [
+ //specify to avoid publishing, overwritten otherwise
+ enabled: false
+ ]
+ }
+ withName: 'CAT_FASTQ' {
+ publishDir = [
+ enabled: params.save_split_fastqs,
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/preprocessing/fastq/${meta.sample}/" },
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/conf/modules/bam_align/bam_align.config b/conf/modules/alignment/bam_align.config
similarity index 56%
rename from conf/modules/bam_align/bam_align.config
rename to conf/modules/alignment/bam_align.config
index 2888658..602651e 100644
--- a/conf/modules/bam_align/bam_align.config
+++ b/conf/modules/alignment/bam_align.config
@@ -12,7 +12,7 @@

 // BAM_ALIGN config

-process {
+process { // bam_align

 if (params.step == 'mapping'){

@@ -50,22 +50,22 @@
 else { null }
 }
 ]
- }
+ }

 withName: "(BWAMEM.*_MEM|DRAGMAP_ALIGN)" {
 // Markduplicates Spark NEEDS name-sorted reads or runtime goes through the roof
 // However if it's skipped, reads need to be coordinate-sorted
 // Only name sort if Spark for Markduplicates + duplicate marking is not skipped
 ext.args2 = { (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('markduplicates'))) ? '-n' : '' }
- }
+ }

- withName: "BWAMEM.*_MEM|SENTIEON_BWAMEM" {
+ withName: "BWAMEM.*_MEM|SENTIEON_BWAMEM" {
 // Using -B 3 for tumor samples
 ext.args = { meta.status == 1 ? "-K 100000000 -Y -B 3 -R ${meta.read_group}" : "-K 100000000 -Y -R ${meta.read_group}" }
- }
 }
+ }
+
 withName: 'MERGE_BAM|INDEX_MERGE_BAM' {
 publishDir = [
 mode: params.publish_dir_mode,
@@ -95,9 +95,9 @@
 ]
 }

- withName: STAR_ALIGN {
+ withName: 'STAR_ALIGN' {
 ext.args = [
- '--outSAMtype BAM SortedByCoordinate',
+ '--outSAMtype BAM Unsorted',
 '--readFilesCommand zcat',
 '--outFilterMultimapScoreRange 1',
 '--outFilterMultimapNmax 20',
@@ -113,11 +113,9 @@
 params.read_length ? "--sjdbOverhang ${params.read_length - 1}" : '',
 params.star_twopass ? '--twopassMode Basic' : '',
 params.star_max_memory_bamsort > 0 ?
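 // STAR's --limitBAMsortRAM takes a value in bytes; the cap is only added when star_max_memory_bamsort is set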
"--limitBAMsortRAM ${params.star_max_memory_bamsort}" : "", - params.star_bins_bamsort > 0 ? "--outBAMsortingBinsN ${params.star_bins_bamsort}" : "", params.star_max_collapsed_junc > 0 ? "--limitOutSJcollapsed ${params.star_max_collapsed_junc}" : "" - ].join(' ').trim() - ext.args2 = { "--outSAMattrRGline ${meta.read_group}" } - ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.tokenize('.')[0]) : "" } + ].flatten().unique(false).join(' ').trim() + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).baseName.tokenize('.')[0]) : "" } publishDir = [ [ path: { "${params.outdir}/reports/star/${meta.patient}/${meta.id}/" }, @@ -165,10 +163,102 @@ process { ] } - } + // POST ALIGNMENT AND PREPROCESSING BAM TODO: check if it follows new pattern + withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('') : "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/preprocessing/" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + saveAs: { (params.save_bam_mapped || (params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) && (meta.size * meta.numLanes == 1) ? "mapped/${meta.patient}/${meta.id}/${it}" : null } + + ] + } + withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_FLAGSTAT' { + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]) : "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/reports/samtools/${meta.patient}/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.flagstat", + enabled: params.save_align_intermeds + ] + } + + + withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_STATS' { + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(input.name.tokenize('.')[1]) : "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/reports/samtools/${meta.patient}/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.stats", + enabled: params.save_align_intermeds + ] + } + + withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_IDXSTATS' { + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(input.name.tokenize('.')[1]) : "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/reports/samtools/${meta.patient}/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.idxstats", + enabled: params.save_align_intermeds + ] + } + withName: '.*:FASTQ_ALIGN_STAR:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { + ext.args = params.bam_csi_index ? '-c' : '' + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('.aligned') : "${meta.id}.aligned" } + publishDir = [ + path: { "${params.outdir}/preprocessing/" }, + mode: params.publish_dir_mode, + pattern: "*.{bai,csi}", + saveAs: { (params.save_bam_mapped || (params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) && (meta.size * meta.numLanes == 1) ? "mapped/${meta.patient}/${meta.id}/${it}" : null } + ] + } + } + // Second run alignment + if (params.skip_tools && !params.skip_tools.split(',').contains('second_run')){ + withName: '.*:FASTQ_ALIGN_HISAT2:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { + ext.prefix = { params.split_fastq > 1 ? 
"${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('.aligned_hs2') : "${meta.id}.aligned_hs2" } + publishDir = [ + path: { "${params.outdir}/preprocessing/hisat2/${meta.patient}/${meta.id}/" }, + mode: params.publish_dir_mode, + pattern: "*.bam", + enabled: params.save_align_intermeds + ] + } + withName: '.*:FASTQ_ALIGN_HISAT2:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { + ext.args = params.bam_csi_index ? '-c' : '' + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('.aligned_hs2') : "${meta.id}.aligned_hs2" } + publishDir = [ + path: { "${params.outdir}/preprocessing/hisat2/${meta.patient}/${meta.id}/" }, + mode: params.publish_dir_mode, + pattern: "*.{bai,csi}", + enabled: params.save_align_intermeds + ] + } + + withName: '.*:FASTQ_ALIGN_HISAT2:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_FLAGSTAT' { + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(bam.name.tokenize('.')[1]).concat('.aligned_hs2') : "${meta.id}.aligned_hs2" } + publishDir = [ + path: { "${params.outdir}/reports/samtools/${meta.patient}/${meta.id}/" }, + mode: params.publish_dir_mode, + pattern: "*.{bai,csi}", + enabled: params.save_align_intermeds + ] + } + withName: '.*:ALIGN_HISAT2:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_IDXSTATS' { + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(input.name.tokenize('.aligned_hs2')[1]) : "${meta.id}.aligned_hs2" } + publishDir = [ + path: { "${params.outdir}/reports/samtools/${meta.patient}/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.idxstats", + enabled: params.save_align_intermeds + ] + } + } } \ No newline at end of file diff --git a/conf/modules/annotate/annotate.config b/conf/modules/annotate/annotate.config new file mode 100644 index 0000000..83a22a3 --- /dev/null +++ b/conf/modules/annotate/annotate.config @@ -0,0 +1,69 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// ANNOTATE + +process { // annotate + + // VEP TODO: is vep_custom_args working?? + if (params.tools && params.tools.split(',').contains('vep')) { + withName: 'ENSEMBLVEP_VEP' { + ext.args = { [ + (params.vep_dbnsfp && params.dbnsfp && !params.dbnsfp_consequence) ? "--plugin dbNSFP,${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', + (params.vep_dbnsfp && params.dbnsfp && params.dbnsfp_consequence) ? "--plugin dbNSFP,'consequence=${params.dbnsfp_consequence}',${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', + (params.vep_loftee) ? "--plugin LoF,loftee_path:/opt/conda/envs/nf-core-vep-${params.vep_version}/share/ensembl-vep-${params.vep_version}-0" : '', + (params.vep_spliceai && params.spliceai_snv && params.spliceai_indel) ? "--plugin SpliceAI,snv=${params.spliceai_snv.split("/")[-1]},indel=${params.spliceai_indel.split("/")[-1]}" : '', + (params.vep_spliceregion) ? 
'--plugin SpliceRegion' : '',
+ (params.vep_out_format) ? "--${params.vep_out_format}" : '--vcf',
+ (params.vep_custom_args) ?: ''
+ ].join(' ').trim() }
+ // If just VEP: _VEP.ann.vcf
+ ext.prefix = { vcf.baseName - ".vcf" + "_VEP.ann" }
+ publishDir = [
+ [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/reports/EnsemblVEP/${meta.variantcaller}/${meta.id}/" },
+ pattern: "*html"
+ ],
+ [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" },
+ pattern: "*{gz}"
+ ]
+ ]
+ }
+ }
+
+ // ALL ANNOTATION TOOLS
+ if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge'))) {
+ withName: "NFCORE_RNADNAVAR:RNADNAVAR:VCF_ANNOTATE_ALL:.*:(TABIX_BGZIPTABIX|TABIX_TABIX)" {
+ ext.prefix = { input.name - ".vcf" }
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" },
+ pattern: "*{gz.tbi}"
+ ]
+ }
+ }
+
+ if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('merge'))) {
+ withName: 'NFCORE_RNADNAVAR:RNADNAVAR:VCF_ANNOTATE_ALL:VCF_ANNOTATE_SNPEFF:TABIX_BGZIPTABIX' {
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" },
+ pattern: "*{gz,gz.tbi}",
+ saveAs: { params.tools.split(',').contains('snpeff') ? it : null }
+ ]
+ }
+ }
+}
\ No newline at end of file
diff --git a/conf/modules/consensus/normalise.config b/conf/modules/consensus/normalise.config
new file mode 100644
index 0000000..85ba365
--- /dev/null
+++ b/conf/modules/consensus/normalise.config
@@ -0,0 +1,58 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Available keys to override module options:
+ ext.args = Additional arguments appended to command in module.
+ ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
+ ext.prefix = File name prefix for output files.
+ ext.when = When to run the module.
+----------------------------------------------------------------------------------------
+*/
+
+// NORMALISE
+
+process { // normalise
+
+ if (params.skip_tools && !params.skip_tools.split(',').contains('normalise')) {
+ // VT
+ // TODO: stats are not going to the report dir - no idea why
+ withName: 'VT_DECOMPOSE' {
+ ext.args = ""
+ ext.prefix = { "${vcf.baseName.minus(".vcf")}.dec" }
+ publishDir = [
+ [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/reports/vt/" },
+ pattern: "*dec.stats",
+ saveAs: { "${meta.variantcaller}/${meta.patient}/${meta.id}/${it}" },
+ enabled: true
+ ],
+ [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/variant_calling/" },
+ pattern: "*{vcf.gz,vcf.gz.tbi}",
+ saveAs: { "${meta.variantcaller}/${meta.patient}/${meta.id}/${it}" },
+ enabled: false // store normalised results only
+ ]
+ ]
+ }
+
+ withName: 'VT_NORMALIZE' {
+ ext.args = {"-n"}
+ ext.prefix = { "${vcf.baseName.minus(".dec.vcf")}.norm" }
+ publishDir = [[
+ path: { "${params.outdir}/variant_calling/" },
+ pattern: "*{vcf.gz,vcf.gz.tbi,norm.stats}",
+ saveAs: { "${meta.variantcaller}/${meta.patient}/${meta.id}/${it}" },
+ enabled: true // just store normalised results
+ ],
+ [mode: params.publish_dir_mode,
+ path: { "${params.outdir}/reports/vt/${meta.variantcaller}/${meta.patient}/${meta.id}/" },
+ pattern: "*stats"
+ ]]
+ }
+
+ }
+}
diff --git a/conf/modules/consensus/vcf_consensus.config b/conf/modules/consensus/vcf_consensus.config
new file mode 100644
index 0000000..bc63ea0
--- /dev/null
+++ b/conf/modules/consensus/vcf_consensus.config
@@ -0,0 +1,54 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Available keys to override module options:
+ ext.args = Additional arguments appended to command in module.
+ ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
+ ext.prefix = File name prefix for output files.
+ ext.when = When to run the module.
+----------------------------------------------------------------------------------------
+*/
+
+// CONSENSUS
+
+process { // consensus
+
+ if (params.tools && params.tools.split(',').contains('consensus')) {
+
+ withName: 'RUN_CONSENSUS' {
+ ext.prefix = { "${meta.id}.consensus"}
+ ext.args = {"--id=${meta.id}"}
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/variant_calling/consensus/${meta.patient}/${meta.id}/" },
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+ enabled: true
+ ]
+ }
+
+ withName: 'RUN_CONSENSUS_RESCUE_DNA' {
+ ext.prefix = { "${meta.id}.withRNA.consensus"}
+ ext.args = {"--id=${meta.id}_withRNAConsensus"}
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/variant_calling/consensus/${meta.patient}/${meta.id}/" },
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+ enabled: true
+ ]
+ }
+
+ withName: 'RUN_CONSENSUS_RESCUE_RNA' {
+ ext.prefix = { "${meta.id}.withDNA.consensus"}
+ ext.args = {"--id=${meta.id}_withDNAConsensus"}
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/variant_calling/consensus/${meta.patient}/${meta.id}/" },
+ saveAs: { filename -> filename.equals('versions.yml') ?
null : filename }, + enabled: true + ] + } + + } +} diff --git a/conf/modules/filtering/maf_filtering.config b/conf/modules/filtering/maf_filtering.config new file mode 100644 index 0000000..97a2dde --- /dev/null +++ b/conf/modules/filtering/maf_filtering.config @@ -0,0 +1,81 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// MAF FILTERING + +process { // maf filtering + if (params.tools && params.tools.split(',').contains('filtering')) { + + + withName: "VCF2MAF" { + ext.args = { [ + "--inhibit-vep", + "--normal-id ${meta.normal_id}", + "--tumor-id ${meta.tumor_id}", + "--vcf-tumor-id ${meta.tumor_id}", + "--vcf-normal-id ${meta.normal_id}", + "--max-subpop-af 0.0001", + "--retain-ann gnomADg_AF,MAX_AF,MAX_AF_POPS", + "--retain-fmt AD,DP,AF,GT", + params.vep_genome ? "--ncbi-build ${params.vep_genome}" : '', + meta.variantcaller == "strelka"? "--vcf-tumor-id TUMOR --vcf-normal-id NORMAL" : '' + ].join(' ').trim() } + ext.prefix = { "${meta.id}.${meta.variantcaller}" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variants/annotated/unfiltered/${meta.patient}/${meta.id}/" }, + pattern: "*{maf,maf.gz}" + ] + } + + withName: "FILTERING" { + ext.prefix = { "${meta.id}.filtered"} + ext.args = { [params.whitelist? "--whitelist ${params.whitelist}": "", + params.blacklist? "--blacklist ${params.blacklist}": ""].join(' ').trim() } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variants/annotated/filtered/${meta.patient}/${meta.id}/" }, + pattern: "*{maf,maf.gz}" + ] + } + + withName: 'SAMTOOLS_MERGE_SECOND_PASS' { + ext.prefix = {"${meta.id}.merged_2ndpass"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/second_pass/input/${meta.patient}/${meta.id}/" }, + pattern: "*{bam}", + enabled: params.save_align_intermeds + ] + } + + + + withName: 'RNA_FILTERING' { + ext.prefix = {"${meta.id}.rna_filt"} + ext.args = { [params.rnaedits? "--rnaedits ${params.rnaedits}": "", + params.rna_pon? "--pon ${params.rna_pon}" : "", + params.chain? "--chain ${params.chain}" : "", + params.fasta19? "--ref19 ${params.fasta19}" : "", + params.rna_pon19? 
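+ // assumed semantics: --chain/--ref19/--pon19 provide a GRCh38->GRCh37 liftover chain, a GRCh37 reference and a GRCh37 panel of normals for RNA-specific filtering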
"--pon19 ${params.rna_pon19}" : "" + ].join(' ').trim() } + + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variants/annotated/filtered/${meta.patient}/${meta.id}/" }, + pattern: "*{maf}", + enabled: true + ] + } + } +} \ No newline at end of file diff --git a/conf/modules/gatk4_preprocessing/markduplicates.config b/conf/modules/gatk4_preprocessing/markduplicates.config new file mode 100644 index 0000000..bb12432 --- /dev/null +++ b/conf/modules/gatk4_preprocessing/markduplicates.config @@ -0,0 +1,124 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// MARKDUPLICATES + +process { // markduplicates + + withName: 'CRAM_TO_BAM' { + ext.args = "-b" + } + + withName: 'BAM_TO_CRAM' { + // BAM provided for step Markduplicates either run through MD or Convert -> then saved as sorted.cram (convert) or md.cram (md directly) + // BAM files provided for step prepare_recal are converted and run through BQSR -> then saved as md.cram + // BAM files provided for step recal are converted and run through BQSR II -> then saved as md.cram + ext.args = "-C" + ext.prefix = { "${meta.id}.converted" } + publishDir = [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/converted/${meta.id}" }, + pattern: "*{cram,crai}" + ] + } + // TODO: is this necessary? the id should be different +// withName: '.*:BAM_TO_CRAM_SNCR:BAM_TO_CRAM' { +// // BAM provided for step Markduplicates either run through MD or Convert -> then saved as sorted.cram (convert) or md.cram (md directly) +// // BAM files provided for step prepare_recal are converted and run through BQSR -> then saved as md.cram +// // BAM files provided for step recal are converted and run through BQSR II -> then saved as md.cram +// ext.args = "-C" +// ext.prefix = { "${meta.id}.converted." 
} +// publishDir = [ +// enabled: !params.save_output_as_bam, +// mode: params.publish_dir_mode, +// path: { "${params.outdir}/preprocessing/converted/${meta.id}" }, +// pattern: "*{cram,crai}" +// ] +// } + + withName: 'BAM_TO_CRAM_MAPPING' { + // Run only when mapping should be saved as CRAM or when no MD is done + ext.when = (params.save_mapped && !params.save_output_as_bam) || (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + // Never publish if BAM only should be published + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/mapped/${meta.id}/" }, + pattern: "*{cram,crai}" + ] + } + + withName: 'GATK4_ESTIMATELIBRARYCOMPLEXITY' { + ext.prefix = { "${meta.id}.md.cram" } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('markduplicates_report')) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/markduplicates/${meta.id}" }, + pattern: "*metrics" + ] + } + // TODO: do we need to create index here? (--CREATE_INDEX true) + withName: 'GATK4_MARKDUPLICATES' { + ext.args = '-REMOVE_DUPLICATES false -VALIDATION_STRINGENCY LENIENT' + ext.prefix = { "${meta.id}.md.cram" } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) } + publishDir = [ + [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, + pattern: "*{cram,crai}" + ], + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/" }, + pattern: "*metrics", + saveAs: { !(params.skip_tools && params.skip_tools.split(',').contains('markduplicates_report')) ? "markduplicates/${meta.id}/${it}" : null} + ] + ] + } + + withName: 'GATK4_MARKDUPLICATES_SPARK' { + ext.args = '--remove-sequencing-duplicates false -VS LENIENT' + ext.prefix = { "${meta.id}.md.cram" } + publishDir = [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, + pattern: "*{cram,crai}" + ] + } + + withName: 'INDEX_MARKDUPLICATES' { + publishDir = [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, + pattern: "*{cram,crai}" + ] + } + + + withName: 'NFCORE_RNADNAVAR:RNADNAVAR:CRAM_TO_BAM' { + ext.prefix = { "${meta.id}.md" } + ext.when = { params.save_output_as_bam } + publishDir = [ + enabled: params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, + pattern: "*{md.bam,md.bam.bai}" + ] + } +} \ No newline at end of file diff --git a/conf/modules/gatk4_preprocessing/prepare_recalibration.config b/conf/modules/gatk4_preprocessing/prepare_recalibration.config new file mode 100644 index 0000000..21e38a9 --- /dev/null +++ b/conf/modules/gatk4_preprocessing/prepare_recalibration.config @@ -0,0 +1,38 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). 
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// PREPARE_RECALIBRATION + +process { //prepare_recalibration + + withName: 'GATK4_BASERECALIBRATOR' { + ext.args = { meta.status >= 2 ? "--lenient" : "" } + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.recal" : "${meta.id}_${intervals.simpleName}.recal" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*table", + saveAs: { meta.num_intervals > 1 ? null : "recal_table/${meta.id}/${it}" } + ] + } + + withName: 'GATK4_GATHERBQSRREPORTS' { + ext.prefix = {"${meta.id}.recal"} + ext.when = { meta.num_intervals > 1 } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/recal_table/${meta.id}/" }, + pattern: "*table", + ] + } +} \ No newline at end of file diff --git a/conf/modules/gatk4_preprocessing/recalibrate.config b/conf/modules/gatk4_preprocessing/recalibrate.config new file mode 100644 index 0000000..1a1ce1a --- /dev/null +++ b/conf/modules/gatk4_preprocessing/recalibrate.config @@ -0,0 +1,61 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// RECALIBRATE + +process { // recalibrate + + withName: 'GATK4_APPLYBQSR' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.recal" : "${meta.id}_${intervals.simpleName}.recal" } + publishDir = [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*cram", + saveAs: { meta.num_intervals > 1 ? 
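+ // per-interval recalibrated CRAMs are merged by CRAM_MERGE_INDEX_SAMTOOLS below, so only the single-interval case is published here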
null : "recalibrated/${meta.id}/${it}" } + ] + } + + if ((params.step == 'mapping' || params.step == 'markduplicates'|| params.step == 'prepare_recalibration'|| params.step == 'recalibrate') && (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator')))) { + withName: '.*:BAM_APPLYBQSR:CRAM_MERGE_INDEX_SAMTOOLS:MERGE_CRAM' { + ext.prefix = { "${meta.id}.recal" } + ext.when = { meta.num_intervals > 1 } + publishDir = [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" }, + pattern: "*cram" + ] + } + + withName: '.*:BAM_APPLYBQSR::CRAM_MERGE_INDEX_SAMTOOLS:INDEX_CRAM' { + publishDir = [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" }, + pattern: "*{recal.cram,recal.cram.crai}" + ] + } + } + + withName: 'CRAM_TO_BAM_RECAL' { + ext.prefix = { "${meta.id}.recal" } + ext.when = { params.save_output_as_bam} + publishDir = [ + enabled: params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" }, + pattern: "*{recal.bam,recal.bam.bai}" + ] + } +} \ No newline at end of file diff --git a/conf/modules/gatk4_preprocessing/splitncigarreads.config b/conf/modules/gatk4_preprocessing/splitncigarreads.config new file mode 100644 index 0000000..58f8c35 --- /dev/null +++ b/conf/modules/gatk4_preprocessing/splitncigarreads.config @@ -0,0 +1,68 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// RECALIBRATE + +process { //splitncigar + // TODO: check SECOND_RUN and suffixes + withName: '.*:SPLITNCIGAR:GATK4_SPLITNCIGARREADS' { + ext.args = ['-rf ReassignOneMappingQuality', + '-RMQF 255 ', + '-RMQT 60', + '-U ALLOW_N_CIGAR_READS'].join(' ').trim() + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/splitncigar/${meta.patient}/${meta.id}/" }, + pattern: "*{bam,bai}", + enabled: params.save_align_intermeds // will be saved as CRAM + ] + } + + withName: '.*:SECOND_RUN:GATK_PREPROCESSING:SPLITNCIGAR:GATK4_SPLITNCIGARREADS' { + ext.prefix = {"${meta.id}.sncr"} + ext.args = ['-rf ReassignOneMappingQuality', + '-RMQF 255 ', + '-RMQT 60', + '-U ALLOW_N_CIGAR_READS'].join(' ').trim() + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/splitncigar/${meta.patient}/${meta.id}/" }, + pattern: "*{bam,bai}", + enabled: params.save_align_intermeds // will be saved as CRAM + ] + } + + withName: ".*:PREPARE_SECOND_RUN:MERGE_ALIGN:INDEX_MERGE_BAM" { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*.{bai,csi}", + saveAs: { params.save_bam_mapped ? 
"second_run/${meta.patient}/${meta.id}/${it}" : null }, + enabled: params.save_align_intermeds + ] + + } + + withName: '.*:SPLITNCIGAR:SAMTOOLS_INDEX' { + ext.args = params.bam_csi_index ? '-c' : '' + publishDir = [ + path: { "${params.outdir}/preprocessing/splitncigar/${meta.patient}/${meta.id}/" }, + mode: params.publish_dir_mode, + pattern: "*.{bai,csi}", + enabled: params.save_align_intermeds + ] + } + + + +} diff --git a/conf/modules/prepare_resources/prepare_cache.config b/conf/modules/prepare_resources/prepare_cache.config new file mode 100644 index 0000000..cf922fb --- /dev/null +++ b/conf/modules/prepare_resources/prepare_cache.config @@ -0,0 +1,27 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// PREPARE_CACHE + +process { // prepare_cache + + // VEP + withName: 'ENSEMBLVEP_DOWNLOAD' { + ext.when = { params.tools && (params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge')) } + ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE' + publishDir = [ + mode: params.publish_dir_mode, + path: { params.outdir_cache ? "${params.outdir_cache}/": "${params.outdir}/cache/" } + ] + } +} \ No newline at end of file diff --git a/conf/modules/prepare_genome_and_intervals/prepare_genome.config b/conf/modules/prepare_resources/prepare_genome.config similarity index 77% rename from conf/modules/prepare_genome_and_intervals/prepare_genome.config rename to conf/modules/prepare_resources/prepare_genome.config index d218301..6554ec1 100644 --- a/conf/modules/prepare_genome_and_intervals/prepare_genome.config +++ b/conf/modules/prepare_resources/prepare_genome.config @@ -13,7 +13,7 @@ // PREPARE_GENOME TODO: add stuff and remove redundant code -process { +process { // prepare_genome withName: 'BWAMEM1_INDEX' { ext.when = { !params.bwa && params.step == "mapping" && (params.aligner == "bwa-mem" || params.aligner == "sentieon-bwamem")} @@ -45,23 +45,26 @@ process { ] } - withName: 'GATK4_CREATESEQUENCEDICTIONARY' { - ext.when = { !params.dict && params.step != "annotate" && params.step != "controlfreec" } - publishDir = [ - enabled: (params.save_reference || params.build_only_index), + withName: 'STAR_GENOMEGENERATE' { + ext.args = params.read_length ? "--sjdbOverhang ${params.read_length - 1}" : '' + } + + withName: 'UNTAR_.*|STAR_GENOMEGENERATE|HISAT2_BUILD|HISAT2_EXTRACTSPLICESITES' { + publishDir = [ + enabled: params.save_reference, mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/dict" }, - pattern: "*dict" + path: { "${params.outdir}/reference/index" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }
 ]
 }

- withName: 'MSISENSORPRO_SCAN' {
- ext.when = { params.tools && params.tools.split(',').contains('msisensorpro') }
+ withName: 'GATK4_CREATESEQUENCEDICTIONARY' {
+ ext.when = { !params.dict && params.step != "annotate" }
 publishDir = [
 enabled: (params.save_reference || params.build_only_index),
 mode: params.publish_dir_mode,
- path: { "${params.outdir}/reference/msi" },
- pattern: "*list"
+ path: { "${params.outdir}/reference/dict" },
+ pattern: "*dict"
 ]
 }

@@ -76,7 +79,7 @@
 }

 withName: 'TABIX_DBSNP' {
- ext.when = { !params.dbsnp_tbi && params.dbsnp && ((params.step == "mapping" || params.step == "markduplicates" || params.step == "prepare_recalibration") || params.tools && (params.tools.split(',').contains('controlfreec') || params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('mutect2'))) }
+ ext.when = { !params.dbsnp_tbi && params.dbsnp && ((params.step == "mapping" || params.step == "markduplicates" || params.step == "prepare_recalibration") || params.tools && params.tools.split(',').contains('mutect2')) }
 publishDir = [
 enabled: (params.save_reference || params.build_only_index),
 mode: params.publish_dir_mode,
@@ -96,7 +99,7 @@
 }

 withName: 'TABIX_KNOWN_INDELS' {
- ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper'))) ) }
+ ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' ) }
 publishDir = [
 enabled: (params.save_reference || params.build_only_index),
 mode: params.publish_dir_mode,
@@ -106,7 +109,7 @@
 }

 withName: 'TABIX_KNOWN_SNPS' {
- ext.when = { !params.known_snps_tbi && params.known_snps && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper'))) ) }
+ ext.when = { !params.known_snps_tbi && params.known_snps && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' ) }
 publishDir = [
 enabled: (params.save_reference || params.build_only_index),
 mode: params.publish_dir_mode,
@@ -125,14 +128,12 @@
 ]
 }

- withName: 'UNZIP_ALLELES|UNZIP_LOCI|UNZIP_GC|UNZIP_RT' {
- ext.when = { params.tools && params.tools.split(',').contains('ascat')}
- publishDir = [
- enabled: false
- ]
- }
-
- withName: 'UNTAR_CHR_DIR' {
- ext.when = { params.tools && params.tools.split(',').contains('controlfreec')}
+ withName: "GTF2BED" {
+ publishDir = [
+ enabled: (params.save_reference || params.build_only_index),
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/reference" },
+ pattern: "*bed"
+ ]
 }
 }
\ No newline at end of file
diff --git a/conf/modules/prepare_genome_and_intervals/prepare_intervals.config b/conf/modules/prepare_resources/prepare_intervals.config
similarity index 72%
rename from conf/modules/prepare_genome_and_intervals/prepare_intervals.config
rename to conf/modules/prepare_resources/prepare_intervals.config
index e760a1c..9ee572e 100644
--- a/conf/modules/prepare_genome_and_intervals/prepare_intervals.config
+++
b/conf/modules/prepare_resources/prepare_intervals.config
@@ -13,7 +13,7 @@

 // PREPARE INTERVALS

-process {
+process { // prepare_intervals

 withName: 'CREATE_INTERVALS_BED' {
 publishDir = [
@@ -33,8 +33,18 @@
 ]
 }

+ withName: 'GATK4_BEDTOINTERVALLIST' {
+ publishDir = [
+ enabled: params.save_reference,
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/reference/interval_list" },
+ pattern: "*.interval_list"
+ ]
+ }
+
 withName: 'TABIX_BGZIPTABIX_INTERVAL_SPLIT' {
 ext.prefix = {"${meta.id}"}
+ ext.args2 = "-0 -p bed"
 publishDir = [
 enabled: (params.save_reference || params.build_only_index),
 mode: params.publish_dir_mode,
@@ -42,4 +52,13 @@
 pattern: "*bed.gz"
 ]
 }
+
+ withName: 'BUILD_INTERVALS' {
+ publishDir = [
+ enabled: params.save_reference,
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/reference/bed" },
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
 }
\ No newline at end of file
diff --git a/conf/modules/quality_control/quality_control.config b/conf/modules/quality_control/quality_control.config
new file mode 100644
index 0000000..af9189c
--- /dev/null
+++ b/conf/modules/quality_control/quality_control.config
@@ -0,0 +1,122 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Available keys to override module options:
+ ext.args = Additional arguments appended to command in module.
+ ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules).
+ ext.prefix = File name prefix for output files.
+----------------------------------------------------------------------------------------
+*/
+
+// QC config
+
+process { // quality_control
+
+ withName: 'FASTQC' {
+ ext.args = '--quiet'
+ ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('fastqc')) }
+ publishDir = [
+ [
+ path: { "${params.outdir}/reports/fastqc/${meta.id}" },
+ mode: params.publish_dir_mode,
+ pattern: "*{html,zip}"
+ ]
+ ]
+ }
+
+ withName: 'MULTIQC' {
+ ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : ''
+ publishDir = [
+ path: { "${params.outdir}/reports"},
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+ enabled: !(params.skip_tools && params.skip_tools.split(',').contains('multiqc'))
+ ]
+ errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'}
+ }
+ // TODO check this is correct
+ withName: '.*:CRAM_QC_NO_MD:SAMTOOLS_STATS' {
+ ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) }
+ ext.prefix = { "${meta.id}.sorted.cram" }
+ publishDir = [
+ mode: params.publish_dir_mode,
+ path: { "${params.outdir}/reports/samtools/${meta.id}" },
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
+
+ if (params.skip_tools && !params.skip_tools.split(',').contains('mosdepth')){
+ withName: 'MOSDEPTH' {
+ ext.args = { !params.wes ?
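+ // mosdepth WGS mode: -n skips per-base output, --fast-mode skips CIGAR parsing, --by 500 bins coverage into 500 bp windows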
"-n --fast-mode --by 500" : ""} + ext.prefix = { + if (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) { + "${meta.id}.sorted" + } else { + "${meta.id}.md" + } + } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('mosdepth')) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/mosdepth/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + // TODO: check that this is capturing what it should + if ((params.step == 'mapping' || params.step == 'markduplicates'|| params.step == 'prepare_recalibration'|| params.step == 'recalibrate') && (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator')))) { + withName: '.*:CRAM_QC_RECAL:MOSDEPTH' { + ext.prefix = { "${meta.id}.recal" } + } + + withName: '.*:CRAM_QC_RECAL:SAMTOOLS_STATS' { + ext.prefix = { "${meta.id}.recal.cram" } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/samtools/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + + + if (params.tools && params.tools.split(',').contains('vcf_qc')){ + + // VCF + withName: 'BCFTOOLS_STATS' { + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('bcftools')) } + ext.prefix = { vcf.baseName - ".vcf" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/bcftools/${meta.variantcaller}/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'VCFTOOLS_.*' { + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('vcftools')) } + ext.prefix = { variant_file.baseName - ".vcf" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/vcftools/${meta.variantcaller}/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'VCFTOOLS_TSTV_COUNT' { + ext.args = "--TsTv-by-count" + } + + withName: 'VCFTOOLS_TSTV_QUAL' { + ext.args = "--TsTv-by-qual" + } + + withName: 'VCFTOOLS_SUMMARY' { + ext.args = "--FILTER-summary" + + } + } +} \ No newline at end of file diff --git a/conf/modules/quality_control/trimming.config b/conf/modules/quality_control/trimming.config new file mode 100644 index 0000000..c2d2639 --- /dev/null +++ b/conf/modules/quality_control/trimming.config @@ -0,0 +1,42 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// TRIMMING + +process { // trimming + + withName: 'FASTP' { + ext.args = [ "-Q", + !params.trim_fastq ? "--disable_adapter_trimming" : "", // Disable adapter trimming + params.clip_r1 > 0 ? 
"--trim_front1 ${params.clip_r1}" : "", // Remove bp from the 5' end of read 1 + params.clip_r2 > 0 ? "--trim_front2 ${params.clip_r2}" : "", // Remove bp from the 5' end of read 2 + params.three_prime_clip_r1 > 0 ? "--trim_tail1 ${params.three_prime_clip_r1}" : "", // Remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed + params.three_prime_clip_r2 > 0 ? "--trim_tail2 ${params.three_prime_clip_r2}" : "", // Remove bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed + params.trim_nextseq ? "--trim_poly_g" : "", // Apply the --nextseq=X option, to trim based on quality after removing poly-G tails + params.split_fastq > 0 ? "--split_by_lines ${params.split_fastq * 4}" : "" + ].join(" ").trim() + publishDir = [ + [ + path: { "${params.outdir}/reports/fastp/${meta.sample}" }, + mode: params.publish_dir_mode, + pattern: "*.{html,json,log}" + ], + [ + enabled: params.save_trimmed || params.save_split_fastqs, + path: { "${params.outdir}/preprocessing/fastp/${meta.sample}/" }, + mode: params.publish_dir_mode, + pattern: "*.fastp.fastq.gz" + ] + ] + } +} \ No newline at end of file diff --git a/conf/modules/variant_calling/freebayes.config b/conf/modules/variant_calling/freebayes.config new file mode 100644 index 0000000..8ba6f62 --- /dev/null +++ b/conf/modules/variant_calling/freebayes.config @@ -0,0 +1,79 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// FREEBAYES + +process { // freebayes + + if (params.tools && params.tools.split(',').contains('freebayes')) { + + withName: 'MERGE_FREEBAYES' { + ext.prefix = { "${meta.id}.freebayes" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/freebayes/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'FREEBAYES' { + ext.args = '--min-alternate-fraction 0.1 --min-mapping-quality 1' + //To make sure no naming conflicts ensure with module BCFTOOLS_SORT & the naming being correct in the output folder + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}" : "${meta.id}.${target_bed.simpleName}" } + ext.when = { params.tools && params.tools.split(',').contains('freebayes') } + publishDir = [ + enabled: false + ] + } + + withName: 'BCFTOOLS_SORT' { + ext.prefix = { meta.num_intervals <= 1 ? meta.id + ".freebayes" : vcf.name - ".vcf" + ".sort" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*vcf.gz", + saveAs: { meta.num_intervals > 1 ? null : "freebayes/${meta.id}/${it}" } + ] + } + + withName : 'TABIX_VC_FREEBAYES' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/freebayes/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + // PAIR_VARIANT_CALLING + if (params.tools && params.tools.split(',').contains('freebayes')) { + withName: '.*:BAM_VARIANT_CALLING_SOMATIC_ALL:BAM_VARIANT_CALLING_FREEBAYES:FREEBAYES' { + ext.args = "--pooled-continuous \ + --pooled-discrete \ + --genotype-qualities \ + --report-genotype-likelihood-max \ + --allele-balance-priors-off \ + --min-alternate-fraction 0.03 \ + --min-repeat-entropy 1 \ + --min-alternate-count 2 " + } + } + + withName: 'VCFFILTER' { + //To make sure no naming conflicts ensure with module BCFTOOLS_SORT & the naming being correct in the output folder + ext.prefix = { "${vcf.baseName.minus(".vcf")}.filtered" } + ext.args = '-f "QUAL > 1 & QUAL / AO > 10 & SAF > 0 & SAR > 0 & RPR > 1 & RPL > 1" -t PASS -F FAIL' + ext.when = { params.tools && params.tools.split(',').contains('freebayes') } + publishDir = [enabled: false] + } + } +} \ No newline at end of file diff --git a/conf/modules/variant_calling/manta.config b/conf/modules/variant_calling/manta.config new file mode 100644 index 0000000..1770beb --- /dev/null +++ b/conf/modules/variant_calling/manta.config @@ -0,0 +1,28 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// MANTA + +process { // manta + if (params.tools && params.tools.split(',').contains('manta')) { + withName: 'MANTA_SOMATIC' { + ext.args = {params.wes || meta.status >= 2 ? "--exome" : "" } + ext.prefix = { "${meta.id}.manta" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/manta/${meta.id}" }, + pattern: "*{diploid_sv,tumor_sv,somatic_sv}.{vcf.gz,vcf.gz.tbi}" + ] + } + } +} \ No newline at end of file diff --git a/conf/modules/variant_calling/mutect2.config b/conf/modules/variant_calling/mutect2.config new file mode 100644 index 0000000..2226407 --- /dev/null +++ b/conf/modules/variant_calling/mutect2.config @@ -0,0 +1,128 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// MUTECT2 + +process { // mutect2 + if (params.tools && params.tools.split(',').contains('mutect2')) { + +// withName: 'GATK4_MUTECT2' { +// ext.prefix = { meta.num_intervals <= 1 ? 
"${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" } +// ext.when = { params.tools && params.tools.split(',').contains('mutect2') } +// ext.args = { params.ignore_soft_clipped_bases ? "--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz" : "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz" } +// publishDir = [ +// mode: params.publish_dir_mode, +// path: { "${params.outdir}/variant_calling/" }, +// pattern: "*{vcf.gz,vcf.gz.tbi,stats}", +// saveAs: { meta.num_intervals > 1 ? null : "mutect2/${meta.id}/${it}" } +// ] +// } + + // PAIR_VARIANT_CALLING + withName: 'MUTECT2_PAIRED' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" } + ext.when = { params.tools && params.tools.split(',').contains('mutect2') } + ext.args = { params.ignore_soft_clipped_bases ? + "--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --smith-waterman FASTEST_AVAILABLE --normal-sample ${meta.normal_id} --callable-depth 1 " : + "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --smith-waterman FASTEST_AVAILABLE --normal-sample ${meta.patient}_${meta.normal_id} --callable-depth 1 " } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*{vcf.gz,vcf.gz.tbi,stats}", + saveAs: { meta.num_intervals > 1 ? null : "mutect2/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_MUTECT2.*' { + ext.prefix = { "${meta.id}.mutect2" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + // TODO: FILTERMUTECTCALLS is a patch for second run [!!] + withName: 'FILTERMUTECTCALLS.*' { + ext.prefix = {"${meta.id}.mutect2.filtered"} + ext.args = { [meta.status >= 2 ? '--max-events-in-region 5': '', + meta.cont && !(meta.cont.endswith("NO_TABLE")) ? '--contamination-table ${meta.cont}' :'', + meta.seg && !(meta.seg.endswith("NO_SEG")) ? '--tumor-segmentation ${meta.seg}':'', + meta.orient && !(meta.orient.endswith("NO_ARTPRIOR"))? '--orientation-bias-artifact-priors ${meta.orient}':'' + ].join(' ').trim() + } + + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : "mutect2/${meta.id}/${filename}" } + ] + } + + withName: 'CALCULATECONTAMINATION' { + ext.prefix = { "${meta.id}.mutect2" } + ext.args = { "-tumor-segmentation ${meta.id}.mutect2.segmentation.table" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'LEARNREADORIENTATIONMODEL' { + ext.prefix = { "${meta.id}.mutect2.artifactprior" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'MERGEMUTECTSTATS' { + ext.prefix = { "${meta.id}.mutect2" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
+
+        withName: 'GATHERPILEUPSUMMARIES.*' {
+            ext.prefix = { "${meta.id}.mutect2" }
+            publishDir = [
+                mode: params.publish_dir_mode,
+                path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" },
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            ]
+        }
+
+        withName: 'GETPILEUPSUMMARIES.*' {
+            ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" }
+            publishDir = [
+                mode: params.publish_dir_mode,
+                path: { "${params.outdir}/variant_calling/" },
+                pattern: "*.table",
+                saveAs: { meta.num_intervals > 1 ? null : "mutect2/${meta.id}/${it}" }
+            ]
+        }
+
+        if (params.joint_mutect2) {
+            withName: 'CALCULATECONTAMINATION' {
+                publishDir = [
+                    mode: params.publish_dir_mode,
+                    path: { "${params.outdir}/variant_calling/mutect2/${meta.patient}" },
+                    saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+                ]
+            }
+        }
+    }
+}
\ No newline at end of file
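One pattern repeated across these variant-calling configs is worth calling out: when a sample is scattered over intervals (`meta.num_intervals > 1`), the per-interval shards are merged by the `MERGE_*` processes, so publishing of the shards themselves is suppressed by returning `null` from `saveAs`. Schematically (lifted from the blocks above):

    publishDir = [
        mode: params.publish_dir_mode,
        path: { "${params.outdir}/variant_calling/" },
        // shards are merged downstream; only the per-sample files are published
        saveAs: { meta.num_intervals > 1 ? null : "mutect2/${meta.id}/${it}" }
    ]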
null : "strelka/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_STRELKA.*' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/strelka/${meta.id}/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'MERGE_STRELKA' { + ext.prefix = {"${meta.id}.strelka.variants"} + } + + withName: 'MERGE_STRELKA_GENOME' { + ext.prefix = {"${meta.id}.strelka.genome"} + } + + // PAIR_VARIANT_CALLING + withName: 'MERGE_STRELKA_INDELS' { + ext.prefix = {"${meta.id}.strelka.somatic_indels"} + } + withName: 'MERGE_STRELKA_SNVS' { + ext.prefix = {"${meta.id}.strelka.somatic_snvs"} + } + } +} \ No newline at end of file diff --git a/lib/WorkflowRnadnavar.groovy b/lib/WorkflowRnadnavar.groovy index a80e6ca..d745470 100755 --- a/lib/WorkflowRnadnavar.groovy +++ b/lib/WorkflowRnadnavar.groovy @@ -45,15 +45,52 @@ class WorkflowRnadnavar { return yaml_file_text } - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + public static String toolCitationText(params) { + + // TODO Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text + } + + public static String toolBibliographyText(params) { + + // TODO Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "",
+        // Uncomment function in methodsDescriptionText to render in MultiQC report
+        def reference_text = [
+                "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>",
+                "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: 10.1093/bioinformatics/btw354</li>"
+            ].join(' ').trim()
+
+        return reference_text
+    }
+
+    public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) {
         // Convert to a named map so can be used as with familiar NXF ${workflow} variable syntax in the MultiQC YML file
         def meta = [:]
         meta.workflow = run_workflow.toMap()
         meta["manifest_map"] = run_workflow.manifest.toMap()
+        // Pipeline DOI
         meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : ""
         meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>"
+        // Tool references
+        meta["tool_citations"] = ""
+        meta["tool_bibliography"] = ""
+
+        // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled!
+        //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".")
+        //meta["tool_bibliography"] = toolBibliographyText(params)
+
         def methods_text = mqc_methods_yaml.text
         def engine = new SimpleTemplateEngine()
diff --git a/modules.json b/modules.json index bf5dfad..fc17001 100644 --- a/modules.json +++ b/modules.json @@ -60,11 +60,6 @@
             "git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
             "installed_by": ["modules"]
         },
-        "ensemblvep": {
-            "branch": "master",
-            "git_sha": "29984d70aea47d06f0062a1785d76c357dd40ea9",
-            "installed_by": ["modules"]
-        },
         "ensemblvep/download": {
             "branch": "master",
             "git_sha": "9f9e1fc31cb35876922070c0e601ae05abae5cae",
diff --git a/modules/nf-core/dragmap/align/main.nf b/modules/nf-core/dragmap/align/main.nf new file mode 100644 index 0000000..6221fde --- /dev/null +++ b/modules/nf-core/dragmap/align/main.nf @@ -0,0 +1,46 @@
+process DRAGMAP_ALIGN {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "bioconda::dragmap=1.2.1 bioconda::samtools=1.15.1 conda-forge::pigz=2.3.4"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-580d344d9d4a496cd403932da8765f9e0187774d:5ebebbc128cd624282eaa37d2c7fe01505a91a69-0':
+        'biocontainers/mulled-v2-580d344d9d4a496cd403932da8765f9e0187774d:5ebebbc128cd624282eaa37d2c7fe01505a91a69-0' }"
+
+    input:
+    tuple val(meta) , path(reads)
+    tuple val(meta2), path(hashmap)
+    val sort_bam
+
+    output:
+    tuple val(meta), path("*.bam"), emit: bam
+    tuple val(meta), path('*.log'), emit: log
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def reads_command = meta.single_end ? "-1 $reads" : "-1 ${reads[0]} -2 ${reads[1]}"
+    def samtools_command = sort_bam ? 'sort' : 'view'
+
+    """
+    dragen-os \\
+        -r $hashmap \\
+        $args \\
+        --num-threads $task.cpus \\
+        $reads_command \\
+        2> ${prefix}.dragmap.log \\
+        | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam -
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        dragmap: \$(echo \$(dragen-os --version 2>&1))
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/dragmap/align/meta.yml b/modules/nf-core/dragmap/align/meta.yml new file mode 100644 index 0000000..763e005 --- /dev/null +++ b/modules/nf-core/dragmap/align/meta.yml @@ -0,0 +1,47 @@
+name: dragmap_align
+description: Performs fastq alignment to a reference using DRAGMAP
+keywords:
+  - alignment
+  - map
+  - fastq
+  - bam
+  - sam
+tools:
+  - dragmap:
+      description: Dragmap is the Dragen mapper/aligner Open Source Software.
+      homepage: https://github.com/Illumina/dragmap
+      documentation: https://github.com/Illumina/dragmap
+      tool_dev_url: https://github.com/Illumina/dragmap#basic-command-line-usage
+
+      licence: ["GPL v3"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
        e.g.
[ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - hashmap: + type: file + description: DRAGMAP hash table + pattern: "Directory containing DRAGMAP hash table *.{cmp,.bin,.txt}" +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Emiller88" diff --git a/modules/nf-core/dragmap/hashtable/main.nf b/modules/nf-core/dragmap/hashtable/main.nf new file mode 100644 index 0000000..529b438 --- /dev/null +++ b/modules/nf-core/dragmap/hashtable/main.nf @@ -0,0 +1,36 @@ +process DRAGMAP_HASHTABLE { + tag "$fasta" + label 'process_high' + + conda "bioconda::dragmap=1.3.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/dragmap:1.3.0--h72d16da_1': + 'biocontainers/dragmap:1.3.0--h72d16da_1' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("dragmap") , emit: hashmap + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir dragmap + dragen-os \\ + --build-hash-table true \\ + --ht-reference $fasta \\ + --output-directory dragmap \\ + $args \\ + --ht-num-threads $task.cpus + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dragmap: \$(echo \$(dragen-os --version 2>&1)) + END_VERSIONS + """ +} diff --git a/modules/nf-core/dragmap/hashtable/meta.yml b/modules/nf-core/dragmap/hashtable/meta.yml new file mode 100644 index 0000000..133cc9f --- /dev/null +++ b/modules/nf-core/dragmap/hashtable/meta.yml @@ -0,0 +1,40 @@ +name: dragmap_hashtable +description: Create DRAGEN hashtable for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - dragmap: + description: Dragmap is the Dragen mapper/aligner Open Source Software. + homepage: https://github.com/Illumina/dragmap + documentation: https://github.com/Illumina/dragmap + tool_dev_url: https://github.com/Illumina/dragmap#basic-command-line-usage + + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file +output: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - hashmap: + type: file + description: DRAGMAP hash table + pattern: "*.{cmp,.bin,.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Emiller88" diff --git a/modules/nf-core/ensemblvep/download/main.nf b/modules/nf-core/ensemblvep/download/main.nf new file mode 100644 index 0000000..4873b91 --- /dev/null +++ b/modules/nf-core/ensemblvep/download/main.nf @@ -0,0 +1,45 @@ +process ENSEMBLVEP_DOWNLOAD { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::ensembl-vep=110.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' : + 'biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }" + + input: + tuple val(meta), val(assembly), val(species), val(cache_version) + + output: + tuple val(meta), path("vep_cache"), emit: cache + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + vep_install \\ + --CACHEDIR vep_cache \\ + --SPECIES $species \\ + --ASSEMBLY $assembly \\ + --CACHE_VERSION $cache_version \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + mkdir vep_cache + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ensemblvep/download/meta.yml b/modules/nf-core/ensemblvep/download/meta.yml new file mode 100644 index 0000000..acb337c --- /dev/null +++ b/modules/nf-core/ensemblvep/download/meta.yml @@ -0,0 +1,43 @@ +name: ENSEMBLVEP_DOWNLOAD +description: Ensembl Variant Effect Predictor (VEP). The cache downloading options are controlled through `task.ext.args`. +keywords: + - annotation + - cache + - download +tools: + - ensemblvep: + description: | + VEP determines the effect of your variants (SNPs, insertions, deletions, CNVs + or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions. + homepage: https://www.ensembl.org/info/docs/tools/vep/index.html + documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ]
+  - assembly:
+      type: string
+      description: |
+        Genome assembly
+  - species:
+      type: string
+      description: |
+        Species
+  - cache_version:
+      type: string
+      description: |
+        cache version
+output:
+  - cache:
+      type: file
+      description: cache
+      pattern: "*"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@maxulysse"
diff --git a/nextflow.config b/nextflow.config index abe78c8..63b495a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -2,7 +2,7 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    nf-core/rnadnavar Nextflow config file
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Default config options for all compute environments
----------------------------------------------------------------------------------------
*/

@@ -10,25 +10,25 @@
params {
    // Input options - mandatory
-    input                  = null // sample sheet
-    step                   = 'mapping' // Starts with mapping
-    skip_tools             = null // All tools (markduplicates + baserecalibrator + QC) are used by default
-    save_bam_mapped        = false // Mapped BAMs not saved
-    save_output_as_bam     = false //Output files from preprocessing are saved as bam and not as cram files
+    input                  = null       // sample sheet
+    step                   = 'mapping'  // Starts with mapping
+    skip_tools             = null       // All tools (markduplicates + baserecalibrator + QC) are used by default
+    save_bam_mapped        = false      // Mapped BAMs not saved
+    save_output_as_bam     = false      // Output files from preprocessing are saved as bam and not as cram files

    // Genome and reference options
-    genome                 = null
-    igenomes_base          = 's3://ngi-igenomes/igenomes'
-    igenomes_ignore        = false
+    genome                 = null
+    igenomes_base          = 's3://ngi-igenomes/igenomes'
+    igenomes_ignore        = false
+    save_reference         = false
+    build_only_index       = false // Only build the reference indexes
+    download_cache         = false // Do not download annotation cache

-    // Output options
-    save_reference         = false
-    save_merged_fastq      = false

    // Sequence read information
    read_length            = 76 // Required for STAR to build index and align reads TODO: automate
-    wes                    = false // Set to true, if data is exome/targeted sequencing data. Used to use correct models in various variant callers
+    wes                    = false // Set to true, if data is exome/targeted sequencing data. Used to use correct models in various variant callers

    // Alignment
    aligner                = 'bwa-mem' // DNA aligner; for RNA only STAR is currently supported
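Since everything above is a plain `params` entry, a run can be shaped without editing the pipeline. A minimal sketch of a user config (hypothetical file name `my_run.config`, supplied with `-c my_run.config`; the tool names match the variant-calling configs introduced in this patch):

    params {
        input  = 'samplesheet.csv'
        genome = 'GATK.GRCh38'
        tools  = 'strelka,mutect2,freebayes'
        wes    = true
    }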
@@ -42,13 +42,10 @@ params {
    bam_csi_index          = false
    save_unaligned         = false
    save_align_intermeds   = false
-    bwa                    = null
-    bwamem2                = null
-    hisat2_build_memory    = null
-
-
-    // Preprocessing of alignment
+    hisat2_build_memory    = null
    remove_duplicates      = false
+    save_mapped            = false // Mapped BAMs not saved
+
    // Modify fastqs (trim/split) with FASTP
    trim_fastq             = false // No trimming
@@ -59,7 +56,6 @@ params {
    trim_nextseq           = 0
    split_fastq            = 50000000 // FASTQ files will not be split by default by FASTP
    save_trimmed           = false
-    save_trimmed_fail      = false
    save_split_fastqs      = false

    // Variant calling
@@ -80,22 +76,31 @@ params {
    // GATK intervallist parameters
    gatk_interval_scatter_count = 25
    ignore_soft_clipped_bases   = true
+
    // Variant annotation
    tools                  = null // No default Variant_Calling or Annotation tools
    genesplicer            = null // genesplicer disabled within VEP
+    dbnsfp                 = null // No dbnsfp processed file
+    dbnsfp_consequence     = null // No default consequence for dbnsfp plugin
+    dbnsfp_fields          = "rs_dbSNP,HGVSc_VEP,HGVSp_VEP,1000Gp3_EAS_AF,1000Gp3_AMR_AF,LRT_score,GERP++_RS,gnomAD_exomes_AF" // Default fields for dbnsfp plugin
+    dbnsfp_tbi             = null // No dbnsfp processed file index
+    spliceai_indel         = null // No spliceai_indel file
+    spliceai_indel_tbi     = null // No spliceai_indel file index
+    spliceai_snv           = null // No spliceai_snv file
+    spliceai_snv_tbi       = null // No spliceai_snv file index
+    vep_custom_args        = "--no_progress --offline --shift_hgvs 1 --check_existing --tsl --domains --total_length --allele_number --no_escape --xref_refseq --failed 1 --flag_pick_allele --pick_order canonical,tsl,biotype,rank,ccds,length --format vcf --biotype --force_overwrite --sift p --polyphen p --variant_class --regulatory --allele_number --af_gnomad --af_gnomadg --gene_phenotype --hgvs --hgvsg --max_af"
    vep_cache              = null // No directory for VEP cache
-    vep_genome             = null // No default genome for VEP
-//    vep_cache_version      = '106' // No default cache version for VEP
-//    vep_version            = '106.1' // No default cache version for VEP
+    vep_include_fasta      = false // Don't use fasta file for annotation with VEP
    vep_dbnsfp             = null
    vep_loftee             = null
    vep_spliceai           = null
    vep_spliceregion       = null
    vep_out_format         = 'vcf'
+    outdir_cache           = null // No default outdir cache

    // filtering
    whitelist              = null
-
+    blacklist              = null

    // MultiQC options
    multiqc_config         = null
    multiqc_title          = null
@@ -105,7 +110,6 @@ params {
    // Boilerplate options
    outdir                 = null
-    tracedir               = "${params.outdir}/pipeline_info"
    publish_dir_mode       = 'copy'
    email                  = null
    email_on_fail          = null
@@ -122,17 +126,18 @@
    custom_config_base     = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
    config_profile_contact = null
    config_profile_url     = null
+    test_data_base         = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnadnavar'

    // Max resource options
    // Defaults only, expecting to be overwritten
    max_memory             = '128.GB'
-    max_cpus               = 32
+    max_cpus               = 16
    max_time               = '240.h'

    // Schema validation default options
    validationFailUnrecognisedParams = false
-    validationLenientMode            = false
+    validationLenientMode            = true
    validationSchemaIgnoreParams     = 'genomes'
    validationShowHiddenParams       = false
    validate_params                  = true
@@ -158,95 +163,99 @@ try {
// }
profiles {
    debug {
+        cleanup                = false
        dumpHashes             = true
        process.beforeScript   = 'echo $HOSTNAME'
-        cleanup                = false
    }
    conda {
+        apptainer.enabled      = false
+        charliecloud.enabled   = false
        conda.enabled          = true
        docker.enabled         = false
-        singularity.enabled    = false
        podman.enabled         = false
        shifter.enabled        = false
-        charliecloud.enabled   = false
-        apptainer.enabled      = false
+        singularity.enabled    = false
    }
    mamba {
+        apptainer.enabled      = false
+        charliecloud.enabled   = false
        conda.enabled          = true
        conda.useMamba         = true
        docker.enabled         = false
-        singularity.enabled    = false
        podman.enabled         = false
        shifter.enabled        = false
-        charliecloud.enabled   = false
-        apptainer.enabled      = false
+        singularity.enabled    = false
    }
    docker {
+        apptainer.enabled      = false
+        charliecloud.enabled   = false
+        conda.enabled          = false
        docker.enabled         = true
        docker.userEmulation   = true
-        conda.enabled          = false
-        singularity.enabled    = false
        podman.enabled         = false
        shifter.enabled        = false
-        charliecloud.enabled   = false
-        apptainer.enabled      = false
+        singularity.enabled    = false
    }
    arm {
        docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64'
    }
    singularity {
-        singularity.enabled    = true
-        singularity.autoMounts = true
+        apptainer.enabled      = false
+        charliecloud.enabled   = false
        conda.enabled          = false
        docker.enabled         = false
        podman.enabled         = false
        shifter.enabled        = false
-        charliecloud.enabled   = false
-        apptainer.enabled      = false
+        singularity.autoMounts = true
+        singularity.enabled    = true
    }
    podman {
-        podman.enabled         = true
+        apptainer.enabled      = false
+        charliecloud.enabled   = false
        conda.enabled          = false
        docker.enabled         = false
-        singularity.enabled    = false
+        podman.enabled         = true
        shifter.enabled        = false
-        charliecloud.enabled   = false
-        apptainer.enabled      = false
+        singularity.enabled    = false
    }
    shifter {
-        shifter.enabled        = true
+        apptainer.enabled      = false
+        charliecloud.enabled   = false
        conda.enabled          = false
        docker.enabled         = false
-        singularity.enabled    = false
        podman.enabled         = false
-        charliecloud.enabled   = false
-        apptainer.enabled      = false
+        shifter.enabled        = true
+        singularity.enabled    = false
    }
    charliecloud {
+        apptainer.enabled      = false
        charliecloud.enabled   = true
        conda.enabled          = false
        docker.enabled         = false
-        singularity.enabled    = false
        podman.enabled         = false
        shifter.enabled        = false
-        apptainer.enabled      = false
+        singularity.enabled    = false
    }
    apptainer {
        apptainer.enabled      = true
+        charliecloud.enabled   = false
        conda.enabled          = false
        docker.enabled         = false
-        singularity.enabled    = false
        podman.enabled         = false
        shifter.enabled        = false
-        charliecloud.enabled   = false
+        singularity.enabled    = false
    }
    gitpod {
-        executor.name          = 'local'
        executor.cpus          = 16
        executor.memory        = 60.GB
+        executor.name          = 'local'
    }
-    test      { includeConfig 'conf/test.config'      }
-    test_full { includeConfig 'conf/test_full.config' }
+    // Basic test profile for CI
+    test       { includeConfig 'conf/test.config'       }
+    test_cache { includeConfig 'conf/test/cache.config' }
+
}

// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
@@ -268,6 +277,7 @@
if (!params.igenomes_ignore) {
} else {
    params.genomes = [:]
}
+
// Export these variables to prevent local Python/R libraries from conflicting with those in the container
// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.
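The `max_memory`/`max_cpus`/`max_time` params set earlier are enforced through the `check_max` helper kept at the bottom of this file. A sketch of how a base config typically consumes them (the label and numbers here are illustrative, not values from this patch):

    process {
        withLabel:process_high {
            cpus   = { check_max( 12 * task.attempt, 'cpus'   ) }
            memory = { check_max( 72.GB * task.attempt, 'memory' ) }
            time   = { check_max( 16.h * task.attempt, 'time'   ) }
        }
    }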
@@ -282,22 +292,24 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') +def tracedir = params.outdir + "/pipeline_info" timeline { enabled = true - file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" + file = "${tracedir}/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" + file = "${tracedir}/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" + file = "${tracedir}/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" + file = "${tracedir}/pipeline_dag_${trace_timestamp}.html" } manifest { @@ -312,7 +324,42 @@ manifest { } // Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' +includeConfig 'conf/modules/modules.config' + +// prepare reference +includeConfig 'conf/modules/prepare_resources/prepare_cache.config' +includeConfig 'conf/modules/prepare_resources/prepare_genome.config' +includeConfig 'conf/modules/prepare_resources/prepare_intervals.config' + +// quality control +includeConfig 'conf/modules/quality_control/quality_control.config' +includeConfig 'conf/modules/quality_control/trimming.config' + +// alignment +includeConfig 'conf/modules/alignment/bam_align.config' +includeConfig 'conf/modules/alignment/alignment_to_fastq.config' + +// preprocessing +includeConfig 'conf/modules/gatk4_preprocessing/markduplicates.config' +includeConfig 'conf/modules/gatk4_preprocessing/prepare_recalibration.config' +includeConfig 'conf/modules/gatk4_preprocessing/recalibrate.config' + +// variant calling +includeConfig 'conf/modules/variant_calling/freebayes.config' +includeConfig 'conf/modules/variant_calling/strelka.config' +includeConfig 'conf/modules/variant_calling/mutect2.config' +includeConfig 'conf/modules/variant_calling/sage.config' + +// annotate +includeConfig 'conf/modules/annotate/annotate.config' + +// consensus +includeConfig 'conf/modules/consensus/vcf_consensus.config' + +// filtering +includeConfig 'conf/modules/filtering/maf_filtering.config' + + // Function to ensure that resource requirements don't go beyond // a maximum limit @@ -345,4 +392,4 @@ def check_max(obj, type) { return obj } } -} +} \ No newline at end of file diff --git a/nextflow_schema.json b/nextflow_schema.json index 4cec090..6386b13 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -84,10 +84,6 @@ "description": "MultiQC report title. Printed as page header, used for filename if not otherwise specified.", "fa_icon": "fas fa-file-signature" }, - "save_merged_fastq": { - "type": "boolean", - "description": "Save FastQ files after merging re-sequenced libraries in the results directory." - }, "save_bam_mapped": { "type": "boolean", "fa_icon": "fas fa-download", @@ -110,9 +106,9 @@ "genome": { "type": "string", "description": "Name of iGenomes reference.", - "default": "GRCh38", + "default": "GATK.GRCh38", "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. 
\n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
+            "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`.\n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
        },
        "bwa": {
            "type": "string",
@@ -277,13 +273,6 @@
            "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nAlternatively cache version can be used to specify the correct Ensembl Genomes version number as these differ from the concurrent Ensembl/VEP version numbers",
            "hidden": true
        },
-        "vep_version": {
-            "type": "string",
-            "fa_icon": "fas fa-tag",
-            "description": "VEP version.",
-            "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nThis is used to specify the VEP version when using the container with pre-downloaded cache.",
-            "hidden": true
-        },
        "save_reference": {
            "type": "boolean",
            "fa_icon": "fas fa-download",
@@ -349,19 +338,11 @@
            "default": "",
            "fa_icon": "fas fa-cut",
            "properties": {
-                "trim_fastq": {
+                "trim_fastq": {
                    "type": "boolean",
                    "fa_icon": "fas fa-cut",
                    "description": "Run FastP for read trimming",
-                    "help_text": "Use this to perform adapter trimming. Adapter are detected automatically by using the FastP flag `--detect_adapter_for_pe`. For more info see [FastP](https://github.com/OpenGene/fastp) ",
-                    "hidden": true
-                },
-                "save_trimmed_fail": {
-                    "type": "boolean",
-                    "fa_icon": "fas fa-cut",
-                    "description": "Save failed fastq from FastP",
-                    "help_text": "Use this to saved failed FastP results ",
-                    "hidden": false
+                    "help_text": "Use this to perform adapter trimming. Adapters are detected automatically by using the FastP flag `--detect_adapter_for_pe`. For more info see [FastP](https://github.com/OpenGene/fastp)."
                },
                "clip_r1": {
                    "type": "integer",
@@ -409,26 +390,12 @@
                    "description": "Save trimmed FastQ file intermediates.",
                    "hidden": true
                },
-                "umi_read_structure": {
-                    "type": "string",
-                    "fa_icon": "fas fa-tape",
-                    "description": "Specify UMI read structure",
-                    "hidden": true,
-                    "help_text": "One structure if UMI is present on one end (i.e. '+T 2M11S+T'), or two structures separated by a blank space if UMIs a present on both ends (i.e. '2M11S+T 2M11S+T'); please note, this does not handle duplex-UMIs.\n\nFor more info on UMI usage in the pipeline, also check docs [here](./docs/usage.md/#how-to-handle-umis)."
-                },
-                "group_by_umi_strategy": {
-                    "type": "string",
-                    "default": "Adjacency",
-                    "description": "Default strategy with UMI",
-                    "hidden": true,
-                    "help_text": "Available values: Identity, Edit, Adjacency, Paired"
-                },
-                "save_split_fastqs": {
+                "save_split_fastqs": {
                    "type": "boolean",
                    "fa_icon": "fas fa-vial",
                    "description": "If set, publishes split FASTQ files.
Intended for testing purposes.",
                    "hidden": true
-                }
+                }
            }
        },
        "pipeline_stage_options": {
@@ -546,6 +513,34 @@
                }
            }
        },
+        "preprocessing": {
+            "title": "Preprocessing",
+            "type": "object",
+            "description": "Configure preprocessing tools",
+            "default": "",
+            "fa_icon": "fas fa-toolbox",
+            "properties": {
+                "aligner": {
+                    "type": "string",
+                    "default": "bwa-mem",
+                    "fa_icon": "fas fa-puzzle-piece",
+                    "enum": ["bwa-mem", "bwa-mem2", "dragmap", "sentieon-bwamem"],
+                    "description": "Specify aligner to be used to map reads to reference genome.",
+                    "help_text": "The pipeline will build missing indices automatically if not provided. Set `--bwa false` if indices should be (re-)built.\nIf DragMap is selected as aligner, it is recommended to skip baserecalibration with `--skip_tools baserecalibrator`. For more info see [here](https://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode)."
+                },
+                "save_mapped": {
+                    "type": "boolean",
+                    "fa_icon": "fas fa-download",
+                    "description": "Save mapped files.",
+                    "help_text": "If the parameter `--split-fastq` is used, the sharded bam files are merged and converted to CRAM before saving them."
+                },
+                "save_output_as_bam": {
+                    "type": "boolean",
+                    "description": "Saves output from mapping (if `--save_mapped`), Markduplicates & Baserecalibration as BAM file instead of CRAM",
+                    "fa_icon": "fas fa-download"
+                }
+            }
+        },
        "variant_calling": {
            "title": "Variant calling",
            "type": "object",
@@ -1023,6 +1018,9 @@
        {
            "$ref": "#/definitions/variant_calling"
        },
+        {
+            "$ref": "#/definitions/preprocessing"
+        },
        {
            "$ref": "#/definitions/annotation"
        },
diff --git a/subworkflows/local/bam_align/main.nf b/subworkflows/local/bam_align/main.nf index 9d77841..a4ca5c6 100644 --- a/subworkflows/local/bam_align/main.nf +++ b/subworkflows/local/bam_align/main.nf @@ -137,12 +137,12 @@ workflow BAM_ALIGN {
                // and not stall the workflow until all reads from all channels are mapped
                [ groupKey( meta - meta.subMap('num_lanes', 'read_group', 'size') + [ data_type:'bam', id:meta.sample ], (meta.num_lanes ?: 1) * (meta.size ?: 1)), bam ]
            }.groupTuple()
-            bam_mapped_dna,dump(tag:"bam_mapped_dna")
+            bam_mapped_dna.dump(tag:"bam_mapped_dna")
            // RNA will be aligned with STAR
            // Run STAR
-            ALIGN_STAR (
-                ch_reads_to_map_status.rna,
+            FASTQ_ALIGN_STAR (
+                reads_for_alignment_status.rna,
                star_index,
                gtf,
                params.star_ignore_sjdbgtf,
@@ -151,7 +151,7 @@
                [ [ id:"fasta" ], [] ] // fasta
            )
            // Grouping the bams from the same samples not to stall the workflow
-            bam_mapped_rna = ALIGN_STAR.out.bam.map{ meta, bam ->
+            bam_mapped_rna = FASTQ_ALIGN_STAR.out.bam.map{ meta, bam ->

                // Update meta.id to be meta.sample, ditching sample-lane that is not needed anymore
                // Update meta.data_type
@@ -164,11 +164,11 @@
                // and not stall the workflow until all reads from all channels are mapped
                [ groupKey( meta - meta.subMap('num_lanes', 'read_group', 'size') + [ data_type:'bam', id:meta.sample ], (meta.num_lanes ?: 1) * (meta.size ?: 1)), bam ]
            }.groupTuple()
-            bam_mapped_rna,dump(tag:"bam_mapped_rna")
+            bam_mapped_rna.dump(tag:"bam_mapped_rna")

            // Gather QC reports
-            reports = reports.mix(ALIGN_STAR.out.stats.collect{it[1]}.ifEmpty([]))
-            reports = reports.mix(ALIGN_STAR.out.log_final.collect{it[1]}.ifEmpty([]))
-            versions = versions.mix(ALIGN_STAR.out.versions)
+            reports = reports.mix(FASTQ_ALIGN_STAR.out.stats.collect{it[1]}.ifEmpty([]))
+            reports = reports.mix(FASTQ_ALIGN_STAR.out.log_final.collect{it[1]}.ifEmpty([]))
+            versions = versions.mix(FASTQ_ALIGN_STAR.out.versions)

        // mix dna and rna in one channel
        bam_mapped = bam_mapped_dna.mix(bam_mapped_rna)
@@ -192,7 +192,7 @@
        // Gather used software versions
        versions = versions.mix(CONVERT_FASTQ_INPUT.out.versions)
        versions = versions.mix(FASTQ_ALIGN_BWAMEM_MEM2_DRAGMAP.out.versions)
-        versions = versions.mix(ALIGN_STAR.out.versions)
+        versions = versions.mix(FASTQ_ALIGN_STAR.out.versions)
    }
diff --git a/subworkflows/local/gatk_preprocessing.nf b/subworkflows/local/bam_gatk_preprocessing/main.nf similarity index 99% rename from subworkflows/local/gatk_preprocessing.nf rename to subworkflows/local/bam_gatk_preprocessing/main.nf index 3a9fc40..56b3b96 100644 --- a/subworkflows/local/gatk_preprocessing.nf +++ b/subworkflows/local/bam_gatk_preprocessing/main.nf @@ -16,7 +16,7 @@
include { PREPARE_RECALIBRATION_CSV } from '../local/
include { RECALIBRATE               } from '../nf-core/gatk4/recalibrate/main'
include { RECALIBRATE_CSV           } from '../local/recalibrate_csv'

-workflow GATK_PREPROCESSING {
+workflow BAM_GATK_PREPROCESSING {
    take:
        step // Mandatory, step to start with
        tools
diff --git a/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf b/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf index bd1fd3d..5e69f66 100644 --- a/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf +++ b/subworkflows/local/bam_variant_calling_pre_post_processing/main.nf @@ -1,7 +1,7 @@
//
// Core workflow of the RNA/DNA variant calling pipeline
//
-include { BAM_GATK_PREPROCESSING } from '../gatk_preprocessing/main'
+include { BAM_GATK_PREPROCESSING } from '../bam_gatk_preprocessing/main'
// For now only matched tumor/normal samples are supported
// include { BAM_VARIANT_CALLING } from '../variant_calling/main'
// // Can we just call normalization here?
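The `groupKey` trick in the BAM_ALIGN hunks above deserves a note: by telling `groupTuple` how many items to expect per sample, merging can start as soon as all of a sample's lanes and shards have arrived instead of waiting for the whole channel. A minimal sketch of the pattern (`bam_per_lane` is a hypothetical stand-in for the per-lane outputs above; the meta arithmetic is copied from the code):

    bam_per_lane.map { meta, bam ->
        // expected group size = lanes x fastq shards
        def n = (meta.num_lanes ?: 1) * (meta.size ?: 1)
        [ groupKey(meta - meta.subMap('num_lanes', 'read_group', 'size') + [ data_type:'bam', id:meta.sample ], n), bam ]
    }.groupTuple() // emits each sample as soon as n BAMs have arrived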
@@ -41,9 +41,8 @@ workflow BAM_VARIANT_CALLING_PRE_POST_PROCESSING { ch_reports = Channel.empty() ch_versions = Channel.empty() ch_genome_bam.dump(tag:"ch_genome_bam") - // STEP 1: Mapping done elsewhere - // STEP 2: GATK PREPROCESSING - See: https://gatk.broadinstitute.org/hc/en-us/articles/360035535912-Data-pre-processing-for-variant-discovery - GATK_PREPROCESSING( + // GATK PREPROCESSING - See: https://gatk.broadinstitute.org/hc/en-us/articles/360035535912-Data-pre-processing-for-variant-discovery + BAM_GATK_PREPROCESSING( step, // Mandatory, step to start with - should be mapping for second pass tools, ch_genome_bam, // channel: [mandatory] [meta, [bam]] diff --git a/subworkflows/local/core_workflow_pass.nf b/subworkflows/local/core_workflow_pass.nf deleted file mode 100644 index 4a2dc11..0000000 --- a/subworkflows/local/core_workflow_pass.nf +++ /dev/null @@ -1,157 +0,0 @@ -// -// Core workflow of the RNA/DNA variant calling pipeline -// -include { GATK_PREPROCESSING } from './gatk_preprocessing' -include { VARIANT_CALLING } from './variant_calling' -include { NORMALIZE } from './normalize_vcf_variants' -include { CONSENSUS } from './consensus' -include { ANNOTATE } from './annotate' -include { BASIC_FILTERING as FILTERING } from '../../modules/local/filter_variants' - - -workflow CORE_RUN { - take: - step // step to start with - tools - skip_tools - ch_input_sample // input from CSV if applicable - ch_genome_bam // input from mapping - fasta // fasta reference file - fasta_fai // fai for fasta file - dict // - dbsnp - dbsnp_tbi - pon - pon_tbi - germline_resource - germline_resource_tbi - intervals - intervals_for_preprocessing - ch_interval_list_split - intervals_bed_gz_tbi - intervals_bed_combined - vcf_consensus_dna // to repeat rescue consensus - vcfs_status_dna // to repeat rescue consensus - - main: - ch_reports = Channel.empty() - ch_versions = Channel.empty() - ch_genome_bam.dump(tag:"ch_genome_bam") - // STEP 1: Mapping done elsewhere - // STEP 2: GATK PREPROCESSING - See: https://gatk.broadinstitute.org/hc/en-us/articles/360035535912-Data-pre-processing-for-variant-discovery - GATK_PREPROCESSING( - step, // Mandatory, step to start with - should be mapping for second pass - tools, - ch_genome_bam, // channel: [mandatory] [meta, [bam]] - skip_tools, // channel: [mandatory] skip_tools - params.save_output_as_bam, // channel: [mandatory] save_output_as_bam - fasta, // channel: [mandatory] fasta - fasta_fai , // channel: [mandatory] fasta_fai - dict, - germline_resource, // channel: [optional] germline_resource - germline_resource_tbi, // channel: [optional] germline_resource_tbi - intervals, // channel: [mandatory] intervals/target regions - intervals_for_preprocessing, // channel: [mandatory] intervals_for_preprocessing/wes - ch_interval_list_split, - ch_input_sample - ) - - ch_cram_variant_calling = GATK_PREPROCESSING.out.ch_cram_variant_calling - ch_versions = ch_versions.mix(GATK_PREPROCESSING.out.versions) - ch_reports = ch_reports.mix(GATK_PREPROCESSING.out.ch_reports) - - ch_cram_variant_calling.dump(tag:"[STEP8 RNA_FILTERING] ch_cram_variant_calling") - intervals_bed_gz_tbi.dump(tag:"[STEP8 RNA_FILTERING] intervals_bed_gz_tbi") - pon.dump(tag:"[STEP8 RNA_FILTERING] pon") - // STEP 3: VARIANT CALLING - VARIANT_CALLING( tools, - ch_cram_variant_calling, - fasta, - fasta_fai, - dbsnp, - dbsnp_tbi, - dict, - germline_resource, - germline_resource_tbi, - intervals, - intervals_bed_gz_tbi, - intervals_bed_combined, - pon, - pon_tbi, - ch_input_sample - ) - cram_vc_pair = 
VARIANT_CALLING.out.cram_vc_pair // use same crams for force calling later - vcf_to_normalize = VARIANT_CALLING.out.vcf - contamination = VARIANT_CALLING.out.contamination_table - segmentation = VARIANT_CALLING.out.segmentation_table - orientation = VARIANT_CALLING.out.artifact_priors - ch_versions = ch_versions.mix(VARIANT_CALLING.out.versions) - ch_reports = ch_reports.mix(VARIANT_CALLING.out.reports) - - - // STEP 4: NORMALIZE - NORMALIZE (tools, - vcf_to_normalize, - fasta, - ch_input_sample) - ch_versions = ch_versions.mix(NORMALIZE.out.versions) - vcf_normalized = NORMALIZE.out.vcf - - - // STEP 5: ANNOTATE - ANNOTATE(tools, - vcf_normalized, // second pass TODO: make it optional - fasta, - ch_input_sample // first pass - ) - - ch_versions = ch_versions.mix(ANNOTATE.out.versions) - ch_reports = ch_reports.mix(ANNOTATE.out.reports) - - // STEP 6: CONSENSUS - CONSENSUS ( tools, - ANNOTATE.out.maf_ann, - cram_vc_pair, // from previous variant calling - dict, - fasta, - fasta_fai, - germline_resource, - germline_resource_tbi, - intervals, - intervals_bed_gz_tbi, - intervals_bed_combined, - pon, - pon_tbi, - vcf_consensus_dna, // null when first pass - vcfs_status_dna, // null when first pass - ch_input_sample, - contamination, - segmentation, - orientation - ) - // STEP 7: FILTERING - if (tools.split(',').contains('filtering')) { - FILTERING(CONSENSUS.out.maf, fasta) - - FILTERING.out.maf.branch{ - dna: it[0].status < 2 - rna: it[0].status == 2 - }.set{filtered_maf} - filtered_maf_rna = filtered_maf.rna - filtered_maf_dna = filtered_maf.dna - } else{ - filtered_maf = Channel.empty() - filtered_maf_rna = Channel.empty() - filtered_maf_dna = Channel.empty() - - } - - emit: - vcf_consensus_dna = CONSENSUS.out.vcf_consensus_dna - vcfs_status_dna = CONSENSUS.out.vcfs_status_dna - maf = filtered_maf - maf_rna = filtered_maf_rna - maf_dna = filtered_maf_dna - versions = ch_versions // channel: [ versions.yml ] - reports = ch_reports -} \ No newline at end of file diff --git a/subworkflows/local/prepare_intervals/main.nf b/subworkflows/local/prepare_intervals/main.nf index 2d5e42b..b3e5bf5 100644 --- a/subworkflows/local/prepare_intervals/main.nf +++ b/subworkflows/local/prepare_intervals/main.nf @@ -1,6 +1,11 @@ // // PREPARE INTERVALS // + +// Initialize channels based on params or indices that were just built +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + include { BUILD_INTERVALS } from '../../../modules/local/build_intervals/main' include { CREATE_INTERVALS_BED } from '../../../modules/local/create_intervals_bed/main' include { GATK4_INTERVALLISTTOBED } from '../../../modules/nf-core/gatk4/intervallisttobed/main' diff --git a/subworkflows/local/prepare_reference_and_intervals.nf b/subworkflows/local/prepare_reference_and_intervals.nf index 9e4ec35..e659593 100644 --- a/subworkflows/local/prepare_reference_and_intervals.nf +++ b/subworkflows/local/prepare_reference_and_intervals.nf @@ -4,7 +4,6 @@ include { PREPARE_GENOME } from './prepare_genome/main' include { PREPARE_INTERVALS } from './prepare_intervals/main' include { GATK4_BEDTOINTERVALLIST } from '../../modules/nf-core/gatk4/bedtointervallist/main' -include { GATK4_INTERVALLISTTOOLS } from '../../modules/nf-core/gatk4/intervallisttools/main' workflow PREPARE_REFERENCE_AND_INTERVALS { @@ -77,18 +76,6 @@ workflow PREPARE_REFERENCE_AND_INTERVALS { ch_interval_list = GATK4_BEDTOINTERVALLIST.out.interval_list ch_versions = 
ch_versions.mix(GATK4_BEDTOINTERVALLIST.out.versions)

-    // STEP 0.D: Scatter one interval-list into many interval-files using GATK4 IntervalListTools
-    ch_interval_list_split = Channel.empty()
-    if (!params.skip_intervallisttools) {
-        GATK4_INTERVALLISTTOOLS(
-            ch_interval_list
-        )
-        ch_interval_list_split = GATK4_INTERVALLISTTOOLS.out.interval_list.map{ meta, bed -> [bed] }.flatten()
-    }
-    else {
-        ch_interval_list_split = ch_interval_list
-    }
-
    emit:
        fasta                       = fasta
        fasta_fai                   = fasta_fai
@@ -101,7 +88,6 @@
        star_index                  = PREPARE_GENOME.out.star_index
        gtf                         = PREPARE_GENOME.out.gtf
        ch_interval_list            = ch_interval_list
-        ch_interval_list_split      = ch_interval_list_split
        intervals                   = intervals
        intervals_bed_gz_tbi        = intervals_bed_gz_tbi
        intervals_for_preprocessing = intervals_for_preprocessing
diff --git a/subworkflows/nf-core/bam_markduplicates_picard/main.nf b/subworkflows/nf-core/bam_markduplicates_picard/main.nf new file mode 100644 index 0000000..6e3df33 --- /dev/null +++ b/subworkflows/nf-core/bam_markduplicates_picard/main.nf @@ -0,0 +1,52 @@
+//
+// Picard MarkDuplicates, index BAM file and run samtools stats, flagstat and idxstats
+//
+
+include { PICARD_MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main'
+include { SAMTOOLS_INDEX        } from '../../../modules/nf-core/samtools/index/main'
+include { BAM_STATS_SAMTOOLS    } from '../bam_stats_samtools/main'
+
+workflow BAM_MARKDUPLICATES_PICARD {
+
+    take:
+    ch_bam   // channel: [ val(meta), path(bam) ]
+    ch_fasta // channel: [ path(fasta) ]
+    ch_fai   // channel: [ path(fai) ]
+
+    main:
+
+    ch_versions = Channel.empty()
+
+    PICARD_MARKDUPLICATES ( ch_bam, ch_fasta, ch_fai )
+    ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions.first())
+
+    SAMTOOLS_INDEX ( PICARD_MARKDUPLICATES.out.bam )
+    ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
+
+    ch_bam_bai = PICARD_MARKDUPLICATES.out.bam
+        .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true)
+        .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true)
+        .map {
+            meta, bam, bai, csi ->
+                if (bai) {
+                    [ meta, bam, bai ]
+                } else {
+                    [ meta, bam, csi ]
+                }
+        }
+
+    BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta )
+    ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions)
+
+    emit:
+    bam      = PICARD_MARKDUPLICATES.out.bam     // channel: [ val(meta), path(bam) ]
+    metrics  = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), path(metrics) ]
+    bai      = SAMTOOLS_INDEX.out.bai            // channel: [ val(meta), path(bai) ]
+    csi      = SAMTOOLS_INDEX.out.csi            // channel: [ val(meta), path(csi) ]
+
+    stats    = BAM_STATS_SAMTOOLS.out.stats      // channel: [ val(meta), path(stats) ]
+    flagstat = BAM_STATS_SAMTOOLS.out.flagstat   // channel: [ val(meta), path(flagstat) ]
+    idxstats = BAM_STATS_SAMTOOLS.out.idxstats   // channel: [ val(meta), path(idxstats) ]
+
+    versions = ch_versions                       // channel: [ versions.yml ]
+}
diff --git a/subworkflows/nf-core/bam_markduplicates_picard/meta.yml b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml new file mode 100644 index 0000000..b924596 --- /dev/null +++ b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml @@ -0,0 +1,62 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "bam_markduplicates_picard"
+description: Picard MarkDuplicates, index BAM file and run samtools stats, flagstat and idxstats
+keywords:
+  - markduplicates
+  - bam
+  - sam
+  - cram
+
+components:
+  - picard/markduplicates
+  - samtools/index
+  - samtools/stats
+  - samtools/idxstats
+  - samtools/flagstat
+  - bam_stats_samtools
+
+input:
+  - ch_bam:
+      description: |
+        BAM/CRAM/SAM file
+        Structure: [ val(meta), path(bam) ]
+  - ch_fasta:
+      description: |
+        Reference genome fasta file
+        Structure: [ path(fasta) ]
+  - ch_fai:
+      description: |
+        Index of the reference genome fasta file
+        Structure: [ path(fai) ]
+output:
+  - bam:
+      description: |
+        processed BAM/CRAM/SAM file
+        Structure: [ val(meta), path(bam) ]
+  - bai:
+      description: |
+        BAM/CRAM/SAM samtools index
+        Structure: [ val(meta), path(bai) ]
+  - csi:
+      description: |
+        CSI samtools index
+        Structure: [ val(meta), path(csi) ]
+  - stats:
+      description: |
+        File containing samtools stats output
+        Structure: [ val(meta), path(stats) ]
+  - flagstat:
+      description: |
+        File containing samtools flagstat output
+        Structure: [ val(meta), path(flagstat) ]
+  - idxstats:
+      description: |
+        File containing samtools idxstats output
+        Structure: [ val(meta), path(idxstats) ]
+  - versions:
+      description: |
+        Files containing software versions
+        Structure: [ path(versions.yml) ]
+authors:
+  - "@dmarron"
+  - "@drpatelh"
diff --git a/subworkflows/nf-core/bam_qc_picard/main.nf b/subworkflows/nf-core/bam_qc_picard/main.nf new file mode 100644 index 0000000..f42b600 --- /dev/null +++ b/subworkflows/nf-core/bam_qc_picard/main.nf @@ -0,0 +1,45 @@
+//
+// Run QC steps on BAM/CRAM files using Picard
+//
+
+include { PICARD_COLLECTMULTIPLEMETRICS } from '../../../modules/nf-core/picard/collectmultiplemetrics/main'
+include { PICARD_COLLECTWGSMETRICS      } from '../../../modules/nf-core/picard/collectwgsmetrics/main'
+include { PICARD_COLLECTHSMETRICS       } from '../../../modules/nf-core/picard/collecthsmetrics/main'
+
+workflow BAM_QC_PICARD {
+    take:
+    ch_bam_bai_bait_target // channel: [ val(meta), [bam], [bai], [bait_interval], [target_interval]]
+    ch_fasta               // channel: [ val(meta), fasta ]
+    ch_fasta_fai           // channel: [ val(meta), fasta_fai ]
+    ch_fasta_dict          // channel: [ val(meta), fasta_dict ]
+
+    main:
+    ch_versions = Channel.empty()
+    ch_coverage_metrics = Channel.empty()
+
+    ch_bam_bai = ch_bam_bai_bait_target.map{meta, bam, bai, bait, target -> return [meta,bam,bai]}
+
+    PICARD_COLLECTMULTIPLEMETRICS( ch_bam_bai, ch_fasta, ch_fasta_fai )
+    ch_versions = ch_versions.mix(PICARD_COLLECTMULTIPLEMETRICS.out.versions.first())
+
+    ch_bam_bai_bait_target_branched = ch_bam_bai_bait_target.branch {
+        hsmetrics : it.size() == 5 && it[3] != [] && it[4] != []
+            return it
+        wgsmetrics : true
+            return [ it[0], it[1], it[2] ]
+    }
+
+    PICARD_COLLECTHSMETRICS( ch_bam_bai_bait_target_branched.hsmetrics, ch_fasta, ch_fasta_fai, ch_fasta_dict )
+    ch_coverage_metrics = ch_coverage_metrics.mix(PICARD_COLLECTHSMETRICS.out.metrics)
+    ch_versions = ch_versions.mix(PICARD_COLLECTHSMETRICS.out.versions.first())
+
+    PICARD_COLLECTWGSMETRICS( ch_bam_bai_bait_target_branched.wgsmetrics, ch_fasta, ch_fasta_fai, [] )
+    ch_versions = ch_versions.mix(PICARD_COLLECTWGSMETRICS.out.versions.first())
+    ch_coverage_metrics = ch_coverage_metrics.mix(PICARD_COLLECTWGSMETRICS.out.metrics)
+
+    emit:
+    coverage_metrics = ch_coverage_metrics                       // channel: [ val(meta), [ coverage_metrics ] ]
+    multiple_metrics = PICARD_COLLECTMULTIPLEMETRICS.out.metrics // channel: [ val(meta), [ multiple_metrics ] ]
+
+    versions = ch_versions                                       // channel: [ versions.yml ]
+}
diff --git a/subworkflows/nf-core/bam_qc_picard/meta.yml b/subworkflows/nf-core/bam_qc_picard/meta.yml new file mode 100644 index 0000000..c9d7aa6 --- /dev/null +++
b/subworkflows/nf-core/bam_qc_picard/meta.yml @@ -0,0 +1,84 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_qc_picard +description: Produces comprehensive statistics from BAM file +keywords: + - statistics + - counts + - hs_metrics + - wgs_metrics + - bam + - sam + - cram +components: + - picard/collectmultiplemetrics + - picard/collectwgsmetrics + - picard/collecthsmetrics +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: BAM/CRAM/SAM file index + pattern: "*.{bai,crai,sai}" + - bait_intervals: + type: optional file + description: An interval list or bed file that contains the locations of the baits used. + pattern: "baits.{interval_list,bed,bed.gz}" + - target_intervals: + type: optional file + description: An interval list or bed file that contains the locations of the targets. + pattern: "targets.{interval_list,bed,bed.gz}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: optional file + description: Reference fasta file + pattern: "*.{fasta,fa,fna}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta_fai: + type: optional file + description: Reference fasta file index + pattern: "*.{fasta,fa,fna}.fai" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta_dict: + type: optional file + description: Reference fasta sequence dictionary + pattern: "*.{dict}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - coverage_metrics: + type: file + description: Alignment metrics files generated by picard CollectHsMetrics or CollectWgsMetrics + pattern: "*_metrics.txt" + - multiple_metrics: + type: file + description: Alignment metrics files generated by picard CollectMultipleMetrics + pattern: "*_{metrics}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@matthdsm" diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/main.nf b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf new file mode 100644 index 0000000..fc1c652 --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf @@ -0,0 +1,50 @@ +// +// Sort, index BAM file and run samtools stats, flagstat and idxstats +// + +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' + +workflow BAM_SORT_STATS_SAMTOOLS { + take: + ch_bam // channel: [ val(meta), [ bam ] ] + ch_fasta // channel: [ val(meta), path(fasta) ] + + main: + + ch_versions = Channel.empty() + + SAMTOOLS_SORT ( ch_bam ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) + + SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + SAMTOOLS_SORT.out.bam + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .map { + meta, bam, bai, csi -> + if (bai) { + [ meta, bam, bai ] + } else { + [ meta, bam, csi ] + } + } + .set { ch_bam_bai } + + BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + + emit: + bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] + + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml new file mode 100644 index 0000000..69c16be --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml @@ -0,0 +1,67 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_sort_stats_samtools +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +components: + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_stats_samtools +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" +# TODO Update when we decide on a standard for subworkflow docs +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf new file mode 100644 index 0000000..44d4c01 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/main.nf @@ -0,0 +1,32 @@ +// +// Run SAMtools stats, flagstat and idxstats +// + +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' +include { SAMTOOLS_IDXSTATS } from '../../../modules/nf-core/samtools/idxstats/main' +include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main' + +workflow BAM_STATS_SAMTOOLS { + take: + ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + ch_fasta // channel: [ val(meta), path(fasta) ] + + main: + ch_versions = Channel.empty() + + SAMTOOLS_STATS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) + + SAMTOOLS_FLAGSTAT ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) + + SAMTOOLS_IDXSTATS ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions) + + emit: + stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/bam_stats_samtools/meta.yml b/subworkflows/nf-core/bam_stats_samtools/meta.yml new file mode 100644 index 0000000..87863b1 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/meta.yml @@ -0,0 +1,41 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_stats_samtools +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +components: + - samtools/stats + - samtools/idxstats + - samtools/flagstat +input: + - ch_bam_bai: + description: | + The input channel containing the BAM/CRAM and it's index + Structure: [ val(meta), path(bam), path(bai) ] + - ch_fasta: + description: | + Reference genome fasta file + Structure: [ path(fasta) ] +output: + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats)] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@drpatelh" diff --git a/subworkflows/nf-core/bam_tumor_normal_somatic_variant_calling_gatk/main.nf 
b/subworkflows/nf-core/bam_tumor_normal_somatic_variant_calling_gatk/main.nf new file mode 100644 index 0000000..94b1fce --- /dev/null +++ b/subworkflows/nf-core/bam_tumor_normal_somatic_variant_calling_gatk/main.nf @@ -0,0 +1,139 @@ +// +// Run GATK mutect2 in tumor normal mode, getpileupsummaries, calculatecontamination, learnreadorientationmodel and filtermutectcalls +// + +include { GATK4_MUTECT2 as MUTECT2 } from '../../../modules/nf-core/gatk4/mutect2/main' +include { GATK4_LEARNREADORIENTATIONMODEL as LEARNREADORIENTATIONMODEL } from '../../../modules/nf-core/gatk4/learnreadorientationmodel/main' +include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_TUMOR } from '../../../modules/nf-core/gatk4/getpileupsummaries/main' +include { GATK4_GETPILEUPSUMMARIES as GETPILEUPSUMMARIES_NORMAL} from '../../../modules/nf-core/gatk4/getpileupsummaries/main' +include { GATK4_CALCULATECONTAMINATION as CALCULATECONTAMINATION } from '../../../modules/nf-core/gatk4/calculatecontamination/main' +include { GATK4_FILTERMUTECTCALLS as FILTERMUTECTCALLS } from '../../../modules/nf-core/gatk4/filtermutectcalls/main' + +workflow BAM_TUMOR_NORMAL_SOMATIC_VARIANT_CALLING_GATK { + take: + ch_input // channel: [ val(meta), path(input), path(input_index), val(which_norm) ] + ch_fasta // channel: /path/to/reference/fasta + ch_fai // channel: /path/to/reference/fasta/index + ch_dict // channel: /path/to/reference/fasta/dictionary + ch_germline_resource // channel: /path/to/germline/resource + ch_germline_resource_tbi // channel: /path/to/germline/index + ch_panel_of_normals // channel: /path/to/panel/of/normals + ch_panel_of_normals_tbi // channel: /path/to/panel/of/normals/index + ch_interval_file // channel: /path/to/interval/file + + main: + ch_versions = Channel.empty() + + // + // Perform variant calling using mutect2 module in tumor normal mode. + // + MUTECT2 ( + ch_input, + ch_fasta, + ch_fai, + ch_dict, + ch_germline_resource, + ch_germline_resource_tbi, + ch_panel_of_normals, + ch_panel_of_normals_tbi + ) + + ch_versions = ch_versions.mix(MUTECT2.out.versions) + + // + // Generate artifactpriors using learnreadorientationmodel on the f1r2 output of mutect2. + // + LEARNREADORIENTATIONMODEL (MUTECT2.out.f1r2.collect()) + ch_versions = ch_versions.mix(LEARNREADORIENTATIONMODEL.out.versions) + + // + // Generate pileup summary tables using getpileupsummaries. The tumor sample should always be passed in as the first entry of the input and index lists in ch_input, + // to ensure correct file order for calculatecontamination. + // + ch_pileup_tumor_input = ch_input.combine(ch_interval_file).map { + meta, input_file, input_index, which_norm, intervals -> + [meta, input_file[0], input_index[0], intervals] + } + + ch_pileup_normal_input = ch_input.combine(ch_interval_file).map { + meta, input_file, input_index, which_norm, intervals -> + [meta, input_file[1], input_index[1], intervals] + } + + GETPILEUPSUMMARIES_TUMOR ( + ch_pileup_tumor_input, + ch_fasta, + ch_fai, + ch_dict, + ch_germline_resource, + ch_germline_resource_tbi + ) + + GETPILEUPSUMMARIES_NORMAL ( + ch_pileup_normal_input, + ch_fasta, + ch_fai, + ch_dict, + ch_germline_resource, + ch_germline_resource_tbi + ) + + ch_versions = ch_versions.mix(GETPILEUPSUMMARIES_TUMOR.out.versions.first()) + ch_versions = ch_versions.mix(GETPILEUPSUMMARIES_NORMAL.out.versions.first()) + + // + // Contamination and segmentation tables created using calculatecontamination on the pileup summary table.
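+    // [Editor's note: illustrative sketch, not part of this patch.] The meta-keyed
+    // join used below pairs each tumor pileup table with its matching normal table;
+    // failOnDuplicate/failOnMismatch make the pairing strict, so a malformed channel
+    // fails fast instead of silently dropping samples. Assuming hypothetical
+    // single-sample channels:
+    //
+    //     ch_t = Channel.of([[id:'p1'], file('p1.tumor.table')])
+    //     ch_n = Channel.of([[id:'p1'], file('p1.normal.table')])
+    //     ch_t.join(ch_n, failOnDuplicate: true, failOnMismatch: true)
+    //         .view()   // [[id:'p1'], p1.tumor.table, p1.normal.table]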
+ // + ch_pileup_tumor = GETPILEUPSUMMARIES_TUMOR.out.table.collect() + ch_pileup_normal = GETPILEUPSUMMARIES_NORMAL.out.table.collect() + ch_calccon_in = ch_pileup_tumor.join(ch_pileup_normal, failOnDuplicate: true, failOnMismatch: true) + CALCULATECONTAMINATION ( ch_calccon_in ) + ch_versions = ch_versions.mix(CALCULATECONTAMINATION.out.versions) + + // + // Mutect2 calls filtered by filtermutectcalls using the artifactpriors, contamination and segmentation tables. + // + ch_vcf = MUTECT2.out.vcf.collect() + ch_tbi = MUTECT2.out.tbi.collect() + ch_stats = MUTECT2.out.stats.collect() + ch_orientation = LEARNREADORIENTATIONMODEL.out.artifactprior.collect() + ch_segment = CALCULATECONTAMINATION.out.segmentation.collect() + ch_contamination = CALCULATECONTAMINATION.out.contamination.collect() + + //[] is used as a placeholder for optional input to specify the contamination estimate as a value, since the contamination table is used, this is not needed. + ch_contamination.add([]) + ch_filtermutect_in = ch_vcf + .join(ch_tbi, failOnDuplicate: true, failOnMismatch: true) + .join(ch_stats, failOnDuplicate: true, failOnMismatch: true) + .join(ch_orientation, failOnDuplicate: true, failOnMismatch: true) + .join(ch_segment, failOnDuplicate: true, failOnMismatch: true) + .join(ch_contamination, failOnDuplicate: true, failOnMismatch: true) + + FILTERMUTECTCALLS ( + ch_filtermutect_in, + ch_fasta, + ch_fai, + ch_dict + ) + ch_versions = ch_versions.mix(FILTERMUTECTCALLS.out.versions.first()) + + emit: + mutect2_vcf = MUTECT2.out.vcf.collect() // channel: [ val(meta), path(vcf) ] + mutect2_tbi = MUTECT2.out.tbi.collect() // channel: [ val(meta), path(tbi) ] + mutect2_stats = MUTECT2.out.stats.collect() // channel: [ val(meta), path(stats) ] + mutect2_f1r2 = MUTECT2.out.f1r2.collect() // channel: [ val(meta), path(f1r2) ] + + artifact_priors = LEARNREADORIENTATIONMODEL.out.artifactprior.collect() // channel: [ val(meta), path(artifactprior) ] + + pileup_table_tumor = GETPILEUPSUMMARIES_TUMOR.out.table.collect() // channel: [ val(meta), path(table) ] + pileup_table_normal = GETPILEUPSUMMARIES_NORMAL.out.table.collect() // channel: [ val(meta), path(table) ] + + contamination_table = CALCULATECONTAMINATION.out.contamination.collect() // channel: [ val(meta), path(table) ] + segmentation_table = CALCULATECONTAMINATION.out.segmentation.collect() // channel: [ val(meta), path(table) ] + + filtered_vcf = FILTERMUTECTCALLS.out.vcf.collect() // channel: [ val(meta), path(vcf) ] + filtered_tbi = FILTERMUTECTCALLS.out.tbi.collect() // channel: [ val(meta), path(tbi) ] + filtered_stats = FILTERMUTECTCALLS.out.stats.collect() // channel: [ val(meta), path(stats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/bam_tumor_normal_somatic_variant_calling_gatk/meta.yml b/subworkflows/nf-core/bam_tumor_normal_somatic_variant_calling_gatk/meta.yml new file mode 100644 index 0000000..1f08e23 --- /dev/null +++ b/subworkflows/nf-core/bam_tumor_normal_somatic_variant_calling_gatk/meta.yml @@ -0,0 +1,116 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_tumor_normal_somatic_variant_calling_gatk +description: | + Perform variant calling on a paired tumor normal set of samples using mutect2 tumor normal mode. + f1r2 output of mutect2 is run through learnreadorientationmodel to get the artifact priors. 
+ Run the input bam files through getpileupsummaries and then calculatecontamination to get the contamination and segmentation tables. + Filter the mutect2 output vcf using filtermutectcalls, artifact priors and the contamination & segmentation tables for additional filtering. +keywords: + - gatk4 + - mutect2 + - learnreadorientationmodel + - getpileupsummaries + - calculatecontamination + - filtermutectcalls + - variant_calling + - tumor_normal + - filtered_vcf +components: + - gatk4/mutect2 + - gatk4/learnreadorientationmodel + - gatk4/getpileupsummaries + - gatk4/calculatecontamination + - gatk4/filtermutectcalls +input: + - ch_input: + description: | + The tumor and normal BAM files, in that order; CRAM is also accepted as input. + Can contain an optional list of sample headers contained in the normal sample input file. + Structure: [ val(meta), path(input), path(input_index), val(which_norm) ] + - ch_fasta: + description: | + The reference fasta file + Structure: [ path(fasta) ] + - ch_fai: + description: | + Index of reference fasta file + Structure: [ path(fai) ] + - ch_dict: + description: | + GATK sequence dictionary + Structure: [ path(dict) ] + - ch_germline_resource: + description: | + Population vcf of germline sequencing, containing allele fractions. + Structure: [ path(germline_resources) ] + - ch_germline_resource_tbi: + description: | + Index file for the germline resource. + Structure: [ path(germline_resources_tbi) ] + - ch_panel_of_normals: + description: | + Vcf file to be used as a panel of normals. + Structure: [ path(panel_of_normals) ] + - ch_panel_of_normals_tbi: + description: | + Index for the panel of normals. + Structure: [ path(panel_of_normals_tbi) ] + - ch_interval_file: + description: | + File containing intervals. + Structure: [ path(interval_files) ] +output: + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] + - mutect2_vcf: + description: | + Compressed vcf file to be used for variant_calling. + Structure: [ val(meta), path(vcf) ] + - mutect2_tbi: + description: | + Indexes of the mutect2_vcf file + Structure: [ val(meta), path(tbi) ] + - mutect2_stats: + description: | + Stats files for the mutect2 vcf + Structure: [ val(meta), path(stats) ] + - mutect2_f1r2: + description: | + File containing information to be passed to LearnReadOrientationModel. + Structure: [ val(meta), path(f1r2) ] + - artifact_priors: + description: | + File containing artifact-priors to be used by filtermutectcalls. + Structure: [ val(meta), path(artifact_priors) ] + - pileup_table_tumor: + description: | + File containing the tumor pileup summary table, kept separate as calculatecontamination needs them individually specified. + Structure: [ val(meta), path(table) ] + - pileup_table_normal: + description: | + File containing the normal pileup summary table, kept separate as calculatecontamination needs them individually specified. + Structure: [ val(meta), path(table) ] + - contamination_table: + description: | + File containing the contamination table. + Structure: [ val(meta), path(table) ] + - segmentation_table: + description: | + Output table containing segmentation of tumor minor allele fractions. + Structure: [ val(meta), path(table) ] + - filtered_vcf: + description: | + File containing filtered mutect2 calls. + Structure: [ val(meta), path(vcf) ] + - filtered_tbi: + description: | + Tbi file that pairs with filtered vcf.
+ Structure: [ val(meta), path(tbi) ] + - filtered_stats: + description: | + File containing statistics of the filtermutectcalls run. + Structure: [ val(meta), path(stats) ] +authors: + - "@GCJMackenzie" diff --git a/subworkflows/nf-core/fastq_align_bwa/main.nf b/subworkflows/nf-core/fastq_align_bwa/main.nf new file mode 100644 index 0000000..4ce4f88 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bwa/main.nf @@ -0,0 +1,43 @@ +// +// Alignment with BWA +// + +include { BWA_MEM } from '../../../modules/nf-core/bwa/mem/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main' + +workflow FASTQ_ALIGN_BWA { + take: + ch_reads // channel (mandatory): [ val(meta), [ path(reads) ] ] + ch_index // channel (mandatory): [ val(meta2), path(index) ] + val_sort_bam // boolean (mandatory): true or false + ch_fasta // channel (optional) : [ path(fasta) ] + + main: + ch_versions = Channel.empty() + + // + // Map reads with BWA + // + + BWA_MEM ( ch_reads, ch_index, val_sort_bam ) + ch_versions = ch_versions.mix(BWA_MEM.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + + BAM_SORT_STATS_SAMTOOLS ( BWA_MEM.out.bam, ch_fasta ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + bam_orig = BWA_MEM.out.bam // channel: [ val(meta), path(bam) ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), path(bam) ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), path(bai) ] + csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), path(csi) ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/fastq_align_bwa/meta.yml b/subworkflows/nf-core/fastq_align_bwa/meta.yml new file mode 100644 index 0000000..618a69d --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bwa/meta.yml @@ -0,0 +1,72 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: fastq_align_bwa +description: Align reads to a reference genome using bwa then sort with samtools +keywords: + - align + - fasta + - genome + - reference +components: + - bwa/mem + - bwa/align + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_sort_stats_samtools +input: + - ch_reads: + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + Structure: [ val(meta), [ path(reads) ] ] + - ch_index: + description: | + BWA genome index files + Structure: [ val(meta2), path(index) ] + - val_sort_bam: + type: boolean + description: If true bwa modules sort resulting bam files + pattern: "true|false" + - ch_fasta: + type: file + description: | + Optional reference fasta file. 
This only needs to be given if val_sort_bam = true + Structure: [ path(fasta) ] + +output: + - bam_orig: + description: | + BAM file produced by bwa + Structure: [ val(meta), path(bam) ] + - bam: + description: | + BAM file ordered by samtools + Structure: [ val(meta), path(bam) ] + - bai: + description: | + BAI index of the ordered BAM file + Structure: [ val(meta), path(bai) ] + - csi: + description: | + CSI index of the ordered BAM file + Structure: [ val(meta), path(csi) ] + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats) ] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@JoseEspinosa" diff --git a/subworkflows/nf-core/fastq_align_hisat2/main.nf b/subworkflows/nf-core/fastq_align_hisat2/main.nf new file mode 100644 index 0000000..a2ec1cf --- /dev/null +++ b/subworkflows/nf-core/fastq_align_hisat2/main.nf @@ -0,0 +1,44 @@ +include { HISAT2_ALIGN } from '../../../modules/nf-core/hisat2/align/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main' + +workflow FASTQ_ALIGN_HISAT2 { + + take: + reads // channel: [ val(meta), [ reads ] ] + index // channel: /path/to/hisat2/index + splicesites // channel: /path/to/genome.splicesites.txt + ch_fasta // channel: [ fasta ] + + main: + + ch_versions = Channel.empty() + + + // + // Map reads with HISAT2 + // + HISAT2_ALIGN ( reads, index, splicesites ) + ch_versions = ch_versions.mix(HISAT2_ALIGN.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( HISAT2_ALIGN.out.bam, ch_fasta ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + + emit: + orig_bam = HISAT2_ALIGN.out.bam // channel: [ val(meta), bam ] + summary = HISAT2_ALIGN.out.summary // channel: [ val(meta), log ] + fastq = HISAT2_ALIGN.out.fastq // channel: [ val(meta), fastq ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), [ csi ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} + diff --git a/subworkflows/nf-core/fastq_align_hisat2/meta.yml b/subworkflows/nf-core/fastq_align_hisat2/meta.yml new file mode 100644 index 0000000..2b05beb --- /dev/null +++ b/subworkflows/nf-core/fastq_align_hisat2/meta.yml @@ -0,0 +1,89 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fastq_align_hisat2" +description: Align reads to a reference genome using hisat2 then sort with samtools +keywords: + - align + - sort + - rnaseq + - genome + - fastq + - bam + - sam + - cram +components: + - hisat2/align + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_sort_stats_samtools +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - index: + type: file + description: HISAT2 genome index file + pattern: "*.ht2" + - splicesites: + type: file + description: Splice sites in GTF file + pattern: "*.{txt}" + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - summary: + type: file + description: Alignment log + pattern: "*.log" + - fastq: + type: file + description: Optional output FASTQ file containing unaligned reads + pattern: "*.fastq.gz" + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@priyanka-surana" diff --git a/subworkflows/nf-core/fastq_align_star/main.nf b/subworkflows/nf-core/fastq_align_star/main.nf new file mode 100644 index 0000000..8c11057 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_star/main.nf @@ -0,0 +1,49 @@ +include { STAR_ALIGN } from '../../../modules/nf-core/star/align/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main' + +workflow FASTQ_ALIGN_STAR { + + take: + ch_reads // channel: [ val(meta), [ path(reads) ] ] + ch_index // channel: [ path(index) ] + ch_gtf // channel: [ path(gtf) ] + val_star_ignore_sjdbgtf // boolean: when using pre-built STAR indices do not re-extract and use splice junctions from the GTF file + val_seq_platform // string : sequencing platform + val_seq_center // string : sequencing center + ch_fasta // channel: [ val(meta), path(fasta) ] + + main: + + ch_versions = Channel.empty() + + // + // Map reads with STAR + // + STAR_ALIGN ( ch_reads, ch_index, ch_gtf, val_star_ignore_sjdbgtf, val_seq_platform, val_seq_center ) + ch_versions = ch_versions.mix(STAR_ALIGN.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( STAR_ALIGN.out.bam, ch_fasta ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + + orig_bam = STAR_ALIGN.out.bam // channel: [ val(meta), path(bam) ] + log_final = STAR_ALIGN.out.log_final // channel: [ val(meta), path(log_final) ] + log_out = STAR_ALIGN.out.log_out // channel: [ val(meta), path(log_out) ] + log_progress = STAR_ALIGN.out.log_progress // channel: [ val(meta), path(log_progress) ] + bam_sorted = STAR_ALIGN.out.bam_sorted // channel: [ val(meta), path(bam) ] + bam_transcript = STAR_ALIGN.out.bam_transcript // channel: [ val(meta), path(bam) ] + fastq = STAR_ALIGN.out.fastq // channel: [ val(meta), path(fastq) ] + tab = STAR_ALIGN.out.tab // channel: [ val(meta), path(tab) ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), path(bam) ] +
bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), path(bai) ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/fastq_align_star/meta.yml b/subworkflows/nf-core/fastq_align_star/meta.yml new file mode 100644 index 0000000..1f03985 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_star/meta.yml @@ -0,0 +1,108 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fastq_align_star" +description: Align reads to a reference genome using STAR then sort with samtools +keywords: + - align + - fasta + - genome + - reference +components: + - star/align + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_sort_stats_samtools +input: + - ch_reads: + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + Structure: [ val(meta), [ path(reads) ] ] + - ch_index: + type: directory + description: STAR genome index + pattern: "star" + - ch_gtf: + type: file + description: | + GTF file used to set the splice junctions with the --sjdbGTFfile flag + pattern: "*.gtf" + - val_star_ignore_sjdbgtf: + type: boolean + description: | + If true the --sjdbGTFfile flag is set + pattern: "true|false" + - val_seq_platform: + type: string + description: | + Sequencing platform to be added to the bam header using the --outSAMattrRGline flag + - val_seq_center: + type: string + description: | + Sequencing center to be added to the bam header using the --outSAMattrRGline flag + - ch_fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa,fna}" + +output: + - orig_bam: + description: | + Output BAM file containing read alignments + Structure: [ val(meta), path(bam) ] + - log_final: + description: | + STAR final log file + Structure: [ val(meta), path(log_final) ] + - log_out: + description: | + STAR log out file + Structure: [ val(meta), path(log_out) ] + - log_progress: + description: | + STAR log progress file + Structure: [ val(meta), path(log_progress) ] + - bam_sorted: + description: | + Sorted BAM file of read alignments (optional) + Structure: [ val(meta), path(bam) ] + - bam_transcript: + description: | + Output BAM file of transcriptome alignment (optional) + Structure: [ val(meta), path(bam) ] + - fastq: + description: | + Unmapped FastQ files (optional) + Structure: [ val(meta), path(fastq) ] + - tab: + description: | + STAR output tab file(s) (optional) + Structure: [ val(meta), path(tab) ] + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - bam: + description: | + BAM file ordered by samtools + Structure: [ val(meta), path(bam) ] + - bai: + description: | + BAI index of the ordered BAM file + Structure: [ val(meta), path(bai) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats) ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@JoseEspinosa" diff --git
a/workflows/rnadnavar.nf b/workflows/rnadnavar.nf index 955383c..6462e15 100644 --- a/workflows/rnadnavar.nf +++ b/workflows/rnadnavar.nf @@ -37,36 +37,163 @@ def checkPathParamList = [ params.whitelist ] +// Validate input parameters +WorkflowRnadnavar.initialise(params, log) + + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Check mandatory parameters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -for (param in checkPathParamList) { - if (param) { - file(param, checkIfExists: true) - } - } + +for (param in checkPathParamList) if (param) file(param, checkIfExists: true) + // Set input, can either be from --input or from automatic retrieval in lib/WorkflowRnadnavar.groovy -ch_input_sample = extract_csv(file(params.input)) +if (params.input) { + ch_from_samplesheet = params.build_only_index ? Channel.empty() : Channel.fromSamplesheet("input") +} else { + ch_from_samplesheet = params.build_only_index ? Channel.empty() : Channel.fromSamplesheet("input_restart") +} + +// Format samplesheet channel +input_sample = ch_from_samplesheet + .map{ meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller -> + // generate patient_sample key to group lanes together + [ meta.patient + meta.sample, [meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller] ] + } + .tap{ ch_with_patient_sample } // save the channel + .groupTuple() //group by patient_sample to get all lanes + .map { patient_sample, ch_items -> + // get number of lanes per sample + [ patient_sample, ch_items.size() ] + } + .combine(ch_with_patient_sample, by: 0) // for each entry add numLanes + .map { patient_sample, num_lanes, ch_items -> + + (meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller) = ch_items + if (meta.lane && fastq_2) { + meta = meta + [id: "${meta.sample}-${meta.lane}".toString()] + def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' + + def flowcell = flowcellLaneFromFastq(fastq_1) + // Don't use a random element for ID, it breaks resuming + def read_group = "\"@RG\\tID:${flowcell}.${meta.sample}.${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" + + meta = meta + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'fastq', size: 1] + + if (params.step == 'mapping') return [ meta, [ fastq_1, fastq_2 ] ] + else { + error("Samplesheet contains fastq files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations") + } + + // start from BAM + } else if (meta.lane && bam) { + if (params.step != 'mapping' && !bai) { + error("BAM index (bai) should be provided.") + } + meta = meta + [id: "${meta.sample}-${meta.lane}".toString()] + def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' + def read_group = "\"@RG\\tID:${meta.sample}_${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" + + meta = meta + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'bam', size: 1] + + if (params.step != 'annotate') return [ meta - meta.subMap('lane'), bam, bai ] + else { + error("Samplesheet contains bam files but step is `annotate`. The pipeline is expecting vcf files for the annotation. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations") + } + + // recalibration + } else if (table && cram) { + meta = meta + [id: meta.sample, data_type: 'cram'] + + if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai, table ] + else { + error("Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations") + } + + // recalibration when skipping MarkDuplicates + } else if (table && bam) { + meta = meta + [id: meta.sample, data_type: 'bam'] + + if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai, table ] + else { + error("Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations") + } + + // prepare_recalibration or variant_calling + } else if (cram) { + meta = meta + [id: meta.sample, data_type: 'cram'] + + if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai ] + else { + error("Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations") + } + + // prepare_recalibration when skipping MarkDuplicates or `--step markduplicates` + } else if (bam) { + meta = meta + [id: meta.sample, data_type: 'bam'] + if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai ] + else { + error("Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations") + } + + // annotation + } else if (vcf) { + meta = meta + [id: meta.sample, data_type: 'vcf', variantcaller: variantcaller ?: ''] + + if (params.step == 'annotate') return [ meta - meta.subMap('lane'), vcf ] + else { + error("Samplesheet contains vcf files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations") + } + } else { + error("Missing or unknown field in csv file header. Please check your samplesheet") + } + } + + + +// Check params logic +if (params.step != 'annotate' && params.tools && !params.build_only_index) { + // Two checks for ensuring that the pipeline stops with a meaningful error message if + // 1. the sample-sheet only contains normal-samples, but some of the requested tools require tumor-samples, and + // 2. the sample-sheet only contains tumor-samples, but some of the requested tools require normal-samples.
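+    // [Editor's note: illustrative sketch, not part of this patch.] Both checks
+    // below use the filter/ifEmpty idiom: the ifEmpty closure fires only when no
+    // element passed the filter, so the check can raise an error without ever
+    // collecting the channel, e.g. (hypothetical two-sample sheet):
+    //
+    //     Channel.of([[status: 0]], [[status: 0]])      // normal-only entries
+    //         .filter { it[0].status == 1 }             // keep tumor samples
+    //         .ifEmpty { error('no tumor samples in the samplesheet') }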
+ input_sample.filter{ it[0].status == 1 }.ifEmpty{ // In this case, the sample-sheet contains no tumor-samples + if (!params.build_only_index) { + def tools_tumor = ['sage', 'mutect2', 'strelka', 'freebayes'] + def tools_tumor_asked = [] + tools_tumor.each{ tool -> + if (params.tools.split(',').contains(tool)) tools_tumor_asked.add(tool) + } + if (!tools_tumor_asked.isEmpty()) { + error('The sample-sheet only contains normal-samples, but the following tools, which were requested with "--tools", expect at least one tumor-sample : ' + tools_tumor_asked.join(", ")) + } + } + } + input_sample.filter{ it[0].status == 0 }.ifEmpty{ // In this case, the sample-sheet contains no normal/germline-samples + def tools_requiring_normal_samples = ['sage', 'mutect2', 'strelka', 'freebayes'] // Will implement tumour only in the near future + def requested_tools_requiring_normal_samples = [] + tools_requiring_normal_samples.each{ tool_requiring_normal_samples -> + if (params.tools.split(',').contains(tool_requiring_normal_samples)) requested_tools_requiring_normal_samples.add(tool_requiring_normal_samples) + } + if (!requested_tools_requiring_normal_samples.isEmpty()) { + error('The sample-sheet only contains tumor-samples, but the following tools, which were requested with "--tools", expect at least one normal-sample : ' + requested_tools_requiring_normal_samples.join(", ")) + } + } +} // Fails when wrongful extension for intervals file if (params.wes && !params.step == 'annotate') { - if (params.intervals && !params.intervals.endsWith("bed")) exit 1, "Target file specified with `--intervals` must be in BED format for targeted data" + if (params.intervals && !params.intervals.endsWith("bed")) error("Target file specified with `--intervals` must be in BED format for targeted data") else log.warn("Intervals file was provided without parameter `--wes`: Pipeline will assume this is Whole-Genome-Sequencing data.") -} else if (params.intervals && !params.intervals.endsWith("bed") && !params.intervals.endsWith("interval_list")) exit 1, "Intervals file must end with .bed or .interval_list" +} else if (params.intervals && !params.intervals.endsWith("bed") && !params.intervals.endsWith("list")) error("Intervals file must end with .bed, .list, or .interval_list") -if(params.step == 'mapping' && params.aligner.contains("dragmap") && !(params.skip_tools && params.skip_tools.split(',').contains("baserecalibrator"))){ - log.warn("DragMap was specified as aligner. Base recalibration is not contained in --skip_tools. It is recommended to skip baserecalibration when using DragMap\nhttps://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode") -} // Fails when missing params for STAR -if (!params.star_index && !params.gtf && !params.gff) - { - exit 1, - "GTF|GFF3 file is required to build a STAR reference index! Use option --gtf|--gff to provide a GTF|GFF file." - } +if (!params.star_index && !params.gtf && !params.gff){ + error("GTF|GFF3 file is required to build a STAR reference index! Use option --gtf|--gff to provide a GTF|GFF file.")
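+    // [Editor's note: illustrative sketch, not part of this patch.] A hypothetical
+    // invocation satisfying this check, with placeholder paths, supplies either an
+    // annotation file or a pre-built index:
+    //
+    //     nextflow run nf-core/rnadnavar --input samplesheet.csv \
+    //         --gtf /refs/genome.gtf --outdir results
+    //     // or: --star_index /refs/star_index/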
+} // Warns when missing files or params for mutect2 if(params.tools && params.tools.split(',').contains('mutect2')){ @@ -91,23 +218,16 @@ if(!params.dbsnp && !params.known_indels){ // Fails when missing tools for variant_calling or annotate if ((params.step == 'variant_calling' || params.step == 'annotate') && !params.tools) { - log.error "Please specify at least one tool when using `--step ${params.step}`.\nhttps://nf-co.re/rnadnavar/parameters#tools" - exit 1 + error("Please specify at least one tool when using `--step ${params.step}`.\nhttps://nf-co.re/rnadnavar/parameters#tools") } -// Save AWS IGenomes file containing annotation version -def anno_readme = params.genomes[params.genome]?.readme -if (anno_readme && file(anno_readme).exists()) { - file("${params.outdir}/genome/").mkdirs() - file(anno_readme).copyTo("${params.outdir}/genome/") +if ((params.download_cache) && (params.snpeff_cache || params.vep_cache)) { + error("Please specify either `--download_cache` or `--snpeff_cache`/`--vep_cache`.\nhttps://nf-co.re/rnadnavar/dev/usage#how-to-customise-vep-annotation") } -file("${params.outdir}").mkdirs() - - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES + IMPORT LOCAL/NF-CORE MODULES/SUBWORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ @@ -125,12 +245,13 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -// Input checks -include { INPUT_CHECK } from '../subworkflows/local/input_check' - // Build the genome index and other reference files -include { PREPARE_REFERENCE_AND_INTERVALS } from '../subworkflows/local/prepare_reference_and_intervals' -include { MAPPING } from '../subworkflows/local/mapping' +include { PREPARE_REFERENCE_AND_INTERVALS } from '../subworkflows/local/prepare_reference_and_intervals' +// Download annotation cache if needed +include { ENSEMBLVEP_DOWNLOAD } from '../modules/nf-core/ensemblvep/download/main' + +// Alignment +include { BAM_ALIGN } from '../subworkflows/local/bam_align/main' // Core subworkflows of the pipeline include { CORE_RUN } from '../subworkflows/local/core_workflow_pass' @@ -143,16 +264,30 @@ include { FILTERING_RNA } from '../subworkflows/local/rna_filtering' // // MODULE: Installed directly from nf-core/modules // +// FASTQC +include { FASTQC } from '../modules/nf-core/fastqc/main' +// MULTIQC +include { MULTIQC } from '../modules/nf-core/multiqc/main' // REPORTING VERSIONS OF SOFTWARE USED -include { FASTQC } from '../modules/nf-core/modules/fastqc/main' -include { MULTIQC } from '../modules/nf-core/modules/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CONFIG FILES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ?
Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + + /* ======================================================================================== - VARIABLES + VARIABLES ======================================================================================== */ @@ -168,191 +303,94 @@ def multiqc_report = [] /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] - workflow RNADNAVAR { + // Initialise MULTIQC + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + // To gather all QC reports for MultiQC - ch_reports = Channel.empty() + reports = Channel.empty() // To gather used softwares versions for MultiQC - ch_versions = Channel.empty() + versions = Channel.empty() -// -// SUBWORKFLOW: Read in samplesheet, validate and stage input files -// - INPUT_CHECK ( - file(params.input) - ) - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") - // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ - // ! There is currently no tooling to help you write a sample sheet schema - - // - // MODULE: Run FastQC - // - FASTQC ( - INPUT_CHECK.out.reads - ) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) + // Download cache if needed + // Assuming that if the cache is provided, the user has already downloaded it + ensemblvep_info = params.vep_cache ? 
[] : Channel.of([ [ id:"${params.vep_cache_version}_${params.vep_genome}" ], params.vep_genome, params.vep_species, params.vep_cache_version ]) + if (params.download_cache) { + ENSEMBLVEP_DOWNLOAD(ensemblvep_info) + vep_cache = ENSEMBLVEP_DOWNLOAD.out.cache.collect().map{ meta, cache -> [ cache ] } -// STEP 0: Build reference and indices if needed - PREPARE_REFERENCE_AND_INTERVALS() - ch_versions = ch_versions.mix(PREPARE_REFERENCE_AND_INTERVALS.out.versions) - - // Reference and intervals variables - fasta = PREPARE_REFERENCE_AND_INTERVALS.out.fasta - fasta_fai = PREPARE_REFERENCE_AND_INTERVALS.out.fasta_fai - dict = PREPARE_REFERENCE_AND_INTERVALS.out.dict - germline_resource = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource - germline_resource_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource_tbi - intervals = PREPARE_REFERENCE_AND_INTERVALS.out.intervals - intervals_for_preprocessing = PREPARE_REFERENCE_AND_INTERVALS.out.intervals_for_preprocessing - ch_interval_list_split = PREPARE_REFERENCE_AND_INTERVALS.out.ch_interval_list_split - // specific for variant calling - intervals_bed_combined = PREPARE_REFERENCE_AND_INTERVALS.out.intervals_bed_combined - intervals_bed_gz_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.intervals_bed_gz_tbi - dbsnp = PREPARE_REFERENCE_AND_INTERVALS.out.dbsnp - dbsnp_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.dbsnp_tbi - pon = PREPARE_REFERENCE_AND_INTERVALS.out.pon - pon_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.pon_tbi - germline_resource = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource - germline_resource_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource_tbi - - -// STEP 1: ALIGNMENT PREPROCESSING - MAPPING( - PREPARE_REFERENCE_AND_INTERVALS.out.bwa, - PREPARE_REFERENCE_AND_INTERVALS.out.bwamem2, - PREPARE_REFERENCE_AND_INTERVALS.out.dragmap, - PREPARE_REFERENCE_AND_INTERVALS.out.star_index, - PREPARE_REFERENCE_AND_INTERVALS.out.gtf, - ch_input_sample - ) - ch_reports = ch_reports.mix(MAPPING.out.reports) - ch_versions = ch_versions.mix(MAPPING.out.versions) - - // 5 MAIN STEPS: GATK PREPROCESING - VARIANT CALLING - NORMALIZATION - CONSENSUS - ANNOTATION - CORE_RUN( - params.step, - params.tools, - params.skip_tools, - ch_input_sample, // input from CSV if applicable - MAPPING.out.ch_bam_mapped, // input from mapping - fasta, // fasta reference file - fasta_fai, // fai for fasta file - dict, // - dbsnp, - dbsnp_tbi, - pon, - pon_tbi, - germline_resource, - germline_resource_tbi, - intervals, - intervals_for_preprocessing, - ch_interval_list_split, - intervals_bed_gz_tbi, - intervals_bed_combined, - null, // to repeat rescue consensus - null // to repeat rescue consensus - ) - - - ch_reports = ch_reports.mix(CORE_RUN.out.reports) - ch_versions = ch_versions.mix(CORE_RUN.out.versions) - - if (params.tools.split(',').contains('second_run')) { - PREPARE_SECOND_RUN(ch_input_sample, // input from CSV if applicable - params.tools, - CORE_RUN.out.maf, - MAPPING.out.bwa_bams, // for dna re-alignments - MAPPING.out.star_bams, // for rnare-alignments - fasta, - fasta_fai, - dict, - PREPARE_REFERENCE_AND_INTERVALS.out.hisat2_index, - PREPARE_REFERENCE_AND_INTERVALS.out.splicesites - ) // do mapping with hisat2 - - ch_reports = ch_reports.mix(PREPARE_SECOND_RUN.out.reports) - ch_versions = ch_versions.mix(PREPARE_SECOND_RUN.out.versions) - SECOND_RUN( - "markduplicates", // step to start with - params.tools, - "baserecalibrator,baserecalibrator_report,contamination,learnreadorientation", - ch_input_sample, // input from CSV if applicable - 
PREPARE_SECOND_RUN.out.ch_bam_mapped, // input from mapping - fasta, // fasta reference file - fasta_fai, // fai for fasta file - dict, // - dbsnp, - dbsnp_tbi, - pon, - pon_tbi, - germline_resource, - germline_resource_tbi, - intervals, - intervals_for_preprocessing, - ch_interval_list_split, - intervals_bed_gz_tbi, - intervals_bed_combined, - CORE_RUN.out.vcf_consensus_dna, // to repeat rescue consensus - CORE_RUN.out.vcfs_status_dna // to repeat rescue consensus - ) - - ch_reports = ch_reports.mix(SECOND_RUN.out.reports) - ch_versions = ch_versions.mix(SECOND_RUN.out.versions) - second_run_maf = SECOND_RUN.out.maf_rna - } else{ - second_run_maf = Channel.empty() + versions = versions.mix(ENSEMBLVEP_DOWNLOAD.out.versions) } - FILTERING_RNA(params.tools, - CORE_RUN.out.maf_rna, - second_run_maf, - fasta) - ch_versions = ch_versions.mix(FILTERING_RNA.out.versions) +// STEP 0: Build reference and indices if needed + PREPARE_REFERENCE_AND_INTERVALS() + versions = versions.mix(PREPARE_REFERENCE_AND_INTERVALS.out.versions) + + // Reference and intervals variables + fasta = PREPARE_REFERENCE_AND_INTERVALS.out.fasta + fasta_fai = PREPARE_REFERENCE_AND_INTERVALS.out.fasta_fai + dict = PREPARE_REFERENCE_AND_INTERVALS.out.dict + germline_resource = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource + germline_resource_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource_tbi + intervals = PREPARE_REFERENCE_AND_INTERVALS.out.intervals + intervals_for_preprocessing = PREPARE_REFERENCE_AND_INTERVALS.out.intervals_for_preprocessing + // specific for variant calling + intervals_bed_combined = PREPARE_REFERENCE_AND_INTERVALS.out.intervals_bed_combined + intervals_bed_gz_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.intervals_bed_gz_tbi + dbsnp = PREPARE_REFERENCE_AND_INTERVALS.out.dbsnp + dbsnp_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.dbsnp_tbi + pon = PREPARE_REFERENCE_AND_INTERVALS.out.pon + pon_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.pon_tbi + germline_resource = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource + germline_resource_tbi = PREPARE_REFERENCE_AND_INTERVALS.out.germline_resource_tbi -// REPORTING - ch_version_yaml = Channel.empty() +// STEP 1: ALIGNMENT PREPROCESSING + BAM_ALIGN( + PREPARE_REFERENCE_AND_INTERVALS.out.bwa, + PREPARE_REFERENCE_AND_INTERVALS.out.bwamem2, + PREPARE_REFERENCE_AND_INTERVALS.out.dragmap, + PREPARE_REFERENCE_AND_INTERVALS.out.star_index, + PREPARE_REFERENCE_AND_INTERVALS.out.gtf, + input_sample + ) + reports = reports.mix(BAM_ALIGN.out.reports) + versions = versions.mix(BAM_ALIGN.out.versions) + + + version_yaml = Channel.empty() if (!(params.skip_tools && params.skip_tools.split(',').contains('versions'))) { - CUSTOM_DUMPSOFTWAREVERSIONS(ch_versions.unique().collectFile(name: 'collated_versions.yml')) - ch_version_yaml = CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect() + CUSTOM_DUMPSOFTWAREVERSIONS(versions.unique().collectFile(name: 'collated_versions.yml')) + version_yaml = CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect() } - // MODULE: MultiQC - // Present summary of reads, alignment, duplicates, BSQR stats for all samples as well as workflow summary/parameters as single report if (!(params.skip_tools && params.skip_tools.split(',').contains('multiqc'))) { workflow_summary = WorkflowRnadnavar.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) - methods_description = WorkflowRnadnavar.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) - ch_methods_description = 
Channel.value(methods_description) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() - ) - multiqc_report = MULTIQC.out.report.toList() + methods_description = WorkflowRnadnavar.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) + ch_methods_description = Channel.value(methods_description) + + multiqc_files = Channel.empty() + multiqc_files = multiqc_files.mix(version_yaml) + multiqc_files = multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + multiqc_files = multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + multiqc_files = multiqc_files.mix(reports.collect().ifEmpty([])) + + MULTIQC(multiqc_files.collect(), ch_multiqc_config.collect().ifEmpty([]), ch_multiqc_custom_config.collect().ifEmpty([]), ch_multiqc_logo.collect().ifEmpty([])) + + multiqc_report = MULTIQC.out.report.toList() + versions = versions.mix(MULTIQC.out.versions) + } } /* @@ -362,13 +400,9 @@ workflow RNADNAVAR { */ workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } + if (params.email || params.email_on_fail) NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) NfcoreTemplate.summary(workflow, params, log) - if (params.hook_url) { - NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) - } + if (params.hook_url) NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) } /* @@ -376,241 +410,6 @@ workflow.onComplete { FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Function to extract information (meta data + file(s)) from csv file(s) -def extract_csv(csv_file) { - - // check that the sample sheet is not 1 line or less, because it'll skip all subsequent checks if so. - file(csv_file).withReader('UTF-8') { reader -> - def line, numberOfLinesInSampleSheet = 0; - while ((line = reader.readLine()) != null) {numberOfLinesInSampleSheet++} - if (numberOfLinesInSampleSheet < 2) { - log.error "Samplesheet had less than two lines. The sample sheet must be a csv file with a header, so at least two lines." - System.exit(1) - } - } - - // Additional check of sample sheet: - // 1. If params.step == "mapping", then each row should specify a lane and the same combination of patient, sample and lane shouldn't be present in different rows. - // 2. The same sample shouldn't be listed for different patients. 
- def patient_sample_lane_combinations_in_samplesheet = [] - def sample2patient = [:] - - Channel.from(csv_file).splitCsv(header: true) - .map{ row -> - if (params.step == "mapping") { - if ( !row.lane ) { // This also handles the case where the lane is left as an empty string - log.error('The sample sheet should specify a lane for patient "' + row.patient.toString() + '" and sample "' + row.sample.toString() + '".') - System.exit(1) - } - def patient_sample_lane = [row.patient.toString(), row.sample.toString(), row.lane.toString()] - if (patient_sample_lane in patient_sample_lane_combinations_in_samplesheet) { - log.error('The patient-sample-lane combination "' + row.patient.toString() + '", "' + row.sample.toString() + '", and "' + row.lane.toString() + '" is present multiple times in the sample sheet.') - System.exit(1) - } else { - patient_sample_lane_combinations_in_samplesheet.add(patient_sample_lane) - } - } - if (!sample2patient.containsKey(row.sample.toString())) { - sample2patient[row.sample.toString()] = row.patient.toString() - } else if (sample2patient[row.sample.toString()] != row.patient.toString()) { - log.error('The sample "' + row.sample.toString() + '" is registered for both patient "' + row.patient.toString() + '" and "' + sample2patient[row.sample.toString()] + '" in the sample sheet.') - System.exit(1) - } - } - // keep count of the number of samples - sample_count_all = 0 - sample_count_normal = 0 - sample_count_tumor = 0 - sample_count_rna = 0 - - Channel.from(csv_file).splitCsv(header: true) - // Retrieves number of lanes by grouping together by patient and sample and counting how many entries there are for this combination - .map{ row -> - sample_count_all++ - if (!(row.patient && row.sample)){ - log.error "Missing field in csv file header. The csv file must have fields named 'patient' and 'sample'." - System.exit(1) - } - [[row.patient.toString(), row.sample.toString()], row] - }.groupTuple() - .map{ meta, rows -> - size = rows.size() - [rows, size] - }.transpose() - .map{ row, numLanes -> //from here do the usual thing for csv parsing - - def meta = [:] - - // Meta data to identify samplesheet - // Both patient and sample are mandatory - // Several sample can belong to the same patient - // Sample should be unique for the patient - if (row.patient) meta.patient = row.patient.toString() - if (row.sample) meta.sample = row.sample.toString() - - // If no status specified, sample is assumed normal - if (row.status) meta.status = row.status.toInteger() - else meta.status = 0 - - if (meta.status == 0) sample_count_normal++ - else if (meta.status == 1) sample_count_tumor++ // TODO check if elif is valid in here - else sample_count_rna++ - // TODO: think about what other condition we will have here now - // Two checks for ensuring that the pipeline stops with a meaningful error message if - // 1. the sample-sheet only contains normal-samples, but some of the requested tools require tumor-samples, and - // 2. the sample-sheet only contains tumor-samples, but some of the requested tools require normal-samples. 
-            if ((sample_count_normal == sample_count_all) && params.tools) { // In this case, the sample-sheet contains no tumor-samples
-                def tools_tumor = ['sage', 'mutect2', 'strelka2'] // This will be applied to tumour DNA and tumour RNA
-                def tools_tumor_asked = []
-                tools_tumor.each{ tool ->
-                    if (params.tools.split(',').contains(tool)) tools_tumor_asked.add(tool)
-                }
-                if (!tools_tumor_asked.isEmpty()) {
-                    log.error('The sample-sheet only contains normal-samples, but the following tools, which were requested with "--tools", expect at least one tumor-sample : ' + tools_tumor_asked.join(", "))
-                    System.exit(1)
-                }
-                // TODO no need to do anything with the germline - can this be removed?
-            } else if ((sample_count_tumor == sample_count_all) && params.tools) { // In this case, the sample-sheet contains no normal/germline-samples
-                def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller']
-                def requested_tools_requiring_normal_samples = []
-                tools_requiring_normal_samples.each{ tool_requiring_normal_samples ->
-                    if (params.tools.split(',').contains(tool_requiring_normal_samples)) requested_tools_requiring_normal_samples.add(tool_requiring_normal_samples)
-                }
-                if (!requested_tools_requiring_normal_samples.isEmpty()) {
-                    log.error('The sample-sheet only contains tumor-samples, but the following tools, which were requested by the option "tools", expect at least one normal-sample : ' + requested_tools_requiring_normal_samples.join(", "))
-                    System.exit(1)
-                }
-            }
-
-            // mapping with fastq
-            if (row.lane && row.fastq_2) {
-                meta.id = "${row.sample}-${row.lane}".toString()
-                def fastq_1 = file(row.fastq_1, checkIfExists: true)
-                def fastq_2 = file(row.fastq_2, checkIfExists: true)
-                def CN = params.seq_center ? "CN:${params.seq_center}\\t" : ''
-
-                def flowcell = flowcellLaneFromFastq(fastq_1)
-                //Don't use a random element for ID, it breaks resuming
-                def read_group = "\"@RG\\tID:${flowcell}.${row.sample}.${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.patient}_${row.sample}\\tLB:${row.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\""
-                if (meta.status == 2) { // STAR does not need '@RG'
-                    read_group = "ID:${flowcell}.${row.sample}.${row.lane} ${CN}PU:${row.lane} SM:${row.patient}_${row.sample} LB:${row.sample} DS:${params.fasta} PL:${params.seq_platform}"
-                }
-
-                meta.numLanes = numLanes.toInteger()
-                meta.read_group = read_group.toString()
-                meta.data_type = 'fastq'
-
-                meta.size = 1 // default number of splitted fastq
-
-                if (params.step == 'mapping') return [meta, [fastq_1, fastq_2]]
-                else {
-                    log.error "Samplesheet contains fastq files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations"
-                    System.exit(1)
-                }
-
-            // start from BAM
-            } else if (row.lane && row.bam) {
-                if (!row.bai) {
-                    log.error "BAM index (bai) should be provided."
-                }
-                meta.id = "${row.sample}-${row.lane}".toString()
-                def bam = file(row.bam, checkIfExists: true)
-                def bai = file(row.bai, checkIfExists: true)
-                def CN = params.seq_center ? "CN:${params.seq_center}\\t" : ''
-                def read_group = "\"@RG\\tID:${row.sample}_${row.lane}\\t${CN}PU:${row.lane}\\tSM:${row.sample}\\tLB:${row.sample}\\tPL:${params.seq_platform}\""
-                if (meta.status == 2) { // STAR does not need '@RG'
-                    read_group = "ID:${row.sample}_${row.lane} ${CN}PU:${row.lane} SM:${row.sample} LB:${row.sample} PL:${params.seq_platform}"
-                }
-
-                meta.numLanes = numLanes.toInteger()
-                meta.read_group = read_group.toString()
-                meta.data_type = 'bam'
-
-                meta.size = 1 // default number of splitted fastq
-
-                if (params.step != 'annotate') return [meta, bam, bai]
-                else {
-                    log.error "Samplesheet contains bam files but step is `annotate`. The pipeline is expecting vcf files for the annotation. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations"
-                    System.exit(1)
-                }
-
-            // recalibration
-            } else if (row.table && row.cram) {
-                meta.id = meta.sample
-                def cram = file(row.cram, checkIfExists: true)
-                def crai = file(row.crai, checkIfExists: true)
-                def table = file(row.table, checkIfExists: true)
-
-                meta.data_type = 'cram'
-
-                if (!(params.step == 'mapping' || params.step == 'annotate')) return [meta, cram, crai, table]
-                else {
-                    log.error "Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations"
-                    System.exit(1)
-                }
-
-            // recalibration when skipping MarkDuplicates
-            } else if (row.table && row.bam) {
-                meta.id = meta.sample
-                def bam = file(row.bam, checkIfExists: true)
-                def bai = file(row.bai, checkIfExists: true)
-                def table = file(row.table, checkIfExists: true)
-
-                meta.data_type = 'bam'
-
-                if (!(params.step == 'mapping' || params.step == 'annotate')) return [meta, bam, bai, table]
-                else {
-                    log.error "Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations"
-                    System.exit(1)
-                }
-
-            // prepare_recalibration or variant_calling
-            } else if (row.cram) {
-                meta.id = meta.sample
-                def cram = file(row.cram, checkIfExists: true)
-                def crai = file(row.crai, checkIfExists: true)
-
-                meta.data_type = 'cram'
-
-                if (!(params.step == 'mapping' || params.step == 'annotate')) return [meta, cram, crai]
-                else {
-                    log.error "Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations"
-                    System.exit(1)
-                }
-
-            // prepare_recalibration when skipping MarkDuplicates or `--step markduplicates`
-            } else if (row.bam) {
-                meta.id = meta.sample
-                def bam = file(row.bam, checkIfExists: true)
-                def bai = file(row.bai, checkIfExists: true)
-
-                meta.data_type = 'bam'
-
-                if (!(params.step == 'mapping' || params.step == 'annotate')) return [meta, bam, bai]
-                else {
-                    log.error "Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations"
-                    System.exit(1)
-                }
-
-            // annotation
-            } else if (row.vcf) {
-                meta.id = meta.sample
-                def vcf = file(row.vcf, checkIfExists: true)
-
-                meta.data_type = 'vcf'
-                meta.variantcaller = row.variantcaller ?: ''
-
-                if (params.step == 'annotate') return [meta, vcf]
-                else {
-                    log.error "Samplesheet contains vcf files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/rnadnavar/usage#input-samplesheet-configurations"
-                    System.exit(1)
-                }
-            } else {
-                log.error "Missing or unknown field in csv file header. Please check your samplesheet"
-                System.exit(1)
-            }
-        }
-}

 // Parse first line of a FASTQ file, return the flowcell id and lane number.
 def flowcellLaneFromFastq(path) {
     // expected format:
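
For reference, the `flowcellLaneFromFastq` helper whose definition starts in the context lines above is what supplies the flowcell ID used in the `@RG` read-group strings built by the deleted parsing code. A minimal Groovy sketch of the usual approach, assuming a gzipped FASTQ whose first read name follows the CASAVA 1.8+ layout (`@instrument:run:flowcell:lane:tile:x:y ...`); the name `flowcellLaneFromFastqSketch` and the field positions are illustrative assumptions, not code taken from this patch:

    // Illustrative sketch only, not part of this patch.
    // Assumes a gzipped FASTQ with a CASAVA 1.8+ read name on its first line.
    def flowcellLaneFromFastqSketch(path) {
        def line
        path.withInputStream {                 // Groovy closes the stream for us
            def gzip   = new java.util.zip.GZIPInputStream(it)
            def reader = new BufferedReader(new InputStreamReader(gzip, 'ASCII'))
            line = reader.readLine()           // only the first header line is needed
        }
        assert line.startsWith('@') : "Not a FASTQ header: ${line}"
        def fields = line.substring(1).split(':')
        // Seven or more fields means CASAVA 1.8+; the flowcell ID is the third field.
        return fields.size() >= 7 ? fields[2] : fields[0]
    }

Called as `flowcellLaneFromFastqSketch(file(row.fastq_1))`, a helper of this shape would return a flowcell ID such as `HKN3LDSXX`, keeping the read-group ID stable across resumed runs, which is why the deleted code avoided random elements in the ID.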