From f0ac28533156c82ecb4d74b65b27684ffe71f131 Mon Sep 17 00:00:00 2001 From: Paolo Cozzi Date: Wed, 20 Dec 2023 15:11:00 +0100 Subject: [PATCH 1/4] :sparkles: add reports to MultiQC step --- workflows/resequencing-mem.nf | 45 +++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/workflows/resequencing-mem.nf b/workflows/resequencing-mem.nf index 3da662c..71aaf7d 100644 --- a/workflows/resequencing-mem.nf +++ b/workflows/resequencing-mem.nf @@ -109,29 +109,15 @@ workflow RESEQUENCING_MEM { FASTQC(INPUT_CHECK.out.reads) ch_versions = ch_versions.mix(FASTQC.out.versions) - // get only the data I need for a MultiQC step - html_report = FASTQC.out.html.map( sample -> sample[1] ) - zip_report = FASTQC.out.zip.map( sample -> sample[1] ) - - // combine two channel (mix) and the get only one emission - multiqc_input = html_report.mix(zip_report).collect()//.view() - - // prepare multiqc_config file - multiqc_config = Channel.fromPath(params.multiqc_config) - multiqc_logo = Channel.fromPath(params.multiqc_logo) - - // calling MultiQC - MULTIQC(multiqc_input, multiqc_config, [], multiqc_logo) - ch_versions = ch_versions.mix(MULTIQC.out.versions) - - ch_cat_fastq - .multiMap { meta, reads -> - r1: [meta, reads[0]] - r2: [meta, reads[1]] - }.set{ ch_seqkit_input } - // remove duplicates (if necessary) if (params.remove_fastq_duplicates) { + // collect multiple files in one + ch_cat_fastq + .multiMap { meta, reads -> + r1: [meta, reads[0]] + r2: [meta, reads[1]] + }.set{ ch_seqkit_input } + SEQKIT_RMDUP_R1(ch_seqkit_input.r1) SEQKIT_RMDUP_R2(ch_seqkit_input.r2) ch_versions = ch_versions.mix(SEQKIT_RMDUP_R1.out.versions) @@ -218,6 +204,23 @@ workflow RESEQUENCING_MEM { TABIX_TABIX(BCFTOOLS_NORM.out.vcf) ch_versions = ch_versions.mix(TABIX_TABIX.out.versions) + // get only the data I need for a MultiQC step + multiqc_input = FASTQC.out.html.map{it[1]}.ifEmpty([]) + .concat(FASTQC.out.zip.map{it[1]}.ifEmpty([])) + .concat(TRIMGALORE.out.log.map{it[1]}.ifEmpty([])) + .concat(PICARD_MARKDUPLICATES.out.metrics.map{it[1]}.ifEmpty([])) + .concat(SAMTOOLS_FLAGSTAT.out.flagstat.map{it[1]}.ifEmpty([])) + .collect() + // .view() + + // prepare multiqc_config file + multiqc_config = Channel.fromPath(params.multiqc_config) + multiqc_logo = Channel.fromPath(params.multiqc_logo) + + // calling MultiQC + MULTIQC(multiqc_input, multiqc_config, [], multiqc_logo) + ch_versions = ch_versions.mix(MULTIQC.out.versions) + // return software version CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') From 6b970ee01cf98dd15736931d6067cf89cd3ec533 Mon Sep 17 00:00:00 2001 From: Paolo Cozzi Date: Wed, 20 Dec 2023 15:22:28 +0100 Subject: [PATCH 2/4] :heavy_plus_sign: add bcftools stats --- modules.json | 5 ++ .../nf-core/bcftools/stats/environment.yml | 7 ++ modules/nf-core/bcftools/stats/main.nf | 60 +++++++++++++++ modules/nf-core/bcftools/stats/meta.yml | 77 +++++++++++++++++++ workflows/resequencing-mem.nf | 50 ++++++++---- 5 files changed, 184 insertions(+), 15 deletions(-) create mode 100644 modules/nf-core/bcftools/stats/environment.yml create mode 100644 modules/nf-core/bcftools/stats/main.nf create mode 100644 modules/nf-core/bcftools/stats/meta.yml diff --git a/modules.json b/modules.json index bacd94e..0d5bc3f 100644 --- a/modules.json +++ b/modules.json @@ -41,6 +41,11 @@ "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", "installed_by": ["modules"] }, + "bcftools/stats": { + "branch": "master", + "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", + "installed_by": ["modules"] + }, "bwa/index": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", diff --git a/modules/nf-core/bcftools/stats/environment.yml b/modules/nf-core/bcftools/stats/environment.yml new file mode 100644 index 0000000..1a96952 --- /dev/null +++ b/modules/nf-core/bcftools/stats/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_stats +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/stats/main.nf b/modules/nf-core/bcftools/stats/main.nf new file mode 100644 index 0000000..ffa1df6 --- /dev/null +++ b/modules/nf-core/bcftools/stats/main.nf @@ -0,0 +1,60 @@ +process BCFTOOLS_STATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(tbi) + tuple val(meta2), path(regions) + tuple val(meta3), path(targets) + tuple val(meta4), path(samples) + tuple val(meta5), path(exons) + tuple val(meta6), path(fasta) + + output: + tuple val(meta), path("*stats.txt"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? "--targets-file ${targets}" : "" + def samples_file = samples ? "--samples-file ${samples}" : "" + def reference_fasta = fasta ? "--fasta-ref ${fasta}" : "" + def exons_file = exons ? "--exons ${exons}" : "" + """ + bcftools stats \\ + $args \\ + $regions_file \\ + $targets_file \\ + $samples_file \\ + $reference_fasta \\ + $exons_file \\ + $vcf > ${prefix}.bcftools_stats.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.bcftools_stats.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/stats/meta.yml b/modules/nf-core/bcftools/stats/meta.yml new file mode 100644 index 0000000..7ea2103 --- /dev/null +++ b/modules/nf-core/bcftools/stats/meta.yml @@ -0,0 +1,77 @@ +name: bcftools_stats +description: Generates stats from VCF files +keywords: + - variant calling + - stats + - VCF +tools: + - stats: + description: | + Parses VCF or BCF and produces text file stats which is suitable for + machine processing and can be plotted using plot-vcfstats. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF input file + pattern: "*.{vcf}" + - tbi: + type: file + description: | + The tab index for the VCF file to be inspected. Optional: only required when parameter regions is chosen. + pattern: "*.tbi" + - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. (VCF, BED or tab-delimited) + - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon tbi index files) + - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' + - exons: + type: file + description: | + Tab-delimited file with exons for indel frameshifts (chr,beg,end; 1-based, inclusive, optionally bgzip compressed). + e.g. 'exons.tsv.gz' + - fasta: + type: file + description: | + Faidx indexed reference sequence file to determine INDEL context. + e.g. 'reference.fa' +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - stats: + type: file + description: Text output file containing stats + pattern: "*_{stats.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@SusiJo" + - "@TCLamnidis" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@SusiJo" + - "@TCLamnidis" diff --git a/workflows/resequencing-mem.nf b/workflows/resequencing-mem.nf index 71aaf7d..73addc9 100644 --- a/workflows/resequencing-mem.nf +++ b/workflows/resequencing-mem.nf @@ -32,21 +32,25 @@ include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' // // MODULE: Installed directly from nf-core/modules // -include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { SEQKIT_RMDUP as SEQKIT_RMDUP_R1; SEQKIT_RMDUP as SEQKIT_RMDUP_R2 } from '../modules/cnr-ibba/seqkit/rmdup/main' -include { TRIMGALORE } from '../modules/nf-core/trimgalore/main' -include { BWA_MEM } from '../modules/nf-core/bwa/mem/main' -include { BAMADDRG } from '../modules/cnr-ibba/bamaddrg/main' -include { PICARD_MARKDUPLICATES } from '../modules/nf-core/picard/markduplicates/main' -include { SAMTOOLS_INDEX } from '../modules/nf-core/samtools/index/main' -include { SAMTOOLS_FLAGSTAT } from '../modules/nf-core/samtools/flagstat/main' -include { SAMTOOLS_COVERAGE } from '../modules/nf-core/samtools/coverage/main' -include { FREEBAYES_PARALLEL } from '../subworkflows/cnr-ibba/freebayes_parallel/main' -include { BCFTOOLS_NORM } from '../modules/nf-core/bcftools/norm/main' -include { TABIX_TABIX } from '../modules/nf-core/tabix/tabix/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { + SEQKIT_RMDUP as SEQKIT_RMDUP_R1; + SEQKIT_RMDUP as SEQKIT_RMDUP_R2; + } from '../modules/cnr-ibba/seqkit/rmdup/main' +include { TRIMGALORE } from '../modules/nf-core/trimgalore/main' +include { BWA_MEM } from '../modules/nf-core/bwa/mem/main' +include { BAMADDRG } from '../modules/cnr-ibba/bamaddrg/main' +include { PICARD_MARKDUPLICATES } from '../modules/nf-core/picard/markduplicates/main' +include { SAMTOOLS_INDEX } from '../modules/nf-core/samtools/index/main' +include { SAMTOOLS_FLAGSTAT } from '../modules/nf-core/samtools/flagstat/main' +include { SAMTOOLS_COVERAGE } from '../modules/nf-core/samtools/coverage/main' +include { FREEBAYES_PARALLEL } from '../subworkflows/cnr-ibba/freebayes_parallel/main' +include { BCFTOOLS_NORM } from '../modules/nf-core/bcftools/norm/main' +include { TABIX_TABIX } from '../modules/nf-core/tabix/tabix/main' +include { BCFTOOLS_STATS } from '../modules/nf-core/bcftools/stats/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' // A workflow definition which does not declare any name is assumed to be the // main workflow and it’s implicitly executed. Therefore it’s the entry point @@ -204,12 +208,28 @@ workflow RESEQUENCING_MEM { TABIX_TABIX(BCFTOOLS_NORM.out.vcf) ch_versions = ch_versions.mix(TABIX_TABIX.out.versions) + // prepare input for bcftools stats + bcftools_in_ch = BCFTOOLS_NORM.out.vcf + .join(TABIX_TABIX.out.tbi) + // .view() + + BCFTOOLS_STATS( + bcftools_in_ch, + [[], []], + [[], []], + [[], []], + [[], []], + [[], []] + ) + ch_versions = ch_versions.mix(BCFTOOLS_STATS.out.versions) + // get only the data I need for a MultiQC step multiqc_input = FASTQC.out.html.map{it[1]}.ifEmpty([]) .concat(FASTQC.out.zip.map{it[1]}.ifEmpty([])) .concat(TRIMGALORE.out.log.map{it[1]}.ifEmpty([])) .concat(PICARD_MARKDUPLICATES.out.metrics.map{it[1]}.ifEmpty([])) .concat(SAMTOOLS_FLAGSTAT.out.flagstat.map{it[1]}.ifEmpty([])) + .concat(BCFTOOLS_STATS.out.stats.map{it[1]}.ifEmpty([])) .collect() // .view() From fd0ff7d4d408d1fb12e1d5c403ff47b57dbb0397 Mon Sep 17 00:00:00 2001 From: Paolo Cozzi Date: Wed, 20 Dec 2023 15:31:43 +0100 Subject: [PATCH 3/4] :heavy_plus_sign: add samtools stats --- modules.json | 5 ++ .../nf-core/samtools/stats/environment.yml | 7 ++ modules/nf-core/samtools/stats/main.nf | 49 ++++++++++++ modules/nf-core/samtools/stats/meta.yml | 63 +++++++++++++++ .../nf-core/samtools/stats/tests/main.nf.test | 78 +++++++++++++++++++ .../samtools/stats/tests/main.nf.test.snap | 64 +++++++++++++++ modules/nf-core/samtools/stats/tests/tags.yml | 2 + workflows/resequencing-mem.nf | 12 ++- 8 files changed, 277 insertions(+), 3 deletions(-) create mode 100644 modules/nf-core/samtools/stats/environment.yml create mode 100644 modules/nf-core/samtools/stats/main.nf create mode 100644 modules/nf-core/samtools/stats/meta.yml create mode 100644 modules/nf-core/samtools/stats/tests/main.nf.test create mode 100644 modules/nf-core/samtools/stats/tests/main.nf.test.snap create mode 100644 modules/nf-core/samtools/stats/tests/tags.yml diff --git a/modules.json b/modules.json index 0d5bc3f..a7c8107 100644 --- a/modules.json +++ b/modules.json @@ -101,6 +101,11 @@ "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", "installed_by": ["modules"] }, + "samtools/stats": { + "branch": "master", + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "installed_by": ["modules"] + }, "tabix/bgzip": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", diff --git a/modules/nf-core/samtools/stats/environment.yml b/modules/nf-core/samtools/stats/environment.yml new file mode 100644 index 0000000..b89ce64 --- /dev/null +++ b/modules/nf-core/samtools/stats/environment.yml @@ -0,0 +1,7 @@ +name: samtools_stats +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.18 diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf new file mode 100644 index 0000000..7539140 --- /dev/null +++ b/modules/nf-core/samtools/stats/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_STATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : + 'biocontainers/samtools:1.18--h50ea8bc_1' }" + + input: + tuple val(meta), path(input), path(input_index) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.stats"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + """ + samtools \\ + stats \\ + --threads ${task.cpus} \\ + ${reference} \\ + ${input} \\ + > ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml new file mode 100644 index 0000000..735ff81 --- /dev/null +++ b/modules/nf-core/samtools/stats/meta.yml @@ -0,0 +1,63 @@ +name: samtools_stats +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test b/modules/nf-core/samtools/stats/tests/main.nf.test new file mode 100644 index 0000000..20c3efe --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test @@ -0,0 +1,78 @@ +nextflow_process { + + name "Test Process SAMTOOLS_STATS" + script "../main.nf" + process "SAMTOOLS_STATS" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/stats" + + test("SAMTOOLS STATS Should run without failures") { + + when { + params { + + outdir = "$outputDir" + } + process { + """ + // define inputs of the process here. + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + + ] + input[1] = [[],[]] + """ + + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + + } + + test("SAMTOOLS CRAM Should run without failures") { + + when { + params { + + outdir = "$outputDir" + } + process { + """ + // define inputs of the process here + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true) + + ] + input[1] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + + + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + + } + + +} diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test.snap b/modules/nf-core/samtools/stats/tests/main.nf.test.snap new file mode 100644 index 0000000..025c83a --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test.snap @@ -0,0 +1,64 @@ +{ + "SAMTOOLS STATS Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,045a48208b1c6f5b8af4347fe31f4def" + ] + ], + "1": [ + "versions.yml:md5,650a365c6635001436008350ae83337c" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,045a48208b1c6f5b8af4347fe31f4def" + ] + ], + "versions": [ + "versions.yml:md5,650a365c6635001436008350ae83337c" + ] + } + ], + "timestamp": "2023-12-04T11:07:28.26821485" + }, + "SAMTOOLS CRAM Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,dfbfa130d4a6925ddd1931dcd8354a43" + ] + ], + "1": [ + "versions.yml:md5,650a365c6635001436008350ae83337c" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,dfbfa130d4a6925ddd1931dcd8354a43" + ] + ], + "versions": [ + "versions.yml:md5,650a365c6635001436008350ae83337c" + ] + } + ], + "timestamp": "2023-12-04T11:07:50.356233402" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/stats/tests/tags.yml b/modules/nf-core/samtools/stats/tests/tags.yml new file mode 100644 index 0000000..7c28e30 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/stats: + - modules/nf-core/samtools/stats/** diff --git a/workflows/resequencing-mem.nf b/workflows/resequencing-mem.nf index 73addc9..4ce1f8d 100644 --- a/workflows/resequencing-mem.nf +++ b/workflows/resequencing-mem.nf @@ -44,6 +44,7 @@ include { BWA_MEM } from '../modules/nf-core/bwa/mem/m include { BAMADDRG } from '../modules/cnr-ibba/bamaddrg/main' include { PICARD_MARKDUPLICATES } from '../modules/nf-core/picard/markduplicates/main' include { SAMTOOLS_INDEX } from '../modules/nf-core/samtools/index/main' +include { SAMTOOLS_STATS } from '../modules/nf-core/samtools/stats/main' include { SAMTOOLS_FLAGSTAT } from '../modules/nf-core/samtools/flagstat/main' include { SAMTOOLS_COVERAGE } from '../modules/nf-core/samtools/coverage/main' include { FREEBAYES_PARALLEL } from '../subworkflows/cnr-ibba/freebayes_parallel/main' @@ -160,15 +161,19 @@ workflow RESEQUENCING_MEM { SAMTOOLS_INDEX(PICARD_MARKDUPLICATES.out.bam) ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions) - // now I can do the flagstat step. I need bam and bai files from markduplicates and + // now I can do the samtools steps. I need bam and bai files from markduplicates and // samtools index and the meta informations as three different input. From both channels, // I have an output like 'val(meta), path("*.bam")' and 'val(meta), path("*.bai")' // I can join two channels with the same key (https://www.nextflow.io/docs/latest/operator.html#join) // two options to check to have exactly the same keys with no duplications - flagstat_input = PICARD_MARKDUPLICATES.out.bam.join(SAMTOOLS_INDEX.out.bai, failOnMismatch: true, failOnDuplicate: true)//.view() + samtools_input = PICARD_MARKDUPLICATES.out.bam.join(SAMTOOLS_INDEX.out.bai, failOnMismatch: true, failOnDuplicate: true)//.view() + + // call samtools stats + SAMTOOLS_STATS(samtools_input, PREPARE_GENOME.out.genome_fasta) + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) // time to call flagstat - SAMTOOLS_FLAGSTAT(flagstat_input) + SAMTOOLS_FLAGSTAT(samtools_input) ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) // prepare input for samtools coverage @@ -228,6 +233,7 @@ workflow RESEQUENCING_MEM { .concat(FASTQC.out.zip.map{it[1]}.ifEmpty([])) .concat(TRIMGALORE.out.log.map{it[1]}.ifEmpty([])) .concat(PICARD_MARKDUPLICATES.out.metrics.map{it[1]}.ifEmpty([])) + .concat(SAMTOOLS_STATS.out.stats.map{it[1]}.ifEmpty([])) .concat(SAMTOOLS_FLAGSTAT.out.flagstat.map{it[1]}.ifEmpty([])) .concat(BCFTOOLS_STATS.out.stats.map{it[1]}.ifEmpty([])) .collect() From edfa7363b38fef66915ed924a02ea11660dbfc99 Mon Sep 17 00:00:00 2001 From: Paolo Cozzi Date: Wed, 20 Dec 2023 15:36:37 +0100 Subject: [PATCH 4/4] :heavy_plus_sign: add samtools idxstats --- modules.json | 5 ++ .../nf-core/samtools/idxstats/environment.yml | 7 +++ modules/nf-core/samtools/idxstats/main.nf | 48 +++++++++++++++++ modules/nf-core/samtools/idxstats/meta.yml | 52 +++++++++++++++++++ .../samtools/idxstats/tests/main.nf.test | 36 +++++++++++++ .../samtools/idxstats/tests/main.nf.test.snap | 16 ++++++ .../nf-core/samtools/idxstats/tests/tags.yml | 2 + workflows/resequencing-mem.nf | 6 +++ 8 files changed, 172 insertions(+) create mode 100644 modules/nf-core/samtools/idxstats/environment.yml create mode 100644 modules/nf-core/samtools/idxstats/main.nf create mode 100644 modules/nf-core/samtools/idxstats/meta.yml create mode 100644 modules/nf-core/samtools/idxstats/tests/main.nf.test create mode 100644 modules/nf-core/samtools/idxstats/tests/main.nf.test.snap create mode 100644 modules/nf-core/samtools/idxstats/tests/tags.yml diff --git a/modules.json b/modules.json index a7c8107..d38b233 100644 --- a/modules.json +++ b/modules.json @@ -96,6 +96,11 @@ "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", "installed_by": ["modules"] }, + "samtools/idxstats": { + "branch": "master", + "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "installed_by": ["modules"] + }, "samtools/index": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", diff --git a/modules/nf-core/samtools/idxstats/environment.yml b/modules/nf-core/samtools/idxstats/environment.yml new file mode 100644 index 0000000..2401db0 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/environment.yml @@ -0,0 +1,7 @@ +name: samtools_idxstats +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.18 diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf new file mode 100644 index 0000000..00d916b --- /dev/null +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -0,0 +1,48 @@ +process SAMTOOLS_IDXSTATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : + 'biocontainers/samtools:1.18--h50ea8bc_1' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.idxstats"), emit: idxstats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + samtools \\ + idxstats \\ + --threads ${task.cpus-1} \\ + $bam \\ + > ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml new file mode 100644 index 0000000..344e92a --- /dev/null +++ b/modules/nf-core/samtools/idxstats/meta.yml @@ -0,0 +1,52 @@ +name: samtools_idxstats +description: Reports alignment summary statistics for a BAM/CRAM/SAM file +keywords: + - stats + - mapping + - counts + - chromosome + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test b/modules/nf-core/samtools/idxstats/tests/main.nf.test new file mode 100644 index 0000000..f6c9215 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process SAMTOOLS_IDXSTATS" + script "../main.nf" + process "SAMTOOLS_IDXSTATS" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/idxstats" + + test("BAM") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.idxstats).match() }, + { assert path(process.out.versions.get(0)).getText().contains("samtools") } + ) + } + } +} diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap new file mode 100644 index 0000000..4c6c12b --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap @@ -0,0 +1,16 @@ +{ + "BAM": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ] + ], + "timestamp": "2023-11-14T15:52:19.875194" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/idxstats/tests/tags.yml b/modules/nf-core/samtools/idxstats/tests/tags.yml new file mode 100644 index 0000000..d3057c6 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/idxstats: + - modules/nf-core/samtools/idxstats/** diff --git a/workflows/resequencing-mem.nf b/workflows/resequencing-mem.nf index 4ce1f8d..926c7e7 100644 --- a/workflows/resequencing-mem.nf +++ b/workflows/resequencing-mem.nf @@ -45,6 +45,7 @@ include { BAMADDRG } from '../modules/cnr-ibba/bamaddrg include { PICARD_MARKDUPLICATES } from '../modules/nf-core/picard/markduplicates/main' include { SAMTOOLS_INDEX } from '../modules/nf-core/samtools/index/main' include { SAMTOOLS_STATS } from '../modules/nf-core/samtools/stats/main' +include { SAMTOOLS_IDXSTATS } from '../modules/nf-core/samtools/idxstats/main' include { SAMTOOLS_FLAGSTAT } from '../modules/nf-core/samtools/flagstat/main' include { SAMTOOLS_COVERAGE } from '../modules/nf-core/samtools/coverage/main' include { FREEBAYES_PARALLEL } from '../subworkflows/cnr-ibba/freebayes_parallel/main' @@ -172,6 +173,10 @@ workflow RESEQUENCING_MEM { SAMTOOLS_STATS(samtools_input, PREPARE_GENOME.out.genome_fasta) ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) + // call samtools idxstats + SAMTOOLS_IDXSTATS(samtools_input) + ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions) + // time to call flagstat SAMTOOLS_FLAGSTAT(samtools_input) ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) @@ -234,6 +239,7 @@ workflow RESEQUENCING_MEM { .concat(TRIMGALORE.out.log.map{it[1]}.ifEmpty([])) .concat(PICARD_MARKDUPLICATES.out.metrics.map{it[1]}.ifEmpty([])) .concat(SAMTOOLS_STATS.out.stats.map{it[1]}.ifEmpty([])) + .concat(SAMTOOLS_IDXSTATS.out.idxstats.map{it[1]}.ifEmpty([])) .concat(SAMTOOLS_FLAGSTAT.out.flagstat.map{it[1]}.ifEmpty([])) .concat(BCFTOOLS_STATS.out.stats.map{it[1]}.ifEmpty([])) .collect()