diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index c430403..093ce79 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -42,7 +42,7 @@ module_order:
         name: "Read Alignment (STAR)"
     - picard:
         name: "GATK4 MarkDuplicates"
-        info: " metrics generated either by GATK4 MarkDuplicates or EstimateLibraryComplexity (with --use_gatk_spark)."
+        info: " metrics generated by GATK4 MarkDuplicates."
     - samtools:
         name: "Samtools Flagstat"
     - mosdepth:
diff --git a/conf/modules.config b/conf/modules.config
index d157316..4b688b4 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -325,10 +325,8 @@ process {
     }
 
     withName: ".*:GATK4_MAPPING:(BWAMEM.*_MEM|DRAGMAP_ALIGN)" {
-        // Markduplicates Spark NEEDS name-sorted reads or runtime goes through the roof
         // However if it's skipped, reads need to be coordinate-sorted
-        // Only name sort if Spark for Markduplicates + duplicate marking is not skipped
-        ext.args2 = { params.use_gatk_spark && params.use_gatk_spark.contains('markduplicates') && (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('markduplicates'))) ? '-n' : '' }
+        ext.args2 = { (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('markduplicates'))) ? '-n' : '' }
         ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.tokenize('.')[0]) : "" }
         publishDir = [
             mode: params.publish_dir_mode,
@@ -650,21 +648,9 @@ process {
         // ]
         // }
 
-    if (params.use_gatk_spark && params.use_gatk_spark.split(',').contains('markduplicates')) {
-        withName: '.*:GATK_PREPROCESSING:MARKDUPLICATES_SPARK:SAMTOOLS_CRAMTOBAM'{
-            ext.prefix = { "${meta.id}.md" }
-            ext.when = { params.save_output_as_bam}
-            publishDir = [
-                enabled: params.save_output_as_bam,
-                mode: params.publish_dir_mode,
-                path: { "${params.outdir}/preprocessing/markduplicates/${meta.patient}/${meta.id}/" },
-                pattern: "*{md.bam,md.bam.bai}"
-            ]
-        }
-    }
 
     // PREPARE_RECALIBRATION
-    withName: 'BASERECALIBRATOR|BASERECALIBRATOR_SPARK' {
+    withName: 'BASERECALIBRATOR' {
         ext.args = { meta.status == 2 ? "--lenient" : "" }
         ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.recal" : "${meta.id}_${intervals.simpleName}.recal" }
         publishDir = [
@@ -687,7 +673,7 @@ process {
 
     // RECALIBRATE
 
-    withName: 'APPLYBQSR|APPLYBQSR_SPARK' {
+    withName: 'APPLYBQSR' {
         ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.recal" : "${meta.id}_${intervals.simpleName}.recal" }
         publishDir = [
             enabled: !params.save_output_as_bam,
@@ -700,7 +686,7 @@ process {
 
     if ((params.step == 'mapping' || params.step == 'markduplicates'|| params.step == 'prepare_recalibration'|| params.step == 'recalibrate') && (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator')))) {
 
-        withName: '.*:GATK_PREPROCESSING:(RECALIBRATE|RECALIBRATE_SPARK):MERGE_INDEX_CRAM:MERGE_CRAM' {
+        withName: '.*:GATK_PREPROCESSING:RECALIBRATE:MERGE_INDEX_CRAM:MERGE_CRAM' {
             ext.prefix = { "${meta.id}.recal" }
             ext.when = { meta.num_intervals > 1 }
             publishDir = [
@@ -711,7 +697,7 @@ process {
             ]
         }
 
-        withName: '.*:GATK_PREPROCESSING:(RECALIBRATE|RECALIBRATE_SPARK):MERGE_INDEX_CRAM:INDEX_CRAM' {
+        withName: '.*:GATK_PREPROCESSING:RECALIBRATE:MERGE_INDEX_CRAM:INDEX_CRAM' {
             publishDir = [
                 enabled: !params.save_output_as_bam,
                 mode: params.publish_dir_mode,
@@ -1326,7 +1312,7 @@ process {
             path: { "${params.outdir}/reports"},
            mode: params.publish_dir_mode,
            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
-            enabled: !params.skip_multiqc
+            enabled: !(params.skip_tools && params.skip_tools.split(',').contains('multiqc'))
         ]
         errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'}
     }
diff --git a/nextflow.config b/nextflow.config
index b14d378..eda1e07 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -32,7 +32,6 @@ params {
 
     // Alignment
    aligner = 'bwa-mem' // Only STAR is currently supported.
-    use_gatk_spark = null // GATK Spark implementation of their tools in local mode not used by default
    star_twopass = true
    star_ignore_sjdbgtf = false // Ignore GTF file while creating index or alignment by STAR
    star_max_memory_bamsort = 0 // STAR parameter limitBAMsortRAM to specify maximum RAM for sorting BAM
@@ -48,12 +47,6 @@ params {
    dragmap = null
    hisat2_build_memory = null
 
-    // Skip steps
-    skip_baserecalibration = false
-    skip_intervallisttools = false
-    skip_variantfiltration = false
-    skip_variantannotation = false
-    skip_multiqc = false
 
    // Preprocessing of alignment
    remove_duplicates = false
@@ -95,16 +88,9 @@ params {
    ignore_soft_clipped_bases = true
    // Variant annotation
    tools = null // No default Variant_Calling or Annotation tools
-    annotate_tools = null // List of annotation tools to run - snpeff or vep or merge
+    annotate_tools = null // List of annotation tools to run - only vep available
    annotation_cache = false // Annotation cache disabled
-    cadd_cache = null // CADD cache disabled
-    cadd_indels = null // No CADD InDels file
-    cadd_indels_tbi = null // No CADD InDels index
-    cadd_wg_snvs = null // No CADD SNVs file
-    cadd_wg_snvs_tbi = null // No CADD SNVs index
    genesplicer = null // genesplicer disabled within VEP
-    snpeff_cache = null // No directory for snpEff cache
-    snpeff_db = null // No default db for snpeff
    vep_cache = null // No directory for VEP cache
    vep_genome = null // No default genome for VEP
    // vep_cache_version = '106' // No default cache version for VEP
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 968ae1b..19e2582 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -333,8 +333,8 @@
             "type": "string",
             "fa_icon": "fas fa-toolbox",
             "description": "Tools to use for variant calling and/or for annotation.",
-            "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, HaplotypeCaller, mpileup, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka2\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.",
-            "pattern": "^((freebayes|manta|merge|sage|mutect2|snpeff|strelka|vep|consensus|filtering|normalise|normalize|rna_filtering|vcf_qc|vcf2maf|preprocessing|second_run)*,?)*$"
+            "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka2, SAGE\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n\n- VEP (only).\n\n> **NB** As RNADNAvar will use bgzip and tabix to compress and index annotated VCF files, it expects VCF files to be sorted when starting from `--step annotate`.",
+            "pattern": "^((freebayes|manta|merge|sage|mutect2|strelka|vep|consensus|filtering|normalise|normalize|rna_filtering|vcf_qc|vcf2maf|preprocessing|second_run)*,?)*$"
         },
         "skip_tools": {
             "type": "string",
@@ -504,79 +504,93 @@
         "annotate_tools": {
             "type": "string",
             "fa_icon": "fas fa-hammer",
-            "description": "Specify which tools RNADNAvar should use for annotating variants. Values can be 'snpeff', 'vep' or 'merge'. If you specify 'merge', the pipeline runs both snpeff and VEP annotation.",
+            "description": "Specify which tools RNADNAvar should use for annotating variants. Only VEP is implemented.",
             "help_text": "List of tools to be used for variant annotation.",
-            "pattern": "^((snpeff|vep|merge)*(,)*)*$",
+            "pattern": "^((vep)*(,)*)*$",
             "hidden": true
         },
         "annotation_cache": {
             "type": "boolean",
             "fa_icon": "fas fa-database",
             "description": "Enable the use of cache for annotation",
-            "help_text": "And disable usage of snpeff and vep specific containers for annotation\n\nTo be used with `--snpeff_cache` and/or `--vep_cache`",
+            "help_text": "Disables usage of VEP-specific containers for annotation\n\nTo be used with `--vep_cache`",
             "hidden": true
         },
-        "cadd_cache": {
+        "genesplicer": {
             "type": "boolean",
-            "fa_icon": "fas fa-database",
-            "description": "Enable CADD cache.",
+            "fa_icon": "fas fa-gavel",
+            "description": "Enable the use of the VEP GeneSplicer plugin.",
             "hidden": true
         },
-        "cadd_indels": {
-            "type": "string",
-            "fa_icon": "fas fa-file",
-            "description": "Path to CADD InDels file.",
-            "hidden": true
+        "vep_loftee": {
+            "type": "boolean",
+            "fa_icon": "fas fa-database",
+            "description": "Enable the use of the VEP LOFTEE plugin.",
+            "hidden": true,
+            "help_text": "For details, see [here](https://github.com/konradjk/loftee)."
         },
-        "cadd_indels_tbi": {
+        "vep_cache": {
             "type": "string",
-            "fa_icon": "fas fa-file",
-            "description": "Path to CADD InDels index.",
+            "fa_icon": "fas fa-database",
+            "description": "Path to VEP cache",
+            "help_text": "To be used with `--annotation_cache`",
             "hidden": true
         },
-        "cadd_wg_snvs": {
+        "vep_dbnsfp": {
+            "type": "boolean",
+            "fa_icon": "fas fa-database",
+            "description": "Enable the use of the VEP dbNSFP plugin.",
+            "hidden": true,
+            "help_text": "For details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#dbnsfp)."
+        },
+        "vep_spliceai": {
+            "type": "boolean",
+            "fa_icon": "fas fa-database",
+            "description": "Enable the use of the VEP SpliceAI plugin.",
+            "hidden": true,
+            "help_text": "For details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#spliceai)."
+        },
+        "spliceai_snv": {
             "type": "string",
             "fa_icon": "fas fa-file",
-            "description": "Path to CADD SNVs file.",
+            "description": "Path to spliceai raw scores snv file.",
+            "help_text": "To be used with `--vep_spliceai`.",
             "hidden": true
         },
-        "cadd_wg_snvs_tbi": {
+        "spliceai_snv_tbi": {
             "type": "string",
             "fa_icon": "fas fa-file",
-            "description": "Path to CADD SNVs index.",
-            "hidden": true
-        },
-        "genesplicer": {
-            "type": "boolean",
-            "fa_icon": "fas fa-gavel",
-            "description": "Enable the use of the VEP GeneSplicer plugin.",
+            "description": "Path to spliceai raw scores snv tabix indexed file.",
+            "help_text": "To be used with `--vep_spliceai`.",
             "hidden": true
         },
-        "snpeff_cache": {
+        "spliceai_indel": {
             "type": "string",
-            "fa_icon": "fas fa-database",
-            "description": "Path to snpEff cache",
-            "help_text": "To be used with `--annotation_cache`",
+            "fa_icon": "fas fa-file",
+            "description": "Path to spliceai raw scores indel file.",
+            "help_text": "To be used with `--vep_spliceai`.",
             "hidden": true
         },
-        "vep_cache": {
+        "spliceai_indel_tbi": {
             "type": "string",
-            "fa_icon": "fas fa-database",
-            "description": "Path to VEP cache",
-            "help_text": "To be used with `--annotation_cache`",
+            "fa_icon": "fas fa-file",
+            "description": "Path to spliceai raw scores indel tabix indexed file.",
+            "help_text": "To be used with `--vep_spliceai`.",
             "hidden": true
         },
-        "vep_dbnsfp": {
+        "vep_spliceregion": {
             "type": "boolean",
             "fa_icon": "fas fa-database",
-            "description": "Enable the use of the VEP dbNSFP plugin.",
+            "description": "Enable the use of the VEP SpliceRegion plugin.",
             "hidden": true,
-            "help_text": "For details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#dbnsfp)."
+            "help_text": "For details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#spliceregion) and [here](https://www.ensembl.info/2018/10/26/cool-stuff-the-vep-can-do-splice-site-variant-annotation/)."
         },
-        "snpeff_db": {
+        "vep_custom_args": {
             "type": "string",
-            "fa_icon": "fas fa-database",
-            "description": "snpEff DB version"
+            "fa_icon": "fas fa-toolbox",
+            "description": "Add an extra custom argument to VEP.",
+            "hidden": true,
+            "help_text": "Use this parameter to pass additional custom arguments to VEP."
         },
         "vep_genome": {
             "type": "string",
diff --git a/subworkflows/nf-core/annotation/snpeff/main.nf b/subworkflows/nf-core/annotation/snpeff/main.nf
deleted file mode 100644
index 54bfb9c..0000000
--- a/subworkflows/nf-core/annotation/snpeff/main.nf
+++ /dev/null
@@ -1,28 +0,0 @@
-//
-// Run SNPEFF to annotate VCF files
-//
-
-include { SNPEFF } from '../../../../modules/nf-core/modules/snpeff/main'
-include { TABIX_BGZIPTABIX } from '../../../../modules/nf-core/modules/tabix/bgziptabix/main'
-
-workflow ANNOTATION_SNPEFF {
-    take:
-    vcf // channel: [ val(meta), vcf ]
-    snpeff_db // value: db version to use
-    snpeff_cache // path: /path/to/snpeff/cache (optionnal)
-
-    main:
-    ch_versions = Channel.empty()
-
-    SNPEFF(vcf, snpeff_db, snpeff_cache)
-    TABIX_BGZIPTABIX(SNPEFF.out.vcf)
-
-    // Gather versions of all tools used
-    ch_versions = ch_versions.mix(SNPEFF.out.versions.first())
-    ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions.first())
-
-    emit:
-    vcf_tbi = TABIX_BGZIPTABIX.out.gz_tbi // channel: [ val(meta), vcf.gz, vcf.gz.tbi ]
-    reports = SNPEFF.out.report // path: *.html
-    versions = ch_versions // path: versions.yml
-}
diff --git a/subworkflows/nf-core/annotation/snpeff/meta.yml b/subworkflows/nf-core/annotation/snpeff/meta.yml
deleted file mode 100644
index e077362..0000000
--- a/subworkflows/nf-core/annotation/snpeff/meta.yml
+++ /dev/null
@@ -1,29 +0,0 @@
-name: annotation_snpeff
-description: |
-  Perform annotation with snpeff and bgzip + tabix index the resulting VCF file
-keywords:
-  - snpeff
-modules:
-  - snpeff
-  - tabix/bgziptabix
-input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test' ]
-  - input:
-      type: vcf
-      description: list containing one vcf file
-      pattern: "[ *.{vcf,vcf.gz} ]"
-output:
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-  - vcf_tbi:
-      type: file
-      description: Compressed vcf file + tabix index
-      pattern: "[ *{.vcf.gz,vcf.gz.tbi} ]"
-authors:
-  - "@maxulysse"
diff --git a/subworkflows/nf-core/snpeff_annotate.nf b/subworkflows/nf-core/snpeff_annotate.nf
deleted file mode 100644
index 9a8b65b..0000000
--- a/subworkflows/nf-core/snpeff_annotate.nf
+++ /dev/null
@@ -1,34 +0,0 @@
-//
-// Run snpEff to annotate VCF files
-//
-
-include { SNPEFF } from '../../modules/nf-core/modules/snpeff/main'
-include { TABIX_BGZIPTABIX } from '../../modules/nf-core/modules/tabix/bgziptabix/main'
-
-workflow SNPEFF_ANNOTATE {
-    take:
-    vcf // channel: [ val(meta), vcf, tbi ]
-    snpeff_db // value: version of db to use
-    snpeff_cache // path: path_to_snpeff_cache (optionnal)
-
-    main:
-
-    ch_versions = Channel.empty()
-
-    SNPEFF (
-        vcf,
-        snpeff_db,
-        snpeff_cache
-    )
-    ch_versions = ch_versions.mix(SNPEFF.out.versions.first())
-
-    TABIX_BGZIPTABIX (
-        SNPEFF.out.vcf
-    )
-    ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions.first())
-
-    emit:
-    vcf_tbi = TABIX_BGZIPTABIX.out.gz_tbi // channel: [ val(meta), vcf, tbi ]
-    reports = SNPEFF.out.report // path: *.html
-    versions = ch_versions // channel: [versions.yml]
-}
diff --git a/tests/test_annotation.yml b/tests/test_annotation.yml
index b6fb529..14ad4f1 100644
--- a/tests/test_annotation.yml
+++ b/tests/test_annotation.yml
@@ -1,27 +1,8 @@
-- name: Run snpEff
-  command: nextflow run main.nf -profile test,docker --annotate_tools snpeff
-  tags:
-    - annotation
-    - snpeff
-  files:
-    - path: results/variant_annotation/GM12878/GM12878_snpEff.ann.vcf.gz
-    - path: results/variant_annotation/GM12878/GM12878_snpEff.ann.vcf.gz.tbi
-    - path: results/reports/multiqc_report.html
 - name: Run VEP
-  command: nextflow run main.nf -profile test,docker --annotate_tools vep --skip_multiqc
+  command: nextflow run main.nf -profile test,docker --tools vep --skip_tools 'multiqc'
   tags:
     - annotation
     - vep
   files:
     - path: results/variant_annotation/GM12878/GM12878_VEP.ann.vcf.gz
-    - path: results/variant_annotation/GM12878/GM12878_VEP.ann.vcf.gz.tbi
-- name: Run snpEff followed by VEP
-  command: nextflow run main.nf -profile test,docker --annotate_tools merge --skip_multiqc
-  tags:
-    - annotation
-    - merge
-    - snpeff
-    - vep
-  files:
-    - path: results/variant_annotation/GM12878/GM12878_snpEff_VEP.ann.vcf.gz
-    - path: results/variant_annotation/GM12878/GM12878_snpEff_VEP.ann.vcf.gz.tbi
+    - path: results/variant_annotation/GM12878/GM12878_VEP.ann.vcf.gz.tbi
\ No newline at end of file
diff --git a/tests/test_skipbasecalib.yml b/tests/test_skipbasecalib.yml
index 7b5b213..6c2ccc0 100644
--- a/tests/test_skipbasecalib.yml
+++ b/tests/test_skipbasecalib.yml
@@ -1,5 +1,5 @@
 - name: Run pipeline without base calibration step
-  command: nextflow run main.nf -profile test,docker --skip_baserecalibration true
+  command: nextflow run main.nf -profile test,docker --skip_tools 'baserecalibrator'
   tags:
     - skipbasecalib
     - preprocessing
diff --git a/workflows/rnadnavar.nf b/workflows/rnadnavar.nf
index 8624491..955383c 100644
--- a/workflows/rnadnavar.nf
+++ b/workflows/rnadnavar.nf
@@ -31,7 +31,6 @@ def checkPathParamList = [
     params.known_indels,
     params.known_indels_tbi,
     params.multiqc_config,
-    params.snpeff_cache,
     params.vep_cache,
     params.star_index,
     params.hisat2_index,