From 75174362ed1408b393a439848a21595975d2324d Mon Sep 17 00:00:00 2001 From: Paolo Cozzi Date: Wed, 5 Feb 2025 14:33:46 +0100 Subject: [PATCH 1/6] :truck: rename FREEBAYES_NORMALIZE subworkflow into NORMALIZE_VCF --- CHANGELOG.md | 4 ++-- .../local/{freebayes_normalize.nf => normalize_vcf.nf} | 2 +- workflows/resequencing-mem.nf | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) rename subworkflows/local/{freebayes_normalize.nf => normalize_vcf.nf} (98%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ee22fa..d70fd08 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Support for institutional configuration - Parallelize normalization steps by chromosomes. Merge VCF files after normalization - Normalize VCF file using `vcfwave` ([#76](https://github.com/cnr-ibba/nf-resequencing-mem/issues/76)) -- Add `freebayes_normalized` local subworkflow +- Add `normalize_vcf` local subworkflow - Update `nextflow` to version `24.04.0` - Using the `resourceLimits` directive to set the max requirements for each process - Update CI system ([#81](https://github.com/cnr-ibba/nf-resequencing-mem/issues/81)) @@ -31,7 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add `bcftools/concat` process from _nf-core_ repository - Add `bcftools_filltags` process - Add `vcflib_vcfwave` local process -- Add `freebayes_normalized` local subworkflow +- Add `normalize_vcf` local subworkflow - Add `bcftools/sort` process ### `Fixed` diff --git a/subworkflows/local/freebayes_normalize.nf b/subworkflows/local/normalize_vcf.nf similarity index 98% rename from subworkflows/local/freebayes_normalize.nf rename to subworkflows/local/normalize_vcf.nf index a04ad2e..ad23a56 100644 --- a/subworkflows/local/freebayes_normalize.nf +++ b/subworkflows/local/normalize_vcf.nf @@ -12,7 +12,7 @@ include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftoo include { 
BCFTOOLS_FILLTAGS } from '../../modules/local/bcftools_filltags' -workflow FREEBAYES_NORMALIZE { +workflow NORMALIZE_VCF { take: vcf_ch // channel: [mandatory] the VCF file to normalize tbi_ch // channel: [mandatory] the index file for the VCF file diff --git a/workflows/resequencing-mem.nf b/workflows/resequencing-mem.nf index fa5afce..10a1314 100644 --- a/workflows/resequencing-mem.nf +++ b/workflows/resequencing-mem.nf @@ -29,7 +29,7 @@ include { TRIMGALORE } from '../modules/nf-core/trimg include { BWA_MEM } from '../modules/nf-core/bwa/mem/main' include { CRAM_FREEBAYES_PARALLEL } from '../subworkflows/local/cram_freebayes_parallel/main' include { CRAM_MARKDUPLICATES_PICARD } from '../subworkflows/local/cram_markduplicates_picard/main' -include { FREEBAYES_NORMALIZE } from '../subworkflows/local/freebayes_normalize' +include { NORMALIZE_VCF } from '../subworkflows/local/normalize_vcf' include { BCFTOOLS_CONCAT as NORMALIZED_CONCAT; BCFTOOLS_CONCAT as FREEBAYES_CONCAT; } from '../modules/nf-core/bcftools/concat/main' @@ -187,20 +187,20 @@ workflow RESEQUENCING_MEM { // .view() } else { // normalize VCF using freebayes and bcftools - FREEBAYES_NORMALIZE( + NORMALIZE_VCF( CRAM_FREEBAYES_PARALLEL.out.vcf, CRAM_FREEBAYES_PARALLEL.out.tbi, PREPARE_GENOME.out.genome_fasta ) - ch_versions = ch_versions.mix(FREEBAYES_NORMALIZE.out.versions) + ch_versions = ch_versions.mix(NORMALIZE_VCF.out.versions) // concatenate all chromosome in one file. 
- bcftools_in_ch = FREEBAYES_NORMALIZE.out.vcf + bcftools_in_ch = NORMALIZE_VCF.out.vcf .map{ _meta, vcf -> [vcf] } .collect() .map{ it -> [[id: "all-samples-normalized"], it]} .join( - FREEBAYES_NORMALIZE.out.tbi + NORMALIZE_VCF.out.tbi .map{ _meta, vcf -> [vcf] } .collect() .map{ it -> [[id: "all-samples-normalized"], it]} From b5ca9b8b8f3fa2b57260cf570361e886f5109dc1 Mon Sep 17 00:00:00 2001 From: Paolo Cozzi Date: Wed, 5 Feb 2025 16:23:23 +0100 Subject: [PATCH 2/6] :wrench: define schema for normalization workflow --- main.nf | 10 ++++++++-- nextflow.config | 4 ++++ nextflow_schema.json | 23 +++++++++++++++++++++++ 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 85c35a7..ef5622b 100644 --- a/main.nf +++ b/main.nf @@ -24,7 +24,8 @@ include { PIPELINE_INITIALIZATION } from './subworkflows/local/pipeline_ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { RESEQUENCING_MEM } from './workflows/resequencing-mem' +include { RESEQUENCING_MEM } from './workflows/resequencing-mem' +include { NORMALIZE_VCF } from './subworkflows/local/normalize_vcf' // // WORKFLOW: Run main cnr-ibba/nf-resequencing-mem analysis pipeline @@ -42,9 +43,14 @@ workflow CNR_IBBA { multiqc_report = RESEQUENCING_MEM.out.multiqc_report // channel: /path/to/multiqc_report.html } +workflow VCF_NORMALIZE { + main: + println "Executing normalization workflow on vcf files" +} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS + RUN CNR_IBBA:RESEQUENCING_MEM WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/nextflow.config b/nextflow.config index 47def41..f0e5e8b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -46,6 +46,10 @@ params { save_freebayes = false save_unique_fastq = false + // Normalization workflow options + input_vcf = null + input_tbi = null + // Boilerplate options 
outdir = './results' publish_dir_mode = 'copy' diff --git a/nextflow_schema.json b/nextflow_schema.json index eb7154c..a268a97 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -137,6 +137,26 @@ } } }, + "normalization_workflow": { + "title": "Normalization workflow", + "type": "object", + "description": "Normalization workflow parameters", + "default": "", + "properties": { + "input_vcf": { + "type": "string", + "format": "file-path", + "description": "Path to VCF file to normalize", + "fa_icon": "fas fa-file-code" + }, + "input_tbi": { + "type": "string", + "format": "file-path", + "description": "Path to VCF index file", + "fa_icon": "fas fa-file-code" + } + } + }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -313,6 +333,9 @@ { "$ref": "#/definitions/pipeline_custom_parameters" }, + { + "$ref": "#/definitions/normalization_workflow" + }, { "$ref": "#/definitions/institutional_config_options" }, From 077b3c62a8fdc3fc9af283c0bc99aec1e5f1ca99 Mon Sep 17 00:00:00 2001 From: Paolo Cozzi Date: Wed, 5 Feb 2025 17:15:41 +0100 Subject: [PATCH 3/6] :poop: add normalization workflow to be called with -entry option Add normalization workflow to be called on not-normalized vcf files: despite the issue in publishing results, `-entry` nextflow option seems to be deprecated after version 24.10-edge --- conf/modules.config | 5 ++- main.nf | 31 +++++++++++++-- subworkflows/local/pipeline_initialization.nf | 38 +++++++++++++++++++ 3 files changed, 69 insertions(+), 5 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 5e3a1df..64189b6 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -216,7 +216,10 @@ process { withName: "BCFTOOLS_FILLTAGS|BCFTOOLS_FILLTAGS_TABIX" { publishDir = [ - enabled: false + path: { "${params.outdir}/normalized-vcf" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + // enabled: params.only_normalization ] } diff --git a/main.nf b/main.nf index ef5622b..a02141e 100644 --- a/main.nf +++ b/main.nf @@ -16,7 +16,8 @@ nextflow.enable.dsl = 2 */ include { validateParameters; paramsHelp } from 'plugin/nf-validation' -include { PIPELINE_INITIALIZATION } from './subworkflows/local/pipeline_initialization.nf' +include { PIPELINE_INITIALIZATION } from './subworkflows/local/pipeline_initialization' +include { NORMALIZATION_INITIALIZATION } from './subworkflows/local/pipeline_initialization' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -24,8 +25,9 @@ include { PIPELINE_INITIALIZATION } from './subworkflows/local/pipeline_ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { RESEQUENCING_MEM } from './workflows/resequencing-mem' -include { NORMALIZE_VCF } from './subworkflows/local/normalize_vcf' +include { RESEQUENCING_MEM } from './workflows/resequencing-mem' +include { NORMALIZE_VCF } from './subworkflows/local/normalize_vcf' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from './modules/nf-core/custom/dumpsoftwareversions/main' // // WORKFLOW: Run main cnr-ibba/nf-resequencing-mem analysis pipeline @@ -45,7 +47,28 @@ workflow CNR_IBBA { workflow VCF_NORMALIZE { main: - println "Executing normalization workflow on vcf files" + // collect software version + ch_versions = Channel.empty() + + // setting input channels + NORMALIZATION_INITIALIZATION( + params.input_vcf, + params.input_tbi, + params.genome_fasta + ) + + // calling the normalization workflow + NORMALIZE_VCF( + NORMALIZATION_INITIALIZATION.out.vcf_ch, + NORMALIZATION_INITIALIZATION.out.tbi_ch, + NORMALIZATION_INITIALIZATION.out.fasta_ch + ) + ch_versions = ch_versions.mix(NORMALIZE_VCF.out.versions) + + // return software version + CUSTOM_DUMPSOFTWAREVERSIONS ( + ch_versions.unique().collectFile(name: 'collated_versions.yml') + ) } /* diff --git 
a/subworkflows/local/pipeline_initialization.nf b/subworkflows/local/pipeline_initialization.nf index 54fb3ba..dfaaa0d 100644 --- a/subworkflows/local/pipeline_initialization.nf +++ b/subworkflows/local/pipeline_initialization.nf @@ -33,3 +33,41 @@ workflow PIPELINE_INITIALIZATION { emit: samplesheet = ch_input } + +workflow NORMALIZATION_INITIALIZATION { + take: + input_vcf // file: input vcf file + input_tbi // file: input tbi file + genome_fasta // file: genome fasta file + + main: + // Check input path parameters to see if they exist + def require_parameter = [ + '--input_vcf': input_vcf, + '--input_tbi': input_tbi, + '--genome_fasta': genome_fasta + ] + require_parameter.each { key, value -> + if (! value) { + error "Required parameter '${key}' is missing" + } + } + + // Set channels for required files + Channel.fromPath(input_vcf, checkIfExists: true) + .map{ it -> [[id:"all-samples-normalized"], it] } + .set { ch_input_vcf } + + Channel.fromPath(input_tbi, checkIfExists: true) + .map{ it -> [[id:"all-samples-normalized"], it] } + .set { ch_input_tbi } + + Channel.fromPath(genome_fasta, checkIfExists: true) + .map{ it -> [[id:it.baseName], it] } + .set { ch_genome_fasta } + + emit: + vcf_ch = ch_input_vcf + tbi_ch = ch_input_tbi + fasta_ch = ch_genome_fasta +} From b45d533a56e818416b6e98e4d72f3dcb8daec55b Mon Sep 17 00:00:00 2001 From: Paolo Cozzi Date: Fri, 7 Feb 2025 15:38:57 +0100 Subject: [PATCH 4/6] :sparkles: support normalization conditional workflow perform only normalization steps using --normalization_only parameter --- CHANGELOG.md | 1 + README.md | 23 ++++++++++++++- conf/modules.config | 2 +- lib/WorkflowMain.groovy | 29 +++++++++++++------ main.nf | 62 ++++++++++++++++++++++++++--------------- nextflow.config | 1 + nextflow_schema.json | 5 ++++ 7 files changed, 90 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d70fd08..c94c82d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ and this project adheres
to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## 0.6.2 - dev +- Perform only the normalization workflow using `--normalization_only` parameter ([#92](https://github.com/cnr-ibba/nf-resequencing-mem/issues/92)) - Update freebayes to version `1.3.8` - Solve linter issues related to VScode and _nextflow languageserver_ plugin ([#86](https://github.com/cnr-ibba/nf-resequencing-mem/issues/86)) - Update `.editorconfig` diff --git a/README.md b/README.md index 08c3ed5..11bc9c1 100644 --- a/README.md +++ b/README.md @@ -120,7 +120,11 @@ used to save _intermediate results_ or to skip a particular step: - `--gvcf_dont_use_chunk`: (bool, def. false) When writing the gVCF output emit a record for all bases, will also route an int to `--gvcf_chunk` similar to `--output-mode EMIT_ALL_SITES` from _GATK_ - `--skip_normalization`: (bool, def. false) skip VCF normalization steps +- `--skip_normalization`: (bool, def. false) skip VCF normalization steps +- `--normalization_only`: (bool, def. false) only normalize a VCF file (skip all the + other steps, see [Normalize a vcf file](#normalize-a-vcf-file)) +- `--input_vcf`: path to a VCF file to be normalized (required when `--normalization_only` is set) +- `--input_tbi`: path to a VCF index file (required when `--normalization_only` is set) - `--snpeff_database`: annotate the VCF file with SnpEff by providing a pre-built database that can be found using the `java -jar snpEff.jar databases` command. If the database is known to SnpEff will be downloaded and managed by the pipeline @@ -365,6 +369,23 @@ Please see the [Amazon Cloud](https://www.nextflow.io/docs/latest/awscloud.html# section of nextflow documentation to get other information on nextflow and AWS usage. +## Normalize a vcf file + +With this pipeline it is possible to perform the normalization workflow on a VCF +file, without running the whole pipeline. This is useful when you have a VCF file +that needs to be normalized, for example after a _freebayes_ run.
You can call +this pipeline providing the `--normalization_only` parameter and the `--input_vcf` +and `--input_tbi` parameters: + +```bash +nextflow run cnr-ibba/nf-resequencing-mem -resume -profile <your profile> \ + --normalization_only --input_vcf <vcf file> --input_tbi <vcf index file> \ + --genome_fasta <genome_fasta> --outdir <results dir> +``` + +Other provided parameters will be ignored, and the pipeline will normalize the +VCF file and will store the normalized VCF file in the `outdir` directory. + ## Known issues ### Ignore sample sheet check diff --git a/conf/modules.config b/conf/modules.config index 64189b6..b3652df 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -219,7 +219,7 @@ process { path: { "${params.outdir}/normalized-vcf" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - // enabled: params.only_normalization + enabled: params.normalization_only ] } diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index c85e495..08eb43c 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -44,16 +44,29 @@ class WorkflowMain { // Check AWS batch settings NfcoreTemplate.awsBatch(workflow, params) - // Check input has been provided - if (!params.input) { - Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'") + // Check at least one input has been provided + if (!params.normalization_only) { + // check for mandatory input + if (!params.input) { + Nextflow.error("Please provide an input samplesheet to the pipeline e.g.
'--input samplesheet.csv'") + } + + // check for gvcf_chunk options and gvcf + if ((params.gvcf_chunk || params.gvcf_dont_use_chunk) && !params.gvcf) { + Nextflow.error("Please provide '--gvcf' option when providing '--gvcf_chunk' or '--gvcf_dont_use_chunk' parameters") + } else if (params.gvcf_chunk && params.gvcf_dont_use_chunk) { + Nextflow.error("Please provide only one of '--gvcf_chunk' or '--gvcf_dont_use_chunk' parameters") + } } - // check for gvcf_chunk options and gvcf - if ((params.gvcf_chunk || params.gvcf_dont_use_chunk) && !params.gvcf) { - Nextflow.error("Please provide '--gvcf' option when providing '--gvcf_chunk' or '--gvcf_dont_use_chunk' parameters") - } else if (params.gvcf_chunk && params.gvcf_dont_use_chunk) { - Nextflow.error("Please provide only one of '--gvcf_chunk' or '--gvcf_dont_use_chunk' parameters") + // doing the normalization workflow + if (params.normalization_only) { + if (!params.input_vcf || !params.input_tbi) { + Nextflow.error("Please provide a VCF file and its index to the pipeline e.g. '--input_vcf input.vcf --input_tbi input.vcf.tbi' when using '--normalization_only'") + } + if (params.input) { + log.warn("You choose to run the normalization workflow. 
The input samplesheet will be ignored.") + } } } diff --git a/main.nf b/main.nf index a02141e..9c4cc47 100644 --- a/main.nf +++ b/main.nf @@ -46,22 +46,20 @@ workflow CNR_IBBA { } workflow VCF_NORMALIZE { + take: + vcf_ch // channel: vcf file + tbi_ch // channel: tbi file + fasta_ch // channel: fasta file + main: // collect software version ch_versions = Channel.empty() - // setting input channels - NORMALIZATION_INITIALIZATION( - params.input_vcf, - params.input_tbi, - params.genome_fasta - ) - // calling the normalization workflow NORMALIZE_VCF( - NORMALIZATION_INITIALIZATION.out.vcf_ch, - NORMALIZATION_INITIALIZATION.out.tbi_ch, - NORMALIZATION_INITIALIZATION.out.fasta_ch + vcf_ch, + tbi_ch, + fasta_ch ) ch_versions = ch_versions.mix(NORMALIZE_VCF.out.versions) @@ -98,21 +96,39 @@ workflow { validateParameters() } + // Initialise the workflow and check specific parameters WorkflowMain.initialise(workflow, params, log) - // - // SUBWORKFLOW: Run initializations tasks - // - PIPELINE_INITIALIZATION ( - params.input, - params.multiqc_config, - params.genome_fasta, - params.genome_bwa_index - ) - - CNR_IBBA ( - PIPELINE_INITIALIZATION.out.samplesheet - ) + if (!params.normalization_only) { + // doing the main analysis + // Run initializations tasks + PIPELINE_INITIALIZATION ( + params.input, + params.multiqc_config, + params.genome_fasta, + params.genome_bwa_index + ) + + // then run the main pipeline + CNR_IBBA ( + PIPELINE_INITIALIZATION.out.samplesheet + ) + } else { + // doing only the normalization workflow + // setting up + NORMALIZATION_INITIALIZATION( + params.input_vcf, + params.input_tbi, + params.genome_fasta + ) + + // run only the normalization workflow + VCF_NORMALIZE ( + NORMALIZATION_INITIALIZATION.out.vcf_ch, + NORMALIZATION_INITIALIZATION.out.tbi_ch, + NORMALIZATION_INITIALIZATION.out.fasta_ch + ) + } } /* diff --git a/nextflow.config b/nextflow.config index f0e5e8b..a4660e3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -47,6 +47,7 @@ 
params { save_unique_fastq = false // Normalization workflow options + normalization_only = false input_vcf = null input_tbi = null diff --git a/nextflow_schema.json b/nextflow_schema.json index a268a97..642d60f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -143,6 +143,11 @@ "description": "Normalization workflow parameters", "default": "", "properties": { + "normalization_only": { + "type": "boolean", + "description": "Do only the VCF normalization workflow", + "default": false + }, "input_vcf": { "type": "string", "format": "file-path", From 0671c7f693fba8a8a131119972833f3c3e71e303 Mon Sep 17 00:00:00 2001 From: Paolo Cozzi Date: Fri, 7 Feb 2025 17:11:56 +0100 Subject: [PATCH 5/6] :fire: enforce nextflow schema and remove checks from code check for file existence through nextflow schema and validation plugin --- CHANGELOG.md | 1 + main.nf | 7 ++---- nextflow_schema.json | 15 +++++++++-- subworkflows/local/pipeline_initialization.nf | 25 +++---------------- 4 files changed, 19 insertions(+), 29 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c94c82d..0165210 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## 0.6.2 - dev +- Update `nextflow_schema.json` to check for _file and directory_ existence - Perform only the normalization workflow using `--normalization_only` parameter ([#92](https://github.com/cnr-ibba/nf-resequencing-mem/issues/92)) - Update freebayes to version `1.3.8` - Solve linter issues related to VScode and _nextflow languageserver_ plugin ([#86](https://github.com/cnr-ibba/nf-resequencing-mem/issues/86)) diff --git a/main.nf b/main.nf index 9c4cc47..ac7ec9f 100644 --- a/main.nf +++ b/main.nf @@ -96,17 +96,14 @@ workflow { validateParameters() } - // Initialise the workflow and check specific parameters + // Initialize the workflow and check specific parameters WorkflowMain.initialise(workflow, params, log) if 
(!params.normalization_only) { // doing the main analysis // Run initializations tasks PIPELINE_INITIALIZATION ( - params.input, - params.multiqc_config, - params.genome_fasta, - params.genome_bwa_index + params.input ) // then run the main pipeline diff --git a/nextflow_schema.json b/nextflow_schema.json index 642d60f..188f7eb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "genome_fasta"], + "required": ["genome_fasta"], "properties": { "input": { "type": "string", @@ -25,6 +25,7 @@ "genome_fasta": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file (compression is supported).", @@ -33,10 +34,14 @@ }, "genome_fasta_fai": { "type": "string", + "format": "file-path", + "exists": true, "description": "Path to genome fasta index (skip index calculation)" }, "genome_bwa_index": { "type": "string", + "format": "directory-path", + "exists": true, "description": "Path to genome fasta BWA index (skip index calculation)" }, "outdir": { @@ -77,12 +82,16 @@ "snpeff_cachedir": { "type": "string", "description": "SnpEff custom cache directory", + "format": "directory-path", + "exists": true, "fa_icon": "fas fa-database", "help_text": "SnpEff cache directory which should include another directory with the name of the database in which a valid SnpEff custom database is stored" }, "snpeff_config": { "type": "string", "description": "SnpEff custom configuration file", + "format": "file-path", + "exists": true, "fa_icon": "fas fa-database", "help_text": "SnpEff configuration file which should include the custom database name", "default": "assets/NO_FILE" @@ -133,7 +142,7 @@ }, "save_unique_fastq": { "type": "boolean", - "description": "Save purget FASTQ in ${results_dir}" + 
"description": "Save purged FASTQ in ${results_dir}" } } }, @@ -151,12 +160,14 @@ "input_vcf": { "type": "string", "format": "file-path", + "exists": true, "description": "Path to VCF file to normalize", "fa_icon": "fas fa-file-code" }, "input_tbi": { "type": "string", "format": "file-path", + "exists": true, "description": "Path to VCF index file", "fa_icon": "fas fa-file-code" } diff --git a/subworkflows/local/pipeline_initialization.nf b/subworkflows/local/pipeline_initialization.nf index dfaaa0d..3d03cab 100644 --- a/subworkflows/local/pipeline_initialization.nf +++ b/subworkflows/local/pipeline_initialization.nf @@ -6,9 +6,6 @@ workflow PIPELINE_INITIALIZATION { take: input // string: path to samplesheet - multiqc_config // file: multiqc config file - genome_fasta // file: genome fasta file - genome_bwa_index // file: genome bwa index file main: @@ -18,18 +15,12 @@ workflow PIPELINE_INITIALIZATION { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - // Check input path parameters to see if they exist - def optionalFiles = [multiqc_config, genome_fasta, genome_bwa_index] - optionalFiles.each { f -> - if (f) { - Channel.fromPath(f, checkIfExists: true) - } - } - // this should be present Channel.fromPath(input, checkIfExists: true) .set { ch_input } + // other input arguments are evaluated through nf-validation plugin and lib/WorkflowMain.groovy + emit: samplesheet = ch_input } @@ -41,17 +32,7 @@ workflow NORMALIZATION_INITIALIZATION { genome_fasta // file: genome fasta file main: - // Check input path parameters to see if they exist - def require_parameter = [ - '--input_vcf': input_vcf, - '--input_tbi': input_tbi, - '--genome_fasta': genome_fasta - ] - require_parameter.each { key, value -> - if (! 
value) { - error "Required parameter '${key}' is missing" - } - } + // parameters are evaluated through nf-validation plugin and lib/WorkflowMain.groovy // Set channels for required files Channel.fromPath(input_vcf, checkIfExists: true) From fed5b7c528985362c35130aad6e832d265c86995 Mon Sep 17 00:00:00 2001 From: Paolo Cozzi Date: Fri, 7 Feb 2025 17:26:24 +0100 Subject: [PATCH 6/6] :memo: update changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0165210..505f1b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Update `nextflow_schema.json` to check for _file and directory_ existence - Perform only the normalization workflow using `--normalization_only` parameter ([#92](https://github.com/cnr-ibba/nf-resequencing-mem/issues/92)) -- Update freebayes to version `1.3.8` +- Update freebayes to version `1.3.8` ([#88](https://github.com/cnr-ibba/nf-resequencing-mem/issues/88)) - Solve linter issues related to VScode and _nextflow languageserver_ plugin ([#86](https://github.com/cnr-ibba/nf-resequencing-mem/issues/86)) - Update `.editorconfig` - Update modules @@ -38,6 +38,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- Enforce parameters validation through `nextflow_schema.json` - Combine _freebayes_ results if `--save-freebayes` parameter is set - Rename `bcftools/concat` steps in more informative way - Use remote files with `test` profile