From 75174362ed1408b393a439848a21595975d2324d Mon Sep 17 00:00:00 2001
From: Paolo Cozzi <bunop@libero.it>
Date: Wed, 5 Feb 2025 14:33:46 +0100
Subject: [PATCH 1/6] :truck: rename FREEBAYES_NORMALIZE subworkflow into
 NORMALIZE_VCF

---
 CHANGELOG.md                                           |  4 ++--
 .../local/{freebayes_normalize.nf => normalize_vcf.nf} |  2 +-
 workflows/resequencing-mem.nf                          | 10 +++++-----
 3 files changed, 8 insertions(+), 8 deletions(-)
 rename subworkflows/local/{freebayes_normalize.nf => normalize_vcf.nf} (98%)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4ee22fa..d70fd08 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,7 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Support for institutional configuration
 - Parallelize normalization steps by chromosomes. Merge VCF files after normalization
 - Normalize VCF file using `vcfwave` ([#76](https://github.com/cnr-ibba/nf-resequencing-mem/issues/76))
-- Add `freebayes_normalized` local subworkflow
+- Add `normalize_vcf` local subworkflow
 - Update `nextflow` to version `24.04.0`
 - Using the `resourceLimits` directive to set the max requirements for each process
 - Update CI system ([#81](https://github.com/cnr-ibba/nf-resequencing-mem/issues/81))
@@ -31,7 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Add `bcftools/concat` process from _nf-core_ repository
 - Add `bcftools_filltags` process
 - Add `vcflib_vcfwave` local process
-- Add `freebayes_normalized` local subworkflow
+- Add `normalize_vcf` local subworkflow
 - Add `bcftools/sort` process
 
 ### `Fixed`
diff --git a/subworkflows/local/freebayes_normalize.nf b/subworkflows/local/normalize_vcf.nf
similarity index 98%
rename from subworkflows/local/freebayes_normalize.nf
rename to subworkflows/local/normalize_vcf.nf
index a04ad2e..ad23a56 100644
--- a/subworkflows/local/freebayes_normalize.nf
+++ b/subworkflows/local/normalize_vcf.nf
@@ -12,7 +12,7 @@ include { BCFTOOLS_NORM                     } from '../../modules/nf-core/bcftoo
 include { BCFTOOLS_FILLTAGS                 } from '../../modules/local/bcftools_filltags'
 
 
-workflow FREEBAYES_NORMALIZE {
+workflow NORMALIZE_VCF {
     take:
         vcf_ch    // channel: [mandatory] the VCF file to normalize
         tbi_ch    // channel: [mandatory] the index file for the VCF file
diff --git a/workflows/resequencing-mem.nf b/workflows/resequencing-mem.nf
index fa5afce..10a1314 100644
--- a/workflows/resequencing-mem.nf
+++ b/workflows/resequencing-mem.nf
@@ -29,7 +29,7 @@ include { TRIMGALORE                            } from '../modules/nf-core/trimg
 include { BWA_MEM                               } from '../modules/nf-core/bwa/mem/main'
 include { CRAM_FREEBAYES_PARALLEL               } from '../subworkflows/local/cram_freebayes_parallel/main'
 include { CRAM_MARKDUPLICATES_PICARD            } from '../subworkflows/local/cram_markduplicates_picard/main'
-include { FREEBAYES_NORMALIZE                   } from '../subworkflows/local/freebayes_normalize'
+include { NORMALIZE_VCF                         } from '../subworkflows/local/normalize_vcf'
 include {
     BCFTOOLS_CONCAT as NORMALIZED_CONCAT;
     BCFTOOLS_CONCAT as FREEBAYES_CONCAT;        } from '../modules/nf-core/bcftools/concat/main'
@@ -187,20 +187,20 @@ workflow RESEQUENCING_MEM {
             // .view()
     } else {
         // normalize VCF using freebayes and bcftools
-        FREEBAYES_NORMALIZE(
+        NORMALIZE_VCF(
             CRAM_FREEBAYES_PARALLEL.out.vcf,
             CRAM_FREEBAYES_PARALLEL.out.tbi,
             PREPARE_GENOME.out.genome_fasta
         )
-        ch_versions = ch_versions.mix(FREEBAYES_NORMALIZE.out.versions)
+        ch_versions = ch_versions.mix(NORMALIZE_VCF.out.versions)
 
         // concatenate all chromosome in one file.
-        bcftools_in_ch = FREEBAYES_NORMALIZE.out.vcf
+        bcftools_in_ch = NORMALIZE_VCF.out.vcf
             .map{ _meta, vcf -> [vcf] }
             .collect()
             .map{ it -> [[id: "all-samples-normalized"], it]}
             .join(
-            FREEBAYES_NORMALIZE.out.tbi
+            NORMALIZE_VCF.out.tbi
                 .map{ _meta, vcf -> [vcf] }
                 .collect()
                 .map{ it -> [[id: "all-samples-normalized"], it]}

From b5ca9b8b8f3fa2b57260cf570361e886f5109dc1 Mon Sep 17 00:00:00 2001
From: Paolo Cozzi <bunop@libero.it>
Date: Wed, 5 Feb 2025 16:23:23 +0100
Subject: [PATCH 2/6] :wrench: define schema for normalization workflow

---
 main.nf              | 10 ++++++++--
 nextflow.config      |  4 ++++
 nextflow_schema.json | 23 +++++++++++++++++++++++
 3 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/main.nf b/main.nf
index 85c35a7..ef5622b 100644
--- a/main.nf
+++ b/main.nf
@@ -24,7 +24,8 @@ include { PIPELINE_INITIALIZATION         } from './subworkflows/local/pipeline_
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-include { RESEQUENCING_MEM } from './workflows/resequencing-mem'
+include { RESEQUENCING_MEM  } from './workflows/resequencing-mem'
+include { NORMALIZE_VCF     } from './subworkflows/local/normalize_vcf'
 
 //
 // WORKFLOW: Run main cnr-ibba/nf-resequencing-mem analysis pipeline
@@ -42,9 +43,14 @@ workflow CNR_IBBA {
     multiqc_report = RESEQUENCING_MEM.out.multiqc_report // channel: /path/to/multiqc_report.html
 }
 
+workflow VCF_NORMALIZE {
+    main:
+    println "Executing normalization workflow on vcf files"
+}
+
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    RUN ALL WORKFLOWS
+    RUN CNR_IBBA:RESEQUENCING_MEM WORKFLOWS
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
diff --git a/nextflow.config b/nextflow.config
index 47def41..f0e5e8b 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -46,6 +46,10 @@ params {
     save_freebayes             = false
     save_unique_fastq          = false
 
+    // Normalization workflow options
+    input_vcf                  = null
+    input_tbi                  = null
+
     // Boilerplate options
     outdir                     = './results'
     publish_dir_mode           = 'copy'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index eb7154c..a268a97 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -137,6 +137,26 @@
                 }
             }
         },
+        "normalization_workflow": {
+            "title": "Normalization workflow",
+            "type": "object",
+            "description": "Normalization workflow parameters",
+            "default": "",
+            "properties": {
+                "input_vcf": {
+                    "type": "string",
+                    "format": "file-path",
+                    "description": "Path to VCF file to normalize",
+                    "fa_icon": "fas fa-file-code"
+                },
+                "input_tbi": {
+                    "type": "string",
+                    "format": "file-path",
+                    "description": "Path to VCF index file",
+                    "fa_icon": "fas fa-file-code"
+                }
+            }
+        },
         "institutional_config_options": {
             "title": "Institutional config options",
             "type": "object",
@@ -313,6 +333,9 @@
         {
             "$ref": "#/definitions/pipeline_custom_parameters"
         },
+        {
+            "$ref": "#/definitions/normalization_workflow"
+        },
         {
             "$ref": "#/definitions/institutional_config_options"
         },

From 077b3c62a8fdc3fc9af283c0bc99aec1e5f1ca99 Mon Sep 17 00:00:00 2001
From: Paolo Cozzi <bunop@libero.it>
Date: Wed, 5 Feb 2025 17:15:41 +0100
Subject: [PATCH 3/6] :poop: add normalization workflow to be called with
 -entry option

Add normalization workflow to be called on not-normalized vcf files: despite the issue in publishing results, `-entry` nextflow option seems to be deprecated after version 24.10-edge
---
 conf/modules.config                           |  5 ++-
 main.nf                                       | 31 +++++++++++++--
 subworkflows/local/pipeline_initialization.nf | 38 +++++++++++++++++++
 3 files changed, 69 insertions(+), 5 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 5e3a1df..64189b6 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -216,7 +216,10 @@ process {
 
     withName: "BCFTOOLS_FILLTAGS|BCFTOOLS_FILLTAGS_TABIX" {
         publishDir = [
-            enabled: false
+            path: { "${params.outdir}/normalized-vcf" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+            // enabled: params.only_normalization
         ]
     }
 
diff --git a/main.nf b/main.nf
index ef5622b..a02141e 100644
--- a/main.nf
+++ b/main.nf
@@ -16,7 +16,8 @@ nextflow.enable.dsl = 2
 */
 
 include { validateParameters; paramsHelp  } from 'plugin/nf-validation'
-include { PIPELINE_INITIALIZATION         } from './subworkflows/local/pipeline_initialization.nf'
+include { PIPELINE_INITIALIZATION         } from './subworkflows/local/pipeline_initialization'
+include { NORMALIZATION_INITIALIZATION    } from './subworkflows/local/pipeline_initialization'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -24,8 +25,9 @@ include { PIPELINE_INITIALIZATION         } from './subworkflows/local/pipeline_
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-include { RESEQUENCING_MEM  } from './workflows/resequencing-mem'
-include { NORMALIZE_VCF     } from './subworkflows/local/normalize_vcf'
+include { RESEQUENCING_MEM              } from './workflows/resequencing-mem'
+include { NORMALIZE_VCF                 } from './subworkflows/local/normalize_vcf'
+include { CUSTOM_DUMPSOFTWAREVERSIONS   } from './modules/nf-core/custom/dumpsoftwareversions/main'
 
 //
 // WORKFLOW: Run main cnr-ibba/nf-resequencing-mem analysis pipeline
@@ -45,7 +47,28 @@ workflow CNR_IBBA {
 
 workflow VCF_NORMALIZE {
     main:
-    println "Executing normalization workflow on vcf files"
+    // collect software version
+    ch_versions = Channel.empty()
+
+    // setting input channels
+    NORMALIZATION_INITIALIZATION(
+        params.input_vcf,
+        params.input_tbi,
+        params.genome_fasta
+    )
+
+    // calling the normalization workflow
+    NORMALIZE_VCF(
+        NORMALIZATION_INITIALIZATION.out.vcf_ch,
+        NORMALIZATION_INITIALIZATION.out.tbi_ch,
+        NORMALIZATION_INITIALIZATION.out.fasta_ch
+    )
+    ch_versions = ch_versions.mix(NORMALIZE_VCF.out.versions)
+
+    // return software version
+    CUSTOM_DUMPSOFTWAREVERSIONS (
+        ch_versions.unique().collectFile(name: 'collated_versions.yml')
+    )
 }
 
 /*
diff --git a/subworkflows/local/pipeline_initialization.nf b/subworkflows/local/pipeline_initialization.nf
index 54fb3ba..dfaaa0d 100644
--- a/subworkflows/local/pipeline_initialization.nf
+++ b/subworkflows/local/pipeline_initialization.nf
@@ -33,3 +33,41 @@ workflow PIPELINE_INITIALIZATION {
     emit:
     samplesheet = ch_input
 }
+
+workflow NORMALIZATION_INITIALIZATION {
+    take:
+    input_vcf           // file: input vcf file
+    input_tbi           // file: input tbi file
+    genome_fasta        // file: genome fasta file
+
+    main:
+    // Check input path parameters to see if they exist
+    def require_parameter = [
+        '--input_vcf': input_vcf,
+        '--input_tbi': input_tbi,
+        '--genome_fasta': genome_fasta
+    ]
+    require_parameter.each { key, value ->
+        if (! value) {
+            error "Required parameter '${key}' is missing"
+        }
+    }
+
+    // Set channels for required files
+    Channel.fromPath(input_vcf, checkIfExists: true)
+        .map{ it -> [[id:"all-samples-normalized"], it] }
+        .set { ch_input_vcf }
+
+    Channel.fromPath(input_tbi, checkIfExists: true)
+        .map{ it -> [[id:"all-samples-normalized"], it] }
+        .set { ch_input_tbi }
+
+    Channel.fromPath(genome_fasta, checkIfExists: true)
+        .map{ fasta -> [[id: fasta.baseName], fasta] } // each item is a single Path: indexing it would select a path component, not the file
+        .set { ch_genome_fasta }
+
+    emit:
+    vcf_ch = ch_input_vcf
+    tbi_ch = ch_input_tbi
+    fasta_ch = ch_genome_fasta
+}

From b45d533a56e818416b6e98e4d72f3dcb8daec55b Mon Sep 17 00:00:00 2001
From: Paolo Cozzi <bunop@libero.it>
Date: Fri, 7 Feb 2025 15:38:57 +0100
Subject: [PATCH 4/6] :sparkles: support normalization conditional workflow

perform only normalization steps using --normalization_only parameter
---
 CHANGELOG.md            |  1 +
 README.md               | 23 ++++++++++++++-
 conf/modules.config     |  2 +-
 lib/WorkflowMain.groovy | 29 +++++++++++++------
 main.nf                 | 62 ++++++++++++++++++++++++++---------------
 nextflow.config         |  1 +
 nextflow_schema.json    |  5 ++++
 7 files changed, 90 insertions(+), 33 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d70fd08..c94c82d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## 0.6.2 - dev
 
+- Perform only the normalization workflow using `--normalization_only` parameter ([#92](https://github.com/cnr-ibba/nf-resequencing-mem/issues/92))
 - Update freebayes to version `1.3.8`
 - Solve linter issues related to VScode and _nextflow languageserver_ plugin ([#86](https://github.com/cnr-ibba/nf-resequencing-mem/issues/86))
 - Update `.editorconfig`
diff --git a/README.md b/README.md
index 08c3ed5..11bc9c1 100644
--- a/README.md
+++ b/README.md
@@ -120,7 +120,11 @@ used to save _intermediate results_ or to skip a particular step:
 - `--gvcf_dont_use_chunk`: (bool, def. false) When writing the gVCF output emit a
   record for all bases, will also route an int to `--gvcf_chunk` similar to
   `--output-mode EMIT_ALL_SITES` from _GATK_
-  `--skip_normalization`: (bool, def. false) skip VCF normalization steps
+- `--skip_normalization`: (bool, def. false) skip VCF normalization steps
+- `--normalization_only`: (bool, def. false) only normalize a VCF file (skip all the
+  other steps, see [Normalize a vcf file](#normalize-a-vcf-file))
+- `--input_vcf`: path to a VCF file to be normalized (required when `--normalization_only` is set)
+- `--input_tbi`: path to a VCF index file (required when `--normalization_only` is set)
 - `--snpeff_database`: annotate the VCF file with SnpEff by providing a pre-built
   database that can be found using the `java -jar snpEff.jar databases` command.
   If the database is known to SnpEff will be downloaded and managed by the pipeline
@@ -365,6 +369,23 @@ Please see the [Amazon Cloud](https://www.nextflow.io/docs/latest/awscloud.html#
 section of nextflow documentation to get other information on nextflow and AWS
 usage.
 
+## Normalize a vcf file
+
+With this pipeline it is possible to perform the normalization workflow on a VCF
+file, without running the whole pipeline. This is useful when you have a VCF file
+that needs to be normalized, for example after a _freebayes_ run. You can call
+this pipeline providing the `--normalization_only` parameter and the `--input_vcf`
+and `--input_tbi` parameters:
+
+```bash
+nextflow run cnr-ibba/nf-resequencing-mem -resume -profile <your profile> \
+  --normalization_only --input_vcf <input.vcf> --input_tbi <input.tbi> \
+  --genome_fasta <genome.fasta> --outdir <results dir>
+```
+
+Other provided parameters will be ignored: the pipeline will only normalize the VCF
+file and store the result in the `normalized-vcf` folder inside the `outdir` directory.
+
 ## Known issues
 
 ### Ignore sample sheet check
diff --git a/conf/modules.config b/conf/modules.config
index 64189b6..b3652df 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -219,7 +219,7 @@ process {
             path: { "${params.outdir}/normalized-vcf" },
             mode: params.publish_dir_mode,
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
-            // enabled: params.only_normalization
+            enabled: params.normalization_only
         ]
     }
 
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
index c85e495..08eb43c 100755
--- a/lib/WorkflowMain.groovy
+++ b/lib/WorkflowMain.groovy
@@ -44,16 +44,29 @@ class WorkflowMain {
         // Check AWS batch settings
         NfcoreTemplate.awsBatch(workflow, params)
 
-        // Check input has been provided
-        if (!params.input) {
-            Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'")
+        // Check at least one input has been provided
+        if (!params.normalization_only) {
+            // check for mandatory input
+            if (!params.input) {
+                Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'")
+            }
+
+            // check for gvcf_chunk options and gvcf
+            if ((params.gvcf_chunk || params.gvcf_dont_use_chunk) && !params.gvcf) {
+                Nextflow.error("Please provide '--gvcf' option when providing '--gvcf_chunk' or '--gvcf_dont_use_chunk' parameters")
+            } else if (params.gvcf_chunk && params.gvcf_dont_use_chunk) {
+                Nextflow.error("Please provide only one of '--gvcf_chunk' or '--gvcf_dont_use_chunk' parameters")
+            }
         }
 
-        // check for gvcf_chunk options and gvcf
-        if ((params.gvcf_chunk || params.gvcf_dont_use_chunk) && !params.gvcf) {
-            Nextflow.error("Please provide '--gvcf' option when providing '--gvcf_chunk' or '--gvcf_dont_use_chunk' parameters")
-        } else if (params.gvcf_chunk && params.gvcf_dont_use_chunk) {
-            Nextflow.error("Please provide only one of '--gvcf_chunk' or '--gvcf_dont_use_chunk' parameters")
+        // doing the normalization workflow
+        if (params.normalization_only) {
+            if (!params.input_vcf || !params.input_tbi) {
+                Nextflow.error("Please provide a VCF file and its index to the pipeline e.g. '--input_vcf input.vcf --input_tbi input.vcf.tbi' when using '--normalization_only'")
+            }
+            if (params.input) {
+                log.warn("You choose to run the normalization workflow. The input samplesheet will be ignored.")
+            }
         }
     }
 
diff --git a/main.nf b/main.nf
index a02141e..9c4cc47 100644
--- a/main.nf
+++ b/main.nf
@@ -46,22 +46,20 @@ workflow CNR_IBBA {
 }
 
 workflow VCF_NORMALIZE {
+    take:
+    vcf_ch // channel: vcf file
+    tbi_ch // channel: tbi file
+    fasta_ch // channel: fasta file
+
     main:
     // collect software version
     ch_versions = Channel.empty()
 
-    // setting input channels
-    NORMALIZATION_INITIALIZATION(
-        params.input_vcf,
-        params.input_tbi,
-        params.genome_fasta
-    )
-
     // calling the normalization workflow
     NORMALIZE_VCF(
-        NORMALIZATION_INITIALIZATION.out.vcf_ch,
-        NORMALIZATION_INITIALIZATION.out.tbi_ch,
-        NORMALIZATION_INITIALIZATION.out.fasta_ch
+        vcf_ch,
+        tbi_ch,
+        fasta_ch
     )
     ch_versions = ch_versions.mix(NORMALIZE_VCF.out.versions)
 
@@ -98,21 +96,39 @@ workflow {
         validateParameters()
     }
 
+    // Initialise the workflow and check specific parameters
     WorkflowMain.initialise(workflow, params, log)
 
-    //
-    // SUBWORKFLOW: Run initializations tasks
-    //
-    PIPELINE_INITIALIZATION (
-        params.input,
-        params.multiqc_config,
-        params.genome_fasta,
-        params.genome_bwa_index
-    )
-
-    CNR_IBBA (
-        PIPELINE_INITIALIZATION.out.samplesheet
-    )
+    if (!params.normalization_only) {
+        // doing the main analysis
+        // Run initializations tasks
+        PIPELINE_INITIALIZATION (
+            params.input,
+            params.multiqc_config,
+            params.genome_fasta,
+            params.genome_bwa_index
+        )
+
+        // then run the main pipeline
+        CNR_IBBA (
+            PIPELINE_INITIALIZATION.out.samplesheet
+        )
+    } else {
+        // doing only the normalization workflow
+        // setting up
+        NORMALIZATION_INITIALIZATION(
+            params.input_vcf,
+            params.input_tbi,
+            params.genome_fasta
+        )
+
+        // run only the normalization workflow
+        VCF_NORMALIZE (
+            NORMALIZATION_INITIALIZATION.out.vcf_ch,
+            NORMALIZATION_INITIALIZATION.out.tbi_ch,
+            NORMALIZATION_INITIALIZATION.out.fasta_ch
+        )
+    }
 }
 
 /*
diff --git a/nextflow.config b/nextflow.config
index f0e5e8b..a4660e3 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -47,6 +47,7 @@ params {
     save_unique_fastq          = false
 
     // Normalization workflow options
+    normalization_only         = false
     input_vcf                  = null
     input_tbi                  = null
 
diff --git a/nextflow_schema.json b/nextflow_schema.json
index a268a97..642d60f 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -143,6 +143,11 @@
             "description": "Normalization workflow parameters",
             "default": "",
             "properties": {
+                "normalization_only": {
+                    "type": "boolean",
+                    "description": "Do only the VCF normalization workflow",
+                    "default": false
+                },
                 "input_vcf": {
                     "type": "string",
                     "format": "file-path",

From 0671c7f693fba8a8a131119972833f3c3e71e303 Mon Sep 17 00:00:00 2001
From: Paolo Cozzi <bunop@libero.it>
Date: Fri, 7 Feb 2025 17:11:56 +0100
Subject: [PATCH 5/6] :fire: enforce nextflow schema and remove checks from
 code

check for file existence through nextflow schema and validation plugin
---
 CHANGELOG.md                                  |  1 +
 main.nf                                       |  7 ++----
 nextflow_schema.json                          | 15 +++++++++--
 subworkflows/local/pipeline_initialization.nf | 25 +++----------------
 4 files changed, 19 insertions(+), 29 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c94c82d..0165210 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## 0.6.2 - dev
 
+- Update `nextflow_schema.json` to check for _file and directory_ existence
 - Perform only the normalization workflow using `--normalization_only` parameter ([#92](https://github.com/cnr-ibba/nf-resequencing-mem/issues/92))
 - Update freebayes to version `1.3.8`
 - Solve linter issues related to VScode and _nextflow languageserver_ plugin ([#86](https://github.com/cnr-ibba/nf-resequencing-mem/issues/86))
diff --git a/main.nf b/main.nf
index 9c4cc47..ac7ec9f 100644
--- a/main.nf
+++ b/main.nf
@@ -96,17 +96,14 @@ workflow {
         validateParameters()
     }
 
-    // Initialise the workflow and check specific parameters
+    // Initialize the workflow and check specific parameters
     WorkflowMain.initialise(workflow, params, log)
 
     if (!params.normalization_only) {
         // doing the main analysis
         // Run initializations tasks
         PIPELINE_INITIALIZATION (
-            params.input,
-            params.multiqc_config,
-            params.genome_fasta,
-            params.genome_bwa_index
+            params.input
         )
 
         // then run the main pipeline
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 642d60f..188f7eb 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -10,7 +10,7 @@
             "type": "object",
             "fa_icon": "fas fa-terminal",
             "description": "Define where the pipeline should find input data and save output data.",
-            "required": ["input", "genome_fasta"],
+            "required": ["genome_fasta"],
             "properties": {
                 "input": {
                     "type": "string",
@@ -25,6 +25,7 @@
                 "genome_fasta": {
                     "type": "string",
                     "format": "file-path",
+                    "exists": true,
                     "mimetype": "text/plain",
                     "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
                     "description": "Path to FASTA genome file (compression is supported).",
@@ -33,10 +34,14 @@
                 },
                 "genome_fasta_fai": {
                     "type": "string",
+                    "format": "file-path",
+                    "exists": true,
                     "description": "Path to genome fasta index (skip index calculation)"
                 },
                 "genome_bwa_index": {
                     "type": "string",
+                    "format": "directory-path",
+                    "exists": true,
                     "description": "Path to genome fasta BWA index (skip index calculation)"
                 },
                 "outdir": {
@@ -77,12 +82,16 @@
                 "snpeff_cachedir": {
                     "type": "string",
                     "description": "SnpEff custom cache directory",
+                    "format": "directory-path",
+                    "exists": true,
                     "fa_icon": "fas fa-database",
                     "help_text": "SnpEff cache directory which should include another directory with the name of the database in which a valid SnpEff custom database is stored"
                 },
                 "snpeff_config": {
                     "type": "string",
                     "description": "SnpEff custom configuration file",
+                    "format": "file-path",
+                    "exists": true,
                     "fa_icon": "fas fa-database",
                     "help_text": "SnpEff configuration file which should include the custom database name",
                     "default": "assets/NO_FILE"
@@ -133,7 +142,7 @@
                 },
                 "save_unique_fastq": {
                     "type": "boolean",
-                    "description": "Save purget FASTQ in ${results_dir}"
+                    "description": "Save purged FASTQ in ${results_dir}"
                 }
             }
         },
@@ -151,12 +160,14 @@
                 "input_vcf": {
                     "type": "string",
                     "format": "file-path",
+                    "exists": true,
                     "description": "Path to VCF file to normalize",
                     "fa_icon": "fas fa-file-code"
                 },
                 "input_tbi": {
                     "type": "string",
                     "format": "file-path",
+                    "exists": true,
                     "description": "Path to VCF index file",
                     "fa_icon": "fas fa-file-code"
                 }
diff --git a/subworkflows/local/pipeline_initialization.nf b/subworkflows/local/pipeline_initialization.nf
index dfaaa0d..3d03cab 100644
--- a/subworkflows/local/pipeline_initialization.nf
+++ b/subworkflows/local/pipeline_initialization.nf
@@ -6,9 +6,6 @@
 workflow PIPELINE_INITIALIZATION {
     take:
     input               // string: path to samplesheet
-    multiqc_config      // file: multiqc config file
-    genome_fasta        // file: genome fasta file
-    genome_bwa_index    // file: genome bwa index file
 
     main:
 
@@ -18,18 +15,12 @@ workflow PIPELINE_INITIALIZATION {
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     */
 
-    // Check input path parameters to see if they exist
-    def optionalFiles = [multiqc_config, genome_fasta, genome_bwa_index]
-    optionalFiles.each { f ->
-        if (f) {
-            Channel.fromPath(f, checkIfExists: true)
-        }
-    }
-
     // this should be present
     Channel.fromPath(input, checkIfExists: true)
         .set { ch_input }
 
+    // other input arguments are evaluated through nf-validation plugin and lib/WorkflowMain.groovy
+
     emit:
     samplesheet = ch_input
 }
@@ -41,17 +32,7 @@ workflow NORMALIZATION_INITIALIZATION {
     genome_fasta        // file: genome fasta file
 
     main:
-    // Check input path parameters to see if they exist
-    def require_parameter = [
-        '--input_vcf': input_vcf,
-        '--input_tbi': input_tbi,
-        '--genome_fasta': genome_fasta
-    ]
-    require_parameter.each { key, value ->
-        if (! value) {
-            error "Required parameter '${key}' is missing"
-        }
-    }
+    // parameters are evaluated through nf-validation plugin and lib/WorkflowMain.groovy
 
     // Set channels for required files
     Channel.fromPath(input_vcf, checkIfExists: true)

From fed5b7c528985362c35130aad6e832d265c86995 Mon Sep 17 00:00:00 2001
From: Paolo Cozzi <bunop@libero.it>
Date: Fri, 7 Feb 2025 17:26:24 +0100
Subject: [PATCH 6/6] :memo: update changelog

---
 CHANGELOG.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0165210..505f1b6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Update `nextflow_schema.json` to check for _file and directory_ existence
 - Perform only the normalization workflow using `--normalization_only` parameter ([#92](https://github.com/cnr-ibba/nf-resequencing-mem/issues/92))
-- Update freebayes to version `1.3.8`
+- Update freebayes to version `1.3.8` ([#88](https://github.com/cnr-ibba/nf-resequencing-mem/issues/88))
 - Solve linter issues related to VScode and _nextflow languageserver_ plugin ([#86](https://github.com/cnr-ibba/nf-resequencing-mem/issues/86))
 - Update `.editorconfig`
 - Update modules
@@ -38,6 +38,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Fixed`
 
+- Enforce parameter validation through `nextflow_schema.json`
 - Combine _freebayes_ results if `--save-freebayes` parameter is set
 - Rename `bcftools/concat` steps in more informative way
 - Use remote files with `test` profile