nextflow.config

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    nf-core/bacpaq Nextflow config file
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Default config options for all compute environments
----------------------------------------------------------------------------------------
*/

// Global default params, used in configs
params {
    // TODO nf-core: Specify your pipeline's command line flags
    //
    // NOTE: every key must be assigned exactly once in this scope. When a key is
    // assigned twice the later assignment silently wins, which previously dropped
    // the "fasta" entry of `schema_ignore_params` (now merged into a single list in
    // the boilerplate section below).

    // Input options
    input                      = null // location for the input samplesheet file
    classifier                 = "kraken2"
    fasta                      = "test.fa"
    tracedir                   = "null"

    // rasusa subsampling options
    skip_subsampling           = true
    depth_cut_off              = '50' // minimum coverage to subsample the reads is 50

    // Genome size used for subsampling (original note: "genomesize is used by confindr")
    subsampling_genomesize     = 5000000 // change name - de-group from reads reference genome size. Valid metric suffixes include Base (b), Kilo (k), Mega (m), Giga (g), Tera (t)

    // confindr options
    skip_confindr              = false
    confindr_db                = "/mnt/cidgoh-object-storage/database/confindr/confindr_db" // database for confindr. The default database is only for detecting contamination in Escherichia, Listeria, and Salmonella. If you want to run ConFindr on any other genera, you need to build the database following the tutorial at https://olc-bioinformatics.github.io/ConFindr/install/.

    // QC options
    skip_seqqc                 = false
    skip_trimming              = false
    trim_tool                  = "fastp"  // Select which trimming tool to use (fastp/trimmomatic/trimgalore)
    adapter_fasta              = "/mnt/cidgoh-object-storage/database/adaptors/test.fa" // location of the adapter database (fasta file)
    save_trimmed_fail          = false // Whether reads that failed the trimming process are saved (True/False, default: False)
    save_merged                = true  // Whether merged reads are saved (True/False, default: True)

    // MultiQC options
    max_multiqc_email_size      = '25.MB'
    multiqc_title               = "BACPAQ"
    multiqc_config              = null
    multiqc_logo                = null
    multiqc_methods_description = null

    // Nanopore QC options
    skip_nanocomp              = false
    skip_pycoqc                = false
    skip_porechop              = false

    // PycoQC options
    // Path to nanopore summary file to be used by PycoQC
    nanopore_summary_file      = "/mnt/cidgoh-object-storage/hackathon/seqqc/isolate_wgs/nanopore/run_summary/sequencing_summary_FAT24492_18678559.txt"
    // ID for nanopore summary file to be used by PycoQC
    nanopore_summary_file_id   = 'test'

    // Assembly options (Dragonflye/Shovill)
    // Estimated genome size, e.g. 3.2M (autodetect: blank)
    assembly_genomesize        = "''"
    // Minimum contig length (auto: 0)
    min_contig_len             = 500
    // Minimum contig coverage (auto: 0)
    min_contig_coverage        = 2

    // Dragonflye options
    // Minimum input read length (disable: 0)
    min_input_read_len         = 1000
    // Minimum average sequence quality (off: 0)
    min_input_quality          = 0
    // Number of polishing rounds to conduct with Racon
    racon_rounds               = 1
    // Number of polishing rounds to conduct with Medaka (requires a model to be specified)
    medaka_rounds              = 1
    // Path to model to be used for Medaka
    medaka_model               = "r941_min_fast_g507"

    // Taxonomy module options

    // Dehosting options
    skip_dehosting             = false
    skip_combinekreports       = false
    skip_kreport2krona         = false
    skip_bracken               = false
    skip_kraken2               = false
    skip_combinebrackenoutputs = false
    skip_quality_report        = false

    // kraken2 options
    // Path to Kraken2 database used for querying reads during taxonomic classification
    kraken2_db                 = "/mnt/cidgoh-object-storage/database/minikraken2/minikraken2_v2_8GB_201904_UPDATE"
    // Boolean for whether to output classified reads as a fastq file
    classified_reads           = true
    // Boolean for whether to output unclassified reads as a fastq file
    unclassified_reads         = true

    // centrifuge options
    centrifuge_db              = "/mnt/cidgoh-object-storage/database/centrifuge"

    save_unaligned             = true
    // Boolean for whether to save aligned reads to a fastq file
    save_aligned               = true
    // Boolean for whether to save the output as a SAM file
    sam_format                 = true

    // Aligner used for dehosting ('minimap2' or 'bwa')
    dehosting_aligner          = 'minimap2'

    // Path to (human) reference genome used for dehosting
    host_genome                = "/mnt/cidgoh-object-storage/database/reference_genomes/human/GRCh38.p14/GCF_000001405.40/GCF_000001405.40_GRCh38.p14_genomic.fna"

    // ID/name for reference genome
    host_genome_id             = 'GRCh38'

    // bwa options
    // Whether to sort ('sort') or view ('view') the BAM file obtained from BWA
    bwa_sort_bam               = 'sort'

    // minimap2 options
    // Boolean on whether to output the alignment as a BAM file or a PAF file
    bam_format                 = true
    // Boolean on whether to get Minimap2 to generate CIGAR strings at the cg tag of the PAF file
    cigar_paf_format           = false
    // Boolean on whether to get Minimap2 to move long CIGAR strings to the cg tag in the BAM file
    cigar_bam                  = false

    // samtools options
    // Boolean for whether to get Samtools fastq to generate an interleaved fastq or to write to separate files
    interleaved                = false

    // Boilerplate options
    outdir                     = null
    publish_dir_mode           = 'copy'
    email                      = null
    email_on_fail              = null
    plaintext_email            = false
    monochrome_logs            = false
    hook_url                   = null
    help                       = false
    version                    = false
    validate_params            = false
    show_hidden_params         = false
    // Comma-separated list of params excluded from schema validation.
    // Single assignment combining the two values that used to be set separately
    // ("fasta" near the top of this scope and 'genomes' here).
    schema_ignore_params       = 'genomes,fasta'

    // assembly QC options
    skip_checkm                = false
    skip_quast                 = false
    skip_busco                 = false

    // Shovill options
    sr_assembler               = 'spades' // WGS_ASSEMBLY: choice of genome assembler for short read data [skesa,spades,megahit,velvet]

    // CheckM options
    checkm_db                  = 'null' // ASSEMBLY_QC: path to local CheckM lineages directory

    // QUAST options
    reference_genome_fasta     = "null" // ASSEMBLY_QC: path to reference genome file
    reference_genome_gff       = "null" // ASSEMBLY_QC: path to reference genome annotation file (.GFF)
    combine_quast              = false // ASSEMBLY_QC: whether to aggregate individual QUAST results file

    // BUSCO options
    busco_lineage              = "bacteria" // ASSEMBLY_QC: the BUSCO lineage to use, or "auto" to automatically select lineage
    busco_lineages_path        = "/mnt/cidgoh-object-storage/database/busco" // ASSEMBLY_QC: path to local BUSCO lineages directory
    busco_config               = false // ASSEMBLY_QC: path to BUSCO config file

    // PEPPAN options
    reference_gff              = null

    // Config options
    config_profile_name        = null
    config_profile_description = null
    custom_config_version      = 'master'
    custom_config_base         = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
    config_profile_contact     = null
    config_profile_url         = null

    // pipeline options
    skip_QC                    = false
    skip_taxonomy_qc           = false
    skip_assembly              = false
    skip_assembly_qc           = false
    skip_rmlst                 = false

    // Max resource options
    // Defaults only, expecting to be overwritten
    max_memory                 = '128.GB'
    max_cpus                   = 16
    max_time                   = '240.h'

    /* Annotation options */
    skip_annotation            = false
    skip_abricate_summary      = false
    amrfinderplus_db           = null
    skip_resfinder             = false
    skip_virsorter2            = false
    skip_crispr_analysis       = false

    // AMR options
    // NOTE(review): `amr_finderplus_db` and `amrfinderplus_db` (above) look like two
    // spellings of the same option; both are kept so existing command lines keep
    // working. Confirm which one the workflow reads and retire the other.
    amr_finderplus_db          = null
    skip_amr_annotation        = false
    skip_rgi                   = false
    skip_abricate              = false
    abricate_db                = "card" // AMR: database to use for abricate
    skip_abritamr              = false
    skip_amrfinderplus         = false
    resfinder_db               = "/scratch/group_share/tmp/database/resfinder_db"
    pointfinder_db             = "/scratch/group_share/tmp/database/pointfinder_db"
    disinfinder_db             = "/scratch/group_share/tmp/database/disinfinder_db"
    resfinder_species          = "ecoli"

    // gene annotation options
    skip_gene_annotation       = false
    skip_prokka                = false
    skip_bakta                 = false
    bakta_db                   = "/mnt/cidgoh-object-storage/database/bakta/db/"
    prodigal_training_file     = null
    annotation_protein_file    = null

    // Phage options
    skip_phage_annotation      = false
    virsorter_db               = null
    virsorter_virus_grp        = 'dsDNAphage,ssDNA'
    virsorter_min_len          = 5000
    virsorter_min_score        = 0.4

    // Pangenome options
    skip_pangenome_analysis    = false
    pangenome_input            = "bakta"
    skip_roary                 = false
    skip_pirate                = false
    skip_panaroo               = false
    clean_mode                 = "moderate"
    skip_peppan                = false

    // Plasmid subworkflow options
    skip_plasmid_analysis      = false
    skip_plasmidfinder         = false
    skip_mobsuite              = false

    // CRISPR subworkflow
    skip_cctyper               = false

}

// Load base.config by default for all pipelines
includeConfig 'conf/base.config'

// Load nf-core custom profiles from different Institutions.
// The location is built from `params.custom_config_base`, which is defined in the
// params scope above. If the include throws (any Exception), a warning is written
// to stderr and config loading continues without those profiles.
try {
    includeConfig "${params.custom_config_base}/nfcore_custom.config"
} catch (Exception e) {
    // Best-effort include: report the failure but do not abort the run.
    System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config")
}

// Load nf-core/bacpaq custom profiles from different institutions.
// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs!
// try {
//   includeConfig "${params.custom_config_base}/pipeline/bacpaq.config"
// } catch (Exception e) {
//   System.err.println("WARNING: Could not load nf-core/config/bacpaq profiles: ${params.custom_config_base}/pipeline/bacpaq.config")
// }

//plugins {
//  id 'nf-validation@0.2.1'
//}

// Execution profiles, selected on the command line with `-profile <name>[,<name>...]`.
// Each software-environment profile enables exactly one engine and explicitly
// disables all the others so that combining profiles cannot leave two engines on.
profiles {
    debug {
        dumpHashes             = true
        process.beforeScript   = 'echo $HOSTNAME'
        cleanup                = false
        nextflow.enable.configProcessNamesValidation = true
    }
    conda {
        // Must be true: this profile exists to turn Conda on.
        conda.enabled          = true
        docker.enabled         = false
        singularity.enabled    = false
        podman.enabled         = false
        shifter.enabled        = false
        charliecloud.enabled   = false
        // Channel list belongs to the `conda` scope; a bare `channels` key would
        // only create an unused top-level config entry.
        conda.channels         = ['conda-forge', 'bioconda', 'defaults']
        apptainer.enabled      = false
    }
    mamba {
        conda.enabled          = true
        conda.useMamba         = true
        docker.enabled         = false
        singularity.enabled    = false
        podman.enabled         = false
        shifter.enabled        = false
        charliecloud.enabled   = false
        apptainer.enabled      = false
    }
    docker {
        docker.enabled         = true
        conda.enabled          = false
        singularity.enabled    = false
        podman.enabled         = false
        shifter.enabled        = false
        charliecloud.enabled   = false
        apptainer.enabled      = false
        // Run containers as the invoking user/group.
        docker.runOptions      = '-u $(id -u):$(id -g)'
    }
    arm {
        // Same user mapping as the docker profile, plus a forced amd64 platform.
        docker.runOptions      = '-u $(id -u):$(id -g) --platform=linux/amd64'
    }
    singularity {
        singularity.enabled    = true
        singularity.autoMounts = true
        conda.enabled          = false
        docker.enabled         = false
        podman.enabled         = false
        shifter.enabled        = false
        charliecloud.enabled   = false
        apptainer.enabled      = false
    }
    podman {
        podman.enabled         = true
        conda.enabled          = false
        docker.enabled         = false
        singularity.enabled    = false
        shifter.enabled        = false
        charliecloud.enabled   = false
        apptainer.enabled      = false
    }
    shifter {
        shifter.enabled        = true
        conda.enabled          = false
        docker.enabled         = false
        singularity.enabled    = false
        podman.enabled         = false
        charliecloud.enabled   = false
        apptainer.enabled      = false
    }
    charliecloud {
        charliecloud.enabled   = true
        conda.enabled          = false
        docker.enabled         = false
        singularity.enabled    = false
        podman.enabled         = false
        shifter.enabled        = false
        apptainer.enabled      = false
    }
    apptainer {
        apptainer.enabled      = true
        apptainer.autoMounts   = true
        conda.enabled          = false
        docker.enabled         = false
        singularity.enabled    = false
        podman.enabled         = false
        shifter.enabled        = false
        charliecloud.enabled   = false
    }
    gitpod {
        executor.name          = 'local'
        executor.cpus          = 4
        executor.memory        = 8.GB
    }
    // Profiles whose settings live in separate files under conf/.
    eagle     { includeConfig 'conf/eagle.config'     }
    test      { includeConfig 'conf/test.config'      }
    test_full { includeConfig 'conf/test_full.config' }
    eagle_2   { includeConfig 'conf/eagle_2.config'   }

}

// Export these variables to prevent local Python/R libraries from conflicting with those in the container
// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.

// Environment variables exported to every task (dotted form of the `env` scope).
env.PYTHONNOUSERSITE = 1
env.R_PROFILE_USER   = "/.Rprofile"
env.R_ENVIRON_USER   = "/.Renviron"
env.JULIA_DEPOT_PATH = "/usr/local/share/julia"

// Capture exit codes from upstream processes when piping.
// Task scripts run under bash with `-e` (exit on error), `-u` (error on unset
// variables) and `-o pipefail` (a pipeline fails if any stage fails).
process.shell = ['/bin/bash', '-euo', 'pipefail']

// Disable process selector warnings by default. Use debug profile to enable warnings.
// (The debug profile in this file sets the same flag to true.)
nextflow.enable.configProcessNamesValidation = false

// One timestamp is computed when the config is parsed and shared by all four
// report files below, so the artefacts of a single run carry the same suffix.
def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')

// Execution timeline (HTML)
timeline.enabled = true
timeline.file    = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html"

// Execution report (HTML)
report.enabled   = true
report.file      = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html"

// Execution trace (text)
trace.enabled    = true
trace.file       = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt"

// Pipeline DAG rendering (HTML)
dag.enabled      = true
dag.file         = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html"

// Pipeline metadata (dotted form of the `manifest` scope).
manifest.name            = 'cidgoh/bacpaq'
manifest.author          = """Zohaib Anwar, Jimmy Liu, Jun Duan, Aishwarya Sridhar, David Tong, Mathew Nguyen, Miguel Prieto, Michael Trimble, Soyean Kim and William Hsiao"""
manifest.homePage        = 'https://github.com/cidgoh/bacpaq'
manifest.description     = """Quality control workflow for short-reads (Illumina) and long-reads (Oxford Nanopore) sequencing data including WGS, Amplicon and Metagenomics"""
manifest.mainScript      = 'main.nf'
// Leading '!' makes the version requirement mandatory.
manifest.nextflowVersion = '!>=22.01.0'
manifest.version         = '1.0dev'
manifest.doi             = ''

// Load modules.config for DSL2 module specific options
includeConfig 'conf/modules.config'

/**
 * Ensure that a resource request does not exceed the configured maximum.
 *
 * @param obj   the requested value: a memory amount for 'memory', a duration for
 *              'time', or a number for 'cpus'
 * @param type  which limit to apply: 'memory' (params.max_memory),
 *              'time' (params.max_time) or 'cpus' (params.max_cpus)
 * @return      `obj` when it is within the limit, otherwise the limit itself.
 *              If the configured limit cannot be parsed or compared, an error line
 *              is printed and `obj` is returned unchanged. For any other `type`
 *              the function returns null.
 */
def check_max(obj, type) {
    if (type == 'memory') {
        try {
            // compareTo only guarantees the sign of its result, so test `> 0`
            // rather than equality with 1.
            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) > 0)
                return params.max_memory as nextflow.util.MemoryUnit
            else
                return obj
        } catch (all) {
            println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'time') {
        try {
            // Same sign-based comparison as for memory.
            if (obj.compareTo(params.max_time as nextflow.util.Duration) > 0)
                return params.max_time as nextflow.util.Duration
            else
                return obj
        } catch (all) {
            println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'cpus') {
        try {
            return Math.min( obj, params.max_cpus as int )
        } catch (all) {
            println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
            return obj
        }
    }
    // Unrecognised type: no limit is applied and nothing is returned.
    return null
}