Merge pull request #824 from husensofteng/master
Update kaust profile
husensofteng authored Jan 15, 2025
2 parents eb6bb4b + a0a09d4 commit 15892ce
Showing 7 changed files with 301 additions and 19 deletions.
26 changes: 10 additions & 16 deletions conf/kaust.config
@@ -4,12 +4,11 @@ params {
config_profile_description = 'Profile for use on King Abdullah University of Science and Technology (KAUST) Ibex Cluster.'
config_profile_contact = 'Husen Umer (@kaust.edu.sa)'
config_profile_url = 'https://docs.hpc.kaust.edu.sa/quickstart/ibex.html'
save_reference = false
igenomes_ignore = true
}

// Load genome resources and assets hosted by the Bioinformatics team on IBEX cluster
// includeConfig '/biocorelab/BIX/resources/configs/genomes.yaml'
includeConfig 'https://raw.githubusercontent.com/bcl-bix/reference-config/refs/heads/main/configs/genomes.config'

singularity {
enabled = true
@@ -18,7 +17,7 @@ singularity {
// Use existing images from the centralized library, if available
libraryDir = "/biocorelab/BIX/resources/singularity/images/"
// Download images that are missing from the library to user space
cacheDir = "/home/$USER/.singularity/nf_images/"
cacheDir = "/ibex/scratch/$USER/.singularity/nf_images/"
}

process {
@@ -29,39 +28,34 @@ process {
beforeScript = 'module load singularity'
// Max allowed resources per process on Ibex
resourceLimits = [
memory: 1600.GB,
cpus: 200,
time: 10.d
memory: 16.TB,
cpus: 1300,
time: 14.d
]
}

process {

withLabel:process_single {
time = 20.h
}

withLabel:process_low {
cpus = { 4 * task.attempt }
memory = { 16.GB * task.attempt }
time = { 6.h * task.attempt }
}

withLabel:process_medium {
cpus = { 20 * task.attempt }
memory = { 96.GB * task.attempt }
cpus = { 12 * task.attempt }
memory = { 97.GB * task.attempt }
time = { 12.h * task.attempt }
}

withLabel:process_high {
cpus = { 40 * task.attempt }
memory = { 256.GB * task.attempt }
cpus = 32
memory = { 240.GB * task.attempt }
time = { 20.h * task.attempt }
}

withLabel:process_long {
cpus = { 12 * task.attempt }
memory = { 128.GB * task.attempt }
memory = { 48.GB * task.attempt }
time = { 96.h * task.attempt }
}
}
192 changes: 192 additions & 0 deletions conf/pipeline/mag/kaust.config
@@ -0,0 +1,192 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
nf-core/mag Nextflow KAUST config file
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
A config file, appropriate for general use on KAUST high performance
compute environment - Ibex when using the KAUST profile.
----------------------------------------------------------------------------------------
*/
params {
// There is a known issue on Ibex with the latest version of metaSPAdes, so SPAdes is skipped by default
skip_spades = true
}

process {
cpus = { 4 * task.attempt }
memory = { 16.GB * task.attempt }
time = { 4.h * task.attempt }

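// Retry on exit codes that typically indicate a resource-related kill (130-145 are signal exits, e.g. 137 = SIGKILL) plus 104; otherwise finish pending tasks and stop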
errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
maxRetries = 3
maxErrors = '-1'

// Process-specific resource requirements
// NOTE - Please try and reuse the labels below as much as possible.
// These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
// If possible, it would be nice to keep the same label naming convention when
// adding in your local modules too.
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
withLabel: process_single {
cpus = { 1 }
memory = { 16.GB * task.attempt }
time = { 6.h * task.attempt }
}
withLabel: process_low {
cpus = { 4 * task.attempt }
memory = { 16.GB * task.attempt }
time = { 6.h * task.attempt }
}
withLabel: process_medium {
cpus = { 12 * task.attempt }
memory = { 97.GB * task.attempt }
time = { 12.h * task.attempt }
}
withLabel: process_high {
cpus = { 32 }
memory = { 240.GB * task.attempt }
time = { 20.h * task.attempt }
}
withLabel:process_long {
cpus = { 8 * task.attempt }
memory = { 32.GB * task.attempt }
time = { 96.h * task.attempt }
}
withLabel: process_high_memory {
memory = { 360.GB * task.attempt }
}
withLabel: error_ignore {
errorStrategy = 'ignore'
}
withLabel: error_retry {
errorStrategy = 'retry'
maxRetries = 3
}
// Process-specific resource requirements
withName: BOWTIE2_HOST_REMOVAL_BUILD {
cpus = { 12 * task.attempt }
memory = { 48.GB * task.attempt }
time = { 8.h * task.attempt }
}
withName: BOWTIE2_HOST_REMOVAL_ALIGN {
cpus = { 6 * task.attempt }
memory = { 24.GB * task.attempt }
time = { 6.h * task.attempt }
}
withName: BOWTIE2_PHIX_REMOVAL_ALIGN {
cpus = { 6 * task.attempt }
memory = { 24.GB * task.attempt }
time = { 6.h * task.attempt }
}
withName: PORECHOP_PORECHOP {
cpus = { 12 * task.attempt }
memory = { 48.GB * task.attempt }
time = { 4.h * task.attempt }
}
withName: NANOLYSE {
cpus = { 6 * task.attempt }
memory = { 24.GB * task.attempt }
time = { 3.h * task.attempt }
}
//filtlong: exponential increase of memory and time with attempts
withName: FILTLONG {
cpus = { 16 * task.attempt }
memory = { 97.GB * (2 ** (task.attempt - 1)) }
time = { 24.h * (2 ** (task.attempt - 1)) }
}
withName: CENTRIFUGE_CENTRIFUGE {
cpus = { 12 * task.attempt }
memory = { 48.GB * task.attempt }
time = { 12.h * task.attempt }
}
withName: KRAKEN2 {
cpus = { 16 * task.attempt }
memory = { 240.GB * task.attempt }
time = { 20.h * task.attempt }
}
withName: KRONA_KTIMPORTTAXONOMY {
cpus = { 16 * task.attempt }
memory = { 240.GB * task.attempt }
time = { 20.h * task.attempt }
}
withName: CAT_DB_GENERATE {
memory = { 240.GB * task.attempt }
time = { 16.h * task.attempt }
}
withName: CAT {
cpus = { 16 * task.attempt }
memory = { 240.GB * task.attempt }
time = { 20.h * task.attempt }
}
withName: GTDBTK_CLASSIFYWF {
cpus = { 16 * task.attempt }
memory = { 240.GB * task.attempt }
time = { 20.h * task.attempt }
}
//MEGAHIT returns exit code 250 when running out of memory
withName: MEGAHIT {
cpus = { params.megahit_fix_cpu_1 ? 1 : (16 * task.attempt) }
memory = { 240.GB * task.attempt }
time = { 20.h * task.attempt }
errorStrategy = { task.exitStatus in ((130..145) + 104 + 247 + 250) ? 'retry' : 'finish' }
}
//SPAdes returns error(1) if it runs out of memory (and for other reasons as well...)!
//exponential increase of memory and time with attempts, keep number of threads to enable reproducibility
withName: SPADES {
cpus = { params.spades_fix_cpus != -1 ? params.spades_fix_cpus : (12 * task.attempt) }
memory = { 240.GB * (2 ** (task.attempt - 1)) }
time = { 24.h * (2 ** (task.attempt - 1)) }
errorStrategy = { task.exitStatus in ((130..145) + 104 + 21 + 12 + 1 + 247) ? 'retry' : 'finish' }
maxRetries = 5
}
withName: SPADESHYBRID {
cpus = { params.spadeshybrid_fix_cpus != -1 ? params.spadeshybrid_fix_cpus : (12 * task.attempt) }
memory = { 240.GB * (2 ** (task.attempt - 1)) }
time = { 24.h * (2 ** (task.attempt - 1)) }
errorStrategy = { task.exitStatus in ((130..145) + 104 + 21 + 12 + 1 + 247) ? 'retry' : 'finish' }
maxRetries = 5
}
//returns exit code 247 when running out of memory
withName: BOWTIE2_ASSEMBLY_ALIGN {
cpus = { 20 * task.attempt }
memory = { 180.GB * task.attempt }
time = { 16.h * task.attempt }
errorStrategy = { task.exitStatus in ((130..145) + 104 + 247) ? 'retry' : 'finish' }
}
withName: METABAT2_METABAT2 {
cpus = { 20 * task.attempt }
memory = { 180.GB * task.attempt }
time = { 8.h * task.attempt }
}
withName: MAG_DEPTHS {
memory = { 97.GB * task.attempt }
}
withName: BUSCO {
cpus = { 20 * task.attempt }
memory = { 180.GB * task.attempt }
}
withName: MAXBIN2 {
errorStrategy = { task.exitStatus in [1, 255] ? 'ignore' : 'retry' }
cpus = { 20 * task.attempt }
memory = { 180.GB * task.attempt }
}
withName: DASTOOL_DASTOOL {
errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : task.exitStatus == 1 ? 'ignore' : 'finish' }
}
withName: BOWTIE2_ASSEMBLY_BUILD {
cpus = { 20 * task.attempt }
memory = { 180.GB * task.attempt }
}
withName: PRODIGAL {
cpus = { 20 * task.attempt }
memory = { 180.GB * task.attempt }
}
}
46 changes: 46 additions & 0 deletions conf/pipeline/rnaseq/kaust.config
@@ -0,0 +1,46 @@
process {

cpus = { 4 * task.attempt }
memory = { 16.GB * task.attempt }
time = { 4.h * task.attempt }

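// Exit codes below commonly correspond to resource-related failures (e.g. 137 = SIGKILL, 143 = SIGTERM, often out-of-memory kills); retry these, otherwise finish pending tasks and stop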
errorStrategy = { task.exitStatus in [143,137,104,134,139,151,140,247,12] ? 'retry' : 'finish' }
maxRetries = 3
maxErrors = '-1'

withLabel:process_single {
cpus = { 4 }
memory = { 32.GB * task.attempt }
time = { 4.h * task.attempt }
}
withLabel:process_low {
cpus = { 4 * task.attempt }
memory = { 16.GB * task.attempt }
time = { 6.h * task.attempt }
}
withLabel:process_medium {
cpus = { 12 * task.attempt }
memory = { 97.GB * task.attempt }
time = { 12.h * task.attempt }
}
withLabel:process_high {
cpus = 32
memory = { 240.GB * task.attempt }
time = { 20.h * task.attempt }
}
withLabel:process_long {
cpus = { 12 * task.attempt }
memory = { 48.GB * task.attempt }
time = { 96.h * task.attempt }
}
withLabel:process_high_memory {
memory = { 240.GB * task.attempt }
}
withLabel:error_ignore {
errorStrategy = 'ignore'
}
withLabel:error_retry {
errorStrategy = 'retry'
maxRetries = 3
}
}
23 changes: 20 additions & 3 deletions docs/kaust.md
@@ -15,7 +15,7 @@ is to use the [module system](https://docs.hpc.kaust.edu.sa/soft_env/prog_env/mo

```bash
# Log in to the desired cluster
ssh <USER>@ilogin.kaust.edu.sa
ssh <USER>@ilogin.ibex.kaust.edu.sa

# Activate the modules; you can also choose to use a specific version, e.g. `Nextflow/24.04.4`.
module load nextflow
@@ -24,12 +24,25 @@ module load nextflow
Launch the pipeline with `-profile kaust` (one hyphen) to run the workflows using the KAUST profile.
This will download and launch the [`kaust.config`](../conf/kaust.config) which has been pre-configured with a setup suitable for the KAUST servers.
It will enable `Nextflow` to manage the pipeline jobs via the `Slurm` job scheduler and `Singularity` to run the tasks.
Using the KAUST profile, `Docker` image(s) containing required software(s) will be downloaded, and converted to `Singularity` image(s) if needed before execution of the pipeline. To avoid downloading same images by multiple users, we provide a singularity `libraryDir` that is configured to use images already downloaded in our central container library. Images missing from our library will be downloaded to your home directory path as defined by `cacheDir`.
Using the KAUST profile, the `Docker` images containing the required software are downloaded and, if needed, converted to `Singularity` images before the pipeline is executed. To avoid multiple users downloading the same images, we provide a Singularity `libraryDir` that is configured to reuse images already available in our central container library. Images missing from the library are downloaded to the user's directory as defined by `cacheDir`.

Additionally, institute-specific pipeline profiles exist for:

- mag
- rnaseq

### Accessing reference genomes on Ibex

We provide a collection of reference genomes on Ibex, enabling users to run workflows seamlessly without having to download the files themselves. To access these resources, pass the species/genome key via the `--genome` parameter.
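
For example, a run might look like the sketch below, where `GRCh38` is an illustrative genome key (the available keys depend on the genome config hosted on Ibex) and the input/output paths are placeholders:

```bash
# GRCh38 is an illustrative genome key; check the hosted genome config for the keys actually available
nextflow run nf-core/rnaseq -profile kaust --genome GRCh38 --input samplesheet.csv --outdir results
```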

### Run workflows on Ibex

The KAUST profile makes running the nf-core workflows as simple as:

```bash

# Load Nextflow and Singularity modules
module purge
module load nextflow
module load singularity

@@ -40,4 +53,8 @@ $ nextflow run nf-core/<PIPELINE> -profile kaust -r <PIPELINE_VERSION> --genome
Where `input_csv` contains information about the samples and data file paths.

Remember to use `-bg` to launch `Nextflow` in the background, so that the pipeline doesn't exit if you leave your terminal session.
Alternatively, you can also launch `Nextflow` in a `tmux` or a `screen` session.
Alternatively, you can launch a `tmux` or a `screen` session and run the commands above from there. Another good option is to run the workflow as an independent `sbatch` job, as [explained here](https://bclwiki.kaust.edu.sa/en/bix/analysis/public/bioinformatics-workflows#run-workflow-using-sbatch).
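
A minimal `sbatch` wrapper could look like the sketch below; the job name, head-job resources, pipeline placeholders and parameters are illustrative and should be adapted (no partition or account is set here, as these are site- and user-specific):

```bash
#!/bin/bash
#SBATCH --job-name=nf-core-pipeline
#SBATCH --time=48:00:00
#SBATCH --cpus-per-task=2
#SBATCH --mem=8G

# Load the required modules on the submission node
module purge
module load nextflow
module load singularity

# The Nextflow head job is lightweight; the kaust profile submits the actual tasks to Slurm
nextflow run nf-core/<PIPELINE> -profile kaust -r <PIPELINE_VERSION> --input samplesheet.csv --outdir results
```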

### Workflow-specific profiles

Please let us know if there are particular processes that continuously fail so that we can adjust the defaults in the corresponding pipeline profile.
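
As a stop-gap until the defaults are updated, resources for an individual process can be overridden per run with a small custom config passed via `-c`. The sketch below is illustrative only; the process name and values are examples, not recommended settings:

```bash
# Write an illustrative override for a single process and pass it to Nextflow with -c
cat > my_overrides.config <<'EOF'
process {
    withName: 'SPADES' {
        memory = 480.GB
        time   = 48.h
    }
}
EOF

nextflow run nf-core/mag -profile kaust -c my_overrides.config --input samplesheet.csv --outdir results
```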
15 changes: 15 additions & 0 deletions docs/pipeline/mag/kaust.md
@@ -0,0 +1,15 @@
# nf-core/configs: kaust mag specific configuration

Extra configuration specific to the [nf-co.re/mag](https://nf-co.re/mag) pipeline.

## Usage

To use, run the pipeline with `-profile kaust`.

This will download and launch the mag specific [`kaust.config`](../../../conf/pipeline/mag/kaust.config) which has been pre-configured with a setup suitable for the Ibex cluster.

Example: `nextflow run nf-core/mag -profile kaust`

## mag specific configurations for kaust

Specific configurations for KAUST have been made for mag, primarily increasing the default resource allocations, especially for high-demand processes.
15 changes: 15 additions & 0 deletions docs/pipeline/rnaseq/kaust.md
@@ -0,0 +1,15 @@
# nf-core/configs: kaust rnaseq specific configuration

Specific configuration for the [nf-co.re/rnaseq](https://nf-co.re/rnaseq) pipeline.

## Usage

To use, run the pipeline with `-profile kaust`.

This will download and launch the rnaseq specific [`kaust.config`](../../../conf/pipeline/rnaseq/kaust.config) which has been pre-configured with a setup suitable for the Ibex cluster.

Example: `nextflow run nf-core/rnaseq -profile kaust`

## rnaseq specific configurations for kaust

Specific configurations for KAUST have been made for rnaseq, primarily increasing the default resource allocations, especially for high-demand processes.
3 changes: 3 additions & 0 deletions pipeline/mag.config
@@ -15,4 +15,7 @@ profiles {
eva {
includeConfig "${params.custom_config_base}/conf/pipeline/mag/eva.config"
}
kaust {
includeConfig "${params.custom_config_base}/conf/pipeline/mag/kaust.config"
}
}
